├── __init__.pyc
├── facenet.pyc
├── d_npy
│   ├── det1.npy
│   ├── det2.npy
│   └── det3.npy
├── detect_face.pyc
├── requirements.txt
├── create_dir_setup.sh
├── LICENSE
├── README.md
├── create_classifier_se.py
├── aligndata_first.py
├── detect_facese_real_time.py
├── detect_facese_real_time_with_incFrame.py
├── facenet.py
└── detect_face.py
--------------------------------------------------------------------------------
/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/__init__.pyc
--------------------------------------------------------------------------------
/facenet.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/facenet.pyc
--------------------------------------------------------------------------------
/d_npy/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/d_npy/det1.npy
--------------------------------------------------------------------------------
/d_npy/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/d_npy/det2.npy
--------------------------------------------------------------------------------
/d_npy/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/d_npy/det3.npy
--------------------------------------------------------------------------------
/detect_face.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/detect_face.pyc
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow==1.8.0
2 | scipy==1.0.0
3 | matplotlib==2.1.2
4 | six==1.11.0
5 | numpy==1.14.0
6 | scikit_learn==0.19.1
7 | 
--------------------------------------------------------------------------------
/create_dir_setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | echo 'creating dir structure for project'
4 | 
5 | mkdir 'input_dir'
6 | mkdir 'out_dir'
7 | mkdir 'my_class'
8 | mkdir 'pre_model'
9 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 icode
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # real-time-face-recognition-with-facenet
2 | 
3 | I remember my first day on the job, when I was assigned to work on a face recognition system. At the time, building a classifier that could do this well felt like a dream: I was using plain OpenCV to detect faces and then creating a unique vector for each face, but its accuracy was far too low for any real application. A few months ago I read the paper "FaceNet: A Unified Embedding for Face Recognition and Clustering", which presents a unified system for face verification, recognition and clustering.
4 | 
5 | 
6 | 
7 | FaceNet is based on learning a Euclidean embedding per image with a deep convolutional network; embedding algorithms search for a low-dimensional continuous representation of the data. The network is trained so that squared L2 distances in the embedding space directly correspond to face similarity: faces of the same person have small distances, and faces of distinct people have large distances.
8 | 
9 | 
10 | 
11 | Once this embedding has been produced, the aforementioned tasks become straightforward: face verification simply involves thresholding the distance between two embeddings; recognition becomes a k-NN classification problem; and clustering can be achieved with off-the-shelf techniques such as k-means or agglomerative clustering.
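
To make this concrete, here is a minimal verification sketch (assuming `emb1` and `emb2` are embeddings produced by the pre-trained model used in this repo; the threshold value is only a placeholder you would tune on your own data):

```python
import numpy as np

def is_same_person(emb1, emb2, threshold=1.1):
    # Squared L2 distance between two face embeddings: a small distance
    # means "same person". The threshold is dataset-dependent.
    dist = np.sum(np.square(np.subtract(emb1, emb2)))
    return dist < threshold
```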
12 | 13 | [Complete Post is Here](https://www.linkedin.com/pulse/real-time-face-recognition-using-facenet-ishwar-sawale/) 14 | -------------------------------------------------------------------------------- /create_classifier_se.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import argparse 8 | import facenet 9 | import detect_face 10 | import os 11 | import sys 12 | import math 13 | import pickle 14 | from sklearn.svm import SVC 15 | 16 | 17 | with tf.Graph().as_default(): 18 | 19 | with tf.Session() as sess: 20 | 21 | datadir = './out_dir' 22 | dataset = facenet.get_dataset(datadir) 23 | paths, labels = facenet.get_image_paths_and_labels(dataset) 24 | print('Number of classes: %d' % len(dataset)) 25 | print('Number of images: %d' % len(paths)) 26 | 27 | print('Loading feature extraction model') 28 | modeldir = './pre_model/20170511-185253.pb' 29 | facenet.load_model(modeldir) 30 | 31 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 32 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 33 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 34 | embedding_size = embeddings.get_shape()[1] 35 | 36 | # Run forward pass to calculate embeddings 37 | print('Calculating features for images') 38 | batch_size = 1000 39 | image_size = 160 40 | nrof_images = len(paths) 41 | nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / batch_size)) 42 | emb_array = np.zeros((nrof_images, embedding_size)) 43 | for i in range(nrof_batches_per_epoch): 44 | start_index = i * batch_size 45 | end_index = min((i + 1) * batch_size, nrof_images) 46 | paths_batch = paths[start_index:end_index] 47 | images = facenet.load_data(paths_batch, False, False, image_size) 48 | feed_dict = {images_placeholder: images, phase_train_placeholder: False} 49 | emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict) 50 | 51 | classifier_filename = './my_class/my_classifier.pkl' 52 | classifier_filename_exp = os.path.expanduser(classifier_filename) 53 | 54 | # Train classifier 55 | print('Training classifier') 56 | model = SVC(kernel='linear', probability=True) 57 | model.fit(emb_array, labels) 58 | 59 | # Create a list of class names 60 | class_names = [cls.name.replace('_', ' ') for cls in dataset] 61 | 62 | # Saving classifier model 63 | with open(classifier_filename_exp, 'wb') as outfile: 64 | pickle.dump((model, class_names), outfile) 65 | print('Saved classifier model to file "%s"' % classifier_filename_exp) 66 | print('Goodluck') 67 | -------------------------------------------------------------------------------- /aligndata_first.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from scipy import misc 6 | import sys 7 | import os 8 | import argparse 9 | import tensorflow as tf 10 | import numpy as np 11 | import facenet 12 | import detect_face 13 | import random 14 | from time import sleep 15 | 16 | output_dir_path = './out_dir' 17 | output_dir = os.path.expanduser(output_dir_path) 18 | if not os.path.exists(output_dir): 19 | os.makedirs(output_dir) 20 | 21 | datadir = './input_dir' 22 | dataset = facenet.get_dataset(datadir) 23 | 24 | 
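# detect_face.create_mtcnn below builds the three-stage MTCNN cascade: PNet
# scans an image pyramid for candidate face windows, RNet filters and refines
# them, and ONet outputs the final boxes (plus facial landmarks). The
# pretrained weights for the three stages are det1.npy, det2.npy and det3.npy
# under ./d_npy.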
print('Creating networks and loading parameters') 25 | with tf.Graph().as_default(): 26 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 27 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 28 | with sess.as_default(): 29 | pnet, rnet, onet = detect_face.create_mtcnn(sess, './d_npy') 30 | 31 | minsize = 20 # minimum size of face 32 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 33 | factor = 0.709 # scale factor 34 | margin = 44 35 | image_size = 182 36 | 37 | # Add a random key to the filename to allow alignment using multiple processes 38 | random_key = np.random.randint(0, high=99999) 39 | bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) 40 | print('Goodluck') 41 | 42 | with open(bounding_boxes_filename, "w") as text_file: 43 | nrof_images_total = 0 44 | nrof_successfully_aligned = 0 45 | for cls in dataset: 46 | output_class_dir = os.path.join(output_dir, cls.name) 47 | if not os.path.exists(output_class_dir): 48 | os.makedirs(output_class_dir) 49 | for image_path in cls.image_paths: 50 | nrof_images_total += 1 51 | filename = os.path.splitext(os.path.split(image_path)[1])[0] 52 | output_filename = os.path.join(output_class_dir, filename + '.png') 53 | print(image_path) 54 | if not os.path.exists(output_filename): 55 | try: 56 | img = misc.imread(image_path) 57 | print('read data dimension: ', img.ndim) 58 | except (IOError, ValueError, IndexError) as e: 59 | errorMessage = '{}: {}'.format(image_path, e) 60 | print(errorMessage) 61 | else: 62 | if img.ndim < 2: 63 | print('Unable to align "%s"' % image_path) 64 | text_file.write('%s\n' % (output_filename)) 65 | continue 66 | if img.ndim == 2: 67 | img = facenet.to_rgb(img) 68 | print('to_rgb data dimension: ', img.ndim) 69 | img = img[:, :, 0:3] 70 | print('after data dimension: ', img.ndim) 71 | 72 | bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 73 | nrof_faces = bounding_boxes.shape[0] 74 | print('detected_face: %d' % nrof_faces) 75 | if nrof_faces > 0: 76 | det = bounding_boxes[:, 0:4] 77 | img_size = np.asarray(img.shape)[0:2] 78 | if nrof_faces > 1: 79 | bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) 80 | img_center = img_size / 2 81 | offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1], 82 | (det[:, 1] + det[:, 3]) / 2 - img_center[0]]) 83 | offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) 84 | index = np.argmax(bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering 85 | det = det[index, :] 86 | det = np.squeeze(det) 87 | bb_temp = np.zeros(4, dtype=np.int32) 88 | 89 | bb_temp[0] = det[0] 90 | bb_temp[1] = det[1] 91 | bb_temp[2] = det[2] 92 | bb_temp[3] = det[3] 93 | try: 94 | cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :] 95 | scaled_temp = misc.imresize(cropped_temp, (image_size, image_size), interp='bilinear') 96 | nrof_successfully_aligned += 1 97 | misc.imsave(output_filename, scaled_temp) 98 | # text_file.write('%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3])) 99 | except Exception as e: 100 | os.remove(image_path) 101 | else: 102 | print('Unable to align "%s"' % image_path) 103 | text_file.write('%s\n' % (output_filename)) 104 | 105 | print('Total number of images: %d' % nrof_images_total) 106 | print('Number of successfully aligned images: %d' % nrof_successfully_aligned) 107 | 108 | 109 | 110 | 
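Note: `margin = 44` is defined above but never applied; the crop in aligndata_first.py uses the raw MTCNN box coordinates. For reference, a hedged sketch of the margin padding that the upstream FaceNet alignment script performs before cropping (the helper name `pad_box` is ours, not part of this repo):

```python
import numpy as np

def pad_box(det, img_shape, margin=44):
    # Expand an MTCNN box (x1, y1, x2, y2) by `margin` pixels in total,
    # clipping the result to the image bounds; img_shape is (height, width).
    bb = np.zeros(4, dtype=np.int32)
    bb[0] = np.maximum(det[0] - margin / 2, 0)
    bb[1] = np.maximum(det[1] - margin / 2, 0)
    bb[2] = np.minimum(det[2] + margin / 2, img_shape[1])
    bb[3] = np.minimum(det[3] + margin / 2, img_shape[0])
    return bb
```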
-------------------------------------------------------------------------------- /detect_facese_real_time.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from scipy import misc 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import argparse 11 | import facenet 12 | import detect_face 13 | import os 14 | from os.path import join as pjoin 15 | import sys 16 | import time 17 | import copy 18 | import math 19 | import pickle 20 | from sklearn.svm import SVC 21 | from sklearn.externals import joblib 22 | 23 | print('Creating networks and loading parameters') 24 | with tf.Graph().as_default(): 25 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6) 26 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 27 | with sess.as_default(): 28 | pnet, rnet, onet = detect_face.create_mtcnn(sess, './d_npy') 29 | 30 | minsize = 20 # minimum size of face 31 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 32 | factor = 0.709 # scale factor 33 | margin = 44 34 | frame_interval = 3 35 | batch_size = 1000 36 | image_size = 182 37 | input_image_size = 160 38 | 39 | HumanNames = os.listdir("./input_dir") 40 | HumanNames.sort() 41 | 42 | print('Loading feature extraction model') 43 | modeldir = './pre_model/20170511-185253.pb' 44 | facenet.load_model(modeldir) 45 | 46 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 47 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 48 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 49 | embedding_size = embeddings.get_shape()[1] 50 | 51 | classifier_filename = './my_class/my_classifier.pkl' 52 | classifier_filename_exp = os.path.expanduser(classifier_filename) 53 | with open(classifier_filename_exp, 'rb') as infile: 54 | (model, class_names) = pickle.load(infile) 55 | print('load classifier file-> %s' % classifier_filename_exp) 56 | 57 | video_capture = cv2.VideoCapture(0) 58 | c = 0 59 | 60 | # #video writer 61 | fourcc = cv2.VideoWriter_fourcc(*'DIVX') 62 | out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=30, frameSize=(640,480)) 63 | 64 | print('Start Recognition!') 65 | prevTime = 0 66 | while True: 67 | ret, frame = video_capture.read() 68 | 69 | frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional) 70 | 71 | curTime = time.time()+1 # calc fps 72 | timeF = frame_interval 73 | 74 | if (c % timeF == 0): 75 | find_results = [] 76 | 77 | if frame.ndim == 2: 78 | frame = facenet.to_rgb(frame) 79 | frame = frame[:, :, 0:3] 80 | bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor) 81 | nrof_faces = bounding_boxes.shape[0] 82 | print('Detected_FaceNum: %d' % nrof_faces) 83 | 84 | if nrof_faces > 0: 85 | det = bounding_boxes[:, 0:4] 86 | img_size = np.asarray(frame.shape)[0:2] 87 | 88 | cropped = [] 89 | scaled = [] 90 | scaled_reshape = [] 91 | bb = np.zeros((nrof_faces,4), dtype=np.int32) 92 | 93 | for i in range(nrof_faces): 94 | emb_array = np.zeros((1, embedding_size)) 95 | 96 | bb[i][0] = det[i][0] 97 | bb[i][1] = det[i][1] 98 | bb[i][2] = det[i][2] 99 | bb[i][3] = det[i][3] 100 | 101 | # inner exception 102 | if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame): 103 | print('face is inner of range!') 104 | 
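                    # Skip boxes that touch or cross the frame border: the crop
                    # below would otherwise slice outside the image
                    # (len(frame[0]) is the frame width, len(frame) its height).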
continue 105 | 106 | cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) 107 | cropped[i] = facenet.flip(cropped[i], False) 108 | scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear')) 109 | scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size), 110 | interpolation=cv2.INTER_CUBIC) 111 | scaled[i] = facenet.prewhiten(scaled[i]) 112 | scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3)) 113 | feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False} 114 | emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict) 115 | predictions = model.predict_proba(emb_array) 116 | print(predictions) 117 | best_class_indices = np.argmax(predictions, axis=1) 118 | print(best_class_indices) 119 | best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices] 120 | print(best_class_probabilities) 121 | cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face 122 | 123 | #plot result idx under box 124 | text_x = bb[i][0] 125 | text_y = bb[i][3] + 20 126 | print('result: ', best_class_indices[0]) 127 | print(best_class_indices) 128 | print(HumanNames) 129 | for H_i in HumanNames: 130 | print(H_i) 131 | if HumanNames[best_class_indices[0]] == H_i: 132 | result_names = HumanNames[best_class_indices[0]] 133 | cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 134 | 1, (0, 0, 255), thickness=1, lineType=2) 135 | else: 136 | print('Unable to align') 137 | 138 | sec = curTime - prevTime 139 | prevTime = curTime 140 | fps = 1 / (sec) 141 | str = 'FPS: %2.3f' % fps 142 | text_fps_x = len(frame[0]) - 150 143 | text_fps_y = 20 144 | cv2.putText(frame, str, (text_fps_x, text_fps_y), 145 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2) 146 | # c+=1 147 | cv2.imshow('Video', frame) 148 | 149 | if cv2.waitKey(1) & 0xFF == ord('q'): 150 | break 151 | 152 | video_capture.release() 153 | # #video writer 154 | out.release() 155 | cv2.destroyAllWindows() 156 | -------------------------------------------------------------------------------- /detect_facese_real_time_with_incFrame.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from scipy import misc 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import argparse 11 | import facenet 12 | import detect_face 13 | import os 14 | from os.path import join as pjoin 15 | import sys 16 | import time 17 | import copy 18 | import math 19 | import pickle 20 | from sklearn.svm import SVC 21 | from sklearn.externals import joblib 22 | 23 | print('Creating networks and loading parameters') 24 | with tf.Graph().as_default(): 25 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6) 26 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 27 | with sess.as_default(): 28 | pnet, rnet, onet = detect_face.create_mtcnn(sess, './d_npy') 29 | 30 | minsize = 20 # minimum size of face 31 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 32 | factor = 0.709 # scale factor 33 | margin = 44 34 | frame_interval = 3 35 | batch_size = 1000 36 | image_size = 182 37 | input_image_size = 160 38 | 39 | HumanNames = os.listdir("./input_dir") 40 | HumanNames.sort() 41 | print('Loading feature 
extraction model') 42 | modeldir = './pre_model/20170511-185253.pb' 43 | facenet.load_model(modeldir) 44 | 45 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 46 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 47 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 48 | embedding_size = embeddings.get_shape()[1] 49 | 50 | classifier_filename = './my_class/my_classifier.pkl' 51 | classifier_filename_exp = os.path.expanduser(classifier_filename) 52 | with open(classifier_filename_exp, 'rb') as infile: 53 | (model, class_names) = pickle.load(infile) 54 | print('load classifier file-> %s' % classifier_filename_exp) 55 | 56 | video_capture = cv2.VideoCapture(0) 57 | c = 0 58 | counter = 1 59 | # #video writer 60 | fourcc = cv2.VideoWriter_fourcc(*'DIVX') 61 | out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=14, frameSize=(640,480)) 62 | 63 | print('Start Recognition!') 64 | prevTime = 0 65 | while True: 66 | ret, frame = video_capture.read() 67 | frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional) 68 | 69 | curTime = time.time()+1 # calc fps 70 | timeF = frame_interval 71 | counter += 1 72 | if (counter % 12 == 0): 73 | if (c % timeF == 0): 74 | find_results = [] 75 | 76 | if frame.ndim == 2: 77 | frame = facenet.to_rgb(frame) 78 | frame = frame[:, :, 0:3] 79 | bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor) 80 | nrof_faces = bounding_boxes.shape[0] 81 | print('Detected_FaceNum: %d' % nrof_faces) 82 | 83 | if nrof_faces > 0: 84 | det = bounding_boxes[:, 0:4] 85 | img_size = np.asarray(frame.shape)[0:2] 86 | 87 | cropped = [] 88 | scaled = [] 89 | scaled_reshape = [] 90 | bb = np.zeros((nrof_faces,4), dtype=np.int32) 91 | 92 | for i in range(nrof_faces): 93 | emb_array = np.zeros((1, embedding_size)) 94 | 95 | bb[i][0] = det[i][0] 96 | bb[i][1] = det[i][1] 97 | bb[i][2] = det[i][2] 98 | bb[i][3] = det[i][3] 99 | 100 | # inner exception 101 | if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame): 102 | print('face is inner of range!') 103 | continue 104 | 105 | cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) 106 | cropped[i] = facenet.flip(cropped[i], False) 107 | scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear')) 108 | scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size), 109 | interpolation=cv2.INTER_CUBIC) 110 | scaled[i] = facenet.prewhiten(scaled[i]) 111 | scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3)) 112 | feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False} 113 | emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict) 114 | predictions = model.predict_proba(emb_array) 115 | print(predictions) 116 | best_class_indices = np.argmax(predictions, axis=1) 117 | print(best_class_indices) 118 | best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices] 119 | print(best_class_probabilities) 120 | cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face 121 | 122 | #plot result idx under box 123 | text_x = bb[i][0] 124 | text_y = bb[i][3] + 20 125 | print('result: ', best_class_indices[0]) 126 | print(best_class_indices) 127 | print(HumanNames) 128 | for H_i in HumanNames: 129 | print(H_i) 130 | if HumanNames[best_class_indices[0]] == H_i: 131 | result_names = HumanNames[best_class_indices[0]] 132 | 
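                        # NOTE: best_class_indices indexes into alphabetically
                        # sorted class names, so this assumes ./input_dir still
                        # holds the same class folders the classifier was
                        # trained on (out_dir mirrors input_dir). In that case
                        # HumanNames[best_class_indices[0]] is already the
                        # predicted name and this loop is effectively redundant.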
cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 133 | 1, (0, 0, 255), thickness=1, lineType=2) 134 | else: 135 | print('Unable to align') 136 | 137 | sec = curTime - prevTime 138 | prevTime = curTime 139 | fps = 1 / (sec) 140 | str = 'FPS: %2.3f' % fps 141 | text_fps_x = len(frame[0]) - 150 142 | text_fps_y = 20 143 | cv2.putText(frame, str, (text_fps_x, text_fps_y), 144 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2) 145 | # c+=1 146 | cv2.imshow('Video', frame) 147 | 148 | if cv2.waitKey(1) & 0xFF == ord('q'): 149 | break 150 | 151 | video_capture.release() 152 | # #video writer 153 | out.release() 154 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /facenet.py: -------------------------------------------------------------------------------- 1 | """Functions for building the face recognition network. 2 | """ 3 | # MIT License 4 | # 5 | # Copyright (c) 2016 David Sandberg 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | # pylint: disable=missing-docstring 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import os 31 | from subprocess import Popen, PIPE 32 | import tensorflow as tf 33 | from tensorflow.python.framework import ops 34 | import numpy as np 35 | from scipy import misc 36 | from sklearn.model_selection import KFold 37 | from scipy import interpolate 38 | from tensorflow.python.training import training 39 | import random 40 | import re 41 | from tensorflow.python.platform import gfile 42 | 43 | def triplet_loss(anchor, positive, negative, alpha): 44 | """Calculate the triplet loss according to the FaceNet paper 45 | 46 | Args: 47 | anchor: the embeddings for the anchor images. 48 | positive: the embeddings for the positive images. 49 | negative: the embeddings for the negative images. 50 | 51 | Returns: 52 | the triplet loss according to the FaceNet paper as a float tensor. 
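
    In symbols, with margin alpha:
        L = mean(max(||f(a) - f(p)||^2 - ||f(a) - f(n)||^2 + alpha, 0))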
53 | """ 54 | with tf.variable_scope('triplet_loss'): 55 | pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) 56 | neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) 57 | 58 | basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) 59 | loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) 60 | 61 | return loss 62 | 63 | def decov_loss(xs): 64 | """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf 65 | 'Reducing Overfitting In Deep Networks by Decorrelating Representation' 66 | """ 67 | x = tf.reshape(xs, [int(xs.get_shape()[0]), -1]) 68 | m = tf.reduce_mean(x, 0, True) 69 | z = tf.expand_dims(x-m, 2) 70 | corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0) 71 | corr_frob_sqr = tf.reduce_sum(tf.square(corr)) 72 | corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr))) 73 | loss = 0.5*(corr_frob_sqr - corr_diag_sqr) 74 | return loss 75 | 76 | def center_loss(features, label, alfa, nrof_classes): 77 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 78 | (http://ydwen.github.io/papers/WenECCV16.pdf) 79 | """ 80 | nrof_features = features.get_shape()[1] 81 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 82 | initializer=tf.constant_initializer(0), trainable=False) 83 | label = tf.reshape(label, [-1]) 84 | centers_batch = tf.gather(centers, label) 85 | diff = (1 - alfa) * (centers_batch - features) 86 | centers = tf.scatter_sub(centers, label, diff) 87 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 88 | return loss, centers 89 | 90 | def get_image_paths_and_labels(dataset): 91 | image_paths_flat = [] 92 | labels_flat = [] 93 | for i in range(len(dataset)): 94 | image_paths_flat += dataset[i].image_paths 95 | labels_flat += [i] * len(dataset[i].image_paths) 96 | return image_paths_flat, labels_flat 97 | 98 | def shuffle_examples(image_paths, labels): 99 | shuffle_list = list(zip(image_paths, labels)) 100 | random.shuffle(shuffle_list) 101 | image_paths_shuff, labels_shuff = zip(*shuffle_list) 102 | return image_paths_shuff, labels_shuff 103 | 104 | def read_images_from_disk(input_queue): 105 | """Consumes a single filename and label as a ' '-delimited string. 106 | Args: 107 | filename_and_label_tensor: A scalar string tensor. 108 | Returns: 109 | Two tensors: the decoded image, and the string label. 
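
    PNG input is assumed: tf.image.decode_png below matches the .png crops
    that aligndata_first.py writes.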
110 | """ 111 | label = input_queue[1] 112 | file_contents = tf.read_file(input_queue[0]) 113 | example = tf.image.decode_png(file_contents, channels=3) 114 | return example, label 115 | 116 | def random_rotate_image(image): 117 | angle = np.random.uniform(low=-10.0, high=10.0) 118 | return misc.imrotate(image, angle, 'bicubic') 119 | 120 | def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, 121 | random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True): 122 | 123 | images = ops.convert_to_tensor(image_list, dtype=tf.string) 124 | labels = ops.convert_to_tensor(label_list, dtype=tf.int32) 125 | 126 | # Makes an input queue 127 | input_queue = tf.train.slice_input_producer([images, labels], 128 | num_epochs=max_nrof_epochs, shuffle=shuffle) 129 | 130 | images_and_labels = [] 131 | for _ in range(nrof_preprocess_threads): 132 | image, label = read_images_from_disk(input_queue) 133 | if random_rotate: 134 | image = tf.py_func(random_rotate_image, [image], tf.uint8) 135 | if random_crop: 136 | image = tf.random_crop(image, [image_size, image_size, 3]) 137 | else: 138 | image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size) 139 | if random_flip: 140 | image = tf.image.random_flip_left_right(image) 141 | #pylint: disable=no-member 142 | image.set_shape((image_size, image_size, 3)) 143 | image = tf.image.per_image_standardization(image) 144 | images_and_labels.append([image, label]) 145 | 146 | image_batch, label_batch = tf.train.batch_join( 147 | images_and_labels, batch_size=batch_size, 148 | capacity=4 * nrof_preprocess_threads * batch_size, 149 | allow_smaller_final_batch=True) 150 | 151 | return image_batch, label_batch 152 | 153 | def _add_loss_summaries(total_loss): 154 | """Add summaries for losses. 155 | 156 | Generates moving average for all losses and associated summaries for 157 | visualizing the performance of the network. 158 | 159 | Args: 160 | total_loss: Total loss from loss(). 161 | Returns: 162 | loss_averages_op: op for generating moving averages of losses. 163 | """ 164 | # Compute the moving average of all individual losses and the total loss. 165 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') 166 | losses = tf.get_collection('losses') 167 | loss_averages_op = loss_averages.apply(losses + [total_loss]) 168 | 169 | # Attach a scalar summmary to all individual losses and the total loss; do the 170 | # same for the averaged version of the losses. 171 | for l in losses + [total_loss]: 172 | # Name each loss as '(raw)' and name the moving average version of the loss 173 | # as the original loss name. 174 | tf.summary.scalar(l.op.name +' (raw)', l) 175 | tf.summary.scalar(l.op.name, loss_averages.average(l)) 176 | 177 | return loss_averages_op 178 | 179 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True): 180 | # Generate moving averages of all losses and associated summaries. 181 | loss_averages_op = _add_loss_summaries(total_loss) 182 | 183 | # Compute gradients. 
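    # The control dependency below makes sure the moving-average loss summaries
    # are updated before this step's gradients are computed; the optimizer is
    # selected by the `optimizer` string flag.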
184 | with tf.control_dependencies([loss_averages_op]): 185 | if optimizer=='ADAGRAD': 186 | opt = tf.train.AdagradOptimizer(learning_rate) 187 | elif optimizer=='ADADELTA': 188 | opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) 189 | elif optimizer=='ADAM': 190 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) 191 | elif optimizer=='RMSPROP': 192 | opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) 193 | elif optimizer=='MOM': 194 | opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True) 195 | else: 196 | raise ValueError('Invalid optimization algorithm') 197 | 198 | grads = opt.compute_gradients(total_loss, update_gradient_vars) 199 | 200 | # Apply gradients. 201 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 202 | 203 | # Add histograms for trainable variables. 204 | if log_histograms: 205 | for var in tf.trainable_variables(): 206 | tf.summary.histogram(var.op.name, var) 207 | 208 | # Add histograms for gradients. 209 | if log_histograms: 210 | for grad, var in grads: 211 | if grad is not None: 212 | tf.summary.histogram(var.op.name + '/gradients', grad) 213 | 214 | # Track the moving averages of all trainable variables. 215 | variable_averages = tf.train.ExponentialMovingAverage( 216 | moving_average_decay, global_step) 217 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 218 | 219 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]): 220 | train_op = tf.no_op(name='train') 221 | 222 | return train_op 223 | 224 | def prewhiten(x): 225 | mean = np.mean(x) 226 | std = np.std(x) 227 | std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) 228 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 229 | return y 230 | 231 | def crop(image, random_crop, image_size): 232 | if image.shape[1]>image_size: 233 | sz1 = int(image.shape[1]//2) 234 | sz2 = int(image_size//2) 235 | if random_crop: 236 | diff = sz1-sz2 237 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 238 | else: 239 | (h, v) = (0,0) 240 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 241 | return image 242 | 243 | def flip(image, random_flip): 244 | if random_flip and np.random.choice([True, False]): 245 | image = np.fliplr(image) 246 | return image 247 | 248 | def to_rgb(img): 249 | w, h = img.shape 250 | ret = np.empty((w, h, 3), dtype=np.uint8) 251 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 252 | return ret 253 | 254 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 255 | nrof_samples = len(image_paths) 256 | images = np.zeros((nrof_samples, image_size, image_size, 3)) 257 | for i in range(nrof_samples): 258 | img = misc.imread(image_paths[i]) 259 | if img.ndim == 2: 260 | img = to_rgb(img) 261 | if do_prewhiten: 262 | img = prewhiten(img) 263 | img = crop(img, do_random_crop, image_size) 264 | img = flip(img, do_random_flip) 265 | images[i,:,:,:] = img 266 | return images 267 | 268 | def get_label_batch(label_data, batch_size, batch_index): 269 | nrof_examples = np.size(label_data, 0) 270 | j = batch_index*batch_size % nrof_examples 271 | if j+batch_size<=nrof_examples: 272 | batch = label_data[j:j+batch_size] 273 | else: 274 | x1 = label_data[j:nrof_examples] 275 | x2 = label_data[0:nrof_examples-j] 276 | batch = np.vstack([x1,x2]) 277 | batch_int = batch.astype(np.int64) 278 | return batch_int 279 | 280 | def get_batch(image_data, batch_size, batch_index): 
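    # Same wrap-around indexing as get_label_batch above: batches are taken
    # modulo the number of examples, e.g. 10 images with batch_size=4 and
    # batch_index=2 yield images [8, 9, 0, 1].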
281 |     nrof_examples = np.size(image_data, 0)
282 |     j = batch_index*batch_size % nrof_examples
283 |     if j+batch_size<=nrof_examples:
284 |         batch = image_data[j:j+batch_size,:,:,:]
285 |     else:
286 |         x1 = image_data[j:nrof_examples,:,:,:]
287 |         x2 = image_data[0:nrof_examples-j,:,:,:]
288 |         batch = np.vstack([x1,x2])
289 |     batch_float = batch.astype(np.float32)
290 |     return batch_float
291 | 
292 | def get_triplet_batch(triplets, batch_index, batch_size):
293 |     ax, px, nx = triplets
294 |     a = get_batch(ax, int(batch_size/3), batch_index)
295 |     p = get_batch(px, int(batch_size/3), batch_index)
296 |     n = get_batch(nx, int(batch_size/3), batch_index)
297 |     batch = np.vstack([a, p, n])
298 |     return batch
299 | 
300 | def get_learning_rate_from_file(filename, epoch):
301 |     with open(filename, 'r') as f:
302 |         for line in f.readlines():
303 |             line = line.split('#', 1)[0]
304 |             if line:
305 |                 par = line.strip().split(':')
306 |                 e = int(par[0])
307 |                 lr = float(par[1])
308 |                 if e <= epoch:
309 |                     learning_rate = lr
310 |                 else:
311 |                     return learning_rate
312 | 
313 | class ImageClass():
314 |     "Stores the paths to images for a given class"
315 |     def __init__(self, name, image_paths):
316 |         self.name = name
317 |         self.image_paths = image_paths
318 | 
319 |     def __str__(self):
320 |         return self.name + ', ' + str(len(self.image_paths)) + ' images'
321 | 
322 |     def __len__(self):
323 |         return len(self.image_paths)
324 | 
325 | def get_dataset(paths, has_class_directories=True):
326 |     dataset = []
327 |     for path in paths.split(':'):
328 |         path_exp = os.path.expanduser(path)
329 |         classes = os.listdir(path_exp)
330 |         classes.sort()
331 |         nrof_classes = len(classes)
332 |         for i in range(nrof_classes):
333 |             class_name = classes[i]
334 |             facedir = os.path.join(path_exp, class_name)
335 |             image_paths = get_image_paths(facedir)
336 |             dataset.append(ImageClass(class_name, image_paths))
337 | 
338 |     return dataset
339 | 
340 | def get_image_paths(facedir):
341 |     image_paths = []
342 |     if os.path.isdir(facedir):
343 |         images = os.listdir(facedir)
344 |         image_paths = [os.path.join(facedir,img) for img in images]
345 |     return image_paths
346 | 
347 | def split_dataset(dataset, split_ratio, mode):
348 |     if mode=='SPLIT_CLASSES':
349 |         nrof_classes = len(dataset)
350 |         class_indices = np.arange(nrof_classes)
351 |         np.random.shuffle(class_indices)
352 |         split = int(round(nrof_classes*split_ratio))
353 |         train_set = [dataset[i] for i in class_indices[0:split]]
354 |         test_set = [dataset[i] for i in class_indices[split:-1]]
355 |     elif mode=='SPLIT_IMAGES':
356 |         train_set = []
357 |         test_set = []
358 |         min_nrof_images = 2
359 |         for cls in dataset:
360 |             paths = cls.image_paths
361 |             np.random.shuffle(paths)
362 |             split = int(round(len(paths)*split_ratio))
363 |             if split<min_nrof_images:
364 |                 continue  # Not enough images for test set. Skip class...
365 |             train_set.append(ImageClass(cls.name, paths[0:split]))
366 |             test_set.append(ImageClass(cls.name, paths[split:-1]))
367 |     else:
368 |         raise ValueError('Invalid train/test split mode "%s"' % mode)
369 |     return train_set, test_set
370 | 
371 | def load_model(model):
372 |     # Check if the model is a model directory (containing a metagraph and a checkpoint file)
373 |     #  or if it is a protobuf file with a frozen graph
374 |     model_exp = os.path.expanduser(model)
375 |     if (os.path.isfile(model_exp)):
376 |         print('Model filename: %s' % model_exp)
377 |         with gfile.FastGFile(model_exp,'rb') as f:
378 |             graph_def = tf.GraphDef()
379 |             graph_def.ParseFromString(f.read())
380 |             tf.import_graph_def(graph_def, name='')
381 |     else:
382 |         print('Model directory: %s' % model_exp)
383 |         meta_file, ckpt_file = get_model_filenames(model_exp)
384 | 
385 |         print('Metagraph file: %s' % meta_file)
386 |         print('Checkpoint file: %s' % ckpt_file)
387 | 
388 |         saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
389 |         saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
390 | 
391 | def get_model_filenames(model_dir):
392 |     files = os.listdir(model_dir)
393 |     meta_files = [s for s in files if s.endswith('.meta')]
394 |     if len(meta_files)==0:
395 |         raise ValueError('No meta file found in the model directory (%s)' % model_dir)
396 |     elif len(meta_files)>1:
397 |         raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
398 |     meta_file = meta_files[0]
399 |     meta_files = [s for s in files if '.ckpt' in s]
400 |     max_step = -1
401 |     for f in files:
402 |         step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
403 |         if step_str is not None and len(step_str.groups())>=2:
404 |             step = int(step_str.groups()[1])
405 |             if step > max_step:
406 |                 max_step = step
407 |                 ckpt_file = step_str.groups()[0]
408 |     return meta_file, ckpt_file
409 | 
410 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
411 |     assert(embeddings1.shape[0] == embeddings2.shape[0])
412 |     assert(embeddings1.shape[1] == embeddings2.shape[1])
413 |     nrof_pairs =
min(len(actual_issame), embeddings1.shape[0]) 414 | nrof_thresholds = len(thresholds) 415 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 416 | 417 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 418 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 419 | accuracy = np.zeros((nrof_folds)) 420 | 421 | diff = np.subtract(embeddings1, embeddings2) 422 | dist = np.sum(np.square(diff),1) 423 | indices = np.arange(nrof_pairs) 424 | 425 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 426 | 427 | # Find the best threshold for the fold 428 | acc_train = np.zeros((nrof_thresholds)) 429 | for threshold_idx, threshold in enumerate(thresholds): 430 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 431 | best_threshold_index = np.argmax(acc_train) 432 | for threshold_idx, threshold in enumerate(thresholds): 433 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) 434 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 435 | 436 | tpr = np.mean(tprs,0) 437 | fpr = np.mean(fprs,0) 438 | return tpr, fpr, accuracy 439 | 440 | def calculate_accuracy(threshold, dist, actual_issame): 441 | predict_issame = np.less(dist, threshold) 442 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 443 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 444 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 445 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 446 | 447 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 448 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 449 | acc = float(tp+tn)/dist.size 450 | return tpr, fpr, acc 451 | 452 | 453 | 454 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10): 455 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 456 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 457 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 458 | nrof_thresholds = len(thresholds) 459 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 460 | 461 | val = np.zeros(nrof_folds) 462 | far = np.zeros(nrof_folds) 463 | 464 | diff = np.subtract(embeddings1, embeddings2) 465 | dist = np.sum(np.square(diff),1) 466 | indices = np.arange(nrof_pairs) 467 | 468 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 469 | 470 | # Find the threshold that gives FAR = far_target 471 | far_train = np.zeros(nrof_thresholds) 472 | for threshold_idx, threshold in enumerate(thresholds): 473 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 474 | if np.max(far_train)>=far_target: 475 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 476 | threshold = f(far_target) 477 | else: 478 | threshold = 0.0 479 | 480 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 481 | 482 | val_mean = np.mean(val) 483 | far_mean = np.mean(far) 484 | val_std = np.std(val) 485 | return val_mean, val_std, far_mean 486 | 487 | 488 | def calculate_val_far(threshold, dist, actual_issame): 489 | predict_issame = np.less(dist, threshold) 490 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 491 | false_accept = np.sum(np.logical_and(predict_issame, 
np.logical_not(actual_issame)))
492 |     n_same = np.sum(actual_issame)
493 |     n_diff = np.sum(np.logical_not(actual_issame))
494 |     val = float(true_accept) / float(n_same)
495 |     far = float(false_accept) / float(n_diff)
496 |     return val, far
497 | 
498 | def store_revision_info(src_path, output_dir, arg_string):
499 | 
500 |     # Get git hash
501 |     gitproc = Popen(['git', 'rev-parse', 'HEAD'], stdout = PIPE, cwd=src_path)
502 |     (stdout, _) = gitproc.communicate()
503 |     git_hash = stdout.strip()
504 | 
505 |     # Get local changes
506 |     gitproc = Popen(['git', 'diff', 'HEAD'], stdout = PIPE, cwd=src_path)
507 |     (stdout, _) = gitproc.communicate()
508 |     git_diff = stdout.strip()
509 | 
510 |     # Store a text file in the log directory
511 |     rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
512 |     with open(rev_info_filename, "w") as text_file:
513 |         text_file.write('arguments: %s\n--------------------\n' % arg_string)
514 |         text_file.write('git hash: %s\n--------------------\n' % git_hash)
515 |         text_file.write('%s' % git_diff)
516 | 
517 | def list_variables(filename):
518 |     reader = training.NewCheckpointReader(filename)
519 |     variable_map = reader.get_variable_to_shape_map()
520 |     names = sorted(variable_map.keys())
521 |     return names
522 | 
523 | def put_images_on_grid(images, shape=(16,8)):
524 |     nrof_images = images.shape[0]
525 |     img_size = images.shape[1]
526 |     bw = 3
527 |     img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32)
528 |     for i in range(shape[1]):
529 |         x_start = i*(img_size+bw)+bw
530 |         for j in range(shape[0]):
531 |             img_index = i*shape[0]+j
532 |             if img_index>=nrof_images:
533 |                 break
534 |             y_start = j*(img_size+bw)+bw
535 |             img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :]
536 |         if img_index>=nrof_images:
537 |             break
538 |     return img
539 | 
540 | def write_arguments_to_file(args, filename):
541 |     with open(filename, 'w') as f:
542 |         for key, value in vars(args).items():  # .items() works on both Python 2 and 3
543 |             f.write('%s: %s\n' % (key, str(value)))
544 | 
--------------------------------------------------------------------------------
/detect_face.py:
--------------------------------------------------------------------------------
1 | """ Tensorflow implementation of the face detection / alignment algorithm found at
2 | https://github.com/kpzhang93/MTCNN_face_detection_alignment
3 | """
4 | # MIT License
5 | #
6 | # Copyright (c) 2016 David Sandberg
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in all
16 | # copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | from six import string_types, iteritems 30 | 31 | import numpy as np 32 | import tensorflow as tf 33 | #from math import floor 34 | import cv2 35 | import os 36 | 37 | def layer(op): 38 | '''Decorator for composable network layers.''' 39 | 40 | def layer_decorated(self, *args, **kwargs): 41 | # Automatically set a name if not provided. 42 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 43 | # Figure out the layer inputs. 44 | if len(self.terminals) == 0: 45 | raise RuntimeError('No input variables found for layer %s.' % name) 46 | elif len(self.terminals) == 1: 47 | layer_input = self.terminals[0] 48 | else: 49 | layer_input = list(self.terminals) 50 | # Perform the operation and get the output. 51 | layer_output = op(self, layer_input, *args, **kwargs) 52 | # Add to layer LUT. 53 | self.layers[name] = layer_output 54 | # This output is now the input for the next layer. 55 | self.feed(layer_output) 56 | # Return self for chained calls. 57 | return self 58 | 59 | return layer_decorated 60 | 61 | class Network(object): 62 | 63 | def __init__(self, inputs, trainable=True): 64 | # The input nodes for this network 65 | self.inputs = inputs 66 | # The current list of terminal nodes 67 | self.terminals = [] 68 | # Mapping from layer names to layers 69 | self.layers = dict(inputs) 70 | # If true, the resulting variables are set as trainable 71 | self.trainable = trainable 72 | 73 | self.setup() 74 | 75 | def setup(self): 76 | '''Construct the network. ''' 77 | raise NotImplementedError('Must be implemented by the subclass.') 78 | 79 | def load(self, data_path, session, ignore_missing=False): 80 | '''Load network weights. 81 | data_path: The path to the numpy-serialized network weights 82 | session: The current TensorFlow session 83 | ignore_missing: If true, serialized weights for missing layers are ignored. 84 | ''' 85 | data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member 86 | 87 | for op_name in data_dict: 88 | with tf.variable_scope(op_name, reuse=True): 89 | for param_name, data in iteritems(data_dict[op_name]): 90 | try: 91 | var = tf.get_variable(param_name) 92 | session.run(var.assign(data)) 93 | except ValueError: 94 | if not ignore_missing: 95 | raise 96 | 97 | def feed(self, *args): 98 | '''Set the input(s) for the next operation by replacing the terminal nodes. 99 | The arguments can be either layer names or the actual layers. 100 | ''' 101 | assert len(args) != 0 102 | self.terminals = [] 103 | for fed_layer in args: 104 | if isinstance(fed_layer, string_types): 105 | try: 106 | fed_layer = self.layers[fed_layer] 107 | except KeyError: 108 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 109 | self.terminals.append(fed_layer) 110 | return self 111 | 112 | def get_output(self): 113 | '''Returns the current network output.''' 114 | return self.terminals[-1] 115 | 116 | def get_unique_name(self, prefix): 117 | '''Returns an index-suffixed unique name for the given prefix. 118 | This is used for auto-generating layer names based on the type-prefix. 
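
        For example, the first two 'conv' layers get the unique names
        'conv_1' and 'conv_2'.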
119 | ''' 120 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 121 | return '%s_%d' % (prefix, ident) 122 | 123 | def make_var(self, name, shape): 124 | '''Creates a new TensorFlow variable.''' 125 | return tf.get_variable(name, shape, trainable=self.trainable) 126 | 127 | def validate_padding(self, padding): 128 | '''Verifies that the padding is one of the supported ones.''' 129 | assert padding in ('SAME', 'VALID') 130 | 131 | @layer 132 | def conv(self, 133 | inp, 134 | k_h, 135 | k_w, 136 | c_o, 137 | s_h, 138 | s_w, 139 | name, 140 | relu=True, 141 | padding='SAME', 142 | group=1, 143 | biased=True): 144 | # Verify that the padding is acceptable 145 | self.validate_padding(padding) 146 | # Get the number of channels in the input 147 | c_i = int(inp.get_shape()[-1]) 148 | # Verify that the grouping parameter is valid 149 | assert c_i % group == 0 150 | assert c_o % group == 0 151 | # Convolution for a given input and kernel 152 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 153 | with tf.variable_scope(name) as scope: 154 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 155 | # This is the common-case. Convolve the input without any further complications. 156 | output = convolve(inp, kernel) 157 | # Add the biases 158 | if biased: 159 | biases = self.make_var('biases', [c_o]) 160 | output = tf.nn.bias_add(output, biases) 161 | if relu: 162 | # ReLU non-linearity 163 | output = tf.nn.relu(output, name=scope.name) 164 | return output 165 | 166 | @layer 167 | def prelu(self, inp, name): 168 | with tf.variable_scope(name): 169 | i = int(inp.get_shape()[-1]) 170 | alpha = self.make_var('alpha', shape=(i,)) 171 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 172 | return output 173 | 174 | @layer 175 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 176 | self.validate_padding(padding) 177 | return tf.nn.max_pool(inp, 178 | ksize=[1, k_h, k_w, 1], 179 | strides=[1, s_h, s_w, 1], 180 | padding=padding, 181 | name=name) 182 | 183 | @layer 184 | def fc(self, inp, num_out, name, relu=True): 185 | with tf.variable_scope(name): 186 | input_shape = inp.get_shape() 187 | if input_shape.ndims == 4: 188 | # The input is spatial. Vectorize it first. 
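                # e.g. a (batch, 3, 3, 128) activation is reshaped to
                # (batch, 1152) before the matmul.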
189 | dim = 1 190 | for d in input_shape[1:].as_list(): 191 | dim *= int(d) 192 | feed_in = tf.reshape(inp, [-1, dim]) 193 | else: 194 | feed_in, dim = (inp, input_shape[-1].value) 195 | weights = self.make_var('weights', shape=[dim, num_out]) 196 | biases = self.make_var('biases', [num_out]) 197 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 198 | fc = op(feed_in, weights, biases, name=name) 199 | return fc 200 | 201 | 202 | """ 203 | Multi dimensional softmax, 204 | refer to https://github.com/tensorflow/tensorflow/issues/210 205 | compute softmax along the dimension of target 206 | the native softmax only supports batch_size x dimension 207 | """ 208 | @layer 209 | def softmax(self, target, axis, name=None): 210 | max_axis = tf.reduce_max(target, axis, keep_dims=True) 211 | target_exp = tf.exp(target-max_axis) 212 | normalize = tf.reduce_sum(target_exp, axis, keep_dims=True) 213 | softmax = tf.div(target_exp, normalize, name) 214 | return softmax 215 | 216 | class PNet(Network): 217 | def setup(self): 218 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 219 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 220 | .prelu(name='PReLU1') 221 | .max_pool(2, 2, 2, 2, name='pool1') 222 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 223 | .prelu(name='PReLU2') 224 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 225 | .prelu(name='PReLU3') 226 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 227 | .softmax(3,name='prob1')) 228 | 229 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 230 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 231 | 232 | class RNet(Network): 233 | def setup(self): 234 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 235 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 236 | .prelu(name='prelu1') 237 | .max_pool(3, 3, 2, 2, name='pool1') 238 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 239 | .prelu(name='prelu2') 240 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 241 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 242 | .prelu(name='prelu3') 243 | .fc(128, relu=False, name='conv4') 244 | .prelu(name='prelu4') 245 | .fc(2, relu=False, name='conv5-1') 246 | .softmax(1,name='prob1')) 247 | 248 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 249 | .fc(4, relu=False, name='conv5-2')) 250 | 251 | class ONet(Network): 252 | def setup(self): 253 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 254 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 255 | .prelu(name='prelu1') 256 | .max_pool(3, 3, 2, 2, name='pool1') 257 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 258 | .prelu(name='prelu2') 259 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 260 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 261 | .prelu(name='prelu3') 262 | .max_pool(2, 2, 2, 2, name='pool3') 263 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 264 | .prelu(name='prelu4') 265 | .fc(256, relu=False, name='conv5') 266 | .prelu(name='prelu5') 267 | .fc(2, relu=False, name='conv6-1') 268 | .softmax(1, name='prob1')) 269 | 270 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 271 | .fc(4, relu=False, name='conv6-2')) 272 | 273 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 274 | .fc(10, relu=False, name='conv6-3')) 275 | 276 | def create_mtcnn(sess, model_path): 277 | if not 
model_path: 278 | model_path,_ = os.path.split(os.path.realpath(__file__)) 279 | 280 | with tf.variable_scope('pnet'): 281 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 282 | pnet = PNet({'data':data}) 283 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 284 | with tf.variable_scope('rnet'): 285 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 286 | rnet = RNet({'data':data}) 287 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 288 | with tf.variable_scope('onet'): 289 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 290 | onet = ONet({'data':data}) 291 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 292 | 293 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 294 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 295 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 296 | return pnet_fun, rnet_fun, onet_fun 297 | 298 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 299 | # im: input image 300 | # minsize: minimum of faces' size 301 | # pnet, rnet, onet: caffemodel 302 | # threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold 303 | # fastresize: resize img from last scale (using in high-resolution images) if fastresize==true 304 | factor_count=0 305 | total_boxes=np.empty((0,9)) 306 | points=np.empty(0) 307 | h=img.shape[0] 308 | w=img.shape[1] 309 | minl=np.amin([h, w]) 310 | m=12.0/minsize 311 | minl=minl*m 312 | # creat scale pyramid 313 | scales=[] 314 | while minl>=12: 315 | scales += [m*np.power(factor, factor_count)] 316 | minl = minl*factor 317 | factor_count += 1 318 | 319 | # first stage 320 | for j in range(len(scales)): 321 | scale=scales[j] 322 | hs=int(np.ceil(h*scale)) 323 | ws=int(np.ceil(w*scale)) 324 | im_data = imresample(img, (hs, ws)) 325 | im_data = (im_data-127.5)*0.0078125 326 | img_x = np.expand_dims(im_data, 0) 327 | img_y = np.transpose(img_x, (0,2,1,3)) 328 | out = pnet(img_y) 329 | out0 = np.transpose(out[0], (0,2,1,3)) 330 | out1 = np.transpose(out[1], (0,2,1,3)) 331 | 332 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 333 | 334 | # inter-scale nms 335 | pick = nms(boxes.copy(), 0.5, 'Union') 336 | if boxes.size>0 and pick.size>0: 337 | boxes = boxes[pick,:] 338 | total_boxes = np.append(total_boxes, boxes, axis=0) 339 | 340 | numbox = total_boxes.shape[0] 341 | if numbox>0: 342 | pick = nms(total_boxes.copy(), 0.7, 'Union') 343 | total_boxes = total_boxes[pick,:] 344 | regw = total_boxes[:,2]-total_boxes[:,0] 345 | regh = total_boxes[:,3]-total_boxes[:,1] 346 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 347 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 348 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 349 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 350 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 351 | total_boxes = rerec(total_boxes.copy()) 352 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 353 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 354 | 355 | numbox = total_boxes.shape[0] 356 | if numbox>0: 357 | # second stage 358 | tempimg = np.zeros((24,24,3,numbox)) 359 | for k in range(0,numbox): 360 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 361 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = 
def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    # img: input image (height x width x 3)
    # minsize: minimum size of faces to detect, in pixels
    # pnet, rnet, onet: the three stage networks returned by create_mtcnn
    # threshold: [th1, th2, th3] -- score thresholds for the three stages
    # factor: scale step of the image pyramid
    factor_count = 0
    total_boxes = np.empty((0, 9))
    points = np.empty(0)
    h = img.shape[0]
    w = img.shape[1]
    minl = np.amin([h, w])
    m = 12.0 / minsize
    minl = minl * m
    # create scale pyramid
    scales = []
    while minl >= 12:
        scales += [m * np.power(factor, factor_count)]
        minl = minl * factor
        factor_count += 1

    # first stage
    for j in range(len(scales)):
        scale = scales[j]
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data - 127.5) * 0.0078125
        img_x = np.expand_dims(im_data, 0)
        img_y = np.transpose(img_x, (0, 2, 1, 3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0, 2, 1, 3))
        out1 = np.transpose(out[1], (0, 2, 1, 3))

        boxes, _ = generateBoundingBox(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size > 0 and pick.size > 0:
            boxes = boxes[pick, :]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick, :]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # second stage
        tempimg = np.zeros((24, 24, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = img[y[k]-1:ey[k], x[k]-1:ex[k], :]
            if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                tempimg[:, :, :, k] = imresample(tmp, (24, 24))
            else:
                return np.empty((0, 9)), np.empty(0)  # invalid crop; return empty results
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1, :]
        ipass = np.where(score > threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]
        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick, :]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48, 48, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = img[y[k]-1:ey[k], x[k]-1:ex[k], :]
            if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                tempimg[:, :, :, k] = imresample(tmp, (48, 48))
            else:
                return np.empty((0, 9)), np.empty(0)  # invalid crop; return empty results
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = onet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1, :]
        points = out1
        ipass = np.where(score > threshold[2])
        points = points[:, ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]

        w = total_boxes[:, 2] - total_boxes[:, 0] + 1
        h = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[0:5, :] = np.tile(w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
        points[5:10, :] = np.tile(h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1
        if total_boxes.shape[0] > 0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick, :]
            points = points[:, pick]

    return total_boxes, points
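# Illustrative call (added; assumes an RGB image already loaded as a numpy array and
# the networks from create_mtcnn; the threshold/factor values below are the ones
# commonly used with this code, not mandated by it):
#
#     boxes, points = detect_face(img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
#
# Each row of boxes is [x1, y1, x2, y2, score]; points is a 10xN array holding
# x1..x5 then y1..y5 landmark coordinates for the N detected faces.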
def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
    # images: list of input images
    # detection_window_size_ratio: ratio of the minimum face size to the smaller image side
    # pnet, rnet, onet: the three stage networks returned by create_mtcnn
    # threshold: [th1, th2, th3] -- score thresholds (0-1) for the three stages
    # factor: scale step of the image pyramid

    all_scales = [None] * len(images)
    images_with_boxes = [None] * len(images)

    for i in range(len(images)):
        images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}

    # create scale pyramid
    for index, img in enumerate(images):
        all_scales[index] = []
        h = img.shape[0]
        w = img.shape[1]
        minsize = int(detection_window_size_ratio * np.minimum(w, h))
        factor_count = 0
        minl = np.amin([h, w])
        if minsize <= 12:
            minsize = 12

        m = 12.0 / minsize
        minl = minl * m
        while minl >= 12:
            all_scales[index].append(m * np.power(factor, factor_count))
            minl = minl * factor
            factor_count += 1

    # # # # # # # # # # # # #
    # first stage - fast proposal network (pnet) to obtain face candidates
    # # # # # # # # # # # # #

    images_obj_per_resolution = {}

    # TODO: round resolutions to a multiple of 8 to increase the probability that pyramid images will have the same resolution across input images

    for index, scales in enumerate(all_scales):
        h = images[index].shape[0]
        w = images[index].shape[1]

        for scale in scales:
            hs = int(np.ceil(h * scale))
            ws = int(np.ceil(w * scale))

            if (ws, hs) not in images_obj_per_resolution:
                images_obj_per_resolution[(ws, hs)] = []

            im_data = imresample(images[index], (hs, ws))
            im_data = (im_data - 127.5) * 0.0078125
            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses a different dimension ordering
            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})

    for resolution in images_obj_per_resolution:
        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
        outs = pnet(images_per_resolution)

        for index in range(len(outs[0])):
            scale = images_obj_per_resolution[resolution][index]['scale']
            image_index = images_obj_per_resolution[resolution][index]['index']
            out0 = np.transpose(outs[0][index], (1, 0, 2))
            out1 = np.transpose(outs[1][index], (1, 0, 2))

            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])

            # inter-scale nms
            pick = nms(boxes.copy(), 0.5, 'Union')
            if boxes.size > 0 and pick.size > 0:
                boxes = boxes[pick, :]
                images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
                                                                          boxes,
                                                                          axis=0)

    for index, image_obj in enumerate(images_with_boxes):
        numbox = image_obj['total_boxes'].shape[0]
        if numbox > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

            numbox = image_obj['total_boxes'].shape[0]
            tempimg = np.zeros((24, 24, 3, numbox))

            if numbox > 0:
                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = images[index][y[k]-1:ey[k], x[k]-1:ex[k], :]
                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
                    else:
                        return np.empty(0)  # invalid crop; bail out with an empty result

                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
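    # (Added note) The batching trick above: pyramid levels from different input
    # images that happen to share the same (ws, hs) resolution are stacked and pushed
    # through PNet together, which is why images_obj_per_resolution is keyed by
    # resolution rather than by image.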
    # # # # # # # # # # # # #
    # second stage - refinement of face candidates with rnet
    # # # # # # # # # # # # #

    bulk_rnet_input = np.empty((0, 24, 24, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' in image_obj:
            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)

    out = rnet(bulk_rnet_input)
    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    score = out1[1, :]

    i = 0
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' not in image_obj:
            continue

        rnet_input_count = image_obj['rnet_input'].shape[0]
        score_per_image = score[i:i + rnet_input_count]
        out0_per_image = out0[:, i:i + rnet_input_count]

        ipass = np.where(score_per_image > threshold[1])
        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])

        mv = out0_per_image[:, ipass[0]]

        if image_obj['total_boxes'].shape[0] > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())

            numbox = image_obj['total_boxes'].shape[0]

            if numbox > 0:
                tempimg = np.zeros((48, 48, 3, numbox))
                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = images[index][y[k]-1:ey[k], x[k]-1:ex[k], :]
                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
                    else:
                        return np.empty(0)  # invalid crop; bail out with an empty result
                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

        i += rnet_input_count
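    # (Added note) The bulk RNet/ONet outputs are consumed back per-image by slicing
    # with the running offset i, so the iteration order over images_with_boxes must
    # match the order in which the per-image inputs were concatenated above.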
    # # # # # # # # # # # # #
    # third stage - further refinement and facial landmarks positions with onet
    # # # # # # # # # # # # #

    bulk_onet_input = np.empty((0, 48, 48, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' in image_obj:
            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)

    out = onet(bulk_onet_input)

    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    out2 = np.transpose(out[2])
    score = out2[1, :]
    points = out1

    i = 0
    ret = []
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' not in image_obj:
            ret.append(None)
            continue

        onet_input_count = image_obj['onet_input'].shape[0]

        out0_per_image = out0[:, i:i + onet_input_count]
        score_per_image = score[i:i + onet_input_count]
        points_per_image = points[:, i:i + onet_input_count]

        ipass = np.where(score_per_image > threshold[2])
        points_per_image = points_per_image[:, ipass[0]]

        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
        mv = out0_per_image[:, ipass[0]]

        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
            image_obj['total_boxes'][:, 0], (5, 1)) - 1
        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
            image_obj['total_boxes'][:, 1], (5, 1)) - 1

        if image_obj['total_boxes'].shape[0] > 0:
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            points_per_image = points_per_image[:, pick]

            ret.append((image_obj['total_boxes'], points_per_image))
        else:
            ret.append(None)

        i += onet_input_count

    return ret


# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox, reg):
    # calibrate bounding boxes
    if reg.shape[1] == 1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    h = boundingbox[:, 3] - boundingbox[:, 1] + 1
    b1 = boundingbox[:, 0] + reg[:, 0] * w
    b2 = boundingbox[:, 1] + reg[:, 1] * h
    b3 = boundingbox[:, 2] + reg[:, 2] * w
    b4 = boundingbox[:, 3] + reg[:, 3] * h
    boundingbox[:, 0:4] = np.transpose(np.vstack([b1, b2, b3, b4]))
    return boundingbox

def generateBoundingBox(imap, reg, scale, t):
    # use the heatmap to generate bounding boxes
    stride = 2
    cellsize = 12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:, :, 0])
    dy1 = np.transpose(reg[:, :, 1])
    dx2 = np.transpose(reg[:, :, 2])
    dy2 = np.transpose(reg[:, :, 3])
    y, x = np.where(imap >= t)
    if y.shape[0] == 1:
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y, x)]
    reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))
    if reg.size == 0:
        reg = np.empty((0, 4))  # four regression offsets per box
    bb = np.transpose(np.vstack([y, x]))
    q1 = np.fix((stride * bb + 1) / scale)
    q2 = np.fix((stride * bb + cellsize - 1 + 1) / scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
    return boundingbox, reg

# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
    if boxes.size == 0:
        return np.empty((0, 3))
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    I = np.argsort(s)
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while I.size > 0:
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if method == 'Min':
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o <= threshold)]
    pick = pick[0:counter]
    return pick
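# Worked example (added): with boxes A = [0, 0, 10, 10, 0.9] and B = [2, 2, 12, 12, 0.8],
# each has area (10+1)^2 = 121, the intersection is 9*9 = 81, and the union is
# 121 + 121 - 81 = 161, so the 'Union' overlap (IoU) is 81/161 ~ 0.50; the 'Min'
# overlap is 81/121 ~ 0.67. 'Min' (used in the third stage) therefore suppresses
# nested or mostly-contained boxes more aggressively than plain IoU.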
# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
    # compute the padding coordinates (pad the bounding boxes to square)
    tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
    tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    x = total_boxes[:, 0].copy().astype(np.int32)
    y = total_boxes[:, 1].copy().astype(np.int32)
    ex = total_boxes[:, 2].copy().astype(np.int32)
    ey = total_boxes[:, 3].copy().astype(np.int32)

    tmp = np.where(ex > w)
    edx.flat[tmp] = np.expand_dims(-ex[tmp] + w + tmpw[tmp], 1)
    ex[tmp] = w

    tmp = np.where(ey > h)
    edy.flat[tmp] = np.expand_dims(-ey[tmp] + h + tmph[tmp], 1)
    ey[tmp] = h

    tmp = np.where(x < 1)
    dx.flat[tmp] = np.expand_dims(2 - x[tmp], 1)
    x[tmp] = 1

    tmp = np.where(y < 1)
    dy.flat[tmp] = np.expand_dims(2 - y[tmp], 1)
    y[tmp] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph

# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
    # convert bboxA to square
    h = bboxA[:, 3] - bboxA[:, 1]
    w = bboxA[:, 2] - bboxA[:, 0]
    l = np.maximum(w, h)
    bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - l * 0.5
    bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - l * 0.5
    bboxA[:, 2:4] = bboxA[:, 0:2] + np.transpose(np.tile(l, (2, 1)))
    return bboxA

def imresample(img, sz):
    im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA)  #@UndefinedVariable
    return im_data

# This method is kept for debugging purposes:
#     h = img.shape[0]
#     w = img.shape[1]
#     hs, ws = sz
#     dx = float(w) / ws
#     dy = float(h) / hs
#     im_data = np.zeros((hs, ws, 3))
#     for a1 in range(0, hs):
#         for a2 in range(0, ws):
#             for a3 in range(0, 3):
#                 im_data[a1, a2, a3] = img[int(floor(a1*dy)), int(floor(a2*dx)), a3]
#     return im_data

--------------------------------------------------------------------------------