├── __init__.pyc
├── facenet.pyc
├── d_npy
│   ├── det1.npy
│   ├── det2.npy
│   └── det3.npy
├── detect_face.pyc
├── requirements.txt
├── create_dir_setup.sh
├── LICENSE
├── README.md
├── create_classifier_se.py
├── aligndata_first.py
├── detect_facese_real_time.py
├── detect_facese_real_time_with_incFrame.py
├── facenet.py
└── detect_face.py
--------------------------------------------------------------------------------
/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/__init__.pyc
--------------------------------------------------------------------------------
/facenet.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/facenet.pyc
--------------------------------------------------------------------------------
/d_npy/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/d_npy/det1.npy
--------------------------------------------------------------------------------
/d_npy/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/d_npy/det2.npy
--------------------------------------------------------------------------------
/d_npy/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/d_npy/det3.npy
--------------------------------------------------------------------------------
/detect_face.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ishwarsawale/real-time-face-recognition-with-facenet/HEAD/detect_face.pyc
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow==1.8.0
2 | scipy==1.0.0
3 | matplotlib==2.1.2
4 | six==1.11.0
5 | numpy==1.14.0
6 | scikit_learn==0.19.1
7 | 
--------------------------------------------------------------------------------
/create_dir_setup.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | echo 'creating dir structure for project'
4 | 
5 | mkdir 'input_dir'
6 | mkdir 'out_dir'
7 | mkdir 'my_class'
8 | mkdir 'pre_model'
9 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 icode
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # real-time-face-recognition-with-facenet
2 | 
3 | I remember my first day on the job, when I was assigned to work on a face recognition system. At the time, building a classifier that could do this well felt like a dream: I was using plain OpenCV to detect faces and then creating a unique vector for each face, but its accuracy was far too low for any real application. A few months ago I read the paper "FaceNet: A Unified Embedding for Face Recognition and Clustering", which presents a unified system for face verification, recognition and clustering.
4 | 
5 | 
6 | 
7 | FaceNet is based on learning a Euclidean embedding per image with a deep convolutional network; embedding algorithms search for a low-dimensional continuous representation of the data. The network is trained so that squared L2 distances in the embedding space directly correspond to face similarity: faces of the same person have small distances, and faces of distinct people have large distances.
8 | 
9 | 
10 | 
11 | Once this embedding has been produced, the aforementioned tasks become straightforward: face verification simply involves thresholding the distance between two embeddings; recognition becomes a k-NN classification problem; and clustering can be achieved with off-the-shelf techniques such as k-means or agglomerative clustering.
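
To make this concrete, here is a minimal verification sketch (assuming `emb1` and `emb2` are embeddings produced by the pre-trained model used in this repo; the threshold value is only a placeholder you would tune on your own data):

```python
import numpy as np

def is_same_person(emb1, emb2, threshold=1.1):
    # Squared L2 distance between two face embeddings: a small distance
    # means "same person". The threshold is dataset-dependent.
    dist = np.sum(np.square(np.subtract(emb1, emb2)))
    return dist < threshold
```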
12 | 13 | [Complete Post is Here](https://www.linkedin.com/pulse/real-time-face-recognition-using-facenet-ishwar-sawale/) 14 | -------------------------------------------------------------------------------- /create_classifier_se.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import numpy as np 7 | import argparse 8 | import facenet 9 | import detect_face 10 | import os 11 | import sys 12 | import math 13 | import pickle 14 | from sklearn.svm import SVC 15 | 16 | 17 | with tf.Graph().as_default(): 18 | 19 | with tf.Session() as sess: 20 | 21 | datadir = './out_dir' 22 | dataset = facenet.get_dataset(datadir) 23 | paths, labels = facenet.get_image_paths_and_labels(dataset) 24 | print('Number of classes: %d' % len(dataset)) 25 | print('Number of images: %d' % len(paths)) 26 | 27 | print('Loading feature extraction model') 28 | modeldir = './pre_model/20170511-185253.pb' 29 | facenet.load_model(modeldir) 30 | 31 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 32 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 33 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 34 | embedding_size = embeddings.get_shape()[1] 35 | 36 | # Run forward pass to calculate embeddings 37 | print('Calculating features for images') 38 | batch_size = 1000 39 | image_size = 160 40 | nrof_images = len(paths) 41 | nrof_batches_per_epoch = int(math.ceil(1.0 * nrof_images / batch_size)) 42 | emb_array = np.zeros((nrof_images, embedding_size)) 43 | for i in range(nrof_batches_per_epoch): 44 | start_index = i * batch_size 45 | end_index = min((i + 1) * batch_size, nrof_images) 46 | paths_batch = paths[start_index:end_index] 47 | images = facenet.load_data(paths_batch, False, False, image_size) 48 | feed_dict = {images_placeholder: images, phase_train_placeholder: False} 49 | emb_array[start_index:end_index, :] = sess.run(embeddings, feed_dict=feed_dict) 50 | 51 | classifier_filename = './my_class/my_classifier.pkl' 52 | classifier_filename_exp = os.path.expanduser(classifier_filename) 53 | 54 | # Train classifier 55 | print('Training classifier') 56 | model = SVC(kernel='linear', probability=True) 57 | model.fit(emb_array, labels) 58 | 59 | # Create a list of class names 60 | class_names = [cls.name.replace('_', ' ') for cls in dataset] 61 | 62 | # Saving classifier model 63 | with open(classifier_filename_exp, 'wb') as outfile: 64 | pickle.dump((model, class_names), outfile) 65 | print('Saved classifier model to file "%s"' % classifier_filename_exp) 66 | print('Goodluck') 67 | -------------------------------------------------------------------------------- /aligndata_first.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from scipy import misc 6 | import sys 7 | import os 8 | import argparse 9 | import tensorflow as tf 10 | import numpy as np 11 | import facenet 12 | import detect_face 13 | import random 14 | from time import sleep 15 | 16 | output_dir_path = './out_dir' 17 | output_dir = os.path.expanduser(output_dir_path) 18 | if not os.path.exists(output_dir): 19 | os.makedirs(output_dir) 20 | 21 | datadir = './input_dir' 22 | dataset = facenet.get_dataset(datadir) 23 | 24 | 
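# detect_face.create_mtcnn below builds the three-stage MTCNN cascade: PNet
# scans an image pyramid for candidate face windows, RNet filters and refines
# them, and ONet outputs the final boxes (plus facial landmarks). The
# pretrained weights for the three stages are det1.npy, det2.npy and det3.npy
# under ./d_npy.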
print('Creating networks and loading parameters') 25 | with tf.Graph().as_default(): 26 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.5) 27 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 28 | with sess.as_default(): 29 | pnet, rnet, onet = detect_face.create_mtcnn(sess, './d_npy') 30 | 31 | minsize = 20 # minimum size of face 32 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 33 | factor = 0.709 # scale factor 34 | margin = 44 35 | image_size = 182 36 | 37 | # Add a random key to the filename to allow alignment using multiple processes 38 | random_key = np.random.randint(0, high=99999) 39 | bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) 40 | print('Goodluck') 41 | 42 | with open(bounding_boxes_filename, "w") as text_file: 43 | nrof_images_total = 0 44 | nrof_successfully_aligned = 0 45 | for cls in dataset: 46 | output_class_dir = os.path.join(output_dir, cls.name) 47 | if not os.path.exists(output_class_dir): 48 | os.makedirs(output_class_dir) 49 | for image_path in cls.image_paths: 50 | nrof_images_total += 1 51 | filename = os.path.splitext(os.path.split(image_path)[1])[0] 52 | output_filename = os.path.join(output_class_dir, filename + '.png') 53 | print(image_path) 54 | if not os.path.exists(output_filename): 55 | try: 56 | img = misc.imread(image_path) 57 | print('read data dimension: ', img.ndim) 58 | except (IOError, ValueError, IndexError) as e: 59 | errorMessage = '{}: {}'.format(image_path, e) 60 | print(errorMessage) 61 | else: 62 | if img.ndim < 2: 63 | print('Unable to align "%s"' % image_path) 64 | text_file.write('%s\n' % (output_filename)) 65 | continue 66 | if img.ndim == 2: 67 | img = facenet.to_rgb(img) 68 | print('to_rgb data dimension: ', img.ndim) 69 | img = img[:, :, 0:3] 70 | print('after data dimension: ', img.ndim) 71 | 72 | bounding_boxes, _ = detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 73 | nrof_faces = bounding_boxes.shape[0] 74 | print('detected_face: %d' % nrof_faces) 75 | if nrof_faces > 0: 76 | det = bounding_boxes[:, 0:4] 77 | img_size = np.asarray(img.shape)[0:2] 78 | if nrof_faces > 1: 79 | bounding_box_size = (det[:, 2] - det[:, 0]) * (det[:, 3] - det[:, 1]) 80 | img_center = img_size / 2 81 | offsets = np.vstack([(det[:, 0] + det[:, 2]) / 2 - img_center[1], 82 | (det[:, 1] + det[:, 3]) / 2 - img_center[0]]) 83 | offset_dist_squared = np.sum(np.power(offsets, 2.0), 0) 84 | index = np.argmax(bounding_box_size - offset_dist_squared * 2.0) # some extra weight on the centering 85 | det = det[index, :] 86 | det = np.squeeze(det) 87 | bb_temp = np.zeros(4, dtype=np.int32) 88 | 89 | bb_temp[0] = det[0] 90 | bb_temp[1] = det[1] 91 | bb_temp[2] = det[2] 92 | bb_temp[3] = det[3] 93 | try: 94 | cropped_temp = img[bb_temp[1]:bb_temp[3], bb_temp[0]:bb_temp[2], :] 95 | scaled_temp = misc.imresize(cropped_temp, (image_size, image_size), interp='bilinear') 96 | nrof_successfully_aligned += 1 97 | misc.imsave(output_filename, scaled_temp) 98 | # text_file.write('%s %d %d %d %d\n' % (output_filename, bb_temp[0], bb_temp[1], bb_temp[2], bb_temp[3])) 99 | except Exception as e: 100 | os.remove(image_path) 101 | else: 102 | print('Unable to align "%s"' % image_path) 103 | text_file.write('%s\n' % (output_filename)) 104 | 105 | print('Total number of images: %d' % nrof_images_total) 106 | print('Number of successfully aligned images: %d' % nrof_successfully_aligned) 107 | 108 | 109 | 110 | 
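Note: `margin = 44` is defined above but never applied; the crop in aligndata_first.py uses the raw MTCNN box coordinates. For reference, a hedged sketch of the margin padding that the upstream FaceNet alignment script performs before cropping (the helper name `pad_box` is ours, not part of this repo):

```python
import numpy as np

def pad_box(det, img_shape, margin=44):
    # Expand an MTCNN box (x1, y1, x2, y2) by `margin` pixels in total,
    # clipping the result to the image bounds; img_shape is (height, width).
    bb = np.zeros(4, dtype=np.int32)
    bb[0] = np.maximum(det[0] - margin / 2, 0)
    bb[1] = np.maximum(det[1] - margin / 2, 0)
    bb[2] = np.minimum(det[2] + margin / 2, img_shape[1])
    bb[3] = np.minimum(det[3] + margin / 2, img_shape[0])
    return bb
```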
-------------------------------------------------------------------------------- /detect_facese_real_time.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from scipy import misc 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import argparse 11 | import facenet 12 | import detect_face 13 | import os 14 | from os.path import join as pjoin 15 | import sys 16 | import time 17 | import copy 18 | import math 19 | import pickle 20 | from sklearn.svm import SVC 21 | from sklearn.externals import joblib 22 | 23 | print('Creating networks and loading parameters') 24 | with tf.Graph().as_default(): 25 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6) 26 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 27 | with sess.as_default(): 28 | pnet, rnet, onet = detect_face.create_mtcnn(sess, './d_npy') 29 | 30 | minsize = 20 # minimum size of face 31 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 32 | factor = 0.709 # scale factor 33 | margin = 44 34 | frame_interval = 3 35 | batch_size = 1000 36 | image_size = 182 37 | input_image_size = 160 38 | 39 | HumanNames = os.listdir("./input_dir") 40 | HumanNames.sort() 41 | 42 | print('Loading feature extraction model') 43 | modeldir = './pre_model/20170511-185253.pb' 44 | facenet.load_model(modeldir) 45 | 46 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 47 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 48 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 49 | embedding_size = embeddings.get_shape()[1] 50 | 51 | classifier_filename = './my_class/my_classifier.pkl' 52 | classifier_filename_exp = os.path.expanduser(classifier_filename) 53 | with open(classifier_filename_exp, 'rb') as infile: 54 | (model, class_names) = pickle.load(infile) 55 | print('load classifier file-> %s' % classifier_filename_exp) 56 | 57 | video_capture = cv2.VideoCapture(0) 58 | c = 0 59 | 60 | # #video writer 61 | fourcc = cv2.VideoWriter_fourcc(*'DIVX') 62 | out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=30, frameSize=(640,480)) 63 | 64 | print('Start Recognition!') 65 | prevTime = 0 66 | while True: 67 | ret, frame = video_capture.read() 68 | 69 | frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional) 70 | 71 | curTime = time.time()+1 # calc fps 72 | timeF = frame_interval 73 | 74 | if (c % timeF == 0): 75 | find_results = [] 76 | 77 | if frame.ndim == 2: 78 | frame = facenet.to_rgb(frame) 79 | frame = frame[:, :, 0:3] 80 | bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor) 81 | nrof_faces = bounding_boxes.shape[0] 82 | print('Detected_FaceNum: %d' % nrof_faces) 83 | 84 | if nrof_faces > 0: 85 | det = bounding_boxes[:, 0:4] 86 | img_size = np.asarray(frame.shape)[0:2] 87 | 88 | cropped = [] 89 | scaled = [] 90 | scaled_reshape = [] 91 | bb = np.zeros((nrof_faces,4), dtype=np.int32) 92 | 93 | for i in range(nrof_faces): 94 | emb_array = np.zeros((1, embedding_size)) 95 | 96 | bb[i][0] = det[i][0] 97 | bb[i][1] = det[i][1] 98 | bb[i][2] = det[i][2] 99 | bb[i][3] = det[i][3] 100 | 101 | # inner exception 102 | if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame): 103 | print('face is inner of range!') 104 | 
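                    # Skip boxes that touch or cross the frame border: the crop
                    # below would otherwise slice outside the image
                    # (len(frame[0]) is the frame width, len(frame) its height).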
continue 105 | 106 | cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) 107 | cropped[i] = facenet.flip(cropped[i], False) 108 | scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear')) 109 | scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size), 110 | interpolation=cv2.INTER_CUBIC) 111 | scaled[i] = facenet.prewhiten(scaled[i]) 112 | scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3)) 113 | feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False} 114 | emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict) 115 | predictions = model.predict_proba(emb_array) 116 | print(predictions) 117 | best_class_indices = np.argmax(predictions, axis=1) 118 | print(best_class_indices) 119 | best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices] 120 | print(best_class_probabilities) 121 | cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face 122 | 123 | #plot result idx under box 124 | text_x = bb[i][0] 125 | text_y = bb[i][3] + 20 126 | print('result: ', best_class_indices[0]) 127 | print(best_class_indices) 128 | print(HumanNames) 129 | for H_i in HumanNames: 130 | print(H_i) 131 | if HumanNames[best_class_indices[0]] == H_i: 132 | result_names = HumanNames[best_class_indices[0]] 133 | cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 134 | 1, (0, 0, 255), thickness=1, lineType=2) 135 | else: 136 | print('Unable to align') 137 | 138 | sec = curTime - prevTime 139 | prevTime = curTime 140 | fps = 1 / (sec) 141 | str = 'FPS: %2.3f' % fps 142 | text_fps_x = len(frame[0]) - 150 143 | text_fps_y = 20 144 | cv2.putText(frame, str, (text_fps_x, text_fps_y), 145 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2) 146 | # c+=1 147 | cv2.imshow('Video', frame) 148 | 149 | if cv2.waitKey(1) & 0xFF == ord('q'): 150 | break 151 | 152 | video_capture.release() 153 | # #video writer 154 | out.release() 155 | cv2.destroyAllWindows() 156 | -------------------------------------------------------------------------------- /detect_facese_real_time_with_incFrame.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from scipy import misc 7 | import cv2 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import argparse 11 | import facenet 12 | import detect_face 13 | import os 14 | from os.path import join as pjoin 15 | import sys 16 | import time 17 | import copy 18 | import math 19 | import pickle 20 | from sklearn.svm import SVC 21 | from sklearn.externals import joblib 22 | 23 | print('Creating networks and loading parameters') 24 | with tf.Graph().as_default(): 25 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.6) 26 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 27 | with sess.as_default(): 28 | pnet, rnet, onet = detect_face.create_mtcnn(sess, './d_npy') 29 | 30 | minsize = 20 # minimum size of face 31 | threshold = [0.6, 0.7, 0.7] # three steps's threshold 32 | factor = 0.709 # scale factor 33 | margin = 44 34 | frame_interval = 3 35 | batch_size = 1000 36 | image_size = 182 37 | input_image_size = 160 38 | 39 | HumanNames = os.listdir("./input_dir") 40 | HumanNames.sort() 41 | print('Loading feature 
extraction model') 42 | modeldir = './pre_model/20170511-185253.pb' 43 | facenet.load_model(modeldir) 44 | 45 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 46 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 47 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 48 | embedding_size = embeddings.get_shape()[1] 49 | 50 | classifier_filename = './my_class/my_classifier.pkl' 51 | classifier_filename_exp = os.path.expanduser(classifier_filename) 52 | with open(classifier_filename_exp, 'rb') as infile: 53 | (model, class_names) = pickle.load(infile) 54 | print('load classifier file-> %s' % classifier_filename_exp) 55 | 56 | video_capture = cv2.VideoCapture(0) 57 | c = 0 58 | counter = 1 59 | # #video writer 60 | fourcc = cv2.VideoWriter_fourcc(*'DIVX') 61 | out = cv2.VideoWriter('3F_0726.avi', fourcc, fps=14, frameSize=(640,480)) 62 | 63 | print('Start Recognition!') 64 | prevTime = 0 65 | while True: 66 | ret, frame = video_capture.read() 67 | frame = cv2.resize(frame, (0,0), fx=0.5, fy=0.5) #resize frame (optional) 68 | 69 | curTime = time.time()+1 # calc fps 70 | timeF = frame_interval 71 | counter += 1 72 | if (counter % 12 == 0): 73 | if (c % timeF == 0): 74 | find_results = [] 75 | 76 | if frame.ndim == 2: 77 | frame = facenet.to_rgb(frame) 78 | frame = frame[:, :, 0:3] 79 | bounding_boxes, _ = detect_face.detect_face(frame, minsize, pnet, rnet, onet, threshold, factor) 80 | nrof_faces = bounding_boxes.shape[0] 81 | print('Detected_FaceNum: %d' % nrof_faces) 82 | 83 | if nrof_faces > 0: 84 | det = bounding_boxes[:, 0:4] 85 | img_size = np.asarray(frame.shape)[0:2] 86 | 87 | cropped = [] 88 | scaled = [] 89 | scaled_reshape = [] 90 | bb = np.zeros((nrof_faces,4), dtype=np.int32) 91 | 92 | for i in range(nrof_faces): 93 | emb_array = np.zeros((1, embedding_size)) 94 | 95 | bb[i][0] = det[i][0] 96 | bb[i][1] = det[i][1] 97 | bb[i][2] = det[i][2] 98 | bb[i][3] = det[i][3] 99 | 100 | # inner exception 101 | if bb[i][0] <= 0 or bb[i][1] <= 0 or bb[i][2] >= len(frame[0]) or bb[i][3] >= len(frame): 102 | print('face is inner of range!') 103 | continue 104 | 105 | cropped.append(frame[bb[i][1]:bb[i][3], bb[i][0]:bb[i][2], :]) 106 | cropped[i] = facenet.flip(cropped[i], False) 107 | scaled.append(misc.imresize(cropped[i], (image_size, image_size), interp='bilinear')) 108 | scaled[i] = cv2.resize(scaled[i], (input_image_size,input_image_size), 109 | interpolation=cv2.INTER_CUBIC) 110 | scaled[i] = facenet.prewhiten(scaled[i]) 111 | scaled_reshape.append(scaled[i].reshape(-1,input_image_size,input_image_size,3)) 112 | feed_dict = {images_placeholder: scaled_reshape[i], phase_train_placeholder: False} 113 | emb_array[0, :] = sess.run(embeddings, feed_dict=feed_dict) 114 | predictions = model.predict_proba(emb_array) 115 | print(predictions) 116 | best_class_indices = np.argmax(predictions, axis=1) 117 | print(best_class_indices) 118 | best_class_probabilities = predictions[np.arange(len(best_class_indices)), best_class_indices] 119 | print(best_class_probabilities) 120 | cv2.rectangle(frame, (bb[i][0], bb[i][1]), (bb[i][2], bb[i][3]), (0, 255, 0), 2) #boxing face 121 | 122 | #plot result idx under box 123 | text_x = bb[i][0] 124 | text_y = bb[i][3] + 20 125 | print('result: ', best_class_indices[0]) 126 | print(best_class_indices) 127 | print(HumanNames) 128 | for H_i in HumanNames: 129 | print(H_i) 130 | if HumanNames[best_class_indices[0]] == H_i: 131 | result_names = HumanNames[best_class_indices[0]] 132 | 
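                        # NOTE: best_class_indices indexes into alphabetically
                        # sorted class names, so this assumes ./input_dir still
                        # holds the same class folders the classifier was
                        # trained on (out_dir mirrors input_dir). In that case
                        # HumanNames[best_class_indices[0]] is already the
                        # predicted name and this loop is effectively redundant.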
cv2.putText(frame, result_names, (text_x, text_y), cv2.FONT_HERSHEY_COMPLEX_SMALL, 133 | 1, (0, 0, 255), thickness=1, lineType=2) 134 | else: 135 | print('Unable to align') 136 | 137 | sec = curTime - prevTime 138 | prevTime = curTime 139 | fps = 1 / (sec) 140 | str = 'FPS: %2.3f' % fps 141 | text_fps_x = len(frame[0]) - 150 142 | text_fps_y = 20 143 | cv2.putText(frame, str, (text_fps_x, text_fps_y), 144 | cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 0, 0), thickness=1, lineType=2) 145 | # c+=1 146 | cv2.imshow('Video', frame) 147 | 148 | if cv2.waitKey(1) & 0xFF == ord('q'): 149 | break 150 | 151 | video_capture.release() 152 | # #video writer 153 | out.release() 154 | cv2.destroyAllWindows() -------------------------------------------------------------------------------- /facenet.py: -------------------------------------------------------------------------------- 1 | """Functions for building the face recognition network. 2 | """ 3 | # MIT License 4 | # 5 | # Copyright (c) 2016 David Sandberg 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | # pylint: disable=missing-docstring 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import os 31 | from subprocess import Popen, PIPE 32 | import tensorflow as tf 33 | from tensorflow.python.framework import ops 34 | import numpy as np 35 | from scipy import misc 36 | from sklearn.model_selection import KFold 37 | from scipy import interpolate 38 | from tensorflow.python.training import training 39 | import random 40 | import re 41 | from tensorflow.python.platform import gfile 42 | 43 | def triplet_loss(anchor, positive, negative, alpha): 44 | """Calculate the triplet loss according to the FaceNet paper 45 | 46 | Args: 47 | anchor: the embeddings for the anchor images. 48 | positive: the embeddings for the positive images. 49 | negative: the embeddings for the negative images. 50 | 51 | Returns: 52 | the triplet loss according to the FaceNet paper as a float tensor. 
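
    In symbols, with margin alpha:
        L = mean(max(||f(a) - f(p)||^2 - ||f(a) - f(n)||^2 + alpha, 0))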
53 | """ 54 | with tf.variable_scope('triplet_loss'): 55 | pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) 56 | neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) 57 | 58 | basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) 59 | loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) 60 | 61 | return loss 62 | 63 | def decov_loss(xs): 64 | """Decov loss as described in https://arxiv.org/pdf/1511.06068.pdf 65 | 'Reducing Overfitting In Deep Networks by Decorrelating Representation' 66 | """ 67 | x = tf.reshape(xs, [int(xs.get_shape()[0]), -1]) 68 | m = tf.reduce_mean(x, 0, True) 69 | z = tf.expand_dims(x-m, 2) 70 | corr = tf.reduce_mean(tf.matmul(z, tf.transpose(z, perm=[0,2,1])), 0) 71 | corr_frob_sqr = tf.reduce_sum(tf.square(corr)) 72 | corr_diag_sqr = tf.reduce_sum(tf.square(tf.diag_part(corr))) 73 | loss = 0.5*(corr_frob_sqr - corr_diag_sqr) 74 | return loss 75 | 76 | def center_loss(features, label, alfa, nrof_classes): 77 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 78 | (http://ydwen.github.io/papers/WenECCV16.pdf) 79 | """ 80 | nrof_features = features.get_shape()[1] 81 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 82 | initializer=tf.constant_initializer(0), trainable=False) 83 | label = tf.reshape(label, [-1]) 84 | centers_batch = tf.gather(centers, label) 85 | diff = (1 - alfa) * (centers_batch - features) 86 | centers = tf.scatter_sub(centers, label, diff) 87 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 88 | return loss, centers 89 | 90 | def get_image_paths_and_labels(dataset): 91 | image_paths_flat = [] 92 | labels_flat = [] 93 | for i in range(len(dataset)): 94 | image_paths_flat += dataset[i].image_paths 95 | labels_flat += [i] * len(dataset[i].image_paths) 96 | return image_paths_flat, labels_flat 97 | 98 | def shuffle_examples(image_paths, labels): 99 | shuffle_list = list(zip(image_paths, labels)) 100 | random.shuffle(shuffle_list) 101 | image_paths_shuff, labels_shuff = zip(*shuffle_list) 102 | return image_paths_shuff, labels_shuff 103 | 104 | def read_images_from_disk(input_queue): 105 | """Consumes a single filename and label as a ' '-delimited string. 106 | Args: 107 | filename_and_label_tensor: A scalar string tensor. 108 | Returns: 109 | Two tensors: the decoded image, and the string label. 
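
    PNG input is assumed: tf.image.decode_png below matches the .png crops
    that aligndata_first.py writes.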
110 | """ 111 | label = input_queue[1] 112 | file_contents = tf.read_file(input_queue[0]) 113 | example = tf.image.decode_png(file_contents, channels=3) 114 | return example, label 115 | 116 | def random_rotate_image(image): 117 | angle = np.random.uniform(low=-10.0, high=10.0) 118 | return misc.imrotate(image, angle, 'bicubic') 119 | 120 | def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, 121 | random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True): 122 | 123 | images = ops.convert_to_tensor(image_list, dtype=tf.string) 124 | labels = ops.convert_to_tensor(label_list, dtype=tf.int32) 125 | 126 | # Makes an input queue 127 | input_queue = tf.train.slice_input_producer([images, labels], 128 | num_epochs=max_nrof_epochs, shuffle=shuffle) 129 | 130 | images_and_labels = [] 131 | for _ in range(nrof_preprocess_threads): 132 | image, label = read_images_from_disk(input_queue) 133 | if random_rotate: 134 | image = tf.py_func(random_rotate_image, [image], tf.uint8) 135 | if random_crop: 136 | image = tf.random_crop(image, [image_size, image_size, 3]) 137 | else: 138 | image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size) 139 | if random_flip: 140 | image = tf.image.random_flip_left_right(image) 141 | #pylint: disable=no-member 142 | image.set_shape((image_size, image_size, 3)) 143 | image = tf.image.per_image_standardization(image) 144 | images_and_labels.append([image, label]) 145 | 146 | image_batch, label_batch = tf.train.batch_join( 147 | images_and_labels, batch_size=batch_size, 148 | capacity=4 * nrof_preprocess_threads * batch_size, 149 | allow_smaller_final_batch=True) 150 | 151 | return image_batch, label_batch 152 | 153 | def _add_loss_summaries(total_loss): 154 | """Add summaries for losses. 155 | 156 | Generates moving average for all losses and associated summaries for 157 | visualizing the performance of the network. 158 | 159 | Args: 160 | total_loss: Total loss from loss(). 161 | Returns: 162 | loss_averages_op: op for generating moving averages of losses. 163 | """ 164 | # Compute the moving average of all individual losses and the total loss. 165 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') 166 | losses = tf.get_collection('losses') 167 | loss_averages_op = loss_averages.apply(losses + [total_loss]) 168 | 169 | # Attach a scalar summmary to all individual losses and the total loss; do the 170 | # same for the averaged version of the losses. 171 | for l in losses + [total_loss]: 172 | # Name each loss as '(raw)' and name the moving average version of the loss 173 | # as the original loss name. 174 | tf.summary.scalar(l.op.name +' (raw)', l) 175 | tf.summary.scalar(l.op.name, loss_averages.average(l)) 176 | 177 | return loss_averages_op 178 | 179 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True): 180 | # Generate moving averages of all losses and associated summaries. 181 | loss_averages_op = _add_loss_summaries(total_loss) 182 | 183 | # Compute gradients. 
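    # The control dependency below makes sure the moving-average loss summaries
    # are updated before this step's gradients are computed; the optimizer is
    # selected by the `optimizer` string flag.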
184 | with tf.control_dependencies([loss_averages_op]): 185 | if optimizer=='ADAGRAD': 186 | opt = tf.train.AdagradOptimizer(learning_rate) 187 | elif optimizer=='ADADELTA': 188 | opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) 189 | elif optimizer=='ADAM': 190 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) 191 | elif optimizer=='RMSPROP': 192 | opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) 193 | elif optimizer=='MOM': 194 | opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True) 195 | else: 196 | raise ValueError('Invalid optimization algorithm') 197 | 198 | grads = opt.compute_gradients(total_loss, update_gradient_vars) 199 | 200 | # Apply gradients. 201 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 202 | 203 | # Add histograms for trainable variables. 204 | if log_histograms: 205 | for var in tf.trainable_variables(): 206 | tf.summary.histogram(var.op.name, var) 207 | 208 | # Add histograms for gradients. 209 | if log_histograms: 210 | for grad, var in grads: 211 | if grad is not None: 212 | tf.summary.histogram(var.op.name + '/gradients', grad) 213 | 214 | # Track the moving averages of all trainable variables. 215 | variable_averages = tf.train.ExponentialMovingAverage( 216 | moving_average_decay, global_step) 217 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 218 | 219 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]): 220 | train_op = tf.no_op(name='train') 221 | 222 | return train_op 223 | 224 | def prewhiten(x): 225 | mean = np.mean(x) 226 | std = np.std(x) 227 | std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) 228 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 229 | return y 230 | 231 | def crop(image, random_crop, image_size): 232 | if image.shape[1]>image_size: 233 | sz1 = int(image.shape[1]//2) 234 | sz2 = int(image_size//2) 235 | if random_crop: 236 | diff = sz1-sz2 237 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 238 | else: 239 | (h, v) = (0,0) 240 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 241 | return image 242 | 243 | def flip(image, random_flip): 244 | if random_flip and np.random.choice([True, False]): 245 | image = np.fliplr(image) 246 | return image 247 | 248 | def to_rgb(img): 249 | w, h = img.shape 250 | ret = np.empty((w, h, 3), dtype=np.uint8) 251 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 252 | return ret 253 | 254 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 255 | nrof_samples = len(image_paths) 256 | images = np.zeros((nrof_samples, image_size, image_size, 3)) 257 | for i in range(nrof_samples): 258 | img = misc.imread(image_paths[i]) 259 | if img.ndim == 2: 260 | img = to_rgb(img) 261 | if do_prewhiten: 262 | img = prewhiten(img) 263 | img = crop(img, do_random_crop, image_size) 264 | img = flip(img, do_random_flip) 265 | images[i,:,:,:] = img 266 | return images 267 | 268 | def get_label_batch(label_data, batch_size, batch_index): 269 | nrof_examples = np.size(label_data, 0) 270 | j = batch_index*batch_size % nrof_examples 271 | if j+batch_size<=nrof_examples: 272 | batch = label_data[j:j+batch_size] 273 | else: 274 | x1 = label_data[j:nrof_examples] 275 | x2 = label_data[0:nrof_examples-j] 276 | batch = np.vstack([x1,x2]) 277 | batch_int = batch.astype(np.int64) 278 | return batch_int 279 | 280 | def get_batch(image_data, batch_size, batch_index): 
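    # Same wrap-around indexing as get_label_batch above: batches are taken
    # modulo the number of examples, e.g. 10 images with batch_size=4 and
    # batch_index=2 yield images [8, 9, 0, 1].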
281 |     nrof_examples = np.size(image_data, 0)
282 |     j = batch_index*batch_size % nrof_examples
283 |     if j+batch_size<=nrof_examples:
284 |         batch = image_data[j:j+batch_size,:,:,:]
285 |     else:
286 |         x1 = image_data[j:nrof_examples,:,:,:]
287 |         x2 = image_data[0:nrof_examples-j,:,:,:]
288 |         batch = np.vstack([x1,x2])
289 |     batch_float = batch.astype(np.float32)
290 |     return batch_float
291 | 
292 | def get_triplet_batch(triplets, batch_index, batch_size):
293 |     ax, px, nx = triplets
294 |     a = get_batch(ax, int(batch_size/3), batch_index)
295 |     p = get_batch(px, int(batch_size/3), batch_index)
296 |     n = get_batch(nx, int(batch_size/3), batch_index)
297 |     batch = np.vstack([a, p, n])
298 |     return batch
299 | 
300 | def get_learning_rate_from_file(filename, epoch):
301 |     with open(filename, 'r') as f:
302 |         for line in f.readlines():
303 |             line = line.split('#', 1)[0]
304 |             if line:
305 |                 par = line.strip().split(':')
306 |                 e = int(par[0])
307 |                 lr = float(par[1])
308 |                 if e <= epoch:
309 |                     learning_rate = lr
310 |                 else:
311 |                     return learning_rate
312 | 
313 | class ImageClass():
314 |     "Stores the paths to images for a given class"
315 |     def __init__(self, name, image_paths):
316 |         self.name = name
317 |         self.image_paths = image_paths
318 | 
319 |     def __str__(self):
320 |         return self.name + ', ' + str(len(self.image_paths)) + ' images'
321 | 
322 |     def __len__(self):
323 |         return len(self.image_paths)
324 | 
325 | def get_dataset(paths, has_class_directories=True):
326 |     dataset = []
327 |     for path in paths.split(':'):
328 |         path_exp = os.path.expanduser(path)
329 |         classes = os.listdir(path_exp)
330 |         classes.sort()
331 |         nrof_classes = len(classes)
332 |         for i in range(nrof_classes):
333 |             class_name = classes[i]
334 |             facedir = os.path.join(path_exp, class_name)
335 |             image_paths = get_image_paths(facedir)
336 |             dataset.append(ImageClass(class_name, image_paths))
337 | 
338 |     return dataset
339 | 
340 | def get_image_paths(facedir):
341 |     image_paths = []
342 |     if os.path.isdir(facedir):
343 |         images = os.listdir(facedir)
344 |         image_paths = [os.path.join(facedir,img) for img in images]
345 |     return image_paths
346 | 
347 | def split_dataset(dataset, split_ratio, mode):
348 |     if mode=='SPLIT_CLASSES':
349 |         nrof_classes = len(dataset)
350 |         class_indices = np.arange(nrof_classes)
351 |         np.random.shuffle(class_indices)
352 |         split = int(round(nrof_classes*split_ratio))
353 |         train_set = [dataset[i] for i in class_indices[0:split]]
354 |         test_set = [dataset[i] for i in class_indices[split:-1]]
355 |     elif mode=='SPLIT_IMAGES':
356 |         train_set = []
357 |         test_set = []
358 |         min_nrof_images = 2
359 |         for cls in dataset:
360 |             paths = cls.image_paths
361 |             np.random.shuffle(paths)
362 |             split = int(round(len(paths)*split_ratio))
363 |             if split<min_nrof_images:
364 |                 continue  # Not enough images for test set. Skip class...
365 |             train_set.append(ImageClass(cls.name, paths[0:split]))
366 |             test_set.append(ImageClass(cls.name, paths[split:-1]))
367 |     else:
368 |         raise ValueError('Invalid train/test split mode "%s"' % mode)
369 |     return train_set, test_set
370 | 
371 | def load_model(model):
372 |     # Check if the model is a model directory (containing a metagraph and a checkpoint file)
373 |     #  or if it is a protobuf file with a frozen graph
374 |     model_exp = os.path.expanduser(model)
375 |     if (os.path.isfile(model_exp)):
376 |         print('Model filename: %s' % model_exp)
377 |         with gfile.FastGFile(model_exp,'rb') as f:
378 |             graph_def = tf.GraphDef()
379 |             graph_def.ParseFromString(f.read())
380 |             tf.import_graph_def(graph_def, name='')
381 |     else:
382 |         print('Model directory: %s' % model_exp)
383 |         meta_file, ckpt_file = get_model_filenames(model_exp)
384 | 
385 |         print('Metagraph file: %s' % meta_file)
386 |         print('Checkpoint file: %s' % ckpt_file)
387 | 
388 |         saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
389 |         saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
390 | 
391 | def get_model_filenames(model_dir):
392 |     files = os.listdir(model_dir)
393 |     meta_files = [s for s in files if s.endswith('.meta')]
394 |     if len(meta_files)==0:
395 |         raise ValueError('No meta file found in the model directory (%s)' % model_dir)
396 |     elif len(meta_files)>1:
397 |         raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
398 |     meta_file = meta_files[0]
399 |     meta_files = [s for s in files if '.ckpt' in s]
400 |     max_step = -1
401 |     for f in files:
402 |         step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
403 |         if step_str is not None and len(step_str.groups())>=2:
404 |             step = int(step_str.groups()[1])
405 |             if step > max_step:
406 |                 max_step = step
407 |                 ckpt_file = step_str.groups()[0]
408 |     return meta_file, ckpt_file
409 | 
410 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10):
411 |     assert(embeddings1.shape[0] == embeddings2.shape[0])
412 |     assert(embeddings1.shape[1] == embeddings2.shape[1])
413 |     nrof_pairs =
min(len(actual_issame), embeddings1.shape[0]) 414 | nrof_thresholds = len(thresholds) 415 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 416 | 417 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 418 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 419 | accuracy = np.zeros((nrof_folds)) 420 | 421 | diff = np.subtract(embeddings1, embeddings2) 422 | dist = np.sum(np.square(diff),1) 423 | indices = np.arange(nrof_pairs) 424 | 425 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 426 | 427 | # Find the best threshold for the fold 428 | acc_train = np.zeros((nrof_thresholds)) 429 | for threshold_idx, threshold in enumerate(thresholds): 430 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 431 | best_threshold_index = np.argmax(acc_train) 432 | for threshold_idx, threshold in enumerate(thresholds): 433 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) 434 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 435 | 436 | tpr = np.mean(tprs,0) 437 | fpr = np.mean(fprs,0) 438 | return tpr, fpr, accuracy 439 | 440 | def calculate_accuracy(threshold, dist, actual_issame): 441 | predict_issame = np.less(dist, threshold) 442 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 443 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 444 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 445 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 446 | 447 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 448 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 449 | acc = float(tp+tn)/dist.size 450 | return tpr, fpr, acc 451 | 452 | 453 | 454 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10): 455 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 456 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 457 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 458 | nrof_thresholds = len(thresholds) 459 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 460 | 461 | val = np.zeros(nrof_folds) 462 | far = np.zeros(nrof_folds) 463 | 464 | diff = np.subtract(embeddings1, embeddings2) 465 | dist = np.sum(np.square(diff),1) 466 | indices = np.arange(nrof_pairs) 467 | 468 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 469 | 470 | # Find the threshold that gives FAR = far_target 471 | far_train = np.zeros(nrof_thresholds) 472 | for threshold_idx, threshold in enumerate(thresholds): 473 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 474 | if np.max(far_train)>=far_target: 475 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 476 | threshold = f(far_target) 477 | else: 478 | threshold = 0.0 479 | 480 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 481 | 482 | val_mean = np.mean(val) 483 | far_mean = np.mean(far) 484 | val_std = np.std(val) 485 | return val_mean, val_std, far_mean 486 | 487 | 488 | def calculate_val_far(threshold, dist, actual_issame): 489 | predict_issame = np.less(dist, threshold) 490 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 491 | false_accept = np.sum(np.logical_and(predict_issame, 
np.logical_not(actual_issame)))
492 |     n_same = np.sum(actual_issame)
493 |     n_diff = np.sum(np.logical_not(actual_issame))
494 |     val = float(true_accept) / float(n_same)
495 |     far = float(false_accept) / float(n_diff)
496 |     return val, far
497 | 
498 | def store_revision_info(src_path, output_dir, arg_string):
499 | 
500 |     # Get git hash
501 |     gitproc = Popen(['git', 'rev-parse', 'HEAD'], stdout = PIPE, cwd=src_path)
502 |     (stdout, _) = gitproc.communicate()
503 |     git_hash = stdout.strip()
504 | 
505 |     # Get local changes
506 |     gitproc = Popen(['git', 'diff', 'HEAD'], stdout = PIPE, cwd=src_path)
507 |     (stdout, _) = gitproc.communicate()
508 |     git_diff = stdout.strip()
509 | 
510 |     # Store a text file in the log directory
511 |     rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
512 |     with open(rev_info_filename, "w") as text_file:
513 |         text_file.write('arguments: %s\n--------------------\n' % arg_string)
514 |         text_file.write('git hash: %s\n--------------------\n' % git_hash)
515 |         text_file.write('%s' % git_diff)
516 | 
517 | def list_variables(filename):
518 |     reader = training.NewCheckpointReader(filename)
519 |     variable_map = reader.get_variable_to_shape_map()
520 |     names = sorted(variable_map.keys())
521 |     return names
522 | 
523 | def put_images_on_grid(images, shape=(16,8)):
524 |     nrof_images = images.shape[0]
525 |     img_size = images.shape[1]
526 |     bw = 3
527 |     img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32)
528 |     for i in range(shape[1]):
529 |         x_start = i*(img_size+bw)+bw
530 |         for j in range(shape[0]):
531 |             img_index = i*shape[0]+j
532 |             if img_index>=nrof_images:
533 |                 break
534 |             y_start = j*(img_size+bw)+bw
535 |             img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :]
536 |         if img_index>=nrof_images:
537 |             break
538 |     return img
539 | 
540 | def write_arguments_to_file(args, filename):
541 |     with open(filename, 'w') as f:
542 |         for key, value in vars(args).items():  # .items() works on both Python 2 and 3
543 |             f.write('%s: %s\n' % (key, str(value)))
544 | 
--------------------------------------------------------------------------------
/detect_face.py:
--------------------------------------------------------------------------------
1 | """ Tensorflow implementation of the face detection / alignment algorithm found at
2 | https://github.com/kpzhang93/MTCNN_face_detection_alignment
3 | """
4 | # MIT License
5 | #
6 | # Copyright (c) 2016 David Sandberg
7 | #
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | #
15 | # The above copyright notice and this permission notice shall be included in all
16 | # copies or substantial portions of the Software.
17 | #
18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | from six import string_types, iteritems 30 | 31 | import numpy as np 32 | import tensorflow as tf 33 | #from math import floor 34 | import cv2 35 | import os 36 | 37 | def layer(op): 38 | '''Decorator for composable network layers.''' 39 | 40 | def layer_decorated(self, *args, **kwargs): 41 | # Automatically set a name if not provided. 42 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 43 | # Figure out the layer inputs. 44 | if len(self.terminals) == 0: 45 | raise RuntimeError('No input variables found for layer %s.' % name) 46 | elif len(self.terminals) == 1: 47 | layer_input = self.terminals[0] 48 | else: 49 | layer_input = list(self.terminals) 50 | # Perform the operation and get the output. 51 | layer_output = op(self, layer_input, *args, **kwargs) 52 | # Add to layer LUT. 53 | self.layers[name] = layer_output 54 | # This output is now the input for the next layer. 55 | self.feed(layer_output) 56 | # Return self for chained calls. 57 | return self 58 | 59 | return layer_decorated 60 | 61 | class Network(object): 62 | 63 | def __init__(self, inputs, trainable=True): 64 | # The input nodes for this network 65 | self.inputs = inputs 66 | # The current list of terminal nodes 67 | self.terminals = [] 68 | # Mapping from layer names to layers 69 | self.layers = dict(inputs) 70 | # If true, the resulting variables are set as trainable 71 | self.trainable = trainable 72 | 73 | self.setup() 74 | 75 | def setup(self): 76 | '''Construct the network. ''' 77 | raise NotImplementedError('Must be implemented by the subclass.') 78 | 79 | def load(self, data_path, session, ignore_missing=False): 80 | '''Load network weights. 81 | data_path: The path to the numpy-serialized network weights 82 | session: The current TensorFlow session 83 | ignore_missing: If true, serialized weights for missing layers are ignored. 84 | ''' 85 | data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member 86 | 87 | for op_name in data_dict: 88 | with tf.variable_scope(op_name, reuse=True): 89 | for param_name, data in iteritems(data_dict[op_name]): 90 | try: 91 | var = tf.get_variable(param_name) 92 | session.run(var.assign(data)) 93 | except ValueError: 94 | if not ignore_missing: 95 | raise 96 | 97 | def feed(self, *args): 98 | '''Set the input(s) for the next operation by replacing the terminal nodes. 99 | The arguments can be either layer names or the actual layers. 100 | ''' 101 | assert len(args) != 0 102 | self.terminals = [] 103 | for fed_layer in args: 104 | if isinstance(fed_layer, string_types): 105 | try: 106 | fed_layer = self.layers[fed_layer] 107 | except KeyError: 108 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 109 | self.terminals.append(fed_layer) 110 | return self 111 | 112 | def get_output(self): 113 | '''Returns the current network output.''' 114 | return self.terminals[-1] 115 | 116 | def get_unique_name(self, prefix): 117 | '''Returns an index-suffixed unique name for the given prefix. 118 | This is used for auto-generating layer names based on the type-prefix. 
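
        For example, the first two 'conv' layers get the unique names
        'conv_1' and 'conv_2'.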
119 | ''' 120 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 121 | return '%s_%d' % (prefix, ident) 122 | 123 | def make_var(self, name, shape): 124 | '''Creates a new TensorFlow variable.''' 125 | return tf.get_variable(name, shape, trainable=self.trainable) 126 | 127 | def validate_padding(self, padding): 128 | '''Verifies that the padding is one of the supported ones.''' 129 | assert padding in ('SAME', 'VALID') 130 | 131 | @layer 132 | def conv(self, 133 | inp, 134 | k_h, 135 | k_w, 136 | c_o, 137 | s_h, 138 | s_w, 139 | name, 140 | relu=True, 141 | padding='SAME', 142 | group=1, 143 | biased=True): 144 | # Verify that the padding is acceptable 145 | self.validate_padding(padding) 146 | # Get the number of channels in the input 147 | c_i = int(inp.get_shape()[-1]) 148 | # Verify that the grouping parameter is valid 149 | assert c_i % group == 0 150 | assert c_o % group == 0 151 | # Convolution for a given input and kernel 152 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 153 | with tf.variable_scope(name) as scope: 154 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 155 | # This is the common-case. Convolve the input without any further complications. 156 | output = convolve(inp, kernel) 157 | # Add the biases 158 | if biased: 159 | biases = self.make_var('biases', [c_o]) 160 | output = tf.nn.bias_add(output, biases) 161 | if relu: 162 | # ReLU non-linearity 163 | output = tf.nn.relu(output, name=scope.name) 164 | return output 165 | 166 | @layer 167 | def prelu(self, inp, name): 168 | with tf.variable_scope(name): 169 | i = int(inp.get_shape()[-1]) 170 | alpha = self.make_var('alpha', shape=(i,)) 171 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 172 | return output 173 | 174 | @layer 175 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 176 | self.validate_padding(padding) 177 | return tf.nn.max_pool(inp, 178 | ksize=[1, k_h, k_w, 1], 179 | strides=[1, s_h, s_w, 1], 180 | padding=padding, 181 | name=name) 182 | 183 | @layer 184 | def fc(self, inp, num_out, name, relu=True): 185 | with tf.variable_scope(name): 186 | input_shape = inp.get_shape() 187 | if input_shape.ndims == 4: 188 | # The input is spatial. Vectorize it first. 
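                # e.g. a (batch, 3, 3, 128) activation is reshaped to
                # (batch, 1152) before the matmul.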
189 | dim = 1 190 | for d in input_shape[1:].as_list(): 191 | dim *= int(d) 192 | feed_in = tf.reshape(inp, [-1, dim]) 193 | else: 194 | feed_in, dim = (inp, input_shape[-1].value) 195 | weights = self.make_var('weights', shape=[dim, num_out]) 196 | biases = self.make_var('biases', [num_out]) 197 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 198 | fc = op(feed_in, weights, biases, name=name) 199 | return fc 200 | 201 | 202 | """ 203 | Multi dimensional softmax, 204 | refer to https://github.com/tensorflow/tensorflow/issues/210 205 | compute softmax along the dimension of target 206 | the native softmax only supports batch_size x dimension 207 | """ 208 | @layer 209 | def softmax(self, target, axis, name=None): 210 | max_axis = tf.reduce_max(target, axis, keep_dims=True) 211 | target_exp = tf.exp(target-max_axis) 212 | normalize = tf.reduce_sum(target_exp, axis, keep_dims=True) 213 | softmax = tf.div(target_exp, normalize, name) 214 | return softmax 215 | 216 | class PNet(Network): 217 | def setup(self): 218 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 219 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 220 | .prelu(name='PReLU1') 221 | .max_pool(2, 2, 2, 2, name='pool1') 222 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 223 | .prelu(name='PReLU2') 224 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 225 | .prelu(name='PReLU3') 226 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 227 | .softmax(3,name='prob1')) 228 | 229 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 230 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 231 | 232 | class RNet(Network): 233 | def setup(self): 234 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 235 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 236 | .prelu(name='prelu1') 237 | .max_pool(3, 3, 2, 2, name='pool1') 238 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 239 | .prelu(name='prelu2') 240 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 241 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 242 | .prelu(name='prelu3') 243 | .fc(128, relu=False, name='conv4') 244 | .prelu(name='prelu4') 245 | .fc(2, relu=False, name='conv5-1') 246 | .softmax(1,name='prob1')) 247 | 248 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 249 | .fc(4, relu=False, name='conv5-2')) 250 | 251 | class ONet(Network): 252 | def setup(self): 253 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 254 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 255 | .prelu(name='prelu1') 256 | .max_pool(3, 3, 2, 2, name='pool1') 257 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 258 | .prelu(name='prelu2') 259 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 260 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 261 | .prelu(name='prelu3') 262 | .max_pool(2, 2, 2, 2, name='pool3') 263 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 264 | .prelu(name='prelu4') 265 | .fc(256, relu=False, name='conv5') 266 | .prelu(name='prelu5') 267 | .fc(2, relu=False, name='conv6-1') 268 | .softmax(1, name='prob1')) 269 | 270 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 271 | .fc(4, relu=False, name='conv6-2')) 272 | 273 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 274 | .fc(10, relu=False, name='conv6-3')) 275 | 276 | def create_mtcnn(sess, model_path): 277 | if not 
model_path: 278 | model_path,_ = os.path.split(os.path.realpath(__file__)) 279 | 280 | with tf.variable_scope('pnet'): 281 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 282 | pnet = PNet({'data':data}) 283 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 284 | with tf.variable_scope('rnet'): 285 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 286 | rnet = RNet({'data':data}) 287 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 288 | with tf.variable_scope('onet'): 289 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 290 | onet = ONet({'data':data}) 291 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 292 | 293 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 294 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 295 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 296 | return pnet_fun, rnet_fun, onet_fun 297 | 298 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 299 | # im: input image 300 | # minsize: minimum of faces' size 301 | # pnet, rnet, onet: caffemodel 302 | # threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold 303 | # fastresize: resize img from last scale (using in high-resolution images) if fastresize==true 304 | factor_count=0 305 | total_boxes=np.empty((0,9)) 306 | points=np.empty(0) 307 | h=img.shape[0] 308 | w=img.shape[1] 309 | minl=np.amin([h, w]) 310 | m=12.0/minsize 311 | minl=minl*m 312 | # creat scale pyramid 313 | scales=[] 314 | while minl>=12: 315 | scales += [m*np.power(factor, factor_count)] 316 | minl = minl*factor 317 | factor_count += 1 318 | 319 | # first stage 320 | for j in range(len(scales)): 321 | scale=scales[j] 322 | hs=int(np.ceil(h*scale)) 323 | ws=int(np.ceil(w*scale)) 324 | im_data = imresample(img, (hs, ws)) 325 | im_data = (im_data-127.5)*0.0078125 326 | img_x = np.expand_dims(im_data, 0) 327 | img_y = np.transpose(img_x, (0,2,1,3)) 328 | out = pnet(img_y) 329 | out0 = np.transpose(out[0], (0,2,1,3)) 330 | out1 = np.transpose(out[1], (0,2,1,3)) 331 | 332 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 333 | 334 | # inter-scale nms 335 | pick = nms(boxes.copy(), 0.5, 'Union') 336 | if boxes.size>0 and pick.size>0: 337 | boxes = boxes[pick,:] 338 | total_boxes = np.append(total_boxes, boxes, axis=0) 339 | 340 | numbox = total_boxes.shape[0] 341 | if numbox>0: 342 | pick = nms(total_boxes.copy(), 0.7, 'Union') 343 | total_boxes = total_boxes[pick,:] 344 | regw = total_boxes[:,2]-total_boxes[:,0] 345 | regh = total_boxes[:,3]-total_boxes[:,1] 346 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 347 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 348 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 349 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 350 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 351 | total_boxes = rerec(total_boxes.copy()) 352 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 353 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 354 | 355 | numbox = total_boxes.shape[0] 356 | if numbox>0: 357 | # second stage 358 | tempimg = np.zeros((24,24,3,numbox)) 359 | for k in range(0,numbox): 360 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 361 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = 
def detect_face(img, minsize, pnet, rnet, onet, threshold, factor):
    # img: input image (height x width x 3)
    # minsize: minimum size of faces to detect, in pixels
    # pnet, rnet, onet: the three stage networks returned by create_mtcnn
    # threshold: [th1, th2, th3] -- score thresholds for the three stages
    # factor: scale step of the image pyramid
    factor_count = 0
    total_boxes = np.empty((0, 9))
    points = np.empty(0)
    h = img.shape[0]
    w = img.shape[1]
    minl = np.amin([h, w])
    m = 12.0 / minsize
    minl = minl * m
    # create scale pyramid
    scales = []
    while minl >= 12:
        scales += [m * np.power(factor, factor_count)]
        minl = minl * factor
        factor_count += 1

    # first stage
    for j in range(len(scales)):
        scale = scales[j]
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))
        im_data = imresample(img, (hs, ws))
        im_data = (im_data - 127.5) * 0.0078125
        img_x = np.expand_dims(im_data, 0)
        img_y = np.transpose(img_x, (0, 2, 1, 3))
        out = pnet(img_y)
        out0 = np.transpose(out[0], (0, 2, 1, 3))
        out1 = np.transpose(out[1], (0, 2, 1, 3))

        boxes, _ = generateBoundingBox(out1[0, :, :, 1].copy(), out0[0, :, :, :].copy(), scale, threshold[0])

        # inter-scale nms
        pick = nms(boxes.copy(), 0.5, 'Union')
        if boxes.size > 0 and pick.size > 0:
            boxes = boxes[pick, :]
            total_boxes = np.append(total_boxes, boxes, axis=0)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        pick = nms(total_boxes.copy(), 0.7, 'Union')
        total_boxes = total_boxes[pick, :]
        regw = total_boxes[:, 2] - total_boxes[:, 0]
        regh = total_boxes[:, 3] - total_boxes[:, 1]
        qq1 = total_boxes[:, 0] + total_boxes[:, 5] * regw
        qq2 = total_boxes[:, 1] + total_boxes[:, 6] * regh
        qq3 = total_boxes[:, 2] + total_boxes[:, 7] * regw
        qq4 = total_boxes[:, 3] + total_boxes[:, 8] * regh
        total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:, 4]]))
        total_boxes = rerec(total_boxes.copy())
        total_boxes[:, 0:4] = np.fix(total_boxes[:, 0:4]).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # second stage
        tempimg = np.zeros((24, 24, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = img[y[k]-1:ey[k], x[k]-1:ex[k], :]
            if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                tempimg[:, :, :, k] = imresample(tmp, (24, 24))
            else:
                return np.empty((0, 9)), np.empty(0)  # invalid crop; return empty results
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = rnet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        score = out1[1, :]
        ipass = np.where(score > threshold[1])
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]
        if total_boxes.shape[0] > 0:
            pick = nms(total_boxes, 0.7, 'Union')
            total_boxes = total_boxes[pick, :]
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:, pick]))
            total_boxes = rerec(total_boxes.copy())

    numbox = total_boxes.shape[0]
    if numbox > 0:
        # third stage
        total_boxes = np.fix(total_boxes).astype(np.int32)
        dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h)
        tempimg = np.zeros((48, 48, 3, numbox))
        for k in range(0, numbox):
            tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
            tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = img[y[k]-1:ey[k], x[k]-1:ex[k], :]
            if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                tempimg[:, :, :, k] = imresample(tmp, (48, 48))
            else:
                return np.empty((0, 9)), np.empty(0)  # invalid crop; return empty results
        tempimg = (tempimg - 127.5) * 0.0078125
        tempimg1 = np.transpose(tempimg, (3, 1, 0, 2))
        out = onet(tempimg1)
        out0 = np.transpose(out[0])
        out1 = np.transpose(out[1])
        out2 = np.transpose(out[2])
        score = out2[1, :]
        points = out1
        ipass = np.where(score > threshold[2])
        points = points[:, ipass[0]]
        total_boxes = np.hstack([total_boxes[ipass[0], 0:4].copy(), np.expand_dims(score[ipass].copy(), 1)])
        mv = out0[:, ipass[0]]

        w = total_boxes[:, 2] - total_boxes[:, 0] + 1
        h = total_boxes[:, 3] - total_boxes[:, 1] + 1
        points[0:5, :] = np.tile(w, (5, 1)) * points[0:5, :] + np.tile(total_boxes[:, 0], (5, 1)) - 1
        points[5:10, :] = np.tile(h, (5, 1)) * points[5:10, :] + np.tile(total_boxes[:, 1], (5, 1)) - 1
        if total_boxes.shape[0] > 0:
            total_boxes = bbreg(total_boxes.copy(), np.transpose(mv))
            pick = nms(total_boxes.copy(), 0.7, 'Min')
            total_boxes = total_boxes[pick, :]
            points = points[:, pick]

    return total_boxes, points
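# Illustrative call (added; assumes an RGB image already loaded as a numpy array and
# the networks from create_mtcnn; the threshold/factor values below are the ones
# commonly used with this code, not mandated by it):
#
#     boxes, points = detect_face(img, 20, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
#
# Each row of boxes is [x1, y1, x2, y2, score]; points is a 10xN array holding
# x1..x5 then y1..y5 landmark coordinates for the N detected faces.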
def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
    # images: list of input images
    # detection_window_size_ratio: ratio of the minimum face size to the smaller image side
    # pnet, rnet, onet: the three stage networks returned by create_mtcnn
    # threshold: [th1, th2, th3] -- score thresholds (0-1) for the three stages
    # factor: scale step of the image pyramid

    all_scales = [None] * len(images)
    images_with_boxes = [None] * len(images)

    for i in range(len(images)):
        images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}

    # create scale pyramid
    for index, img in enumerate(images):
        all_scales[index] = []
        h = img.shape[0]
        w = img.shape[1]
        minsize = int(detection_window_size_ratio * np.minimum(w, h))
        factor_count = 0
        minl = np.amin([h, w])
        if minsize <= 12:
            minsize = 12

        m = 12.0 / minsize
        minl = minl * m
        while minl >= 12:
            all_scales[index].append(m * np.power(factor, factor_count))
            minl = minl * factor
            factor_count += 1

    # # # # # # # # # # # # #
    # first stage - fast proposal network (pnet) to obtain face candidates
    # # # # # # # # # # # # #

    images_obj_per_resolution = {}

    # TODO: round resolutions to a multiple of 8 to increase the probability that pyramid images will have the same resolution across input images

    for index, scales in enumerate(all_scales):
        h = images[index].shape[0]
        w = images[index].shape[1]

        for scale in scales:
            hs = int(np.ceil(h * scale))
            ws = int(np.ceil(w * scale))

            if (ws, hs) not in images_obj_per_resolution:
                images_obj_per_resolution[(ws, hs)] = []

            im_data = imresample(images[index], (hs, ws))
            im_data = (im_data - 127.5) * 0.0078125
            img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses a different dimension ordering
            images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})

    for resolution in images_obj_per_resolution:
        images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
        outs = pnet(images_per_resolution)

        for index in range(len(outs[0])):
            scale = images_obj_per_resolution[resolution][index]['scale']
            image_index = images_obj_per_resolution[resolution][index]['index']
            out0 = np.transpose(outs[0][index], (1, 0, 2))
            out1 = np.transpose(outs[1][index], (1, 0, 2))

            boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])

            # inter-scale nms
            pick = nms(boxes.copy(), 0.5, 'Union')
            if boxes.size > 0 and pick.size > 0:
                boxes = boxes[pick, :]
                images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
                                                                          boxes,
                                                                          axis=0)

    for index, image_obj in enumerate(images_with_boxes):
        numbox = image_obj['total_boxes'].shape[0]
        if numbox > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
            regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
            qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
            qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
            qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
            qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
            image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
            image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
            dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

            numbox = image_obj['total_boxes'].shape[0]
            tempimg = np.zeros((24, 24, 3, numbox))

            if numbox > 0:
                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = images[index][y[k]-1:ey[k], x[k]-1:ex[k], :]
                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                        tempimg[:, :, :, k] = imresample(tmp, (24, 24))
                    else:
                        return np.empty(0)  # invalid crop; bail out with an empty result

                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
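    # (Added note) The batching trick above: pyramid levels from different input
    # images that happen to share the same (ws, hs) resolution are stacked and pushed
    # through PNet together, which is why images_obj_per_resolution is keyed by
    # resolution rather than by image.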
    # # # # # # # # # # # # #
    # second stage - refinement of face candidates with rnet
    # # # # # # # # # # # # #

    bulk_rnet_input = np.empty((0, 24, 24, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' in image_obj:
            bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)

    out = rnet(bulk_rnet_input)
    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    score = out1[1, :]

    i = 0
    for index, image_obj in enumerate(images_with_boxes):
        if 'rnet_input' not in image_obj:
            continue

        rnet_input_count = image_obj['rnet_input'].shape[0]
        score_per_image = score[i:i + rnet_input_count]
        out0_per_image = out0[:, i:i + rnet_input_count]

        ipass = np.where(score_per_image > threshold[1])
        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])

        mv = out0_per_image[:, ipass[0]]

        if image_obj['total_boxes'].shape[0] > 0:
            h = images[index].shape[0]
            w = images[index].shape[1]
            pick = nms(image_obj['total_boxes'], 0.7, 'Union')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
            image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())

            numbox = image_obj['total_boxes'].shape[0]

            if numbox > 0:
                tempimg = np.zeros((48, 48, 3, numbox))
                image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
                dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)

                for k in range(0, numbox):
                    tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
                    tmp[dy[k]-1:edy[k], dx[k]-1:edx[k], :] = images[index][y[k]-1:ey[k], x[k]-1:ex[k], :]
                    if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
                        tempimg[:, :, :, k] = imresample(tmp, (48, 48))
                    else:
                        return np.empty(0)  # invalid crop; bail out with an empty result
                tempimg = (tempimg - 127.5) * 0.0078125
                image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))

        i += rnet_input_count
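    # (Added note) The bulk RNet/ONet outputs are consumed back per-image by slicing
    # with the running offset i, so the iteration order over images_with_boxes must
    # match the order in which the per-image inputs were concatenated above.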
    # # # # # # # # # # # # #
    # third stage - further refinement and facial landmarks positions with onet
    # # # # # # # # # # # # #

    bulk_onet_input = np.empty((0, 48, 48, 3))
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' in image_obj:
            bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)

    out = onet(bulk_onet_input)

    out0 = np.transpose(out[0])
    out1 = np.transpose(out[1])
    out2 = np.transpose(out[2])
    score = out2[1, :]
    points = out1

    i = 0
    ret = []
    for index, image_obj in enumerate(images_with_boxes):
        if 'onet_input' not in image_obj:
            ret.append(None)
            continue

        onet_input_count = image_obj['onet_input'].shape[0]

        out0_per_image = out0[:, i:i + onet_input_count]
        score_per_image = score[i:i + onet_input_count]
        points_per_image = points[:, i:i + onet_input_count]

        ipass = np.where(score_per_image > threshold[2])
        points_per_image = points_per_image[:, ipass[0]]

        image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
                                              np.expand_dims(score_per_image[ipass].copy(), 1)])
        mv = out0_per_image[:, ipass[0]]

        w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
        h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
        points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
            image_obj['total_boxes'][:, 0], (5, 1)) - 1
        points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
            image_obj['total_boxes'][:, 1], (5, 1)) - 1

        if image_obj['total_boxes'].shape[0] > 0:
            image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
            pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
            image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
            points_per_image = points_per_image[:, pick]

            ret.append((image_obj['total_boxes'], points_per_image))
        else:
            ret.append(None)

        i += onet_input_count

    return ret


# function [boundingbox] = bbreg(boundingbox,reg)
def bbreg(boundingbox, reg):
    # calibrate bounding boxes
    if reg.shape[1] == 1:
        reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))

    w = boundingbox[:, 2] - boundingbox[:, 0] + 1
    h = boundingbox[:, 3] - boundingbox[:, 1] + 1
    b1 = boundingbox[:, 0] + reg[:, 0] * w
    b2 = boundingbox[:, 1] + reg[:, 1] * h
    b3 = boundingbox[:, 2] + reg[:, 2] * w
    b4 = boundingbox[:, 3] + reg[:, 3] * h
    boundingbox[:, 0:4] = np.transpose(np.vstack([b1, b2, b3, b4]))
    return boundingbox

def generateBoundingBox(imap, reg, scale, t):
    # use the heatmap to generate bounding boxes
    stride = 2
    cellsize = 12

    imap = np.transpose(imap)
    dx1 = np.transpose(reg[:, :, 0])
    dy1 = np.transpose(reg[:, :, 1])
    dx2 = np.transpose(reg[:, :, 2])
    dy2 = np.transpose(reg[:, :, 3])
    y, x = np.where(imap >= t)
    if y.shape[0] == 1:
        dx1 = np.flipud(dx1)
        dy1 = np.flipud(dy1)
        dx2 = np.flipud(dx2)
        dy2 = np.flipud(dy2)
    score = imap[(y, x)]
    reg = np.transpose(np.vstack([dx1[(y, x)], dy1[(y, x)], dx2[(y, x)], dy2[(y, x)]]))
    if reg.size == 0:
        reg = np.empty((0, 4))  # four regression offsets per box
    bb = np.transpose(np.vstack([y, x]))
    q1 = np.fix((stride * bb + 1) / scale)
    q2 = np.fix((stride * bb + cellsize - 1 + 1) / scale)
    boundingbox = np.hstack([q1, q2, np.expand_dims(score, 1), reg])
    return boundingbox, reg

# function pick = nms(boxes,threshold,type)
def nms(boxes, threshold, method):
    if boxes.size == 0:
        return np.empty((0, 3))
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    s = boxes[:, 4]
    area = (x2 - x1 + 1) * (y2 - y1 + 1)
    I = np.argsort(s)
    pick = np.zeros_like(s, dtype=np.int16)
    counter = 0
    while I.size > 0:
        i = I[-1]
        pick[counter] = i
        counter += 1
        idx = I[0:-1]
        xx1 = np.maximum(x1[i], x1[idx])
        yy1 = np.maximum(y1[i], y1[idx])
        xx2 = np.minimum(x2[i], x2[idx])
        yy2 = np.minimum(y2[i], y2[idx])
        w = np.maximum(0.0, xx2 - xx1 + 1)
        h = np.maximum(0.0, yy2 - yy1 + 1)
        inter = w * h
        if method == 'Min':
            o = inter / np.minimum(area[i], area[idx])
        else:
            o = inter / (area[i] + area[idx] - inter)
        I = I[np.where(o <= threshold)]
    pick = pick[0:counter]
    return pick
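# Worked example (added): with boxes A = [0, 0, 10, 10, 0.9] and B = [2, 2, 12, 12, 0.8],
# each has area (10+1)^2 = 121, the intersection is 9*9 = 81, and the union is
# 121 + 121 - 81 = 161, so the 'Union' overlap (IoU) is 81/161 ~ 0.50; the 'Min'
# overlap is 81/121 ~ 0.67. 'Min' (used in the third stage) therefore suppresses
# nested or mostly-contained boxes more aggressively than plain IoU.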
# function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
def pad(total_boxes, w, h):
    # compute the padding coordinates (pad the bounding boxes to square)
    tmpw = (total_boxes[:, 2] - total_boxes[:, 0] + 1).astype(np.int32)
    tmph = (total_boxes[:, 3] - total_boxes[:, 1] + 1).astype(np.int32)
    numbox = total_boxes.shape[0]

    dx = np.ones((numbox), dtype=np.int32)
    dy = np.ones((numbox), dtype=np.int32)
    edx = tmpw.copy().astype(np.int32)
    edy = tmph.copy().astype(np.int32)

    x = total_boxes[:, 0].copy().astype(np.int32)
    y = total_boxes[:, 1].copy().astype(np.int32)
    ex = total_boxes[:, 2].copy().astype(np.int32)
    ey = total_boxes[:, 3].copy().astype(np.int32)

    tmp = np.where(ex > w)
    edx.flat[tmp] = np.expand_dims(-ex[tmp] + w + tmpw[tmp], 1)
    ex[tmp] = w

    tmp = np.where(ey > h)
    edy.flat[tmp] = np.expand_dims(-ey[tmp] + h + tmph[tmp], 1)
    ey[tmp] = h

    tmp = np.where(x < 1)
    dx.flat[tmp] = np.expand_dims(2 - x[tmp], 1)
    x[tmp] = 1

    tmp = np.where(y < 1)
    dy.flat[tmp] = np.expand_dims(2 - y[tmp], 1)
    y[tmp] = 1

    return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph

# function [bboxA] = rerec(bboxA)
def rerec(bboxA):
    # convert bboxA to square
    h = bboxA[:, 3] - bboxA[:, 1]
    w = bboxA[:, 2] - bboxA[:, 0]
    l = np.maximum(w, h)
    bboxA[:, 0] = bboxA[:, 0] + w * 0.5 - l * 0.5
    bboxA[:, 1] = bboxA[:, 1] + h * 0.5 - l * 0.5
    bboxA[:, 2:4] = bboxA[:, 0:2] + np.transpose(np.tile(l, (2, 1)))
    return bboxA

def imresample(img, sz):
    im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA)  #@UndefinedVariable
    return im_data

# This method is kept for debugging purposes:
#     h = img.shape[0]
#     w = img.shape[1]
#     hs, ws = sz
#     dx = float(w) / ws
#     dy = float(h) / hs
#     im_data = np.zeros((hs, ws, 3))
#     for a1 in range(0, hs):
#         for a2 in range(0, ws):
#             for a3 in range(0, 3):
#                 im_data[a1, a2, a3] = img[int(floor(a1*dy)), int(floor(a2*dx)), a3]
#     return im_data

--------------------------------------------------------------------------------