├── src
│   ├── align
│   │   ├── ___init__.py
│   │   ├── det1.npy
│   │   ├── det2.npy
│   │   ├── det3.npy
│   │   ├── __pycache__
│   │   │   └── detect_face.cpython-36.pyc
│   │   ├── align_dataset_mtcnn.py
│   │   └── detect_face.py
│   ├── detect_face
│   │   ├── __init__.py
│   │   └── face_detector.py
│   ├── __pycache__
│   │   ├── lfw.cpython-36.pyc
│   │   └── facenet.cpython-36.pyc
│   ├── data_generator.py
│   ├── dataset.py
│   ├── applications
│   │   ├── __init__.py
│   │   ├── imagenet_utils.py
│   │   ├── mobilenet.py
│   │   └── mobilenet_v2.py
│   ├── import_pb_to_tensorboard.py
│   ├── lfw.py
│   ├── freeze_graph.py
│   ├── build_dataset.py
│   ├── utils.py
│   ├── facenet_live.py
│   ├── validate.py
│   ├── validate_on_lfw.py
│   └── facenet.py
├── requirements.txt
├── .gitignore
├── README.md
└── Forward Propagation.ipynb

/src/align/___init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/detect_face/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/align/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/align/det1.npy
--------------------------------------------------------------------------------
/src/align/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/align/det2.npy
--------------------------------------------------------------------------------
/src/align/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/align/det3.npy
--------------------------------------------------------------------------------
/src/__pycache__/lfw.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/__pycache__/lfw.cpython-36.pyc
--------------------------------------------------------------------------------
/src/__pycache__/facenet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/__pycache__/facenet.cpython-36.pyc
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow
2 | scipy
3 | scikit-learn
4 | opencv-python
5 | h5py
6 | matplotlib
7 | Pillow
8 | requests
9 | psutil
10 | imageio
--------------------------------------------------------------------------------
/src/align/__pycache__/detect_face.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/align/__pycache__/detect_face.cpython-36.pyc
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | models/
2 | .DS_Store
3 | .ipynb_checkpoints/
4 | face-recognition/
5 | datasets/
6 | .vscode/
7 | src/detect_face/__pycache__/
8 | src/__pycache__/
9 | art/
10 | .idea
11 |
--------------------------------------------------------------------------------
/README.md:
-------------------------------------------------------------------------------- 1 | # MobileFaceNet 2 | 3 | This is based on my graduation thesis, where I propose the MobileFaceNet, a smaller Convolution Neural Network to perform Facial Recognition. The model was trained based on the technique [Distilling the Knowledge in a Neural Network](https://arxiv.org/abs/1503.02531) proposed by Geoffrey Hinton, and as a coarse model it was used the pretrained [FaceNet from David Sandberg](https://github.com/davidsandberg/facenet), which achieves over 98% of accuracy on the [LFW dataset](http://vis-www.cs.umass.edu/lfw/). 4 | 5 | *This repository is currently under development* -------------------------------------------------------------------------------- /src/data_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import misc 3 | from keras.utils import Sequence 4 | 5 | 6 | class TCCGenerator(Sequence): 7 | 8 | def __init__(self, image_filenames, labels, batch_size): 9 | self.image_filenames, self.labels = image_filenames, labels 10 | self.batch_size = batch_size 11 | 12 | def __len__(self): 13 | return np.ceil(len(self.image_filenames) / float(self.batch_size)).astype(int) 14 | 15 | def __getitem__(self, idx): 16 | batch_x = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size] 17 | batch_y = self.labels[idx * self.batch_size:(idx+1) * self.batch_size] 18 | 19 | embeddings = np.array([np.load(filename) for filename in batch_y]) 20 | images = np.array([misc.imread(filename) for filename in batch_x]) 21 | images = images / 255 22 | 23 | return images, embeddings.reshape(embeddings.shape[0], -1) 24 | -------------------------------------------------------------------------------- /src/detect_face/face_detector.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | class FaceDetector: 5 | def __init__(self, extractor_path): 6 | self.path = extractor_path 7 | self.face_detector = cv2.CascadeClassifier(extractor_path) 8 | 9 | def detect_faces(self, frame, scaleFactor=1.3, minNeighbors=1, minSize=(30, 30)): 10 | rects = self.face_detector.detectMultiScale(frame, 11 | scaleFactor=scaleFactor, 12 | minNeighbors=minNeighbors, 13 | minSize=minSize, 14 | flags=cv2.CASCADE_SCALE_IMAGE) 15 | 16 | return rects 17 | 18 | @staticmethod 19 | def extract_faces(frame, rects, size=(160, 160)): 20 | nrof_images = len(rects) 21 | images = np.zeros((nrof_images, *size, 3), dtype=np.uint8) 22 | 23 | for idx, (x, y, w, h) in enumerate(rects): 24 | h_margin = int(0.1*w) 25 | v_margin = int(0.1*h) 26 | 27 | cropped_image = frame[y:y+h, x:x+w] 28 | images[idx,:,:,:] = cv2.resize(cropped_image, size) 29 | 30 | return images 31 | -------------------------------------------------------------------------------- /src/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import progressbar 4 | 5 | def only_alpha(string): 6 | return all(not a.isdigit() for a in string) 7 | 8 | def clean_name(path_name, with_number=False): 9 | path_name = path_name.split('/')[-1] 10 | path_name = path_name.split('.')[0] 11 | 12 | if with_number: 13 | return path_name 14 | 15 | list_names = path_name.split('_') 16 | 17 | list_names = list(filter(lambda x: only_alpha(x), list_names)) 18 | name = '_'.join(list_names) 19 | return name 20 | 21 | def get_names(data): 22 | """ Return the list of unique 
names that compose the dataset 23 | 24 | :params data: The dataset to be analyzed 25 | """ 26 | names = [] 27 | for image_path in (data): 28 | name = clean_name(image_path) 29 | 30 | if name not in names: 31 | names.append(name) 32 | 33 | return names 34 | 35 | def build_dataset(data, 36 | output='output', 37 | base_path='/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/lfw_mtcnnpy_160'): 38 | people = get_names(data) 39 | embeddings = [] 40 | print('[CHECK] It has %d people on the dataset.' % len(people)) 41 | for person in progressbar.progressbar(people): 42 | person_path = os.path.join(base_path, person) 43 | person_path = os.path.join(person_path, output) 44 | 45 | faces = os.listdir(person_path) 46 | faces = [os.path.join(person_path, f) for f in faces] 47 | nrof_faces = len(faces) 48 | 49 | for idx, face in enumerate(faces): 50 | embedding_face = np.load(face) 51 | embedding = { 'name': clean_name(face, with_number=True), 52 | 'embedding': embedding_face } 53 | embeddings.append(embedding) 54 | 55 | embeddings_output_path = os.path.join(base_path, 'embeddings_test_mac.npy') 56 | if os.path.exists(embeddings_output_path): 57 | os.remove(embeddings_output_path) 58 | 59 | np.save(embeddings_output_path, np.array(embeddings)) 60 | 61 | def main(): 62 | X_test = np.load('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/xtest.npy') 63 | print('[CHECK] Test set has %d files.' % X_test.shape[0]) 64 | print('[STARTING] Building dataset...') 65 | build_dataset(X_test) 66 | 67 | if __name__ == "__main__": 68 | main() -------------------------------------------------------------------------------- /src/applications/__init__.py: -------------------------------------------------------------------------------- 1 | """Enables dynamic setting of underlying Keras module. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | _KERAS_BACKEND = None 8 | _KERAS_LAYERS = None 9 | _KERAS_MODELS = None 10 | _KERAS_UTILS = None 11 | 12 | 13 | def set_keras_submodules(backend=None, 14 | layers=None, 15 | models=None, 16 | utils=None, 17 | engine=None): 18 | # Deprecated, will be removed in the future. 19 | global _KERAS_BACKEND 20 | global _KERAS_LAYERS 21 | global _KERAS_MODELS 22 | global _KERAS_UTILS 23 | _KERAS_BACKEND = backend 24 | _KERAS_LAYERS = layers 25 | _KERAS_MODELS = models 26 | _KERAS_UTILS = utils 27 | 28 | 29 | def get_keras_submodule(name): 30 | # Deprecated, will be removed in the future. 31 | if name not in {'backend', 'layers', 'models', 'utils'}: 32 | raise ImportError( 33 | 'Can only retrieve one of "backend", ' 34 | '"layers", "models", or "utils". ' 35 | 'Requested: %s' % name) 36 | if _KERAS_BACKEND is None: 37 | raise ImportError('You need to first `import keras` ' 38 | 'in order to use `keras_applications`. 
' 39 | 'For instance, you can do:\n\n' 40 | '```\n' 41 | 'import keras\n' 42 | 'from keras_applications import vgg16\n' 43 | '```\n\n' 44 | 'Or, preferably, this equivalent formulation:\n\n' 45 | '```\n' 46 | 'from keras import applications\n' 47 | '```\n') 48 | if name == 'backend': 49 | return _KERAS_BACKEND 50 | elif name == 'layers': 51 | return _KERAS_LAYERS 52 | elif name == 'models': 53 | return _KERAS_MODELS 54 | elif name == 'utils': 55 | return _KERAS_UTILS 56 | 57 | 58 | def get_submodules_from_kwargs(kwargs): 59 | backend = kwargs.get('backend', _KERAS_BACKEND) 60 | layers = kwargs.get('layers', _KERAS_LAYERS) 61 | models = kwargs.get('models', _KERAS_MODELS) 62 | utils = kwargs.get('utils', _KERAS_UTILS) 63 | for key in kwargs.keys(): 64 | if key not in ['backend', 'layers', 'models', 'utils']: 65 | raise TypeError('Invalid keyword argument: %s', key) 66 | return backend, layers, models, utils 67 | 68 | 69 | def correct_pad(backend, inputs, kernel_size): 70 | """Returns a tuple for zero-padding for 2D convolution with downsampling. 71 | # Arguments 72 | input_size: An integer or tuple/list of 2 integers. 73 | kernel_size: An integer or tuple/list of 2 integers. 74 | # Returns 75 | A tuple. 76 | """ 77 | img_dim = 2 if backend.image_data_format() == 'channels_first' else 1 78 | input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)] 79 | 80 | if isinstance(kernel_size, int): 81 | kernel_size = (kernel_size, kernel_size) 82 | 83 | if input_size[0] is None: 84 | adjust = (1, 1) 85 | else: 86 | adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) 87 | 88 | correct = (kernel_size[0] // 2, kernel_size[1] // 2) 89 | 90 | return ((correct[0] - adjust[0], correct[0]), 91 | (correct[1] - adjust[1], correct[1])) 92 | -------------------------------------------------------------------------------- /src/import_pb_to_tensorboard.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ================================ 15 | """Imports a protobuf model as a graph in Tensorboard.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import argparse 22 | import sys 23 | 24 | from tensorflow.core.framework import graph_pb2 25 | from tensorflow.python.client import session 26 | from tensorflow.python.framework import importer 27 | from tensorflow.python.framework import ops 28 | from tensorflow.python.platform import app 29 | from tensorflow.python.platform import gfile 30 | from tensorflow.python.summary import summary 31 | 32 | # Try importing TensorRT ops if available 33 | # TODO(aaroey): ideally we should import everything from contrib, but currently 34 | # tensorrt module would cause build errors when being imported in 35 | # tensorflow/contrib/__init__.py. Fix it. 
36 | # pylint: disable=unused-import,g-import-not-at-top,wildcard-import 37 | try: 38 | from tensorflow.contrib.tensorrt.ops.gen_trt_engine_op import * 39 | except ImportError: 40 | pass 41 | # pylint: enable=unused-import,g-import-not-at-top,wildcard-import 42 | 43 | def import_to_tensorboard(model_dir, log_dir): 44 | """View an imported protobuf model (`.pb` file) as a graph in Tensorboard. 45 | Args: 46 | model_dir: The location of the protobuf (`pb`) model to visualize 47 | log_dir: The location for the Tensorboard log to begin visualization from. 48 | Usage: 49 | Call this function with your model location and desired log directory. 50 | Launch Tensorboard by pointing it to the log directory. 51 | View your imported `.pb` model as a graph. 52 | """ 53 | with session.Session(graph=ops.Graph()) as sess: 54 | with gfile.FastGFile(model_dir, "rb") as f: 55 | graph_def = graph_pb2.GraphDef() 56 | graph_def.ParseFromString(f.read()) 57 | importer.import_graph_def(graph_def) 58 | 59 | pb_visual_writer = summary.FileWriter(log_dir) 60 | pb_visual_writer.add_graph(sess.graph) 61 | print("Model Imported. Visualize by running: " 62 | "tensorboard --logdir={}".format(log_dir)) 63 | 64 | 65 | def main(unused_args): 66 | import_to_tensorboard(FLAGS.model_dir, FLAGS.log_dir) 67 | 68 | if __name__ == "__main__": 69 | parser = argparse.ArgumentParser() 70 | parser.register("type", "bool", lambda v: v.lower() == "true") 71 | parser.add_argument( 72 | "--model_dir", 73 | type=str, 74 | default="", 75 | required=True, 76 | help="The location of the protobuf (\'pb\') model to visualize.") 77 | parser.add_argument( 78 | "--log_dir", 79 | type=str, 80 | default="", 81 | required=True, 82 | help="The location for the Tensorboard log to begin visualization from.") 83 | FLAGS, unparsed = parser.parse_known_args() 84 | app.run(main=main, argv=[sys.argv[0]] + unparsed) -------------------------------------------------------------------------------- /src/lfw.py: -------------------------------------------------------------------------------- 1 | """Helper for evaluation on the Labeled Faces in the Wild dataset 2 | """ 3 | 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
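# Note: evaluate() below follows the standard LFW 10-fold protocol. It relies on
# facenet.calculate_roc (thresholds swept from 0 to 4) for TPR/FPR/accuracy and on
# facenet.calculate_val for the validation rate at a target FAR of 1e-3.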
25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import os 31 | import numpy as np 32 | import facenet 33 | 34 | 35 | def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): 36 | # Calculate evaluation metrics 37 | thresholds = np.arange(0, 4, 0.01) 38 | embeddings1 = embeddings[0::2] 39 | embeddings2 = embeddings[1::2] 40 | tpr, fpr, accuracy = facenet.calculate_roc(thresholds, embeddings1, embeddings2, 41 | np.asarray(actual_issame), nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) 42 | thresholds = np.arange(0, 4, 0.001) 43 | val, val_std, far = facenet.calculate_val(thresholds, embeddings1, embeddings2, 44 | np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) 45 | return tpr, fpr, accuracy, val, val_std, far 46 | 47 | 48 | def get_paths(lfw_dir, pairs): 49 | nrof_skipped_pairs = 0 50 | path_list = [] 51 | issame_list = [] 52 | for pair in pairs: 53 | if len(pair) == 3: 54 | path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) 55 | path1 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2]))) 56 | issame = True 57 | elif len(pair) == 4: 58 | path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) 59 | path1 = add_extension(os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3]))) 60 | issame = False 61 | if os.path.exists(path0) and os.path.exists(path1): # Only add the pair if both paths exist 62 | path_list += (path0,path1) 63 | issame_list.append(issame) 64 | else: 65 | nrof_skipped_pairs += 1 66 | if nrof_skipped_pairs>0: 67 | print('Skipped %d image pairs' % nrof_skipped_pairs) 68 | 69 | return path_list, issame_list 70 | 71 | 72 | def add_extension(path): 73 | if os.path.exists(path+'.jpg'): 74 | return path+'.jpg' 75 | elif os.path.exists(path+'.png'): 76 | return path+'.png' 77 | else: 78 | raise RuntimeError('No file "%s" with extension png or jpg.' % path) 79 | 80 | 81 | def read_pairs(pairs_filename): 82 | pairs = [] 83 | with open(pairs_filename, 'r') as f: 84 | for line in f.readlines()[1:]: 85 | pair = line.strip().split() 86 | pairs.append(pair) 87 | return np.array(pairs) 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/freeze_graph.py: -------------------------------------------------------------------------------- 1 | """Imports a model metagraph and checkpoint file, converts the variables to constants 2 | and exports the model as a graphdef protobuf 3 | """ 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from tensorflow.python.framework import graph_util 31 | import tensorflow as tf 32 | import argparse 33 | import os 34 | import sys 35 | import facenet 36 | from six.moves import xrange # @UnresolvedImport 37 | 38 | def main(args): 39 | with tf.Graph().as_default(): 40 | with tf.Session() as sess: 41 | # Load the model metagraph and checkpoint 42 | print('Model directory: %s' % args.model_dir) 43 | meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(args.model_dir)) 44 | 45 | print('Metagraph file: %s' % meta_file) 46 | print('Checkpoint file: %s' % ckpt_file) 47 | 48 | model_dir_exp = os.path.expanduser(args.model_dir) 49 | saver = tf.train.import_meta_graph(os.path.join(model_dir_exp, meta_file), clear_devices=True) 50 | tf.get_default_session().run(tf.global_variables_initializer()) 51 | tf.get_default_session().run(tf.local_variables_initializer()) 52 | saver.restore(tf.get_default_session(), os.path.join(model_dir_exp, ckpt_file)) 53 | 54 | # Retrieve the protobuf graph definition and fix the batch norm nodes 55 | input_graph_def = sess.graph.as_graph_def() 56 | 57 | # Freeze the graph def 58 | output_graph_def = freeze_graph_def(sess, input_graph_def, 'embeddings,label_batch') 59 | 60 | # Serialize and dump the output graph to the filesystem 61 | with tf.gfile.GFile(args.output_file, 'wb') as f: 62 | f.write(output_graph_def.SerializeToString()) 63 | print("%d ops in the final graph: %s" % (len(output_graph_def.node), args.output_file)) 64 | 65 | def freeze_graph_def(sess, input_graph_def, output_node_names): 66 | for node in input_graph_def.node: 67 | if node.op == 'RefSwitch': 68 | node.op = 'Switch' 69 | for index in xrange(len(node.input)): 70 | if 'moving_' in node.input[index]: 71 | node.input[index] = node.input[index] + '/read' 72 | elif node.op == 'AssignSub': 73 | node.op = 'Sub' 74 | if 'use_locking' in node.attr: del node.attr['use_locking'] 75 | elif node.op == 'AssignAdd': 76 | node.op = 'Add' 77 | if 'use_locking' in node.attr: del node.attr['use_locking'] 78 | 79 | # Get the list of important nodes 80 | whitelist_names = [] 81 | for node in input_graph_def.node: 82 | if (node.name.startswith('InceptionResnet') or node.name.startswith('embeddings') or 83 | node.name.startswith('image_batch') or node.name.startswith('label_batch') or 84 | node.name.startswith('phase_train') or node.name.startswith('Logits')): 85 | whitelist_names.append(node.name) 86 | 87 | # Replace all the variables in the graph with constants of the same values 88 | output_graph_def = graph_util.convert_variables_to_constants( 89 | sess, input_graph_def, output_node_names.split(","), 90 | variable_names_whitelist=whitelist_names) 91 | return output_graph_def 92 | 93 | def parse_arguments(argv): 94 | parser = argparse.ArgumentParser() 95 | 96 | parser.add_argument('model_dir', type=str, 97 | help='Directory containing the metagraph (.meta) 
file and the checkpoint (ckpt) file containing model parameters') 98 | parser.add_argument('output_file', type=str, 99 | help='Filename for the exported graphdef protobuf (.pb)') 100 | return parser.parse_args(argv) 101 | 102 | if __name__ == '__main__': 103 | main(parse_arguments(sys.argv[1:])) 104 | -------------------------------------------------------------------------------- /src/build_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import utils 3 | import os 4 | import tensorflow as tf 5 | import cv2 6 | import numpy as np 7 | import time 8 | import keras 9 | import keras.backend as K 10 | import json 11 | 12 | def main(args): 13 | print("[STARTING] Starting the code to create the dataset.") 14 | print(".\n.\n.") 15 | 16 | print("[LOADING] Loading the Convolutional Neural Network model...") 17 | type_mode = args["type"] 18 | assert type_mode in ["MobileFaceNet", "FaceNet"], "Only MobileFaceNet or FaceNet are supported." 19 | 20 | if type_mode == 'FaceNet': 21 | start = time.time() 22 | sess = tf.Session() 23 | utils.load_model(args["model"]) 24 | 25 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 26 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 27 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 28 | print("[LOADING] Loading the FaceNet weights took %.2f" % (time.time() - start)) 29 | else: 30 | K.clear_session() 31 | define_keras_functions() 32 | with open(args["json"]) as f: 33 | start = time.time() 34 | model_json = json.load(f) 35 | model = keras.models.model_from_json(model_json) 36 | print("[LOADING] Loadng the Weights...") 37 | model.load_weights(args["weights"]) 38 | print("[LOADING] Loading the MobileFaceNet weights took %.2fs" % (time.time() - start)) 39 | 40 | print("[LOADING] Checking the dataset path...") 41 | dataset_path = args['dataset'] 42 | dataset_path = os.path.expanduser(dataset_path) 43 | assert os.path.isdir(dataset_path), "Dataset folder should be the dataset root folder." 
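# The dataset root is expected to hold one folder per person containing .jpg/.jpeg
# images; the embeddings computed below are written back as <person>.npy inside each
# person's folder. Note that cv2.resize below is hard-coded to (160, 160), so the
# --image argument must stay at its default of 160 for the array shapes to match.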
44 | people = [person for person in os.listdir(dataset_path) if not person.startswith('.')] 45 | 46 | print('[RUNNING] Building the dataset!') 47 | times = [] 48 | for person in people: 49 | print('\t[BUILD] Building ', person) 50 | person_path = os.path.join(dataset_path, person) 51 | pics = [pic for pic in os.listdir(person_path) if (pic.endswith('jpg') or pic.endswith('jpeg'))] 52 | nrof_pics = len(pics) 53 | images = np.zeros((nrof_pics, args['image'], args['image'], 3)) 54 | 55 | for idx, pic in enumerate(pics): 56 | image = cv2.imread(os.path.join(person_path, pic)) 57 | image = cv2.resize(image, (160, 160)) 58 | image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 59 | images[idx, :, :, :] = image_rgb / 255 60 | 61 | # Recognize the images 62 | if type_mode == 'FaceNet': 63 | start_time = time.time() 64 | feed_dict = {images_placeholder: images, phase_train_placeholder: False} 65 | embeddings_array = sess.run(embeddings, feed_dict=feed_dict) 66 | times.append(time.time() - start_time) 67 | else: 68 | start_time = time.time() 69 | embeddings_array = model.predict(images) 70 | times.append(time.time() - start_time) 71 | 72 | output_file = os.path.join(person_path, person+'.npy') 73 | 74 | if (os.path.isfile(output_file)): 75 | os.remove(output_file) 76 | 77 | np.save(output_file, embeddings_array) 78 | 79 | def parse_arguments(): 80 | """ Parsing command line arguments 81 | """ 82 | ap = argparse.ArgumentParser() 83 | ap.add_argument('-d', 84 | '--dataset', 85 | type=str, 86 | required=True, 87 | help='Path to the dataset root folder') 88 | ap.add_argument('-m', 89 | '--model', 90 | type=str, 91 | help="Path to the CNN model") 92 | 93 | ap.add_argument('-i', 94 | '--image', 95 | type=int, 96 | default=160, 97 | help='Size of the image') 98 | 99 | ap.add_argument('-t', 100 | '--type', 101 | type=str, 102 | default="MobileFaceNet", 103 | help="Which model to use to create the embeddings") 104 | 105 | ap.add_argument('-j', 106 | '--json', 107 | type=str, 108 | default='/home/pi/Documents/TCC/face-recognition/models/mobilefacenet/model.json', 109 | help='Path to the JSON containing the model structure') 110 | 111 | ap.add_argument('-w', 112 | '--weights', 113 | type=str, 114 | default='/home/pi/Documents/TCC/face-recognition/models/mobilefacenet/model_weights.h5', 115 | help='Path to the weights of the model') 116 | 117 | return vars(ap.parse_args()) 118 | 119 | def define_keras_functions(): 120 | def distillation_loss(y_true, y_pred): 121 | return K.square(y_pred - y_true) 122 | 123 | def max_diff(y_true, y_pred): 124 | return K.max(K.square(y_pred - y_true), axis=-1) 125 | 126 | def sum_diff(y_true, y_pred): 127 | return K.sum(K.square(y_pred - y_true), axis=-1) 128 | 129 | keras.losses.distillation_loss = distillation_loss 130 | keras.metrics.max_diff = max_diff 131 | keras.metrics.sum_diff = sum_diff 132 | 133 | 134 | if __name__ == "__main__": 135 | main(parse_arguments()) 136 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorflow.python.platform import gfile 3 | import tensorflow as tf 4 | import numpy as np 5 | import math 6 | 7 | def load_model(model, input_map=None): 8 | """ Load model given its path. 
Currently only working with '.pb' saved models 9 | 10 | :param model: Path of where the model was saved 11 | :param input_map: Input map of the model, default to None 12 | """ 13 | model_exp = os.path.expanduser(model) 14 | assert os.path.isfile(model_exp), "Currently its only working with '.pb' model files. So your path should be one." 15 | 16 | print('Model filename: %s' % model_exp) 17 | with gfile.FastGFile(model_exp,'rb') as f: 18 | graph_def = tf.GraphDef() 19 | graph_def.ParseFromString(f.read()) 20 | tf.import_graph_def(graph_def, input_map=input_map, name='') 21 | 22 | def distance(embeddings1, embeddings2, distance_metric='euclidean'): 23 | """ Calculate the distance between two embeddings. Currently working with euclidean and cosine similarity. 24 | 25 | :param embeddings1: First embedding 26 | :param embeddings2: Second embedding 27 | :param distance_metric: Distance metric to be used to make the calculation. Should be either: 'euclidean' or 'cosine' 28 | 29 | :returns: The distance between the `embeddings1` and `embeddings2` 30 | """ 31 | assert distance_metric in ['euclidean', 'cosine'], "The distance metric should be either 'euclidean' or 'cosine'" 32 | 33 | if distance_metric == 'euclidean': 34 | diff = np.subtract(embeddings1, embeddings2) 35 | dist = np.sum(np.square(diff), 1) 36 | 37 | elif distance_metric == 'cosine': 38 | dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) 39 | norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) 40 | similarity = dot / norm 41 | dist = np.arccos(similarity) / math.pi 42 | 43 | return dist 44 | 45 | def build_dataset(path): 46 | """ Building a dataset given the path of the source folder. 47 | The source folder should be structured as described on the github wiki. 48 | 49 | :param path: The path of the source folder 50 | 51 | :returns: Three dictionaries - One with the encodings, and two others mapping names to indexes. 52 | """ 53 | dataset = {} 54 | 55 | people = os.listdir(path) 56 | for person in people: 57 | if person.startswith('.'): 58 | continue 59 | 60 | embs = np.load(path + '/' + person + '/' + person + '.npy') 61 | dataset[person] = embs 62 | 63 | names = iter(dataset.keys()) 64 | idxs = iter(np.arange(len(dataset))) 65 | 66 | names_to_idx = dict(zip(names, idxs)) 67 | idx_to_names = dict([x, v] for v, x in names_to_idx.items()) 68 | 69 | return dataset, names_to_idx, idx_to_names 70 | 71 | def get_image(dataset, name, chosen_n=-1): 72 | """ Given a dataset, get the chosen image in a person base. If the image index equals -1, returns a random image from the person. 73 | 74 | :params dataset: Dataset with known faces 75 | :params name: Name of the known person which the image should be returned 76 | :params chosen_n: The index of image from the given person. If equals to -1, returns a random image from that person. 77 | 78 | :returns: Embeddings from the face image of that given person 79 | """ 80 | assert name in dataset.keys(), "Name not found. Make sure that your name is present on your dataset." 81 | 82 | if chosen_n == -1: 83 | nrof_faces = dataset[name].shape[0] 84 | chosen_n = np.random.randint(nrof_faces) 85 | 86 | chosen = dataset[name][chosen_n] 87 | return chosen.reshape((1, *chosen.shape)) 88 | 89 | 90 | def predict_face(dataset, name_to_idx, idx_to_name, face, threshold=.1, distance_metric='euclidean'): 91 | """ Given the embeddings of a face and the dataset of known embeddings, predict if the person is present on our dataset or not. 
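The face is compared against every stored embedding, keeping the smallest distance per known
person; if even the closest match is farther away than `threshold`, 'Unknown' is returned.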
92 | 93 | :params dataset: Dataset with the known faces and their names 94 | :params name_to_idx: Dictionary with the mapping name to idx 95 | :params idx_to =_name: Dictionary with the mapping idx to name 96 | :params face: Array with the embeddings of a face 97 | :params threshold: Minimum acceptable distance between a known face and the face, if there aren't any known 98 | faces that fulfill this requirement, it will be predicted as "Unknown" 99 | :params distance_metric: Distance metric to be used to make the calculation. Should be either 'euclidean' or 'cosine' 100 | 101 | :returns: Name of the person, if present on the dataset, or "Unknown" if it does not meet the requirements 102 | """ 103 | distances = np.zeros(len(dataset)) 104 | 105 | for person in dataset.keys(): 106 | nrof_images = len(dataset[person]) 107 | for image in range(nrof_images): 108 | known_face = get_image(dataset, person, image) 109 | d = distance(face, known_face, distance_metric=distance_metric) 110 | 111 | if distances[name_to_idx[person]] == 0: 112 | distances[name_to_idx[person]] = d 113 | elif distances[name_to_idx[person]] > d: 114 | distances[name_to_idx[person]] = d 115 | 116 | idx_min = distances.argmin() 117 | if distances[idx_min] > threshold: 118 | return 'Unknown' 119 | print(idx_to_name) 120 | print(distances) 121 | 122 | return idx_to_name[idx_min].replace('_', ' ') -------------------------------------------------------------------------------- /src/facenet_live.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import imutils 3 | import argparse 4 | from imutils.video import VideoStream, FPS 5 | import time 6 | from detect_face.face_detector import FaceDetector 7 | import tensorflow as tf 8 | import utils 9 | import keras 10 | import keras.backend as K 11 | import json 12 | 13 | def main(args): 14 | 15 | print("[STARTING] Facenet ResNet v1 for Facial Recognition") 16 | print(".\n.\n.") 17 | print("[LOADING] Loading face detector...") 18 | detector = FaceDetector(args["cascade"]) 19 | 20 | print("[LOADING] Loading the faces dataset...") 21 | dataset, name_to_idx, idx_to_name = utils.build_dataset(args["dataset"]) 22 | 23 | print("[LOADING] Loading the Convolutional Neural Network model...") 24 | type_mode = args["type"] 25 | use_pi = args['run'] == 'raspberry' 26 | assert type_mode in ["MobileFaceNet", "FaceNet"], "Only MobileFaceNet or FaceNet are supported." 
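# Two loading paths: 'FaceNet' restores a frozen TensorFlow graph (.pb) and is run through
# a tf.Session, while 'MobileFaceNet' is rebuilt from its Keras JSON definition plus the
# .h5 weights. Both paths produce the embedding arrays consumed by utils.predict_face below.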
27 | 28 | if type_mode == 'FaceNet': 29 | start = time.time() 30 | sess = tf.Session() 31 | utils.load_model(args["model"]) 32 | 33 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 34 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 35 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 36 | print("[LOADING] Loading the FaceNet weights took %.2f" % (time.time() - start)) 37 | else: 38 | K.clear_session() 39 | define_keras_functions() 40 | with open(args["json"]) as f: 41 | start = time.time() 42 | model_json = json.load(f) 43 | model = keras.models.model_from_json(model_json) 44 | print("[LOADING] Loadng the Weights...") 45 | model.load_weights(args["weights"]) 46 | print("[LOADING] Loading the MobileFaceNet weights took %.2fs" % (time.time() - start)) 47 | 48 | print("[LOADING] Starting the video stream...") 49 | if use_pi: 50 | vs = VideoStream(usePiCamera=True).start() 51 | else: 52 | vs = VideoStream(src=0).start() 53 | time.sleep(2.0) 54 | fps = FPS().start() 55 | times = [] 56 | 57 | while True: 58 | frame = vs.read() 59 | frame = imutils.resize(frame, width=500) # Width of the frame is configurable 60 | 61 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 62 | rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 63 | 64 | # Detect faces on the frame 65 | rects = detector.detect_faces(gray) 66 | nrof_faces = len(rects) 67 | if nrof_faces > 0: 68 | face_images = detector.extract_faces(rgb, rects) 69 | face_images = face_images / 255 70 | # Recognize the images 71 | if type_mode == 'FaceNet': 72 | start_time = time.time() 73 | feed_dict = {images_placeholder: face_images, phase_train_placeholder: False} 74 | embeddings_array = sess.run(embeddings, feed_dict=feed_dict) 75 | times.append(time.time() - start_time) 76 | else: 77 | start_time = time.time() 78 | embeddings_array = model.predict(face_images) 79 | times.append(time.time() - start_time) 80 | 81 | for idx, embedding in enumerate(embeddings_array): 82 | embedding = embedding.reshape((1, *embedding.shape)) 83 | predicted = utils.predict_face(dataset, 84 | name_to_idx, 85 | idx_to_name, 86 | embedding, 87 | threshold=3, 88 | distance_metric='cosine') 89 | x, y, w, h = rects[idx] 90 | color = (0, 0, 255) if predicted == "Unknown" else (0, 255, 0) 91 | cv2.rectangle(frame, (x, y+h), (x+w, y), color, 2) 92 | top = y+h-15 if y+h-15 > 15 else y+h+15 93 | cv2.putText(frame, predicted, (x, top), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2) 94 | 95 | # Display the image 96 | cv2.imshow("Frame", frame) 97 | 98 | key = cv2.waitKey(1) & 0xFF 99 | if key == ord('q'): 100 | break 101 | 102 | fps.update() 103 | 104 | fps.stop() 105 | print("[INFO] elapsed time: {:.2f}".format(fps.elapsed())) 106 | print("[INFO] approximated FPS: {:.2f}fps".format(fps.fps())) 107 | print("[INFO] approximated forward propagation time: {:.2f}s".format(sum(times)/len(times))) 108 | 109 | cv2.destroyAllWindows() 110 | vs.stop() 111 | 112 | 113 | def parse_arguments(): 114 | """ Parsing arguments to run variables to the main 115 | """ 116 | parser = argparse.ArgumentParser() 117 | 118 | parser.add_argument("-c", 119 | "--cascade", 120 | type=str, 121 | default="/home/pi/Documents/TCC/face-recognition/models/haarcascade/haarcascade_frontalface_default.xml", 122 | help="Path to the face cascade config files") 123 | parser.add_argument("-d", 124 | "--dataset", 125 | type=str, 126 | default="../datasets/tcc", 127 | help="Path datasets source folder") 128 | 129 | parser.add_argument("-m", 130 | 
"--model", 131 | type=str, 132 | default="/home/pi/Documents/TCC/face-recognition/models/facenet/20180402-114759.pb", 133 | help="Path to the CNN model") 134 | 135 | parser.add_argument("-t", 136 | "--type", 137 | type=str, 138 | default="MobileFaceNet", 139 | help="CNN architecture to be used") 140 | 141 | parser.add_argument("-j", 142 | "--json", 143 | type=str, 144 | default="/home/pi/Documents/TCC/face-recognition/models/mobilefacenet/model.json", 145 | help="Path to the JSON file") 146 | 147 | 148 | parser.add_argument("-w", 149 | "--weights", 150 | type=str, 151 | default="/home/pi/Documents/TCC/face-recognition/models/mobilefacenet/model_weights.h5", 152 | help="Path to the weights") 153 | 154 | parser.add_argument("-r", 155 | "--run", 156 | type=str, 157 | default="raspberry", 158 | help="Where to run, either Raspberry or PC") 159 | return vars(parser.parse_args()) 160 | 161 | 162 | def define_keras_functions(): 163 | def distillation_loss(y_true, y_pred): 164 | return K.square(y_pred - y_true) 165 | 166 | def max_diff(y_true, y_pred): 167 | return K.max(K.square(y_pred - y_true), axis=-1) 168 | 169 | def sum_diff(y_true, y_pred): 170 | return K.sum(K.square(y_pred - y_true), axis=-1) 171 | 172 | keras.losses.distillation_loss = distillation_loss 173 | keras.metrics.max_diff = max_diff 174 | keras.metrics.sum_diff = sum_diff 175 | 176 | 177 | if __name__ == "__main__": 178 | main(parse_arguments()) 179 | -------------------------------------------------------------------------------- /src/validate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import math 4 | from sklearn.model_selection import KFold 5 | 6 | 7 | def read_pairs(path): 8 | pairs = [] 9 | with open(path, 'r') as f: 10 | for line in f.readlines()[1:]: 11 | pair = line.strip().split() 12 | pairs.append(pair) 13 | 14 | return np.array(pairs) 15 | 16 | 17 | def create_path(lfw_dir, pair, output): 18 | if len(pair) == 3: 19 | # TRUE 20 | path0 = os.path.join(lfw_dir, pair[0], output, pair[0] + '_' + '%04d' % int(pair[1])) + '.npy' 21 | path1 = os.path.join(lfw_dir, pair[0], output, pair[0] + '_' + '%04d' % int(pair[2])) + '.npy' 22 | is_same = True 23 | 24 | elif len(pair) == 4: 25 | # FALSE 26 | path0 = os.path.join(lfw_dir, pair[0], output, pair[0] + '_' + '%04d' % int(pair[1])) + '.npy' 27 | path1 = os.path.join(lfw_dir, pair[2], output, pair[2] + '_' + '%04d' % int(pair[3])) + '.npy' 28 | is_same = False 29 | 30 | else: 31 | raise RuntimeError('Error while reading the pair images. It was expected 3 or 4 elements per line\ ' 32 | 'but it was found %d elements.' % len(pair)) 33 | 34 | return path0, path1, is_same 35 | 36 | 37 | def get_paths(lfw_dir, pairs, output): 38 | nrof_skipped_pairs = 0 39 | path_list = [] 40 | is_same_list = [] 41 | 42 | for pair in pairs: 43 | path0, path1, is_same = create_path(lfw_dir, pair, output) 44 | 45 | if os.path.exists(path0) and os.path.exists(path1): 46 | path_list += (path0, path1) 47 | is_same_list.append(is_same) 48 | else: 49 | nrof_skipped_pairs += 1 50 | 51 | if nrof_skipped_pairs > 0: 52 | print("%d pairs couldn't be read." % nrof_skipped_pairs) 53 | 54 | return path_list, is_same_list 55 | 56 | 57 | def distance(embeddings1, embeddings2, distance_metric='euclidean'): 58 | """ Calculate the distance between two embeddings. Currently working with euclidean and cosine similarity. 
59 | 60 | :param embeddings1: First embedding 61 | :param embeddings2: Second embedding 62 | :param distance_metric: Distance metric to be used to make the calculation. Should be either: 'euclidean' or 'cosine' 63 | 64 | :returns: The distance between the `embeddings1` and `embeddings2` 65 | """ 66 | assert distance_metric in ['euclidean', 'cosine'], "The distance metric should be either 'euclidean' or 'cosine'" 67 | 68 | if distance_metric == 'euclidean': 69 | diff = np.subtract(embeddings1, embeddings2) 70 | dist = np.sum(np.square(diff), 1) 71 | 72 | elif distance_metric == 'cosine': 73 | dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) 74 | norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) 75 | similarity = dot / norm 76 | dist = np.arccos(similarity) / math.pi 77 | 78 | else: 79 | raise RuntimeError("Distance metric not found %s. It should be either 'cosine' or 'euclidean'" % distance_metric) 80 | 81 | return dist 82 | 83 | 84 | def load_embeddings(paths): 85 | nrof_skips = 0 86 | bt_size = len(paths) 87 | embeddings = np.zeros((bt_size, 512)) 88 | 89 | for i, path in enumerate(paths): 90 | if not os.path.exists(path): 91 | nrof_skips += 1 92 | continue 93 | 94 | emb = np.load(path) 95 | embeddings[i, :] = emb 96 | 97 | if nrof_skips > 0: 98 | print("There was %d skips when trying to read the embeddings.") 99 | 100 | return embeddings 101 | 102 | 103 | def calculate_accuracy(threshold, dist, actual_issame): 104 | predict_issame = np.less(dist, threshold) 105 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 106 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 107 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 108 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 109 | 110 | tpr = 0 if tp + fn == 0 else float(tp) / (tp + fn) 111 | fpr = 0 if fp + tn == 0 else float(fp) / (fp + tn) 112 | acc = float(tp + tn) / dist.size 113 | 114 | return tpr, fpr, acc 115 | 116 | 117 | def calculate_roc(thresholds, 118 | embeddings1, 119 | embeddings2, 120 | actual_issame, 121 | distance_metric='cosine', 122 | subtract_mean=True, 123 | nrof_folds=10): 124 | 125 | assert embeddings1.shape[0] == embeddings2.shape[0] 126 | assert embeddings1.shape[1] == embeddings2.shape[1] 127 | 128 | kfolds = KFold(n_splits=nrof_folds, shuffle=False) 129 | 130 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 131 | nrof_thresholds = len(thresholds) 132 | 133 | tprs = np.zeros((nrof_folds, nrof_thresholds)) 134 | fprs = np.zeros((nrof_folds, nrof_thresholds)) 135 | accuracy = np.zeros(nrof_folds) 136 | 137 | indices = np.arange(nrof_pairs) 138 | 139 | for fold_idx, (train_set, test_set) in enumerate(kfolds.split(indices)): 140 | if subtract_mean: 141 | mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]], axis=0)) 142 | else: 143 | mean = 0 144 | 145 | dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) 146 | acc_train = np.zeros(nrof_thresholds) 147 | 148 | for threshold_idx, threshold in enumerate(thresholds): 149 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 150 | best_threshold_idx = np.argmax(acc_train) 151 | 152 | for threshold_idx, threshold in enumerate(thresholds): 153 | tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold, 154 | dist[test_set], 155 | actual_issame[test_set]) 156 | _, _, accuracy[fold_idx] 
= calculate_accuracy(thresholds[best_threshold_idx], 157 | dist[test_set], 158 | actual_issame[test_set]) 159 | 160 | tpr = np.mean(tprs, 0) 161 | fpr = np.mean(fprs, 0) 162 | 163 | return tpr, fpr, accuracy 164 | 165 | 166 | def evaluate(embeddings, actual_issame, distance_metric='cosine', subtract_mean=False): 167 | thresholds = np.arange(0, 4, 0.01) 168 | embeddings1 = embeddings[0::2] 169 | embeddings2 = embeddings[1::2] 170 | tpr, fpr, acc = calculate_roc(thresholds, 171 | embeddings1, 172 | embeddings2, 173 | np.array(actual_issame), 174 | distance_metric, 175 | subtract_mean) 176 | 177 | return tpr, fpr, acc 178 | 179 | 180 | def main(): 181 | pairs_path = '/Users/pedroprates/Google Drive/FaceRecognition/data/pairs.txt' 182 | lfw_path = '/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/lfw_mtcnnpy_160' 183 | 184 | pairs = read_pairs(pairs_path) 185 | 186 | path_list, actual_issame = get_paths(lfw_path, pairs, 'output') 187 | embeddings = load_embeddings(path_list) 188 | tpr, fpr, acc = evaluate(embeddings, actual_issame, subtract_mean=True) 189 | 190 | print("TPR: %.2f" % tpr) 191 | print("FPR: %.2f" % fpr) 192 | print("Accuracy: %.2f" % acc) 193 | 194 | 195 | if __name__ == '__main__': 196 | main() -------------------------------------------------------------------------------- /src/align/align_dataset_mtcnn.py: -------------------------------------------------------------------------------- 1 | """Performs face alignment and stores face thumbnails in the output directory.""" 2 | # MIT License 3 | # 4 | # Copyright (c) 2016 David Sandberg 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # SOFTWARE. 
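# Note: this script runs the three-stage MTCNN detector (PNet/RNet/ONet, whose weights live in
# det1.npy, det2.npy and det3.npy). When several faces are found and --detect_multiple_faces is
# not set, only the detection with the best size-versus-centre-offset score is kept.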
23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | 28 | from scipy import misc 29 | import sys 30 | import os 31 | import argparse 32 | import tensorflow as tf 33 | import numpy as np 34 | import facenet 35 | import align.detect_face 36 | import random 37 | from time import sleep 38 | 39 | def main(args): 40 | sleep(random.random()) 41 | output_dir = os.path.expanduser(args.output_dir) 42 | if not os.path.exists(output_dir): 43 | os.makedirs(output_dir) 44 | # Store some git revision info in a text file in the log directory 45 | src_path,_ = os.path.split(os.path.realpath(__file__)) 46 | facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) 47 | dataset = facenet.get_dataset(args.input_dir) 48 | 49 | print('Creating networks and loading parameters') 50 | 51 | with tf.Graph().as_default(): 52 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) 53 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 54 | with sess.as_default(): 55 | pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) 56 | 57 | minsize = 20 # minimum size of face 58 | threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold 59 | factor = 0.709 # scale factor 60 | 61 | # Add a random key to the filename to allow alignment using multiple processes 62 | random_key = np.random.randint(0, high=99999) 63 | bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) 64 | 65 | with open(bounding_boxes_filename, "w") as text_file: 66 | nrof_images_total = 0 67 | nrof_successfully_aligned = 0 68 | if args.random_order: 69 | random.shuffle(dataset) 70 | for cls in dataset: 71 | output_class_dir = os.path.join(output_dir, cls.name) 72 | if not os.path.exists(output_class_dir): 73 | os.makedirs(output_class_dir) 74 | if args.random_order: 75 | random.shuffle(cls.image_paths) 76 | for image_path in cls.image_paths: 77 | nrof_images_total += 1 78 | filename = os.path.splitext(os.path.split(image_path)[1])[0] 79 | output_filename = os.path.join(output_class_dir, filename+'.png') 80 | print(image_path) 81 | if not os.path.exists(output_filename): 82 | try: 83 | img = misc.imread(image_path) 84 | except (IOError, ValueError, IndexError) as e: 85 | errorMessage = '{}: {}'.format(image_path, e) 86 | print(errorMessage) 87 | else: 88 | if img.ndim<2: 89 | print('Unable to align "%s"' % image_path) 90 | text_file.write('%s\n' % (output_filename)) 91 | continue 92 | if img.ndim == 2: 93 | img = facenet.to_rgb(img) 94 | img = img[:,:,0:3] 95 | 96 | bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 97 | nrof_faces = bounding_boxes.shape[0] 98 | if nrof_faces>0: 99 | det = bounding_boxes[:,0:4] 100 | det_arr = [] 101 | img_size = np.asarray(img.shape)[0:2] 102 | if nrof_faces>1: 103 | if args.detect_multiple_faces: 104 | for i in range(nrof_faces): 105 | det_arr.append(np.squeeze(det[i])) 106 | else: 107 | bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) 108 | img_center = img_size / 2 109 | offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) 110 | offset_dist_squared = np.sum(np.power(offsets,2.0),0) 111 | index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering 112 | det_arr.append(det[index,:]) 113 | else: 114 | det_arr.append(np.squeeze(det)) 115 | 116 | for i, det in enumerate(det_arr): 
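# Each selected detection is padded by margin/2 pixels on every side (clamped to the image
# borders), cropped, resized to image_size x image_size and saved as a PNG thumbnail.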
117 | det = np.squeeze(det) 118 | bb = np.zeros(4, dtype=np.int32) 119 | bb[0] = np.maximum(det[0]-args.margin/2, 0) 120 | bb[1] = np.maximum(det[1]-args.margin/2, 0) 121 | bb[2] = np.minimum(det[2]+args.margin/2, img_size[1]) 122 | bb[3] = np.minimum(det[3]+args.margin/2, img_size[0]) 123 | cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] 124 | scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear') 125 | nrof_successfully_aligned += 1 126 | filename_base, file_extension = os.path.splitext(output_filename) 127 | if args.detect_multiple_faces: 128 | output_filename_n = "{}_{}{}".format(filename_base, i, file_extension) 129 | else: 130 | output_filename_n = "{}{}".format(filename_base, file_extension) 131 | misc.imsave(output_filename_n, scaled) 132 | text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3])) 133 | else: 134 | print('Unable to align "%s"' % image_path) 135 | text_file.write('%s\n' % (output_filename)) 136 | 137 | print('Total number of images: %d' % nrof_images_total) 138 | print('Number of successfully aligned images: %d' % nrof_successfully_aligned) 139 | 140 | 141 | def parse_arguments(argv): 142 | parser = argparse.ArgumentParser() 143 | 144 | parser.add_argument('input_dir', type=str, help='Directory with unaligned images.') 145 | parser.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.') 146 | parser.add_argument('--image_size', type=int, 147 | help='Image size (height, width) in pixels.', default=182) 148 | parser.add_argument('--margin', type=int, 149 | help='Margin for the crop around the bounding box (height, width) in pixels.', default=44) 150 | parser.add_argument('--random_order', 151 | help='Shuffles the order of images to enable alignment using multiple processes.', action='store_true') 152 | parser.add_argument('--gpu_memory_fraction', type=float, 153 | help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0) 154 | parser.add_argument('--detect_multiple_faces', type=bool, 155 | help='Detect and align multiple faces per image.', default=False) 156 | return parser.parse_args(argv) 157 | 158 | if __name__ == '__main__': 159 | main(parse_arguments(sys.argv[1:])) 160 | -------------------------------------------------------------------------------- /src/validate_on_lfw.py: -------------------------------------------------------------------------------- 1 | """Validate a face recognizer on the "Labeled Faces in the Wild" dataset (http://vis-www.cs.umass.edu/lfw/). 2 | Embeddings are calculated using the pairs from http://vis-www.cs.umass.edu/lfw/pairs.txt and the ROC curve 3 | is calculated and plotted. Both the model metagraph and the model parameters need to exist 4 | in the same directory, and the metagraph should have the extension '.meta'. 5 | """ 6 | # MIT License 7 | # 8 | # Copyright (c) 2016 David Sandberg 9 | # 10 | # Permission is hereby granted, free of charge, to any person obtaining a copy 11 | # of this software and associated documentation files (the "Software"), to deal 12 | # in the Software without restriction, including without limitation the rights 13 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | # copies of the Software, and to permit persons to whom the Software is 15 | # furnished to do so, subject to the following conditions: 16 | # 17 | # The above copyright notice and this permission notice shall be included in all 18 | # copies or substantial portions of the Software. 
19 | # 20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | # SOFTWARE. 27 | 28 | from __future__ import absolute_import 29 | from __future__ import division 30 | from __future__ import print_function 31 | 32 | import tensorflow as tf 33 | import numpy as np 34 | import argparse 35 | import facenet 36 | import lfw 37 | import os 38 | import sys 39 | from tensorflow.python.ops import data_flow_ops 40 | from sklearn import metrics 41 | from scipy.optimize import brentq 42 | from scipy import interpolate 43 | 44 | 45 | def main(args): 46 | 47 | with tf.Graph().as_default(): 48 | 49 | with tf.Session() as sess: 50 | 51 | # Read the file containing the pairs used for testing 52 | pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) 53 | 54 | # Get the paths for the corresponding images 55 | paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) 56 | 57 | image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') 58 | labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') 59 | batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') 60 | control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') 61 | phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') 62 | 63 | nrof_preprocess_threads = 4 64 | image_size = (args.image_size, args.image_size) 65 | eval_input_queue = data_flow_ops.FIFOQueue(capacity=2000000, 66 | dtypes=[tf.string, tf.int32, tf.int32], 67 | shapes=[(1,), (1,), (1,)], 68 | shared_name=None, name=None) 69 | eval_enqueue_op = eval_input_queue.enqueue_many([image_paths_placeholder, labels_placeholder, control_placeholder], name='eval_enqueue_op') 70 | image_batch, label_batch = facenet.create_input_pipeline(eval_input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) 71 | 72 | # Load the model 73 | input_map = {'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder} 74 | facenet.load_model(args.model, input_map=input_map) 75 | 76 | # Get output tensor 77 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 78 | 79 | coord = tf.train.Coordinator() 80 | tf.train.start_queue_runners(coord=coord, sess=sess) 81 | 82 | evaluate(sess, eval_enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, 83 | embeddings, label_batch, paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, args.distance_metric, args.subtract_mean, 84 | args.use_flipped_images, args.use_fixed_image_standardization) 85 | 86 | 87 | def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, 88 | embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, distance_metric, subtract_mean, use_flipped_images, use_fixed_image_standardization): 89 | # Run forward pass to calculate embeddings 90 | print('Running forward pass on LFW images') 91 | 92 | # Enqueue one epoch of image paths and labels 93 | nrof_embeddings 
= len(actual_issame)*2 # nrof_pairs * nrof_images_per_pair 94 | nrof_flips = 2 if use_flipped_images else 1 95 | nrof_images = nrof_embeddings * nrof_flips 96 | labels_array = np.expand_dims(np.arange(0,nrof_images),1) 97 | image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1) 98 | control_array = np.zeros_like(labels_array, np.int32) 99 | if use_fixed_image_standardization: 100 | control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION 101 | if use_flipped_images: 102 | # Flip every second image 103 | control_array += (labels_array % 2)*facenet.FLIP 104 | sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array}) 105 | 106 | embedding_size = int(embeddings.get_shape()[1]) 107 | assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' 108 | nrof_batches = nrof_images // batch_size 109 | emb_array = np.zeros((nrof_images, embedding_size)) 110 | lab_array = np.zeros((nrof_images,)) 111 | for i in range(nrof_batches): 112 | feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size} 113 | emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict) 114 | lab_array[lab] = lab 115 | emb_array[lab, :] = emb 116 | if i % 10 == 9: 117 | print('.', end='') 118 | sys.stdout.flush() 119 | print('') 120 | embeddings = np.zeros((nrof_embeddings, embedding_size*nrof_flips)) 121 | if use_flipped_images: 122 | # Concatenate embeddings for flipped and non flipped version of the images 123 | embeddings[:,:embedding_size] = emb_array[0::2,:] 124 | embeddings[:,embedding_size:] = emb_array[1::2,:] 125 | else: 126 | embeddings = emb_array 127 | 128 | np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/all_lfw.npy', embeddings) 129 | 130 | assert np.array_equal(lab_array, np.arange(nrof_images))==True, 'Wrong labels used for evaluation, ' \ 131 | 'possibly caused by training examples left ' \ 132 | 'in the input pipeline' 133 | tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(embeddings, 134 | actual_issame, 135 | nrof_folds=nrof_folds, 136 | distance_metric=distance_metric, 137 | subtract_mean=subtract_mean) 138 | 139 | print('Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy))) 140 | print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) 141 | 142 | auc = metrics.auc(fpr, tpr) 143 | print('Area Under Curve (AUC): %1.3f' % auc) 144 | eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.) 145 | print('Equal Error Rate (EER): %1.3f' % eer) 146 | 147 | 148 | def parse_arguments(argv): 149 | parser = argparse.ArgumentParser() 150 | 151 | parser.add_argument('lfw_dir', type=str, 152 | help='Path to the data directory containing aligned LFW face patches.') 153 | parser.add_argument('--lfw_batch_size', type=int, 154 | help='Number of images to process in a batch in the LFW test set.', default=100) 155 | parser.add_argument('model', type=str, 156 | help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file') 157 | parser.add_argument('--image_size', type=int, 158 | help='Image size (height, width) in pixels.', default=160) 159 | parser.add_argument('--lfw_pairs', type=str, 160 | help='The file containing the pairs to use for validation.', default='../data/pairs.txt') 161 | parser.add_argument('--lfw_nrof_folds', type=int, 162 | help='Number of folds to use for cross validation. 
Mainly used for testing.', default=10) 163 | parser.add_argument('--distance_metric', type=int, 164 | help='Distance metric 0:euclidian, 1:cosine similarity.', default=0) 165 | parser.add_argument('--use_flipped_images', 166 | help='Concatenates embeddings for the image and its horizontally flipped counterpart.', action='store_true') 167 | parser.add_argument('--subtract_mean', 168 | help='Subtract feature mean before calculating distance.', action='store_true') 169 | parser.add_argument('--use_fixed_image_standardization', 170 | help='Performs fixed standardization of images.', action='store_true') 171 | return parser.parse_args(argv) 172 | 173 | 174 | if __name__ == '__main__': 175 | main(parse_arguments(sys.argv[1:])) 176 | -------------------------------------------------------------------------------- /src/applications/imagenet_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities for ImageNet data preprocessing & prediction decoding. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import json 8 | import warnings 9 | import numpy as np 10 | 11 | from . import get_submodules_from_kwargs 12 | 13 | CLASS_INDEX = None 14 | CLASS_INDEX_PATH = ('https://s3.amazonaws.com/deep-learning-models/' 15 | 'image-models/imagenet_class_index.json') 16 | 17 | # Global tensor of imagenet mean for preprocessing symbolic inputs 18 | _IMAGENET_MEAN = None 19 | 20 | 21 | def _preprocess_numpy_input(x, data_format, mode, **kwargs): 22 | """Preprocesses a Numpy array encoding a batch of images. 23 | # Arguments 24 | x: Input array, 3D or 4D. 25 | data_format: Data format of the image array. 26 | mode: One of "caffe", "tf" or "torch". 27 | - caffe: will convert the images from RGB to BGR, 28 | then will zero-center each color channel with 29 | respect to the ImageNet dataset, 30 | without scaling. 31 | - tf: will scale pixels between -1 and 1, 32 | sample-wise. 33 | - torch: will scale pixels between 0 and 1 and then 34 | will normalize each channel with respect to the 35 | ImageNet dataset. 36 | # Returns 37 | Preprocessed Numpy array. 38 | """ 39 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 40 | if not issubclass(x.dtype.type, np.floating): 41 | x = x.astype(backend.floatx(), copy=False) 42 | 43 | if mode == 'tf': 44 | x /= 127.5 45 | x -= 1. 46 | return x 47 | 48 | if mode == 'torch': 49 | x /= 255. 50 | mean = [0.485, 0.456, 0.406] 51 | std = [0.229, 0.224, 0.225] 52 | else: 53 | if data_format == 'channels_first': 54 | # 'RGB'->'BGR' 55 | if x.ndim == 3: 56 | x = x[::-1, ...] 57 | else: 58 | x = x[:, ::-1, ...] 
59 | else: 60 | # 'RGB'->'BGR' 61 | x = x[..., ::-1] 62 | mean = [103.939, 116.779, 123.68] 63 | std = None 64 | 65 | # Zero-center by mean pixel 66 | if data_format == 'channels_first': 67 | if x.ndim == 3: 68 | x[0, :, :] -= mean[0] 69 | x[1, :, :] -= mean[1] 70 | x[2, :, :] -= mean[2] 71 | if std is not None: 72 | x[0, :, :] /= std[0] 73 | x[1, :, :] /= std[1] 74 | x[2, :, :] /= std[2] 75 | else: 76 | x[:, 0, :, :] -= mean[0] 77 | x[:, 1, :, :] -= mean[1] 78 | x[:, 2, :, :] -= mean[2] 79 | if std is not None: 80 | x[:, 0, :, :] /= std[0] 81 | x[:, 1, :, :] /= std[1] 82 | x[:, 2, :, :] /= std[2] 83 | else: 84 | x[..., 0] -= mean[0] 85 | x[..., 1] -= mean[1] 86 | x[..., 2] -= mean[2] 87 | if std is not None: 88 | x[..., 0] /= std[0] 89 | x[..., 1] /= std[1] 90 | x[..., 2] /= std[2] 91 | return x 92 | 93 | 94 | def _preprocess_symbolic_input(x, data_format, mode, **kwargs): 95 | """Preprocesses a tensor encoding a batch of images. 96 | # Arguments 97 | x: Input tensor, 3D or 4D. 98 | data_format: Data format of the image tensor. 99 | mode: One of "caffe", "tf" or "torch". 100 | - caffe: will convert the images from RGB to BGR, 101 | then will zero-center each color channel with 102 | respect to the ImageNet dataset, 103 | without scaling. 104 | - tf: will scale pixels between -1 and 1, 105 | sample-wise. 106 | - torch: will scale pixels between 0 and 1 and then 107 | will normalize each channel with respect to the 108 | ImageNet dataset. 109 | # Returns 110 | Preprocessed tensor. 111 | """ 112 | global _IMAGENET_MEAN 113 | 114 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 115 | 116 | if mode == 'tf': 117 | x /= 127.5 118 | x -= 1. 119 | return x 120 | 121 | if mode == 'torch': 122 | x /= 255. 123 | mean = [0.485, 0.456, 0.406] 124 | std = [0.229, 0.224, 0.225] 125 | else: 126 | if data_format == 'channels_first': 127 | # 'RGB'->'BGR' 128 | if backend.ndim(x) == 3: 129 | x = x[::-1, ...] 130 | else: 131 | x = x[:, ::-1, ...] 132 | else: 133 | # 'RGB'->'BGR' 134 | x = x[..., ::-1] 135 | mean = [103.939, 116.779, 123.68] 136 | std = None 137 | 138 | if _IMAGENET_MEAN is None: 139 | _IMAGENET_MEAN = backend.constant(-np.array(mean)) 140 | 141 | # Zero-center by mean pixel 142 | if backend.dtype(x) != backend.dtype(_IMAGENET_MEAN): 143 | x = backend.bias_add( 144 | x, backend.cast(_IMAGENET_MEAN, backend.dtype(x)), 145 | data_format=data_format) 146 | else: 147 | x = backend.bias_add(x, _IMAGENET_MEAN, data_format) 148 | if std is not None: 149 | x /= std 150 | return x 151 | 152 | 153 | def preprocess_input(x, data_format=None, mode='caffe', **kwargs): 154 | """Preprocesses a tensor or Numpy array encoding a batch of images. 155 | # Arguments 156 | x: Input Numpy or symbolic tensor, 3D or 4D. 157 | The preprocessed data is written over the input data 158 | if the data types are compatible. To avoid this 159 | behaviour, `numpy.copy(x)` can be used. 160 | data_format: Data format of the image tensor/array. 161 | mode: One of "caffe", "tf" or "torch". 162 | - caffe: will convert the images from RGB to BGR, 163 | then will zero-center each color channel with 164 | respect to the ImageNet dataset, 165 | without scaling. 166 | - tf: will scale pixels between -1 and 1, 167 | sample-wise. 168 | - torch: will scale pixels between 0 and 1 and then 169 | will normalize each channel with respect to the 170 | ImageNet dataset. 171 | # Returns 172 | Preprocessed tensor or Numpy array. 173 | # Raises 174 | ValueError: In case of unknown `data_format` argument. 
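    # Example
        (Illustrative note, not part of the upstream docstring.)
        For `mode='tf'` a pixel value p in [0, 255] maps to p / 127.5 - 1,
        so 0 becomes -1.0 and 255 becomes 1.0. For `mode='caffe'` the
        channels are reordered RGB -> BGR and the ImageNet means
        [103.939, 116.779, 123.68] are subtracted, without scaling.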
175 | """ 176 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 177 | 178 | if data_format is None: 179 | data_format = backend.image_data_format() 180 | if data_format not in {'channels_first', 'channels_last'}: 181 | raise ValueError('Unknown data_format ' + str(data_format)) 182 | 183 | if isinstance(x, np.ndarray): 184 | return _preprocess_numpy_input(x, data_format=data_format, 185 | mode=mode, **kwargs) 186 | else: 187 | return _preprocess_symbolic_input(x, data_format=data_format, 188 | mode=mode, **kwargs) 189 | 190 | 191 | def decode_predictions(preds, top=5, **kwargs): 192 | """Decodes the prediction of an ImageNet model. 193 | # Arguments 194 | preds: Numpy tensor encoding a batch of predictions. 195 | top: Integer, how many top-guesses to return. 196 | # Returns 197 | A list of lists of top class prediction tuples 198 | `(class_name, class_description, score)`. 199 | One list of tuples per sample in batch input. 200 | # Raises 201 | ValueError: In case of invalid shape of the `pred` array 202 | (must be 2D). 203 | """ 204 | global CLASS_INDEX 205 | 206 | backend, _, _, keras_utils = get_submodules_from_kwargs(kwargs) 207 | 208 | if len(preds.shape) != 2 or preds.shape[1] != 1000: 209 | raise ValueError('`decode_predictions` expects ' 210 | 'a batch of predictions ' 211 | '(i.e. a 2D array of shape (samples, 1000)). ' 212 | 'Found array with shape: ' + str(preds.shape)) 213 | if CLASS_INDEX is None: 214 | fpath = keras_utils.get_file( 215 | 'imagenet_class_index.json', 216 | CLASS_INDEX_PATH, 217 | cache_subdir='models', 218 | file_hash='c2c37ea517e94d9795004a39431a14cb') 219 | with open(fpath) as f: 220 | CLASS_INDEX = json.load(f) 221 | results = [] 222 | for pred in preds: 223 | top_indices = pred.argsort()[-top:][::-1] 224 | result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] 225 | result.sort(key=lambda x: x[2], reverse=True) 226 | results.append(result) 227 | return results 228 | 229 | 230 | def _obtain_input_shape(input_shape, 231 | default_size, 232 | min_size, 233 | data_format, 234 | require_flatten, 235 | weights=None): 236 | """Internal utility to compute/validate a model's input shape. 237 | # Arguments 238 | input_shape: Either None (will return the default network input shape), 239 | or a user-provided shape to be validated. 240 | default_size: Default input width/height for the model. 241 | min_size: Minimum input width/height accepted by the model. 242 | data_format: Image data format to use. 243 | require_flatten: Whether the model is expected to 244 | be linked to a classifier via a Flatten layer. 245 | weights: One of `None` (random initialization) 246 | or 'imagenet' (pre-training on ImageNet). 247 | If weights='imagenet' input channels must be equal to 3. 248 | # Returns 249 | An integer shape tuple (may include None entries). 250 | # Raises 251 | ValueError: In case of invalid argument values. 252 | """ 253 | if weights != 'imagenet' and input_shape and len(input_shape) == 3: 254 | if data_format == 'channels_first': 255 | if input_shape[0] not in {1, 3}: 256 | warnings.warn( 257 | 'This model usually expects 1 or 3 input channels. ' 258 | 'However, it was passed an input_shape with ' + 259 | str(input_shape[0]) + ' input channels.') 260 | default_shape = (input_shape[0], default_size, default_size) 261 | else: 262 | if input_shape[-1] not in {1, 3}: 263 | warnings.warn( 264 | 'This model usually expects 1 or 3 input channels. 
' 265 | 'However, it was passed an input_shape with ' + 266 | str(input_shape[-1]) + ' input channels.') 267 | default_shape = (default_size, default_size, input_shape[-1]) 268 | else: 269 | if data_format == 'channels_first': 270 | default_shape = (3, default_size, default_size) 271 | else: 272 | default_shape = (default_size, default_size, 3) 273 | if weights == 'imagenet' and require_flatten: 274 | if input_shape is not None: 275 | if input_shape != default_shape: 276 | raise ValueError('When setting`include_top=True` ' 277 | 'and loading `imagenet` weights, ' 278 | '`input_shape` should be ' + 279 | str(default_shape) + '.') 280 | return default_shape 281 | if input_shape: 282 | if data_format == 'channels_first': 283 | if input_shape is not None: 284 | if len(input_shape) != 3: 285 | raise ValueError( 286 | '`input_shape` must be a tuple of three integers.') 287 | if input_shape[0] != 3 and weights == 'imagenet': 288 | raise ValueError('The input must have 3 channels; got ' 289 | '`input_shape=' + str(input_shape) + '`') 290 | if ((input_shape[1] is not None and input_shape[1] < min_size) or 291 | (input_shape[2] is not None and input_shape[2] < min_size)): 292 | raise ValueError('Input size must be at least ' + 293 | str(min_size) + 'x' + str(min_size) + 294 | '; got `input_shape=' + 295 | str(input_shape) + '`') 296 | else: 297 | if input_shape is not None: 298 | if len(input_shape) != 3: 299 | raise ValueError( 300 | '`input_shape` must be a tuple of three integers.') 301 | if input_shape[-1] != 3 and weights == 'imagenet': 302 | raise ValueError('The input must have 3 channels; got ' 303 | '`input_shape=' + str(input_shape) + '`') 304 | if ((input_shape[0] is not None and input_shape[0] < min_size) or 305 | (input_shape[1] is not None and input_shape[1] < min_size)): 306 | raise ValueError('Input size must be at least ' + 307 | str(min_size) + 'x' + str(min_size) + 308 | '; got `input_shape=' + 309 | str(input_shape) + '`') 310 | else: 311 | if require_flatten: 312 | input_shape = default_shape 313 | else: 314 | if data_format == 'channels_first': 315 | input_shape = (3, None, None) 316 | else: 317 | input_shape = (None, None, 3) 318 | if require_flatten: 319 | if None in input_shape: 320 | raise ValueError('If `include_top` is True, ' 321 | 'you should specify a static `input_shape`. ' 322 | 'Got `input_shape=' + str(input_shape) + '`') 323 | return input_shape 324 | -------------------------------------------------------------------------------- /src/applications/mobilenet.py: -------------------------------------------------------------------------------- 1 | """MobileNet v1 models for Keras. 2 | MobileNet is a general architecture and can be used for multiple use cases. 3 | Depending on the use case, it can use different input layer size and 4 | different width factors. This allows different width models to reduce 5 | the number of multiply-adds and thereby 6 | reduce inference cost on mobile devices. 7 | MobileNets support any input size greater than 32 x 32, with larger image sizes 8 | offering better performance. 9 | The number of parameters and number of multiply-adds 10 | can be modified by using the `alpha` parameter, 11 | which increases/decreases the number of filters in each layer. 12 | By altering the image size and `alpha` parameter, 13 | all 16 models from the paper can be built, with ImageNet weights provided. 14 | The paper demonstrates the performance of MobileNets using `alpha` values of 15 | 1.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. 
16 | For each of these `alpha` values, weights for 4 different input image sizes 17 | are provided (224, 192, 160, 128). 18 | The following table describes the size and accuracy of the 100% MobileNet 19 | on size 224 x 224: 20 | ---------------------------------------------------------------------------- 21 | Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) 22 | ---------------------------------------------------------------------------- 23 | | 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | 24 | | 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 | 25 | | 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 | 26 | | 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 | 27 | ---------------------------------------------------------------------------- 28 | The following table describes the performance of 29 | the 100 % MobileNet on various input sizes: 30 | ------------------------------------------------------------------------ 31 | Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) 32 | ------------------------------------------------------------------------ 33 | | 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | 34 | | 1.0 MobileNet-192 | 69.1 % | 529 | 4.2 | 35 | | 1.0 MobileNet-160 | 67.2 % | 529 | 4.2 | 36 | | 1.0 MobileNet-128 | 64.4 % | 529 | 4.2 | 37 | ------------------------------------------------------------------------ 38 | The weights for all 16 models are obtained and translated 39 | from TensorFlow checkpoints found at 40 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md 41 | # Reference 42 | - [MobileNets: Efficient Convolutional Neural Networks for 43 | Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)) 44 | """ 45 | from __future__ import print_function 46 | from __future__ import absolute_import 47 | from __future__ import division 48 | 49 | import os 50 | import warnings 51 | 52 | from . import get_submodules_from_kwargs 53 | from . import imagenet_utils 54 | # from .imagenet_utils import decode_predictions 55 | from .imagenet_utils import _obtain_input_shape 56 | 57 | 58 | BASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/' 59 | 'releases/download/v0.6/') 60 | 61 | backend = None 62 | layers = None 63 | models = None 64 | keras_utils = None 65 | 66 | 67 | def preprocess_input(x, **kwargs): 68 | """Preprocesses a numpy array encoding a batch of images. 69 | # Arguments 70 | x: a 4D numpy array consists of RGB values within [0, 255]. 71 | # Returns 72 | Preprocessed array. 73 | """ 74 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) 75 | 76 | 77 | def MobileNet(input_shape=None, 78 | alpha=1.0, 79 | depth_multiplier=1, 80 | dropout=1e-3, 81 | include_top=True, 82 | weights='imagenet', 83 | input_tensor=None, 84 | pooling=None, 85 | classes=1000, 86 | **kwargs): 87 | """Instantiates the MobileNet architecture. 88 | # Arguments 89 | input_shape: optional shape tuple, only to be specified 90 | if `include_top` is False (otherwise the input shape 91 | has to be `(224, 224, 3)` 92 | (with `channels_last` data format) 93 | or (3, 224, 224) (with `channels_first` data format). 94 | It should have exactly 3 inputs channels, 95 | and width and height should be no smaller than 32. 96 | E.g. `(200, 200, 3)` would be one valid value. 97 | alpha: controls the width of the network. 98 | - If `alpha` < 1.0, proportionally decreases the number 99 | of filters in each layer. 100 | - If `alpha` > 1.0, proportionally increases the number 101 | of filters in each layer. 
102 | - If `alpha` = 1, default number of filters from the paper 103 | are used at each layer. 104 | depth_multiplier: depth multiplier for depthwise convolution 105 | (also called the resolution multiplier) 106 | dropout: dropout rate 107 | include_top: whether to include the fully-connected 108 | layer at the top of the network. 109 | weights: one of `None` (random initialization), 110 | 'imagenet' (pre-training on ImageNet), 111 | or the path to the weights file to be loaded. 112 | input_tensor: optional Keras tensor (i.e. output of 113 | `layers.Input()`) 114 | to use as image input for the model. 115 | pooling: Optional pooling mode for feature extraction 116 | when `include_top` is `False`. 117 | - `None` means that the output of the model 118 | will be the 4D tensor output of the 119 | last convolutional layer. 120 | - `avg` means that global average pooling 121 | will be applied to the output of the 122 | last convolutional layer, and thus 123 | the output of the model will be a 124 | 2D tensor. 125 | - `max` means that global max pooling will 126 | be applied. 127 | classes: optional number of classes to classify images 128 | into, only to be specified if `include_top` is True, and 129 | if no `weights` argument is specified. 130 | # Returns 131 | A Keras model instance. 132 | # Raises 133 | ValueError: in case of invalid argument for `weights`, 134 | or invalid input shape. 135 | RuntimeError: If attempting to run this model with a 136 | backend that does not support separable convolutions. 137 | """ 138 | global backend, layers, models, keras_utils 139 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 140 | 141 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 142 | raise ValueError('The `weights` argument should be either ' 143 | '`None` (random initialization), `imagenet` ' 144 | '(pre-training on ImageNet), ' 145 | 'or the path to the weights file to be loaded.') 146 | 147 | if weights == 'imagenet' and include_top and classes != 1000: 148 | raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 149 | 'as true, `classes` should be 1000') 150 | 151 | # Determine proper input shape and default size. 152 | if input_shape is None: 153 | default_size = 224 154 | else: 155 | if backend.image_data_format() == 'channels_first': 156 | rows = input_shape[1] 157 | cols = input_shape[2] 158 | else: 159 | rows = input_shape[0] 160 | cols = input_shape[1] 161 | 162 | if rows == cols and rows in [128, 160, 192, 224]: 163 | default_size = rows 164 | else: 165 | default_size = 224 166 | 167 | input_shape = _obtain_input_shape(input_shape, 168 | default_size=default_size, 169 | min_size=32, 170 | data_format=backend.image_data_format(), 171 | require_flatten=include_top, 172 | weights=weights) 173 | 174 | if backend.image_data_format() == 'channels_last': 175 | row_axis, col_axis = (0, 1) 176 | else: 177 | row_axis, col_axis = (1, 2) 178 | rows = input_shape[row_axis] 179 | cols = input_shape[col_axis] 180 | 181 | if weights == 'imagenet': 182 | if depth_multiplier != 1: 183 | raise ValueError('If imagenet weights are being loaded, ' 184 | 'depth multiplier must be 1') 185 | 186 | if alpha not in [0.25, 0.50, 0.75, 1.0]: 187 | raise ValueError('If imagenet weights are being loaded, ' 188 | 'alpha can be one of' 189 | '`0.25`, `0.50`, `0.75` or `1.0` only.') 190 | 191 | if rows != cols or rows not in [128, 160, 192, 224]: 192 | if rows is None: 193 | rows = 224 194 | warnings.warn('MobileNet shape is undefined.' 
195 | ' Weights for input shape ' 196 | '(224, 224) will be loaded.') 197 | else: 198 | raise ValueError('If imagenet weights are being loaded, ' 199 | 'input must have a static square shape ' 200 | '(one of (128, 128), (160, 160), ' 201 | '(192, 192), or (224, 224)). ' 202 | 'Input shape provided = %s' % (input_shape,)) 203 | 204 | if backend.image_data_format() != 'channels_last': 205 | warnings.warn('The MobileNet family of models is only available ' 206 | 'for the input data format "channels_last" ' 207 | '(width, height, channels). ' 208 | 'However your settings specify the default ' 209 | 'data format "channels_first" (channels, width, height).' 210 | ' You should set `image_data_format="channels_last"` ' 211 | 'in your Keras config located at ~/.keras/keras.json. ' 212 | 'The model being returned right now will expect inputs ' 213 | 'to follow the "channels_last" data format.') 214 | backend.set_image_data_format('channels_last') 215 | old_data_format = 'channels_first' 216 | else: 217 | old_data_format = None 218 | 219 | if input_tensor is None: 220 | img_input = layers.Input(shape=input_shape) 221 | else: 222 | if not backend.is_keras_tensor(input_tensor): 223 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 224 | else: 225 | img_input = input_tensor 226 | 227 | x = _conv_block(img_input, 32, alpha, strides=(2, 2)) 228 | x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) 229 | 230 | x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, 231 | strides=(2, 2), block_id=2) 232 | x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) 233 | 234 | x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, 235 | strides=(2, 2), block_id=4) 236 | x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) 237 | 238 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, 239 | strides=(2, 2), block_id=6) 240 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) 241 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) 242 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) 243 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) 244 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) 245 | 246 | x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, 247 | strides=(2, 2), block_id=12) 248 | x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) 249 | 250 | if include_top: 251 | if backend.image_data_format() == 'channels_first': 252 | shape = (int(1024 * alpha), 1, 1) 253 | else: 254 | shape = (1, 1, int(1024 * alpha)) 255 | 256 | x = layers.GlobalAveragePooling2D()(x) 257 | x = layers.Reshape(shape, name='reshape_1')(x) 258 | x = layers.Dropout(dropout, name='dropout')(x) 259 | x = layers.Conv2D(classes, (1, 1), 260 | padding='same', 261 | name='conv_preds')(x) 262 | x = layers.Activation('softmax', name='act_softmax')(x) 263 | x = layers.Reshape((classes,), name='reshape_2')(x) 264 | else: 265 | if pooling == 'avg': 266 | x = layers.GlobalAveragePooling2D()(x) 267 | elif pooling == 'max': 268 | x = layers.GlobalMaxPooling2D()(x) 269 | 270 | # Ensure that the model takes into account 271 | # any potential predecessors of `input_tensor`. 272 | if input_tensor is not None: 273 | inputs = keras_utils.get_source_inputs(input_tensor) 274 | else: 275 | inputs = img_input 276 | 277 | # Create model. 
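# (Added note: the model name encodes the width multiplier and the input rows,
# e.g. alpha=1.0 with 224x224 inputs yields 'mobilenet_1.00_224'.)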
278 | model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) 279 | 280 | # Load weights. 281 | if weights == 'imagenet': 282 | if backend.image_data_format() == 'channels_first': 283 | raise ValueError('Weights for "channels_first" format ' 284 | 'are not available.') 285 | if alpha == 1.0: 286 | alpha_text = '1_0' 287 | elif alpha == 0.75: 288 | alpha_text = '7_5' 289 | elif alpha == 0.50: 290 | alpha_text = '5_0' 291 | else: 292 | alpha_text = '2_5' 293 | 294 | if include_top: 295 | model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) 296 | weight_path = BASE_WEIGHT_PATH + model_name 297 | weights_path = keras_utils.get_file(model_name, 298 | weight_path, 299 | cache_subdir='models') 300 | else: 301 | model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) 302 | weight_path = BASE_WEIGHT_PATH + model_name 303 | weights_path = keras_utils.get_file(model_name, 304 | weight_path, 305 | cache_subdir='models') 306 | model.load_weights(weights_path) 307 | elif weights is not None: 308 | model.load_weights(weights) 309 | 310 | if old_data_format: 311 | backend.set_image_data_format(old_data_format) 312 | return model 313 | 314 | 315 | def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): 316 | """Adds an initial convolution layer (with batch normalization and relu6). 317 | # Arguments 318 | inputs: Input tensor of shape `(rows, cols, 3)` 319 | (with `channels_last` data format) or 320 | (3, rows, cols) (with `channels_first` data format). 321 | It should have exactly 3 inputs channels, 322 | and width and height should be no smaller than 32. 323 | E.g. `(224, 224, 3)` would be one valid value. 324 | filters: Integer, the dimensionality of the output space 325 | (i.e. the number of output filters in the convolution). 326 | alpha: controls the width of the network. 327 | - If `alpha` < 1.0, proportionally decreases the number 328 | of filters in each layer. 329 | - If `alpha` > 1.0, proportionally increases the number 330 | of filters in each layer. 331 | - If `alpha` = 1, default number of filters from the paper 332 | are used at each layer. 333 | kernel: An integer or tuple/list of 2 integers, specifying the 334 | width and height of the 2D convolution window. 335 | Can be a single integer to specify the same value for 336 | all spatial dimensions. 337 | strides: An integer or tuple/list of 2 integers, 338 | specifying the strides of the convolution 339 | along the width and height. 340 | Can be a single integer to specify the same value for 341 | all spatial dimensions. 342 | Specifying any stride value != 1 is incompatible with specifying 343 | any `dilation_rate` value != 1. 344 | # Input shape 345 | 4D tensor with shape: 346 | `(samples, channels, rows, cols)` if data_format='channels_first' 347 | or 4D tensor with shape: 348 | `(samples, rows, cols, channels)` if data_format='channels_last'. 349 | # Output shape 350 | 4D tensor with shape: 351 | `(samples, filters, new_rows, new_cols)` 352 | if data_format='channels_first' 353 | or 4D tensor with shape: 354 | `(samples, new_rows, new_cols, filters)` 355 | if data_format='channels_last'. 356 | `rows` and `cols` values might have changed due to stride. 357 | # Returns 358 | Output tensor of block. 
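    # Example
        (Illustrative note, not part of the upstream docstring.)
        With `filters=32` and `alpha=0.5` the block applies
        `int(32 * 0.5) = 16` output filters; `kernel` and `strides` are
        forwarded unchanged to the underlying `Conv2D`.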
359 | """ 360 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 361 | filters = int(filters * alpha) 362 | x = layers.ZeroPadding2D(padding=((0, 1), (0, 1)), name='conv1_pad')(inputs) 363 | x = layers.Conv2D(filters, kernel, 364 | padding='valid', 365 | use_bias=False, 366 | strides=strides, 367 | name='conv1')(x) 368 | x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x) 369 | return layers.ReLU(6., name='conv1_relu')(x) 370 | 371 | 372 | def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, 373 | depth_multiplier=1, strides=(1, 1), block_id=1): 374 | """Adds a depthwise convolution block. 375 | A depthwise convolution block consists of a depthwise conv, 376 | batch normalization, relu6, pointwise convolution, 377 | batch normalization and relu6 activation. 378 | # Arguments 379 | inputs: Input tensor of shape `(rows, cols, channels)` 380 | (with `channels_last` data format) or 381 | (channels, rows, cols) (with `channels_first` data format). 382 | pointwise_conv_filters: Integer, the dimensionality of the output space 383 | (i.e. the number of output filters in the pointwise convolution). 384 | alpha: controls the width of the network. 385 | - If `alpha` < 1.0, proportionally decreases the number 386 | of filters in each layer. 387 | - If `alpha` > 1.0, proportionally increases the number 388 | of filters in each layer. 389 | - If `alpha` = 1, default number of filters from the paper 390 | are used at each layer. 391 | depth_multiplier: The number of depthwise convolution output channels 392 | for each input channel. 393 | The total number of depthwise convolution output 394 | channels will be equal to `filters_in * depth_multiplier`. 395 | strides: An integer or tuple/list of 2 integers, 396 | specifying the strides of the convolution 397 | along the width and height. 398 | Can be a single integer to specify the same value for 399 | all spatial dimensions. 400 | Specifying any stride value != 1 is incompatible with specifying 401 | any `dilation_rate` value != 1. 402 | block_id: Integer, a unique identification designating 403 | the block number. 404 | # Input shape 405 | 4D tensor with shape: 406 | `(batch, channels, rows, cols)` if data_format='channels_first' 407 | or 4D tensor with shape: 408 | `(batch, rows, cols, channels)` if data_format='channels_last'. 409 | # Output shape 410 | 4D tensor with shape: 411 | `(batch, filters, new_rows, new_cols)` 412 | if data_format='channels_first' 413 | or 4D tensor with shape: 414 | `(batch, new_rows, new_cols, filters)` 415 | if data_format='channels_last'. 416 | `rows` and `cols` values might have changed due to stride. 417 | # Returns 418 | Output tensor of block. 
419 | """ 420 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 421 | pointwise_conv_filters = int(pointwise_conv_filters * alpha) 422 | 423 | if strides == (1, 1): 424 | x = inputs 425 | else: 426 | x = layers.ZeroPadding2D(((0, 1), (0, 1)), 427 | name='conv_pad_%d' % block_id)(inputs) 428 | x = layers.DepthwiseConv2D((3, 3), 429 | padding='same' if strides == (1, 1) else 'valid', 430 | depth_multiplier=depth_multiplier, 431 | strides=strides, 432 | use_bias=False, 433 | name='conv_dw_%d' % block_id)(x) 434 | x = layers.BatchNormalization( 435 | axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) 436 | x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x) 437 | 438 | x = layers.Conv2D(pointwise_conv_filters, (1, 1), 439 | padding='same', 440 | use_bias=False, 441 | strides=(1, 1), 442 | name='conv_pw_%d' % block_id)(x) 443 | x = layers.BatchNormalization(axis=channel_axis, 444 | name='conv_pw_%d_bn' % block_id)(x) 445 | return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x) 446 | -------------------------------------------------------------------------------- /src/applications/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | """MobileNet v2 models for Keras. 2 | MobileNetV2 is a general architecture and can be used for multiple use cases. 3 | Depending on the use case, it can use different input layer size and 4 | different width factors. This allows different width models to reduce 5 | the number of multiply-adds and thereby 6 | reduce inference cost on mobile devices. 7 | MobileNetV2 is very similar to the original MobileNet, 8 | except that it uses inverted residual blocks with 9 | bottlenecking features. It has a drastically lower 10 | parameter count than the original MobileNet. 11 | MobileNets support any input size greater 12 | than 32 x 32, with larger image sizes 13 | offering better performance. 14 | The number of parameters and number of multiply-adds 15 | can be modified by using the `alpha` parameter, 16 | which increases/decreases the number of filters in each layer. 17 | By altering the image size and `alpha` parameter, 18 | all 22 models from the paper can be built, with ImageNet weights provided. 19 | The paper demonstrates the performance of MobileNets using `alpha` values of 20 | 1.0 (also called 100 % MobileNet), 0.35, 0.5, 0.75, 1.0, 1.3, and 1.4 21 | For each of these `alpha` values, weights for 5 different input image sizes 22 | are provided (224, 192, 160, 128, and 96). 
23 | The following table describes the performance of 24 | MobileNet on various input sizes: 25 | ------------------------------------------------------------------------ 26 | MACs stands for Multiply Adds 27 | Classification Checkpoint| MACs (M) | Parameters (M)| Top 1 Accuracy| Top 5 Accuracy 28 | --------------------------|------------|---------------|---------|----|------------- 29 | | [mobilenet_v2_1.4_224] | 582 | 6.06 | 75.0 | 92.5 | 30 | | [mobilenet_v2_1.3_224] | 509 | 5.34 | 74.4 | 92.1 | 31 | | [mobilenet_v2_1.0_224] | 300 | 3.47 | 71.8 | 91.0 | 32 | | [mobilenet_v2_1.0_192] | 221 | 3.47 | 70.7 | 90.1 | 33 | | [mobilenet_v2_1.0_160] | 154 | 3.47 | 68.8 | 89.0 | 34 | | [mobilenet_v2_1.0_128] | 99 | 3.47 | 65.3 | 86.9 | 35 | | [mobilenet_v2_1.0_96] | 56 | 3.47 | 60.3 | 83.2 | 36 | | [mobilenet_v2_0.75_224] | 209 | 2.61 | 69.8 | 89.6 | 37 | | [mobilenet_v2_0.75_192] | 153 | 2.61 | 68.7 | 88.9 | 38 | | [mobilenet_v2_0.75_160] | 107 | 2.61 | 66.4 | 87.3 | 39 | | [mobilenet_v2_0.75_128] | 69 | 2.61 | 63.2 | 85.3 | 40 | | [mobilenet_v2_0.75_96] | 39 | 2.61 | 58.8 | 81.6 | 41 | | [mobilenet_v2_0.5_224] | 97 | 1.95 | 65.4 | 86.4 | 42 | | [mobilenet_v2_0.5_192] | 71 | 1.95 | 63.9 | 85.4 | 43 | | [mobilenet_v2_0.5_160] | 50 | 1.95 | 61.0 | 83.2 | 44 | | [mobilenet_v2_0.5_128] | 32 | 1.95 | 57.7 | 80.8 | 45 | | [mobilenet_v2_0.5_96] | 18 | 1.95 | 51.2 | 75.8 | 46 | | [mobilenet_v2_0.35_224] | 59 | 1.66 | 60.3 | 82.9 | 47 | | [mobilenet_v2_0.35_192] | 43 | 1.66 | 58.2 | 81.2 | 48 | | [mobilenet_v2_0.35_160] | 30 | 1.66 | 55.7 | 79.1 | 49 | | [mobilenet_v2_0.35_128] | 20 | 1.66 | 50.8 | 75.0 | 50 | | [mobilenet_v2_0.35_96] | 11 | 1.66 | 45.5 | 70.4 | 51 | The weights for all 16 models are obtained and 52 | translated from the Tensorflow checkpoints 53 | from TensorFlow checkpoints found [here] 54 | (https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/README.md). 55 | # Reference 56 | This file contains building code for MobileNetV2, based on 57 | [MobileNetV2: Inverted Residuals and Linear Bottlenecks] 58 | (https://arxiv.org/abs/1801.04381) 59 | Tests comparing this model to the existing Tensorflow model can be 60 | found at [mobilenet_v2_keras] 61 | (https://github.com/JonathanCMitchell/mobilenet_v2_keras) 62 | """ 63 | from __future__ import print_function 64 | from __future__ import absolute_import 65 | from __future__ import division 66 | 67 | import os 68 | import warnings 69 | import numpy as np 70 | 71 | from . import correct_pad 72 | from . import get_submodules_from_kwargs 73 | from .imagenet_utils import decode_predictions 74 | from .imagenet_utils import _obtain_input_shape 75 | 76 | # TODO Change path to v1.1 77 | BASE_WEIGHT_PATH = ('https://github.com/JonathanCMitchell/mobilenet_v2_keras/' 78 | 'releases/download/v1.1/') 79 | 80 | backend = None 81 | layers = None 82 | models = None 83 | keras_utils = None 84 | 85 | 86 | def preprocess_input(x, **kwargs): 87 | """Preprocesses a numpy array encoding a batch of images. 88 | This function applies the "Inception" preprocessing which converts 89 | the RGB values from [0, 255] to [-1, 1]. Note that this preprocessing 90 | function is different from `imagenet_utils.preprocess_input()`. 91 | # Arguments 92 | x: a 4D numpy array consists of RGB values within [0, 255]. 93 | # Returns 94 | Preprocessed array. 95 | """ 96 | x /= 128. 97 | x -= 1. 98 | return x.astype(np.float32) 99 | 100 | 101 | # This function is taken from the original tf repo. 
102 | # It ensures that all layers have a channel number that is divisible by 8 103 | # It can be seen here: 104 | # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 105 | 106 | 107 | def _make_divisible(v, divisor, min_value=None): 108 | if min_value is None: 109 | min_value = divisor 110 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 111 | # Make sure that round down does not go down by more than 10%. 112 | if new_v < 0.9 * v: 113 | new_v += divisor 114 | return new_v 115 | 116 | 117 | def MobileNetV2(input_shape=None, 118 | alpha=1.0, 119 | depth_multiplier=1, 120 | include_top=True, 121 | weights='imagenet', 122 | input_tensor=None, 123 | pooling=None, 124 | classes=1000, 125 | **kwargs): 126 | """Instantiates the MobileNetV2 architecture. 127 | # Arguments 128 | input_shape: optional shape tuple, to be specified if you would 129 | like to use a model with an input img resolution that is not 130 | (224, 224, 3). 131 | It should have exactly 3 inputs channels (224, 224, 3). 132 | You can also omit this option if you would like 133 | to infer input_shape from an input_tensor. 134 | If you choose to include both input_tensor and input_shape then 135 | input_shape will be used if they match, if the shapes 136 | do not match then we will throw an error. 137 | E.g. `(160, 160, 3)` would be one valid value. 138 | alpha: controls the width of the network. This is known as the 139 | width multiplier in the MobileNetV2 paper. 140 | - If `alpha` < 1.0, proportionally decreases the number 141 | of filters in each layer. 142 | - If `alpha` > 1.0, proportionally increases the number 143 | of filters in each layer. 144 | - If `alpha` = 1, default number of filters from the paper 145 | are used at each layer. 146 | depth_multiplier: depth multiplier for depthwise convolution 147 | (also called the resolution multiplier) 148 | include_top: whether to include the fully-connected 149 | layer at the top of the network. 150 | weights: one of `None` (random initialization), 151 | 'imagenet' (pre-training on ImageNet), 152 | or the path to the weights file to be loaded. 153 | input_tensor: optional Keras tensor (i.e. output of 154 | `layers.Input()`) 155 | to use as image input for the model. 156 | pooling: Optional pooling mode for feature extraction 157 | when `include_top` is `False`. 158 | - `None` means that the output of the model 159 | will be the 4D tensor output of the 160 | last convolutional layer. 161 | - `avg` means that global average pooling 162 | will be applied to the output of the 163 | last convolutional layer, and thus 164 | the output of the model will be a 165 | 2D tensor. 166 | - `max` means that global max pooling will 167 | be applied. 168 | classes: optional number of classes to classify images 169 | into, only to be specified if `include_top` is True, and 170 | if no `weights` argument is specified. 171 | # Returns 172 | A Keras model instance. 
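    # Example
        (Illustrative sketch, assuming the package's `__init__` wires up the
        Keras backend/layers/models/utils submodules as in
        `keras_applications`; not part of the upstream docstring.)
        >>> model = MobileNetV2(input_shape=(160, 160, 3), alpha=1.0,
        ...                     include_top=False, pooling='avg',
        ...                     weights=None)
        >>> model.output_shape
        (None, 1280)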
173 | # Raises 174 | ValueError: in case of invalid argument for `weights`, 175 | or invalid input shape or invalid depth_multiplier, alpha, 176 | rows when weights='imagenet' 177 | """ 178 | global backend, layers, models, keras_utils 179 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 180 | 181 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 182 | raise ValueError('The `weights` argument should be either ' 183 | '`None` (random initialization), `imagenet` ' 184 | '(pre-training on ImageNet), ' 185 | 'or the path to the weights file to be loaded.') 186 | 187 | if weights == 'imagenet' and include_top and classes != 1000: 188 | raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 189 | 'as true, `classes` should be 1000') 190 | 191 | # Determine proper input shape and default size. 192 | # If both input_shape and input_tensor are used, they should match 193 | if input_shape is not None and input_tensor is not None: 194 | try: 195 | is_input_t_tensor = backend.is_keras_tensor(input_tensor) 196 | except ValueError: 197 | try: 198 | is_input_t_tensor = backend.is_keras_tensor( 199 | keras_utils.get_source_inputs(input_tensor)) 200 | except ValueError: 201 | raise ValueError('input_tensor: ', input_tensor, 202 | 'is not type input_tensor') 203 | if is_input_t_tensor: 204 | if backend.image_data_format == 'channels_first': 205 | if backend.int_shape(input_tensor)[1] != input_shape[1]: 206 | raise ValueError('input_shape: ', input_shape, 207 | 'and input_tensor: ', input_tensor, 208 | 'do not meet the same shape requirements') 209 | else: 210 | if backend.int_shape(input_tensor)[2] != input_shape[1]: 211 | raise ValueError('input_shape: ', input_shape, 212 | 'and input_tensor: ', input_tensor, 213 | 'do not meet the same shape requirements') 214 | else: 215 | raise ValueError('input_tensor specified: ', input_tensor, 216 | 'is not a keras tensor') 217 | 218 | # If input_shape is None, infer shape from input_tensor 219 | if input_shape is None and input_tensor is not None: 220 | 221 | try: 222 | backend.is_keras_tensor(input_tensor) 223 | except ValueError: 224 | raise ValueError('input_tensor: ', input_tensor, 225 | 'is type: ', type(input_tensor), 226 | 'which is not a valid type') 227 | 228 | if input_shape is None and not backend.is_keras_tensor(input_tensor): 229 | default_size = 224 230 | elif input_shape is None and backend.is_keras_tensor(input_tensor): 231 | if backend.image_data_format() == 'channels_first': 232 | rows = backend.int_shape(input_tensor)[2] 233 | cols = backend.int_shape(input_tensor)[3] 234 | else: 235 | rows = backend.int_shape(input_tensor)[1] 236 | cols = backend.int_shape(input_tensor)[2] 237 | 238 | if rows == cols and rows in [96, 128, 160, 192, 224]: 239 | default_size = rows 240 | else: 241 | default_size = 224 242 | 243 | # If input_shape is None and no input_tensor 244 | elif input_shape is None: 245 | default_size = 224 246 | 247 | # If input_shape is not None, assume default size 248 | else: 249 | if backend.image_data_format() == 'channels_first': 250 | rows = input_shape[1] 251 | cols = input_shape[2] 252 | else: 253 | rows = input_shape[0] 254 | cols = input_shape[1] 255 | 256 | if rows == cols and rows in [96, 128, 160, 192, 224]: 257 | default_size = rows 258 | else: 259 | default_size = 224 260 | 261 | input_shape = _obtain_input_shape(input_shape, 262 | default_size=default_size, 263 | min_size=32, 264 | data_format=backend.image_data_format(), 265 | require_flatten=include_top, 266 
| weights=weights) 267 | 268 | if backend.image_data_format() == 'channels_last': 269 | row_axis, col_axis = (0, 1) 270 | else: 271 | row_axis, col_axis = (1, 2) 272 | rows = input_shape[row_axis] 273 | cols = input_shape[col_axis] 274 | 275 | if weights == 'imagenet': 276 | if depth_multiplier != 1: 277 | raise ValueError('If imagenet weights are being loaded, ' 278 | 'depth multiplier must be 1') 279 | 280 | if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: 281 | raise ValueError('If imagenet weights are being loaded, ' 282 | 'alpha can be one of `0.35`, `0.50`, `0.75`, ' 283 | '`1.0`, `1.3` or `1.4` only.') 284 | 285 | if rows != cols or rows not in [96, 128, 160, 192, 224]: 286 | if rows is None: 287 | rows = 224 288 | warnings.warn('MobileNet shape is undefined.' 289 | ' Weights for input shape' 290 | '(224, 224) will be loaded.') 291 | else: 292 | raise ValueError('If imagenet weights are being loaded, ' 293 | 'input must have a static square shape' 294 | '(one of (96, 96), (128, 128), (160, 160),' 295 | '(192, 192), or (224, 224)).' 296 | 'Input shape provided = %s' % (input_shape,)) 297 | 298 | if backend.image_data_format() != 'channels_last': 299 | warnings.warn('The MobileNet family of models is only available ' 300 | 'for the input data format "channels_last" ' 301 | '(width, height, channels). ' 302 | 'However your settings specify the default ' 303 | 'data format "channels_first" (channels, width, height).' 304 | ' You should set `image_data_format="channels_last"` ' 305 | 'in your Keras config located at ~/.keras/keras.json. ' 306 | 'The model being returned right now will expect inputs ' 307 | 'to follow the "channels_last" data format.') 308 | backend.set_image_data_format('channels_last') 309 | old_data_format = 'channels_first' 310 | else: 311 | old_data_format = None 312 | 313 | if input_tensor is None: 314 | img_input = layers.Input(shape=input_shape) 315 | else: 316 | if not backend.is_keras_tensor(input_tensor): 317 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 318 | else: 319 | img_input = input_tensor 320 | 321 | first_block_filters = _make_divisible(32 * alpha, 8) 322 | x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3), 323 | name='Conv1_pad')(img_input) 324 | x = layers.Conv2D(first_block_filters, 325 | kernel_size=3, 326 | strides=(2, 2), 327 | padding='valid', 328 | use_bias=False, 329 | name='Conv1')(x) 330 | x = layers.BatchNormalization( 331 | epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x) 332 | x = layers.ReLU(6., name='Conv1_relu')(x) 333 | 334 | x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, 335 | expansion=1, block_id=0) 336 | 337 | x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2, 338 | expansion=6, block_id=1) 339 | x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, 340 | expansion=6, block_id=2) 341 | 342 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, 343 | expansion=6, block_id=3) 344 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, 345 | expansion=6, block_id=4) 346 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, 347 | expansion=6, block_id=5) 348 | 349 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, 350 | expansion=6, block_id=6) 351 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 352 | expansion=6, block_id=7) 353 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 354 | expansion=6, block_id=8) 355 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 356 | 
expansion=6, block_id=9) 357 | 358 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 359 | expansion=6, block_id=10) 360 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 361 | expansion=6, block_id=11) 362 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 363 | expansion=6, block_id=12) 364 | 365 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, 366 | expansion=6, block_id=13) 367 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, 368 | expansion=6, block_id=14) 369 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, 370 | expansion=6, block_id=15) 371 | 372 | x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, 373 | expansion=6, block_id=16) 374 | 375 | # no alpha applied to last conv as stated in the paper: 376 | # if the width multiplier is greater than 1 we 377 | # increase the number of output channels 378 | if alpha > 1.0: 379 | last_block_filters = _make_divisible(1280 * alpha, 8) 380 | else: 381 | last_block_filters = 1280 382 | 383 | x = layers.Conv2D(last_block_filters, 384 | kernel_size=1, 385 | use_bias=False, 386 | name='Conv_1')(x) 387 | x = layers.BatchNormalization(epsilon=1e-3, 388 | momentum=0.999, 389 | name='Conv_1_bn')(x) 390 | x = layers.ReLU(6., name='out_relu')(x) 391 | 392 | if include_top: 393 | x = layers.GlobalAveragePooling2D()(x) 394 | x = layers.Dense(classes, activation='softmax', 395 | use_bias=True, name='Logits')(x) 396 | else: 397 | if pooling == 'avg': 398 | x = layers.GlobalAveragePooling2D()(x) 399 | elif pooling == 'max': 400 | x = layers.GlobalMaxPooling2D()(x) 401 | 402 | # Ensure that the model takes into account 403 | # any potential predecessors of `input_tensor`. 404 | if input_tensor is not None: 405 | inputs = keras_utils.get_source_inputs(input_tensor) 406 | else: 407 | inputs = img_input 408 | 409 | # Create model. 410 | model = models.Model(inputs, x, 411 | name='mobilenetv2_%0.2f_%s' % (alpha, rows)) 412 | 413 | # Load weights. 
414 | if weights == 'imagenet': 415 | if backend.image_data_format() == 'channels_first': 416 | raise ValueError('Weights for "channels_first" format ' 417 | 'are not available.') 418 | 419 | if include_top: 420 | model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + 421 | str(alpha) + '_' + str(rows) + '.h5') 422 | weigh_path = BASE_WEIGHT_PATH + model_name 423 | weights_path = keras_utils.get_file( 424 | model_name, weigh_path, cache_subdir='models') 425 | else: 426 | model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + 427 | str(alpha) + '_' + str(rows) + '_no_top' + '.h5') 428 | weigh_path = BASE_WEIGHT_PATH + model_name 429 | weights_path = keras_utils.get_file( 430 | model_name, weigh_path, cache_subdir='models') 431 | model.load_weights(weights_path) 432 | elif weights is not None: 433 | model.load_weights(weights) 434 | 435 | if old_data_format: 436 | backend.set_image_data_format(old_data_format) 437 | return model 438 | 439 | 440 | def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): 441 | in_channels = backend.int_shape(inputs)[-1] 442 | pointwise_conv_filters = int(filters * alpha) 443 | pointwise_filters = _make_divisible(pointwise_conv_filters, 8) 444 | x = inputs 445 | prefix = 'block_{}_'.format(block_id) 446 | 447 | if block_id: 448 | # Expand 449 | x = layers.Conv2D(expansion * in_channels, 450 | kernel_size=1, 451 | padding='same', 452 | use_bias=False, 453 | activation=None, 454 | name=prefix + 'expand')(x) 455 | x = layers.BatchNormalization(epsilon=1e-3, 456 | momentum=0.999, 457 | name=prefix + 'expand_BN')(x) 458 | x = layers.ReLU(6., name=prefix + 'expand_relu')(x) 459 | else: 460 | prefix = 'expanded_conv_' 461 | 462 | # Depthwise 463 | if stride == 2: 464 | x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3), 465 | name=prefix + 'pad')(x) 466 | x = layers.DepthwiseConv2D(kernel_size=3, 467 | strides=stride, 468 | activation=None, 469 | use_bias=False, 470 | padding='same' if stride == 1 else 'valid', 471 | name=prefix + 'depthwise')(x) 472 | x = layers.BatchNormalization(epsilon=1e-3, 473 | momentum=0.999, 474 | name=prefix + 'depthwise_BN')(x) 475 | 476 | x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) 477 | 478 | # Project 479 | x = layers.Conv2D(pointwise_filters, 480 | kernel_size=1, 481 | padding='same', 482 | use_bias=False, 483 | activation=None, 484 | name=prefix + 'project')(x) 485 | x = layers.BatchNormalization( 486 | epsilon=1e-3, momentum=0.999, name=prefix + 'project_BN')(x) 487 | 488 | if in_channels == pointwise_filters and stride == 1: 489 | return layers.Add(name=prefix + 'add')([inputs, x]) 490 | return x -------------------------------------------------------------------------------- /src/facenet.py: -------------------------------------------------------------------------------- 1 | """Functions for building the face recognition network. 
2 | """ 3 | # MIT License 4 | # 5 | # Copyright (c) 2016 David Sandberg 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | # pylint: disable=missing-docstring 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import os 31 | from subprocess import Popen, PIPE 32 | import tensorflow as tf 33 | import numpy as np 34 | from scipy import misc 35 | from sklearn.model_selection import KFold 36 | from scipy import interpolate 37 | from tensorflow.python.training import training 38 | import random 39 | import re 40 | from tensorflow.python.platform import gfile 41 | import math 42 | from six import iteritems 43 | 44 | def triplet_loss(anchor, positive, negative, alpha): 45 | """Calculate the triplet loss according to the FaceNet paper 46 | 47 | Args: 48 | anchor: the embeddings for the anchor images. 49 | positive: the embeddings for the positive images. 50 | negative: the embeddings for the negative images. 51 | 52 | Returns: 53 | the triplet loss according to the FaceNet paper as a float tensor. 
54 | """ 55 | with tf.variable_scope('triplet_loss'): 56 | pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) 57 | neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) 58 | 59 | basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) 60 | loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) 61 | 62 | return loss 63 | 64 | def center_loss(features, label, alfa, nrof_classes): 65 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 66 | (http://ydwen.github.io/papers/WenECCV16.pdf) 67 | """ 68 | nrof_features = features.get_shape()[1] 69 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 70 | initializer=tf.constant_initializer(0), trainable=False) 71 | label = tf.reshape(label, [-1]) 72 | centers_batch = tf.gather(centers, label) 73 | diff = (1 - alfa) * (centers_batch - features) 74 | centers = tf.scatter_sub(centers, label, diff) 75 | with tf.control_dependencies([centers]): 76 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 77 | return loss, centers 78 | 79 | def get_image_paths_and_labels(dataset): 80 | image_paths_flat = [] 81 | labels_flat = [] 82 | for i in range(len(dataset)): 83 | image_paths_flat += dataset[i].image_paths 84 | labels_flat += [i] * len(dataset[i].image_paths) 85 | return image_paths_flat, labels_flat 86 | 87 | def shuffle_examples(image_paths, labels): 88 | shuffle_list = list(zip(image_paths, labels)) 89 | random.shuffle(shuffle_list) 90 | image_paths_shuff, labels_shuff = zip(*shuffle_list) 91 | return image_paths_shuff, labels_shuff 92 | 93 | def random_rotate_image(image): 94 | angle = np.random.uniform(low=-10.0, high=10.0) 95 | return misc.imrotate(image, angle, 'bicubic') 96 | 97 | # 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip 98 | RANDOM_ROTATE = 1 99 | RANDOM_CROP = 2 100 | RANDOM_FLIP = 4 101 | FIXED_STANDARDIZATION = 8 102 | FLIP = 16 103 | def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder): 104 | images_and_labels_list = [] 105 | for _ in range(nrof_preprocess_threads): 106 | filenames, label, control = input_queue.dequeue() 107 | images = [] 108 | for filename in tf.unstack(filenames): 109 | file_contents = tf.read_file(filename) 110 | image = tf.image.decode_image(file_contents, 3) 111 | image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE), 112 | lambda:tf.py_func(random_rotate_image, [image], tf.uint8), 113 | lambda:tf.identity(image)) 114 | image = tf.cond(get_control_flag(control[0], RANDOM_CROP), 115 | lambda:tf.random_crop(image, image_size + (3,)), 116 | lambda:tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1])) 117 | image = tf.cond(get_control_flag(control[0], RANDOM_FLIP), 118 | lambda:tf.image.random_flip_left_right(image), 119 | lambda:tf.identity(image)) 120 | image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION), 121 | lambda:(tf.cast(image, tf.float32) - 127.5)/128.0, 122 | lambda:tf.image.per_image_standardization(image)) 123 | image = tf.cond(get_control_flag(control[0], FLIP), 124 | lambda:tf.image.flip_left_right(image), 125 | lambda:tf.identity(image)) 126 | #pylint: disable=no-member 127 | image.set_shape(image_size + (3,)) 128 | images.append(image) 129 | images_and_labels_list.append([images, label]) 130 | 131 | image_batch, label_batch = tf.train.batch_join( 132 | images_and_labels_list, batch_size=batch_size_placeholder, 133 | shapes=[image_size + 
(3,), ()], enqueue_many=True, 134 | capacity=4 * nrof_preprocess_threads * 100, 135 | allow_smaller_final_batch=True) 136 | 137 | return image_batch, label_batch 138 | 139 | def get_control_flag(control, field): 140 | return tf.equal(tf.mod(tf.floor_div(control, field), 2), 1) 141 | 142 | def _add_loss_summaries(total_loss): 143 | """Add summaries for losses. 144 | 145 | Generates moving average for all losses and associated summaries for 146 | visualizing the performance of the network. 147 | 148 | Args: 149 | total_loss: Total loss from loss(). 150 | Returns: 151 | loss_averages_op: op for generating moving averages of losses. 152 | """ 153 | # Compute the moving average of all individual losses and the total loss. 154 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') 155 | losses = tf.get_collection('losses') 156 | loss_averages_op = loss_averages.apply(losses + [total_loss]) 157 | 158 | # Attach a scalar summmary to all individual losses and the total loss; do the 159 | # same for the averaged version of the losses. 160 | for l in losses + [total_loss]: 161 | # Name each loss as '(raw)' and name the moving average version of the loss 162 | # as the original loss name. 163 | tf.summary.scalar(l.op.name +' (raw)', l) 164 | tf.summary.scalar(l.op.name, loss_averages.average(l)) 165 | 166 | return loss_averages_op 167 | 168 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True): 169 | # Generate moving averages of all losses and associated summaries. 170 | loss_averages_op = _add_loss_summaries(total_loss) 171 | 172 | # Compute gradients. 173 | with tf.control_dependencies([loss_averages_op]): 174 | if optimizer=='ADAGRAD': 175 | opt = tf.train.AdagradOptimizer(learning_rate) 176 | elif optimizer=='ADADELTA': 177 | opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) 178 | elif optimizer=='ADAM': 179 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) 180 | elif optimizer=='RMSPROP': 181 | opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) 182 | elif optimizer=='MOM': 183 | opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True) 184 | else: 185 | raise ValueError('Invalid optimization algorithm') 186 | 187 | grads = opt.compute_gradients(total_loss, update_gradient_vars) 188 | 189 | # Apply gradients. 190 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 191 | 192 | # Add histograms for trainable variables. 193 | if log_histograms: 194 | for var in tf.trainable_variables(): 195 | tf.summary.histogram(var.op.name, var) 196 | 197 | # Add histograms for gradients. 198 | if log_histograms: 199 | for grad, var in grads: 200 | if grad is not None: 201 | tf.summary.histogram(var.op.name + '/gradients', grad) 202 | 203 | # Track the moving averages of all trainable variables. 
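    # A shadow, exponentially averaged copy of every trainable variable is maintained with decay
    # moving_average_decay; the control dependency below makes the average update run together
    # with the gradient step whenever train_op is executed.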
204 | variable_averages = tf.train.ExponentialMovingAverage( 205 | moving_average_decay, global_step) 206 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 207 | 208 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]): 209 | train_op = tf.no_op(name='train') 210 | 211 | return train_op 212 | 213 | def prewhiten(x): 214 | mean = np.mean(x) 215 | std = np.std(x) 216 | std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) 217 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 218 | return y 219 | 220 | def crop(image, random_crop, image_size): 221 | if image.shape[1]>image_size: 222 | sz1 = int(image.shape[1]//2) 223 | sz2 = int(image_size//2) 224 | if random_crop: 225 | diff = sz1-sz2 226 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 227 | else: 228 | (h, v) = (0,0) 229 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 230 | return image 231 | 232 | def flip(image, random_flip): 233 | if random_flip and np.random.choice([True, False]): 234 | image = np.fliplr(image) 235 | return image 236 | 237 | def to_rgb(img): 238 | w, h = img.shape 239 | ret = np.empty((w, h, 3), dtype=np.uint8) 240 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 241 | return ret 242 | 243 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 244 | nrof_samples = len(image_paths) 245 | images = np.zeros((nrof_samples, image_size, image_size, 3)) 246 | for i in range(nrof_samples): 247 | img = misc.imread(image_paths[i]) 248 | if img.ndim == 2: 249 | img = to_rgb(img) 250 | if do_prewhiten: 251 | img = prewhiten(img) 252 | img = crop(img, do_random_crop, image_size) 253 | img = flip(img, do_random_flip) 254 | images[i,:,:,:] = img 255 | return images 256 | 257 | def get_label_batch(label_data, batch_size, batch_index): 258 | nrof_examples = np.size(label_data, 0) 259 | j = batch_index*batch_size % nrof_examples 260 | if j+batch_size<=nrof_examples: 261 | batch = label_data[j:j+batch_size] 262 | else: 263 | x1 = label_data[j:nrof_examples] 264 | x2 = label_data[0:nrof_examples-j] 265 | batch = np.vstack([x1,x2]) 266 | batch_int = batch.astype(np.int64) 267 | return batch_int 268 | 269 | def get_batch(image_data, batch_size, batch_index): 270 | nrof_examples = np.size(image_data, 0) 271 | j = batch_index*batch_size % nrof_examples 272 | if j+batch_size<=nrof_examples: 273 | batch = image_data[j:j+batch_size,:,:,:] 274 | else: 275 | x1 = image_data[j:nrof_examples,:,:,:] 276 | x2 = image_data[0:nrof_examples-j,:,:,:] 277 | batch = np.vstack([x1,x2]) 278 | batch_float = batch.astype(np.float32) 279 | return batch_float 280 | 281 | def get_triplet_batch(triplets, batch_index, batch_size): 282 | ax, px, nx = triplets 283 | a = get_batch(ax, int(batch_size/3), batch_index) 284 | p = get_batch(px, int(batch_size/3), batch_index) 285 | n = get_batch(nx, int(batch_size/3), batch_index) 286 | batch = np.vstack([a, p, n]) 287 | return batch 288 | 289 | def get_learning_rate_from_file(filename, epoch): 290 | with open(filename, 'r') as f: 291 | for line in f.readlines(): 292 | line = line.split('#', 1)[0] 293 | if line: 294 | par = line.strip().split(':') 295 | e = int(par[0]) 296 | if par[1]=='-': 297 | lr = -1 298 | else: 299 | lr = float(par[1]) 300 | if e <= epoch: 301 | learning_rate = lr 302 | else: 303 | return learning_rate 304 | 305 | class ImageClass(): 306 | "Stores the paths to images for a given class" 307 | def __init__(self, name, image_paths): 308 | self.name = name 309 | 
self.image_paths = image_paths 310 | 311 | def __str__(self): 312 | return self.name + ', ' + str(len(self.image_paths)) + ' images' 313 | 314 | def __len__(self): 315 | return len(self.image_paths) 316 | 317 | def get_dataset(path, has_class_directories=True): 318 | dataset = [] 319 | path_exp = os.path.expanduser(path) 320 | classes = [path for path in os.listdir(path_exp) \ 321 | if os.path.isdir(os.path.join(path_exp, path))] 322 | classes.sort() 323 | nrof_classes = len(classes) 324 | for i in range(nrof_classes): 325 | class_name = classes[i] 326 | facedir = os.path.join(path_exp, class_name) 327 | image_paths = get_image_paths(facedir) 328 | dataset.append(ImageClass(class_name, image_paths)) 329 | 330 | return dataset 331 | 332 | def get_image_paths(facedir): 333 | image_paths = [] 334 | if os.path.isdir(facedir): 335 | images = os.listdir(facedir) 336 | image_paths = [os.path.join(facedir,img) for img in images] 337 | return image_paths 338 | 339 | def split_dataset(dataset, split_ratio, min_nrof_images_per_class, mode): 340 | if mode=='SPLIT_CLASSES': 341 | nrof_classes = len(dataset) 342 | class_indices = np.arange(nrof_classes) 343 | np.random.shuffle(class_indices) 344 | split = int(round(nrof_classes*(1-split_ratio))) 345 | train_set = [dataset[i] for i in class_indices[0:split]] 346 | test_set = [dataset[i] for i in class_indices[split:-1]] 347 | elif mode=='SPLIT_IMAGES': 348 | train_set = [] 349 | test_set = [] 350 | for cls in dataset: 351 | paths = cls.image_paths 352 | np.random.shuffle(paths) 353 | nrof_images_in_class = len(paths) 354 | split = int(math.floor(nrof_images_in_class*(1-split_ratio))) 355 | if split==nrof_images_in_class: 356 | split = nrof_images_in_class-1 357 | if split>=min_nrof_images_per_class and nrof_images_in_class-split>=1: 358 | train_set.append(ImageClass(cls.name, paths[:split])) 359 | test_set.append(ImageClass(cls.name, paths[split:])) 360 | else: 361 | raise ValueError('Invalid train/test split mode "%s"' % mode) 362 | return train_set, test_set 363 | 364 | 365 | def load_model(model, input_map=None): 366 | # Check if the model is a model directory (containing a metagraph and a checkpoint file) 367 | # or if it is a protobuf file with a frozen graph 368 | model_exp = os.path.expanduser(model) 369 | if (os.path.isfile(model_exp)): 370 | print('Model filename: %s' % model_exp) 371 | with gfile.FastGFile(model_exp,'rb') as f: 372 | graph_def = tf.GraphDef() 373 | graph_def.ParseFromString(f.read()) 374 | tf.import_graph_def(graph_def, input_map=input_map, name='') 375 | else: 376 | print('Model directory: %s' % model_exp) 377 | meta_file, ckpt_file = get_model_filenames(model_exp) 378 | 379 | print('Metagraph file: %s' % meta_file) 380 | print('Checkpoint file: %s' % ckpt_file) 381 | 382 | saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map) 383 | saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file)) 384 | 385 | 386 | def get_model_filenames(model_dir): 387 | files = os.listdir(model_dir) 388 | meta_files = [s for s in files if s.endswith('.meta')] 389 | if len(meta_files)==0: 390 | raise ValueError('No meta file found in the model directory (%s)' % model_dir) 391 | elif len(meta_files)>1: 392 | raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir) 393 | meta_file = meta_files[0] 394 | ckpt = tf.train.get_checkpoint_state(model_dir) 395 | if ckpt and ckpt.model_checkpoint_path: 396 | ckpt_file = 
os.path.basename(ckpt.model_checkpoint_path) 397 | return meta_file, ckpt_file 398 | 399 | meta_files = [s for s in files if '.ckpt' in s] 400 | max_step = -1 401 | for f in files: 402 | step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f) 403 | if step_str is not None and len(step_str.groups())>=2: 404 | step = int(step_str.groups()[1]) 405 | if step > max_step: 406 | max_step = step 407 | ckpt_file = step_str.groups()[0] 408 | return meta_file, ckpt_file 409 | 410 | 411 | def distance(embeddings1, embeddings2, distance_metric=0): 412 | if distance_metric==0: 413 | # Euclidean distance 414 | diff = np.subtract(embeddings1, embeddings2) 415 | dist = np.sum(np.square(diff),1) 416 | elif distance_metric==1: 417 | # Distance based on cosine similarity 418 | dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) 419 | norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) 420 | similarity = dot / norm 421 | dist = np.arccos(similarity) / math.pi 422 | else: 423 | raise ValueError('Undefined distance metric %d' % distance_metric) 424 | 425 | return dist 426 | 427 | 428 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): 429 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 430 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 431 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 432 | nrof_thresholds = len(thresholds) 433 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 434 | 435 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 436 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 437 | accuracy = np.zeros((nrof_folds)) 438 | 439 | indices = np.arange(nrof_pairs) 440 | 441 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 442 | if subtract_mean: 443 | mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) 444 | else: 445 | mean = 0.0 446 | dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) 447 | 448 | # Find the best threshold for the fold 449 | acc_train = np.zeros((nrof_thresholds)) 450 | for threshold_idx, threshold in enumerate(thresholds): 451 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 452 | best_threshold_index = np.argmax(acc_train) 453 | for threshold_idx, threshold in enumerate(thresholds): 454 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) 455 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 456 | 457 | tpr = np.mean(tprs,0) 458 | fpr = np.mean(fprs,0) 459 | return tpr, fpr, accuracy 460 | 461 | 462 | def calculate_accuracy(threshold, dist, actual_issame): 463 | predict_issame = np.less(dist, threshold) 464 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 465 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 466 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 467 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 468 | 469 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 470 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 471 | acc = float(tp+tn)/dist.size 472 | return tpr, fpr, acc 473 | 474 | 475 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False): 476 | 
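    # Cross-validated VAL@FAR estimate: on each fold a threshold is chosen on the training split
    # (by linear interpolation) so that the false accept rate reaches far_target, then the
    # validation rate (true accepts / genuine pairs) and the FAR are measured on the test split.
    # Returns the mean VAL, its standard deviation across folds, and the mean FAR.
    # A typical call uses thresholds such as np.arange(0, 4, 0.001) and far_target=1e-3.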
assert(embeddings1.shape[0] == embeddings2.shape[0]) 477 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 478 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 479 | nrof_thresholds = len(thresholds) 480 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 481 | 482 | val = np.zeros(nrof_folds) 483 | far = np.zeros(nrof_folds) 484 | 485 | indices = np.arange(nrof_pairs) 486 | 487 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 488 | if subtract_mean: 489 | mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) 490 | else: 491 | mean = 0.0 492 | dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) 493 | 494 | # Find the threshold that gives FAR = far_target 495 | far_train = np.zeros(nrof_thresholds) 496 | for threshold_idx, threshold in enumerate(thresholds): 497 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 498 | if np.max(far_train)>=far_target: 499 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 500 | threshold = f(far_target) 501 | else: 502 | threshold = 0.0 503 | 504 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 505 | 506 | val_mean = np.mean(val) 507 | far_mean = np.mean(far) 508 | val_std = np.std(val) 509 | return val_mean, val_std, far_mean 510 | 511 | 512 | def calculate_val_far(threshold, dist, actual_issame): 513 | predict_issame = np.less(dist, threshold) 514 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 515 | false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 516 | n_same = np.sum(actual_issame) 517 | n_diff = np.sum(np.logical_not(actual_issame)) 518 | val = float(true_accept) / float(n_same) 519 | far = float(false_accept) / float(n_diff) 520 | return val, far 521 | 522 | def store_revision_info(src_path, output_dir, arg_string): 523 | try: 524 | # Get git hash 525 | cmd = ['git', 'rev-parse', 'HEAD'] 526 | gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) 527 | (stdout, _) = gitproc.communicate() 528 | git_hash = stdout.strip() 529 | except OSError as e: 530 | git_hash = ' '.join(cmd) + ': ' + e.strerror 531 | 532 | try: 533 | # Get local changes 534 | cmd = ['git', 'diff', 'HEAD'] 535 | gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) 536 | (stdout, _) = gitproc.communicate() 537 | git_diff = stdout.strip() 538 | except OSError as e: 539 | git_diff = ' '.join(cmd) + ': ' + e.strerror 540 | 541 | # Store a text file in the log directory 542 | rev_info_filename = os.path.join(output_dir, 'revision_info.txt') 543 | with open(rev_info_filename, "w") as text_file: 544 | text_file.write('arguments: %s\n--------------------\n' % arg_string) 545 | text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__) # @UndefinedVariable 546 | text_file.write('git hash: %s\n--------------------\n' % git_hash) 547 | text_file.write('%s' % git_diff) 548 | 549 | def list_variables(filename): 550 | reader = training.NewCheckpointReader(filename) 551 | variable_map = reader.get_variable_to_shape_map() 552 | names = sorted(variable_map.keys()) 553 | return names 554 | 555 | def put_images_on_grid(images, shape=(16,8)): 556 | nrof_images = images.shape[0] 557 | img_size = images.shape[1] 558 | bw = 3 559 | img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32) 560 | for i in range(shape[1]): 561 | x_start = i*(img_size+bw)+bw 562 | for j in range(shape[0]): 563 | 
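            # Tiles are placed row-major: image i*shape[0] + j lands at grid row i, column j,
            # with a bw-pixel border kept around every tile.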
img_index = i*shape[0]+j 564 | if img_index>=nrof_images: 565 | break 566 | y_start = j*(img_size+bw)+bw 567 | img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :] 568 | if img_index>=nrof_images: 569 | break 570 | return img 571 | 572 | def write_arguments_to_file(args, filename): 573 | with open(filename, 'w') as f: 574 | for key, value in iteritems(vars(args)): 575 | f.write('%s: %s\n' % (key, str(value))) 576 | -------------------------------------------------------------------------------- /Forward Propagation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2018-11-06T22:24:48.482520Z", 9 | "start_time": "2018-11-06T22:24:48.213241Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import numpy as np\n", 16 | "import progressbar\n", 17 | "from imageio import imread" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2018-11-06T22:24:49.371199Z", 26 | "start_time": "2018-11-06T22:24:48.700419Z" 27 | } 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "initial_path = '/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/lfw_mtcnnpy_160'\n", 32 | "os.listdir(initial_path)\n", 33 | "\n", 34 | "dirs = [os.path.join(initial_path, d) for d in os.listdir(initial_path) if os.path.isdir(os.path.join(initial_path, d))]" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": { 41 | "ExecuteTime": { 42 | "end_time": "2018-11-06T22:24:57.928466Z", 43 | "start_time": "2018-11-06T22:24:57.317786Z" 44 | } 45 | }, 46 | "outputs": [ 47 | { 48 | "name": "stderr", 49 | "output_type": "stream", 50 | "text": [ 51 | "100% (5749 of 5749) |####################| Elapsed Time: 0:00:00 Time: 0:00:00\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "import progressbar\n", 57 | "\n", 58 | "inputs = []\n", 59 | "for d in progressbar.progressbar(dirs):\n", 60 | " for f in os.listdir(d):\n", 61 | "# d = d.replace('/Users/pedroprates/Google Drive/', '/gdrive/My Drive/')\n", 62 | " if f.endswith('png') or f.endswith('jpg') or f.endswith('jpeg'):\n", 63 | " inputs.append(os.path.join(d, f))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 16, 69 | "metadata": { 70 | "ExecuteTime": { 71 | "end_time": "2018-10-18T00:12:41.487259Z", 72 | "start_time": "2018-10-18T00:12:41.356799Z" 73 | } 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stderr", 78 | "output_type": "stream", 79 | "text": [ 80 | "100% (13233 of 13233) |##################| Elapsed Time: 0:00:00 Time: 0:00:00\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "outputs = []\n", 86 | "\n", 87 | "for inp in progressbar.progressbar(inputs):\n", 88 | " filename = inp.split('/')[-1]\n", 89 | " path = inp.split('/')[:-1]\n", 90 | " \n", 91 | " filename = 'output_resnet/' + filename.split('.')[0] + '.npy'\n", 92 | " path = '/'.join(path)\n", 93 | "# path = path.replace('/Users/pedroprates/Google Drive/', '/gdrive/My Drive/')\n", 94 | " \n", 95 | " outputs.append(os.path.join(path, filename))" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 17, 101 | "metadata": { 102 | "ExecuteTime": { 103 | "end_time": "2018-10-18T00:12:42.733095Z", 104 | "start_time": "2018-10-18T00:12:42.590978Z" 105 | } 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "inputs_np = 
np.array(inputs)\n", 110 | "outputs_np = np.array(outputs)\n", 111 | "\n", 112 | "np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/input_resnet_mac.npy', inputs_np)\n", 113 | "np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/output_resnet_mac.npy', outputs_np)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 22, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2018-10-18T00:15:47.235962Z", 122 | "start_time": "2018-10-18T00:15:47.160424Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "from imageio import imread" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 23, 133 | "metadata": { 134 | "ExecuteTime": { 135 | "end_time": "2018-10-18T00:16:46.011880Z", 136 | "start_time": "2018-10-18T00:15:54.521347Z" 137 | } 138 | }, 139 | "outputs": [ 140 | { 141 | "name": "stderr", 142 | "output_type": "stream", 143 | "text": [ 144 | "| | # | 52 Elapsed Time: 0:00:00" 145 | ] 146 | }, 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "[RUNNING] X\n" 152 | ] 153 | }, 154 | { 155 | "name": "stderr", 156 | "output_type": "stream", 157 | "text": [ 158 | "| | # | 13232 Elapsed Time: 0:00:38\n", 159 | "| | # | 123 Elapsed Time: 0:00:00" 160 | ] 161 | }, 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "[RUNNING] y Train\n" 167 | ] 168 | }, 169 | { 170 | "name": "stderr", 171 | "output_type": "stream", 172 | "text": [ 173 | "| | # | 13232 Elapsed Time: 0:00:12\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "X = np.zeros((inputs_np.shape[0], 160, 160, 3))\n", 179 | "y = np.zeros((outputs_np.shape[0], 512))\n", 180 | "\n", 181 | "print(\"[RUNNING] X\")\n", 182 | "for ix, element in progressbar.progressbar(enumerate(inputs)):\n", 183 | " im = imread(element)\n", 184 | " X[ix, :, :, :] = im\n", 185 | "\n", 186 | "print(\"[RUNNING] y Train\")\n", 187 | "for ix, element in progressbar.progressbar(enumerate(outputs)):\n", 188 | " em = np.load(element)\n", 189 | " y[ix, :] = em" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 27, 195 | "metadata": { 196 | "ExecuteTime": { 197 | "end_time": "2018-10-18T00:25:53.277355Z", 198 | "start_time": "2018-10-18T00:24:00.565125Z" 199 | } 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/X.npy', X)\n", 204 | "np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/y.npy', y)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "# Teste" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 5, 217 | "metadata": { 218 | "ExecuteTime": { 219 | "end_time": "2018-11-06T22:25:07.405347Z", 220 | "start_time": "2018-11-06T22:25:07.399740Z" 221 | } 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "import keras\n", 226 | "import os\n", 227 | "import tensorflow as tf\n", 228 | "import progressbar\n", 229 | "import numpy as np\n", 230 | "from imageio import imread" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 6, 236 | "metadata": { 237 | "ExecuteTime": { 238 | "end_time": "2018-11-06T22:25:21.826440Z", 239 | "start_time": "2018-11-06T22:25:21.811981Z" 240 | } 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "import keras.backend as K\n", 245 | "# Custom loss function\n", 246 | "def distillation_loss(y_true, y_pred):\n", 247 | " return K.square(y_pred - 
y_true)\n", 248 | "\n", 249 | "def max_diff(y_true, y_pred):\n", 250 | " return K.max(K.square(y_pred - y_true), axis=-1)\n", 251 | "\n", 252 | "def sum_diff(y_true, y_pred):\n", 253 | " return K.sum(K.square(y_pred - y_true), axis=-1)\n", 254 | "\n", 255 | "keras.losses.distillation_loss = distillation_loss\n", 256 | "keras.metrics.max_diff = max_diff\n", 257 | "keras.metrics.sum_diff = sum_diff" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 7, 263 | "metadata": { 264 | "ExecuteTime": { 265 | "end_time": "2018-11-06T22:25:44.158801Z", 266 | "start_time": "2018-11-06T22:25:28.787974Z" 267 | } 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "model = keras.models.load_model('/Users/pedroprates/Google Drive/FaceRecognition/models/mobile-net/mobilenetv1_v2.h5')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 8, 277 | "metadata": { 278 | "ExecuteTime": { 279 | "end_time": "2018-11-06T22:50:14.555540Z", 280 | "start_time": "2018-11-06T22:25:46.211450Z" 281 | } 282 | }, 283 | "outputs": [ 284 | { 285 | "name": "stderr", 286 | "output_type": "stream", 287 | "text": [ 288 | "100% (5755 of 5755) |####################| Elapsed Time: 0:24:28 Time: 0:24:28\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "base_path = '/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/lfw_mtcnnpy_160/'\n", 294 | "list_folders = os.listdir(base_path)\n", 295 | "list_folders = [os.path.join(base_path, x) for x in list_folders]\n", 296 | "\n", 297 | "for folder in progressbar.progressbar(list_folders):\n", 298 | " if not os.path.isdir(folder):\n", 299 | " continue\n", 300 | " \n", 301 | " list_images = os.listdir(folder)\n", 302 | " list_images = [os.path.join(folder, image) for image in list_images]\n", 303 | " list_images = list(filter(lambda x: os.path.isfile(x), list_images))\n", 304 | " list_images = list(filter(lambda x: '.DS_Store' not in x, list_images))\n", 305 | " filenames = [x.split('/')[-1].split('.')[0] for x in list_images]\n", 306 | " output_filenames = [x + '.npy' for x in filenames]\n", 307 | " output_folder = os.path.join(folder, 'mobilenetv1_v2')\n", 308 | "\n", 309 | " # Get the embeddings\n", 310 | " images = np.array([imread(f) / 255 for f in list_images])\n", 311 | " embeddings = model.predict(images)\n", 312 | " \n", 313 | " if not os.path.exists(os.path.join(base_path, output_folder)):\n", 314 | " os.makedirs(os.path.join(base_path, output_folder))\n", 315 | " for idx, embedding in enumerate(embeddings):\n", 316 | " emb_to_save = embedding.reshape(1, *embedding.shape)\n", 317 | " np.save(os.path.join(output_folder, output_filenames[idx]), emb_to_save)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 9, 323 | "metadata": { 324 | "ExecuteTime": { 325 | "end_time": "2018-11-06T23:38:26.770284Z", 326 | "start_time": "2018-11-06T23:38:26.568794Z" 327 | } 328 | }, 329 | "outputs": [ 330 | { 331 | "name": "stdout", 332 | "output_type": "stream", 333 | "text": [ 334 | "_________________________________________________________________\n", 335 | "Layer (type) Output Shape Param # \n", 336 | "=================================================================\n", 337 | "input_2 (InputLayer) (None, 160, 160, 3) 0 \n", 338 | "_________________________________________________________________\n", 339 | "conv1_pad (ZeroPadding2D) (None, 161, 161, 3) 0 \n", 340 | "_________________________________________________________________\n", 341 | "conv1 (Conv2D) (None, 80, 80, 32) 864 \n", 342 | 
"_________________________________________________________________\n", 343 | "conv1_bn (BatchNormalization (None, 80, 80, 32) 128 \n", 344 | "_________________________________________________________________\n", 345 | "conv1_relu (ReLU) (None, 80, 80, 32) 0 \n", 346 | "_________________________________________________________________\n", 347 | "conv_dw_1 (DepthwiseConv2D) (None, 80, 80, 32) 288 \n", 348 | "_________________________________________________________________\n", 349 | "conv_dw_1_bn (BatchNormaliza (None, 80, 80, 32) 128 \n", 350 | "_________________________________________________________________\n", 351 | "conv_dw_1_relu (ReLU) (None, 80, 80, 32) 0 \n", 352 | "_________________________________________________________________\n", 353 | "conv_pw_1 (Conv2D) (None, 80, 80, 64) 2048 \n", 354 | "_________________________________________________________________\n", 355 | "conv_pw_1_bn (BatchNormaliza (None, 80, 80, 64) 256 \n", 356 | "_________________________________________________________________\n", 357 | "conv_pw_1_relu (ReLU) (None, 80, 80, 64) 0 \n", 358 | "_________________________________________________________________\n", 359 | "conv_pad_2 (ZeroPadding2D) (None, 81, 81, 64) 0 \n", 360 | "_________________________________________________________________\n", 361 | "conv_dw_2 (DepthwiseConv2D) (None, 40, 40, 64) 576 \n", 362 | "_________________________________________________________________\n", 363 | "conv_dw_2_bn (BatchNormaliza (None, 40, 40, 64) 256 \n", 364 | "_________________________________________________________________\n", 365 | "conv_dw_2_relu (ReLU) (None, 40, 40, 64) 0 \n", 366 | "_________________________________________________________________\n", 367 | "conv_pw_2 (Conv2D) (None, 40, 40, 128) 8192 \n", 368 | "_________________________________________________________________\n", 369 | "conv_pw_2_bn (BatchNormaliza (None, 40, 40, 128) 512 \n", 370 | "_________________________________________________________________\n", 371 | "conv_pw_2_relu (ReLU) (None, 40, 40, 128) 0 \n", 372 | "_________________________________________________________________\n", 373 | "conv_dw_3 (DepthwiseConv2D) (None, 40, 40, 128) 1152 \n", 374 | "_________________________________________________________________\n", 375 | "conv_dw_3_bn (BatchNormaliza (None, 40, 40, 128) 512 \n", 376 | "_________________________________________________________________\n", 377 | "conv_dw_3_relu (ReLU) (None, 40, 40, 128) 0 \n", 378 | "_________________________________________________________________\n", 379 | "conv_pw_3 (Conv2D) (None, 40, 40, 128) 16384 \n", 380 | "_________________________________________________________________\n", 381 | "conv_pw_3_bn (BatchNormaliza (None, 40, 40, 128) 512 \n", 382 | "_________________________________________________________________\n", 383 | "conv_pw_3_relu (ReLU) (None, 40, 40, 128) 0 \n", 384 | "_________________________________________________________________\n", 385 | "conv_pad_4 (ZeroPadding2D) (None, 41, 41, 128) 0 \n", 386 | "_________________________________________________________________\n", 387 | "conv_dw_4 (DepthwiseConv2D) (None, 20, 20, 128) 1152 \n", 388 | "_________________________________________________________________\n", 389 | "conv_dw_4_bn (BatchNormaliza (None, 20, 20, 128) 512 \n", 390 | "_________________________________________________________________\n", 391 | "conv_dw_4_relu (ReLU) (None, 20, 20, 128) 0 \n", 392 | "_________________________________________________________________\n", 393 | "conv_pw_4 (Conv2D) (None, 20, 20, 256) 32768 \n", 
394 | "_________________________________________________________________\n", 395 | "conv_pw_4_bn (BatchNormaliza (None, 20, 20, 256) 1024 \n", 396 | "_________________________________________________________________\n", 397 | "conv_pw_4_relu (ReLU) (None, 20, 20, 256) 0 \n", 398 | "_________________________________________________________________\n", 399 | "conv_dw_5 (DepthwiseConv2D) (None, 20, 20, 256) 2304 \n", 400 | "_________________________________________________________________\n", 401 | "conv_dw_5_bn (BatchNormaliza (None, 20, 20, 256) 1024 \n", 402 | "_________________________________________________________________\n", 403 | "conv_dw_5_relu (ReLU) (None, 20, 20, 256) 0 \n", 404 | "_________________________________________________________________\n", 405 | "conv_pw_5 (Conv2D) (None, 20, 20, 256) 65536 \n", 406 | "_________________________________________________________________\n", 407 | "conv_pw_5_bn (BatchNormaliza (None, 20, 20, 256) 1024 \n", 408 | "_________________________________________________________________\n", 409 | "conv_pw_5_relu (ReLU) (None, 20, 20, 256) 0 \n", 410 | "_________________________________________________________________\n", 411 | "conv_pad_6 (ZeroPadding2D) (None, 21, 21, 256) 0 \n", 412 | "_________________________________________________________________\n", 413 | "conv_dw_6 (DepthwiseConv2D) (None, 10, 10, 256) 2304 \n", 414 | "_________________________________________________________________\n", 415 | "conv_dw_6_bn (BatchNormaliza (None, 10, 10, 256) 1024 \n", 416 | "_________________________________________________________________\n", 417 | "conv_dw_6_relu (ReLU) (None, 10, 10, 256) 0 \n", 418 | "_________________________________________________________________\n", 419 | "conv_pw_6 (Conv2D) (None, 10, 10, 512) 131072 \n", 420 | "_________________________________________________________________\n", 421 | "conv_pw_6_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 422 | "_________________________________________________________________\n", 423 | "conv_pw_6_relu (ReLU) (None, 10, 10, 512) 0 \n", 424 | "_________________________________________________________________\n", 425 | "conv_dw_7 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 426 | "_________________________________________________________________\n", 427 | "conv_dw_7_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 428 | "_________________________________________________________________\n", 429 | "conv_dw_7_relu (ReLU) (None, 10, 10, 512) 0 \n", 430 | "_________________________________________________________________\n", 431 | "conv_pw_7 (Conv2D) (None, 10, 10, 512) 262144 \n", 432 | "_________________________________________________________________\n", 433 | "conv_pw_7_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 434 | "_________________________________________________________________\n", 435 | "conv_pw_7_relu (ReLU) (None, 10, 10, 512) 0 \n", 436 | "_________________________________________________________________\n", 437 | "conv_dw_8 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 438 | "_________________________________________________________________\n", 439 | "conv_dw_8_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 440 | "_________________________________________________________________\n", 441 | "conv_dw_8_relu (ReLU) (None, 10, 10, 512) 0 \n", 442 | "_________________________________________________________________\n", 443 | "conv_pw_8 (Conv2D) (None, 10, 10, 512) 262144 \n", 444 | "_________________________________________________________________\n", 445 | "conv_pw_8_bn 
(BatchNormaliza (None, 10, 10, 512) 2048 \n", 446 | "_________________________________________________________________\n", 447 | "conv_pw_8_relu (ReLU) (None, 10, 10, 512) 0 \n", 448 | "_________________________________________________________________\n", 449 | "conv_dw_9 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 450 | "_________________________________________________________________\n", 451 | "conv_dw_9_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 452 | "_________________________________________________________________\n", 453 | "conv_dw_9_relu (ReLU) (None, 10, 10, 512) 0 \n", 454 | "_________________________________________________________________\n", 455 | "conv_pw_9 (Conv2D) (None, 10, 10, 512) 262144 \n", 456 | "_________________________________________________________________\n", 457 | "conv_pw_9_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 458 | "_________________________________________________________________\n", 459 | "conv_pw_9_relu (ReLU) (None, 10, 10, 512) 0 \n", 460 | "_________________________________________________________________\n", 461 | "conv_dw_10 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 462 | "_________________________________________________________________\n", 463 | "conv_dw_10_bn (BatchNormaliz (None, 10, 10, 512) 2048 \n", 464 | "_________________________________________________________________\n", 465 | "conv_dw_10_relu (ReLU) (None, 10, 10, 512) 0 \n", 466 | "_________________________________________________________________\n", 467 | "conv_pw_10 (Conv2D) (None, 10, 10, 512) 262144 \n", 468 | "_________________________________________________________________\n", 469 | "conv_pw_10_bn (BatchNormaliz (None, 10, 10, 512) 2048 \n", 470 | "_________________________________________________________________\n", 471 | "conv_pw_10_relu (ReLU) (None, 10, 10, 512) 0 \n", 472 | "_________________________________________________________________\n", 473 | "conv_dw_11 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 474 | "_________________________________________________________________\n", 475 | "conv_dw_11_bn (BatchNormaliz (None, 10, 10, 512) 2048 \n", 476 | "_________________________________________________________________\n", 477 | "conv_dw_11_relu (ReLU) (None, 10, 10, 512) 0 \n", 478 | "_________________________________________________________________\n", 479 | "conv_pw_11 (Conv2D) (None, 10, 10, 512) 262144 \n", 480 | "_________________________________________________________________\n", 481 | "conv_pw_11_bn (BatchNormaliz (None, 10, 10, 512) 2048 \n", 482 | "_________________________________________________________________\n", 483 | "conv_pw_11_relu (ReLU) (None, 10, 10, 512) 0 \n", 484 | "_________________________________________________________________\n", 485 | "conv_pad_12 (ZeroPadding2D) (None, 11, 11, 512) 0 \n", 486 | "_________________________________________________________________\n", 487 | "conv_dw_12 (DepthwiseConv2D) (None, 5, 5, 512) 4608 \n", 488 | "_________________________________________________________________\n", 489 | "conv_dw_12_bn (BatchNormaliz (None, 5, 5, 512) 2048 \n", 490 | "_________________________________________________________________\n", 491 | "conv_dw_12_relu (ReLU) (None, 5, 5, 512) 0 \n", 492 | "_________________________________________________________________\n", 493 | "conv_pw_12 (Conv2D) (None, 5, 5, 1024) 524288 \n", 494 | "_________________________________________________________________\n", 495 | "conv_pw_12_bn (BatchNormaliz (None, 5, 5, 1024) 4096 \n", 496 | 
"_________________________________________________________________\n", 497 | "conv_pw_12_relu (ReLU) (None, 5, 5, 1024) 0 \n", 498 | "_________________________________________________________________\n", 499 | "conv_dw_13 (DepthwiseConv2D) (None, 5, 5, 1024) 9216 \n", 500 | "_________________________________________________________________\n", 501 | "conv_dw_13_bn (BatchNormaliz (None, 5, 5, 1024) 4096 \n", 502 | "_________________________________________________________________\n", 503 | "conv_dw_13_relu (ReLU) (None, 5, 5, 1024) 0 \n", 504 | "_________________________________________________________________\n", 505 | "conv_pw_13 (Conv2D) (None, 5, 5, 1024) 1048576 \n", 506 | "_________________________________________________________________\n", 507 | "conv_pw_13_bn (BatchNormaliz (None, 5, 5, 1024) 4096 \n", 508 | "_________________________________________________________________\n", 509 | "conv_pw_13_relu (ReLU) (None, 5, 5, 1024) 0 \n", 510 | "_________________________________________________________________\n", 511 | "Conv_Last (Conv2D) (None, 5, 5, 512) 13107200 \n", 512 | "_________________________________________________________________\n", 513 | "batch_normalization_1 (Batch (None, 5, 5, 512) 2048 \n", 514 | "_________________________________________________________________\n", 515 | "re_lu_1 (ReLU) (None, 5, 5, 512) 0 \n", 516 | "_________________________________________________________________\n", 517 | "global_average_pooling2d_1 ( (None, 512) 0 \n", 518 | "=================================================================\n", 519 | "Total params: 16,338,112\n", 520 | "Trainable params: 16,315,200\n", 521 | "Non-trainable params: 22,912\n", 522 | "_________________________________________________________________\n" 523 | ] 524 | } 525 | ], 526 | "source": [ 527 | "model.summary()" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 10, 533 | "metadata": { 534 | "ExecuteTime": { 535 | "end_time": "2018-11-06T23:39:53.330378Z", 536 | "start_time": "2018-11-06T23:39:20.210293Z" 537 | } 538 | }, 539 | "outputs": [], 540 | "source": [ 541 | "model16 = keras.models.load_model('/Users/pedroprates/Google Drive/FaceRecognition/models/mobile-net/mobilenetv1_v16.h5')" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 13, 547 | "metadata": { 548 | "ExecuteTime": { 549 | "end_time": "2018-11-06T23:43:02.315489Z", 550 | "start_time": "2018-11-06T23:43:02.299993Z" 551 | } 552 | }, 553 | "outputs": [], 554 | "source": [ 555 | "import json\n", 556 | "\n", 557 | "model_json = model.to_json()\n", 558 | "\n", 559 | "with open('models/mobile-net/json-test/modelv2.json', 'w') as f:\n", 560 | " json.dump(model_json, f)" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 14, 566 | "metadata": { 567 | "ExecuteTime": { 568 | "end_time": "2018-11-06T23:43:29.787850Z", 569 | "start_time": "2018-11-06T23:43:22.568565Z" 570 | } 571 | }, 572 | "outputs": [], 573 | "source": [ 574 | "model.save_weights('models/mobile-net/json-test/modelv2_weights.h5')" 575 | ] 576 | } 577 | ], 578 | "metadata": { 579 | "kernelspec": { 580 | "display_name": "Python 3", 581 | "language": "python", 582 | "name": "python3" 583 | }, 584 | "language_info": { 585 | "codemirror_mode": { 586 | "name": "ipython", 587 | "version": 3 588 | }, 589 | "file_extension": ".py", 590 | "mimetype": "text/x-python", 591 | "name": "python", 592 | "nbconvert_exporter": "python", 593 | "pygments_lexer": "ipython3", 594 | "version": "3.6.4" 595 | } 596 | }, 597 | "nbformat": 
4, 598 | "nbformat_minor": 2 599 | } 600 | -------------------------------------------------------------------------------- /src/align/detect_face.py: -------------------------------------------------------------------------------- 1 | """ Tensorflow implementation of the face detection / alignment algorithm found at 2 | https://github.com/kpzhang93/MTCNN_face_detection_alignment 3 | """ 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | from six import string_types, iteritems 30 | 31 | import numpy as np 32 | import tensorflow as tf 33 | #from math import floor 34 | import cv2 35 | import os 36 | 37 | def layer(op): 38 | """Decorator for composable network layers.""" 39 | 40 | def layer_decorated(self, *args, **kwargs): 41 | # Automatically set a name if not provided. 42 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 43 | # Figure out the layer inputs. 44 | if len(self.terminals) == 0: 45 | raise RuntimeError('No input variables found for layer %s.' % name) 46 | elif len(self.terminals) == 1: 47 | layer_input = self.terminals[0] 48 | else: 49 | layer_input = list(self.terminals) 50 | # Perform the operation and get the output. 51 | layer_output = op(self, layer_input, *args, **kwargs) 52 | # Add to layer LUT. 53 | self.layers[name] = layer_output 54 | # This output is now the input for the next layer. 55 | self.feed(layer_output) 56 | # Return self for chained calls. 57 | return self 58 | 59 | return layer_decorated 60 | 61 | class Network(object): 62 | 63 | def __init__(self, inputs, trainable=True): 64 | # The input nodes for this network 65 | self.inputs = inputs 66 | # The current list of terminal nodes 67 | self.terminals = [] 68 | # Mapping from layer names to layers 69 | self.layers = dict(inputs) 70 | # If true, the resulting variables are set as trainable 71 | self.trainable = trainable 72 | 73 | self.setup() 74 | 75 | def setup(self): 76 | """Construct the network. """ 77 | raise NotImplementedError('Must be implemented by the subclass.') 78 | 79 | def load(self, data_path, session, ignore_missing=False): 80 | """Load network weights. 
81 | data_path: The path to the numpy-serialized network weights 82 | session: The current TensorFlow session 83 | ignore_missing: If true, serialized weights for missing layers are ignored. 84 | """ 85 | data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member 86 | 87 | for op_name in data_dict: 88 | with tf.variable_scope(op_name, reuse=True): 89 | for param_name, data in iteritems(data_dict[op_name]): 90 | try: 91 | var = tf.get_variable(param_name) 92 | session.run(var.assign(data)) 93 | except ValueError: 94 | if not ignore_missing: 95 | raise 96 | 97 | def feed(self, *args): 98 | """Set the input(s) for the next operation by replacing the terminal nodes. 99 | The arguments can be either layer names or the actual layers. 100 | """ 101 | assert len(args) != 0 102 | self.terminals = [] 103 | for fed_layer in args: 104 | if isinstance(fed_layer, string_types): 105 | try: 106 | fed_layer = self.layers[fed_layer] 107 | except KeyError: 108 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 109 | self.terminals.append(fed_layer) 110 | return self 111 | 112 | def get_output(self): 113 | """Returns the current network output.""" 114 | return self.terminals[-1] 115 | 116 | def get_unique_name(self, prefix): 117 | """Returns an index-suffixed unique name for the given prefix. 118 | This is used for auto-generating layer names based on the type-prefix. 119 | """ 120 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 121 | return '%s_%d' % (prefix, ident) 122 | 123 | def make_var(self, name, shape): 124 | """Creates a new TensorFlow variable.""" 125 | return tf.get_variable(name, shape, trainable=self.trainable) 126 | 127 | def validate_padding(self, padding): 128 | """Verifies that the padding is one of the supported ones.""" 129 | assert padding in ('SAME', 'VALID') 130 | 131 | @layer 132 | def conv(self, 133 | inp, 134 | k_h, 135 | k_w, 136 | c_o, 137 | s_h, 138 | s_w, 139 | name, 140 | relu=True, 141 | padding='SAME', 142 | group=1, 143 | biased=True): 144 | # Verify that the padding is acceptable 145 | self.validate_padding(padding) 146 | # Get the number of channels in the input 147 | c_i = int(inp.get_shape()[-1]) 148 | # Verify that the grouping parameter is valid 149 | assert c_i % group == 0 150 | assert c_o % group == 0 151 | # Convolution for a given input and kernel 152 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 153 | with tf.variable_scope(name) as scope: 154 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 155 | # This is the common-case. Convolve the input without any further complications. 
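            # Note: only group == 1 is actually handled here; with group > 1 the kernel shape
            # [k_h, k_w, c_i // group, c_o] would no longer match the input channels.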
156 | output = convolve(inp, kernel) 157 | # Add the biases 158 | if biased: 159 | biases = self.make_var('biases', [c_o]) 160 | output = tf.nn.bias_add(output, biases) 161 | if relu: 162 | # ReLU non-linearity 163 | output = tf.nn.relu(output, name=scope.name) 164 | return output 165 | 166 | @layer 167 | def prelu(self, inp, name): 168 | with tf.variable_scope(name): 169 | i = int(inp.get_shape()[-1]) 170 | alpha = self.make_var('alpha', shape=(i,)) 171 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 172 | return output 173 | 174 | @layer 175 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 176 | self.validate_padding(padding) 177 | return tf.nn.max_pool(inp, 178 | ksize=[1, k_h, k_w, 1], 179 | strides=[1, s_h, s_w, 1], 180 | padding=padding, 181 | name=name) 182 | 183 | @layer 184 | def fc(self, inp, num_out, name, relu=True): 185 | with tf.variable_scope(name): 186 | input_shape = inp.get_shape() 187 | if input_shape.ndims == 4: 188 | # The input is spatial. Vectorize it first. 189 | dim = 1 190 | for d in input_shape[1:].as_list(): 191 | dim *= int(d) 192 | feed_in = tf.reshape(inp, [-1, dim]) 193 | else: 194 | feed_in, dim = (inp, input_shape[-1].value) 195 | weights = self.make_var('weights', shape=[dim, num_out]) 196 | biases = self.make_var('biases', [num_out]) 197 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 198 | fc = op(feed_in, weights, biases, name=name) 199 | return fc 200 | 201 | 202 | """ 203 | Multi dimensional softmax, 204 | refer to https://github.com/tensorflow/tensorflow/issues/210 205 | compute softmax along the dimension of target 206 | the native softmax only supports batch_size x dimension 207 | """ 208 | @layer 209 | def softmax(self, target, axis, name=None): 210 | max_axis = tf.reduce_max(target, axis, keepdims=True) 211 | target_exp = tf.exp(target-max_axis) 212 | normalize = tf.reduce_sum(target_exp, axis, keepdims=True) 213 | softmax = tf.div(target_exp, normalize, name) 214 | return softmax 215 | 216 | class PNet(Network): 217 | def setup(self): 218 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 219 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 220 | .prelu(name='PReLU1') 221 | .max_pool(2, 2, 2, 2, name='pool1') 222 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 223 | .prelu(name='PReLU2') 224 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 225 | .prelu(name='PReLU3') 226 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 227 | .softmax(3,name='prob1')) 228 | 229 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 230 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 231 | 232 | class RNet(Network): 233 | def setup(self): 234 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 235 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 236 | .prelu(name='prelu1') 237 | .max_pool(3, 3, 2, 2, name='pool1') 238 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 239 | .prelu(name='prelu2') 240 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 241 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 242 | .prelu(name='prelu3') 243 | .fc(128, relu=False, name='conv4') 244 | .prelu(name='prelu4') 245 | .fc(2, relu=False, name='conv5-1') 246 | .softmax(1,name='prob1')) 247 | 248 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 249 | .fc(4, relu=False, name='conv5-2')) 250 | 251 | class ONet(Network): 252 | def setup(self): 253 | 
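        # O-Net, the final MTCNN stage (48x48 crops): 'prob1' scores face vs. non-face,
        # 'conv6-2' regresses bounding-box offsets and 'conv6-3' predicts five facial landmark positions.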
(self.feed('data') #pylint: disable=no-value-for-parameter, no-member 254 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 255 | .prelu(name='prelu1') 256 | .max_pool(3, 3, 2, 2, name='pool1') 257 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 258 | .prelu(name='prelu2') 259 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 260 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 261 | .prelu(name='prelu3') 262 | .max_pool(2, 2, 2, 2, name='pool3') 263 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 264 | .prelu(name='prelu4') 265 | .fc(256, relu=False, name='conv5') 266 | .prelu(name='prelu5') 267 | .fc(2, relu=False, name='conv6-1') 268 | .softmax(1, name='prob1')) 269 | 270 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 271 | .fc(4, relu=False, name='conv6-2')) 272 | 273 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 274 | .fc(10, relu=False, name='conv6-3')) 275 | 276 | def create_mtcnn(sess, model_path): 277 | if not model_path: 278 | model_path,_ = os.path.split(os.path.realpath(__file__)) 279 | 280 | with tf.variable_scope('pnet'): 281 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 282 | pnet = PNet({'data':data}) 283 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 284 | with tf.variable_scope('rnet'): 285 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 286 | rnet = RNet({'data':data}) 287 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 288 | with tf.variable_scope('onet'): 289 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 290 | onet = ONet({'data':data}) 291 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 292 | 293 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 294 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 295 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 296 | return pnet_fun, rnet_fun, onet_fun 297 | 298 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 299 | """Detects faces in an image, and returns bounding boxes and points for them. 300 | img: input image 301 | minsize: minimum faces' size 302 | pnet, rnet, onet: caffemodel 303 | threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold 304 | factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 
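    Returns:
        total_boxes: (n_faces, 5) array with [x1, y1, x2, y2, score] for each detected face.
        points: (10, n_faces) array holding the five landmark x coordinates followed by the five y coordinates.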
305 | """ 306 | factor_count=0 307 | total_boxes=np.empty((0,9)) 308 | points=np.empty(0) 309 | h=img.shape[0] 310 | w=img.shape[1] 311 | minl=np.amin([h, w]) 312 | m=12.0/minsize 313 | minl=minl*m 314 | # create scale pyramid 315 | scales=[] 316 | while minl>=12: 317 | scales += [m*np.power(factor, factor_count)] 318 | minl = minl*factor 319 | factor_count += 1 320 | 321 | # first stage 322 | for scale in scales: 323 | hs=int(np.ceil(h*scale)) 324 | ws=int(np.ceil(w*scale)) 325 | im_data = imresample(img, (hs, ws)) 326 | im_data = (im_data-127.5)*0.0078125 327 | img_x = np.expand_dims(im_data, 0) 328 | img_y = np.transpose(img_x, (0,2,1,3)) 329 | out = pnet(img_y) 330 | out0 = np.transpose(out[0], (0,2,1,3)) 331 | out1 = np.transpose(out[1], (0,2,1,3)) 332 | 333 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 334 | 335 | # inter-scale nms 336 | pick = nms(boxes.copy(), 0.5, 'Union') 337 | if boxes.size>0 and pick.size>0: 338 | boxes = boxes[pick,:] 339 | total_boxes = np.append(total_boxes, boxes, axis=0) 340 | 341 | numbox = total_boxes.shape[0] 342 | if numbox>0: 343 | pick = nms(total_boxes.copy(), 0.7, 'Union') 344 | total_boxes = total_boxes[pick,:] 345 | regw = total_boxes[:,2]-total_boxes[:,0] 346 | regh = total_boxes[:,3]-total_boxes[:,1] 347 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 348 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 349 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 350 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 351 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 352 | total_boxes = rerec(total_boxes.copy()) 353 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 354 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 355 | 356 | numbox = total_boxes.shape[0] 357 | if numbox>0: 358 | # second stage 359 | tempimg = np.zeros((24,24,3,numbox)) 360 | for k in range(0,numbox): 361 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 362 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 363 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 364 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 365 | else: 366 | return np.empty() 367 | tempimg = (tempimg-127.5)*0.0078125 368 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 369 | out = rnet(tempimg1) 370 | out0 = np.transpose(out[0]) 371 | out1 = np.transpose(out[1]) 372 | score = out1[1,:] 373 | ipass = np.where(score>threshold[1]) 374 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 375 | mv = out0[:,ipass[0]] 376 | if total_boxes.shape[0]>0: 377 | pick = nms(total_boxes, 0.7, 'Union') 378 | total_boxes = total_boxes[pick,:] 379 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 380 | total_boxes = rerec(total_boxes.copy()) 381 | 382 | numbox = total_boxes.shape[0] 383 | if numbox>0: 384 | # third stage 385 | total_boxes = np.fix(total_boxes).astype(np.int32) 386 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 387 | tempimg = np.zeros((48,48,3,numbox)) 388 | for k in range(0,numbox): 389 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 390 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 391 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 392 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 393 | else: 394 | return np.empty() 395 | tempimg = (tempimg-127.5)*0.0078125 396 | tempimg1 = 
421 | def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
422 |     """Detects faces in a list of images
423 |     images: list containing input images
424 |     detection_window_size_ratio: ratio of minimum face size to smallest image dimension
425 |     pnet, rnet, onet: caffemodel
426 |     threshold: threshold=[th1 th2 th3], th1-3 are three steps' thresholds [0-1]
427 |     factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
428 |     """
429 |     all_scales = [None] * len(images)
430 |     images_with_boxes = [None] * len(images)
431 |
432 |     for i in range(len(images)):
433 |         images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
434 |
435 |     # create scale pyramid
436 |     for index, img in enumerate(images):
437 |         all_scales[index] = []
438 |         h = img.shape[0]
439 |         w = img.shape[1]
440 |         minsize = int(detection_window_size_ratio * np.minimum(w, h))
441 |         factor_count = 0
442 |         minl = np.amin([h, w])
443 |         if minsize <= 12:
444 |             minsize = 12
445 |
446 |         m = 12.0 / minsize
447 |         minl = minl * m
448 |         while minl >= 12:
449 |             all_scales[index].append(m * np.power(factor, factor_count))
450 |             minl = minl * factor
451 |             factor_count += 1
452 |
453 |     # # # # # # # # # # # # #
454 |     # first stage - fast proposal network (pnet) to obtain face candidates
455 |     # # # # # # # # # # # # #
456 |
457 |     images_obj_per_resolution = {}
458 |
459 |     # TODO: round the pyramid sizes to a multiple of 8 to increase the probability that pyramid images will have the same resolution across input images
460 |
461 |     for index, scales in enumerate(all_scales):
462 |         h = images[index].shape[0]
463 |         w = images[index].shape[1]
464 |
465 |         for scale in scales:
466 |             hs = int(np.ceil(h * scale))
467 |             ws = int(np.ceil(w * scale))
468 |
469 |             if (ws, hs) not in images_obj_per_resolution:
470 |                 images_obj_per_resolution[(ws, hs)] = []
471 |
472 |             im_data = imresample(images[index], (hs, ws))
473 |             im_data = (im_data - 127.5) * 0.0078125
474 |             img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
475 |             images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
476 |
477 |     for resolution in images_obj_per_resolution:
478 |         images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
479 |         outs = pnet(images_per_resolution)
480 |
481 |         for index in range(len(outs[0])):
482 |             scale = images_obj_per_resolution[resolution][index]['scale']
483 |             image_index = images_obj_per_resolution[resolution][index]['index']
484 |             out0 = np.transpose(outs[0][index], (1, 0, 2))
485 |             out1 = np.transpose(outs[1][index], (1, 0, 2))
486 |
487 |             boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
488 |
489 |             # inter-scale nms
490 |             pick = nms(boxes.copy(), 0.5, 'Union')
491 |             if boxes.size > 0 and pick.size > 0:
492 |                 boxes = boxes[pick, :]
493 |                 images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
494 |                                                                           boxes,
495 |                                                                           axis=0)
496 |
497 |     for index, image_obj in enumerate(images_with_boxes):
498 |         numbox = image_obj['total_boxes'].shape[0]
499 |         if numbox > 0:
500 |             h = images[index].shape[0]
501 |             w = images[index].shape[1]
502 |             pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
503 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
504 |             regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
505 |             regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
506 |             qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
507 |             qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
508 |             qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
509 |             qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
510 |             image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
511 |             image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
512 |             image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
513 |             dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
514 |
515 |             numbox = image_obj['total_boxes'].shape[0]
516 |             tempimg = np.zeros((24, 24, 3, numbox))
517 |
518 |             if numbox > 0:
519 |                 for k in range(0, numbox):
520 |                     tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
521 |                     tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
522 |                     if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
523 |                         tempimg[:, :, :, k] = imresample(tmp, (24, 24))
524 |                     else:
525 |                         return np.empty(0)
526 |
527 |                 tempimg = (tempimg - 127.5) * 0.0078125
528 |                 image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
529 |
530 |     # # # # # # # # # # # # #
531 |     # second stage - refinement of face candidates with rnet
532 |     # # # # # # # # # # # # #
533 |
534 |     bulk_rnet_input = np.empty((0, 24, 24, 3))
535 |     for index, image_obj in enumerate(images_with_boxes):
536 |         if 'rnet_input' in image_obj:
537 |             bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
538 |
539 |     out = rnet(bulk_rnet_input)
540 |     out0 = np.transpose(out[0])
541 |     out1 = np.transpose(out[1])
542 |     score = out1[1, :]
543 |
544 |     i = 0
545 |     for index, image_obj in enumerate(images_with_boxes):
546 |         if 'rnet_input' not in image_obj:
547 |             continue
548 |
549 |         rnet_input_count = image_obj['rnet_input'].shape[0]
550 |         score_per_image = score[i:i + rnet_input_count]
551 |         out0_per_image = out0[:, i:i + rnet_input_count]
552 |
553 |         ipass = np.where(score_per_image > threshold[1])
554 |         image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
555 |                                               np.expand_dims(score_per_image[ipass].copy(), 1)])
556 |
557 |         mv = out0_per_image[:, ipass[0]]
558 |
559 |         if image_obj['total_boxes'].shape[0] > 0:
560 |             h = images[index].shape[0]
561 |             w = images[index].shape[1]
562 |             pick = nms(image_obj['total_boxes'], 0.7, 'Union')
563 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
564 |             image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
565 |             image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
566 |
567 |             numbox = image_obj['total_boxes'].shape[0]
568 |
569 |             if numbox > 0:
570 |                 tempimg = np.zeros((48, 48, 3, numbox))
571 |                 image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
572 |                 dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
573 |
574 |                 for k in range(0, numbox):
575 |                     tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
576 |                     tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
577 |                     if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
578 |                         tempimg[:, :, :, k] = imresample(tmp, (48, 48))
579 |                     else:
580 |                         return np.empty(0)
581 |                 tempimg = (tempimg - 127.5) * 0.0078125
582 |                 image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
583 |
584 |         i += rnet_input_count
585 |
586 |     # # # # # # # # # # # # #
587 |     # third stage - further refinement and facial landmark positions with onet
588 |     # # # # # # # # # # # # #
589 |
590 |     bulk_onet_input = np.empty((0, 48, 48, 3))
591 |     for index, image_obj in enumerate(images_with_boxes):
592 |         if 'onet_input' in image_obj:
593 |             bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
594 |
595 |     out = onet(bulk_onet_input)
596 |
597 |     out0 = np.transpose(out[0])
598 |     out1 = np.transpose(out[1])
599 |     out2 = np.transpose(out[2])
600 |     score = out2[1, :]
601 |     points = out1
602 |
603 |     i = 0
604 |     ret = []
605 |     for index, image_obj in enumerate(images_with_boxes):
606 |         if 'onet_input' not in image_obj:
607 |             ret.append(None)
608 |             continue
609 |
610 |         onet_input_count = image_obj['onet_input'].shape[0]
611 |
612 |         out0_per_image = out0[:, i:i + onet_input_count]
613 |         score_per_image = score[i:i + onet_input_count]
614 |         points_per_image = points[:, i:i + onet_input_count]
615 |
616 |         ipass = np.where(score_per_image > threshold[2])
617 |         points_per_image = points_per_image[:, ipass[0]]
618 |
619 |         image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
620 |                                               np.expand_dims(score_per_image[ipass].copy(), 1)])
621 |         mv = out0_per_image[:, ipass[0]]
622 |
623 |         w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
624 |         h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
625 |         points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
626 |             image_obj['total_boxes'][:, 0], (5, 1)) - 1
627 |         points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
628 |             image_obj['total_boxes'][:, 1], (5, 1)) - 1
629 |
630 |         if image_obj['total_boxes'].shape[0] > 0:
631 |             image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
632 |             pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
633 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
634 |             points_per_image = points_per_image[:, pick]
635 |
636 |             ret.append((image_obj['total_boxes'], points_per_image))
637 |         else:
638 |             ret.append(None)
639 |
640 |         i += onet_input_count
641 |
642 |     return ret
643 |
644 |
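# Illustrative sketch (not from this module): bulk_detect_face() above mirrors
# detect_face() but pushes several images through each network stage at once, grouping
# pyramid levels of equal resolution so pnet can process them together. The ratio value
# below is an assumption, not a default defined in this file.
#
#     results = bulk_detect_face(list_of_rgb_images, 0.05, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
#     # results[i] is either None (nothing survived the early stages for that image) or a
#     # (bounding_boxes, points) pair in the same format as detect_face() returns.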
645 | # function [boundingbox] = bbreg(boundingbox,reg)
646 | def bbreg(boundingbox,reg):
647 |     """Calibrate bounding boxes"""
648 |     if reg.shape[1]==1:
649 |         reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
650 |
651 |     w = boundingbox[:,2]-boundingbox[:,0]+1
652 |     h = boundingbox[:,3]-boundingbox[:,1]+1
653 |     b1 = boundingbox[:,0]+reg[:,0]*w
654 |     b2 = boundingbox[:,1]+reg[:,1]*h
655 |     b3 = boundingbox[:,2]+reg[:,2]*w
656 |     b4 = boundingbox[:,3]+reg[:,3]*h
657 |     boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
658 |     return boundingbox
659 |
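# Worked example for bbreg() above (illustrative numbers): for a box [10, 10, 50, 50]
# (so w = h = 41) and a regression row reg = [0.1, 0.0, -0.1, 0.0], the calibrated box is
# [10 + 0.1*41, 10, 50 - 0.1*41, 50] = [14.1, 10.0, 45.9, 50.0]; the network outputs are
# offsets expressed as fractions of the current box size.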
660 | def generateBoundingBox(imap, reg, scale, t):
661 |     """Use heatmap to generate bounding boxes"""
662 |     stride=2
663 |     cellsize=12
664 |
665 |     imap = np.transpose(imap)
666 |     dx1 = np.transpose(reg[:,:,0])
667 |     dy1 = np.transpose(reg[:,:,1])
668 |     dx2 = np.transpose(reg[:,:,2])
669 |     dy2 = np.transpose(reg[:,:,3])
670 |     y, x = np.where(imap >= t)
671 |     if y.shape[0]==1:
672 |         dx1 = np.flipud(dx1)
673 |         dy1 = np.flipud(dy1)
674 |         dx2 = np.flipud(dx2)
675 |         dy2 = np.flipud(dy2)
676 |     score = imap[(y,x)]
677 |     reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
678 |     if reg.size==0:
679 |         reg = np.empty((0,3))
680 |     bb = np.transpose(np.vstack([y,x]))
681 |     q1 = np.fix((stride*bb+1)/scale)
682 |     q2 = np.fix((stride*bb+cellsize-1+1)/scale)
683 |     boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
684 |     return boundingbox, reg
685 |
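# Note on generateBoundingBox() above: each heatmap cell that clears the threshold is
# mapped back to input-image coordinates using the pnet stride of 2 and 12x12 receptive
# field, i.e. q1 = fix((2*cell + 1) / scale) and q2 = fix((2*cell + 12) / scale). For an
# illustrative cell at (3, 5) found at scale 0.5, q1 = fix([7, 11] / 0.5) = [14, 22] and
# q2 = fix([18, 22] / 0.5) = [36, 44] in the original image.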
686 | # function pick = nms(boxes,threshold,type)
687 | def nms(boxes, threshold, method):
688 |     if boxes.size==0:
689 |         return np.empty((0,3))
690 |     x1 = boxes[:,0]
691 |     y1 = boxes[:,1]
692 |     x2 = boxes[:,2]
693 |     y2 = boxes[:,3]
694 |     s = boxes[:,4]
695 |     area = (x2-x1+1) * (y2-y1+1)
696 |     I = np.argsort(s)
697 |     pick = np.zeros_like(s, dtype=np.int16)
698 |     counter = 0
699 |     while I.size>0:
700 |         i = I[-1]
701 |         pick[counter] = i
702 |         counter += 1
703 |         idx = I[0:-1]
704 |         xx1 = np.maximum(x1[i], x1[idx])
705 |         yy1 = np.maximum(y1[i], y1[idx])
706 |         xx2 = np.minimum(x2[i], x2[idx])
707 |         yy2 = np.minimum(y2[i], y2[idx])
708 |         w = np.maximum(0.0, xx2-xx1+1)
709 |         h = np.maximum(0.0, yy2-yy1+1)
710 |         inter = w * h
711 |         if method == 'Min':
712 |             o = inter / np.minimum(area[i], area[idx])
713 |         else:
714 |             o = inter / (area[i] + area[idx] - inter)
715 |         I = I[np.where(o<=threshold)]
716 |     pick = pick[0:counter]
717 |     return pick
718 |
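# Note on nms() above: with method 'Union' the overlap measure is the usual IoU. As an
# illustrative example, two 100x100 boxes offset by 50 px in one direction intersect in
# about 50*100 = 5000 px, giving o = 5000 / (10000 + 10000 - 5000), roughly 1/3; boxes
# whose overlap with the currently best-scoring box exceeds the threshold are suppressed,
# so the second box survives a 0.7 threshold but is removed under 0.3. With method 'Min'
# the intersection is divided by the smaller box area instead.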
719 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
720 | def pad(total_boxes, w, h):
721 |     """Compute the padding coordinates (pad the bounding boxes to square)"""
722 |     tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
723 |     tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
724 |     numbox = total_boxes.shape[0]
725 |
726 |     dx = np.ones((numbox), dtype=np.int32)
727 |     dy = np.ones((numbox), dtype=np.int32)
728 |     edx = tmpw.copy().astype(np.int32)
729 |     edy = tmph.copy().astype(np.int32)
730 |
731 |     x = total_boxes[:,0].copy().astype(np.int32)
732 |     y = total_boxes[:,1].copy().astype(np.int32)
733 |     ex = total_boxes[:,2].copy().astype(np.int32)
734 |     ey = total_boxes[:,3].copy().astype(np.int32)
735 |
736 |     tmp = np.where(ex>w)
737 |     edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
738 |     ex[tmp] = w
739 |
740 |     tmp = np.where(ey>h)
741 |     edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
742 |     ey[tmp] = h
743 |
744 |     tmp = np.where(x<1)
745 |     dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
746 |     x[tmp] = 1
747 |
748 |     tmp = np.where(y<1)
749 |     dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
750 |     y[tmp] = 1
751 |
752 |     return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
753 |
754 | # function [bboxA] = rerec(bboxA)
755 | def rerec(bboxA):
756 |     """Convert bboxA to square."""
757 |     h = bboxA[:,3]-bboxA[:,1]
758 |     w = bboxA[:,2]-bboxA[:,0]
759 |     l = np.maximum(w, h)
760 |     bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
761 |     bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
762 |     bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
763 |     return bboxA
764 |
765 | def imresample(img, sz):
766 |     im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
767 |     return im_data
768 |
769 |     # This method is kept for debugging purposes
770 |     # h=img.shape[0]
771 |     # w=img.shape[1]
772 |     # hs, ws = sz
773 |     # dx = float(w) / ws
774 |     # dy = float(h) / hs
775 |     # im_data = np.zeros((hs,ws,3))
776 |     # for a1 in range(0,hs):
777 |     #     for a2 in range(0,ws):
778 |     #         for a3 in range(0,3):
779 |     #             im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
780 |     # return im_data
781 |
782 |
--------------------------------------------------------------------------------