├── src
│   ├── align
│   │   ├── ___init__.py
│   │   ├── det1.npy
│   │   ├── det2.npy
│   │   ├── det3.npy
│   │   ├── __pycache__
│   │   │   └── detect_face.cpython-36.pyc
│   │   ├── align_dataset_mtcnn.py
│   │   └── detect_face.py
│   ├── detect_face
│   │   ├── __init__.py
│   │   └── face_detector.py
│   ├── __pycache__
│   │   ├── lfw.cpython-36.pyc
│   │   └── facenet.cpython-36.pyc
│   ├── data_generator.py
│   ├── dataset.py
│   ├── applications
│   │   ├── __init__.py
│   │   ├── imagenet_utils.py
│   │   ├── mobilenet.py
│   │   └── mobilenet_v2.py
│   ├── import_pb_to_tensorboard.py
│   ├── lfw.py
│   ├── freeze_graph.py
│   ├── build_dataset.py
│   ├── utils.py
│   ├── facenet_live.py
│   ├── validate.py
│   ├── validate_on_lfw.py
│   └── facenet.py
├── requirements.txt
├── .gitignore
├── README.md
└── Forward Propagation.ipynb

/src/align/___init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/detect_face/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/align/det1.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/align/det1.npy
--------------------------------------------------------------------------------
/src/align/det2.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/align/det2.npy
--------------------------------------------------------------------------------
/src/align/det3.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/align/det3.npy
--------------------------------------------------------------------------------
/src/__pycache__/lfw.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/__pycache__/lfw.cpython-36.pyc
--------------------------------------------------------------------------------
/src/__pycache__/facenet.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/__pycache__/facenet.cpython-36.pyc
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow
2 | scipy
3 | scikit-learn
4 | opencv-python
5 | h5py
6 | matplotlib
7 | Pillow
8 | requests
9 | psutil
10 | imageio
--------------------------------------------------------------------------------
/src/align/__pycache__/detect_face.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pedroprates/mobile-face-net/HEAD/src/align/__pycache__/detect_face.cpython-36.pyc
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | models/
2 | .DS_Store
3 | .ipynb_checkpoints/
4 | face-recognition/
5 | datasets/
6 | .vscode/
7 | src/detect_face/__pycache__/
8 | src/__pycache__/
9 | art/
10 | .idea
11 |
--------------------------------------------------------------------------------
/README.md:
-------------------------------------------------------------------------------- 1 | # MobileFaceNet 2 | 3 | This is based on my graduation thesis, where I propose the MobileFaceNet, a smaller Convolution Neural Network to perform Facial Recognition. The model was trained based on the technique [Distilling the Knowledge in a Neural Network](https://arxiv.org/abs/1503.02531) proposed by Geoffrey Hinton, and as a coarse model it was used the pretrained [FaceNet from David Sandberg](https://github.com/davidsandberg/facenet), which achieves over 98% of accuracy on the [LFW dataset](http://vis-www.cs.umass.edu/lfw/). 4 | 5 | *This repository is currently under development* -------------------------------------------------------------------------------- /src/data_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy import misc 3 | from keras.utils import Sequence 4 | 5 | 6 | class TCCGenerator(Sequence): 7 | 8 | def __init__(self, image_filenames, labels, batch_size): 9 | self.image_filenames, self.labels = image_filenames, labels 10 | self.batch_size = batch_size 11 | 12 | def __len__(self): 13 | return np.ceil(len(self.image_filenames) / float(self.batch_size)).astype(int) 14 | 15 | def __getitem__(self, idx): 16 | batch_x = self.image_filenames[idx * self.batch_size : (idx+1) * self.batch_size] 17 | batch_y = self.labels[idx * self.batch_size:(idx+1) * self.batch_size] 18 | 19 | embeddings = np.array([np.load(filename) for filename in batch_y]) 20 | images = np.array([misc.imread(filename) for filename in batch_x]) 21 | images = images / 255 22 | 23 | return images, embeddings.reshape(embeddings.shape[0], -1) 24 | -------------------------------------------------------------------------------- /src/detect_face/face_detector.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | class FaceDetector: 5 | def __init__(self, extractor_path): 6 | self.path = extractor_path 7 | self.face_detector = cv2.CascadeClassifier(extractor_path) 8 | 9 | def detect_faces(self, frame, scaleFactor=1.3, minNeighbors=1, minSize=(30, 30)): 10 | rects = self.face_detector.detectMultiScale(frame, 11 | scaleFactor=scaleFactor, 12 | minNeighbors=minNeighbors, 13 | minSize=minSize, 14 | flags=cv2.CASCADE_SCALE_IMAGE) 15 | 16 | return rects 17 | 18 | @staticmethod 19 | def extract_faces(frame, rects, size=(160, 160)): 20 | nrof_images = len(rects) 21 | images = np.zeros((nrof_images, *size, 3), dtype=np.uint8) 22 | 23 | for idx, (x, y, w, h) in enumerate(rects): 24 | h_margin = int(0.1*w) 25 | v_margin = int(0.1*h) 26 | 27 | cropped_image = frame[y:y+h, x:x+w] 28 | images[idx,:,:,:] = cv2.resize(cropped_image, size) 29 | 30 | return images 31 | -------------------------------------------------------------------------------- /src/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import progressbar 4 | 5 | def only_alpha(string): 6 | return all(not a.isdigit() for a in string) 7 | 8 | def clean_name(path_name, with_number=False): 9 | path_name = path_name.split('/')[-1] 10 | path_name = path_name.split('.')[0] 11 | 12 | if with_number: 13 | return path_name 14 | 15 | list_names = path_name.split('_') 16 | 17 | list_names = list(filter(lambda x: only_alpha(x), list_names)) 18 | name = '_'.join(list_names) 19 | return name 20 | 21 | def get_names(data): 22 | """ Return the list of unique 
names that compose the dataset 23 | 24 | :params data: The dataset to be analyzed 25 | """ 26 | names = [] 27 | for image_path in (data): 28 | name = clean_name(image_path) 29 | 30 | if name not in names: 31 | names.append(name) 32 | 33 | return names 34 | 35 | def build_dataset(data, 36 | output='output', 37 | base_path='/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/lfw_mtcnnpy_160'): 38 | people = get_names(data) 39 | embeddings = [] 40 | print('[CHECK] It has %d people on the dataset.' % len(people)) 41 | for person in progressbar.progressbar(people): 42 | person_path = os.path.join(base_path, person) 43 | person_path = os.path.join(person_path, output) 44 | 45 | faces = os.listdir(person_path) 46 | faces = [os.path.join(person_path, f) for f in faces] 47 | nrof_faces = len(faces) 48 | 49 | for idx, face in enumerate(faces): 50 | embedding_face = np.load(face) 51 | embedding = { 'name': clean_name(face, with_number=True), 52 | 'embedding': embedding_face } 53 | embeddings.append(embedding) 54 | 55 | embeddings_output_path = os.path.join(base_path, 'embeddings_test_mac.npy') 56 | if os.path.exists(embeddings_output_path): 57 | os.remove(embeddings_output_path) 58 | 59 | np.save(embeddings_output_path, np.array(embeddings)) 60 | 61 | def main(): 62 | X_test = np.load('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/xtest.npy') 63 | print('[CHECK] Test set has %d files.' % X_test.shape[0]) 64 | print('[STARTING] Building dataset...') 65 | build_dataset(X_test) 66 | 67 | if __name__ == "__main__": 68 | main() -------------------------------------------------------------------------------- /src/applications/__init__.py: -------------------------------------------------------------------------------- 1 | """Enables dynamic setting of underlying Keras module. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | _KERAS_BACKEND = None 8 | _KERAS_LAYERS = None 9 | _KERAS_MODELS = None 10 | _KERAS_UTILS = None 11 | 12 | 13 | def set_keras_submodules(backend=None, 14 | layers=None, 15 | models=None, 16 | utils=None, 17 | engine=None): 18 | # Deprecated, will be removed in the future. 19 | global _KERAS_BACKEND 20 | global _KERAS_LAYERS 21 | global _KERAS_MODELS 22 | global _KERAS_UTILS 23 | _KERAS_BACKEND = backend 24 | _KERAS_LAYERS = layers 25 | _KERAS_MODELS = models 26 | _KERAS_UTILS = utils 27 | 28 | 29 | def get_keras_submodule(name): 30 | # Deprecated, will be removed in the future. 31 | if name not in {'backend', 'layers', 'models', 'utils'}: 32 | raise ImportError( 33 | 'Can only retrieve one of "backend", ' 34 | '"layers", "models", or "utils". ' 35 | 'Requested: %s' % name) 36 | if _KERAS_BACKEND is None: 37 | raise ImportError('You need to first `import keras` ' 38 | 'in order to use `keras_applications`. 
' 39 | 'For instance, you can do:\n\n' 40 | '```\n' 41 | 'import keras\n' 42 | 'from keras_applications import vgg16\n' 43 | '```\n\n' 44 | 'Or, preferably, this equivalent formulation:\n\n' 45 | '```\n' 46 | 'from keras import applications\n' 47 | '```\n') 48 | if name == 'backend': 49 | return _KERAS_BACKEND 50 | elif name == 'layers': 51 | return _KERAS_LAYERS 52 | elif name == 'models': 53 | return _KERAS_MODELS 54 | elif name == 'utils': 55 | return _KERAS_UTILS 56 | 57 | 58 | def get_submodules_from_kwargs(kwargs): 59 | backend = kwargs.get('backend', _KERAS_BACKEND) 60 | layers = kwargs.get('layers', _KERAS_LAYERS) 61 | models = kwargs.get('models', _KERAS_MODELS) 62 | utils = kwargs.get('utils', _KERAS_UTILS) 63 | for key in kwargs.keys(): 64 | if key not in ['backend', 'layers', 'models', 'utils']: 65 | raise TypeError('Invalid keyword argument: %s', key) 66 | return backend, layers, models, utils 67 | 68 | 69 | def correct_pad(backend, inputs, kernel_size): 70 | """Returns a tuple for zero-padding for 2D convolution with downsampling. 71 | # Arguments 72 | input_size: An integer or tuple/list of 2 integers. 73 | kernel_size: An integer or tuple/list of 2 integers. 74 | # Returns 75 | A tuple. 76 | """ 77 | img_dim = 2 if backend.image_data_format() == 'channels_first' else 1 78 | input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)] 79 | 80 | if isinstance(kernel_size, int): 81 | kernel_size = (kernel_size, kernel_size) 82 | 83 | if input_size[0] is None: 84 | adjust = (1, 1) 85 | else: 86 | adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) 87 | 88 | correct = (kernel_size[0] // 2, kernel_size[1] // 2) 89 | 90 | return ((correct[0] - adjust[0], correct[0]), 91 | (correct[1] - adjust[1], correct[1])) 92 | -------------------------------------------------------------------------------- /src/import_pb_to_tensorboard.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ================================ 15 | """Imports a protobuf model as a graph in Tensorboard.""" 16 | 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import argparse 22 | import sys 23 | 24 | from tensorflow.core.framework import graph_pb2 25 | from tensorflow.python.client import session 26 | from tensorflow.python.framework import importer 27 | from tensorflow.python.framework import ops 28 | from tensorflow.python.platform import app 29 | from tensorflow.python.platform import gfile 30 | from tensorflow.python.summary import summary 31 | 32 | # Try importing TensorRT ops if available 33 | # TODO(aaroey): ideally we should import everything from contrib, but currently 34 | # tensorrt module would cause build errors when being imported in 35 | # tensorflow/contrib/__init__.py. Fix it. 
36 | # pylint: disable=unused-import,g-import-not-at-top,wildcard-import 37 | try: 38 | from tensorflow.contrib.tensorrt.ops.gen_trt_engine_op import * 39 | except ImportError: 40 | pass 41 | # pylint: enable=unused-import,g-import-not-at-top,wildcard-import 42 | 43 | def import_to_tensorboard(model_dir, log_dir): 44 | """View an imported protobuf model (`.pb` file) as a graph in Tensorboard. 45 | Args: 46 | model_dir: The location of the protobuf (`pb`) model to visualize 47 | log_dir: The location for the Tensorboard log to begin visualization from. 48 | Usage: 49 | Call this function with your model location and desired log directory. 50 | Launch Tensorboard by pointing it to the log directory. 51 | View your imported `.pb` model as a graph. 52 | """ 53 | with session.Session(graph=ops.Graph()) as sess: 54 | with gfile.FastGFile(model_dir, "rb") as f: 55 | graph_def = graph_pb2.GraphDef() 56 | graph_def.ParseFromString(f.read()) 57 | importer.import_graph_def(graph_def) 58 | 59 | pb_visual_writer = summary.FileWriter(log_dir) 60 | pb_visual_writer.add_graph(sess.graph) 61 | print("Model Imported. Visualize by running: " 62 | "tensorboard --logdir={}".format(log_dir)) 63 | 64 | 65 | def main(unused_args): 66 | import_to_tensorboard(FLAGS.model_dir, FLAGS.log_dir) 67 | 68 | if __name__ == "__main__": 69 | parser = argparse.ArgumentParser() 70 | parser.register("type", "bool", lambda v: v.lower() == "true") 71 | parser.add_argument( 72 | "--model_dir", 73 | type=str, 74 | default="", 75 | required=True, 76 | help="The location of the protobuf (\'pb\') model to visualize.") 77 | parser.add_argument( 78 | "--log_dir", 79 | type=str, 80 | default="", 81 | required=True, 82 | help="The location for the Tensorboard log to begin visualization from.") 83 | FLAGS, unparsed = parser.parse_known_args() 84 | app.run(main=main, argv=[sys.argv[0]] + unparsed) -------------------------------------------------------------------------------- /src/lfw.py: -------------------------------------------------------------------------------- 1 | """Helper for evaluation on the Labeled Faces in the Wild dataset 2 | """ 3 | 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 
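# Note: evaluate() below follows the standard LFW 10-fold protocol. It relies on
# facenet.calculate_roc (thresholds swept from 0 to 4) for TPR/FPR/accuracy and on
# facenet.calculate_val for the validation rate at a target FAR of 1e-3.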
25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import os 31 | import numpy as np 32 | import facenet 33 | 34 | 35 | def evaluate(embeddings, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): 36 | # Calculate evaluation metrics 37 | thresholds = np.arange(0, 4, 0.01) 38 | embeddings1 = embeddings[0::2] 39 | embeddings2 = embeddings[1::2] 40 | tpr, fpr, accuracy = facenet.calculate_roc(thresholds, embeddings1, embeddings2, 41 | np.asarray(actual_issame), nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) 42 | thresholds = np.arange(0, 4, 0.001) 43 | val, val_std, far = facenet.calculate_val(thresholds, embeddings1, embeddings2, 44 | np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds, distance_metric=distance_metric, subtract_mean=subtract_mean) 45 | return tpr, fpr, accuracy, val, val_std, far 46 | 47 | 48 | def get_paths(lfw_dir, pairs): 49 | nrof_skipped_pairs = 0 50 | path_list = [] 51 | issame_list = [] 52 | for pair in pairs: 53 | if len(pair) == 3: 54 | path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) 55 | path1 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2]))) 56 | issame = True 57 | elif len(pair) == 4: 58 | path0 = add_extension(os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1]))) 59 | path1 = add_extension(os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3]))) 60 | issame = False 61 | if os.path.exists(path0) and os.path.exists(path1): # Only add the pair if both paths exist 62 | path_list += (path0,path1) 63 | issame_list.append(issame) 64 | else: 65 | nrof_skipped_pairs += 1 66 | if nrof_skipped_pairs>0: 67 | print('Skipped %d image pairs' % nrof_skipped_pairs) 68 | 69 | return path_list, issame_list 70 | 71 | 72 | def add_extension(path): 73 | if os.path.exists(path+'.jpg'): 74 | return path+'.jpg' 75 | elif os.path.exists(path+'.png'): 76 | return path+'.png' 77 | else: 78 | raise RuntimeError('No file "%s" with extension png or jpg.' % path) 79 | 80 | 81 | def read_pairs(pairs_filename): 82 | pairs = [] 83 | with open(pairs_filename, 'r') as f: 84 | for line in f.readlines()[1:]: 85 | pair = line.strip().split() 86 | pairs.append(pair) 87 | return np.array(pairs) 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /src/freeze_graph.py: -------------------------------------------------------------------------------- 1 | """Imports a model metagraph and checkpoint file, converts the variables to constants 2 | and exports the model as a graphdef protobuf 3 | """ 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | from tensorflow.python.framework import graph_util 31 | import tensorflow as tf 32 | import argparse 33 | import os 34 | import sys 35 | import facenet 36 | from six.moves import xrange # @UnresolvedImport 37 | 38 | def main(args): 39 | with tf.Graph().as_default(): 40 | with tf.Session() as sess: 41 | # Load the model metagraph and checkpoint 42 | print('Model directory: %s' % args.model_dir) 43 | meta_file, ckpt_file = facenet.get_model_filenames(os.path.expanduser(args.model_dir)) 44 | 45 | print('Metagraph file: %s' % meta_file) 46 | print('Checkpoint file: %s' % ckpt_file) 47 | 48 | model_dir_exp = os.path.expanduser(args.model_dir) 49 | saver = tf.train.import_meta_graph(os.path.join(model_dir_exp, meta_file), clear_devices=True) 50 | tf.get_default_session().run(tf.global_variables_initializer()) 51 | tf.get_default_session().run(tf.local_variables_initializer()) 52 | saver.restore(tf.get_default_session(), os.path.join(model_dir_exp, ckpt_file)) 53 | 54 | # Retrieve the protobuf graph definition and fix the batch norm nodes 55 | input_graph_def = sess.graph.as_graph_def() 56 | 57 | # Freeze the graph def 58 | output_graph_def = freeze_graph_def(sess, input_graph_def, 'embeddings,label_batch') 59 | 60 | # Serialize and dump the output graph to the filesystem 61 | with tf.gfile.GFile(args.output_file, 'wb') as f: 62 | f.write(output_graph_def.SerializeToString()) 63 | print("%d ops in the final graph: %s" % (len(output_graph_def.node), args.output_file)) 64 | 65 | def freeze_graph_def(sess, input_graph_def, output_node_names): 66 | for node in input_graph_def.node: 67 | if node.op == 'RefSwitch': 68 | node.op = 'Switch' 69 | for index in xrange(len(node.input)): 70 | if 'moving_' in node.input[index]: 71 | node.input[index] = node.input[index] + '/read' 72 | elif node.op == 'AssignSub': 73 | node.op = 'Sub' 74 | if 'use_locking' in node.attr: del node.attr['use_locking'] 75 | elif node.op == 'AssignAdd': 76 | node.op = 'Add' 77 | if 'use_locking' in node.attr: del node.attr['use_locking'] 78 | 79 | # Get the list of important nodes 80 | whitelist_names = [] 81 | for node in input_graph_def.node: 82 | if (node.name.startswith('InceptionResnet') or node.name.startswith('embeddings') or 83 | node.name.startswith('image_batch') or node.name.startswith('label_batch') or 84 | node.name.startswith('phase_train') or node.name.startswith('Logits')): 85 | whitelist_names.append(node.name) 86 | 87 | # Replace all the variables in the graph with constants of the same values 88 | output_graph_def = graph_util.convert_variables_to_constants( 89 | sess, input_graph_def, output_node_names.split(","), 90 | variable_names_whitelist=whitelist_names) 91 | return output_graph_def 92 | 93 | def parse_arguments(argv): 94 | parser = argparse.ArgumentParser() 95 | 96 | parser.add_argument('model_dir', type=str, 97 | help='Directory containing the metagraph (.meta) 
file and the checkpoint (ckpt) file containing model parameters') 98 | parser.add_argument('output_file', type=str, 99 | help='Filename for the exported graphdef protobuf (.pb)') 100 | return parser.parse_args(argv) 101 | 102 | if __name__ == '__main__': 103 | main(parse_arguments(sys.argv[1:])) 104 | -------------------------------------------------------------------------------- /src/build_dataset.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import utils 3 | import os 4 | import tensorflow as tf 5 | import cv2 6 | import numpy as np 7 | import time 8 | import keras 9 | import keras.backend as K 10 | import json 11 | 12 | def main(args): 13 | print("[STARTING] Starting the code to create the dataset.") 14 | print(".\n.\n.") 15 | 16 | print("[LOADING] Loading the Convolutional Neural Network model...") 17 | type_mode = args["type"] 18 | assert type_mode in ["MobileFaceNet", "FaceNet"], "Only MobileFaceNet or FaceNet are supported." 19 | 20 | if type_mode == 'FaceNet': 21 | start = time.time() 22 | sess = tf.Session() 23 | utils.load_model(args["model"]) 24 | 25 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 26 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 27 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 28 | print("[LOADING] Loading the FaceNet weights took %.2f" % (time.time() - start)) 29 | else: 30 | K.clear_session() 31 | define_keras_functions() 32 | with open(args["json"]) as f: 33 | start = time.time() 34 | model_json = json.load(f) 35 | model = keras.models.model_from_json(model_json) 36 | print("[LOADING] Loadng the Weights...") 37 | model.load_weights(args["weights"]) 38 | print("[LOADING] Loading the MobileFaceNet weights took %.2fs" % (time.time() - start)) 39 | 40 | print("[LOADING] Checking the dataset path...") 41 | dataset_path = args['dataset'] 42 | dataset_path = os.path.expanduser(dataset_path) 43 | assert os.path.isdir(dataset_path), "Dataset folder should be the dataset root folder." 
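# The dataset root is expected to hold one folder per person containing .jpg/.jpeg
# images; the embeddings computed below are written back as <person>.npy inside each
# person's folder. Note that cv2.resize below is hard-coded to (160, 160), so the
# --image argument must stay at its default of 160 for the array shapes to match.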
44 | people = [person for person in os.listdir(dataset_path) if not person.startswith('.')] 45 | 46 | print('[RUNNING] Building the dataset!') 47 | times = [] 48 | for person in people: 49 | print('\t[BUILD] Building ', person) 50 | person_path = os.path.join(dataset_path, person) 51 | pics = [pic for pic in os.listdir(person_path) if (pic.endswith('jpg') or pic.endswith('jpeg'))] 52 | nrof_pics = len(pics) 53 | images = np.zeros((nrof_pics, args['image'], args['image'], 3)) 54 | 55 | for idx, pic in enumerate(pics): 56 | image = cv2.imread(os.path.join(person_path, pic)) 57 | image = cv2.resize(image, (160, 160)) 58 | image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 59 | images[idx, :, :, :] = image_rgb / 255 60 | 61 | # Recognize the images 62 | if type_mode == 'FaceNet': 63 | start_time = time.time() 64 | feed_dict = {images_placeholder: images, phase_train_placeholder: False} 65 | embeddings_array = sess.run(embeddings, feed_dict=feed_dict) 66 | times.append(time.time() - start_time) 67 | else: 68 | start_time = time.time() 69 | embeddings_array = model.predict(images) 70 | times.append(time.time() - start_time) 71 | 72 | output_file = os.path.join(person_path, person+'.npy') 73 | 74 | if (os.path.isfile(output_file)): 75 | os.remove(output_file) 76 | 77 | np.save(output_file, embeddings_array) 78 | 79 | def parse_arguments(): 80 | """ Parsing command line arguments 81 | """ 82 | ap = argparse.ArgumentParser() 83 | ap.add_argument('-d', 84 | '--dataset', 85 | type=str, 86 | required=True, 87 | help='Path to the dataset root folder') 88 | ap.add_argument('-m', 89 | '--model', 90 | type=str, 91 | help="Path to the CNN model") 92 | 93 | ap.add_argument('-i', 94 | '--image', 95 | type=int, 96 | default=160, 97 | help='Size of the image') 98 | 99 | ap.add_argument('-t', 100 | '--type', 101 | type=str, 102 | default="MobileFaceNet", 103 | help="Which model to use to create the embeddings") 104 | 105 | ap.add_argument('-j', 106 | '--json', 107 | type=str, 108 | default='/home/pi/Documents/TCC/face-recognition/models/mobilefacenet/model.json', 109 | help='Path to the JSON containing the model structure') 110 | 111 | ap.add_argument('-w', 112 | '--weights', 113 | type=str, 114 | default='/home/pi/Documents/TCC/face-recognition/models/mobilefacenet/model_weights.h5', 115 | help='Path to the weights of the model') 116 | 117 | return vars(ap.parse_args()) 118 | 119 | def define_keras_functions(): 120 | def distillation_loss(y_true, y_pred): 121 | return K.square(y_pred - y_true) 122 | 123 | def max_diff(y_true, y_pred): 124 | return K.max(K.square(y_pred - y_true), axis=-1) 125 | 126 | def sum_diff(y_true, y_pred): 127 | return K.sum(K.square(y_pred - y_true), axis=-1) 128 | 129 | keras.losses.distillation_loss = distillation_loss 130 | keras.metrics.max_diff = max_diff 131 | keras.metrics.sum_diff = sum_diff 132 | 133 | 134 | if __name__ == "__main__": 135 | main(parse_arguments()) 136 | -------------------------------------------------------------------------------- /src/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tensorflow.python.platform import gfile 3 | import tensorflow as tf 4 | import numpy as np 5 | import math 6 | 7 | def load_model(model, input_map=None): 8 | """ Load model given its path. 
Currently only working with '.pb' saved models 9 | 10 | :param model: Path of where the model was saved 11 | :param input_map: Input map of the model, default to None 12 | """ 13 | model_exp = os.path.expanduser(model) 14 | assert os.path.isfile(model_exp), "Currently its only working with '.pb' model files. So your path should be one." 15 | 16 | print('Model filename: %s' % model_exp) 17 | with gfile.FastGFile(model_exp,'rb') as f: 18 | graph_def = tf.GraphDef() 19 | graph_def.ParseFromString(f.read()) 20 | tf.import_graph_def(graph_def, input_map=input_map, name='') 21 | 22 | def distance(embeddings1, embeddings2, distance_metric='euclidean'): 23 | """ Calculate the distance between two embeddings. Currently working with euclidean and cosine similarity. 24 | 25 | :param embeddings1: First embedding 26 | :param embeddings2: Second embedding 27 | :param distance_metric: Distance metric to be used to make the calculation. Should be either: 'euclidean' or 'cosine' 28 | 29 | :returns: The distance between the `embeddings1` and `embeddings2` 30 | """ 31 | assert distance_metric in ['euclidean', 'cosine'], "The distance metric should be either 'euclidean' or 'cosine'" 32 | 33 | if distance_metric == 'euclidean': 34 | diff = np.subtract(embeddings1, embeddings2) 35 | dist = np.sum(np.square(diff), 1) 36 | 37 | elif distance_metric == 'cosine': 38 | dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) 39 | norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) 40 | similarity = dot / norm 41 | dist = np.arccos(similarity) / math.pi 42 | 43 | return dist 44 | 45 | def build_dataset(path): 46 | """ Building a dataset given the path of the source folder. 47 | The source folder should be structured as described on the github wiki. 48 | 49 | :param path: The path of the source folder 50 | 51 | :returns: Three dictionaries - One with the encodings, and two others mapping names to indexes. 52 | """ 53 | dataset = {} 54 | 55 | people = os.listdir(path) 56 | for person in people: 57 | if person.startswith('.'): 58 | continue 59 | 60 | embs = np.load(path + '/' + person + '/' + person + '.npy') 61 | dataset[person] = embs 62 | 63 | names = iter(dataset.keys()) 64 | idxs = iter(np.arange(len(dataset))) 65 | 66 | names_to_idx = dict(zip(names, idxs)) 67 | idx_to_names = dict([x, v] for v, x in names_to_idx.items()) 68 | 69 | return dataset, names_to_idx, idx_to_names 70 | 71 | def get_image(dataset, name, chosen_n=-1): 72 | """ Given a dataset, get the chosen image in a person base. If the image index equals -1, returns a random image from the person. 73 | 74 | :params dataset: Dataset with known faces 75 | :params name: Name of the known person which the image should be returned 76 | :params chosen_n: The index of image from the given person. If equals to -1, returns a random image from that person. 77 | 78 | :returns: Embeddings from the face image of that given person 79 | """ 80 | assert name in dataset.keys(), "Name not found. Make sure that your name is present on your dataset." 81 | 82 | if chosen_n == -1: 83 | nrof_faces = dataset[name].shape[0] 84 | chosen_n = np.random.randint(nrof_faces) 85 | 86 | chosen = dataset[name][chosen_n] 87 | return chosen.reshape((1, *chosen.shape)) 88 | 89 | 90 | def predict_face(dataset, name_to_idx, idx_to_name, face, threshold=.1, distance_metric='euclidean'): 91 | """ Given the embeddings of a face and the dataset of known embeddings, predict if the person is present on our dataset or not. 
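The face is compared against every stored embedding, keeping the smallest distance per known
person; if even the closest match is farther away than `threshold`, 'Unknown' is returned.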
92 | 93 | :params dataset: Dataset with the known faces and their names 94 | :params name_to_idx: Dictionary with the mapping name to idx 95 | :params idx_to =_name: Dictionary with the mapping idx to name 96 | :params face: Array with the embeddings of a face 97 | :params threshold: Minimum acceptable distance between a known face and the face, if there aren't any known 98 | faces that fulfill this requirement, it will be predicted as "Unknown" 99 | :params distance_metric: Distance metric to be used to make the calculation. Should be either 'euclidean' or 'cosine' 100 | 101 | :returns: Name of the person, if present on the dataset, or "Unknown" if it does not meet the requirements 102 | """ 103 | distances = np.zeros(len(dataset)) 104 | 105 | for person in dataset.keys(): 106 | nrof_images = len(dataset[person]) 107 | for image in range(nrof_images): 108 | known_face = get_image(dataset, person, image) 109 | d = distance(face, known_face, distance_metric=distance_metric) 110 | 111 | if distances[name_to_idx[person]] == 0: 112 | distances[name_to_idx[person]] = d 113 | elif distances[name_to_idx[person]] > d: 114 | distances[name_to_idx[person]] = d 115 | 116 | idx_min = distances.argmin() 117 | if distances[idx_min] > threshold: 118 | return 'Unknown' 119 | print(idx_to_name) 120 | print(distances) 121 | 122 | return idx_to_name[idx_min].replace('_', ' ') -------------------------------------------------------------------------------- /src/facenet_live.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import imutils 3 | import argparse 4 | from imutils.video import VideoStream, FPS 5 | import time 6 | from detect_face.face_detector import FaceDetector 7 | import tensorflow as tf 8 | import utils 9 | import keras 10 | import keras.backend as K 11 | import json 12 | 13 | def main(args): 14 | 15 | print("[STARTING] Facenet ResNet v1 for Facial Recognition") 16 | print(".\n.\n.") 17 | print("[LOADING] Loading face detector...") 18 | detector = FaceDetector(args["cascade"]) 19 | 20 | print("[LOADING] Loading the faces dataset...") 21 | dataset, name_to_idx, idx_to_name = utils.build_dataset(args["dataset"]) 22 | 23 | print("[LOADING] Loading the Convolutional Neural Network model...") 24 | type_mode = args["type"] 25 | use_pi = args['run'] == 'raspberry' 26 | assert type_mode in ["MobileFaceNet", "FaceNet"], "Only MobileFaceNet or FaceNet are supported." 
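# Two loading paths: 'FaceNet' restores a frozen TensorFlow graph (.pb) and is run through
# a tf.Session, while 'MobileFaceNet' is rebuilt from its Keras JSON definition plus the
# .h5 weights. Both paths produce the embedding arrays consumed by utils.predict_face below.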
27 | 28 | if type_mode == 'FaceNet': 29 | start = time.time() 30 | sess = tf.Session() 31 | utils.load_model(args["model"]) 32 | 33 | images_placeholder = tf.get_default_graph().get_tensor_by_name("input:0") 34 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 35 | phase_train_placeholder = tf.get_default_graph().get_tensor_by_name("phase_train:0") 36 | print("[LOADING] Loading the FaceNet weights took %.2f" % (time.time() - start)) 37 | else: 38 | K.clear_session() 39 | define_keras_functions() 40 | with open(args["json"]) as f: 41 | start = time.time() 42 | model_json = json.load(f) 43 | model = keras.models.model_from_json(model_json) 44 | print("[LOADING] Loadng the Weights...") 45 | model.load_weights(args["weights"]) 46 | print("[LOADING] Loading the MobileFaceNet weights took %.2fs" % (time.time() - start)) 47 | 48 | print("[LOADING] Starting the video stream...") 49 | if use_pi: 50 | vs = VideoStream(usePiCamera=True).start() 51 | else: 52 | vs = VideoStream(src=0).start() 53 | time.sleep(2.0) 54 | fps = FPS().start() 55 | times = [] 56 | 57 | while True: 58 | frame = vs.read() 59 | frame = imutils.resize(frame, width=500) # Width of the frame is configurable 60 | 61 | gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) 62 | rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 63 | 64 | # Detect faces on the frame 65 | rects = detector.detect_faces(gray) 66 | nrof_faces = len(rects) 67 | if nrof_faces > 0: 68 | face_images = detector.extract_faces(rgb, rects) 69 | face_images = face_images / 255 70 | # Recognize the images 71 | if type_mode == 'FaceNet': 72 | start_time = time.time() 73 | feed_dict = {images_placeholder: face_images, phase_train_placeholder: False} 74 | embeddings_array = sess.run(embeddings, feed_dict=feed_dict) 75 | times.append(time.time() - start_time) 76 | else: 77 | start_time = time.time() 78 | embeddings_array = model.predict(face_images) 79 | times.append(time.time() - start_time) 80 | 81 | for idx, embedding in enumerate(embeddings_array): 82 | embedding = embedding.reshape((1, *embedding.shape)) 83 | predicted = utils.predict_face(dataset, 84 | name_to_idx, 85 | idx_to_name, 86 | embedding, 87 | threshold=3, 88 | distance_metric='cosine') 89 | x, y, w, h = rects[idx] 90 | color = (0, 0, 255) if predicted == "Unknown" else (0, 255, 0) 91 | cv2.rectangle(frame, (x, y+h), (x+w, y), color, 2) 92 | top = y+h-15 if y+h-15 > 15 else y+h+15 93 | cv2.putText(frame, predicted, (x, top), cv2.FONT_HERSHEY_SIMPLEX, 0.75, color, 2) 94 | 95 | # Display the image 96 | cv2.imshow("Frame", frame) 97 | 98 | key = cv2.waitKey(1) & 0xFF 99 | if key == ord('q'): 100 | break 101 | 102 | fps.update() 103 | 104 | fps.stop() 105 | print("[INFO] elapsed time: {:.2f}".format(fps.elapsed())) 106 | print("[INFO] approximated FPS: {:.2f}fps".format(fps.fps())) 107 | print("[INFO] approximated forward propagation time: {:.2f}s".format(sum(times)/len(times))) 108 | 109 | cv2.destroyAllWindows() 110 | vs.stop() 111 | 112 | 113 | def parse_arguments(): 114 | """ Parsing arguments to run variables to the main 115 | """ 116 | parser = argparse.ArgumentParser() 117 | 118 | parser.add_argument("-c", 119 | "--cascade", 120 | type=str, 121 | default="/home/pi/Documents/TCC/face-recognition/models/haarcascade/haarcascade_frontalface_default.xml", 122 | help="Path to the face cascade config files") 123 | parser.add_argument("-d", 124 | "--dataset", 125 | type=str, 126 | default="../datasets/tcc", 127 | help="Path datasets source folder") 128 | 129 | parser.add_argument("-m", 130 | 
"--model", 131 | type=str, 132 | default="/home/pi/Documents/TCC/face-recognition/models/facenet/20180402-114759.pb", 133 | help="Path to the CNN model") 134 | 135 | parser.add_argument("-t", 136 | "--type", 137 | type=str, 138 | default="MobileFaceNet", 139 | help="CNN architecture to be used") 140 | 141 | parser.add_argument("-j", 142 | "--json", 143 | type=str, 144 | default="/home/pi/Documents/TCC/face-recognition/models/mobilefacenet/model.json", 145 | help="Path to the JSON file") 146 | 147 | 148 | parser.add_argument("-w", 149 | "--weights", 150 | type=str, 151 | default="/home/pi/Documents/TCC/face-recognition/models/mobilefacenet/model_weights.h5", 152 | help="Path to the weights") 153 | 154 | parser.add_argument("-r", 155 | "--run", 156 | type=str, 157 | default="raspberry", 158 | help="Where to run, either Raspberry or PC") 159 | return vars(parser.parse_args()) 160 | 161 | 162 | def define_keras_functions(): 163 | def distillation_loss(y_true, y_pred): 164 | return K.square(y_pred - y_true) 165 | 166 | def max_diff(y_true, y_pred): 167 | return K.max(K.square(y_pred - y_true), axis=-1) 168 | 169 | def sum_diff(y_true, y_pred): 170 | return K.sum(K.square(y_pred - y_true), axis=-1) 171 | 172 | keras.losses.distillation_loss = distillation_loss 173 | keras.metrics.max_diff = max_diff 174 | keras.metrics.sum_diff = sum_diff 175 | 176 | 177 | if __name__ == "__main__": 178 | main(parse_arguments()) 179 | -------------------------------------------------------------------------------- /src/validate.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import math 4 | from sklearn.model_selection import KFold 5 | 6 | 7 | def read_pairs(path): 8 | pairs = [] 9 | with open(path, 'r') as f: 10 | for line in f.readlines()[1:]: 11 | pair = line.strip().split() 12 | pairs.append(pair) 13 | 14 | return np.array(pairs) 15 | 16 | 17 | def create_path(lfw_dir, pair, output): 18 | if len(pair) == 3: 19 | # TRUE 20 | path0 = os.path.join(lfw_dir, pair[0], output, pair[0] + '_' + '%04d' % int(pair[1])) + '.npy' 21 | path1 = os.path.join(lfw_dir, pair[0], output, pair[0] + '_' + '%04d' % int(pair[2])) + '.npy' 22 | is_same = True 23 | 24 | elif len(pair) == 4: 25 | # FALSE 26 | path0 = os.path.join(lfw_dir, pair[0], output, pair[0] + '_' + '%04d' % int(pair[1])) + '.npy' 27 | path1 = os.path.join(lfw_dir, pair[2], output, pair[2] + '_' + '%04d' % int(pair[3])) + '.npy' 28 | is_same = False 29 | 30 | else: 31 | raise RuntimeError('Error while reading the pair images. It was expected 3 or 4 elements per line\ ' 32 | 'but it was found %d elements.' % len(pair)) 33 | 34 | return path0, path1, is_same 35 | 36 | 37 | def get_paths(lfw_dir, pairs, output): 38 | nrof_skipped_pairs = 0 39 | path_list = [] 40 | is_same_list = [] 41 | 42 | for pair in pairs: 43 | path0, path1, is_same = create_path(lfw_dir, pair, output) 44 | 45 | if os.path.exists(path0) and os.path.exists(path1): 46 | path_list += (path0, path1) 47 | is_same_list.append(is_same) 48 | else: 49 | nrof_skipped_pairs += 1 50 | 51 | if nrof_skipped_pairs > 0: 52 | print("%d pairs couldn't be read." % nrof_skipped_pairs) 53 | 54 | return path_list, is_same_list 55 | 56 | 57 | def distance(embeddings1, embeddings2, distance_metric='euclidean'): 58 | """ Calculate the distance between two embeddings. Currently working with euclidean and cosine similarity. 
59 | 60 | :param embeddings1: First embedding 61 | :param embeddings2: Second embedding 62 | :param distance_metric: Distance metric to be used to make the calculation. Should be either: 'euclidean' or 'cosine' 63 | 64 | :returns: The distance between the `embeddings1` and `embeddings2` 65 | """ 66 | assert distance_metric in ['euclidean', 'cosine'], "The distance metric should be either 'euclidean' or 'cosine'" 67 | 68 | if distance_metric == 'euclidean': 69 | diff = np.subtract(embeddings1, embeddings2) 70 | dist = np.sum(np.square(diff), 1) 71 | 72 | elif distance_metric == 'cosine': 73 | dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) 74 | norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) 75 | similarity = dot / norm 76 | dist = np.arccos(similarity) / math.pi 77 | 78 | else: 79 | raise RuntimeError("Distance metric not found %s. It should be either 'cosine' or 'euclidean'" % distance_metric) 80 | 81 | return dist 82 | 83 | 84 | def load_embeddings(paths): 85 | nrof_skips = 0 86 | bt_size = len(paths) 87 | embeddings = np.zeros((bt_size, 512)) 88 | 89 | for i, path in enumerate(paths): 90 | if not os.path.exists(path): 91 | nrof_skips += 1 92 | continue 93 | 94 | emb = np.load(path) 95 | embeddings[i, :] = emb 96 | 97 | if nrof_skips > 0: 98 | print("There was %d skips when trying to read the embeddings.") 99 | 100 | return embeddings 101 | 102 | 103 | def calculate_accuracy(threshold, dist, actual_issame): 104 | predict_issame = np.less(dist, threshold) 105 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 106 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 107 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 108 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 109 | 110 | tpr = 0 if tp + fn == 0 else float(tp) / (tp + fn) 111 | fpr = 0 if fp + tn == 0 else float(fp) / (fp + tn) 112 | acc = float(tp + tn) / dist.size 113 | 114 | return tpr, fpr, acc 115 | 116 | 117 | def calculate_roc(thresholds, 118 | embeddings1, 119 | embeddings2, 120 | actual_issame, 121 | distance_metric='cosine', 122 | subtract_mean=True, 123 | nrof_folds=10): 124 | 125 | assert embeddings1.shape[0] == embeddings2.shape[0] 126 | assert embeddings1.shape[1] == embeddings2.shape[1] 127 | 128 | kfolds = KFold(n_splits=nrof_folds, shuffle=False) 129 | 130 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 131 | nrof_thresholds = len(thresholds) 132 | 133 | tprs = np.zeros((nrof_folds, nrof_thresholds)) 134 | fprs = np.zeros((nrof_folds, nrof_thresholds)) 135 | accuracy = np.zeros(nrof_folds) 136 | 137 | indices = np.arange(nrof_pairs) 138 | 139 | for fold_idx, (train_set, test_set) in enumerate(kfolds.split(indices)): 140 | if subtract_mean: 141 | mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]], axis=0)) 142 | else: 143 | mean = 0 144 | 145 | dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) 146 | acc_train = np.zeros(nrof_thresholds) 147 | 148 | for threshold_idx, threshold in enumerate(thresholds): 149 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 150 | best_threshold_idx = np.argmax(acc_train) 151 | 152 | for threshold_idx, threshold in enumerate(thresholds): 153 | tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold, 154 | dist[test_set], 155 | actual_issame[test_set]) 156 | _, _, accuracy[fold_idx] 
= calculate_accuracy(thresholds[best_threshold_idx], 157 | dist[test_set], 158 | actual_issame[test_set]) 159 | 160 | tpr = np.mean(tprs, 0) 161 | fpr = np.mean(fprs, 0) 162 | 163 | return tpr, fpr, accuracy 164 | 165 | 166 | def evaluate(embeddings, actual_issame, distance_metric='cosine', subtract_mean=False): 167 | thresholds = np.arange(0, 4, 0.01) 168 | embeddings1 = embeddings[0::2] 169 | embeddings2 = embeddings[1::2] 170 | tpr, fpr, acc = calculate_roc(thresholds, 171 | embeddings1, 172 | embeddings2, 173 | np.array(actual_issame), 174 | distance_metric, 175 | subtract_mean) 176 | 177 | return tpr, fpr, acc 178 | 179 | 180 | def main(): 181 | pairs_path = '/Users/pedroprates/Google Drive/FaceRecognition/data/pairs.txt' 182 | lfw_path = '/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/lfw_mtcnnpy_160' 183 | 184 | pairs = read_pairs(pairs_path) 185 | 186 | path_list, actual_issame = get_paths(lfw_path, pairs, 'output') 187 | embeddings = load_embeddings(path_list) 188 | tpr, fpr, acc = evaluate(embeddings, actual_issame, subtract_mean=True) 189 | 190 | print("TPR: %.2f" % tpr) 191 | print("FPR: %.2f" % fpr) 192 | print("Accuracy: %.2f" % acc) 193 | 194 | 195 | if __name__ == '__main__': 196 | main() -------------------------------------------------------------------------------- /src/align/align_dataset_mtcnn.py: -------------------------------------------------------------------------------- 1 | """Performs face alignment and stores face thumbnails in the output directory.""" 2 | # MIT License 3 | # 4 | # Copyright (c) 2016 David Sandberg 5 | # 6 | # Permission is hereby granted, free of charge, to any person obtaining a copy 7 | # of this software and associated documentation files (the "Software"), to deal 8 | # in the Software without restriction, including without limitation the rights 9 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | # copies of the Software, and to permit persons to whom the Software is 11 | # furnished to do so, subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included in all 14 | # copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | # SOFTWARE. 
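# Note: this script runs the three-stage MTCNN detector (PNet/RNet/ONet, whose weights live in
# det1.npy, det2.npy and det3.npy). When several faces are found and --detect_multiple_faces is
# not set, only the detection with the best size-versus-centre-offset score is kept.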
23 | 24 | from __future__ import absolute_import 25 | from __future__ import division 26 | from __future__ import print_function 27 | 28 | from scipy import misc 29 | import sys 30 | import os 31 | import argparse 32 | import tensorflow as tf 33 | import numpy as np 34 | import facenet 35 | import align.detect_face 36 | import random 37 | from time import sleep 38 | 39 | def main(args): 40 | sleep(random.random()) 41 | output_dir = os.path.expanduser(args.output_dir) 42 | if not os.path.exists(output_dir): 43 | os.makedirs(output_dir) 44 | # Store some git revision info in a text file in the log directory 45 | src_path,_ = os.path.split(os.path.realpath(__file__)) 46 | facenet.store_revision_info(src_path, output_dir, ' '.join(sys.argv)) 47 | dataset = facenet.get_dataset(args.input_dir) 48 | 49 | print('Creating networks and loading parameters') 50 | 51 | with tf.Graph().as_default(): 52 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) 53 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) 54 | with sess.as_default(): 55 | pnet, rnet, onet = align.detect_face.create_mtcnn(sess, None) 56 | 57 | minsize = 20 # minimum size of face 58 | threshold = [ 0.6, 0.7, 0.7 ] # three steps's threshold 59 | factor = 0.709 # scale factor 60 | 61 | # Add a random key to the filename to allow alignment using multiple processes 62 | random_key = np.random.randint(0, high=99999) 63 | bounding_boxes_filename = os.path.join(output_dir, 'bounding_boxes_%05d.txt' % random_key) 64 | 65 | with open(bounding_boxes_filename, "w") as text_file: 66 | nrof_images_total = 0 67 | nrof_successfully_aligned = 0 68 | if args.random_order: 69 | random.shuffle(dataset) 70 | for cls in dataset: 71 | output_class_dir = os.path.join(output_dir, cls.name) 72 | if not os.path.exists(output_class_dir): 73 | os.makedirs(output_class_dir) 74 | if args.random_order: 75 | random.shuffle(cls.image_paths) 76 | for image_path in cls.image_paths: 77 | nrof_images_total += 1 78 | filename = os.path.splitext(os.path.split(image_path)[1])[0] 79 | output_filename = os.path.join(output_class_dir, filename+'.png') 80 | print(image_path) 81 | if not os.path.exists(output_filename): 82 | try: 83 | img = misc.imread(image_path) 84 | except (IOError, ValueError, IndexError) as e: 85 | errorMessage = '{}: {}'.format(image_path, e) 86 | print(errorMessage) 87 | else: 88 | if img.ndim<2: 89 | print('Unable to align "%s"' % image_path) 90 | text_file.write('%s\n' % (output_filename)) 91 | continue 92 | if img.ndim == 2: 93 | img = facenet.to_rgb(img) 94 | img = img[:,:,0:3] 95 | 96 | bounding_boxes, _ = align.detect_face.detect_face(img, minsize, pnet, rnet, onet, threshold, factor) 97 | nrof_faces = bounding_boxes.shape[0] 98 | if nrof_faces>0: 99 | det = bounding_boxes[:,0:4] 100 | det_arr = [] 101 | img_size = np.asarray(img.shape)[0:2] 102 | if nrof_faces>1: 103 | if args.detect_multiple_faces: 104 | for i in range(nrof_faces): 105 | det_arr.append(np.squeeze(det[i])) 106 | else: 107 | bounding_box_size = (det[:,2]-det[:,0])*(det[:,3]-det[:,1]) 108 | img_center = img_size / 2 109 | offsets = np.vstack([ (det[:,0]+det[:,2])/2-img_center[1], (det[:,1]+det[:,3])/2-img_center[0] ]) 110 | offset_dist_squared = np.sum(np.power(offsets,2.0),0) 111 | index = np.argmax(bounding_box_size-offset_dist_squared*2.0) # some extra weight on the centering 112 | det_arr.append(det[index,:]) 113 | else: 114 | det_arr.append(np.squeeze(det)) 115 | 116 | for i, det in enumerate(det_arr): 
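# Each selected detection is padded by margin/2 pixels on every side (clamped to the image
# borders), cropped, resized to image_size x image_size and saved as a PNG thumbnail.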
117 | det = np.squeeze(det) 118 | bb = np.zeros(4, dtype=np.int32) 119 | bb[0] = np.maximum(det[0]-args.margin/2, 0) 120 | bb[1] = np.maximum(det[1]-args.margin/2, 0) 121 | bb[2] = np.minimum(det[2]+args.margin/2, img_size[1]) 122 | bb[3] = np.minimum(det[3]+args.margin/2, img_size[0]) 123 | cropped = img[bb[1]:bb[3],bb[0]:bb[2],:] 124 | scaled = misc.imresize(cropped, (args.image_size, args.image_size), interp='bilinear') 125 | nrof_successfully_aligned += 1 126 | filename_base, file_extension = os.path.splitext(output_filename) 127 | if args.detect_multiple_faces: 128 | output_filename_n = "{}_{}{}".format(filename_base, i, file_extension) 129 | else: 130 | output_filename_n = "{}{}".format(filename_base, file_extension) 131 | misc.imsave(output_filename_n, scaled) 132 | text_file.write('%s %d %d %d %d\n' % (output_filename_n, bb[0], bb[1], bb[2], bb[3])) 133 | else: 134 | print('Unable to align "%s"' % image_path) 135 | text_file.write('%s\n' % (output_filename)) 136 | 137 | print('Total number of images: %d' % nrof_images_total) 138 | print('Number of successfully aligned images: %d' % nrof_successfully_aligned) 139 | 140 | 141 | def parse_arguments(argv): 142 | parser = argparse.ArgumentParser() 143 | 144 | parser.add_argument('input_dir', type=str, help='Directory with unaligned images.') 145 | parser.add_argument('output_dir', type=str, help='Directory with aligned face thumbnails.') 146 | parser.add_argument('--image_size', type=int, 147 | help='Image size (height, width) in pixels.', default=182) 148 | parser.add_argument('--margin', type=int, 149 | help='Margin for the crop around the bounding box (height, width) in pixels.', default=44) 150 | parser.add_argument('--random_order', 151 | help='Shuffles the order of images to enable alignment using multiple processes.', action='store_true') 152 | parser.add_argument('--gpu_memory_fraction', type=float, 153 | help='Upper bound on the amount of GPU memory that will be used by the process.', default=1.0) 154 | parser.add_argument('--detect_multiple_faces', type=bool, 155 | help='Detect and align multiple faces per image.', default=False) 156 | return parser.parse_args(argv) 157 | 158 | if __name__ == '__main__': 159 | main(parse_arguments(sys.argv[1:])) 160 | -------------------------------------------------------------------------------- /src/validate_on_lfw.py: -------------------------------------------------------------------------------- 1 | """Validate a face recognizer on the "Labeled Faces in the Wild" dataset (http://vis-www.cs.umass.edu/lfw/). 2 | Embeddings are calculated using the pairs from http://vis-www.cs.umass.edu/lfw/pairs.txt and the ROC curve 3 | is calculated and plotted. Both the model metagraph and the model parameters need to exist 4 | in the same directory, and the metagraph should have the extension '.meta'. 5 | """ 6 | # MIT License 7 | # 8 | # Copyright (c) 2016 David Sandberg 9 | # 10 | # Permission is hereby granted, free of charge, to any person obtaining a copy 11 | # of this software and associated documentation files (the "Software"), to deal 12 | # in the Software without restriction, including without limitation the rights 13 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | # copies of the Software, and to permit persons to whom the Software is 15 | # furnished to do so, subject to the following conditions: 16 | # 17 | # The above copyright notice and this permission notice shall be included in all 18 | # copies or substantial portions of the Software. 
19 | # 20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | # SOFTWARE. 27 | 28 | from __future__ import absolute_import 29 | from __future__ import division 30 | from __future__ import print_function 31 | 32 | import tensorflow as tf 33 | import numpy as np 34 | import argparse 35 | import facenet 36 | import lfw 37 | import os 38 | import sys 39 | from tensorflow.python.ops import data_flow_ops 40 | from sklearn import metrics 41 | from scipy.optimize import brentq 42 | from scipy import interpolate 43 | 44 | 45 | def main(args): 46 | 47 | with tf.Graph().as_default(): 48 | 49 | with tf.Session() as sess: 50 | 51 | # Read the file containing the pairs used for testing 52 | pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) 53 | 54 | # Get the paths for the corresponding images 55 | paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs) 56 | 57 | image_paths_placeholder = tf.placeholder(tf.string, shape=(None,1), name='image_paths') 58 | labels_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='labels') 59 | batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') 60 | control_placeholder = tf.placeholder(tf.int32, shape=(None,1), name='control') 61 | phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') 62 | 63 | nrof_preprocess_threads = 4 64 | image_size = (args.image_size, args.image_size) 65 | eval_input_queue = data_flow_ops.FIFOQueue(capacity=2000000, 66 | dtypes=[tf.string, tf.int32, tf.int32], 67 | shapes=[(1,), (1,), (1,)], 68 | shared_name=None, name=None) 69 | eval_enqueue_op = eval_input_queue.enqueue_many([image_paths_placeholder, labels_placeholder, control_placeholder], name='eval_enqueue_op') 70 | image_batch, label_batch = facenet.create_input_pipeline(eval_input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder) 71 | 72 | # Load the model 73 | input_map = {'image_batch': image_batch, 'label_batch': label_batch, 'phase_train': phase_train_placeholder} 74 | facenet.load_model(args.model, input_map=input_map) 75 | 76 | # Get output tensor 77 | embeddings = tf.get_default_graph().get_tensor_by_name("embeddings:0") 78 | 79 | coord = tf.train.Coordinator() 80 | tf.train.start_queue_runners(coord=coord, sess=sess) 81 | 82 | evaluate(sess, eval_enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, 83 | embeddings, label_batch, paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, args.distance_metric, args.subtract_mean, 84 | args.use_flipped_images, args.use_fixed_image_standardization) 85 | 86 | 87 | def evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, 88 | embeddings, labels, image_paths, actual_issame, batch_size, nrof_folds, distance_metric, subtract_mean, use_flipped_images, use_fixed_image_standardization): 89 | # Run forward pass to calculate embeddings 90 | print('Running forward pass on LFW images') 91 | 92 | # Enqueue one epoch of image paths and labels 93 | nrof_embeddings 
= len(actual_issame)*2 # nrof_pairs * nrof_images_per_pair 94 | nrof_flips = 2 if use_flipped_images else 1 95 | nrof_images = nrof_embeddings * nrof_flips 96 | labels_array = np.expand_dims(np.arange(0,nrof_images),1) 97 | image_paths_array = np.expand_dims(np.repeat(np.array(image_paths),nrof_flips),1) 98 | control_array = np.zeros_like(labels_array, np.int32) 99 | if use_fixed_image_standardization: 100 | control_array += np.ones_like(labels_array)*facenet.FIXED_STANDARDIZATION 101 | if use_flipped_images: 102 | # Flip every second image 103 | control_array += (labels_array % 2)*facenet.FLIP 104 | sess.run(enqueue_op, {image_paths_placeholder: image_paths_array, labels_placeholder: labels_array, control_placeholder: control_array}) 105 | 106 | embedding_size = int(embeddings.get_shape()[1]) 107 | assert nrof_images % batch_size == 0, 'The number of LFW images must be an integer multiple of the LFW batch size' 108 | nrof_batches = nrof_images // batch_size 109 | emb_array = np.zeros((nrof_images, embedding_size)) 110 | lab_array = np.zeros((nrof_images,)) 111 | for i in range(nrof_batches): 112 | feed_dict = {phase_train_placeholder:False, batch_size_placeholder:batch_size} 113 | emb, lab = sess.run([embeddings, labels], feed_dict=feed_dict) 114 | lab_array[lab] = lab 115 | emb_array[lab, :] = emb 116 | if i % 10 == 9: 117 | print('.', end='') 118 | sys.stdout.flush() 119 | print('') 120 | embeddings = np.zeros((nrof_embeddings, embedding_size*nrof_flips)) 121 | if use_flipped_images: 122 | # Concatenate embeddings for flipped and non flipped version of the images 123 | embeddings[:,:embedding_size] = emb_array[0::2,:] 124 | embeddings[:,embedding_size:] = emb_array[1::2,:] 125 | else: 126 | embeddings = emb_array 127 | 128 | np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/all_lfw.npy', embeddings) 129 | 130 | assert np.array_equal(lab_array, np.arange(nrof_images))==True, 'Wrong labels used for evaluation, ' \ 131 | 'possibly caused by training examples left ' \ 132 | 'in the input pipeline' 133 | tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(embeddings, 134 | actual_issame, 135 | nrof_folds=nrof_folds, 136 | distance_metric=distance_metric, 137 | subtract_mean=subtract_mean) 138 | 139 | print('Accuracy: %2.5f+-%2.5f' % (np.mean(accuracy), np.std(accuracy))) 140 | print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) 141 | 142 | auc = metrics.auc(fpr, tpr) 143 | print('Area Under Curve (AUC): %1.3f' % auc) 144 | eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.) 145 | print('Equal Error Rate (EER): %1.3f' % eer) 146 | 147 | 148 | def parse_arguments(argv): 149 | parser = argparse.ArgumentParser() 150 | 151 | parser.add_argument('lfw_dir', type=str, 152 | help='Path to the data directory containing aligned LFW face patches.') 153 | parser.add_argument('--lfw_batch_size', type=int, 154 | help='Number of images to process in a batch in the LFW test set.', default=100) 155 | parser.add_argument('model', type=str, 156 | help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file') 157 | parser.add_argument('--image_size', type=int, 158 | help='Image size (height, width) in pixels.', default=160) 159 | parser.add_argument('--lfw_pairs', type=str, 160 | help='The file containing the pairs to use for validation.', default='../data/pairs.txt') 161 | parser.add_argument('--lfw_nrof_folds', type=int, 162 | help='Number of folds to use for cross validation. 
Mainly used for testing.', default=10) 163 | parser.add_argument('--distance_metric', type=int, 164 | help='Distance metric 0:euclidian, 1:cosine similarity.', default=0) 165 | parser.add_argument('--use_flipped_images', 166 | help='Concatenates embeddings for the image and its horizontally flipped counterpart.', action='store_true') 167 | parser.add_argument('--subtract_mean', 168 | help='Subtract feature mean before calculating distance.', action='store_true') 169 | parser.add_argument('--use_fixed_image_standardization', 170 | help='Performs fixed standardization of images.', action='store_true') 171 | return parser.parse_args(argv) 172 | 173 | 174 | if __name__ == '__main__': 175 | main(parse_arguments(sys.argv[1:])) 176 | -------------------------------------------------------------------------------- /src/applications/imagenet_utils.py: -------------------------------------------------------------------------------- 1 | """Utilities for ImageNet data preprocessing & prediction decoding. 2 | """ 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | 7 | import json 8 | import warnings 9 | import numpy as np 10 | 11 | from . import get_submodules_from_kwargs 12 | 13 | CLASS_INDEX = None 14 | CLASS_INDEX_PATH = ('https://s3.amazonaws.com/deep-learning-models/' 15 | 'image-models/imagenet_class_index.json') 16 | 17 | # Global tensor of imagenet mean for preprocessing symbolic inputs 18 | _IMAGENET_MEAN = None 19 | 20 | 21 | def _preprocess_numpy_input(x, data_format, mode, **kwargs): 22 | """Preprocesses a Numpy array encoding a batch of images. 23 | # Arguments 24 | x: Input array, 3D or 4D. 25 | data_format: Data format of the image array. 26 | mode: One of "caffe", "tf" or "torch". 27 | - caffe: will convert the images from RGB to BGR, 28 | then will zero-center each color channel with 29 | respect to the ImageNet dataset, 30 | without scaling. 31 | - tf: will scale pixels between -1 and 1, 32 | sample-wise. 33 | - torch: will scale pixels between 0 and 1 and then 34 | will normalize each channel with respect to the 35 | ImageNet dataset. 36 | # Returns 37 | Preprocessed Numpy array. 38 | """ 39 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 40 | if not issubclass(x.dtype.type, np.floating): 41 | x = x.astype(backend.floatx(), copy=False) 42 | 43 | if mode == 'tf': 44 | x /= 127.5 45 | x -= 1. 46 | return x 47 | 48 | if mode == 'torch': 49 | x /= 255. 50 | mean = [0.485, 0.456, 0.406] 51 | std = [0.229, 0.224, 0.225] 52 | else: 53 | if data_format == 'channels_first': 54 | # 'RGB'->'BGR' 55 | if x.ndim == 3: 56 | x = x[::-1, ...] 57 | else: 58 | x = x[:, ::-1, ...] 
59 | else: 60 | # 'RGB'->'BGR' 61 | x = x[..., ::-1] 62 | mean = [103.939, 116.779, 123.68] 63 | std = None 64 | 65 | # Zero-center by mean pixel 66 | if data_format == 'channels_first': 67 | if x.ndim == 3: 68 | x[0, :, :] -= mean[0] 69 | x[1, :, :] -= mean[1] 70 | x[2, :, :] -= mean[2] 71 | if std is not None: 72 | x[0, :, :] /= std[0] 73 | x[1, :, :] /= std[1] 74 | x[2, :, :] /= std[2] 75 | else: 76 | x[:, 0, :, :] -= mean[0] 77 | x[:, 1, :, :] -= mean[1] 78 | x[:, 2, :, :] -= mean[2] 79 | if std is not None: 80 | x[:, 0, :, :] /= std[0] 81 | x[:, 1, :, :] /= std[1] 82 | x[:, 2, :, :] /= std[2] 83 | else: 84 | x[..., 0] -= mean[0] 85 | x[..., 1] -= mean[1] 86 | x[..., 2] -= mean[2] 87 | if std is not None: 88 | x[..., 0] /= std[0] 89 | x[..., 1] /= std[1] 90 | x[..., 2] /= std[2] 91 | return x 92 | 93 | 94 | def _preprocess_symbolic_input(x, data_format, mode, **kwargs): 95 | """Preprocesses a tensor encoding a batch of images. 96 | # Arguments 97 | x: Input tensor, 3D or 4D. 98 | data_format: Data format of the image tensor. 99 | mode: One of "caffe", "tf" or "torch". 100 | - caffe: will convert the images from RGB to BGR, 101 | then will zero-center each color channel with 102 | respect to the ImageNet dataset, 103 | without scaling. 104 | - tf: will scale pixels between -1 and 1, 105 | sample-wise. 106 | - torch: will scale pixels between 0 and 1 and then 107 | will normalize each channel with respect to the 108 | ImageNet dataset. 109 | # Returns 110 | Preprocessed tensor. 111 | """ 112 | global _IMAGENET_MEAN 113 | 114 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 115 | 116 | if mode == 'tf': 117 | x /= 127.5 118 | x -= 1. 119 | return x 120 | 121 | if mode == 'torch': 122 | x /= 255. 123 | mean = [0.485, 0.456, 0.406] 124 | std = [0.229, 0.224, 0.225] 125 | else: 126 | if data_format == 'channels_first': 127 | # 'RGB'->'BGR' 128 | if backend.ndim(x) == 3: 129 | x = x[::-1, ...] 130 | else: 131 | x = x[:, ::-1, ...] 132 | else: 133 | # 'RGB'->'BGR' 134 | x = x[..., ::-1] 135 | mean = [103.939, 116.779, 123.68] 136 | std = None 137 | 138 | if _IMAGENET_MEAN is None: 139 | _IMAGENET_MEAN = backend.constant(-np.array(mean)) 140 | 141 | # Zero-center by mean pixel 142 | if backend.dtype(x) != backend.dtype(_IMAGENET_MEAN): 143 | x = backend.bias_add( 144 | x, backend.cast(_IMAGENET_MEAN, backend.dtype(x)), 145 | data_format=data_format) 146 | else: 147 | x = backend.bias_add(x, _IMAGENET_MEAN, data_format) 148 | if std is not None: 149 | x /= std 150 | return x 151 | 152 | 153 | def preprocess_input(x, data_format=None, mode='caffe', **kwargs): 154 | """Preprocesses a tensor or Numpy array encoding a batch of images. 155 | # Arguments 156 | x: Input Numpy or symbolic tensor, 3D or 4D. 157 | The preprocessed data is written over the input data 158 | if the data types are compatible. To avoid this 159 | behaviour, `numpy.copy(x)` can be used. 160 | data_format: Data format of the image tensor/array. 161 | mode: One of "caffe", "tf" or "torch". 162 | - caffe: will convert the images from RGB to BGR, 163 | then will zero-center each color channel with 164 | respect to the ImageNet dataset, 165 | without scaling. 166 | - tf: will scale pixels between -1 and 1, 167 | sample-wise. 168 | - torch: will scale pixels between 0 and 1 and then 169 | will normalize each channel with respect to the 170 | ImageNet dataset. 171 | # Returns 172 | Preprocessed tensor or Numpy array. 173 | # Raises 174 | ValueError: In case of unknown `data_format` argument. 
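    # Example
        (Illustrative note, not part of the upstream docstring.)
        For `mode='tf'` a pixel value p in [0, 255] maps to p / 127.5 - 1,
        so 0 becomes -1.0 and 255 becomes 1.0. For `mode='caffe'` the
        channels are reordered RGB -> BGR and the ImageNet means
        [103.939, 116.779, 123.68] are subtracted, without scaling.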
175 | """ 176 | backend, _, _, _ = get_submodules_from_kwargs(kwargs) 177 | 178 | if data_format is None: 179 | data_format = backend.image_data_format() 180 | if data_format not in {'channels_first', 'channels_last'}: 181 | raise ValueError('Unknown data_format ' + str(data_format)) 182 | 183 | if isinstance(x, np.ndarray): 184 | return _preprocess_numpy_input(x, data_format=data_format, 185 | mode=mode, **kwargs) 186 | else: 187 | return _preprocess_symbolic_input(x, data_format=data_format, 188 | mode=mode, **kwargs) 189 | 190 | 191 | def decode_predictions(preds, top=5, **kwargs): 192 | """Decodes the prediction of an ImageNet model. 193 | # Arguments 194 | preds: Numpy tensor encoding a batch of predictions. 195 | top: Integer, how many top-guesses to return. 196 | # Returns 197 | A list of lists of top class prediction tuples 198 | `(class_name, class_description, score)`. 199 | One list of tuples per sample in batch input. 200 | # Raises 201 | ValueError: In case of invalid shape of the `pred` array 202 | (must be 2D). 203 | """ 204 | global CLASS_INDEX 205 | 206 | backend, _, _, keras_utils = get_submodules_from_kwargs(kwargs) 207 | 208 | if len(preds.shape) != 2 or preds.shape[1] != 1000: 209 | raise ValueError('`decode_predictions` expects ' 210 | 'a batch of predictions ' 211 | '(i.e. a 2D array of shape (samples, 1000)). ' 212 | 'Found array with shape: ' + str(preds.shape)) 213 | if CLASS_INDEX is None: 214 | fpath = keras_utils.get_file( 215 | 'imagenet_class_index.json', 216 | CLASS_INDEX_PATH, 217 | cache_subdir='models', 218 | file_hash='c2c37ea517e94d9795004a39431a14cb') 219 | with open(fpath) as f: 220 | CLASS_INDEX = json.load(f) 221 | results = [] 222 | for pred in preds: 223 | top_indices = pred.argsort()[-top:][::-1] 224 | result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] 225 | result.sort(key=lambda x: x[2], reverse=True) 226 | results.append(result) 227 | return results 228 | 229 | 230 | def _obtain_input_shape(input_shape, 231 | default_size, 232 | min_size, 233 | data_format, 234 | require_flatten, 235 | weights=None): 236 | """Internal utility to compute/validate a model's input shape. 237 | # Arguments 238 | input_shape: Either None (will return the default network input shape), 239 | or a user-provided shape to be validated. 240 | default_size: Default input width/height for the model. 241 | min_size: Minimum input width/height accepted by the model. 242 | data_format: Image data format to use. 243 | require_flatten: Whether the model is expected to 244 | be linked to a classifier via a Flatten layer. 245 | weights: One of `None` (random initialization) 246 | or 'imagenet' (pre-training on ImageNet). 247 | If weights='imagenet' input channels must be equal to 3. 248 | # Returns 249 | An integer shape tuple (may include None entries). 250 | # Raises 251 | ValueError: In case of invalid argument values. 252 | """ 253 | if weights != 'imagenet' and input_shape and len(input_shape) == 3: 254 | if data_format == 'channels_first': 255 | if input_shape[0] not in {1, 3}: 256 | warnings.warn( 257 | 'This model usually expects 1 or 3 input channels. ' 258 | 'However, it was passed an input_shape with ' + 259 | str(input_shape[0]) + ' input channels.') 260 | default_shape = (input_shape[0], default_size, default_size) 261 | else: 262 | if input_shape[-1] not in {1, 3}: 263 | warnings.warn( 264 | 'This model usually expects 1 or 3 input channels. 
' 265 | 'However, it was passed an input_shape with ' + 266 | str(input_shape[-1]) + ' input channels.') 267 | default_shape = (default_size, default_size, input_shape[-1]) 268 | else: 269 | if data_format == 'channels_first': 270 | default_shape = (3, default_size, default_size) 271 | else: 272 | default_shape = (default_size, default_size, 3) 273 | if weights == 'imagenet' and require_flatten: 274 | if input_shape is not None: 275 | if input_shape != default_shape: 276 | raise ValueError('When setting`include_top=True` ' 277 | 'and loading `imagenet` weights, ' 278 | '`input_shape` should be ' + 279 | str(default_shape) + '.') 280 | return default_shape 281 | if input_shape: 282 | if data_format == 'channels_first': 283 | if input_shape is not None: 284 | if len(input_shape) != 3: 285 | raise ValueError( 286 | '`input_shape` must be a tuple of three integers.') 287 | if input_shape[0] != 3 and weights == 'imagenet': 288 | raise ValueError('The input must have 3 channels; got ' 289 | '`input_shape=' + str(input_shape) + '`') 290 | if ((input_shape[1] is not None and input_shape[1] < min_size) or 291 | (input_shape[2] is not None and input_shape[2] < min_size)): 292 | raise ValueError('Input size must be at least ' + 293 | str(min_size) + 'x' + str(min_size) + 294 | '; got `input_shape=' + 295 | str(input_shape) + '`') 296 | else: 297 | if input_shape is not None: 298 | if len(input_shape) != 3: 299 | raise ValueError( 300 | '`input_shape` must be a tuple of three integers.') 301 | if input_shape[-1] != 3 and weights == 'imagenet': 302 | raise ValueError('The input must have 3 channels; got ' 303 | '`input_shape=' + str(input_shape) + '`') 304 | if ((input_shape[0] is not None and input_shape[0] < min_size) or 305 | (input_shape[1] is not None and input_shape[1] < min_size)): 306 | raise ValueError('Input size must be at least ' + 307 | str(min_size) + 'x' + str(min_size) + 308 | '; got `input_shape=' + 309 | str(input_shape) + '`') 310 | else: 311 | if require_flatten: 312 | input_shape = default_shape 313 | else: 314 | if data_format == 'channels_first': 315 | input_shape = (3, None, None) 316 | else: 317 | input_shape = (None, None, 3) 318 | if require_flatten: 319 | if None in input_shape: 320 | raise ValueError('If `include_top` is True, ' 321 | 'you should specify a static `input_shape`. ' 322 | 'Got `input_shape=' + str(input_shape) + '`') 323 | return input_shape 324 | -------------------------------------------------------------------------------- /src/applications/mobilenet.py: -------------------------------------------------------------------------------- 1 | """MobileNet v1 models for Keras. 2 | MobileNet is a general architecture and can be used for multiple use cases. 3 | Depending on the use case, it can use different input layer size and 4 | different width factors. This allows different width models to reduce 5 | the number of multiply-adds and thereby 6 | reduce inference cost on mobile devices. 7 | MobileNets support any input size greater than 32 x 32, with larger image sizes 8 | offering better performance. 9 | The number of parameters and number of multiply-adds 10 | can be modified by using the `alpha` parameter, 11 | which increases/decreases the number of filters in each layer. 12 | By altering the image size and `alpha` parameter, 13 | all 16 models from the paper can be built, with ImageNet weights provided. 14 | The paper demonstrates the performance of MobileNets using `alpha` values of 15 | 1.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. 
16 | For each of these `alpha` values, weights for 4 different input image sizes 17 | are provided (224, 192, 160, 128). 18 | The following table describes the size and accuracy of the 100% MobileNet 19 | on size 224 x 224: 20 | ---------------------------------------------------------------------------- 21 | Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) 22 | ---------------------------------------------------------------------------- 23 | | 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | 24 | | 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 | 25 | | 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 | 26 | | 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 | 27 | ---------------------------------------------------------------------------- 28 | The following table describes the performance of 29 | the 100 % MobileNet on various input sizes: 30 | ------------------------------------------------------------------------ 31 | Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) 32 | ------------------------------------------------------------------------ 33 | | 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | 34 | | 1.0 MobileNet-192 | 69.1 % | 529 | 4.2 | 35 | | 1.0 MobileNet-160 | 67.2 % | 529 | 4.2 | 36 | | 1.0 MobileNet-128 | 64.4 % | 529 | 4.2 | 37 | ------------------------------------------------------------------------ 38 | The weights for all 16 models are obtained and translated 39 | from TensorFlow checkpoints found at 40 | https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md 41 | # Reference 42 | - [MobileNets: Efficient Convolutional Neural Networks for 43 | Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)) 44 | """ 45 | from __future__ import print_function 46 | from __future__ import absolute_import 47 | from __future__ import division 48 | 49 | import os 50 | import warnings 51 | 52 | from . import get_submodules_from_kwargs 53 | from . import imagenet_utils 54 | # from .imagenet_utils import decode_predictions 55 | from .imagenet_utils import _obtain_input_shape 56 | 57 | 58 | BASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/' 59 | 'releases/download/v0.6/') 60 | 61 | backend = None 62 | layers = None 63 | models = None 64 | keras_utils = None 65 | 66 | 67 | def preprocess_input(x, **kwargs): 68 | """Preprocesses a numpy array encoding a batch of images. 69 | # Arguments 70 | x: a 4D numpy array consists of RGB values within [0, 255]. 71 | # Returns 72 | Preprocessed array. 73 | """ 74 | return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) 75 | 76 | 77 | def MobileNet(input_shape=None, 78 | alpha=1.0, 79 | depth_multiplier=1, 80 | dropout=1e-3, 81 | include_top=True, 82 | weights='imagenet', 83 | input_tensor=None, 84 | pooling=None, 85 | classes=1000, 86 | **kwargs): 87 | """Instantiates the MobileNet architecture. 88 | # Arguments 89 | input_shape: optional shape tuple, only to be specified 90 | if `include_top` is False (otherwise the input shape 91 | has to be `(224, 224, 3)` 92 | (with `channels_last` data format) 93 | or (3, 224, 224) (with `channels_first` data format). 94 | It should have exactly 3 inputs channels, 95 | and width and height should be no smaller than 32. 96 | E.g. `(200, 200, 3)` would be one valid value. 97 | alpha: controls the width of the network. 98 | - If `alpha` < 1.0, proportionally decreases the number 99 | of filters in each layer. 100 | - If `alpha` > 1.0, proportionally increases the number 101 | of filters in each layer. 
102 | - If `alpha` = 1, default number of filters from the paper 103 | are used at each layer. 104 | depth_multiplier: depth multiplier for depthwise convolution 105 | (also called the resolution multiplier) 106 | dropout: dropout rate 107 | include_top: whether to include the fully-connected 108 | layer at the top of the network. 109 | weights: one of `None` (random initialization), 110 | 'imagenet' (pre-training on ImageNet), 111 | or the path to the weights file to be loaded. 112 | input_tensor: optional Keras tensor (i.e. output of 113 | `layers.Input()`) 114 | to use as image input for the model. 115 | pooling: Optional pooling mode for feature extraction 116 | when `include_top` is `False`. 117 | - `None` means that the output of the model 118 | will be the 4D tensor output of the 119 | last convolutional layer. 120 | - `avg` means that global average pooling 121 | will be applied to the output of the 122 | last convolutional layer, and thus 123 | the output of the model will be a 124 | 2D tensor. 125 | - `max` means that global max pooling will 126 | be applied. 127 | classes: optional number of classes to classify images 128 | into, only to be specified if `include_top` is True, and 129 | if no `weights` argument is specified. 130 | # Returns 131 | A Keras model instance. 132 | # Raises 133 | ValueError: in case of invalid argument for `weights`, 134 | or invalid input shape. 135 | RuntimeError: If attempting to run this model with a 136 | backend that does not support separable convolutions. 137 | """ 138 | global backend, layers, models, keras_utils 139 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 140 | 141 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 142 | raise ValueError('The `weights` argument should be either ' 143 | '`None` (random initialization), `imagenet` ' 144 | '(pre-training on ImageNet), ' 145 | 'or the path to the weights file to be loaded.') 146 | 147 | if weights == 'imagenet' and include_top and classes != 1000: 148 | raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 149 | 'as true, `classes` should be 1000') 150 | 151 | # Determine proper input shape and default size. 152 | if input_shape is None: 153 | default_size = 224 154 | else: 155 | if backend.image_data_format() == 'channels_first': 156 | rows = input_shape[1] 157 | cols = input_shape[2] 158 | else: 159 | rows = input_shape[0] 160 | cols = input_shape[1] 161 | 162 | if rows == cols and rows in [128, 160, 192, 224]: 163 | default_size = rows 164 | else: 165 | default_size = 224 166 | 167 | input_shape = _obtain_input_shape(input_shape, 168 | default_size=default_size, 169 | min_size=32, 170 | data_format=backend.image_data_format(), 171 | require_flatten=include_top, 172 | weights=weights) 173 | 174 | if backend.image_data_format() == 'channels_last': 175 | row_axis, col_axis = (0, 1) 176 | else: 177 | row_axis, col_axis = (1, 2) 178 | rows = input_shape[row_axis] 179 | cols = input_shape[col_axis] 180 | 181 | if weights == 'imagenet': 182 | if depth_multiplier != 1: 183 | raise ValueError('If imagenet weights are being loaded, ' 184 | 'depth multiplier must be 1') 185 | 186 | if alpha not in [0.25, 0.50, 0.75, 1.0]: 187 | raise ValueError('If imagenet weights are being loaded, ' 188 | 'alpha can be one of' 189 | '`0.25`, `0.50`, `0.75` or `1.0` only.') 190 | 191 | if rows != cols or rows not in [128, 160, 192, 224]: 192 | if rows is None: 193 | rows = 224 194 | warnings.warn('MobileNet shape is undefined.' 
195 | ' Weights for input shape ' 196 | '(224, 224) will be loaded.') 197 | else: 198 | raise ValueError('If imagenet weights are being loaded, ' 199 | 'input must have a static square shape ' 200 | '(one of (128, 128), (160, 160), ' 201 | '(192, 192), or (224, 224)). ' 202 | 'Input shape provided = %s' % (input_shape,)) 203 | 204 | if backend.image_data_format() != 'channels_last': 205 | warnings.warn('The MobileNet family of models is only available ' 206 | 'for the input data format "channels_last" ' 207 | '(width, height, channels). ' 208 | 'However your settings specify the default ' 209 | 'data format "channels_first" (channels, width, height).' 210 | ' You should set `image_data_format="channels_last"` ' 211 | 'in your Keras config located at ~/.keras/keras.json. ' 212 | 'The model being returned right now will expect inputs ' 213 | 'to follow the "channels_last" data format.') 214 | backend.set_image_data_format('channels_last') 215 | old_data_format = 'channels_first' 216 | else: 217 | old_data_format = None 218 | 219 | if input_tensor is None: 220 | img_input = layers.Input(shape=input_shape) 221 | else: 222 | if not backend.is_keras_tensor(input_tensor): 223 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 224 | else: 225 | img_input = input_tensor 226 | 227 | x = _conv_block(img_input, 32, alpha, strides=(2, 2)) 228 | x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) 229 | 230 | x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, 231 | strides=(2, 2), block_id=2) 232 | x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) 233 | 234 | x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, 235 | strides=(2, 2), block_id=4) 236 | x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) 237 | 238 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, 239 | strides=(2, 2), block_id=6) 240 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) 241 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) 242 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) 243 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) 244 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) 245 | 246 | x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, 247 | strides=(2, 2), block_id=12) 248 | x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) 249 | 250 | if include_top: 251 | if backend.image_data_format() == 'channels_first': 252 | shape = (int(1024 * alpha), 1, 1) 253 | else: 254 | shape = (1, 1, int(1024 * alpha)) 255 | 256 | x = layers.GlobalAveragePooling2D()(x) 257 | x = layers.Reshape(shape, name='reshape_1')(x) 258 | x = layers.Dropout(dropout, name='dropout')(x) 259 | x = layers.Conv2D(classes, (1, 1), 260 | padding='same', 261 | name='conv_preds')(x) 262 | x = layers.Activation('softmax', name='act_softmax')(x) 263 | x = layers.Reshape((classes,), name='reshape_2')(x) 264 | else: 265 | if pooling == 'avg': 266 | x = layers.GlobalAveragePooling2D()(x) 267 | elif pooling == 'max': 268 | x = layers.GlobalMaxPooling2D()(x) 269 | 270 | # Ensure that the model takes into account 271 | # any potential predecessors of `input_tensor`. 272 | if input_tensor is not None: 273 | inputs = keras_utils.get_source_inputs(input_tensor) 274 | else: 275 | inputs = img_input 276 | 277 | # Create model. 
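# (Added note: the model name encodes the width multiplier and the input rows,
# e.g. alpha=1.0 with 224x224 inputs yields 'mobilenet_1.00_224'.)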
278 | model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) 279 | 280 | # Load weights. 281 | if weights == 'imagenet': 282 | if backend.image_data_format() == 'channels_first': 283 | raise ValueError('Weights for "channels_first" format ' 284 | 'are not available.') 285 | if alpha == 1.0: 286 | alpha_text = '1_0' 287 | elif alpha == 0.75: 288 | alpha_text = '7_5' 289 | elif alpha == 0.50: 290 | alpha_text = '5_0' 291 | else: 292 | alpha_text = '2_5' 293 | 294 | if include_top: 295 | model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) 296 | weight_path = BASE_WEIGHT_PATH + model_name 297 | weights_path = keras_utils.get_file(model_name, 298 | weight_path, 299 | cache_subdir='models') 300 | else: 301 | model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) 302 | weight_path = BASE_WEIGHT_PATH + model_name 303 | weights_path = keras_utils.get_file(model_name, 304 | weight_path, 305 | cache_subdir='models') 306 | model.load_weights(weights_path) 307 | elif weights is not None: 308 | model.load_weights(weights) 309 | 310 | if old_data_format: 311 | backend.set_image_data_format(old_data_format) 312 | return model 313 | 314 | 315 | def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): 316 | """Adds an initial convolution layer (with batch normalization and relu6). 317 | # Arguments 318 | inputs: Input tensor of shape `(rows, cols, 3)` 319 | (with `channels_last` data format) or 320 | (3, rows, cols) (with `channels_first` data format). 321 | It should have exactly 3 inputs channels, 322 | and width and height should be no smaller than 32. 323 | E.g. `(224, 224, 3)` would be one valid value. 324 | filters: Integer, the dimensionality of the output space 325 | (i.e. the number of output filters in the convolution). 326 | alpha: controls the width of the network. 327 | - If `alpha` < 1.0, proportionally decreases the number 328 | of filters in each layer. 329 | - If `alpha` > 1.0, proportionally increases the number 330 | of filters in each layer. 331 | - If `alpha` = 1, default number of filters from the paper 332 | are used at each layer. 333 | kernel: An integer or tuple/list of 2 integers, specifying the 334 | width and height of the 2D convolution window. 335 | Can be a single integer to specify the same value for 336 | all spatial dimensions. 337 | strides: An integer or tuple/list of 2 integers, 338 | specifying the strides of the convolution 339 | along the width and height. 340 | Can be a single integer to specify the same value for 341 | all spatial dimensions. 342 | Specifying any stride value != 1 is incompatible with specifying 343 | any `dilation_rate` value != 1. 344 | # Input shape 345 | 4D tensor with shape: 346 | `(samples, channels, rows, cols)` if data_format='channels_first' 347 | or 4D tensor with shape: 348 | `(samples, rows, cols, channels)` if data_format='channels_last'. 349 | # Output shape 350 | 4D tensor with shape: 351 | `(samples, filters, new_rows, new_cols)` 352 | if data_format='channels_first' 353 | or 4D tensor with shape: 354 | `(samples, new_rows, new_cols, filters)` 355 | if data_format='channels_last'. 356 | `rows` and `cols` values might have changed due to stride. 357 | # Returns 358 | Output tensor of block. 
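    # Example
        (Illustrative note, not part of the upstream docstring.)
        With `filters=32` and `alpha=0.5` the block applies
        `int(32 * 0.5) = 16` output filters; `kernel` and `strides` are
        forwarded unchanged to the underlying `Conv2D`.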
359 | """ 360 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 361 | filters = int(filters * alpha) 362 | x = layers.ZeroPadding2D(padding=((0, 1), (0, 1)), name='conv1_pad')(inputs) 363 | x = layers.Conv2D(filters, kernel, 364 | padding='valid', 365 | use_bias=False, 366 | strides=strides, 367 | name='conv1')(x) 368 | x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x) 369 | return layers.ReLU(6., name='conv1_relu')(x) 370 | 371 | 372 | def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, 373 | depth_multiplier=1, strides=(1, 1), block_id=1): 374 | """Adds a depthwise convolution block. 375 | A depthwise convolution block consists of a depthwise conv, 376 | batch normalization, relu6, pointwise convolution, 377 | batch normalization and relu6 activation. 378 | # Arguments 379 | inputs: Input tensor of shape `(rows, cols, channels)` 380 | (with `channels_last` data format) or 381 | (channels, rows, cols) (with `channels_first` data format). 382 | pointwise_conv_filters: Integer, the dimensionality of the output space 383 | (i.e. the number of output filters in the pointwise convolution). 384 | alpha: controls the width of the network. 385 | - If `alpha` < 1.0, proportionally decreases the number 386 | of filters in each layer. 387 | - If `alpha` > 1.0, proportionally increases the number 388 | of filters in each layer. 389 | - If `alpha` = 1, default number of filters from the paper 390 | are used at each layer. 391 | depth_multiplier: The number of depthwise convolution output channels 392 | for each input channel. 393 | The total number of depthwise convolution output 394 | channels will be equal to `filters_in * depth_multiplier`. 395 | strides: An integer or tuple/list of 2 integers, 396 | specifying the strides of the convolution 397 | along the width and height. 398 | Can be a single integer to specify the same value for 399 | all spatial dimensions. 400 | Specifying any stride value != 1 is incompatible with specifying 401 | any `dilation_rate` value != 1. 402 | block_id: Integer, a unique identification designating 403 | the block number. 404 | # Input shape 405 | 4D tensor with shape: 406 | `(batch, channels, rows, cols)` if data_format='channels_first' 407 | or 4D tensor with shape: 408 | `(batch, rows, cols, channels)` if data_format='channels_last'. 409 | # Output shape 410 | 4D tensor with shape: 411 | `(batch, filters, new_rows, new_cols)` 412 | if data_format='channels_first' 413 | or 4D tensor with shape: 414 | `(batch, new_rows, new_cols, filters)` 415 | if data_format='channels_last'. 416 | `rows` and `cols` values might have changed due to stride. 417 | # Returns 418 | Output tensor of block. 
419 | """ 420 | channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 421 | pointwise_conv_filters = int(pointwise_conv_filters * alpha) 422 | 423 | if strides == (1, 1): 424 | x = inputs 425 | else: 426 | x = layers.ZeroPadding2D(((0, 1), (0, 1)), 427 | name='conv_pad_%d' % block_id)(inputs) 428 | x = layers.DepthwiseConv2D((3, 3), 429 | padding='same' if strides == (1, 1) else 'valid', 430 | depth_multiplier=depth_multiplier, 431 | strides=strides, 432 | use_bias=False, 433 | name='conv_dw_%d' % block_id)(x) 434 | x = layers.BatchNormalization( 435 | axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) 436 | x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x) 437 | 438 | x = layers.Conv2D(pointwise_conv_filters, (1, 1), 439 | padding='same', 440 | use_bias=False, 441 | strides=(1, 1), 442 | name='conv_pw_%d' % block_id)(x) 443 | x = layers.BatchNormalization(axis=channel_axis, 444 | name='conv_pw_%d_bn' % block_id)(x) 445 | return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x) 446 | -------------------------------------------------------------------------------- /src/applications/mobilenet_v2.py: -------------------------------------------------------------------------------- 1 | """MobileNet v2 models for Keras. 2 | MobileNetV2 is a general architecture and can be used for multiple use cases. 3 | Depending on the use case, it can use different input layer size and 4 | different width factors. This allows different width models to reduce 5 | the number of multiply-adds and thereby 6 | reduce inference cost on mobile devices. 7 | MobileNetV2 is very similar to the original MobileNet, 8 | except that it uses inverted residual blocks with 9 | bottlenecking features. It has a drastically lower 10 | parameter count than the original MobileNet. 11 | MobileNets support any input size greater 12 | than 32 x 32, with larger image sizes 13 | offering better performance. 14 | The number of parameters and number of multiply-adds 15 | can be modified by using the `alpha` parameter, 16 | which increases/decreases the number of filters in each layer. 17 | By altering the image size and `alpha` parameter, 18 | all 22 models from the paper can be built, with ImageNet weights provided. 19 | The paper demonstrates the performance of MobileNets using `alpha` values of 20 | 1.0 (also called 100 % MobileNet), 0.35, 0.5, 0.75, 1.0, 1.3, and 1.4 21 | For each of these `alpha` values, weights for 5 different input image sizes 22 | are provided (224, 192, 160, 128, and 96). 
23 | The following table describes the performance of 24 | MobileNet on various input sizes: 25 | ------------------------------------------------------------------------ 26 | MACs stands for Multiply Adds 27 | Classification Checkpoint| MACs (M) | Parameters (M)| Top 1 Accuracy| Top 5 Accuracy 28 | --------------------------|------------|---------------|---------|----|------------- 29 | | [mobilenet_v2_1.4_224] | 582 | 6.06 | 75.0 | 92.5 | 30 | | [mobilenet_v2_1.3_224] | 509 | 5.34 | 74.4 | 92.1 | 31 | | [mobilenet_v2_1.0_224] | 300 | 3.47 | 71.8 | 91.0 | 32 | | [mobilenet_v2_1.0_192] | 221 | 3.47 | 70.7 | 90.1 | 33 | | [mobilenet_v2_1.0_160] | 154 | 3.47 | 68.8 | 89.0 | 34 | | [mobilenet_v2_1.0_128] | 99 | 3.47 | 65.3 | 86.9 | 35 | | [mobilenet_v2_1.0_96] | 56 | 3.47 | 60.3 | 83.2 | 36 | | [mobilenet_v2_0.75_224] | 209 | 2.61 | 69.8 | 89.6 | 37 | | [mobilenet_v2_0.75_192] | 153 | 2.61 | 68.7 | 88.9 | 38 | | [mobilenet_v2_0.75_160] | 107 | 2.61 | 66.4 | 87.3 | 39 | | [mobilenet_v2_0.75_128] | 69 | 2.61 | 63.2 | 85.3 | 40 | | [mobilenet_v2_0.75_96] | 39 | 2.61 | 58.8 | 81.6 | 41 | | [mobilenet_v2_0.5_224] | 97 | 1.95 | 65.4 | 86.4 | 42 | | [mobilenet_v2_0.5_192] | 71 | 1.95 | 63.9 | 85.4 | 43 | | [mobilenet_v2_0.5_160] | 50 | 1.95 | 61.0 | 83.2 | 44 | | [mobilenet_v2_0.5_128] | 32 | 1.95 | 57.7 | 80.8 | 45 | | [mobilenet_v2_0.5_96] | 18 | 1.95 | 51.2 | 75.8 | 46 | | [mobilenet_v2_0.35_224] | 59 | 1.66 | 60.3 | 82.9 | 47 | | [mobilenet_v2_0.35_192] | 43 | 1.66 | 58.2 | 81.2 | 48 | | [mobilenet_v2_0.35_160] | 30 | 1.66 | 55.7 | 79.1 | 49 | | [mobilenet_v2_0.35_128] | 20 | 1.66 | 50.8 | 75.0 | 50 | | [mobilenet_v2_0.35_96] | 11 | 1.66 | 45.5 | 70.4 | 51 | The weights for all 16 models are obtained and 52 | translated from the Tensorflow checkpoints 53 | from TensorFlow checkpoints found [here] 54 | (https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/README.md). 55 | # Reference 56 | This file contains building code for MobileNetV2, based on 57 | [MobileNetV2: Inverted Residuals and Linear Bottlenecks] 58 | (https://arxiv.org/abs/1801.04381) 59 | Tests comparing this model to the existing Tensorflow model can be 60 | found at [mobilenet_v2_keras] 61 | (https://github.com/JonathanCMitchell/mobilenet_v2_keras) 62 | """ 63 | from __future__ import print_function 64 | from __future__ import absolute_import 65 | from __future__ import division 66 | 67 | import os 68 | import warnings 69 | import numpy as np 70 | 71 | from . import correct_pad 72 | from . import get_submodules_from_kwargs 73 | from .imagenet_utils import decode_predictions 74 | from .imagenet_utils import _obtain_input_shape 75 | 76 | # TODO Change path to v1.1 77 | BASE_WEIGHT_PATH = ('https://github.com/JonathanCMitchell/mobilenet_v2_keras/' 78 | 'releases/download/v1.1/') 79 | 80 | backend = None 81 | layers = None 82 | models = None 83 | keras_utils = None 84 | 85 | 86 | def preprocess_input(x, **kwargs): 87 | """Preprocesses a numpy array encoding a batch of images. 88 | This function applies the "Inception" preprocessing which converts 89 | the RGB values from [0, 255] to [-1, 1]. Note that this preprocessing 90 | function is different from `imagenet_utils.preprocess_input()`. 91 | # Arguments 92 | x: a 4D numpy array consists of RGB values within [0, 255]. 93 | # Returns 94 | Preprocessed array. 95 | """ 96 | x /= 128. 97 | x -= 1. 98 | return x.astype(np.float32) 99 | 100 | 101 | # This function is taken from the original tf repo. 
102 | # It ensures that all layers have a channel number that is divisible by 8 103 | # It can be seen here: 104 | # https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py 105 | 106 | 107 | def _make_divisible(v, divisor, min_value=None): 108 | if min_value is None: 109 | min_value = divisor 110 | new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) 111 | # Make sure that round down does not go down by more than 10%. 112 | if new_v < 0.9 * v: 113 | new_v += divisor 114 | return new_v 115 | 116 | 117 | def MobileNetV2(input_shape=None, 118 | alpha=1.0, 119 | depth_multiplier=1, 120 | include_top=True, 121 | weights='imagenet', 122 | input_tensor=None, 123 | pooling=None, 124 | classes=1000, 125 | **kwargs): 126 | """Instantiates the MobileNetV2 architecture. 127 | # Arguments 128 | input_shape: optional shape tuple, to be specified if you would 129 | like to use a model with an input img resolution that is not 130 | (224, 224, 3). 131 | It should have exactly 3 inputs channels (224, 224, 3). 132 | You can also omit this option if you would like 133 | to infer input_shape from an input_tensor. 134 | If you choose to include both input_tensor and input_shape then 135 | input_shape will be used if they match, if the shapes 136 | do not match then we will throw an error. 137 | E.g. `(160, 160, 3)` would be one valid value. 138 | alpha: controls the width of the network. This is known as the 139 | width multiplier in the MobileNetV2 paper. 140 | - If `alpha` < 1.0, proportionally decreases the number 141 | of filters in each layer. 142 | - If `alpha` > 1.0, proportionally increases the number 143 | of filters in each layer. 144 | - If `alpha` = 1, default number of filters from the paper 145 | are used at each layer. 146 | depth_multiplier: depth multiplier for depthwise convolution 147 | (also called the resolution multiplier) 148 | include_top: whether to include the fully-connected 149 | layer at the top of the network. 150 | weights: one of `None` (random initialization), 151 | 'imagenet' (pre-training on ImageNet), 152 | or the path to the weights file to be loaded. 153 | input_tensor: optional Keras tensor (i.e. output of 154 | `layers.Input()`) 155 | to use as image input for the model. 156 | pooling: Optional pooling mode for feature extraction 157 | when `include_top` is `False`. 158 | - `None` means that the output of the model 159 | will be the 4D tensor output of the 160 | last convolutional layer. 161 | - `avg` means that global average pooling 162 | will be applied to the output of the 163 | last convolutional layer, and thus 164 | the output of the model will be a 165 | 2D tensor. 166 | - `max` means that global max pooling will 167 | be applied. 168 | classes: optional number of classes to classify images 169 | into, only to be specified if `include_top` is True, and 170 | if no `weights` argument is specified. 171 | # Returns 172 | A Keras model instance. 
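    # Example
        (Illustrative sketch, assuming the package's `__init__` wires up the
        Keras backend/layers/models/utils submodules as in
        `keras_applications`; not part of the upstream docstring.)
        >>> model = MobileNetV2(input_shape=(160, 160, 3), alpha=1.0,
        ...                     include_top=False, pooling='avg',
        ...                     weights=None)
        >>> model.output_shape
        (None, 1280)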
173 | # Raises 174 | ValueError: in case of invalid argument for `weights`, 175 | or invalid input shape or invalid depth_multiplier, alpha, 176 | rows when weights='imagenet' 177 | """ 178 | global backend, layers, models, keras_utils 179 | backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) 180 | 181 | if not (weights in {'imagenet', None} or os.path.exists(weights)): 182 | raise ValueError('The `weights` argument should be either ' 183 | '`None` (random initialization), `imagenet` ' 184 | '(pre-training on ImageNet), ' 185 | 'or the path to the weights file to be loaded.') 186 | 187 | if weights == 'imagenet' and include_top and classes != 1000: 188 | raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' 189 | 'as true, `classes` should be 1000') 190 | 191 | # Determine proper input shape and default size. 192 | # If both input_shape and input_tensor are used, they should match 193 | if input_shape is not None and input_tensor is not None: 194 | try: 195 | is_input_t_tensor = backend.is_keras_tensor(input_tensor) 196 | except ValueError: 197 | try: 198 | is_input_t_tensor = backend.is_keras_tensor( 199 | keras_utils.get_source_inputs(input_tensor)) 200 | except ValueError: 201 | raise ValueError('input_tensor: ', input_tensor, 202 | 'is not type input_tensor') 203 | if is_input_t_tensor: 204 | if backend.image_data_format == 'channels_first': 205 | if backend.int_shape(input_tensor)[1] != input_shape[1]: 206 | raise ValueError('input_shape: ', input_shape, 207 | 'and input_tensor: ', input_tensor, 208 | 'do not meet the same shape requirements') 209 | else: 210 | if backend.int_shape(input_tensor)[2] != input_shape[1]: 211 | raise ValueError('input_shape: ', input_shape, 212 | 'and input_tensor: ', input_tensor, 213 | 'do not meet the same shape requirements') 214 | else: 215 | raise ValueError('input_tensor specified: ', input_tensor, 216 | 'is not a keras tensor') 217 | 218 | # If input_shape is None, infer shape from input_tensor 219 | if input_shape is None and input_tensor is not None: 220 | 221 | try: 222 | backend.is_keras_tensor(input_tensor) 223 | except ValueError: 224 | raise ValueError('input_tensor: ', input_tensor, 225 | 'is type: ', type(input_tensor), 226 | 'which is not a valid type') 227 | 228 | if input_shape is None and not backend.is_keras_tensor(input_tensor): 229 | default_size = 224 230 | elif input_shape is None and backend.is_keras_tensor(input_tensor): 231 | if backend.image_data_format() == 'channels_first': 232 | rows = backend.int_shape(input_tensor)[2] 233 | cols = backend.int_shape(input_tensor)[3] 234 | else: 235 | rows = backend.int_shape(input_tensor)[1] 236 | cols = backend.int_shape(input_tensor)[2] 237 | 238 | if rows == cols and rows in [96, 128, 160, 192, 224]: 239 | default_size = rows 240 | else: 241 | default_size = 224 242 | 243 | # If input_shape is None and no input_tensor 244 | elif input_shape is None: 245 | default_size = 224 246 | 247 | # If input_shape is not None, assume default size 248 | else: 249 | if backend.image_data_format() == 'channels_first': 250 | rows = input_shape[1] 251 | cols = input_shape[2] 252 | else: 253 | rows = input_shape[0] 254 | cols = input_shape[1] 255 | 256 | if rows == cols and rows in [96, 128, 160, 192, 224]: 257 | default_size = rows 258 | else: 259 | default_size = 224 260 | 261 | input_shape = _obtain_input_shape(input_shape, 262 | default_size=default_size, 263 | min_size=32, 264 | data_format=backend.image_data_format(), 265 | require_flatten=include_top, 266 
| weights=weights) 267 | 268 | if backend.image_data_format() == 'channels_last': 269 | row_axis, col_axis = (0, 1) 270 | else: 271 | row_axis, col_axis = (1, 2) 272 | rows = input_shape[row_axis] 273 | cols = input_shape[col_axis] 274 | 275 | if weights == 'imagenet': 276 | if depth_multiplier != 1: 277 | raise ValueError('If imagenet weights are being loaded, ' 278 | 'depth multiplier must be 1') 279 | 280 | if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: 281 | raise ValueError('If imagenet weights are being loaded, ' 282 | 'alpha can be one of `0.35`, `0.50`, `0.75`, ' 283 | '`1.0`, `1.3` or `1.4` only.') 284 | 285 | if rows != cols or rows not in [96, 128, 160, 192, 224]: 286 | if rows is None: 287 | rows = 224 288 | warnings.warn('MobileNet shape is undefined.' 289 | ' Weights for input shape' 290 | '(224, 224) will be loaded.') 291 | else: 292 | raise ValueError('If imagenet weights are being loaded, ' 293 | 'input must have a static square shape' 294 | '(one of (96, 96), (128, 128), (160, 160),' 295 | '(192, 192), or (224, 224)).' 296 | 'Input shape provided = %s' % (input_shape,)) 297 | 298 | if backend.image_data_format() != 'channels_last': 299 | warnings.warn('The MobileNet family of models is only available ' 300 | 'for the input data format "channels_last" ' 301 | '(width, height, channels). ' 302 | 'However your settings specify the default ' 303 | 'data format "channels_first" (channels, width, height).' 304 | ' You should set `image_data_format="channels_last"` ' 305 | 'in your Keras config located at ~/.keras/keras.json. ' 306 | 'The model being returned right now will expect inputs ' 307 | 'to follow the "channels_last" data format.') 308 | backend.set_image_data_format('channels_last') 309 | old_data_format = 'channels_first' 310 | else: 311 | old_data_format = None 312 | 313 | if input_tensor is None: 314 | img_input = layers.Input(shape=input_shape) 315 | else: 316 | if not backend.is_keras_tensor(input_tensor): 317 | img_input = layers.Input(tensor=input_tensor, shape=input_shape) 318 | else: 319 | img_input = input_tensor 320 | 321 | first_block_filters = _make_divisible(32 * alpha, 8) 322 | x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3), 323 | name='Conv1_pad')(img_input) 324 | x = layers.Conv2D(first_block_filters, 325 | kernel_size=3, 326 | strides=(2, 2), 327 | padding='valid', 328 | use_bias=False, 329 | name='Conv1')(x) 330 | x = layers.BatchNormalization( 331 | epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x) 332 | x = layers.ReLU(6., name='Conv1_relu')(x) 333 | 334 | x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, 335 | expansion=1, block_id=0) 336 | 337 | x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2, 338 | expansion=6, block_id=1) 339 | x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, 340 | expansion=6, block_id=2) 341 | 342 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, 343 | expansion=6, block_id=3) 344 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, 345 | expansion=6, block_id=4) 346 | x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, 347 | expansion=6, block_id=5) 348 | 349 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, 350 | expansion=6, block_id=6) 351 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 352 | expansion=6, block_id=7) 353 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 354 | expansion=6, block_id=8) 355 | x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, 356 | 
expansion=6, block_id=9) 357 | 358 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 359 | expansion=6, block_id=10) 360 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 361 | expansion=6, block_id=11) 362 | x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, 363 | expansion=6, block_id=12) 364 | 365 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, 366 | expansion=6, block_id=13) 367 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, 368 | expansion=6, block_id=14) 369 | x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, 370 | expansion=6, block_id=15) 371 | 372 | x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, 373 | expansion=6, block_id=16) 374 | 375 | # no alpha applied to last conv as stated in the paper: 376 | # if the width multiplier is greater than 1 we 377 | # increase the number of output channels 378 | if alpha > 1.0: 379 | last_block_filters = _make_divisible(1280 * alpha, 8) 380 | else: 381 | last_block_filters = 1280 382 | 383 | x = layers.Conv2D(last_block_filters, 384 | kernel_size=1, 385 | use_bias=False, 386 | name='Conv_1')(x) 387 | x = layers.BatchNormalization(epsilon=1e-3, 388 | momentum=0.999, 389 | name='Conv_1_bn')(x) 390 | x = layers.ReLU(6., name='out_relu')(x) 391 | 392 | if include_top: 393 | x = layers.GlobalAveragePooling2D()(x) 394 | x = layers.Dense(classes, activation='softmax', 395 | use_bias=True, name='Logits')(x) 396 | else: 397 | if pooling == 'avg': 398 | x = layers.GlobalAveragePooling2D()(x) 399 | elif pooling == 'max': 400 | x = layers.GlobalMaxPooling2D()(x) 401 | 402 | # Ensure that the model takes into account 403 | # any potential predecessors of `input_tensor`. 404 | if input_tensor is not None: 405 | inputs = keras_utils.get_source_inputs(input_tensor) 406 | else: 407 | inputs = img_input 408 | 409 | # Create model. 410 | model = models.Model(inputs, x, 411 | name='mobilenetv2_%0.2f_%s' % (alpha, rows)) 412 | 413 | # Load weights. 
414 | if weights == 'imagenet': 415 | if backend.image_data_format() == 'channels_first': 416 | raise ValueError('Weights for "channels_first" format ' 417 | 'are not available.') 418 | 419 | if include_top: 420 | model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + 421 | str(alpha) + '_' + str(rows) + '.h5') 422 | weigh_path = BASE_WEIGHT_PATH + model_name 423 | weights_path = keras_utils.get_file( 424 | model_name, weigh_path, cache_subdir='models') 425 | else: 426 | model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + 427 | str(alpha) + '_' + str(rows) + '_no_top' + '.h5') 428 | weigh_path = BASE_WEIGHT_PATH + model_name 429 | weights_path = keras_utils.get_file( 430 | model_name, weigh_path, cache_subdir='models') 431 | model.load_weights(weights_path) 432 | elif weights is not None: 433 | model.load_weights(weights) 434 | 435 | if old_data_format: 436 | backend.set_image_data_format(old_data_format) 437 | return model 438 | 439 | 440 | def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): 441 | in_channels = backend.int_shape(inputs)[-1] 442 | pointwise_conv_filters = int(filters * alpha) 443 | pointwise_filters = _make_divisible(pointwise_conv_filters, 8) 444 | x = inputs 445 | prefix = 'block_{}_'.format(block_id) 446 | 447 | if block_id: 448 | # Expand 449 | x = layers.Conv2D(expansion * in_channels, 450 | kernel_size=1, 451 | padding='same', 452 | use_bias=False, 453 | activation=None, 454 | name=prefix + 'expand')(x) 455 | x = layers.BatchNormalization(epsilon=1e-3, 456 | momentum=0.999, 457 | name=prefix + 'expand_BN')(x) 458 | x = layers.ReLU(6., name=prefix + 'expand_relu')(x) 459 | else: 460 | prefix = 'expanded_conv_' 461 | 462 | # Depthwise 463 | if stride == 2: 464 | x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3), 465 | name=prefix + 'pad')(x) 466 | x = layers.DepthwiseConv2D(kernel_size=3, 467 | strides=stride, 468 | activation=None, 469 | use_bias=False, 470 | padding='same' if stride == 1 else 'valid', 471 | name=prefix + 'depthwise')(x) 472 | x = layers.BatchNormalization(epsilon=1e-3, 473 | momentum=0.999, 474 | name=prefix + 'depthwise_BN')(x) 475 | 476 | x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) 477 | 478 | # Project 479 | x = layers.Conv2D(pointwise_filters, 480 | kernel_size=1, 481 | padding='same', 482 | use_bias=False, 483 | activation=None, 484 | name=prefix + 'project')(x) 485 | x = layers.BatchNormalization( 486 | epsilon=1e-3, momentum=0.999, name=prefix + 'project_BN')(x) 487 | 488 | if in_channels == pointwise_filters and stride == 1: 489 | return layers.Add(name=prefix + 'add')([inputs, x]) 490 | return x -------------------------------------------------------------------------------- /src/facenet.py: -------------------------------------------------------------------------------- 1 | """Functions for building the face recognition network. 
2 | """ 3 | # MIT License 4 | # 5 | # Copyright (c) 2016 David Sandberg 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 16 | # 17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | # SOFTWARE. 24 | 25 | # pylint: disable=missing-docstring 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import os 31 | from subprocess import Popen, PIPE 32 | import tensorflow as tf 33 | import numpy as np 34 | from scipy import misc 35 | from sklearn.model_selection import KFold 36 | from scipy import interpolate 37 | from tensorflow.python.training import training 38 | import random 39 | import re 40 | from tensorflow.python.platform import gfile 41 | import math 42 | from six import iteritems 43 | 44 | def triplet_loss(anchor, positive, negative, alpha): 45 | """Calculate the triplet loss according to the FaceNet paper 46 | 47 | Args: 48 | anchor: the embeddings for the anchor images. 49 | positive: the embeddings for the positive images. 50 | negative: the embeddings for the negative images. 51 | 52 | Returns: 53 | the triplet loss according to the FaceNet paper as a float tensor. 
54 | """ 55 | with tf.variable_scope('triplet_loss'): 56 | pos_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, positive)), 1) 57 | neg_dist = tf.reduce_sum(tf.square(tf.subtract(anchor, negative)), 1) 58 | 59 | basic_loss = tf.add(tf.subtract(pos_dist,neg_dist), alpha) 60 | loss = tf.reduce_mean(tf.maximum(basic_loss, 0.0), 0) 61 | 62 | return loss 63 | 64 | def center_loss(features, label, alfa, nrof_classes): 65 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 66 | (http://ydwen.github.io/papers/WenECCV16.pdf) 67 | """ 68 | nrof_features = features.get_shape()[1] 69 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 70 | initializer=tf.constant_initializer(0), trainable=False) 71 | label = tf.reshape(label, [-1]) 72 | centers_batch = tf.gather(centers, label) 73 | diff = (1 - alfa) * (centers_batch - features) 74 | centers = tf.scatter_sub(centers, label, diff) 75 | with tf.control_dependencies([centers]): 76 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 77 | return loss, centers 78 | 79 | def get_image_paths_and_labels(dataset): 80 | image_paths_flat = [] 81 | labels_flat = [] 82 | for i in range(len(dataset)): 83 | image_paths_flat += dataset[i].image_paths 84 | labels_flat += [i] * len(dataset[i].image_paths) 85 | return image_paths_flat, labels_flat 86 | 87 | def shuffle_examples(image_paths, labels): 88 | shuffle_list = list(zip(image_paths, labels)) 89 | random.shuffle(shuffle_list) 90 | image_paths_shuff, labels_shuff = zip(*shuffle_list) 91 | return image_paths_shuff, labels_shuff 92 | 93 | def random_rotate_image(image): 94 | angle = np.random.uniform(low=-10.0, high=10.0) 95 | return misc.imrotate(image, angle, 'bicubic') 96 | 97 | # 1: Random rotate 2: Random crop 4: Random flip 8: Fixed image standardization 16: Flip 98 | RANDOM_ROTATE = 1 99 | RANDOM_CROP = 2 100 | RANDOM_FLIP = 4 101 | FIXED_STANDARDIZATION = 8 102 | FLIP = 16 103 | def create_input_pipeline(input_queue, image_size, nrof_preprocess_threads, batch_size_placeholder): 104 | images_and_labels_list = [] 105 | for _ in range(nrof_preprocess_threads): 106 | filenames, label, control = input_queue.dequeue() 107 | images = [] 108 | for filename in tf.unstack(filenames): 109 | file_contents = tf.read_file(filename) 110 | image = tf.image.decode_image(file_contents, 3) 111 | image = tf.cond(get_control_flag(control[0], RANDOM_ROTATE), 112 | lambda:tf.py_func(random_rotate_image, [image], tf.uint8), 113 | lambda:tf.identity(image)) 114 | image = tf.cond(get_control_flag(control[0], RANDOM_CROP), 115 | lambda:tf.random_crop(image, image_size + (3,)), 116 | lambda:tf.image.resize_image_with_crop_or_pad(image, image_size[0], image_size[1])) 117 | image = tf.cond(get_control_flag(control[0], RANDOM_FLIP), 118 | lambda:tf.image.random_flip_left_right(image), 119 | lambda:tf.identity(image)) 120 | image = tf.cond(get_control_flag(control[0], FIXED_STANDARDIZATION), 121 | lambda:(tf.cast(image, tf.float32) - 127.5)/128.0, 122 | lambda:tf.image.per_image_standardization(image)) 123 | image = tf.cond(get_control_flag(control[0], FLIP), 124 | lambda:tf.image.flip_left_right(image), 125 | lambda:tf.identity(image)) 126 | #pylint: disable=no-member 127 | image.set_shape(image_size + (3,)) 128 | images.append(image) 129 | images_and_labels_list.append([images, label]) 130 | 131 | image_batch, label_batch = tf.train.batch_join( 132 | images_and_labels_list, batch_size=batch_size_placeholder, 133 | shapes=[image_size + 
(3,), ()], enqueue_many=True, 134 | capacity=4 * nrof_preprocess_threads * 100, 135 | allow_smaller_final_batch=True) 136 | 137 | return image_batch, label_batch 138 | 139 | def get_control_flag(control, field): 140 | return tf.equal(tf.mod(tf.floor_div(control, field), 2), 1) 141 | 142 | def _add_loss_summaries(total_loss): 143 | """Add summaries for losses. 144 | 145 | Generates moving average for all losses and associated summaries for 146 | visualizing the performance of the network. 147 | 148 | Args: 149 | total_loss: Total loss from loss(). 150 | Returns: 151 | loss_averages_op: op for generating moving averages of losses. 152 | """ 153 | # Compute the moving average of all individual losses and the total loss. 154 | loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg') 155 | losses = tf.get_collection('losses') 156 | loss_averages_op = loss_averages.apply(losses + [total_loss]) 157 | 158 | # Attach a scalar summmary to all individual losses and the total loss; do the 159 | # same for the averaged version of the losses. 160 | for l in losses + [total_loss]: 161 | # Name each loss as '(raw)' and name the moving average version of the loss 162 | # as the original loss name. 163 | tf.summary.scalar(l.op.name +' (raw)', l) 164 | tf.summary.scalar(l.op.name, loss_averages.average(l)) 165 | 166 | return loss_averages_op 167 | 168 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True): 169 | # Generate moving averages of all losses and associated summaries. 170 | loss_averages_op = _add_loss_summaries(total_loss) 171 | 172 | # Compute gradients. 173 | with tf.control_dependencies([loss_averages_op]): 174 | if optimizer=='ADAGRAD': 175 | opt = tf.train.AdagradOptimizer(learning_rate) 176 | elif optimizer=='ADADELTA': 177 | opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6) 178 | elif optimizer=='ADAM': 179 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) 180 | elif optimizer=='RMSPROP': 181 | opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0) 182 | elif optimizer=='MOM': 183 | opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True) 184 | else: 185 | raise ValueError('Invalid optimization algorithm') 186 | 187 | grads = opt.compute_gradients(total_loss, update_gradient_vars) 188 | 189 | # Apply gradients. 190 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 191 | 192 | # Add histograms for trainable variables. 193 | if log_histograms: 194 | for var in tf.trainable_variables(): 195 | tf.summary.histogram(var.op.name, var) 196 | 197 | # Add histograms for gradients. 198 | if log_histograms: 199 | for grad, var in grads: 200 | if grad is not None: 201 | tf.summary.histogram(var.op.name + '/gradients', grad) 202 | 203 | # Track the moving averages of all trainable variables. 
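    # A shadow, exponentially averaged copy of every trainable variable is maintained with decay
    # moving_average_decay; the control dependency below makes the average update run together
    # with the gradient step whenever train_op is executed.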
204 | variable_averages = tf.train.ExponentialMovingAverage( 205 | moving_average_decay, global_step) 206 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 207 | 208 | with tf.control_dependencies([apply_gradient_op, variables_averages_op]): 209 | train_op = tf.no_op(name='train') 210 | 211 | return train_op 212 | 213 | def prewhiten(x): 214 | mean = np.mean(x) 215 | std = np.std(x) 216 | std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) 217 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 218 | return y 219 | 220 | def crop(image, random_crop, image_size): 221 | if image.shape[1]>image_size: 222 | sz1 = int(image.shape[1]//2) 223 | sz2 = int(image_size//2) 224 | if random_crop: 225 | diff = sz1-sz2 226 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 227 | else: 228 | (h, v) = (0,0) 229 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 230 | return image 231 | 232 | def flip(image, random_flip): 233 | if random_flip and np.random.choice([True, False]): 234 | image = np.fliplr(image) 235 | return image 236 | 237 | def to_rgb(img): 238 | w, h = img.shape 239 | ret = np.empty((w, h, 3), dtype=np.uint8) 240 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 241 | return ret 242 | 243 | def load_data(image_paths, do_random_crop, do_random_flip, image_size, do_prewhiten=True): 244 | nrof_samples = len(image_paths) 245 | images = np.zeros((nrof_samples, image_size, image_size, 3)) 246 | for i in range(nrof_samples): 247 | img = misc.imread(image_paths[i]) 248 | if img.ndim == 2: 249 | img = to_rgb(img) 250 | if do_prewhiten: 251 | img = prewhiten(img) 252 | img = crop(img, do_random_crop, image_size) 253 | img = flip(img, do_random_flip) 254 | images[i,:,:,:] = img 255 | return images 256 | 257 | def get_label_batch(label_data, batch_size, batch_index): 258 | nrof_examples = np.size(label_data, 0) 259 | j = batch_index*batch_size % nrof_examples 260 | if j+batch_size<=nrof_examples: 261 | batch = label_data[j:j+batch_size] 262 | else: 263 | x1 = label_data[j:nrof_examples] 264 | x2 = label_data[0:nrof_examples-j] 265 | batch = np.vstack([x1,x2]) 266 | batch_int = batch.astype(np.int64) 267 | return batch_int 268 | 269 | def get_batch(image_data, batch_size, batch_index): 270 | nrof_examples = np.size(image_data, 0) 271 | j = batch_index*batch_size % nrof_examples 272 | if j+batch_size<=nrof_examples: 273 | batch = image_data[j:j+batch_size,:,:,:] 274 | else: 275 | x1 = image_data[j:nrof_examples,:,:,:] 276 | x2 = image_data[0:nrof_examples-j,:,:,:] 277 | batch = np.vstack([x1,x2]) 278 | batch_float = batch.astype(np.float32) 279 | return batch_float 280 | 281 | def get_triplet_batch(triplets, batch_index, batch_size): 282 | ax, px, nx = triplets 283 | a = get_batch(ax, int(batch_size/3), batch_index) 284 | p = get_batch(px, int(batch_size/3), batch_index) 285 | n = get_batch(nx, int(batch_size/3), batch_index) 286 | batch = np.vstack([a, p, n]) 287 | return batch 288 | 289 | def get_learning_rate_from_file(filename, epoch): 290 | with open(filename, 'r') as f: 291 | for line in f.readlines(): 292 | line = line.split('#', 1)[0] 293 | if line: 294 | par = line.strip().split(':') 295 | e = int(par[0]) 296 | if par[1]=='-': 297 | lr = -1 298 | else: 299 | lr = float(par[1]) 300 | if e <= epoch: 301 | learning_rate = lr 302 | else: 303 | return learning_rate 304 | 305 | class ImageClass(): 306 | "Stores the paths to images for a given class" 307 | def __init__(self, name, image_paths): 308 | self.name = name 309 | 
self.image_paths = image_paths 310 | 311 | def __str__(self): 312 | return self.name + ', ' + str(len(self.image_paths)) + ' images' 313 | 314 | def __len__(self): 315 | return len(self.image_paths) 316 | 317 | def get_dataset(path, has_class_directories=True): 318 | dataset = [] 319 | path_exp = os.path.expanduser(path) 320 | classes = [path for path in os.listdir(path_exp) \ 321 | if os.path.isdir(os.path.join(path_exp, path))] 322 | classes.sort() 323 | nrof_classes = len(classes) 324 | for i in range(nrof_classes): 325 | class_name = classes[i] 326 | facedir = os.path.join(path_exp, class_name) 327 | image_paths = get_image_paths(facedir) 328 | dataset.append(ImageClass(class_name, image_paths)) 329 | 330 | return dataset 331 | 332 | def get_image_paths(facedir): 333 | image_paths = [] 334 | if os.path.isdir(facedir): 335 | images = os.listdir(facedir) 336 | image_paths = [os.path.join(facedir,img) for img in images] 337 | return image_paths 338 | 339 | def split_dataset(dataset, split_ratio, min_nrof_images_per_class, mode): 340 | if mode=='SPLIT_CLASSES': 341 | nrof_classes = len(dataset) 342 | class_indices = np.arange(nrof_classes) 343 | np.random.shuffle(class_indices) 344 | split = int(round(nrof_classes*(1-split_ratio))) 345 | train_set = [dataset[i] for i in class_indices[0:split]] 346 | test_set = [dataset[i] for i in class_indices[split:-1]] 347 | elif mode=='SPLIT_IMAGES': 348 | train_set = [] 349 | test_set = [] 350 | for cls in dataset: 351 | paths = cls.image_paths 352 | np.random.shuffle(paths) 353 | nrof_images_in_class = len(paths) 354 | split = int(math.floor(nrof_images_in_class*(1-split_ratio))) 355 | if split==nrof_images_in_class: 356 | split = nrof_images_in_class-1 357 | if split>=min_nrof_images_per_class and nrof_images_in_class-split>=1: 358 | train_set.append(ImageClass(cls.name, paths[:split])) 359 | test_set.append(ImageClass(cls.name, paths[split:])) 360 | else: 361 | raise ValueError('Invalid train/test split mode "%s"' % mode) 362 | return train_set, test_set 363 | 364 | 365 | def load_model(model, input_map=None): 366 | # Check if the model is a model directory (containing a metagraph and a checkpoint file) 367 | # or if it is a protobuf file with a frozen graph 368 | model_exp = os.path.expanduser(model) 369 | if (os.path.isfile(model_exp)): 370 | print('Model filename: %s' % model_exp) 371 | with gfile.FastGFile(model_exp,'rb') as f: 372 | graph_def = tf.GraphDef() 373 | graph_def.ParseFromString(f.read()) 374 | tf.import_graph_def(graph_def, input_map=input_map, name='') 375 | else: 376 | print('Model directory: %s' % model_exp) 377 | meta_file, ckpt_file = get_model_filenames(model_exp) 378 | 379 | print('Metagraph file: %s' % meta_file) 380 | print('Checkpoint file: %s' % ckpt_file) 381 | 382 | saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file), input_map=input_map) 383 | saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file)) 384 | 385 | 386 | def get_model_filenames(model_dir): 387 | files = os.listdir(model_dir) 388 | meta_files = [s for s in files if s.endswith('.meta')] 389 | if len(meta_files)==0: 390 | raise ValueError('No meta file found in the model directory (%s)' % model_dir) 391 | elif len(meta_files)>1: 392 | raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir) 393 | meta_file = meta_files[0] 394 | ckpt = tf.train.get_checkpoint_state(model_dir) 395 | if ckpt and ckpt.model_checkpoint_path: 396 | ckpt_file = 
os.path.basename(ckpt.model_checkpoint_path) 397 | return meta_file, ckpt_file 398 | 399 | meta_files = [s for s in files if '.ckpt' in s] 400 | max_step = -1 401 | for f in files: 402 | step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f) 403 | if step_str is not None and len(step_str.groups())>=2: 404 | step = int(step_str.groups()[1]) 405 | if step > max_step: 406 | max_step = step 407 | ckpt_file = step_str.groups()[0] 408 | return meta_file, ckpt_file 409 | 410 | 411 | def distance(embeddings1, embeddings2, distance_metric=0): 412 | if distance_metric==0: 413 | # Euclidean distance 414 | diff = np.subtract(embeddings1, embeddings2) 415 | dist = np.sum(np.square(diff),1) 416 | elif distance_metric==1: 417 | # Distance based on cosine similarity 418 | dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) 419 | norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) 420 | similarity = dot / norm 421 | dist = np.arccos(similarity) / math.pi 422 | else: 423 | raise ValueError('Undefined distance metric %d' % distance_metric) 424 | 425 | return dist 426 | 427 | 428 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, distance_metric=0, subtract_mean=False): 429 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 430 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 431 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 432 | nrof_thresholds = len(thresholds) 433 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 434 | 435 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 436 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 437 | accuracy = np.zeros((nrof_folds)) 438 | 439 | indices = np.arange(nrof_pairs) 440 | 441 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 442 | if subtract_mean: 443 | mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) 444 | else: 445 | mean = 0.0 446 | dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) 447 | 448 | # Find the best threshold for the fold 449 | acc_train = np.zeros((nrof_thresholds)) 450 | for threshold_idx, threshold in enumerate(thresholds): 451 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 452 | best_threshold_index = np.argmax(acc_train) 453 | for threshold_idx, threshold in enumerate(thresholds): 454 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) 455 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 456 | 457 | tpr = np.mean(tprs,0) 458 | fpr = np.mean(fprs,0) 459 | return tpr, fpr, accuracy 460 | 461 | 462 | def calculate_accuracy(threshold, dist, actual_issame): 463 | predict_issame = np.less(dist, threshold) 464 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 465 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 466 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 467 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 468 | 469 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 470 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 471 | acc = float(tp+tn)/dist.size 472 | return tpr, fpr, acc 473 | 474 | 475 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False): 476 | 
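    # Cross-validated VAL@FAR estimate: on each fold a threshold is chosen on the training split
    # (by linear interpolation) so that the false accept rate reaches far_target, then the
    # validation rate (true accepts / genuine pairs) and the FAR are measured on the test split.
    # Returns the mean VAL, its standard deviation across folds, and the mean FAR.
    # A typical call uses thresholds such as np.arange(0, 4, 0.001) and far_target=1e-3.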
assert(embeddings1.shape[0] == embeddings2.shape[0]) 477 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 478 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 479 | nrof_thresholds = len(thresholds) 480 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 481 | 482 | val = np.zeros(nrof_folds) 483 | far = np.zeros(nrof_folds) 484 | 485 | indices = np.arange(nrof_pairs) 486 | 487 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 488 | if subtract_mean: 489 | mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) 490 | else: 491 | mean = 0.0 492 | dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) 493 | 494 | # Find the threshold that gives FAR = far_target 495 | far_train = np.zeros(nrof_thresholds) 496 | for threshold_idx, threshold in enumerate(thresholds): 497 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 498 | if np.max(far_train)>=far_target: 499 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 500 | threshold = f(far_target) 501 | else: 502 | threshold = 0.0 503 | 504 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 505 | 506 | val_mean = np.mean(val) 507 | far_mean = np.mean(far) 508 | val_std = np.std(val) 509 | return val_mean, val_std, far_mean 510 | 511 | 512 | def calculate_val_far(threshold, dist, actual_issame): 513 | predict_issame = np.less(dist, threshold) 514 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 515 | false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 516 | n_same = np.sum(actual_issame) 517 | n_diff = np.sum(np.logical_not(actual_issame)) 518 | val = float(true_accept) / float(n_same) 519 | far = float(false_accept) / float(n_diff) 520 | return val, far 521 | 522 | def store_revision_info(src_path, output_dir, arg_string): 523 | try: 524 | # Get git hash 525 | cmd = ['git', 'rev-parse', 'HEAD'] 526 | gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) 527 | (stdout, _) = gitproc.communicate() 528 | git_hash = stdout.strip() 529 | except OSError as e: 530 | git_hash = ' '.join(cmd) + ': ' + e.strerror 531 | 532 | try: 533 | # Get local changes 534 | cmd = ['git', 'diff', 'HEAD'] 535 | gitproc = Popen(cmd, stdout = PIPE, cwd=src_path) 536 | (stdout, _) = gitproc.communicate() 537 | git_diff = stdout.strip() 538 | except OSError as e: 539 | git_diff = ' '.join(cmd) + ': ' + e.strerror 540 | 541 | # Store a text file in the log directory 542 | rev_info_filename = os.path.join(output_dir, 'revision_info.txt') 543 | with open(rev_info_filename, "w") as text_file: 544 | text_file.write('arguments: %s\n--------------------\n' % arg_string) 545 | text_file.write('tensorflow version: %s\n--------------------\n' % tf.__version__) # @UndefinedVariable 546 | text_file.write('git hash: %s\n--------------------\n' % git_hash) 547 | text_file.write('%s' % git_diff) 548 | 549 | def list_variables(filename): 550 | reader = training.NewCheckpointReader(filename) 551 | variable_map = reader.get_variable_to_shape_map() 552 | names = sorted(variable_map.keys()) 553 | return names 554 | 555 | def put_images_on_grid(images, shape=(16,8)): 556 | nrof_images = images.shape[0] 557 | img_size = images.shape[1] 558 | bw = 3 559 | img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32) 560 | for i in range(shape[1]): 561 | x_start = i*(img_size+bw)+bw 562 | for j in range(shape[0]): 563 | 
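            # Tiles are placed row-major: image i*shape[0] + j lands at grid row i, column j,
            # with a bw-pixel border kept around every tile.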
img_index = i*shape[0]+j 564 | if img_index>=nrof_images: 565 | break 566 | y_start = j*(img_size+bw)+bw 567 | img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :] 568 | if img_index>=nrof_images: 569 | break 570 | return img 571 | 572 | def write_arguments_to_file(args, filename): 573 | with open(filename, 'w') as f: 574 | for key, value in iteritems(vars(args)): 575 | f.write('%s: %s\n' % (key, str(value))) 576 | -------------------------------------------------------------------------------- /Forward Propagation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": { 7 | "ExecuteTime": { 8 | "end_time": "2018-11-06T22:24:48.482520Z", 9 | "start_time": "2018-11-06T22:24:48.213241Z" 10 | } 11 | }, 12 | "outputs": [], 13 | "source": [ 14 | "import os\n", 15 | "import numpy as np\n", 16 | "import progressbar\n", 17 | "from imageio import imread" 18 | ] 19 | }, 20 | { 21 | "cell_type": "code", 22 | "execution_count": 2, 23 | "metadata": { 24 | "ExecuteTime": { 25 | "end_time": "2018-11-06T22:24:49.371199Z", 26 | "start_time": "2018-11-06T22:24:48.700419Z" 27 | } 28 | }, 29 | "outputs": [], 30 | "source": [ 31 | "initial_path = '/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/lfw_mtcnnpy_160'\n", 32 | "os.listdir(initial_path)\n", 33 | "\n", 34 | "dirs = [os.path.join(initial_path, d) for d in os.listdir(initial_path) if os.path.isdir(os.path.join(initial_path, d))]" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": { 41 | "ExecuteTime": { 42 | "end_time": "2018-11-06T22:24:57.928466Z", 43 | "start_time": "2018-11-06T22:24:57.317786Z" 44 | } 45 | }, 46 | "outputs": [ 47 | { 48 | "name": "stderr", 49 | "output_type": "stream", 50 | "text": [ 51 | "100% (5749 of 5749) |####################| Elapsed Time: 0:00:00 Time: 0:00:00\n" 52 | ] 53 | } 54 | ], 55 | "source": [ 56 | "import progressbar\n", 57 | "\n", 58 | "inputs = []\n", 59 | "for d in progressbar.progressbar(dirs):\n", 60 | " for f in os.listdir(d):\n", 61 | "# d = d.replace('/Users/pedroprates/Google Drive/', '/gdrive/My Drive/')\n", 62 | " if f.endswith('png') or f.endswith('jpg') or f.endswith('jpeg'):\n", 63 | " inputs.append(os.path.join(d, f))" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 16, 69 | "metadata": { 70 | "ExecuteTime": { 71 | "end_time": "2018-10-18T00:12:41.487259Z", 72 | "start_time": "2018-10-18T00:12:41.356799Z" 73 | } 74 | }, 75 | "outputs": [ 76 | { 77 | "name": "stderr", 78 | "output_type": "stream", 79 | "text": [ 80 | "100% (13233 of 13233) |##################| Elapsed Time: 0:00:00 Time: 0:00:00\n" 81 | ] 82 | } 83 | ], 84 | "source": [ 85 | "outputs = []\n", 86 | "\n", 87 | "for inp in progressbar.progressbar(inputs):\n", 88 | " filename = inp.split('/')[-1]\n", 89 | " path = inp.split('/')[:-1]\n", 90 | " \n", 91 | " filename = 'output_resnet/' + filename.split('.')[0] + '.npy'\n", 92 | " path = '/'.join(path)\n", 93 | "# path = path.replace('/Users/pedroprates/Google Drive/', '/gdrive/My Drive/')\n", 94 | " \n", 95 | " outputs.append(os.path.join(path, filename))" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 17, 101 | "metadata": { 102 | "ExecuteTime": { 103 | "end_time": "2018-10-18T00:12:42.733095Z", 104 | "start_time": "2018-10-18T00:12:42.590978Z" 105 | } 106 | }, 107 | "outputs": [], 108 | "source": [ 109 | "inputs_np = 
np.array(inputs)\n", 110 | "outputs_np = np.array(outputs)\n", 111 | "\n", 112 | "np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/input_resnet_mac.npy', inputs_np)\n", 113 | "np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/output_resnet_mac.npy', outputs_np)" 114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": 22, 119 | "metadata": { 120 | "ExecuteTime": { 121 | "end_time": "2018-10-18T00:15:47.235962Z", 122 | "start_time": "2018-10-18T00:15:47.160424Z" 123 | } 124 | }, 125 | "outputs": [], 126 | "source": [ 127 | "from imageio import imread" 128 | ] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": 23, 133 | "metadata": { 134 | "ExecuteTime": { 135 | "end_time": "2018-10-18T00:16:46.011880Z", 136 | "start_time": "2018-10-18T00:15:54.521347Z" 137 | } 138 | }, 139 | "outputs": [ 140 | { 141 | "name": "stderr", 142 | "output_type": "stream", 143 | "text": [ 144 | "| | # | 52 Elapsed Time: 0:00:00" 145 | ] 146 | }, 147 | { 148 | "name": "stdout", 149 | "output_type": "stream", 150 | "text": [ 151 | "[RUNNING] X\n" 152 | ] 153 | }, 154 | { 155 | "name": "stderr", 156 | "output_type": "stream", 157 | "text": [ 158 | "| | # | 13232 Elapsed Time: 0:00:38\n", 159 | "| | # | 123 Elapsed Time: 0:00:00" 160 | ] 161 | }, 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "[RUNNING] y Train\n" 167 | ] 168 | }, 169 | { 170 | "name": "stderr", 171 | "output_type": "stream", 172 | "text": [ 173 | "| | # | 13232 Elapsed Time: 0:00:12\n" 174 | ] 175 | } 176 | ], 177 | "source": [ 178 | "X = np.zeros((inputs_np.shape[0], 160, 160, 3))\n", 179 | "y = np.zeros((outputs_np.shape[0], 512))\n", 180 | "\n", 181 | "print(\"[RUNNING] X\")\n", 182 | "for ix, element in progressbar.progressbar(enumerate(inputs)):\n", 183 | " im = imread(element)\n", 184 | " X[ix, :, :, :] = im\n", 185 | "\n", 186 | "print(\"[RUNNING] y Train\")\n", 187 | "for ix, element in progressbar.progressbar(enumerate(outputs)):\n", 188 | " em = np.load(element)\n", 189 | " y[ix, :] = em" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": 27, 195 | "metadata": { 196 | "ExecuteTime": { 197 | "end_time": "2018-10-18T00:25:53.277355Z", 198 | "start_time": "2018-10-18T00:24:00.565125Z" 199 | } 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/X.npy', X)\n", 204 | "np.save('/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/y.npy', y)" 205 | ] 206 | }, 207 | { 208 | "cell_type": "markdown", 209 | "metadata": {}, 210 | "source": [ 211 | "# Teste" 212 | ] 213 | }, 214 | { 215 | "cell_type": "code", 216 | "execution_count": 5, 217 | "metadata": { 218 | "ExecuteTime": { 219 | "end_time": "2018-11-06T22:25:07.405347Z", 220 | "start_time": "2018-11-06T22:25:07.399740Z" 221 | } 222 | }, 223 | "outputs": [], 224 | "source": [ 225 | "import keras\n", 226 | "import os\n", 227 | "import tensorflow as tf\n", 228 | "import progressbar\n", 229 | "import numpy as np\n", 230 | "from imageio import imread" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": 6, 236 | "metadata": { 237 | "ExecuteTime": { 238 | "end_time": "2018-11-06T22:25:21.826440Z", 239 | "start_time": "2018-11-06T22:25:21.811981Z" 240 | } 241 | }, 242 | "outputs": [], 243 | "source": [ 244 | "import keras.backend as K\n", 245 | "# Custom loss function\n", 246 | "def distillation_loss(y_true, y_pred):\n", 247 | " return K.square(y_pred - 
y_true)\n", 248 | "\n", 249 | "def max_diff(y_true, y_pred):\n", 250 | " return K.max(K.square(y_pred - y_true), axis=-1)\n", 251 | "\n", 252 | "def sum_diff(y_true, y_pred):\n", 253 | " return K.sum(K.square(y_pred - y_true), axis=-1)\n", 254 | "\n", 255 | "keras.losses.distillation_loss = distillation_loss\n", 256 | "keras.metrics.max_diff = max_diff\n", 257 | "keras.metrics.sum_diff = sum_diff" 258 | ] 259 | }, 260 | { 261 | "cell_type": "code", 262 | "execution_count": 7, 263 | "metadata": { 264 | "ExecuteTime": { 265 | "end_time": "2018-11-06T22:25:44.158801Z", 266 | "start_time": "2018-11-06T22:25:28.787974Z" 267 | } 268 | }, 269 | "outputs": [], 270 | "source": [ 271 | "model = keras.models.load_model('/Users/pedroprates/Google Drive/FaceRecognition/models/mobile-net/mobilenetv1_v2.h5')" 272 | ] 273 | }, 274 | { 275 | "cell_type": "code", 276 | "execution_count": 8, 277 | "metadata": { 278 | "ExecuteTime": { 279 | "end_time": "2018-11-06T22:50:14.555540Z", 280 | "start_time": "2018-11-06T22:25:46.211450Z" 281 | } 282 | }, 283 | "outputs": [ 284 | { 285 | "name": "stderr", 286 | "output_type": "stream", 287 | "text": [ 288 | "100% (5755 of 5755) |####################| Elapsed Time: 0:24:28 Time: 0:24:28\n" 289 | ] 290 | } 291 | ], 292 | "source": [ 293 | "base_path = '/Users/pedroprates/Google Drive/FaceRecognition/datasets/lfw/lfw_mtcnnpy_160/'\n", 294 | "list_folders = os.listdir(base_path)\n", 295 | "list_folders = [os.path.join(base_path, x) for x in list_folders]\n", 296 | "\n", 297 | "for folder in progressbar.progressbar(list_folders):\n", 298 | " if not os.path.isdir(folder):\n", 299 | " continue\n", 300 | " \n", 301 | " list_images = os.listdir(folder)\n", 302 | " list_images = [os.path.join(folder, image) for image in list_images]\n", 303 | " list_images = list(filter(lambda x: os.path.isfile(x), list_images))\n", 304 | " list_images = list(filter(lambda x: '.DS_Store' not in x, list_images))\n", 305 | " filenames = [x.split('/')[-1].split('.')[0] for x in list_images]\n", 306 | " output_filenames = [x + '.npy' for x in filenames]\n", 307 | " output_folder = os.path.join(folder, 'mobilenetv1_v2')\n", 308 | "\n", 309 | " # Get the embeddings\n", 310 | " images = np.array([imread(f) / 255 for f in list_images])\n", 311 | " embeddings = model.predict(images)\n", 312 | " \n", 313 | " if not os.path.exists(os.path.join(base_path, output_folder)):\n", 314 | " os.makedirs(os.path.join(base_path, output_folder))\n", 315 | " for idx, embedding in enumerate(embeddings):\n", 316 | " emb_to_save = embedding.reshape(1, *embedding.shape)\n", 317 | " np.save(os.path.join(output_folder, output_filenames[idx]), emb_to_save)" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": 9, 323 | "metadata": { 324 | "ExecuteTime": { 325 | "end_time": "2018-11-06T23:38:26.770284Z", 326 | "start_time": "2018-11-06T23:38:26.568794Z" 327 | } 328 | }, 329 | "outputs": [ 330 | { 331 | "name": "stdout", 332 | "output_type": "stream", 333 | "text": [ 334 | "_________________________________________________________________\n", 335 | "Layer (type) Output Shape Param # \n", 336 | "=================================================================\n", 337 | "input_2 (InputLayer) (None, 160, 160, 3) 0 \n", 338 | "_________________________________________________________________\n", 339 | "conv1_pad (ZeroPadding2D) (None, 161, 161, 3) 0 \n", 340 | "_________________________________________________________________\n", 341 | "conv1 (Conv2D) (None, 80, 80, 32) 864 \n", 342 | 
"_________________________________________________________________\n", 343 | "conv1_bn (BatchNormalization (None, 80, 80, 32) 128 \n", 344 | "_________________________________________________________________\n", 345 | "conv1_relu (ReLU) (None, 80, 80, 32) 0 \n", 346 | "_________________________________________________________________\n", 347 | "conv_dw_1 (DepthwiseConv2D) (None, 80, 80, 32) 288 \n", 348 | "_________________________________________________________________\n", 349 | "conv_dw_1_bn (BatchNormaliza (None, 80, 80, 32) 128 \n", 350 | "_________________________________________________________________\n", 351 | "conv_dw_1_relu (ReLU) (None, 80, 80, 32) 0 \n", 352 | "_________________________________________________________________\n", 353 | "conv_pw_1 (Conv2D) (None, 80, 80, 64) 2048 \n", 354 | "_________________________________________________________________\n", 355 | "conv_pw_1_bn (BatchNormaliza (None, 80, 80, 64) 256 \n", 356 | "_________________________________________________________________\n", 357 | "conv_pw_1_relu (ReLU) (None, 80, 80, 64) 0 \n", 358 | "_________________________________________________________________\n", 359 | "conv_pad_2 (ZeroPadding2D) (None, 81, 81, 64) 0 \n", 360 | "_________________________________________________________________\n", 361 | "conv_dw_2 (DepthwiseConv2D) (None, 40, 40, 64) 576 \n", 362 | "_________________________________________________________________\n", 363 | "conv_dw_2_bn (BatchNormaliza (None, 40, 40, 64) 256 \n", 364 | "_________________________________________________________________\n", 365 | "conv_dw_2_relu (ReLU) (None, 40, 40, 64) 0 \n", 366 | "_________________________________________________________________\n", 367 | "conv_pw_2 (Conv2D) (None, 40, 40, 128) 8192 \n", 368 | "_________________________________________________________________\n", 369 | "conv_pw_2_bn (BatchNormaliza (None, 40, 40, 128) 512 \n", 370 | "_________________________________________________________________\n", 371 | "conv_pw_2_relu (ReLU) (None, 40, 40, 128) 0 \n", 372 | "_________________________________________________________________\n", 373 | "conv_dw_3 (DepthwiseConv2D) (None, 40, 40, 128) 1152 \n", 374 | "_________________________________________________________________\n", 375 | "conv_dw_3_bn (BatchNormaliza (None, 40, 40, 128) 512 \n", 376 | "_________________________________________________________________\n", 377 | "conv_dw_3_relu (ReLU) (None, 40, 40, 128) 0 \n", 378 | "_________________________________________________________________\n", 379 | "conv_pw_3 (Conv2D) (None, 40, 40, 128) 16384 \n", 380 | "_________________________________________________________________\n", 381 | "conv_pw_3_bn (BatchNormaliza (None, 40, 40, 128) 512 \n", 382 | "_________________________________________________________________\n", 383 | "conv_pw_3_relu (ReLU) (None, 40, 40, 128) 0 \n", 384 | "_________________________________________________________________\n", 385 | "conv_pad_4 (ZeroPadding2D) (None, 41, 41, 128) 0 \n", 386 | "_________________________________________________________________\n", 387 | "conv_dw_4 (DepthwiseConv2D) (None, 20, 20, 128) 1152 \n", 388 | "_________________________________________________________________\n", 389 | "conv_dw_4_bn (BatchNormaliza (None, 20, 20, 128) 512 \n", 390 | "_________________________________________________________________\n", 391 | "conv_dw_4_relu (ReLU) (None, 20, 20, 128) 0 \n", 392 | "_________________________________________________________________\n", 393 | "conv_pw_4 (Conv2D) (None, 20, 20, 256) 32768 \n", 
394 | "_________________________________________________________________\n", 395 | "conv_pw_4_bn (BatchNormaliza (None, 20, 20, 256) 1024 \n", 396 | "_________________________________________________________________\n", 397 | "conv_pw_4_relu (ReLU) (None, 20, 20, 256) 0 \n", 398 | "_________________________________________________________________\n", 399 | "conv_dw_5 (DepthwiseConv2D) (None, 20, 20, 256) 2304 \n", 400 | "_________________________________________________________________\n", 401 | "conv_dw_5_bn (BatchNormaliza (None, 20, 20, 256) 1024 \n", 402 | "_________________________________________________________________\n", 403 | "conv_dw_5_relu (ReLU) (None, 20, 20, 256) 0 \n", 404 | "_________________________________________________________________\n", 405 | "conv_pw_5 (Conv2D) (None, 20, 20, 256) 65536 \n", 406 | "_________________________________________________________________\n", 407 | "conv_pw_5_bn (BatchNormaliza (None, 20, 20, 256) 1024 \n", 408 | "_________________________________________________________________\n", 409 | "conv_pw_5_relu (ReLU) (None, 20, 20, 256) 0 \n", 410 | "_________________________________________________________________\n", 411 | "conv_pad_6 (ZeroPadding2D) (None, 21, 21, 256) 0 \n", 412 | "_________________________________________________________________\n", 413 | "conv_dw_6 (DepthwiseConv2D) (None, 10, 10, 256) 2304 \n", 414 | "_________________________________________________________________\n", 415 | "conv_dw_6_bn (BatchNormaliza (None, 10, 10, 256) 1024 \n", 416 | "_________________________________________________________________\n", 417 | "conv_dw_6_relu (ReLU) (None, 10, 10, 256) 0 \n", 418 | "_________________________________________________________________\n", 419 | "conv_pw_6 (Conv2D) (None, 10, 10, 512) 131072 \n", 420 | "_________________________________________________________________\n", 421 | "conv_pw_6_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 422 | "_________________________________________________________________\n", 423 | "conv_pw_6_relu (ReLU) (None, 10, 10, 512) 0 \n", 424 | "_________________________________________________________________\n", 425 | "conv_dw_7 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 426 | "_________________________________________________________________\n", 427 | "conv_dw_7_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 428 | "_________________________________________________________________\n", 429 | "conv_dw_7_relu (ReLU) (None, 10, 10, 512) 0 \n", 430 | "_________________________________________________________________\n", 431 | "conv_pw_7 (Conv2D) (None, 10, 10, 512) 262144 \n", 432 | "_________________________________________________________________\n", 433 | "conv_pw_7_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 434 | "_________________________________________________________________\n", 435 | "conv_pw_7_relu (ReLU) (None, 10, 10, 512) 0 \n", 436 | "_________________________________________________________________\n", 437 | "conv_dw_8 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 438 | "_________________________________________________________________\n", 439 | "conv_dw_8_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 440 | "_________________________________________________________________\n", 441 | "conv_dw_8_relu (ReLU) (None, 10, 10, 512) 0 \n", 442 | "_________________________________________________________________\n", 443 | "conv_pw_8 (Conv2D) (None, 10, 10, 512) 262144 \n", 444 | "_________________________________________________________________\n", 445 | "conv_pw_8_bn 
(BatchNormaliza (None, 10, 10, 512) 2048 \n", 446 | "_________________________________________________________________\n", 447 | "conv_pw_8_relu (ReLU) (None, 10, 10, 512) 0 \n", 448 | "_________________________________________________________________\n", 449 | "conv_dw_9 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 450 | "_________________________________________________________________\n", 451 | "conv_dw_9_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 452 | "_________________________________________________________________\n", 453 | "conv_dw_9_relu (ReLU) (None, 10, 10, 512) 0 \n", 454 | "_________________________________________________________________\n", 455 | "conv_pw_9 (Conv2D) (None, 10, 10, 512) 262144 \n", 456 | "_________________________________________________________________\n", 457 | "conv_pw_9_bn (BatchNormaliza (None, 10, 10, 512) 2048 \n", 458 | "_________________________________________________________________\n", 459 | "conv_pw_9_relu (ReLU) (None, 10, 10, 512) 0 \n", 460 | "_________________________________________________________________\n", 461 | "conv_dw_10 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 462 | "_________________________________________________________________\n", 463 | "conv_dw_10_bn (BatchNormaliz (None, 10, 10, 512) 2048 \n", 464 | "_________________________________________________________________\n", 465 | "conv_dw_10_relu (ReLU) (None, 10, 10, 512) 0 \n", 466 | "_________________________________________________________________\n", 467 | "conv_pw_10 (Conv2D) (None, 10, 10, 512) 262144 \n", 468 | "_________________________________________________________________\n", 469 | "conv_pw_10_bn (BatchNormaliz (None, 10, 10, 512) 2048 \n", 470 | "_________________________________________________________________\n", 471 | "conv_pw_10_relu (ReLU) (None, 10, 10, 512) 0 \n", 472 | "_________________________________________________________________\n", 473 | "conv_dw_11 (DepthwiseConv2D) (None, 10, 10, 512) 4608 \n", 474 | "_________________________________________________________________\n", 475 | "conv_dw_11_bn (BatchNormaliz (None, 10, 10, 512) 2048 \n", 476 | "_________________________________________________________________\n", 477 | "conv_dw_11_relu (ReLU) (None, 10, 10, 512) 0 \n", 478 | "_________________________________________________________________\n", 479 | "conv_pw_11 (Conv2D) (None, 10, 10, 512) 262144 \n", 480 | "_________________________________________________________________\n", 481 | "conv_pw_11_bn (BatchNormaliz (None, 10, 10, 512) 2048 \n", 482 | "_________________________________________________________________\n", 483 | "conv_pw_11_relu (ReLU) (None, 10, 10, 512) 0 \n", 484 | "_________________________________________________________________\n", 485 | "conv_pad_12 (ZeroPadding2D) (None, 11, 11, 512) 0 \n", 486 | "_________________________________________________________________\n", 487 | "conv_dw_12 (DepthwiseConv2D) (None, 5, 5, 512) 4608 \n", 488 | "_________________________________________________________________\n", 489 | "conv_dw_12_bn (BatchNormaliz (None, 5, 5, 512) 2048 \n", 490 | "_________________________________________________________________\n", 491 | "conv_dw_12_relu (ReLU) (None, 5, 5, 512) 0 \n", 492 | "_________________________________________________________________\n", 493 | "conv_pw_12 (Conv2D) (None, 5, 5, 1024) 524288 \n", 494 | "_________________________________________________________________\n", 495 | "conv_pw_12_bn (BatchNormaliz (None, 5, 5, 1024) 4096 \n", 496 | 
"_________________________________________________________________\n", 497 | "conv_pw_12_relu (ReLU) (None, 5, 5, 1024) 0 \n", 498 | "_________________________________________________________________\n", 499 | "conv_dw_13 (DepthwiseConv2D) (None, 5, 5, 1024) 9216 \n", 500 | "_________________________________________________________________\n", 501 | "conv_dw_13_bn (BatchNormaliz (None, 5, 5, 1024) 4096 \n", 502 | "_________________________________________________________________\n", 503 | "conv_dw_13_relu (ReLU) (None, 5, 5, 1024) 0 \n", 504 | "_________________________________________________________________\n", 505 | "conv_pw_13 (Conv2D) (None, 5, 5, 1024) 1048576 \n", 506 | "_________________________________________________________________\n", 507 | "conv_pw_13_bn (BatchNormaliz (None, 5, 5, 1024) 4096 \n", 508 | "_________________________________________________________________\n", 509 | "conv_pw_13_relu (ReLU) (None, 5, 5, 1024) 0 \n", 510 | "_________________________________________________________________\n", 511 | "Conv_Last (Conv2D) (None, 5, 5, 512) 13107200 \n", 512 | "_________________________________________________________________\n", 513 | "batch_normalization_1 (Batch (None, 5, 5, 512) 2048 \n", 514 | "_________________________________________________________________\n", 515 | "re_lu_1 (ReLU) (None, 5, 5, 512) 0 \n", 516 | "_________________________________________________________________\n", 517 | "global_average_pooling2d_1 ( (None, 512) 0 \n", 518 | "=================================================================\n", 519 | "Total params: 16,338,112\n", 520 | "Trainable params: 16,315,200\n", 521 | "Non-trainable params: 22,912\n", 522 | "_________________________________________________________________\n" 523 | ] 524 | } 525 | ], 526 | "source": [ 527 | "model.summary()" 528 | ] 529 | }, 530 | { 531 | "cell_type": "code", 532 | "execution_count": 10, 533 | "metadata": { 534 | "ExecuteTime": { 535 | "end_time": "2018-11-06T23:39:53.330378Z", 536 | "start_time": "2018-11-06T23:39:20.210293Z" 537 | } 538 | }, 539 | "outputs": [], 540 | "source": [ 541 | "model16 = keras.models.load_model('/Users/pedroprates/Google Drive/FaceRecognition/models/mobile-net/mobilenetv1_v16.h5')" 542 | ] 543 | }, 544 | { 545 | "cell_type": "code", 546 | "execution_count": 13, 547 | "metadata": { 548 | "ExecuteTime": { 549 | "end_time": "2018-11-06T23:43:02.315489Z", 550 | "start_time": "2018-11-06T23:43:02.299993Z" 551 | } 552 | }, 553 | "outputs": [], 554 | "source": [ 555 | "import json\n", 556 | "\n", 557 | "model_json = model.to_json()\n", 558 | "\n", 559 | "with open('models/mobile-net/json-test/modelv2.json', 'w') as f:\n", 560 | " json.dump(model_json, f)" 561 | ] 562 | }, 563 | { 564 | "cell_type": "code", 565 | "execution_count": 14, 566 | "metadata": { 567 | "ExecuteTime": { 568 | "end_time": "2018-11-06T23:43:29.787850Z", 569 | "start_time": "2018-11-06T23:43:22.568565Z" 570 | } 571 | }, 572 | "outputs": [], 573 | "source": [ 574 | "model.save_weights('models/mobile-net/json-test/modelv2_weights.h5')" 575 | ] 576 | } 577 | ], 578 | "metadata": { 579 | "kernelspec": { 580 | "display_name": "Python 3", 581 | "language": "python", 582 | "name": "python3" 583 | }, 584 | "language_info": { 585 | "codemirror_mode": { 586 | "name": "ipython", 587 | "version": 3 588 | }, 589 | "file_extension": ".py", 590 | "mimetype": "text/x-python", 591 | "name": "python", 592 | "nbconvert_exporter": "python", 593 | "pygments_lexer": "ipython3", 594 | "version": "3.6.4" 595 | } 596 | }, 597 | "nbformat": 
4, 598 | "nbformat_minor": 2 599 | } 600 | -------------------------------------------------------------------------------- /src/align/detect_face.py: -------------------------------------------------------------------------------- 1 | """ Tensorflow implementation of the face detection / alignment algorithm found at 2 | https://github.com/kpzhang93/MTCNN_face_detection_alignment 3 | """ 4 | # MIT License 5 | # 6 | # Copyright (c) 2016 David Sandberg 7 | # 8 | # Permission is hereby granted, free of charge, to any person obtaining a copy 9 | # of this software and associated documentation files (the "Software"), to deal 10 | # in the Software without restriction, including without limitation the rights 11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | # copies of the Software, and to permit persons to whom the Software is 13 | # furnished to do so, subject to the following conditions: 14 | # 15 | # The above copyright notice and this permission notice shall be included in all 16 | # copies or substantial portions of the Software. 17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | from six import string_types, iteritems 30 | 31 | import numpy as np 32 | import tensorflow as tf 33 | #from math import floor 34 | import cv2 35 | import os 36 | 37 | def layer(op): 38 | """Decorator for composable network layers.""" 39 | 40 | def layer_decorated(self, *args, **kwargs): 41 | # Automatically set a name if not provided. 42 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 43 | # Figure out the layer inputs. 44 | if len(self.terminals) == 0: 45 | raise RuntimeError('No input variables found for layer %s.' % name) 46 | elif len(self.terminals) == 1: 47 | layer_input = self.terminals[0] 48 | else: 49 | layer_input = list(self.terminals) 50 | # Perform the operation and get the output. 51 | layer_output = op(self, layer_input, *args, **kwargs) 52 | # Add to layer LUT. 53 | self.layers[name] = layer_output 54 | # This output is now the input for the next layer. 55 | self.feed(layer_output) 56 | # Return self for chained calls. 57 | return self 58 | 59 | return layer_decorated 60 | 61 | class Network(object): 62 | 63 | def __init__(self, inputs, trainable=True): 64 | # The input nodes for this network 65 | self.inputs = inputs 66 | # The current list of terminal nodes 67 | self.terminals = [] 68 | # Mapping from layer names to layers 69 | self.layers = dict(inputs) 70 | # If true, the resulting variables are set as trainable 71 | self.trainable = trainable 72 | 73 | self.setup() 74 | 75 | def setup(self): 76 | """Construct the network. """ 77 | raise NotImplementedError('Must be implemented by the subclass.') 78 | 79 | def load(self, data_path, session, ignore_missing=False): 80 | """Load network weights. 
81 | data_path: The path to the numpy-serialized network weights 82 | session: The current TensorFlow session 83 | ignore_missing: If true, serialized weights for missing layers are ignored. 84 | """ 85 | data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member 86 | 87 | for op_name in data_dict: 88 | with tf.variable_scope(op_name, reuse=True): 89 | for param_name, data in iteritems(data_dict[op_name]): 90 | try: 91 | var = tf.get_variable(param_name) 92 | session.run(var.assign(data)) 93 | except ValueError: 94 | if not ignore_missing: 95 | raise 96 | 97 | def feed(self, *args): 98 | """Set the input(s) for the next operation by replacing the terminal nodes. 99 | The arguments can be either layer names or the actual layers. 100 | """ 101 | assert len(args) != 0 102 | self.terminals = [] 103 | for fed_layer in args: 104 | if isinstance(fed_layer, string_types): 105 | try: 106 | fed_layer = self.layers[fed_layer] 107 | except KeyError: 108 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 109 | self.terminals.append(fed_layer) 110 | return self 111 | 112 | def get_output(self): 113 | """Returns the current network output.""" 114 | return self.terminals[-1] 115 | 116 | def get_unique_name(self, prefix): 117 | """Returns an index-suffixed unique name for the given prefix. 118 | This is used for auto-generating layer names based on the type-prefix. 119 | """ 120 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 121 | return '%s_%d' % (prefix, ident) 122 | 123 | def make_var(self, name, shape): 124 | """Creates a new TensorFlow variable.""" 125 | return tf.get_variable(name, shape, trainable=self.trainable) 126 | 127 | def validate_padding(self, padding): 128 | """Verifies that the padding is one of the supported ones.""" 129 | assert padding in ('SAME', 'VALID') 130 | 131 | @layer 132 | def conv(self, 133 | inp, 134 | k_h, 135 | k_w, 136 | c_o, 137 | s_h, 138 | s_w, 139 | name, 140 | relu=True, 141 | padding='SAME', 142 | group=1, 143 | biased=True): 144 | # Verify that the padding is acceptable 145 | self.validate_padding(padding) 146 | # Get the number of channels in the input 147 | c_i = int(inp.get_shape()[-1]) 148 | # Verify that the grouping parameter is valid 149 | assert c_i % group == 0 150 | assert c_o % group == 0 151 | # Convolution for a given input and kernel 152 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 153 | with tf.variable_scope(name) as scope: 154 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 155 | # This is the common-case. Convolve the input without any further complications. 
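            # Note: only group == 1 is actually handled here; with group > 1 the kernel shape
            # [k_h, k_w, c_i // group, c_o] would no longer match the input channels.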
156 | output = convolve(inp, kernel) 157 | # Add the biases 158 | if biased: 159 | biases = self.make_var('biases', [c_o]) 160 | output = tf.nn.bias_add(output, biases) 161 | if relu: 162 | # ReLU non-linearity 163 | output = tf.nn.relu(output, name=scope.name) 164 | return output 165 | 166 | @layer 167 | def prelu(self, inp, name): 168 | with tf.variable_scope(name): 169 | i = int(inp.get_shape()[-1]) 170 | alpha = self.make_var('alpha', shape=(i,)) 171 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 172 | return output 173 | 174 | @layer 175 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 176 | self.validate_padding(padding) 177 | return tf.nn.max_pool(inp, 178 | ksize=[1, k_h, k_w, 1], 179 | strides=[1, s_h, s_w, 1], 180 | padding=padding, 181 | name=name) 182 | 183 | @layer 184 | def fc(self, inp, num_out, name, relu=True): 185 | with tf.variable_scope(name): 186 | input_shape = inp.get_shape() 187 | if input_shape.ndims == 4: 188 | # The input is spatial. Vectorize it first. 189 | dim = 1 190 | for d in input_shape[1:].as_list(): 191 | dim *= int(d) 192 | feed_in = tf.reshape(inp, [-1, dim]) 193 | else: 194 | feed_in, dim = (inp, input_shape[-1].value) 195 | weights = self.make_var('weights', shape=[dim, num_out]) 196 | biases = self.make_var('biases', [num_out]) 197 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 198 | fc = op(feed_in, weights, biases, name=name) 199 | return fc 200 | 201 | 202 | """ 203 | Multi dimensional softmax, 204 | refer to https://github.com/tensorflow/tensorflow/issues/210 205 | compute softmax along the dimension of target 206 | the native softmax only supports batch_size x dimension 207 | """ 208 | @layer 209 | def softmax(self, target, axis, name=None): 210 | max_axis = tf.reduce_max(target, axis, keepdims=True) 211 | target_exp = tf.exp(target-max_axis) 212 | normalize = tf.reduce_sum(target_exp, axis, keepdims=True) 213 | softmax = tf.div(target_exp, normalize, name) 214 | return softmax 215 | 216 | class PNet(Network): 217 | def setup(self): 218 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 219 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 220 | .prelu(name='PReLU1') 221 | .max_pool(2, 2, 2, 2, name='pool1') 222 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 223 | .prelu(name='PReLU2') 224 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 225 | .prelu(name='PReLU3') 226 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 227 | .softmax(3,name='prob1')) 228 | 229 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 230 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 231 | 232 | class RNet(Network): 233 | def setup(self): 234 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 235 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 236 | .prelu(name='prelu1') 237 | .max_pool(3, 3, 2, 2, name='pool1') 238 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 239 | .prelu(name='prelu2') 240 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 241 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 242 | .prelu(name='prelu3') 243 | .fc(128, relu=False, name='conv4') 244 | .prelu(name='prelu4') 245 | .fc(2, relu=False, name='conv5-1') 246 | .softmax(1,name='prob1')) 247 | 248 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 249 | .fc(4, relu=False, name='conv5-2')) 250 | 251 | class ONet(Network): 252 | def setup(self): 253 | 
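        # O-Net, the final MTCNN stage (48x48 crops): 'prob1' scores face vs. non-face,
        # 'conv6-2' regresses bounding-box offsets and 'conv6-3' predicts five facial landmark positions.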
(self.feed('data') #pylint: disable=no-value-for-parameter, no-member 254 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 255 | .prelu(name='prelu1') 256 | .max_pool(3, 3, 2, 2, name='pool1') 257 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 258 | .prelu(name='prelu2') 259 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 260 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 261 | .prelu(name='prelu3') 262 | .max_pool(2, 2, 2, 2, name='pool3') 263 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 264 | .prelu(name='prelu4') 265 | .fc(256, relu=False, name='conv5') 266 | .prelu(name='prelu5') 267 | .fc(2, relu=False, name='conv6-1') 268 | .softmax(1, name='prob1')) 269 | 270 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 271 | .fc(4, relu=False, name='conv6-2')) 272 | 273 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 274 | .fc(10, relu=False, name='conv6-3')) 275 | 276 | def create_mtcnn(sess, model_path): 277 | if not model_path: 278 | model_path,_ = os.path.split(os.path.realpath(__file__)) 279 | 280 | with tf.variable_scope('pnet'): 281 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 282 | pnet = PNet({'data':data}) 283 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 284 | with tf.variable_scope('rnet'): 285 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 286 | rnet = RNet({'data':data}) 287 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 288 | with tf.variable_scope('onet'): 289 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 290 | onet = ONet({'data':data}) 291 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 292 | 293 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 294 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 295 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 296 | return pnet_fun, rnet_fun, onet_fun 297 | 298 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 299 | """Detects faces in an image, and returns bounding boxes and points for them. 300 | img: input image 301 | minsize: minimum faces' size 302 | pnet, rnet, onet: caffemodel 303 | threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold 304 | factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 
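    Returns:
        total_boxes: (n_faces, 5) array with [x1, y1, x2, y2, score] for each detected face.
        points: (10, n_faces) array holding the five landmark x coordinates followed by the five y coordinates.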
305 | """ 306 | factor_count=0 307 | total_boxes=np.empty((0,9)) 308 | points=np.empty(0) 309 | h=img.shape[0] 310 | w=img.shape[1] 311 | minl=np.amin([h, w]) 312 | m=12.0/minsize 313 | minl=minl*m 314 | # create scale pyramid 315 | scales=[] 316 | while minl>=12: 317 | scales += [m*np.power(factor, factor_count)] 318 | minl = minl*factor 319 | factor_count += 1 320 | 321 | # first stage 322 | for scale in scales: 323 | hs=int(np.ceil(h*scale)) 324 | ws=int(np.ceil(w*scale)) 325 | im_data = imresample(img, (hs, ws)) 326 | im_data = (im_data-127.5)*0.0078125 327 | img_x = np.expand_dims(im_data, 0) 328 | img_y = np.transpose(img_x, (0,2,1,3)) 329 | out = pnet(img_y) 330 | out0 = np.transpose(out[0], (0,2,1,3)) 331 | out1 = np.transpose(out[1], (0,2,1,3)) 332 | 333 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 334 | 335 | # inter-scale nms 336 | pick = nms(boxes.copy(), 0.5, 'Union') 337 | if boxes.size>0 and pick.size>0: 338 | boxes = boxes[pick,:] 339 | total_boxes = np.append(total_boxes, boxes, axis=0) 340 | 341 | numbox = total_boxes.shape[0] 342 | if numbox>0: 343 | pick = nms(total_boxes.copy(), 0.7, 'Union') 344 | total_boxes = total_boxes[pick,:] 345 | regw = total_boxes[:,2]-total_boxes[:,0] 346 | regh = total_boxes[:,3]-total_boxes[:,1] 347 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 348 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 349 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 350 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 351 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 352 | total_boxes = rerec(total_boxes.copy()) 353 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 354 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 355 | 356 | numbox = total_boxes.shape[0] 357 | if numbox>0: 358 | # second stage 359 | tempimg = np.zeros((24,24,3,numbox)) 360 | for k in range(0,numbox): 361 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 362 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 363 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 364 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 365 | else: 366 | return np.empty() 367 | tempimg = (tempimg-127.5)*0.0078125 368 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 369 | out = rnet(tempimg1) 370 | out0 = np.transpose(out[0]) 371 | out1 = np.transpose(out[1]) 372 | score = out1[1,:] 373 | ipass = np.where(score>threshold[1]) 374 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 375 | mv = out0[:,ipass[0]] 376 | if total_boxes.shape[0]>0: 377 | pick = nms(total_boxes, 0.7, 'Union') 378 | total_boxes = total_boxes[pick,:] 379 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 380 | total_boxes = rerec(total_boxes.copy()) 381 | 382 | numbox = total_boxes.shape[0] 383 | if numbox>0: 384 | # third stage 385 | total_boxes = np.fix(total_boxes).astype(np.int32) 386 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 387 | tempimg = np.zeros((48,48,3,numbox)) 388 | for k in range(0,numbox): 389 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 390 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 391 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 392 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 393 | else: 394 | return np.empty() 395 | tempimg = (tempimg-127.5)*0.0078125 396 | tempimg1 = 
421 | def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor):
422 |     """Detects faces in a list of images
423 |     images: list containing input images
424 |     detection_window_size_ratio: ratio of minimum face size to smallest image dimension
425 |     pnet, rnet, onet: caffemodel
426 |     threshold: threshold=[th1 th2 th3], th1-3 are three steps' thresholds [0-1]
427 |     factor: the factor used to create a scaling pyramid of face sizes to detect in the image.
428 |     """
429 |     all_scales = [None] * len(images)
430 |     images_with_boxes = [None] * len(images)
431 |
432 |     for i in range(len(images)):
433 |         images_with_boxes[i] = {'total_boxes': np.empty((0, 9))}
434 |
435 |     # create scale pyramid
436 |     for index, img in enumerate(images):
437 |         all_scales[index] = []
438 |         h = img.shape[0]
439 |         w = img.shape[1]
440 |         minsize = int(detection_window_size_ratio * np.minimum(w, h))
441 |         factor_count = 0
442 |         minl = np.amin([h, w])
443 |         if minsize <= 12:
444 |             minsize = 12
445 |
446 |         m = 12.0 / minsize
447 |         minl = minl * m
448 |         while minl >= 12:
449 |             all_scales[index].append(m * np.power(factor, factor_count))
450 |             minl = minl * factor
451 |             factor_count += 1
452 |
453 |     # # # # # # # # # # # # #
454 |     # first stage - fast proposal network (pnet) to obtain face candidates
455 |     # # # # # # # # # # # # #
456 |
457 |     images_obj_per_resolution = {}
458 |
459 |     # TODO: round the pyramid sizes to a multiple of 8 to increase the probability that pyramid images will have the same resolution across input images
460 |
461 |     for index, scales in enumerate(all_scales):
462 |         h = images[index].shape[0]
463 |         w = images[index].shape[1]
464 |
465 |         for scale in scales:
466 |             hs = int(np.ceil(h * scale))
467 |             ws = int(np.ceil(w * scale))
468 |
469 |             if (ws, hs) not in images_obj_per_resolution:
470 |                 images_obj_per_resolution[(ws, hs)] = []
471 |
472 |             im_data = imresample(images[index], (hs, ws))
473 |             im_data = (im_data - 127.5) * 0.0078125
474 |             img_y = np.transpose(im_data, (1, 0, 2))  # caffe uses different dimensions ordering
475 |             images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index})
476 |
477 |     for resolution in images_obj_per_resolution:
478 |         images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]]
479 |         outs = pnet(images_per_resolution)
480 |
481 |         for index in range(len(outs[0])):
482 |             scale = images_obj_per_resolution[resolution][index]['scale']
483 |             image_index = images_obj_per_resolution[resolution][index]['index']
484 |             out0 = np.transpose(outs[0][index], (1, 0, 2))
485 |             out1 = np.transpose(outs[1][index], (1, 0, 2))
486 |
487 |             boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0])
488 |
489 |             # inter-scale nms
490 |             pick = nms(boxes.copy(), 0.5, 'Union')
491 |             if boxes.size > 0 and pick.size > 0:
492 |                 boxes = boxes[pick, :]
493 |                 images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'],
494 |                                                                           boxes,
495 |                                                                           axis=0)
496 |
497 |     for index, image_obj in enumerate(images_with_boxes):
498 |         numbox = image_obj['total_boxes'].shape[0]
499 |         if numbox > 0:
500 |             h = images[index].shape[0]
501 |             w = images[index].shape[1]
502 |             pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union')
503 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
504 |             regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0]
505 |             regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1]
506 |             qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw
507 |             qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh
508 |             qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw
509 |             qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh
510 |             image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]]))
511 |             image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
512 |             image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32)
513 |             dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
514 |
515 |             numbox = image_obj['total_boxes'].shape[0]
516 |             tempimg = np.zeros((24, 24, 3, numbox))
517 |
518 |             if numbox > 0:
519 |                 for k in range(0, numbox):
520 |                     tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
521 |                     tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
522 |                     if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
523 |                         tempimg[:, :, :, k] = imresample(tmp, (24, 24))
524 |                     else:
525 |                         return np.empty(0)
526 |
527 |                 tempimg = (tempimg - 127.5) * 0.0078125
528 |                 image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
529 |
530 |     # # # # # # # # # # # # #
531 |     # second stage - refinement of face candidates with rnet
532 |     # # # # # # # # # # # # #
533 |
534 |     bulk_rnet_input = np.empty((0, 24, 24, 3))
535 |     for index, image_obj in enumerate(images_with_boxes):
536 |         if 'rnet_input' in image_obj:
537 |             bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0)
538 |
539 |     out = rnet(bulk_rnet_input)
540 |     out0 = np.transpose(out[0])
541 |     out1 = np.transpose(out[1])
542 |     score = out1[1, :]
543 |
544 |     i = 0
545 |     for index, image_obj in enumerate(images_with_boxes):
546 |         if 'rnet_input' not in image_obj:
547 |             continue
548 |
549 |         rnet_input_count = image_obj['rnet_input'].shape[0]
550 |         score_per_image = score[i:i + rnet_input_count]
551 |         out0_per_image = out0[:, i:i + rnet_input_count]
552 |
553 |         ipass = np.where(score_per_image > threshold[1])
554 |         image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
555 |                                               np.expand_dims(score_per_image[ipass].copy(), 1)])
556 |
557 |         mv = out0_per_image[:, ipass[0]]
558 |
559 |         if image_obj['total_boxes'].shape[0] > 0:
560 |             h = images[index].shape[0]
561 |             w = images[index].shape[1]
562 |             pick = nms(image_obj['total_boxes'], 0.7, 'Union')
563 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
564 |             image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick]))
565 |             image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy())
566 |
567 |             numbox = image_obj['total_boxes'].shape[0]
568 |
569 |             if numbox > 0:
570 |                 tempimg = np.zeros((48, 48, 3, numbox))
571 |                 image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32)
572 |                 dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h)
573 |
574 |                 for k in range(0, numbox):
575 |                     tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3))
576 |                     tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :]
577 |                     if (tmp.shape[0] > 0 and tmp.shape[1] > 0) or (tmp.shape[0] == 0 and tmp.shape[1] == 0):
578 |                         tempimg[:, :, :, k] = imresample(tmp, (48, 48))
579 |                     else:
580 |                         return np.empty(0)
581 |                 tempimg = (tempimg - 127.5) * 0.0078125
582 |                 image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2))
583 |
584 |         i += rnet_input_count
585 |
586 |     # # # # # # # # # # # # #
587 |     # third stage - further refinement and facial landmark positions with onet
588 |     # # # # # # # # # # # # #
589 |
590 |     bulk_onet_input = np.empty((0, 48, 48, 3))
591 |     for index, image_obj in enumerate(images_with_boxes):
592 |         if 'onet_input' in image_obj:
593 |             bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0)
594 |
595 |     out = onet(bulk_onet_input)
596 |
597 |     out0 = np.transpose(out[0])
598 |     out1 = np.transpose(out[1])
599 |     out2 = np.transpose(out[2])
600 |     score = out2[1, :]
601 |     points = out1
602 |
603 |     i = 0
604 |     ret = []
605 |     for index, image_obj in enumerate(images_with_boxes):
606 |         if 'onet_input' not in image_obj:
607 |             ret.append(None)
608 |             continue
609 |
610 |         onet_input_count = image_obj['onet_input'].shape[0]
611 |
612 |         out0_per_image = out0[:, i:i + onet_input_count]
613 |         score_per_image = score[i:i + onet_input_count]
614 |         points_per_image = points[:, i:i + onet_input_count]
615 |
616 |         ipass = np.where(score_per_image > threshold[2])
617 |         points_per_image = points_per_image[:, ipass[0]]
618 |
619 |         image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(),
620 |                                               np.expand_dims(score_per_image[ipass].copy(), 1)])
621 |         mv = out0_per_image[:, ipass[0]]
622 |
623 |         w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1
624 |         h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1
625 |         points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile(
626 |             image_obj['total_boxes'][:, 0], (5, 1)) - 1
627 |         points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile(
628 |             image_obj['total_boxes'][:, 1], (5, 1)) - 1
629 |
630 |         if image_obj['total_boxes'].shape[0] > 0:
631 |             image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv))
632 |             pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min')
633 |             image_obj['total_boxes'] = image_obj['total_boxes'][pick, :]
634 |             points_per_image = points_per_image[:, pick]
635 |
636 |             ret.append((image_obj['total_boxes'], points_per_image))
637 |         else:
638 |             ret.append(None)
639 |
640 |         i += onet_input_count
641 |
642 |     return ret
643 |
644 |
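# Illustrative sketch (not from this module): bulk_detect_face() above mirrors
# detect_face() but pushes several images through each network stage at once, grouping
# pyramid levels of equal resolution so pnet can process them together. The ratio value
# below is an assumption, not a default defined in this file.
#
#     results = bulk_detect_face(list_of_rgb_images, 0.05, pnet, rnet, onet, [0.6, 0.7, 0.7], 0.709)
#     # results[i] is either None (nothing survived the early stages for that image) or a
#     # (bounding_boxes, points) pair in the same format as detect_face() returns.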
645 | # function [boundingbox] = bbreg(boundingbox,reg)
646 | def bbreg(boundingbox,reg):
647 |     """Calibrate bounding boxes"""
648 |     if reg.shape[1]==1:
649 |         reg = np.reshape(reg, (reg.shape[2], reg.shape[3]))
650 |
651 |     w = boundingbox[:,2]-boundingbox[:,0]+1
652 |     h = boundingbox[:,3]-boundingbox[:,1]+1
653 |     b1 = boundingbox[:,0]+reg[:,0]*w
654 |     b2 = boundingbox[:,1]+reg[:,1]*h
655 |     b3 = boundingbox[:,2]+reg[:,2]*w
656 |     b4 = boundingbox[:,3]+reg[:,3]*h
657 |     boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ]))
658 |     return boundingbox
659 |
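# Worked example for bbreg() above (illustrative numbers): for a box [10, 10, 50, 50]
# (so w = h = 41) and a regression row reg = [0.1, 0.0, -0.1, 0.0], the calibrated box is
# [10 + 0.1*41, 10, 50 - 0.1*41, 50] = [14.1, 10.0, 45.9, 50.0]; the network outputs are
# offsets expressed as fractions of the current box size.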
660 | def generateBoundingBox(imap, reg, scale, t):
661 |     """Use heatmap to generate bounding boxes"""
662 |     stride=2
663 |     cellsize=12
664 |
665 |     imap = np.transpose(imap)
666 |     dx1 = np.transpose(reg[:,:,0])
667 |     dy1 = np.transpose(reg[:,:,1])
668 |     dx2 = np.transpose(reg[:,:,2])
669 |     dy2 = np.transpose(reg[:,:,3])
670 |     y, x = np.where(imap >= t)
671 |     if y.shape[0]==1:
672 |         dx1 = np.flipud(dx1)
673 |         dy1 = np.flipud(dy1)
674 |         dx2 = np.flipud(dx2)
675 |         dy2 = np.flipud(dy2)
676 |     score = imap[(y,x)]
677 |     reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ]))
678 |     if reg.size==0:
679 |         reg = np.empty((0,3))
680 |     bb = np.transpose(np.vstack([y,x]))
681 |     q1 = np.fix((stride*bb+1)/scale)
682 |     q2 = np.fix((stride*bb+cellsize-1+1)/scale)
683 |     boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg])
684 |     return boundingbox, reg
685 |
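# Note on generateBoundingBox() above: each heatmap cell that clears the threshold is
# mapped back to input-image coordinates using the pnet stride of 2 and 12x12 receptive
# field, i.e. q1 = fix((2*cell + 1) / scale) and q2 = fix((2*cell + 12) / scale). For an
# illustrative cell at (3, 5) found at scale 0.5, q1 = fix([7, 11] / 0.5) = [14, 22] and
# q2 = fix([18, 22] / 0.5) = [36, 44] in the original image.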
686 | # function pick = nms(boxes,threshold,type)
687 | def nms(boxes, threshold, method):
688 |     if boxes.size==0:
689 |         return np.empty((0,3))
690 |     x1 = boxes[:,0]
691 |     y1 = boxes[:,1]
692 |     x2 = boxes[:,2]
693 |     y2 = boxes[:,3]
694 |     s = boxes[:,4]
695 |     area = (x2-x1+1) * (y2-y1+1)
696 |     I = np.argsort(s)
697 |     pick = np.zeros_like(s, dtype=np.int16)
698 |     counter = 0
699 |     while I.size>0:
700 |         i = I[-1]
701 |         pick[counter] = i
702 |         counter += 1
703 |         idx = I[0:-1]
704 |         xx1 = np.maximum(x1[i], x1[idx])
705 |         yy1 = np.maximum(y1[i], y1[idx])
706 |         xx2 = np.minimum(x2[i], x2[idx])
707 |         yy2 = np.minimum(y2[i], y2[idx])
708 |         w = np.maximum(0.0, xx2-xx1+1)
709 |         h = np.maximum(0.0, yy2-yy1+1)
710 |         inter = w * h
711 |         if method == 'Min':
712 |             o = inter / np.minimum(area[i], area[idx])
713 |         else:
714 |             o = inter / (area[i] + area[idx] - inter)
715 |         I = I[np.where(o<=threshold)]
716 |     pick = pick[0:counter]
717 |     return pick
718 |
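# Note on nms() above: with method 'Union' the overlap measure is the usual IoU. As an
# illustrative example, two 100x100 boxes offset by 50 px in one direction intersect in
# about 50*100 = 5000 px, giving o = 5000 / (10000 + 10000 - 5000), roughly 1/3; boxes
# whose overlap with the currently best-scoring box exceeds the threshold are suppressed,
# so the second box survives a 0.7 threshold but is removed under 0.3. With method 'Min'
# the intersection is divided by the smaller box area instead.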
719 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h)
720 | def pad(total_boxes, w, h):
721 |     """Compute the padding coordinates (pad the bounding boxes to square)"""
722 |     tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32)
723 |     tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32)
724 |     numbox = total_boxes.shape[0]
725 |
726 |     dx = np.ones((numbox), dtype=np.int32)
727 |     dy = np.ones((numbox), dtype=np.int32)
728 |     edx = tmpw.copy().astype(np.int32)
729 |     edy = tmph.copy().astype(np.int32)
730 |
731 |     x = total_boxes[:,0].copy().astype(np.int32)
732 |     y = total_boxes[:,1].copy().astype(np.int32)
733 |     ex = total_boxes[:,2].copy().astype(np.int32)
734 |     ey = total_boxes[:,3].copy().astype(np.int32)
735 |
736 |     tmp = np.where(ex>w)
737 |     edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1)
738 |     ex[tmp] = w
739 |
740 |     tmp = np.where(ey>h)
741 |     edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1)
742 |     ey[tmp] = h
743 |
744 |     tmp = np.where(x<1)
745 |     dx.flat[tmp] = np.expand_dims(2-x[tmp],1)
746 |     x[tmp] = 1
747 |
748 |     tmp = np.where(y<1)
749 |     dy.flat[tmp] = np.expand_dims(2-y[tmp],1)
750 |     y[tmp] = 1
751 |
752 |     return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph
753 |
754 | # function [bboxA] = rerec(bboxA)
755 | def rerec(bboxA):
756 |     """Convert bboxA to square."""
757 |     h = bboxA[:,3]-bboxA[:,1]
758 |     w = bboxA[:,2]-bboxA[:,0]
759 |     l = np.maximum(w, h)
760 |     bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5
761 |     bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5
762 |     bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1)))
763 |     return bboxA
764 |
765 | def imresample(img, sz):
766 |     im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable
767 |     return im_data
768 |
769 |     # This method is kept for debugging purposes
770 |     # h=img.shape[0]
771 |     # w=img.shape[1]
772 |     # hs, ws = sz
773 |     # dx = float(w) / ws
774 |     # dy = float(h) / hs
775 |     # im_data = np.zeros((hs,ws,3))
776 |     # for a1 in range(0,hs):
777 |     #     for a2 in range(0,ws):
778 |     #         for a3 in range(0,3):
779 |     #             im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3]
780 |     # return im_data
781 |
782 |
--------------------------------------------------------------------------------