├── README.md
├── __pycache__
│   └── mtcnn_detect_face.cpython-36.pyc
├── demo_colab.ipynb
├── detector
│   ├── __pycache__
│   │   └── face_detector.cpython-36.pyc
│   └── face_detector.py
├── elg_weights
│   └── elg_keras.h5
├── models
│   ├── __pycache__
│   │   └── elg_keras.cpython-36.pyc
│   └── elg_keras.py
├── mtcnn_detect_face.py
├── mtcnn_weights
│   ├── README.md
│   ├── det1.npy
│   ├── det2.npy
│   └── det3.npy
├── results
│   ├── result_fashion-1063100_640.png
│   ├── result_lenna.png
│   ├── result_model-1439909_640.png
│   └── result_reiwa.png
└── test_imgs
    ├── Lenna_(test_image).png
    ├── fashion-1063100_640.jpg
    ├── gengou_happyou_reiwa.png
    └── model-1439909_640.jpg

/README.md:
--------------------------------------------------------------------------------
1 | # GazeML-keras
2 | A Keras port of [swook/GazeML](https://github.com/swook/GazeML) for eye-region landmark detection.
3 | 
4 | The dlib face detector is replaced by MTCNN.
5 | 
6 | ## Demo
7 | 
8 | [Here](https://github.com/shaoanlu/GazeML-keras/blob/master/demo_colab.ipynb) is the demo Jupyter notebook, or [try it](https://colab.research.google.com/github/shaoanlu/GazeML-keras/blob/master/demo_colab.ipynb) on Colaboratory.
9 | 
10 | ## Results
11 | 
12 | ![](https://github.com/shaoanlu/GazeML-keras/raw/master/results/result_lenna.png)
13 | ![](https://github.com/shaoanlu/GazeML-keras/raw/master/results/result_fashion-1063100_640.png)
14 | ![](https://github.com/shaoanlu/GazeML-keras/raw/master/results/result_model-1439909_640.png)
15 | ![](https://github.com/shaoanlu/GazeML-keras/raw/master/results/result_reiwa.png)
16 | 
17 | ## WIP
18 | 1. The preprocessing differs substantially from the [official implementation](https://github.com/swook/GazeML/blob/master/src/datasources/frames.py#L223) (e.g., the left eye is not flipped), so the results produced are suboptimal.
19 | 
20 | 2. Model training and gaze estimation have not been ported yet.
21 | 
22 | ## Dependency
23 | - python 3.6
24 | - keras 2.2.4
25 | - tensorflow 1.12.0
26 | 
27 | ## Acknowledgement
28 | The ELG model weights are converted from the official repo [swook/GazeML](https://github.com/swook/GazeML). We learnt a lot from it.
29 | 
--------------------------------------------------------------------------------
/__pycache__/mtcnn_detect_face.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/__pycache__/mtcnn_detect_face.cpython-36.pyc
--------------------------------------------------------------------------------
/detector/__pycache__/face_detector.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/detector/__pycache__/face_detector.cpython-36.pyc
--------------------------------------------------------------------------------
/detector/face_detector.py:
--------------------------------------------------------------------------------
1 | import mtcnn_detect_face
2 | import tensorflow as tf
3 | from keras import backend as K
4 | import numpy as np
5 | import cv2
6 | import os
7 | 
8 | class MTCNNFaceDetector():
9 |     """
10 |     This class loads the MTCNN network and performs face detection.
11 | 12 | Attributes: 13 | model_path: path to the MTCNN weights files 14 | """ 15 | def __init__(self, sess, model_path="./mtcnn_weights/"): 16 | self.pnet = None 17 | self.rnet = None 18 | self.onet = None 19 | self.create_mtcnn(sess, model_path) 20 | 21 | def create_mtcnn(self, sess, model_path): 22 | if not model_path: 23 | model_path, _ = os.path.split(os.path.realpath(__file__)) 24 | 25 | with tf.variable_scope('pnet'): 26 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 27 | pnet = mtcnn_detect_face.PNet({'data':data}) 28 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 29 | with tf.variable_scope('rnet'): 30 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 31 | rnet = mtcnn_detect_face.RNet({'data':data}) 32 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 33 | with tf.variable_scope('onet'): 34 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 35 | onet = mtcnn_detect_face.ONet({'data':data}) 36 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 37 | self.pnet = K.function([pnet.layers['data']], [pnet.layers['conv4-2'], pnet.layers['prob1']]) 38 | self.rnet = K.function([rnet.layers['data']], [rnet.layers['conv5-2'], rnet.layers['prob1']]) 39 | self.onet = K.function([onet.layers['data']], [onet.layers['conv6-2'], onet.layers['conv6-3'], onet.layers['prob1']]) 40 | 41 | def detect_face(self, image, minsize=20, threshold=0.7, factor=0.709, use_auto_downscaling=True, min_face_area=25*25): 42 | if use_auto_downscaling: 43 | image, scale_factor = self.auto_downscale(image) 44 | 45 | faces, pnts = mtcnn_detect_face.detect_face( 46 | image, minsize, 47 | self.pnet, self.rnet, self.onet, 48 | [0.6, 0.7, threshold], 49 | factor) 50 | faces = self.process_mtcnn_bbox(faces, image.shape) 51 | faces, pnts = self.remove_small_faces(faces, pnts, min_face_area) 52 | 53 | if use_auto_downscaling: 54 | faces = self.calibrate_coord(faces, scale_factor) 55 | pnts = self.calibrate_landmarks(pnts, scale_factor) 56 | return faces, pnts 57 | 58 | def auto_downscale(self, image): 59 | if self.is_higher_than_1080p(image): 60 | scale_factor = 4 61 | resized_image = cv2.resize(image, 62 | (image.shape[1]//scale_factor, 63 | image.shape[0]//scale_factor)) 64 | elif self.is_higher_than_720p(image): 65 | scale_factor = 3 66 | resized_image = cv2.resize(image, 67 | (image.shape[1]//scale_factor, 68 | image.shape[0]//scale_factor)) 69 | elif self.is_higher_than_480p(image): 70 | scale_factor = 2 71 | resized_image = cv2.resize(image, 72 | (image.shape[1]//scale_factor, 73 | image.shape[0]//scale_factor)) 74 | else: 75 | scale_factor = 1 76 | resized_image = image.copy() 77 | return resized_image, scale_factor 78 | 79 | @staticmethod 80 | def is_higher_than_480p(x): 81 | return (x.shape[0] * x.shape[1]) >= (858*480) 82 | 83 | @staticmethod 84 | def is_higher_than_720p(x): 85 | return (x.shape[0] * x.shape[1]) >= (1280*720) 86 | 87 | @staticmethod 88 | def is_higher_than_1080p(x): 89 | return (x.shape[0] * x.shape[1]) >= (1920*1080) 90 | 91 | @staticmethod 92 | def process_mtcnn_bbox(bboxes, im_shape): 93 | # output bbox coordinate of MTCNN is (y0, x0, y1, x1) 94 | # Here we process the bbox coord. 
to a square bbox with ordering (x0, y1, x1, y0) 95 | for i, bbox in enumerate(bboxes): 96 | y0, x0, y1, x1 = bboxes[i,0:4] 97 | w = int(y1 - y0) 98 | h = int(x1 - x0) 99 | length = (w + h)/2 100 | center = (int((x1+x0)/2),int((y1+y0)/2)) 101 | new_x0 = np.max([0, (center[0]-length//2)])#.astype(np.int32) 102 | new_x1 = np.min([im_shape[0], (center[0]+length//2)])#.astype(np.int32) 103 | new_y0 = np.max([0, (center[1]-length//2)])#.astype(np.int32) 104 | new_y1 = np.min([im_shape[1], (center[1]+length//2)])#.astype(np.int32) 105 | bboxes[i,0:4] = new_x0, new_y1, new_x1, new_y0 106 | return bboxes 107 | 108 | @staticmethod 109 | def calibrate_coord(faces, scale_factor): 110 | for i, (x0, y1, x1, y0, _) in enumerate(faces): 111 | faces[i] = (x0*scale_factor, y1*scale_factor, 112 | x1*scale_factor, y0*scale_factor, _) 113 | return faces 114 | 115 | @staticmethod 116 | def calibrate_landmarks(pnts, scale_factor): 117 | # pnts is a numpy array 118 | return np.array([xy * scale_factor for xy in pnts]) 119 | 120 | @staticmethod 121 | def remove_small_faces(faces, pnts, min_area=25*25): 122 | def compute_area(face_coord): 123 | x0, y1, x1, y0, _ = face_coord 124 | area = np.abs((x1 - x0) * (y1 - y0)) 125 | return area 126 | 127 | new_faces = [] 128 | new_pnts = [] 129 | # faces has shape (num_faces, coord), and pnts has shape (coord, num_faces) 130 | for face,pnt in zip(faces, pnts.transpose()): 131 | if compute_area(face) >= min_area: 132 | new_faces.append(face) 133 | new_pnts.append(pnt) 134 | new_faces = np.array(new_faces) 135 | new_pnts = np.array(new_pnts).transpose() 136 | return new_faces, new_pnts -------------------------------------------------------------------------------- /elg_weights/elg_keras.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/elg_weights/elg_keras.h5 -------------------------------------------------------------------------------- /models/__pycache__/elg_keras.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/models/__pycache__/elg_keras.cpython-36.pyc -------------------------------------------------------------------------------- /models/elg_keras.py: -------------------------------------------------------------------------------- 1 | from keras.layers import * 2 | from keras.models import Model 3 | import tensorflow as tf 4 | import numpy as np 5 | 6 | class KerasELG(): 7 | def __init__(self, first_layer_stride=3, hg_num_feature_maps=64, hg_num_modules=3): 8 | self._first_layer_stride = first_layer_stride 9 | self._hg_num_feature_maps = hg_num_feature_maps 10 | self._hg_num_modules = hg_num_modules 11 | self._hg_num_residual_blocks = 1 12 | self._hg_num_landmarks = 18 13 | 14 | self.net = self.build_elg_network() 15 | 16 | def build_elg_network(self): 17 | return self.elg() 18 | 19 | """ 20 | The following code is heavily refer to GazeML source code: 21 | https://github.com/swook/GazeML/blob/master/src/models/elg.py 22 | """ 23 | def elg(self): 24 | outputs = {} 25 | inp = Input((108, 180, 1)) 26 | 27 | # Prepare for Hourglass by downscaling via conv 28 | n = self._hg_num_feature_maps 29 | pre_conv1 = self._apply_conv(inp, n, k=7, s=self._first_layer_stride, name="hourglass_pre") 30 | pre_conv1 = self._apply_bn(pre_conv1, name="hourglass_pre_BatchNorm") 31 | pre_conv1 = 
Activation('relu')(pre_conv1) 32 | pre_res1 = self._build_residual_block(pre_conv1, 2*n, name="hourglass_pre_res1") 33 | pre_res2 = self._build_residual_block(pre_res1, n, name="hourglass_pre_res2") 34 | 35 | # Hourglass blocks 36 | x = pre_res2 37 | x_prev = pre_res2 38 | for i in range(self._hg_num_modules): 39 | prefix = f"hourglass_hg_{str(i+1)}" 40 | x = self._build_hourglass(x, steps_to_go=4, f=self._hg_num_feature_maps, name=prefix) 41 | x, h = self._build_hourglass_after( 42 | x_prev, 43 | x, 44 | do_merge=(i<(self._hg_num_modules-1)), 45 | name=prefix) 46 | x_prev = x 47 | x = h 48 | outputs['heatmaps'] = x 49 | 50 | return Model(inp, outputs['heatmaps']) 51 | 52 | def _apply_conv(self, x, f, k=3, s=1, padding='same', name=None): 53 | return Conv2D(f, kernel_size=k, strides=s, use_bias=True, padding=padding, name=name)(x) 54 | 55 | def _apply_bn(self, x, name=None): 56 | return BatchNormalization(name=name)(x) 57 | 58 | def _apply_pool(self, x, k=2, s=2): 59 | return MaxPooling2D(pool_size=k, strides=s, padding="same")(x) 60 | 61 | def _build_residual_block(self, x, f, name="res_block"): 62 | num_in = x.shape.as_list()[-1] 63 | half_num_out = max(int(f/2), 1) 64 | c = x 65 | conv1 = self._apply_bn(c, name=name+"_conv1_BatchNorm") 66 | conv1 = Activation('relu')(conv1) 67 | conv1 = self._apply_conv(conv1, half_num_out, k=1, s=1, name=name+"_conv1") 68 | conv2 = self._apply_bn(conv1, name=name+"_conv2_BatchNorm") 69 | conv2 = Activation('relu')(conv2) 70 | conv2 = self._apply_conv(conv2, half_num_out, k=3, s=1, name=name+"_conv2") 71 | conv3 = self._apply_bn(conv2, name=name+"_conv3_BatchNorm") 72 | conv3 = Activation('relu')(conv3) 73 | conv3 = self._apply_conv(conv3, f, k=1, s=1, name=name+"_conv3") 74 | 75 | if num_in == f: 76 | s = x 77 | else: 78 | s = self._apply_conv(x, f, k=1, s=1, name=name+"_skip") 79 | out = Add()([conv3, s]) 80 | return out 81 | 82 | def _build_hourglass(self, x, steps_to_go, f, depth=1, name=None): 83 | prefix_name = name + f"_depth{str(depth)}" 84 | 85 | # Upper branch 86 | up1 = x 87 | for i in range(self._hg_num_residual_blocks): 88 | up1 = self._build_residual_block(up1, f, name=prefix_name+f"_up1_{str(i+1)}") 89 | 90 | # Lower branch 91 | low1 = self._apply_pool(x, k=2, s=2) 92 | for i in range(self._hg_num_residual_blocks): 93 | low1 = self._build_residual_block(low1, f, name=prefix_name+f"_low1_{str(i+1)}") 94 | 95 | # Recursive 96 | low2 = None 97 | if steps_to_go > 1: 98 | low2 = self._build_hourglass(low1, steps_to_go-1, f, depth=depth+1, name=prefix_name) 99 | else: 100 | low2 = low1 101 | for i in range(self._hg_num_residual_blocks): 102 | low2 = self._build_residual_block(low2, f, name=prefix_name+f"_low2_{str(i+1)}") 103 | 104 | # Additional residual blocks 105 | low3 = low2 106 | for i in range(self._hg_num_residual_blocks): 107 | low3 = self._build_residual_block(low3, f, name=prefix_name+f"_low3_{str(i+1)}") 108 | 109 | # Upsample 110 | up2 = Lambda( 111 | lambda x: tf.image.resize_bicubic( 112 | x[0], 113 | x[1].shape.as_list()[1:3], 114 | align_corners=True))([low3, up1]) # default resize_bilear 115 | 116 | out = Add()([up1, up2]) 117 | return out 118 | 119 | def _build_hourglass_after(self, x_prev, x_now, do_merge=True, name=None): 120 | prefix_name = name+"_after" 121 | 122 | for j in range(self._hg_num_residual_blocks): 123 | x_now = self._build_residual_block( 124 | x_now, 125 | self._hg_num_feature_maps, 126 | name=prefix_name+f"_after_hg_{str(j+1)}") 127 | x_now = self._apply_conv(x_now, self._hg_num_feature_maps, k=1, s=1, 
name=prefix_name) 128 | x_now = self._apply_bn(x_now, name=prefix_name+"_BatchNorm") 129 | x_now = Activation('relu')(x_now) 130 | 131 | h = self._apply_conv(x_now, self._hg_num_landmarks, k=1, s=1, name=prefix_name+"_hmap") 132 | 133 | x_next = x_now 134 | if do_merge: 135 | prefix_name = name 136 | x_hmaps = self._apply_conv( 137 | h, 138 | self._hg_num_feature_maps, 139 | k=1, 140 | s=1, 141 | name=prefix_name+"_merge_h") 142 | x_now = self._apply_conv( 143 | x_now, 144 | self._hg_num_feature_maps, 145 | k=1, 146 | s=1, 147 | name=prefix_name+"_merge_x") 148 | x_add = Add()([x_prev, x_hmaps]) 149 | x_next = Add()([x_next, x_add]) 150 | return x_next, h 151 | 152 | @staticmethod 153 | def _calculate_landmarks(x, beta=5e1): 154 | def np_softmax(x, axis=1): 155 | t = np.exp(x) 156 | a = np.exp(x) / np.sum(t, axis=axis).reshape(-1,1) 157 | return a 158 | 159 | if len(x.shape) < 4: 160 | x = x[None, ...] 161 | h, w = x.shape[1:3] 162 | ref_xs, ref_ys = np.meshgrid(np.linspace(0, 1.0, num=w, endpoint=True), 163 | np.linspace(0, 1.0, num=h, endpoint=True), 164 | indexing='xy') 165 | ref_xs = np.reshape(ref_xs, [-1, h*w]) 166 | ref_ys = np.reshape(ref_ys, [-1, h*w]) 167 | 168 | # Assuming N x 18 x 45 x 75 (NCHW) 169 | beta = beta 170 | x = np.transpose(x, (0, 3, 1, 2)) 171 | x = np.reshape(x, [-1, 18, h*w]) 172 | x = np_softmax(beta * x, axis=-1) 173 | lmrk_xs = np.sum(ref_xs * x, axis=2) 174 | lmrk_ys = np.sum(ref_ys * x, axis=2) 175 | 176 | # Return to actual coordinates ranges 177 | return np.stack([lmrk_xs * (w - 1.0) + 0.5, lmrk_ys * (h - 1.0) + 0.5], axis=2) # N x 18 x 2 178 | -------------------------------------------------------------------------------- /mtcnn_detect_face.py: -------------------------------------------------------------------------------- 1 | # Source: https://github.com/davidsandberg/facenet/blob/master/src/align/ 2 | 3 | """ Tensorflow implementation of the face detection / alignment algorithm found at 4 | https://github.com/kpzhang93/MTCNN_face_detection_alignment 5 | """ 6 | # MIT License 7 | # 8 | # Copyright (c) 2016 David Sandberg 9 | # 10 | # Permission is hereby granted, free of charge, to any person obtaining a copy 11 | # of this software and associated documentation files (the "Software"), to deal 12 | # in the Software without restriction, including without limitation the rights 13 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 14 | # copies of the Software, and to permit persons to whom the Software is 15 | # furnished to do so, subject to the following conditions: 16 | # 17 | # The above copyright notice and this permission notice shall be included in all 18 | # copies or substantial portions of the Software. 19 | # 20 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 21 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 22 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 23 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 24 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 25 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 26 | # SOFTWARE. 
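# The code below implements the standard three-stage MTCNN cascade:
#   1. P-Net scans an image pyramid and proposes candidate face boxes.
#   2. R-Net rejects false positives and refines the surviving boxes.
#   3. O-Net performs a final refinement and also outputs five facial landmarks.
# Each stage is followed by bounding-box regression and non-maximum suppression.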
27 | 28 | from __future__ import absolute_import 29 | from __future__ import division 30 | from __future__ import print_function 31 | from six import string_types, iteritems 32 | 33 | import numpy as np 34 | import tensorflow as tf 35 | #from math import floor 36 | import cv2 37 | import os 38 | 39 | def layer(op): 40 | """Decorator for composable network layers.""" 41 | 42 | def layer_decorated(self, *args, **kwargs): 43 | # Automatically set a name if not provided. 44 | name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) 45 | # Figure out the layer inputs. 46 | if len(self.terminals) == 0: 47 | raise RuntimeError('No input variables found for layer %s.' % name) 48 | elif len(self.terminals) == 1: 49 | layer_input = self.terminals[0] 50 | else: 51 | layer_input = list(self.terminals) 52 | # Perform the operation and get the output. 53 | layer_output = op(self, layer_input, *args, **kwargs) 54 | # Add to layer LUT. 55 | self.layers[name] = layer_output 56 | # This output is now the input for the next layer. 57 | self.feed(layer_output) 58 | # Return self for chained calls. 59 | return self 60 | 61 | return layer_decorated 62 | 63 | class Network(object): 64 | 65 | def __init__(self, inputs, trainable=True): 66 | # The input nodes for this network 67 | self.inputs = inputs 68 | # The current list of terminal nodes 69 | self.terminals = [] 70 | # Mapping from layer names to layers 71 | self.layers = dict(inputs) 72 | # If true, the resulting variables are set as trainable 73 | self.trainable = trainable 74 | 75 | self.setup() 76 | 77 | def setup(self): 78 | """Construct the network. """ 79 | raise NotImplementedError('Must be implemented by the subclass.') 80 | 81 | def load(self, data_path, session, ignore_missing=False): 82 | """Load network weights. 83 | data_path: The path to the numpy-serialized network weights 84 | session: The current TensorFlow session 85 | ignore_missing: If true, serialized weights for missing layers are ignored. 86 | """ 87 | data_dict = np.load(data_path, encoding='latin1').item() #pylint: disable=no-member 88 | 89 | for op_name in data_dict: 90 | with tf.variable_scope(op_name, reuse=True): 91 | for param_name, data in iteritems(data_dict[op_name]): 92 | try: 93 | var = tf.get_variable(param_name) 94 | session.run(var.assign(data)) 95 | except ValueError: 96 | if not ignore_missing: 97 | raise 98 | 99 | def feed(self, *args): 100 | """Set the input(s) for the next operation by replacing the terminal nodes. 101 | The arguments can be either layer names or the actual layers. 102 | """ 103 | assert len(args) != 0 104 | self.terminals = [] 105 | for fed_layer in args: 106 | if isinstance(fed_layer, string_types): 107 | try: 108 | fed_layer = self.layers[fed_layer] 109 | except KeyError: 110 | raise KeyError('Unknown layer name fed: %s' % fed_layer) 111 | self.terminals.append(fed_layer) 112 | return self 113 | 114 | def get_output(self): 115 | """Returns the current network output.""" 116 | return self.terminals[-1] 117 | 118 | def get_unique_name(self, prefix): 119 | """Returns an index-suffixed unique name for the given prefix. 120 | This is used for auto-generating layer names based on the type-prefix. 
121 | """ 122 | ident = sum(t.startswith(prefix) for t, _ in self.layers.items()) + 1 123 | return '%s_%d' % (prefix, ident) 124 | 125 | def make_var(self, name, shape): 126 | """Creates a new TensorFlow variable.""" 127 | return tf.get_variable(name, shape, trainable=self.trainable) 128 | 129 | def validate_padding(self, padding): 130 | """Verifies that the padding is one of the supported ones.""" 131 | assert padding in ('SAME', 'VALID') 132 | 133 | @layer 134 | def conv(self, 135 | inp, 136 | k_h, 137 | k_w, 138 | c_o, 139 | s_h, 140 | s_w, 141 | name, 142 | relu=True, 143 | padding='SAME', 144 | group=1, 145 | biased=True): 146 | # Verify that the padding is acceptable 147 | self.validate_padding(padding) 148 | # Get the number of channels in the input 149 | c_i = int(inp.get_shape()[-1]) 150 | # Verify that the grouping parameter is valid 151 | assert c_i % group == 0 152 | assert c_o % group == 0 153 | # Convolution for a given input and kernel 154 | convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding) 155 | with tf.variable_scope(name) as scope: 156 | kernel = self.make_var('weights', shape=[k_h, k_w, c_i // group, c_o]) 157 | # This is the common-case. Convolve the input without any further complications. 158 | output = convolve(inp, kernel) 159 | # Add the biases 160 | if biased: 161 | biases = self.make_var('biases', [c_o]) 162 | output = tf.nn.bias_add(output, biases) 163 | if relu: 164 | # ReLU non-linearity 165 | output = tf.nn.relu(output, name=scope.name) 166 | return output 167 | 168 | @layer 169 | def prelu(self, inp, name): 170 | with tf.variable_scope(name): 171 | i = int(inp.get_shape()[-1]) 172 | alpha = self.make_var('alpha', shape=(i,)) 173 | output = tf.nn.relu(inp) + tf.multiply(alpha, -tf.nn.relu(-inp)) 174 | return output 175 | 176 | @layer 177 | def max_pool(self, inp, k_h, k_w, s_h, s_w, name, padding='SAME'): 178 | self.validate_padding(padding) 179 | return tf.nn.max_pool(inp, 180 | ksize=[1, k_h, k_w, 1], 181 | strides=[1, s_h, s_w, 1], 182 | padding=padding, 183 | name=name) 184 | 185 | @layer 186 | def fc(self, inp, num_out, name, relu=True): 187 | with tf.variable_scope(name): 188 | input_shape = inp.get_shape() 189 | if input_shape.ndims == 4: 190 | # The input is spatial. Vectorize it first. 
191 | dim = 1 192 | for d in input_shape[1:].as_list(): 193 | dim *= int(d) 194 | feed_in = tf.reshape(inp, [-1, dim]) 195 | else: 196 | feed_in, dim = (inp, input_shape[-1].value) 197 | weights = self.make_var('weights', shape=[dim, num_out]) 198 | biases = self.make_var('biases', [num_out]) 199 | op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b 200 | fc = op(feed_in, weights, biases, name=name) 201 | return fc 202 | 203 | 204 | """ 205 | Multi dimensional softmax, 206 | refer to https://github.com/tensorflow/tensorflow/issues/210 207 | compute softmax along the dimension of target 208 | the native softmax only supports batch_size x dimension 209 | """ 210 | @layer 211 | def softmax(self, target, axis, name=None): 212 | max_axis = tf.reduce_max(target, axis, keepdims=True) 213 | target_exp = tf.exp(target-max_axis) 214 | normalize = tf.reduce_sum(target_exp, axis, keepdims=True) 215 | softmax = tf.div(target_exp, normalize, name) 216 | return softmax 217 | 218 | class PNet(Network): 219 | def setup(self): 220 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 221 | .conv(3, 3, 10, 1, 1, padding='VALID', relu=False, name='conv1') 222 | .prelu(name='PReLU1') 223 | .max_pool(2, 2, 2, 2, name='pool1') 224 | .conv(3, 3, 16, 1, 1, padding='VALID', relu=False, name='conv2') 225 | .prelu(name='PReLU2') 226 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv3') 227 | .prelu(name='PReLU3') 228 | .conv(1, 1, 2, 1, 1, relu=False, name='conv4-1') 229 | .softmax(3,name='prob1')) 230 | 231 | (self.feed('PReLU3') #pylint: disable=no-value-for-parameter 232 | .conv(1, 1, 4, 1, 1, relu=False, name='conv4-2')) 233 | 234 | class RNet(Network): 235 | def setup(self): 236 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 237 | .conv(3, 3, 28, 1, 1, padding='VALID', relu=False, name='conv1') 238 | .prelu(name='prelu1') 239 | .max_pool(3, 3, 2, 2, name='pool1') 240 | .conv(3, 3, 48, 1, 1, padding='VALID', relu=False, name='conv2') 241 | .prelu(name='prelu2') 242 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 243 | .conv(2, 2, 64, 1, 1, padding='VALID', relu=False, name='conv3') 244 | .prelu(name='prelu3') 245 | .fc(128, relu=False, name='conv4') 246 | .prelu(name='prelu4') 247 | .fc(2, relu=False, name='conv5-1') 248 | .softmax(1,name='prob1')) 249 | 250 | (self.feed('prelu4') #pylint: disable=no-value-for-parameter 251 | .fc(4, relu=False, name='conv5-2')) 252 | 253 | class ONet(Network): 254 | def setup(self): 255 | (self.feed('data') #pylint: disable=no-value-for-parameter, no-member 256 | .conv(3, 3, 32, 1, 1, padding='VALID', relu=False, name='conv1') 257 | .prelu(name='prelu1') 258 | .max_pool(3, 3, 2, 2, name='pool1') 259 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv2') 260 | .prelu(name='prelu2') 261 | .max_pool(3, 3, 2, 2, padding='VALID', name='pool2') 262 | .conv(3, 3, 64, 1, 1, padding='VALID', relu=False, name='conv3') 263 | .prelu(name='prelu3') 264 | .max_pool(2, 2, 2, 2, name='pool3') 265 | .conv(2, 2, 128, 1, 1, padding='VALID', relu=False, name='conv4') 266 | .prelu(name='prelu4') 267 | .fc(256, relu=False, name='conv5') 268 | .prelu(name='prelu5') 269 | .fc(2, relu=False, name='conv6-1') 270 | .softmax(1, name='prob1')) 271 | 272 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 273 | .fc(4, relu=False, name='conv6-2')) 274 | 275 | (self.feed('prelu5') #pylint: disable=no-value-for-parameter 276 | .fc(10, relu=False, name='conv6-3')) 277 | 278 | def create_mtcnn(sess, model_path): 279 | if not 
model_path: 280 | model_path,_ = os.path.split(os.path.realpath(__file__)) 281 | 282 | with tf.variable_scope('pnet'): 283 | data = tf.placeholder(tf.float32, (None,None,None,3), 'input') 284 | pnet = PNet({'data':data}) 285 | pnet.load(os.path.join(model_path, 'det1.npy'), sess) 286 | with tf.variable_scope('rnet'): 287 | data = tf.placeholder(tf.float32, (None,24,24,3), 'input') 288 | rnet = RNet({'data':data}) 289 | rnet.load(os.path.join(model_path, 'det2.npy'), sess) 290 | with tf.variable_scope('onet'): 291 | data = tf.placeholder(tf.float32, (None,48,48,3), 'input') 292 | onet = ONet({'data':data}) 293 | onet.load(os.path.join(model_path, 'det3.npy'), sess) 294 | 295 | pnet_fun = lambda img : sess.run(('pnet/conv4-2/BiasAdd:0', 'pnet/prob1:0'), feed_dict={'pnet/input:0':img}) 296 | rnet_fun = lambda img : sess.run(('rnet/conv5-2/conv5-2:0', 'rnet/prob1:0'), feed_dict={'rnet/input:0':img}) 297 | onet_fun = lambda img : sess.run(('onet/conv6-2/conv6-2:0', 'onet/conv6-3/conv6-3:0', 'onet/prob1:0'), feed_dict={'onet/input:0':img}) 298 | return pnet_fun, rnet_fun, onet_fun 299 | 300 | def detect_face(img, minsize, pnet, rnet, onet, threshold, factor): 301 | """Detects faces in an image, and returns bounding boxes and points for them. 302 | img: input image 303 | minsize: minimum faces' size 304 | pnet, rnet, onet: caffemodel 305 | threshold: threshold=[th1, th2, th3], th1-3 are three steps's threshold 306 | factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 307 | """ 308 | factor_count=0 309 | total_boxes=np.empty((0,9)) 310 | points=np.empty(0) 311 | h=img.shape[0] 312 | w=img.shape[1] 313 | minl=np.amin([h, w]) 314 | m=12.0/minsize 315 | minl=minl*m 316 | # create scale pyramid 317 | scales=[] 318 | while minl>=12: 319 | scales += [m*np.power(factor, factor_count)] 320 | minl = minl*factor 321 | factor_count += 1 322 | 323 | # first stage 324 | for scale in scales: 325 | hs=int(np.ceil(h*scale)) 326 | ws=int(np.ceil(w*scale)) 327 | im_data = imresample(img, (hs, ws)) 328 | im_data = (im_data-127.5)*0.0078125 329 | img_x = np.expand_dims(im_data, 0) 330 | img_y = np.transpose(img_x, (0,2,1,3)) 331 | out = pnet([img_y]) 332 | out0 = np.transpose(out[0], (0,2,1,3)) 333 | out1 = np.transpose(out[1], (0,2,1,3)) 334 | 335 | boxes, _ = generateBoundingBox(out1[0,:,:,1].copy(), out0[0,:,:,:].copy(), scale, threshold[0]) 336 | 337 | # inter-scale nms 338 | pick = nms(boxes.copy(), 0.5, 'Union') 339 | if boxes.size>0 and pick.size>0: 340 | boxes = boxes[pick,:] 341 | total_boxes = np.append(total_boxes, boxes, axis=0) 342 | 343 | numbox = total_boxes.shape[0] 344 | if numbox>0: 345 | pick = nms(total_boxes.copy(), 0.7, 'Union') 346 | total_boxes = total_boxes[pick,:] 347 | regw = total_boxes[:,2]-total_boxes[:,0] 348 | regh = total_boxes[:,3]-total_boxes[:,1] 349 | qq1 = total_boxes[:,0]+total_boxes[:,5]*regw 350 | qq2 = total_boxes[:,1]+total_boxes[:,6]*regh 351 | qq3 = total_boxes[:,2]+total_boxes[:,7]*regw 352 | qq4 = total_boxes[:,3]+total_boxes[:,8]*regh 353 | total_boxes = np.transpose(np.vstack([qq1, qq2, qq3, qq4, total_boxes[:,4]])) 354 | total_boxes = rerec(total_boxes.copy()) 355 | total_boxes[:,0:4] = np.fix(total_boxes[:,0:4]).astype(np.int32) 356 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 357 | 358 | numbox = total_boxes.shape[0] 359 | if numbox>0: 360 | # second stage 361 | tempimg = np.zeros((24,24,3,numbox)) 362 | for k in range(0,numbox): 363 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 364 | 
tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 365 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 366 | tempimg[:,:,:,k] = imresample(tmp, (24, 24)) 367 | else: 368 | return np.empty() 369 | tempimg = (tempimg-127.5)*0.0078125 370 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 371 | out = rnet([tempimg1]) 372 | out0 = np.transpose(out[0]) 373 | out1 = np.transpose(out[1]) 374 | score = out1[1,:] 375 | ipass = np.where(score>threshold[1]) 376 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 377 | mv = out0[:,ipass[0]] 378 | if total_boxes.shape[0]>0: 379 | pick = nms(total_boxes, 0.7, 'Union') 380 | total_boxes = total_boxes[pick,:] 381 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv[:,pick])) 382 | total_boxes = rerec(total_boxes.copy()) 383 | 384 | numbox = total_boxes.shape[0] 385 | if numbox>0: 386 | # third stage 387 | total_boxes = np.fix(total_boxes).astype(np.int32) 388 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(total_boxes.copy(), w, h) 389 | tempimg = np.zeros((48,48,3,numbox)) 390 | for k in range(0,numbox): 391 | tmp = np.zeros((int(tmph[k]),int(tmpw[k]),3)) 392 | tmp[dy[k]-1:edy[k],dx[k]-1:edx[k],:] = img[y[k]-1:ey[k],x[k]-1:ex[k],:] 393 | if tmp.shape[0]>0 and tmp.shape[1]>0 or tmp.shape[0]==0 and tmp.shape[1]==0: 394 | tempimg[:,:,:,k] = imresample(tmp, (48, 48)) 395 | else: 396 | return np.empty() 397 | tempimg = (tempimg-127.5)*0.0078125 398 | tempimg1 = np.transpose(tempimg, (3,1,0,2)) 399 | out = onet([tempimg1]) 400 | out0 = np.transpose(out[0]) 401 | out1 = np.transpose(out[1]) 402 | out2 = np.transpose(out[2]) 403 | score = out2[1,:] 404 | points = out1 405 | ipass = np.where(score>threshold[2]) 406 | points = points[:,ipass[0]] 407 | total_boxes = np.hstack([total_boxes[ipass[0],0:4].copy(), np.expand_dims(score[ipass].copy(),1)]) 408 | mv = out0[:,ipass[0]] 409 | 410 | w = total_boxes[:,2]-total_boxes[:,0]+1 411 | h = total_boxes[:,3]-total_boxes[:,1]+1 412 | points[0:5,:] = np.tile(w,(5, 1))*points[0:5,:] + np.tile(total_boxes[:,0],(5, 1))-1 413 | points[5:10,:] = np.tile(h,(5, 1))*points[5:10,:] + np.tile(total_boxes[:,1],(5, 1))-1 414 | if total_boxes.shape[0]>0: 415 | total_boxes = bbreg(total_boxes.copy(), np.transpose(mv)) 416 | pick = nms(total_boxes.copy(), 0.7, 'Min') 417 | total_boxes = total_boxes[pick,:] 418 | points = points[:,pick] 419 | 420 | return total_boxes, points 421 | 422 | 423 | def bulk_detect_face(images, detection_window_size_ratio, pnet, rnet, onet, threshold, factor): 424 | """Detects faces in a list of images 425 | images: list containing input images 426 | detection_window_size_ratio: ratio of minimum face size to smallest image dimension 427 | pnet, rnet, onet: caffemodel 428 | threshold: threshold=[th1 th2 th3], th1-3 are three steps's threshold [0-1] 429 | factor: the factor used to create a scaling pyramid of face sizes to detect in the image. 
430 | """ 431 | all_scales = [None] * len(images) 432 | images_with_boxes = [None] * len(images) 433 | 434 | for i in range(len(images)): 435 | images_with_boxes[i] = {'total_boxes': np.empty((0, 9))} 436 | 437 | # create scale pyramid 438 | for index, img in enumerate(images): 439 | all_scales[index] = [] 440 | h = img.shape[0] 441 | w = img.shape[1] 442 | minsize = int(detection_window_size_ratio * np.minimum(w, h)) 443 | factor_count = 0 444 | minl = np.amin([h, w]) 445 | if minsize <= 12: 446 | minsize = 12 447 | 448 | m = 12.0 / minsize 449 | minl = minl * m 450 | while minl >= 12: 451 | all_scales[index].append(m * np.power(factor, factor_count)) 452 | minl = minl * factor 453 | factor_count += 1 454 | 455 | # # # # # # # # # # # # # 456 | # first stage - fast proposal network (pnet) to obtain face candidates 457 | # # # # # # # # # # # # # 458 | 459 | images_obj_per_resolution = {} 460 | 461 | # TODO: use some type of rounding to number module 8 to increase probability that pyramid images will have the same resolution across input images 462 | 463 | for index, scales in enumerate(all_scales): 464 | h = images[index].shape[0] 465 | w = images[index].shape[1] 466 | 467 | for scale in scales: 468 | hs = int(np.ceil(h * scale)) 469 | ws = int(np.ceil(w * scale)) 470 | 471 | if (ws, hs) not in images_obj_per_resolution: 472 | images_obj_per_resolution[(ws, hs)] = [] 473 | 474 | im_data = imresample(images[index], (hs, ws)) 475 | im_data = (im_data - 127.5) * 0.0078125 476 | img_y = np.transpose(im_data, (1, 0, 2)) # caffe uses different dimensions ordering 477 | images_obj_per_resolution[(ws, hs)].append({'scale': scale, 'image': img_y, 'index': index}) 478 | 479 | for resolution in images_obj_per_resolution: 480 | images_per_resolution = [i['image'] for i in images_obj_per_resolution[resolution]] 481 | outs = pnet(images_per_resolution) 482 | 483 | for index in range(len(outs[0])): 484 | scale = images_obj_per_resolution[resolution][index]['scale'] 485 | image_index = images_obj_per_resolution[resolution][index]['index'] 486 | out0 = np.transpose(outs[0][index], (1, 0, 2)) 487 | out1 = np.transpose(outs[1][index], (1, 0, 2)) 488 | 489 | boxes, _ = generateBoundingBox(out1[:, :, 1].copy(), out0[:, :, :].copy(), scale, threshold[0]) 490 | 491 | # inter-scale nms 492 | pick = nms(boxes.copy(), 0.5, 'Union') 493 | if boxes.size > 0 and pick.size > 0: 494 | boxes = boxes[pick, :] 495 | images_with_boxes[image_index]['total_boxes'] = np.append(images_with_boxes[image_index]['total_boxes'], 496 | boxes, 497 | axis=0) 498 | 499 | for index, image_obj in enumerate(images_with_boxes): 500 | numbox = image_obj['total_boxes'].shape[0] 501 | if numbox > 0: 502 | h = images[index].shape[0] 503 | w = images[index].shape[1] 504 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Union') 505 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 506 | regw = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] 507 | regh = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] 508 | qq1 = image_obj['total_boxes'][:, 0] + image_obj['total_boxes'][:, 5] * regw 509 | qq2 = image_obj['total_boxes'][:, 1] + image_obj['total_boxes'][:, 6] * regh 510 | qq3 = image_obj['total_boxes'][:, 2] + image_obj['total_boxes'][:, 7] * regw 511 | qq4 = image_obj['total_boxes'][:, 3] + image_obj['total_boxes'][:, 8] * regh 512 | image_obj['total_boxes'] = np.transpose(np.vstack([qq1, qq2, qq3, qq4, image_obj['total_boxes'][:, 4]])) 513 | image_obj['total_boxes'] = 
rerec(image_obj['total_boxes'].copy()) 514 | image_obj['total_boxes'][:, 0:4] = np.fix(image_obj['total_boxes'][:, 0:4]).astype(np.int32) 515 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 516 | 517 | numbox = image_obj['total_boxes'].shape[0] 518 | tempimg = np.zeros((24, 24, 3, numbox)) 519 | 520 | if numbox > 0: 521 | for k in range(0, numbox): 522 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 523 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 524 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 525 | tempimg[:, :, :, k] = imresample(tmp, (24, 24)) 526 | else: 527 | return np.empty() 528 | 529 | tempimg = (tempimg - 127.5) * 0.0078125 530 | image_obj['rnet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 531 | 532 | # # # # # # # # # # # # # 533 | # second stage - refinement of face candidates with rnet 534 | # # # # # # # # # # # # # 535 | 536 | bulk_rnet_input = np.empty((0, 24, 24, 3)) 537 | for index, image_obj in enumerate(images_with_boxes): 538 | if 'rnet_input' in image_obj: 539 | bulk_rnet_input = np.append(bulk_rnet_input, image_obj['rnet_input'], axis=0) 540 | 541 | out = rnet(bulk_rnet_input) 542 | out0 = np.transpose(out[0]) 543 | out1 = np.transpose(out[1]) 544 | score = out1[1, :] 545 | 546 | i = 0 547 | for index, image_obj in enumerate(images_with_boxes): 548 | if 'rnet_input' not in image_obj: 549 | continue 550 | 551 | rnet_input_count = image_obj['rnet_input'].shape[0] 552 | score_per_image = score[i:i + rnet_input_count] 553 | out0_per_image = out0[:, i:i + rnet_input_count] 554 | 555 | ipass = np.where(score_per_image > threshold[1]) 556 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 557 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 558 | 559 | mv = out0_per_image[:, ipass[0]] 560 | 561 | if image_obj['total_boxes'].shape[0] > 0: 562 | h = images[index].shape[0] 563 | w = images[index].shape[1] 564 | pick = nms(image_obj['total_boxes'], 0.7, 'Union') 565 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 566 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv[:, pick])) 567 | image_obj['total_boxes'] = rerec(image_obj['total_boxes'].copy()) 568 | 569 | numbox = image_obj['total_boxes'].shape[0] 570 | 571 | if numbox > 0: 572 | tempimg = np.zeros((48, 48, 3, numbox)) 573 | image_obj['total_boxes'] = np.fix(image_obj['total_boxes']).astype(np.int32) 574 | dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph = pad(image_obj['total_boxes'].copy(), w, h) 575 | 576 | for k in range(0, numbox): 577 | tmp = np.zeros((int(tmph[k]), int(tmpw[k]), 3)) 578 | tmp[dy[k] - 1:edy[k], dx[k] - 1:edx[k], :] = images[index][y[k] - 1:ey[k], x[k] - 1:ex[k], :] 579 | if tmp.shape[0] > 0 and tmp.shape[1] > 0 or tmp.shape[0] == 0 and tmp.shape[1] == 0: 580 | tempimg[:, :, :, k] = imresample(tmp, (48, 48)) 581 | else: 582 | return np.empty() 583 | tempimg = (tempimg - 127.5) * 0.0078125 584 | image_obj['onet_input'] = np.transpose(tempimg, (3, 1, 0, 2)) 585 | 586 | i += rnet_input_count 587 | 588 | # # # # # # # # # # # # # 589 | # third stage - further refinement and facial landmarks positions with onet 590 | # # # # # # # # # # # # # 591 | 592 | bulk_onet_input = np.empty((0, 48, 48, 3)) 593 | for index, image_obj in enumerate(images_with_boxes): 594 | if 'onet_input' in image_obj: 595 | bulk_onet_input = np.append(bulk_onet_input, image_obj['onet_input'], axis=0) 596 | 597 | out 
= onet(bulk_onet_input) 598 | 599 | out0 = np.transpose(out[0]) 600 | out1 = np.transpose(out[1]) 601 | out2 = np.transpose(out[2]) 602 | score = out2[1, :] 603 | points = out1 604 | 605 | i = 0 606 | ret = [] 607 | for index, image_obj in enumerate(images_with_boxes): 608 | if 'onet_input' not in image_obj: 609 | ret.append(None) 610 | continue 611 | 612 | onet_input_count = image_obj['onet_input'].shape[0] 613 | 614 | out0_per_image = out0[:, i:i + onet_input_count] 615 | score_per_image = score[i:i + onet_input_count] 616 | points_per_image = points[:, i:i + onet_input_count] 617 | 618 | ipass = np.where(score_per_image > threshold[2]) 619 | points_per_image = points_per_image[:, ipass[0]] 620 | 621 | image_obj['total_boxes'] = np.hstack([image_obj['total_boxes'][ipass[0], 0:4].copy(), 622 | np.expand_dims(score_per_image[ipass].copy(), 1)]) 623 | mv = out0_per_image[:, ipass[0]] 624 | 625 | w = image_obj['total_boxes'][:, 2] - image_obj['total_boxes'][:, 0] + 1 626 | h = image_obj['total_boxes'][:, 3] - image_obj['total_boxes'][:, 1] + 1 627 | points_per_image[0:5, :] = np.tile(w, (5, 1)) * points_per_image[0:5, :] + np.tile( 628 | image_obj['total_boxes'][:, 0], (5, 1)) - 1 629 | points_per_image[5:10, :] = np.tile(h, (5, 1)) * points_per_image[5:10, :] + np.tile( 630 | image_obj['total_boxes'][:, 1], (5, 1)) - 1 631 | 632 | if image_obj['total_boxes'].shape[0] > 0: 633 | image_obj['total_boxes'] = bbreg(image_obj['total_boxes'].copy(), np.transpose(mv)) 634 | pick = nms(image_obj['total_boxes'].copy(), 0.7, 'Min') 635 | image_obj['total_boxes'] = image_obj['total_boxes'][pick, :] 636 | points_per_image = points_per_image[:, pick] 637 | 638 | ret.append((image_obj['total_boxes'], points_per_image)) 639 | else: 640 | ret.append(None) 641 | 642 | i += onet_input_count 643 | 644 | return ret 645 | 646 | 647 | # function [boundingbox] = bbreg(boundingbox,reg) 648 | def bbreg(boundingbox,reg): 649 | """Calibrate bounding boxes""" 650 | if reg.shape[1]==1: 651 | reg = np.reshape(reg, (reg.shape[2], reg.shape[3])) 652 | 653 | w = boundingbox[:,2]-boundingbox[:,0]+1 654 | h = boundingbox[:,3]-boundingbox[:,1]+1 655 | b1 = boundingbox[:,0]+reg[:,0]*w 656 | b2 = boundingbox[:,1]+reg[:,1]*h 657 | b3 = boundingbox[:,2]+reg[:,2]*w 658 | b4 = boundingbox[:,3]+reg[:,3]*h 659 | boundingbox[:,0:4] = np.transpose(np.vstack([b1, b2, b3, b4 ])) 660 | return boundingbox 661 | 662 | def generateBoundingBox(imap, reg, scale, t): 663 | """Use heatmap to generate bounding boxes""" 664 | stride=2 665 | cellsize=12 666 | 667 | imap = np.transpose(imap) 668 | dx1 = np.transpose(reg[:,:,0]) 669 | dy1 = np.transpose(reg[:,:,1]) 670 | dx2 = np.transpose(reg[:,:,2]) 671 | dy2 = np.transpose(reg[:,:,3]) 672 | y, x = np.where(imap >= t) 673 | if y.shape[0]==1: 674 | dx1 = np.flipud(dx1) 675 | dy1 = np.flipud(dy1) 676 | dx2 = np.flipud(dx2) 677 | dy2 = np.flipud(dy2) 678 | score = imap[(y,x)] 679 | reg = np.transpose(np.vstack([ dx1[(y,x)], dy1[(y,x)], dx2[(y,x)], dy2[(y,x)] ])) 680 | if reg.size==0: 681 | reg = np.empty((0,3)) 682 | bb = np.transpose(np.vstack([y,x])) 683 | q1 = np.fix((stride*bb+1)/scale) 684 | q2 = np.fix((stride*bb+cellsize-1+1)/scale) 685 | boundingbox = np.hstack([q1, q2, np.expand_dims(score,1), reg]) 686 | return boundingbox, reg 687 | 688 | # function pick = nms(boxes,threshold,type) 689 | def nms(boxes, threshold, method): 690 | if boxes.size==0: 691 | return np.empty((0,3)) 692 | x1 = boxes[:,0] 693 | y1 = boxes[:,1] 694 | x2 = boxes[:,2] 695 | y2 = boxes[:,3] 696 | s = boxes[:,4] 697 | area 
= (x2-x1+1) * (y2-y1+1) 698 | I = np.argsort(s) 699 | pick = np.zeros_like(s, dtype=np.int16) 700 | counter = 0 701 | while I.size>0: 702 | i = I[-1] 703 | pick[counter] = i 704 | counter += 1 705 | idx = I[0:-1] 706 | xx1 = np.maximum(x1[i], x1[idx]) 707 | yy1 = np.maximum(y1[i], y1[idx]) 708 | xx2 = np.minimum(x2[i], x2[idx]) 709 | yy2 = np.minimum(y2[i], y2[idx]) 710 | w = np.maximum(0.0, xx2-xx1+1) 711 | h = np.maximum(0.0, yy2-yy1+1) 712 | inter = w * h 713 | if method is 'Min': 714 | o = inter / np.minimum(area[i], area[idx]) 715 | else: 716 | o = inter / (area[i] + area[idx] - inter) 717 | I = I[np.where(o<=threshold)] 718 | pick = pick[0:counter] 719 | return pick 720 | 721 | # function [dy edy dx edx y ey x ex tmpw tmph] = pad(total_boxes,w,h) 722 | def pad(total_boxes, w, h): 723 | """Compute the padding coordinates (pad the bounding boxes to square)""" 724 | tmpw = (total_boxes[:,2]-total_boxes[:,0]+1).astype(np.int32) 725 | tmph = (total_boxes[:,3]-total_boxes[:,1]+1).astype(np.int32) 726 | numbox = total_boxes.shape[0] 727 | 728 | dx = np.ones((numbox), dtype=np.int32) 729 | dy = np.ones((numbox), dtype=np.int32) 730 | edx = tmpw.copy().astype(np.int32) 731 | edy = tmph.copy().astype(np.int32) 732 | 733 | x = total_boxes[:,0].copy().astype(np.int32) 734 | y = total_boxes[:,1].copy().astype(np.int32) 735 | ex = total_boxes[:,2].copy().astype(np.int32) 736 | ey = total_boxes[:,3].copy().astype(np.int32) 737 | 738 | tmp = np.where(ex>w) 739 | edx.flat[tmp] = np.expand_dims(-ex[tmp]+w+tmpw[tmp],1) 740 | ex[tmp] = w 741 | 742 | tmp = np.where(ey>h) 743 | edy.flat[tmp] = np.expand_dims(-ey[tmp]+h+tmph[tmp],1) 744 | ey[tmp] = h 745 | 746 | tmp = np.where(x<1) 747 | dx.flat[tmp] = np.expand_dims(2-x[tmp],1) 748 | x[tmp] = 1 749 | 750 | tmp = np.where(y<1) 751 | dy.flat[tmp] = np.expand_dims(2-y[tmp],1) 752 | y[tmp] = 1 753 | 754 | return dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph 755 | 756 | # function [bboxA] = rerec(bboxA) 757 | def rerec(bboxA): 758 | """Convert bboxA to square.""" 759 | h = bboxA[:,3]-bboxA[:,1] 760 | w = bboxA[:,2]-bboxA[:,0] 761 | l = np.maximum(w, h) 762 | bboxA[:,0] = bboxA[:,0]+w*0.5-l*0.5 763 | bboxA[:,1] = bboxA[:,1]+h*0.5-l*0.5 764 | bboxA[:,2:4] = bboxA[:,0:2] + np.transpose(np.tile(l,(2,1))) 765 | return bboxA 766 | 767 | def imresample(img, sz): 768 | im_data = cv2.resize(img, (sz[1], sz[0]), interpolation=cv2.INTER_AREA) #@UndefinedVariable 769 | return im_data 770 | 771 | # This method is kept for debugging purpose 772 | # h=img.shape[0] 773 | # w=img.shape[1] 774 | # hs, ws = sz 775 | # dx = float(w) / ws 776 | # dy = float(h) / hs 777 | # im_data = np.zeros((hs,ws,3)) 778 | # for a1 in range(0,hs): 779 | # for a2 in range(0,ws): 780 | # for a3 in range(0,3): 781 | # im_data[a1,a2,a3] = img[int(floor(a1*dy)),int(floor(a2*dx)),a3] 782 | # return im_data 783 | 784 | -------------------------------------------------------------------------------- /mtcnn_weights/README.md: -------------------------------------------------------------------------------- 1 | Weights files are form https://github.com/davidsandberg/facenet/tree/master/src/align 2 | -------------------------------------------------------------------------------- /mtcnn_weights/det1.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/mtcnn_weights/det1.npy -------------------------------------------------------------------------------- /mtcnn_weights/det2.npy: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/mtcnn_weights/det2.npy -------------------------------------------------------------------------------- /mtcnn_weights/det3.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/mtcnn_weights/det3.npy -------------------------------------------------------------------------------- /results/result_fashion-1063100_640.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/results/result_fashion-1063100_640.png -------------------------------------------------------------------------------- /results/result_lenna.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/results/result_lenna.png -------------------------------------------------------------------------------- /results/result_model-1439909_640.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/results/result_model-1439909_640.png -------------------------------------------------------------------------------- /results/result_reiwa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/results/result_reiwa.png -------------------------------------------------------------------------------- /test_imgs/Lenna_(test_image).png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/test_imgs/Lenna_(test_image).png -------------------------------------------------------------------------------- /test_imgs/fashion-1063100_640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/test_imgs/fashion-1063100_640.jpg -------------------------------------------------------------------------------- /test_imgs/gengou_happyou_reiwa.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/test_imgs/gengou_happyou_reiwa.png -------------------------------------------------------------------------------- /test_imgs/model-1439909_640.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/shaoanlu/GazeML-keras/62bf9f84dc9f0728bbd906b96b7cf7893deafcd1/test_imgs/model-1439909_640.jpg --------------------------------------------------------------------------------
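
For reference, below is a minimal sketch of how the pieces above can be wired together, assuming it is run from the repository root with the dependencies listed in README.md. It is not part of the repository and is not the authors' pipeline (see demo_colab.ipynb for that): only the class names, weight paths, and the 108x180 grayscale ELG input come from the files above, while the eye-region crop and the input normalization are placeholder assumptions.

```python
# Minimal usage sketch (illustrative only; the eye crop and normalization are assumptions).
import cv2
import numpy as np
from keras import backend as K

from detector.face_detector import MTCNNFaceDetector
from models.elg_keras import KerasELG

# MTCNN face detector and ELG landmark model share the Keras TensorFlow session.
sess = K.get_session()
face_detector = MTCNNFaceDetector(sess, model_path="./mtcnn_weights/")
elg = KerasELG()
elg.net.load_weights("./elg_weights/elg_keras.h5")

image = cv2.cvtColor(cv2.imread("./test_imgs/Lenna_(test_image).png"), cv2.COLOR_BGR2RGB)
faces, landmarks = face_detector.detect_face(image)

# Crop the upper half of the first detected face as a rough eye region.
# detect_face returns boxes as (x0, y1, x1, y0, score) with x indexing rows.
x0, y1, x1, y0, _ = faces[0].astype(int)
face = image[x0:x1, y0:y1]
eye_region = face[: face.shape[0] // 2]

# Assumed preprocessing: grayscale, resize to the ELG input of 180x108 (w x h),
# rescale to [-1, 1], and add batch and channel axes.
eye = cv2.cvtColor(eye_region, cv2.COLOR_RGB2GRAY)
eye = cv2.resize(eye, (180, 108)).astype(np.float32)
eye = eye[np.newaxis, ..., np.newaxis] / 255.0 * 2.0 - 1.0

heatmaps = elg.net.predict(eye)                          # one heatmap per eye landmark
eye_landmarks = KerasELG._calculate_landmarks(heatmaps)  # (1, 18, 2) (x, y) in heatmap coords
```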