├── README.md
├── data
│   └── pairs.txt
├── dataset
│   └── cleaned_list.txt
├── lib
│   ├── lfw.py
│   └── utils.py
├── lr_coco.txt
├── lr_scatter.txt
├── networks
│   ├── inception_resnet_v1.py
│   ├── resface.py
│   └── sphere_network.py
├── test.sh
├── test
│   └── test.py
├── train.sh
└── train
    └── train_multi_gpu.py
/README.md:
--------------------------------------------------------------------------------
1 | ## Recent Update
2 | ```2018.07.04```: I achieved a better accuracy (99.2%, [trained model](https://pan.baidu.com/s/1c7bPoM_hGvkzp5Tunu_ivg)) on LFW. I made the following modifications:
3 | - Aligned the WebFace and LFW datasets to ```112x112``` ([casia-112x112](https://pan.baidu.com/s/1MYNq6pkZJCkpKERC92Ea1A), [lfw-112x112](https://pan.baidu.com/s/1-QASgnuL0FYBpzq3K79Vmw)) using the [insightface align method](https://github.com/deepinsight/insightface/blob/master/src/align/align_lfw.py)
4 | - Set a bigger margin parameter (```0.35```) and a higher feature embedding dimension (```1024```)
5 | - Used the cleaned dataset; the details can be found in [this issue](https://github.com/happynear/FaceVerification/issues/30)
6 | ## CosFace
7 | This project implements CosFace as described in the paper [**CosFace: Large Margin Cosine Loss for Deep Face Recognition**](https://arxiv.org/pdf/1801.09414.pdf). The network can be trained on [CASIA-WebFace](http://www.cbsr.ia.ac.cn/english/CASIA-WebFace-Database.html), and the best accuracy it reaches on [LFW](http://vis-www.cs.umass.edu/lfw/) is 98.6%. This is lower than the 99.33% reported in the paper, which may be caused by the TensorFlow implementation of the sphere network: training that network with plain softmax loss reaches only 95.6% accuracy, well below the Caffe version (97.88%).
8 | 
9 | ## Preprocessing
10 | I provide the preprocessed datasets on Baidu Pan: [CASIA-WebFace-112X96](https://pan.baidu.com/s/160RN84j_79TnktKZmzakfw), [lfw-112X96](https://pan.baidu.com/s/1fkH9xR5Z0inxTP7Maae2KQ). Download and unzip them into the ```dataset``` directory.
11 | 
12 | If you want to preprocess the dataset yourself, refer to [sphereface](https://github.com/wy1iu/sphereface/tree/0056a7d27d05f2815a276cb26471f0348d6dd8da#installation).
13 | 
14 | 
15 | ## Train
16 | ```./train.sh```
17 | 
18 | ## Test
19 | Modify ```MODEL_DIR``` in ```test.sh``` and run ```./test.sh```.
20 | 
21 | If you do not want to train a model yourself, you can download my [trained model](https://pan.baidu.com/s/1ouQA2PXz1hp7Uz_uhsyMdw) and unzip it into the ```models``` directory.
22 | 
23 | ## Reference
24 | - [facenet](https://github.com/davidsandberg/facenet)
25 | 
--------------------------------------------------------------------------------
/lib/lfw.py:
--------------------------------------------------------------------------------
1 | """Helper for evaluation on the Labeled Faces in the Wild dataset
2 | """
3 | 
4 | # MIT License
5 | # 
6 | # Copyright (c) 2016 David Sandberg
7 | # 
8 | # Permission is hereby granted, free of charge, to any person obtaining a copy
9 | # of this software and associated documentation files (the "Software"), to deal
10 | # in the Software without restriction, including without limitation the rights
11 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 | # copies of the Software, and to permit persons to whom the Software is
13 | # furnished to do so, subject to the following conditions:
14 | # 
15 | # The above copyright notice and this permission notice shall be included in all
16 | # copies or substantial portions of the Software. 
17 | # 18 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 24 | # SOFTWARE. 25 | 26 | from __future__ import absolute_import 27 | from __future__ import division 28 | from __future__ import print_function 29 | 30 | import os 31 | import numpy as np 32 | import utils 33 | 34 | def evaluate(embeddings, actual_issame, nrof_folds=10): 35 | # Calculate evaluation metrics 36 | thresholds = np.arange(0, 4, 0.01/4) 37 | embeddings1 = embeddings[0::2] 38 | embeddings2 = embeddings[1::2] 39 | tpr, fpr, accuracy = utils.calculate_roc(thresholds, embeddings1, embeddings2, 40 | np.asarray(actual_issame), nrof_folds=nrof_folds) 41 | thresholds = np.arange(0, 4, 0.001) 42 | val, val_std, far = utils.calculate_val(thresholds, embeddings1, embeddings2, 43 | np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds) 44 | return tpr, fpr, accuracy, val, val_std, far 45 | 46 | def get_paths(lfw_dir, pairs, file_ext): 47 | nrof_skipped_pairs = 0 48 | path_list = [] 49 | issame_list = [] 50 | for pair in pairs: 51 | if len(pair) == 3: 52 | path0 = os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1])+'.'+file_ext) 53 | path1 = os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[2])+'.'+file_ext) 54 | issame = True 55 | elif len(pair) == 4: 56 | path0 = os.path.join(lfw_dir, pair[0], pair[0] + '_' + '%04d' % int(pair[1])+'.'+file_ext) 57 | path1 = os.path.join(lfw_dir, pair[2], pair[2] + '_' + '%04d' % int(pair[3])+'.'+file_ext) 58 | issame = False 59 | if os.path.exists(path0) and os.path.exists(path1): # Only add the pair if both paths exist 60 | path_list += (path0,path1) 61 | issame_list.append(issame) 62 | else: 63 | nrof_skipped_pairs += 1 64 | if nrof_skipped_pairs>0: 65 | print('Skipped %d image pairs' % nrof_skipped_pairs) 66 | 67 | return path_list, issame_list 68 | 69 | def read_pairs(pairs_filename): 70 | pairs = [] 71 | with open(pairs_filename, 'r') as f: 72 | for line in f.readlines()[1:]: 73 | pair = line.strip().split() 74 | pairs.append(pair) 75 | return np.array(pairs) 76 | 77 | 78 | 79 | -------------------------------------------------------------------------------- /lib/utils.py: -------------------------------------------------------------------------------- 1 | """Functions for building the face recognition network. 2 | """ 3 | # MIT License 4 | # 5 | # Copyright (c) 2016 David Sandberg 6 | # 7 | # Permission is hereby granted, free of charge, to any person obtaining a copy 8 | # of this software and associated documentation files (the "Software"), to deal 9 | # in the Software without restriction, including without limitation the rights 10 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | # copies of the Software, and to permit persons to whom the Software is 12 | # furnished to do so, subject to the following conditions: 13 | # 14 | # The above copyright notice and this permission notice shall be included in all 15 | # copies or substantial portions of the Software. 
16 | # 
17 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23 | # SOFTWARE.
24 | 
25 | # pylint: disable=missing-docstring
26 | from __future__ import absolute_import
27 | from __future__ import division
28 | from __future__ import print_function
29 | 
30 | import os
31 | import pdb
32 | from subprocess import Popen, PIPE
33 | import tensorflow as tf
34 | from tensorflow.python.framework import ops
35 | import numpy as np
36 | import time
37 | import tensorflow.contrib.slim as slim
38 | import pickle
39 | from scipy import misc
40 | from sklearn.model_selection import KFold
41 | from scipy import interpolate
42 | from tensorflow.python.training import training
43 | import random
44 | import re
45 | from tensorflow.python.platform import gfile
46 | 
47 | def py_func(func, inp, Tout, stateful = True, name=None, grad_func=None):
48 |     rand_name = 'PyFuncGrad' + str(np.random.randint(0,1E+8))
49 |     tf.RegisterGradient(rand_name)(grad_func)
50 |     g = tf.get_default_graph()
51 |     with g.gradient_override_map({'PyFunc':rand_name}):
52 |         return tf.py_func(func,inp,Tout,stateful=stateful, name=name)
53 | 
54 | 
55 | 
56 | 
57 | def coco_forward(xw, y, m, name=None):
58 |     #pdb.set_trace()
59 |     xw_copy = xw.copy()
60 |     num = len(y)
61 |     orig_ind = range(num)
62 |     xw_copy[orig_ind,y] -= m
63 |     return xw_copy
64 | 
65 | def coco_help(grad,y):
66 |     grad_copy = grad.copy()
67 |     return grad_copy
68 | 
69 | def coco_backward(op, grad):
70 | 
71 |     y = op.inputs[1]
72 |     m = op.inputs[2]
73 |     grad_copy = tf.py_func(coco_help,[grad,y],tf.float32)
74 |     return grad_copy,y,m
75 | 
76 | def coco_func(xw,y,m, name=None):
77 |     with tf.op_scope([xw,y,m],name,"Coco_func") as name:
78 |         coco_out = py_func(coco_forward,[xw,y,m],tf.float32,name=name,grad_func=coco_backward)
79 |         return coco_out
80 | 
81 | def cos_loss(x, y, num_cls, reuse=False, alpha=0.25, scale=64,name = 'cos_loss'):
82 |     '''
83 |     x: B x D - features
84 |     y: B x 1 - labels
85 |     num_cls: 1 - total class number
86 |     alpha: 1 - margin
87 |     scale: 1 - scaling parameter
88 |     '''
89 |     # define the classifier weights
90 |     xs = x.get_shape()
91 |     with tf.variable_scope('centers_var',reuse=reuse) as center_scope:
92 |         w = tf.get_variable("centers", [xs[1], num_cls], dtype=tf.float32,
93 |             initializer=tf.contrib.layers.xavier_initializer(),trainable=True)
94 | 
95 |     # normalize the feature and weight
96 |     # (N,D)
97 |     x_feat_norm = tf.nn.l2_normalize(x,1,1e-10)
98 |     # (D,C)
99 |     w_feat_norm = tf.nn.l2_normalize(w,0,1e-10)
100 | 
101 |     # get the scores after normalization
102 |     # (N,C)
103 |     xw_norm = tf.matmul(x_feat_norm, w_feat_norm)
104 |     # implemented by py_func
105 |     #value = tf.identity(xw)
106 |     # subtract the margin and scale it
107 |     value = coco_func(xw_norm,y,alpha) * scale
108 | 
109 |     # implemented by tf api
110 |     #margin_xw_norm = xw_norm - alpha
111 |     #label_onehot = tf.one_hot(y,num_cls)
112 |     #value = scale*tf.where(tf.equal(label_onehot,1), margin_xw_norm, xw_norm)
113 | 
114 | 
115 |     # compute the loss as softmax loss
116 |     cos_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=value))
117 | 
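    # For reference, with s = scale and m = alpha, the loss returned below is the
    # large-margin cosine loss from the CosFace paper:
    #   L = -1/N * sum_i log( exp(s*(cos(theta_{y_i}) - m))
    #         / ( exp(s*(cos(theta_{y_i}) - m)) + sum_{j != y_i} exp(s*cos(theta_j)) ) )
118 | 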
return cos_loss 119 | 120 | 121 | def softmax_loss(prelogits,labels,nrof_classes,weight_decay,reuse): 122 | logits = slim.fully_connected(prelogits, nrof_classes, activation_fn=None, 123 | weights_initializer=tf.truncated_normal_initializer(stddev=0.1), 124 | weights_regularizer=slim.l2_regularizer(weight_decay), 125 | scope='softmax', reuse=reuse) 126 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 127 | labels=labels, logits=logits, name='cross_entropy_per_example') 128 | cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') 129 | return cross_entropy_mean 130 | 131 | 132 | 133 | 134 | def contrastive_index(dists, labels, alfa): 135 | #pdb.set_trace() 136 | base_inds = np.array(range(dists.shape[0])) 137 | nrof_classes = dists.shape[1] 138 | same_indexs = base_inds*nrof_classes + labels 139 | dists = dists.reshape([-1]) 140 | #dist_same = dists[same_indexs].copy() 141 | dists_copy = dists.copy() 142 | dists_copy[same_indexs] = np.NaN 143 | diff_indexs = np.where(dists_copy < alfa)[0] 144 | #pdb.set_trace() 145 | #dists[same_indexs] = 146 | print('same indexs: {}, diff indexs: {}'.format(len(same_indexs),len(diff_indexs))) 147 | return np.array(same_indexs,dtype=np.int64), np.array(diff_indexs, dtype=np.int64) 148 | 149 | 150 | def get_image_paths_and_labels(dataset): 151 | image_paths_flat = [] 152 | labels_flat = [] 153 | for i in range(len(dataset)): 154 | image_paths_flat += dataset[i].image_paths 155 | labels_flat += [i] * len(dataset[i].image_paths) 156 | return image_paths_flat, labels_flat 157 | 158 | def shuffle_examples(image_paths, labels): 159 | shuffle_list = list(zip(image_paths, labels)) 160 | random.shuffle(shuffle_list) 161 | image_paths_shuff, labels_shuff = zip(*shuffle_list) 162 | return image_paths_shuff, labels_shuff 163 | 164 | def read_images_from_disk(input_queue): 165 | """Consumes a single filename and label as a ' '-delimited string. 166 | Args: 167 | filename_and_label_tensor: A scalar string tensor. 168 | Returns: 169 | Two tensors: the decoded image, and the string label. 
170 |     """
171 |     label = input_queue[1]
172 |     file_contents = tf.read_file(input_queue[0])
173 |     example = tf.image.decode_image(file_contents, channels=3)
174 |     return example, label
175 | 
176 | def random_rotate_image(image):
177 |     angle = np.random.uniform(low=-10.0, high=10.0)
178 |     return misc.imrotate(image, angle, 'bicubic')
179 | 
180 | def read_and_augment_data(image_list, label_list, image_size, batch_size, max_nrof_epochs, 
181 |         random_crop, random_flip, random_rotate, nrof_preprocess_threads, shuffle=True):
182 | 
183 |     images = ops.convert_to_tensor(image_list, dtype=tf.string)
184 |     labels = ops.convert_to_tensor(label_list, dtype=tf.int32)
185 | 
186 |     # Makes an input queue
187 |     input_queue = tf.train.slice_input_producer([images, labels],
188 |         num_epochs=max_nrof_epochs, shuffle=shuffle)
189 | 
190 |     images_and_labels = []
191 |     for _ in range(nrof_preprocess_threads):
192 |         image, label = read_images_from_disk(input_queue)
193 |         if random_rotate:
194 |             image = tf.py_func(random_rotate_image, [image], tf.uint8)
195 |         if random_crop:
196 |             image = tf.random_crop(image, [image_size, image_size, 3])
197 |         else:
198 |             image = tf.image.resize_image_with_crop_or_pad(image, image_size, image_size)
199 |         if random_flip:
200 |             image = tf.image.random_flip_left_right(image)
201 |         #pylint: disable=no-member
202 |         image.set_shape((image_size, image_size, 3))
203 |         image = tf.image.per_image_standardization(image)
204 |         images_and_labels.append([image, label])
205 | 
206 |     image_batch, label_batch = tf.train.batch_join(
207 |         images_and_labels, batch_size=batch_size,
208 |         capacity=4 * nrof_preprocess_threads * batch_size,
209 |         allow_smaller_final_batch=True)
210 | 
211 |     return image_batch, label_batch
212 | 
213 | def _add_loss_summaries(total_loss):
214 |     """Add summaries for losses.
215 | 
216 |     Generates a moving average for all losses and associated summaries for
217 |     visualizing the performance of the network.
218 | 
219 |     Args:
220 |       total_loss: Total loss from loss().
221 |     Returns:
222 |       loss_averages_op: op for generating moving averages of losses.
223 |     """
224 |     # Compute the moving average of all individual losses and the total loss.
225 |     loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
226 |     losses = tf.get_collection('losses')
227 |     loss_averages_op = loss_averages.apply(losses + [total_loss])
228 | 
229 |     # Attach a scalar summary to all individual losses and the total loss; do the
230 |     # same for the averaged version of the losses.
231 |     for l in losses + [total_loss]:
232 |         # Name each loss as '(raw)' and name the moving average version of the loss
233 |         # as the original loss name.
234 |         tf.summary.scalar(l.op.name +' (raw)', l)
235 |         tf.summary.scalar(l.op.name, loss_averages.average(l))
236 | 
237 |     return loss_averages_op
238 | 
239 | def train(total_loss, global_step, optimizer, learning_rate, moving_average_decay, update_gradient_vars, log_histograms=True):
240 |     # Generate moving averages of all losses and associated summaries.
241 |     loss_averages_op = _add_loss_summaries(total_loss)
242 | 
243 |     # Compute gradients.
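    # The control dependency below guarantees that the loss moving averages are
    # updated before the gradient ops created inside the block are run.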
244 |     with tf.control_dependencies([loss_averages_op]):
245 |         if optimizer=='ADAGRAD':
246 |             opt = tf.train.AdagradOptimizer(learning_rate)
247 |         elif optimizer=='ADADELTA':
248 |             opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
249 |         elif optimizer=='ADAM':
250 |             opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
251 |         elif optimizer=='RMSPROP':
252 |             opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
253 |         elif optimizer=='MOM':
254 |             opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
255 |         else:
256 |             raise ValueError('Invalid optimization algorithm')
257 | 
258 |         grads = opt.compute_gradients(total_loss, update_gradient_vars)
259 | 
260 |     # Apply gradients.
261 |     apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
262 | 
263 |     # Add histograms for trainable variables.
264 |     if log_histograms:
265 |         for var in tf.trainable_variables():
266 |             tf.summary.histogram(var.op.name, var)
267 | 
268 |     # Add histograms for gradients.
269 |     if log_histograms:
270 |         for grad, var in grads:
271 |             if grad is not None:
272 |                 tf.summary.histogram(var.op.name + '/gradients', grad)
273 | 
274 |     # Track the moving averages of all trainable variables.
275 |     variable_averages = tf.train.ExponentialMovingAverage(
276 |         moving_average_decay, global_step)
277 |     variables_averages_op = variable_averages.apply(tf.trainable_variables())
278 | 
279 |     with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
280 |         train_op = tf.no_op(name='train')
281 | 
282 |     return train_op
283 | def compute_gradient(total_loss, global_step, optimizer, learning_rate, update_gradient_vars):
284 |     loss_averages_op = _add_loss_summaries(total_loss)  # moving averages of the losses; needed by the control dependency below
285 |     # Compute gradients.
286 |     with tf.control_dependencies([loss_averages_op]):
287 |         if optimizer=='ADAGRAD':
288 |             opt = tf.train.AdagradOptimizer(learning_rate)
289 |         elif optimizer=='ADADELTA':
290 |             opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
291 |         elif optimizer=='ADAM':
292 |             opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
293 |         elif optimizer=='RMSPROP':
294 |             opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
295 |         elif optimizer=='MOM':
296 |             opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
297 |         else:
298 |             raise ValueError('Invalid optimization algorithm')
299 | 
300 |         grads = opt.compute_gradients(total_loss, update_gradient_vars)
301 | 
302 |     return opt,grads
303 | def get_opt(optimizer,learning_rate):
304 | 
305 |     # Build the optimizer.
306 |     if optimizer=='ADAGRAD':
307 |         opt = tf.train.AdagradOptimizer(learning_rate)
308 |     elif optimizer=='ADADELTA':
309 |         opt = tf.train.AdadeltaOptimizer(learning_rate, rho=0.9, epsilon=1e-6)
310 |     elif optimizer=='ADAM':
311 |         opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1)
312 |     elif optimizer=='RMSPROP':
313 |         opt = tf.train.RMSPropOptimizer(learning_rate, decay=0.9, momentum=0.9, epsilon=1.0)
314 |     elif optimizer=='MOM':
315 |         opt = tf.train.MomentumOptimizer(learning_rate, 0.9, use_nesterov=True)
316 |     elif optimizer=='SGD':
317 |         opt = tf.train.GradientDescentOptimizer(learning_rate)
318 |     else:
319 |         raise ValueError('Invalid optimization algorithm')
320 | 
321 | 
322 |     return opt
323 | 
324 | 
325 | 
326 | def average_gradients(tower_grads):
327 |     '''
328 |     Calculate the average gradient for each shared variable across all towers
329 |     '''
330 |     average_grads = []
331 |     #pdb.set_trace()
332 |     for grad_and_vars in zip(*tower_grads):
333 |         # tower_grads[i] is the list of (gradient, variable) pairs computed in tower i, i.e. ((g_var0,var0),...),
334 |         # so zip(*tower_grads) has the form (((grad0_gpu0,var0_gpu0),...),...).
335 |         # That is, grad_and_vars gathers the gradient of a single variable across all towers into one list.
336 | 
337 |         #pdb.set_trace()
338 |         grads = []
339 |         for g,_ in grad_and_vars:
340 |             #print(g)
341 |             expanded_g = tf.expand_dims(g,0)
342 |             grads.append(expanded_g)
343 |         grad = tf.concat(grads,0)
344 |         grad = tf.reduce_mean(grad,0)
345 | 
346 |         v = grad_and_vars[0][1]
347 |         grad_and_var = (grad,v)
348 |         average_grads.append(grad_and_var)
349 |     return average_grads
350 | def sum_gradients(tower_grads):
351 |     '''
352 |     Calculate the summed gradient for each shared variable across all towers
353 |     '''
354 |     average_grads = []
355 |     #pdb.set_trace()
356 |     for grad_and_vars in zip(*tower_grads):
357 |         # tower_grads[i] is the list of (gradient, variable) pairs computed in tower i, i.e. ((g_var0,var0),...),
358 |         # so zip(*tower_grads) has the form (((grad0_gpu0,var0_gpu0),...),...).
359 |         # That is, grad_and_vars gathers the gradient of a single variable across all towers into one list.
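        # Same gathering logic as average_gradients above; only the reduction
        # differs (tf.reduce_sum below instead of tf.reduce_mean).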
360 | 361 | #pdb.set_trace() 362 | grads = [] 363 | for g,v in grad_and_vars: 364 | #print(g) 365 | if 'Batch' in v.name and 'mean' in v.name: 366 | print(grad_and_vars) 367 | 368 | expanded_g = tf.expand_dims(g,0) 369 | grads.append(expanded_g) 370 | grad = tf.concat(grads,0) 371 | grad = tf.reduce_sum(grad,0) 372 | 373 | v = grad_and_vars[0][1] 374 | grad_and_var = (grad,v) 375 | average_grads.append(grad_and_var) 376 | return average_grads 377 | 378 | 379 | def prewhiten(x): 380 | mean = np.mean(x) 381 | std = np.std(x) 382 | std_adj = np.maximum(std, 1.0/np.sqrt(x.size)) 383 | y = np.multiply(np.subtract(x, mean), 1/std_adj) 384 | return y 385 | 386 | def crop(image, random_crop, image_size): 387 | if image.shape[1]>image_size: 388 | sz1 = int(image.shape[1]//2) 389 | sz2 = int(image_size//2) 390 | if random_crop: 391 | diff = sz1-sz2 392 | (h, v) = (np.random.randint(-diff, diff+1), np.random.randint(-diff, diff+1)) 393 | else: 394 | (h, v) = (0,0) 395 | image = image[(sz1-sz2+v):(sz1+sz2+v),(sz1-sz2+h):(sz1+sz2+h),:] 396 | return image 397 | 398 | def flip(image, random_flip): 399 | if random_flip and np.random.choice([True, False]): 400 | image = np.fliplr(image) 401 | return image 402 | 403 | def to_rgb(img): 404 | w, h = img.shape 405 | ret = np.empty((w, h, 3), dtype=np.uint8) 406 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 407 | return ret 408 | 409 | 410 | def load_data(image_paths, do_random_crop, do_flip, image_height,image_width, do_prewhiten=True,src_size=None): 411 | nrof_samples = len(image_paths) 412 | images = np.zeros((nrof_samples, image_height, image_width, 3)) 413 | for i in range(nrof_samples): 414 | img = misc.imread(image_paths[i]) 415 | if src_size is not None: 416 | img = misc.imresize(img,(src_size[0],src_size[1])) 417 | if img.ndim == 2: 418 | img = to_rgb(img) 419 | if do_prewhiten: 420 | img = prewhiten(img) 421 | else: 422 | img = img - 127.5 423 | img = img / 128. 
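                # (img - 127.5) / 128 maps uint8 pixel values to roughly [-1, 1],
                # matching the preprocessing used by the Caffe sphereface models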
424 | 
425 |         #img = crop(img, do_random_crop, image_size)
426 |         #img = flip(img, do_random_flip)
427 |         if do_flip:
428 |             img = np.fliplr(img)
429 |         images[i,:,:,:] = img
430 |     return images
431 | 
432 | def l2_normalize(x):  # note: mean-centers each row before dividing it by its L2 norm
433 |     n,e = x.shape
434 |     mean = np.mean(x,axis=1)
435 |     mean = mean.reshape((n,1))
436 |     mean = np.repeat(mean,e,axis=1)
437 |     x -= mean
438 |     norm = np.linalg.norm(x,axis=1)
439 |     norm = norm.reshape((n,1))
440 |     norm = np.repeat(norm,e,axis=1)
441 |     y = np.multiply(x,1/norm)
442 |     return y
443 | 
444 | 
445 | 
446 | 
447 | 
448 | def data_from_mx(inputs,image_size,do_resize=True,do_prewhiten=True):
449 |     '''
450 |     convert data in mxnet format to tf format
451 |     params:
452 |         - inputs: [n,h,w,c]
453 |     '''
454 |     nrof_samples = len(inputs)
455 |     images = np.zeros((nrof_samples, image_size, image_size, 3))
456 |     for i in range(nrof_samples):
457 |         img = inputs[i].transpose((1,2,0))
458 |         #plt.imshow(img)
459 |         #plt.show()
460 |         #pdb.set_trace()
461 |         if img.ndim == 2:
462 |             img = to_rgb(img)
463 |         if do_resize:
464 |             img = misc.imresize(img,[image_size,image_size])
465 |         if do_prewhiten:
466 |             img = prewhiten(img)
467 |         #img = crop(img, do_random_crop, image_size)
468 |         #img = flip(img, do_random_flip)
469 |         images[i,:,:,:] = img
470 |     return images
471 | 
472 | def load_data_simple(image_paths,image_size,do_random_crop=False):
473 |     nrof_samples = len(image_paths)
474 |     images = np.zeros((nrof_samples, image_size, image_size, 3))
475 |     for i in range(nrof_samples):
476 |         img = misc.imread(image_paths[i])
477 |         #pdb.set_trace()
478 |         if img.ndim == 2:
479 |             img = to_rgb(img)
480 |         #if do_prewhiten:
481 |         #img = misc.imresize(img,[image_size,image_size])
482 |         img = prewhiten(img)
483 |         img = crop(img, do_random_crop, image_size)
484 |         #img = flip(img, do_random_flip)
485 |         images[i,:,:,:] = img
486 |     return images
487 | def load_data_caffe(image_paths,image_size):
488 |     nrof_samples = len(image_paths)
489 |     images = np.zeros((nrof_samples, image_size, image_size, 3))
490 |     for i in range(nrof_samples):
491 |         img = misc.imread(image_paths[i])
492 |         #pdb.set_trace()
493 |         if img.ndim == 2:
494 |             img = to_rgb(img)
495 |         #if do_prewhiten:
496 |         img = misc.imresize(img,[image_size,image_size])
497 |         #img = crop(img, do_random_crop, image_size)
498 |         #img = flip(img, do_random_flip)
499 |         images[i,:,:,:] = img
500 |     return images
501 | 
502 | 
503 | 
504 | 
505 | def get_label_batch(label_data, batch_size, batch_index):
506 |     nrof_examples = np.size(label_data, 0)
507 |     j = batch_index*batch_size % nrof_examples
508 |     if j+batch_size<=nrof_examples:
509 |         batch = label_data[j:j+batch_size]
510 |     else:
511 |         x1 = label_data[j:nrof_examples]
512 |         x2 = label_data[0:nrof_examples-j]
513 |         batch = np.vstack([x1,x2])
514 |     batch_int = batch.astype(np.int64)
515 |     return batch_int
516 | 
517 | def get_batch(image_data, batch_size, batch_index):
518 |     nrof_examples = np.size(image_data, 0)
519 |     j = batch_index*batch_size % nrof_examples
520 |     if j+batch_size<=nrof_examples:
521 |         batch = image_data[j:j+batch_size,:,:,:]
522 |     else:
523 |         x1 = image_data[j:nrof_examples,:,:,:]
524 |         x2 = image_data[0:nrof_examples-j,:,:,:]
525 |         batch = np.vstack([x1,x2])
526 |     batch_float = batch.astype(np.float32)
527 |     return batch_float
528 | 
529 | def get_triplet_batch(triplets, batch_index, batch_size):
530 |     ax, px, nx = triplets
531 |     a = get_batch(ax, int(batch_size/3), batch_index)
532 |     p = get_batch(px, int(batch_size/3), batch_index)
533 |     n = get_batch(nx, int(batch_size/3), batch_index)
534 |     batch = np.vstack([a, p, n])
535 |     return batch
536 | 
537 | def get_learning_rate_from_file(filename, epoch):
538 |     with open(filename, 'r') as f:
539 |         for line in f.readlines():
540 |             line = line.split('#', 1)[0]
541 |             if line:
542 |                 par = line.strip().split(':')
543 |                 e = int(par[0])
544 |                 lr = float(par[1])
545 |                 if e <= epoch:
546 |                     learning_rate = lr
547 |                 else:
548 |                     return learning_rate
549 | 
550 | class ImageClass():
551 |     "Stores the paths to images for a given class"
552 |     def __init__(self, name, image_paths):
553 |         self.name = name
554 |         self.image_paths = image_paths
555 | 
556 |     def __str__(self):
557 |         return self.name + ', ' + str(len(self.image_paths)) + ' images'
558 | 
559 |     def __len__(self):
560 |         return len(self.image_paths)
561 | 
562 | def get_dataset(paths, has_class_directories=True):
563 |     dataset = []
564 |     count = 1
565 |     for path in paths.split(':'):
566 |         path_exp = os.path.expanduser(path)
567 |         classes = os.listdir(path_exp)
568 |         classes.sort()
569 |         nrof_classes = len(classes)
570 |         for i in range(nrof_classes):
571 |             class_name = classes[i]
572 |             facedir = os.path.join(path_exp, class_name)
573 |             image_paths = get_image_paths(facedir)
574 |             class_name = '{}_{:08d}'.format(class_name,count)
575 |             count += 1
576 |             dataset.append(ImageClass(class_name, image_paths))
577 | 
578 |     return dataset
579 | def dataset_from_list(data_dir,list_file):
580 |     dataset = []
581 |     lines = open(list_file,'r').read().strip().split('\n')
582 |     path_exp = os.path.expanduser(data_dir)
583 |     count = 1
584 |     class_paths = {}
585 |     for line in lines:
586 |         image_path, _ = line.split(' ')
587 |         class_name, _ = image_path.split('/')
588 |         if class_name not in class_paths:
589 |             class_paths[class_name] = []
590 |         full_image_path = os.path.join(path_exp,image_path)
591 |         assert os.path.exists(full_image_path), 'file {} not exist'.format(full_image_path)
592 |         class_paths[class_name].append(full_image_path)
593 |     dataset = []
594 |     keys = list(class_paths.keys())  # list() so that sort() also works on Python 3
595 |     keys.sort()
596 |     for key in keys:
597 |         dataset.append(ImageClass(key,class_paths[key]))
598 |     return dataset
599 | 
600 | 
601 | 
602 | 
603 | def get_image_paths(facedir):
604 |     image_paths = []
605 |     if os.path.isdir(facedir):
606 |         images = os.listdir(facedir)
607 |         image_paths = [os.path.join(facedir,img) for img in images]
608 |     return image_paths
609 | 
610 | def split_dataset(dataset, split_ratio, mode):
611 |     if mode=='SPLIT_CLASSES':
612 |         nrof_classes = len(dataset)
613 |         class_indices = np.arange(nrof_classes)
614 |         np.random.shuffle(class_indices)
615 |         split = int(round(nrof_classes*split_ratio))
616 |         train_set = [dataset[i] for i in class_indices[0:split]]
617 |         test_set = [dataset[i] for i in class_indices[split:-1]]
618 |     elif mode=='SPLIT_IMAGES':
619 |         train_set = []
620 |         test_set = []
621 |         min_nrof_images = 2
622 |         for cls in dataset:
623 |             paths = cls.image_paths
624 |             np.random.shuffle(paths)
625 |             split = int(round(len(paths)*split_ratio))
626 |             if split<min_nrof_images:
627 |                 continue  # Not enough images for the test set; skip this class
628 |             train_set.append(ImageClass(cls.name, paths[0:split]))
629 |             test_set.append(ImageClass(cls.name, paths[split:-1]))
630 |     else:
631 |         raise ValueError('Invalid train/test split mode "%s"' % mode)
632 |     return train_set, test_set
633 | 
634 | 
635 | def load_model(model):
636 |     # Check if the model is a model directory (containing a metagraph and a checkpoint file)
637 |     # or if it is a protobuf file with a frozen graph
638 |     model_exp = os.path.expanduser(model)
639 |     if (os.path.isfile(model_exp)):
640 |         print('Model filename: %s' % model_exp)
641 |         with gfile.FastGFile(model_exp,'rb') as f:
642 |             graph_def = tf.GraphDef()
643 |             graph_def.ParseFromString(f.read())
644 |             tf.import_graph_def(graph_def, name='')
645 |     else:
646 |         print('Model directory: %s' % model_exp)
647 |         meta_file, ckpt_file = get_model_filenames(model_exp)
648 | 
649 |         saver = tf.train.import_meta_graph(os.path.join(model_exp, meta_file))
650 |         saver.restore(tf.get_default_session(), os.path.join(model_exp, ckpt_file))
651 | 
652 | 
653 | def get_model_filenames(model_dir):
654 |     files = os.listdir(model_dir)
655 |     meta_files = [s for s in files if s.endswith('.meta')]
656 |     if len(meta_files)==0:
657 |         raise ValueError(
658 |             'No meta file found in the model directory (%s)' % model_dir)
659 |     elif len(meta_files)>1:
660 |         raise ValueError('There should not be more than one meta file in the model directory (%s)' % model_dir)
661 |     meta_file = meta_files[0]
662 |     meta_files = [s for s in files if '.ckpt' in s]
663 |     max_step = -1
664 |     for f in files:
665 |         step_str = re.match(r'(^model-[\w\- ]+.ckpt-(\d+))', f)
666 |         if step_str is not None and len(step_str.groups())>=2:
667 |             step = int(step_str.groups()[1])
668 |             if step > max_step:
669 |                 max_step = step
670 |                 ckpt_file = step_str.groups()[0]
671 |     return meta_file, ckpt_file
672 | 
673 | def calculate_roc(thresholds,
embeddings1, embeddings2, actual_issame, nrof_folds=10): 674 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 675 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 676 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 677 | nrof_thresholds = len(thresholds) 678 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 679 | 680 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 681 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 682 | accuracy = np.zeros((nrof_folds)) 683 | 684 | diff = np.subtract(embeddings1, embeddings2) 685 | dist = np.sum(np.square(diff),1) 686 | indices = np.arange(nrof_pairs) 687 | #pdb.set_trace() 688 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 689 | 690 | # Find the best threshold for the fold 691 | acc_train = np.zeros((nrof_thresholds)) 692 | for threshold_idx, threshold in enumerate(thresholds): 693 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 694 | best_threshold_index = np.argmax(acc_train) 695 | for threshold_idx, threshold in enumerate(thresholds): 696 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], actual_issame[test_set]) 697 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 698 | 699 | tpr = np.mean(tprs,0) 700 | fpr = np.mean(fprs,0) 701 | return tpr, fpr, accuracy 702 | 703 | def calculate_accuracy(threshold, dist, actual_issame): 704 | predict_issame = np.less(dist, threshold) 705 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 706 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 707 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 708 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 709 | 710 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 711 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 712 | acc = float(tp+tn)/dist.size 713 | return tpr, fpr, acc 714 | 715 | 716 | 717 | def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10): 718 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 719 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 720 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 721 | nrof_thresholds = len(thresholds) 722 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 723 | 724 | val = np.zeros(nrof_folds) 725 | far = np.zeros(nrof_folds) 726 | 727 | diff = np.subtract(embeddings1, embeddings2) 728 | dist = np.sum(np.square(diff),1) 729 | indices = np.arange(nrof_pairs) 730 | 731 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 732 | 733 | # Find the threshold that gives FAR = far_target 734 | far_train = np.zeros(nrof_thresholds) 735 | for threshold_idx, threshold in enumerate(thresholds): 736 | _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set]) 737 | if np.max(far_train)>=far_target: 738 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 739 | threshold = f(far_target) 740 | else: 741 | threshold = 0.0 742 | 743 | val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set]) 744 | 745 | val_mean = np.mean(val) 746 | far_mean = np.mean(far) 747 | val_std = np.std(val) 748 | return val_mean, val_std, far_mean 749 | 750 | 751 | def calculate_val_far(threshold, dist, actual_issame): 
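    # VAL: fraction of genuine (same-identity) pairs accepted at this distance
    # threshold; FAR: fraction of impostor pairs falsely accepted at it.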
752 |     predict_issame = np.less(dist, threshold)
753 |     true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
754 |     false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
755 |     n_same = np.sum(actual_issame)
756 |     n_diff = np.sum(np.logical_not(actual_issame))
757 |     val = float(true_accept) / float(n_same)
758 |     far = float(false_accept) / float(n_diff)
759 |     return val, far
760 | 
761 | def store_revision_info(src_path, output_dir, arg_string):
762 | 
763 |     # Get git hash
764 |     gitproc = Popen(['git', 'rev-parse', 'HEAD'], stdout = PIPE, cwd=src_path)
765 |     (stdout, _) = gitproc.communicate()
766 |     git_hash = stdout.strip()
767 | 
768 |     # Get local changes
769 |     gitproc = Popen(['git', 'diff', 'HEAD'], stdout = PIPE, cwd=src_path)
770 |     (stdout, _) = gitproc.communicate()
771 |     git_diff = stdout.strip()
772 | 
773 |     # Store a text file in the log directory
774 |     rev_info_filename = os.path.join(output_dir, 'revision_info.txt')
775 |     with open(rev_info_filename, "w") as text_file:
776 |         text_file.write('arguments: %s\n--------------------\n' % arg_string)
777 |         text_file.write('git hash: %s\n--------------------\n' % git_hash)
778 |         text_file.write('%s' % git_diff)
779 | 
780 | def list_variables(filename):
781 |     reader = training.NewCheckpointReader(filename)
782 |     variable_map = reader.get_variable_to_shape_map()
783 |     names = sorted(variable_map.keys())
784 |     return names
785 | 
786 | def put_images_on_grid(images, shape=(16,8)):
787 |     nrof_images = images.shape[0]
788 |     img_size = images.shape[1]
789 |     bw = 3
790 |     img = np.zeros((shape[1]*(img_size+bw)+bw, shape[0]*(img_size+bw)+bw, 3), np.float32)
791 |     for i in range(shape[1]):
792 |         x_start = i*(img_size+bw)+bw
793 |         for j in range(shape[0]):
794 |             img_index = i*shape[0]+j
795 |             if img_index>=nrof_images:
796 |                 break
797 |             y_start = j*(img_size+bw)+bw
798 |             img[x_start:x_start+img_size, y_start:y_start+img_size, :] = images[img_index, :, :, :]
799 |         if img_index>=nrof_images:
800 |             break
801 |     return img
802 | 
803 | def write_arguments_to_file(args, filename):
804 |     with open(filename, 'w') as f:
805 |         for key, value in vars(args).items():  # items() works on both Python 2 and 3
806 |             f.write('%s: %s\n' % (key, str(value)))
807 | 
--------------------------------------------------------------------------------
/lr_coco.txt:
--------------------------------------------------------------------------------
1 | # Learning rate schedule
2 | # Maps an epoch number to a learning rate
3 | 0: 0.01
4 | 5: 0.05
5 | 10: 0.1
6 | 30: 0.05
7 | 60: 0.001
8 | 1000: 0.0001
9 | 
--------------------------------------------------------------------------------
/lr_scatter.txt:
--------------------------------------------------------------------------------
1 | # Learning rate schedule
2 | # Maps an epoch number to a learning rate
3 | 0: 0.05
4 | #5: 0.1
5 | 30: 0.03
6 | 60: 0.001
7 | 1000: 0.0001
8 | 
--------------------------------------------------------------------------------
/networks/inception_resnet_v1.py:
--------------------------------------------------------------------------------
1 | # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | 
16 | """Contains the definition of the Inception Resnet V1 architecture.
17 | As described in http://arxiv.org/abs/1602.07261.
18 | Inception-v4, Inception-ResNet and the Impact of Residual Connections
19 | on Learning
20 | Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi
21 | """
22 | from __future__ import absolute_import
23 | from __future__ import division
24 | from __future__ import print_function
25 | 
26 | import tensorflow as tf
27 | import tensorflow.contrib.slim as slim
28 | 
29 | # Inception-Resnet-A
30 | def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
31 |     """Builds the 35x35 resnet block."""
32 |     with tf.variable_scope(scope, 'Block35', [net], reuse=reuse):
33 |         with tf.variable_scope('Branch_0'):
34 |             tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1')
35 |         with tf.variable_scope('Branch_1'):
36 |             tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
37 |             tower_conv1_1 = slim.conv2d(tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3')
38 |         with tf.variable_scope('Branch_2'):
39 |             tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1')
40 |             tower_conv2_1 = slim.conv2d(tower_conv2_0, 32, 3, scope='Conv2d_0b_3x3')
41 |             tower_conv2_2 = slim.conv2d(tower_conv2_1, 32, 3, scope='Conv2d_0c_3x3')
42 |         mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3)
43 |         up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
44 |                          activation_fn=None, scope='Conv2d_1x1')
45 |         net += scale * up
46 |         if activation_fn:
47 |             net = activation_fn(net)
48 |     return net
49 | 
50 | # Inception-Resnet-B
51 | def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
52 |     """Builds the 17x17 resnet block."""
53 |     with tf.variable_scope(scope, 'Block17', [net], reuse=reuse):
54 |         with tf.variable_scope('Branch_0'):
55 |             tower_conv = slim.conv2d(net, 128, 1, scope='Conv2d_1x1')
56 |         with tf.variable_scope('Branch_1'):
57 |             tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1')
58 |             tower_conv1_1 = slim.conv2d(tower_conv1_0, 128, [1, 7],
59 |                                         scope='Conv2d_0b_1x7')
60 |             tower_conv1_2 = slim.conv2d(tower_conv1_1, 128, [7, 1],
61 |                                         scope='Conv2d_0c_7x1')
62 |         mixed = tf.concat([tower_conv, tower_conv1_2], 3)
63 |         up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
64 |                          activation_fn=None, scope='Conv2d_1x1')
65 |         net += scale * up
66 |         if activation_fn:
67 |             net = activation_fn(net)
68 |     return net
69 | 
70 | 
71 | # Inception-Resnet-C
72 | def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None):
73 |     """Builds the 8x8 resnet block."""
74 |     with tf.variable_scope(scope, 'Block8', [net], reuse=reuse):
75 |         with tf.variable_scope('Branch_0'):
76 |             tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1')
77 |         with tf.variable_scope('Branch_1'):
78 |             tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1')
79 |             tower_conv1_1 = slim.conv2d(tower_conv1_0, 192, [1, 3],
80 |                                         scope='Conv2d_0b_1x3')
81 |             tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [3, 1],
82 |                                         scope='Conv2d_0c_3x1')
83 |         mixed = tf.concat([tower_conv, tower_conv1_2], 3)
84 |         up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None,
85 |                          activation_fn=None, scope='Conv2d_1x1')
86 |         net += scale * up
87 |         if activation_fn:
88 |             net = activation_fn(net)
89 |     return net
90 | 
91 | def reduction_a(net, k, l, m, n):
92 |     with tf.variable_scope('Branch_0'):
93 |         tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID',
94 |                                  scope='Conv2d_1a_3x3')
95 |     with tf.variable_scope('Branch_1'):
96 |         tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1')
97 |         tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3,
98 |                                     scope='Conv2d_0b_3x3')
99 |         tower_conv1_2 = slim.conv2d(tower_conv1_1, m, 3,
100 |                                     stride=2, padding='VALID',
101 |                                     scope='Conv2d_1a_3x3')
102 |     with tf.variable_scope('Branch_2'):
103 |         tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
104 |                                      scope='MaxPool_1a_3x3')
105 |     net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
106 |     return net
107 | 
108 | def reduction_b(net):
109 |     with tf.variable_scope('Branch_0'):
110 |         tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
111 |         tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
112 |                                    padding='VALID', scope='Conv2d_1a_3x3')
113 |     with tf.variable_scope('Branch_1'):
114 |         tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
115 |         tower_conv1_1 = slim.conv2d(tower_conv1, 256, 3, stride=2,
116 |                                     padding='VALID', scope='Conv2d_1a_3x3')
117 |     with tf.variable_scope('Branch_2'):
118 |         tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
119 |         tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3,
120 |                                     scope='Conv2d_0b_3x3')
121 |         tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, stride=2,
122 |                                     padding='VALID', scope='Conv2d_1a_3x3')
123 |     with tf.variable_scope('Branch_3'):
124 |         tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
125 |                                      scope='MaxPool_1a_3x3')
126 |     net = tf.concat([tower_conv_1, tower_conv1_1,
127 |                      tower_conv2_2, tower_pool], 3)
128 |     return net
129 | 
130 | def inference(images, keep_probability, phase_train=True, 
131 |               bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
132 |     batch_norm_params = {
133 |         # Decay for the moving averages.
134 |         'decay': 0.995,
135 |         # epsilon to prevent 0s in variance.
136 |         'epsilon': 0.001,
137 |         #'is_training': phase_train,
138 |         # force in-place updates of mean and variance estimates
139 |         #'updates_collections': None,
140 |         # Moving averages end up in the trainable variables collection
141 |         'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ],
142 |     }
143 | 
144 |     with slim.arg_scope([slim.conv2d, slim.fully_connected],
145 |                         weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
146 |                         weights_regularizer=slim.l2_regularizer(weight_decay),
147 |                         normalizer_fn=slim.batch_norm,
148 |                         normalizer_params=batch_norm_params):
149 |         return inception_resnet_v1(images, is_training=phase_train,
150 |               dropout_keep_prob=keep_probability, bottleneck_layer_size=bottleneck_layer_size, reuse=reuse)
151 | 
152 | 
153 | def inception_resnet_v1(inputs, is_training=True,
154 |                         dropout_keep_prob=0.8,
155 |                         bottleneck_layer_size=128,
156 |                         reuse=None,
157 |                         scope='InceptionResnetV1'):
158 |     """Creates the Inception Resnet V1 model.
159 |     Args:
160 |       inputs: a 4-D tensor of size [batch_size, height, width, 3].
161 |       bottleneck_layer_size: size of the embedding (bottleneck) layer.
162 |       is_training: whether the model is being trained.
163 |       dropout_keep_prob: float, the fraction to keep before final layer.
164 |       reuse: whether or not the network and its variables should be reused. To be
165 |         able to reuse 'scope' must be given.
166 |       scope: Optional variable_scope.
167 |     Returns:
168 |       net: the output of the bottleneck (embedding) layer.
169 |       end_points: the set of end_points from the inception model.
170 |     """
171 |     end_points = {}
172 | 
173 |     with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
174 |         with slim.arg_scope([slim.batch_norm, slim.dropout],
175 |                             is_training=is_training):
176 |             with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
177 |                                 stride=1, padding='SAME'):
178 | 
179 |                 # 149 x 149 x 32
180 |                 net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
181 |                                   scope='Conv2d_1a_3x3')
182 |                 end_points['Conv2d_1a_3x3'] = net
183 |                 # 147 x 147 x 32
184 |                 net = slim.conv2d(net, 32, 3, padding='VALID',
185 |                                   scope='Conv2d_2a_3x3')
186 |                 end_points['Conv2d_2a_3x3'] = net
187 |                 # 147 x 147 x 64
188 |                 net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
189 |                 end_points['Conv2d_2b_3x3'] = net
190 |                 # 73 x 73 x 64
191 |                 net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
192 |                                       scope='MaxPool_3a_3x3')
193 |                 end_points['MaxPool_3a_3x3'] = net
194 |                 # 73 x 73 x 80
195 |                 net = slim.conv2d(net, 80, 1, padding='VALID',
196 |                                   scope='Conv2d_3b_1x1')
197 |                 end_points['Conv2d_3b_1x1'] = net
198 |                 # 71 x 71 x 192
199 |                 net = slim.conv2d(net, 192, 3, padding='VALID',
200 |                                   scope='Conv2d_4a_3x3')
201 |                 end_points['Conv2d_4a_3x3'] = net
202 |                 # 35 x 35 x 256
203 |                 net = slim.conv2d(net, 256, 3, stride=2, padding='VALID',
204 |                                   scope='Conv2d_4b_3x3')
205 |                 end_points['Conv2d_4b_3x3'] = net
206 | 
207 |                 # 5 x Inception-resnet-A
208 |                 net = slim.repeat(net, 5, block35, scale=0.17)
209 |                 end_points['Mixed_5a'] = net
210 | 
211 |                 # Reduction-A
212 |                 with tf.variable_scope('Mixed_6a'):
213 |                     net = reduction_a(net, 192, 192, 256, 384)
214 |                 end_points['Mixed_6a'] = net
215 | 
216 |                 # 10 x Inception-Resnet-B
217 |                 net = slim.repeat(net, 10, block17, scale=0.10)
218 |                 end_points['Mixed_6b'] = net
219 | 
220 |                 # Reduction-B
221 |                 with tf.variable_scope('Mixed_7a'):
222 |                     net = reduction_b(net)
223 |                 end_points['Mixed_7a'] = net
224 | 
225 |                 # 5 x Inception-Resnet-C
226 |                 net = slim.repeat(net, 5, block8, scale=0.20)
227 |                 end_points['Mixed_8a'] = net
228 | 
229 |                 net = block8(net, activation_fn=None)
230 |                 end_points['Mixed_8b'] = net
231 | 
232 |                 with tf.variable_scope('Logits'):
233 |                     end_points['PrePool'] = net
234 |                     #pylint: disable=no-member
235 |                     net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
236 |                                           scope='AvgPool_1a_8x8')
237 |                     net = slim.flatten(net)
238 | 
239 |                     net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
240 |                                        scope='Dropout')
241 | 
242 |                     end_points['PreLogitsFlatten'] = net
243 | 
244 |                 net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
245 |                                            scope='Bottleneck', reuse=False)
246 | 
247 |     return net, end_points
248 | 
--------------------------------------------------------------------------------
/networks/resface.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorflow.contrib.slim as slim
3 | 
4 | '''
5 | Resface20 and Resface36, as proposed in SphereFace and used in the Additive Margin Softmax paper.
6 | Notice:
7 | batch norm is used in line 111. To cancel batch norm, simply comment out line 111 and use line 112.
8 | '''
9 | 
10 | def prelu(x):
11 |     with tf.variable_scope('PRelu'):
12 |         #alphas = tf.Variable(tf.constant(0.25,dtype=tf.float32,shape=[x.get_shape()[-1]]),name='prelu_alphas')
13 |         alphas = tf.get_variable(name='prelu_alphas',initializer=tf.constant(0.25,dtype=tf.float32,shape=[x.get_shape()[-1]]))
14 |         pos = tf.nn.relu(x)
15 |         neg = alphas * (x - abs(x)) * 0.5
16 |         return pos + neg
17 | 
18 | def resface_block(lower_input,output_channels,scope=None):
19 |     with tf.variable_scope(scope):
20 |         net = slim.conv2d(lower_input, output_channels,weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
21 |         net = slim.conv2d(net, output_channels,weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
22 |         return lower_input + net
23 | 
24 | def resface_pre(lower_input,output_channels,scope=None):
25 |     net = slim.conv2d(lower_input, output_channels, stride=2, scope=scope)
26 |     return net
27 | 
28 | def resface20(images, keep_probability, 
29 |               phase_train=True, bottleneck_layer_size=512, 
30 |               weight_decay=0.0, reuse=None):
31 |     '''
32 |     conv name
33 |     conv[conv_layer]_[block_index]_[block_layer_index]
34 |     '''
35 |     with tf.variable_scope('Conv1'):
36 |         net = resface_pre(images,64,scope='Conv1_pre')
37 |         net = slim.repeat(net,1,resface_block,64,scope='Conv1')
38 |     with tf.variable_scope('Conv2'):
39 |         net = resface_pre(net,128,scope='Conv2_pre')
40 |         net = slim.repeat(net,2,resface_block,128,scope='Conv2')
41 |     with tf.variable_scope('Conv3'):
42 |         net = resface_pre(net,256,scope='Conv3_pre')
43 |         net = slim.repeat(net,4,resface_block,256,scope='Conv3')
44 |     with tf.variable_scope('Conv4'):
45 |         net = resface_pre(net,512,scope='Conv4_pre')
46 |         net = slim.repeat(net,1,resface_block,512,scope='Conv4')
47 | 
48 |     with tf.variable_scope('Logits'):
49 |         #pylint: disable=no-member
50 |         #net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
51 |         #                      scope='AvgPool')
52 |         net = slim.flatten(net)
53 | 
54 |         net = slim.dropout(net, keep_probability, is_training=phase_train,
55 |                            scope='Dropout')
56 | 
57 |     net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, 
58 |                                scope='Bottleneck', reuse=False)
59 |     return net,''
60 | 
61 | def resface36(images, keep_probability, 
62 |               phase_train=True, bottleneck_layer_size=512, 
63 |               weight_decay=0.0, reuse=None):
64 |     '''
65 |     conv name
66 |     conv[conv_layer]_[block_index]_[block_layer_index]
67 |     '''
68 |     with tf.variable_scope('Conv1'):
69 |         net = resface_pre(images,64,scope='Conv1_pre')
70 |         net = slim.repeat(net,2,resface_block,64,scope='Conv_1')
71 |     with tf.variable_scope('Conv2'):
72 |         net = resface_pre(net,128,scope='Conv2_pre')
73 |         net = slim.repeat(net,4,resface_block,128,scope='Conv_2')
74 |     with tf.variable_scope('Conv3'):
75 |         net = resface_pre(net,256,scope='Conv3_pre')
76 |         net = slim.repeat(net,8,resface_block,256,scope='Conv_3')
77 |     with tf.variable_scope('Conv4'):
78 |         net = resface_pre(net,512,scope='Conv4_pre')
79 |         #net = resface_block(Conv4_pre,512,scope='Conv4_1')
80 |         net = slim.repeat(net,1,resface_block,512,scope='Conv4')
81 | 
82 |     with tf.variable_scope('Logits'):
83 |         #pylint: disable=no-member
84 |         #net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
85 |         #                      scope='AvgPool')
86 |         net = slim.flatten(net)
87 |         net = slim.dropout(net, keep_probability, is_training=phase_train,
88 |                            scope='Dropout')
89 |     net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, 
90 |                                scope='Bottleneck', reuse=False)
91 | 
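    # note: the empty string below stands in for the end_points dict that the
    # inception network returns, keeping the two inference interfaces compatible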
return net,'' 92 | 93 | def inference(image_batch, keep_probability, 94 | phase_train=True, bottleneck_layer_size=512, 95 | weight_decay=0.0,reuse=False): 96 | batch_norm_params = { 97 | 'decay': 0.995, 98 | 'epsilon': 0.001, 99 | 'scale':True, 100 | 'is_training': phase_train, 101 | 'updates_collections': None, 102 | 'variables_collections': [ tf.GraphKeys.TRAINABLE_VARIABLES ], 103 | } 104 | with tf.variable_scope('Resface',reuse=reuse): 105 | with slim.arg_scope([slim.conv2d, slim.fully_connected], 106 | weights_initializer=tf.contrib.layers.xavier_initializer(), 107 | weights_regularizer=slim.l2_regularizer(weight_decay), 108 | activation_fn=prelu, 109 | normalizer_fn=slim.batch_norm, 110 | #normalizer_fn=None, 111 | normalizer_params=batch_norm_params): 112 | with slim.arg_scope([slim.conv2d], kernel_size=3): 113 | return resface20(images=image_batch, 114 | keep_probability=keep_probability, 115 | phase_train=phase_train, 116 | bottleneck_layer_size=bottleneck_layer_size) 117 | -------------------------------------------------------------------------------- /networks/sphere_network.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from tensorflow.python.framework import ops 4 | 5 | l2_regularizer= tf.contrib.layers.l2_regularizer(1.0) 6 | xavier = tf.contrib.layers.xavier_initializer_conv2d() 7 | def get_shape(tensor): 8 | static_shape = tensor.shape.as_list() 9 | dynamic_shape = tf.unstack(tf.shape(tensor)) 10 | dims = [s[1] if s[0] is None else s[0] for s in zip(static_shape,dynamic_shape)] 11 | return dims 12 | def infer(input,embedding_size=512): 13 | with tf.variable_scope('conv1_'): 14 | network = first_conv(input, 64, name = 'conv1') 15 | network = block(network, 'conv1_23', 64) 16 | with tf.variable_scope('conv2_'): 17 | network = first_conv(network, 128, name = 'conv2') 18 | network = block(network, 'conv2_23', 128) 19 | network = block(network, 'conv2_45', 128) 20 | with tf.variable_scope('conv3_'): 21 | network = first_conv(network, 256, name = 'conv3') 22 | network = block(network, 'conv3_23', 256) 23 | network = block(network, 'conv3_45', 256) 24 | network = block(network, 'conv3_67', 256) 25 | network = block(network, 'conv3_89', 256) 26 | with tf.variable_scope('conv4_'): 27 | network = first_conv(network, 512, name = 'conv4') 28 | network = block(network, 'conv4_23', 512) 29 | with tf.variable_scope('feature'): 30 | #BATCH_SIZE = network.get_shape()[0] 31 | dims = get_shape(network) 32 | print(dims) 33 | #BATCH_SIZE = tf.shape(network)[0] 34 | #feature = tf.layers.dense(tf.reshape(network,[BATCH_SIZE, -1]), 512, kernel_regularizer = l2_regularizer, kernel_initializer = xavier) 35 | feature = tf.layers.dense(tf.reshape(network,[dims[0], np.prod(dims[1:])]), embedding_size, kernel_regularizer = l2_regularizer, kernel_initializer = xavier) 36 | return feature 37 | 38 | 39 | def prelu(x, name = 'prelu'): 40 | with tf.variable_scope(name): 41 | alphas = tf.get_variable('alpha', x.get_shape()[-1], initializer=tf.constant_initializer(0.25), regularizer = l2_regularizer, dtype = tf.float32) 42 | pos = tf.nn.relu(x) 43 | neg = tf.multiply(alphas,(x - abs(x)) * 0.5) 44 | return pos + neg 45 | 46 | def first_conv(input, num_output, name): 47 | 48 | zero_init = tf.zeros_initializer() 49 | network = tf.layers.conv2d(input, num_output, kernel_size = [3, 3], strides = (2, 2), padding = 'same', kernel_initializer = xavier, bias_initializer = zero_init, kernel_regularizer = l2_regularizer, 
bias_regularizer = l2_regularizer)
50 |     network = prelu(network, name = name)
51 |     return network
52 | 
53 | 
54 | def block(input, name, num_output):
55 |     with tf.variable_scope(name):
56 |         network = tf.layers.conv2d(input, num_output, kernel_size = [3, 3], strides = [1, 1], padding = 'same', kernel_initializer = tf.random_normal_initializer(stddev=0.01), use_bias = False , kernel_regularizer = l2_regularizer)
57 |         network = prelu(network, name = 'name'+ '1')
58 |         network = tf.layers.conv2d(network, num_output, kernel_size = [3, 3], strides = [1, 1], padding = 'same', kernel_initializer = tf.random_normal_initializer(stddev=0.01), use_bias = False, kernel_regularizer = l2_regularizer)
59 |         network = prelu(network, name = 'name'+ '2')
60 |         network = tf.add(input, network)
61 |         return network
62 | 
63 | def get_normal_loss(input, label, num_output, lambda_value, m_value = 4):
64 |     feature_dim = input.get_shape()[1]
65 |     weight = tf.get_variable('weight', shape = [num_output, feature_dim], regularizer = l2_regularizer, initializer = xavier)
66 |     prob_distribution = tf.one_hot(label, num_output)
67 |     weight = tf.nn.l2_normalize(weight, dim = 1)
68 |     label_float = tf.cast(label, tf.float32)
69 |     margin_out = marginInnerProduct_module.margin_inner_product(input, weight, tf.constant(m_value), lambda_value, label_float)  # note: marginInnerProduct_module is an external custom op that is not defined in this repo
70 |     #margin_out = tf.layers.dense(input, num_output)
71 | 
72 |     loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=margin_out, labels = prob_distribution))
73 |     return loss
74 | 
--------------------------------------------------------------------------------
/test.sh:
--------------------------------------------------------------------------------
1 | V="v1"
2 | if test ${V} = "v1"
3 | then
4 |     MODEL_DIR=models/model-20180309-083949.ckpt-60000
5 |     TEST_DATA=dataset/lfw-112X96
6 |     EMBEDDING_SIZE=512
7 |     FC_BN=''
8 |     PREWHITEN='--prewhiten'
9 |     IMAGE_WIDTH=96
10 | else
11 |     MODEL_DIR=models/model-20180626-205832.ckpt-60000
12 |     TEST_DATA=dataset/lfw-112x112
13 |     EMBEDDING_SIZE=1024
14 |     FC_BN='--fc_bn'
15 |     PREWHITEN=''
16 |     IMAGE_WIDTH=112
17 | fi
18 | IMAGE_HEIGHT=112
19 | CUDA_VISIBLE_DEVICES=1 python test/test.py ${TEST_DATA} ${MODEL_DIR} --lfw_file_ext jpg --network_type sphere_network --embedding_size ${EMBEDDING_SIZE} ${FC_BN} ${PREWHITEN} --image_height ${IMAGE_HEIGHT} --image_width ${IMAGE_WIDTH}
20 | 
--------------------------------------------------------------------------------
/test/test.py:
--------------------------------------------------------------------------------
1 | 
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 | 
6 | import tensorflow as tf
7 | import numpy as np
8 | import argparse
9 | import sys
10 | sys.path.insert(0,'networks')
11 | sys.path.insert(0,'lib')
12 | import utils
13 | import lfw
14 | import os
15 | import math
16 | import tensorflow.contrib.slim as slim
17 | from tensorflow.contrib.slim.nets import resnet_v1, resnet_v2
18 | #from models import resnet_v1, resnet_v2,resnet_v1_modify,resnet_v2_modify
19 | import sphere_network as network
20 | from sklearn import metrics
21 | from scipy.optimize import brentq
22 | from scipy import interpolate
23 | import importlib
24 | import pdb
25 | 
26 | def main(args):
27 | 
28 |     with tf.Graph().as_default():
29 | 
30 |         with tf.Session() as sess:
31 | 
32 |             # Read the file containing the pairs used for testing
33 |             pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
34 |             #pdb.set_trace()
35 | 
36 |             # Get the paths for the corresponding images
for the corresponding images 37 | paths, actual_issame = lfw.get_paths(os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext) 38 | 39 | 40 | 41 | #image_size = images_placeholder.get_shape()[1] # For some reason this doesn't work for frozen graphs 42 | image_size = args.image_size 43 | print('image size',image_size) 44 | #images_placeholder = tf.placeholder(tf.float32,shape=(None,image_size,image_size,3),name='image') 45 | images_placeholder = tf.placeholder(tf.float32,shape=(None,args.image_height,args.image_width,3),name='image') 46 | phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') 47 | #with slim.arg_scope(resnet_v1.resnet_arg_scope(False)): 48 | if args.network_type == 'resnet50': 49 | with slim.arg_scope(resnet_v2.resnet_arg_scope(False)): 50 | prelogits, end_points = resnet_v2.resnet_v2_50(images_placeholder,is_training=phase_train_placeholder,num_classes=256,output_stride=16) 51 | #prelogits, end_points = resnet_v2.resnet_v2_50(images_placeholder,is_training=phase_train_placeholder,num_classes=256,output_stride=8) 52 | #prelogits, end_points = resnet_v2_modify.resnet_v2_50(images_placeholder,is_training=phase_train_placeholder,num_classes=256) 53 | #prelogits = slim.batch_norm(prelogits, is_training=phase_train_placeholder,epsilon=1e-5, scale=True,scope='softmax_bn') 54 | prelogits = tf.squeeze(prelogits,[1,2],name='SpatialSqueeze') 55 | 56 | elif args.network_type == 'sphere_network': 57 | prelogits = network.infer(images_placeholder,args.embedding_size) 58 | if args.fc_bn: 59 | print('do batch norm after network') 60 | prelogits = slim.batch_norm(prelogits, is_training=phase_train_placeholder,epsilon=1e-5, scale=True,scope='softmax_bn') 61 | 62 | 63 | 64 | #embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') 65 | embeddings = tf.identity(prelogits) 66 | #saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) 67 | saver = tf.train.Saver(tf.global_variables(), max_to_keep=3) 68 | saver.restore(sess, args.model) 69 | if args.save_model: 70 | saver.save(sess,'./tmp_saved_model',global_step=1) 71 | return 0 72 | 73 | embedding_size = embeddings.get_shape()[1] 74 | # Run forward pass to calculate embeddings 75 | print('Running forward pass on LFW images') 76 | batch_size = args.lfw_batch_size 77 | nrof_images = len(paths) 78 | nrof_batches = int(math.ceil(1.0*nrof_images / batch_size)) 79 | if args.do_flip: 80 | embedding_size *= 2 # original and flipped embeddings are concatenated 81 | emb_array = np.zeros((nrof_images, embedding_size)) # allocated once; embedding_size already reflects the flip setting 82 | 83 | 84 | for i in range(nrof_batches): 85 | start_index = i*batch_size 86 | print('handling {}/{}'.format(start_index,nrof_images)) 87 | end_index = min((i+1)*batch_size, nrof_images) 88 | paths_batch = paths[start_index:end_index] 89 | #images = facenet.load_data(paths_batch, False, False, image_size,True,image_size) 90 | #images = facenet.load_data2(paths_batch, False, False, args.image_height,args.image_width,True,) 91 | images = utils.load_data(paths_batch, False, False, args.image_height,args.image_width,args.prewhiten,(args.image_height,args.image_width)) 92 | feed_dict = { images_placeholder:images, phase_train_placeholder:False } 93 | feats = sess.run(embeddings, feed_dict=feed_dict) 94 | if args.do_flip: 95 | images_flip = utils.load_data(paths_batch, False, True, args.image_height,args.image_width,args.prewhiten,(args.image_height,args.image_width)) 96 | feed_dict = { images_placeholder:images_flip, phase_train_placeholder:False } 97 | feats_flip = sess.run(embeddings, feed_dict=feed_dict)
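# Test-time flip augmentation: with --do_flip each row of emb_array stores [f(x), f(flip(x))], which is why embedding_size was doubled above; the concatenated vector is L2-normalized as a whole below.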
98 | feats = np.concatenate((feats,feats_flip),axis=1) 99 | #feats = (feats+feats_flip)/2 100 | #images = facenet.load_data(paths_batch, False, False, 160,True,182) 101 | #images = facenet.load_data(paths_batch, False, False, image_size,src_size=256) 102 | #feed_dict = { images_placeholder:images, phase_train_placeholder:True} 103 | #pdb.set_trace() 104 | #feats = facenet.prewhiten(feats) 105 | feats = utils.l2_normalize(feats) 106 | emb_array[start_index:end_index,:] = feats 107 | #pdb.set_trace() 108 | 109 | tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(emb_array, 110 | actual_issame, nrof_folds=args.lfw_nrof_folds) 111 | 112 | print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy), np.std(accuracy))) 113 | print('Validation rate: %2.5f+-%2.5f @ FAR=%2.5f' % (val, val_std, far)) 114 | 115 | auc = metrics.auc(fpr, tpr) 116 | print('Area Under Curve (AUC): %1.3f' % auc) 117 | eer = brentq(lambda x: 1. - x - interpolate.interp1d(fpr, tpr)(x), 0., 1.) 118 | print('Equal Error Rate (EER): %1.3f' % eer) 119 | 120 | def parse_arguments(argv): 121 | parser = argparse.ArgumentParser() 122 | 123 | parser.add_argument('lfw_dir', type=str, 124 | help='Path to the data directory containing aligned LFW face patches.') 125 | parser.add_argument('--network_type', type=str, 126 | help='Network structure.',default='resnet50') 127 | parser.add_argument('--fc_bn', 128 | help='Whether batch norm is applied after the fc layer.',default=False,action='store_true') 129 | parser.add_argument('--prewhiten', 130 | help='Whether to prewhiten images during preprocessing.',default=False,action='store_true') 131 | parser.add_argument('--save_model', 132 | help='Whether to save the restored model to disk.',default=False,action='store_true') # store_true avoids the argparse type=bool pitfall, where any non-empty string parses as True 133 | parser.add_argument('--do_flip', 134 | help='Whether horizontal flip augmentation is used in test.',default=False,action='store_true') 135 | parser.add_argument('--lfw_batch_size', type=int, 136 | help='Number of images to process in a batch in the LFW test set.', default=200) 137 | parser.add_argument('--embedding_size', type=int, 138 | help='Feature embedding size.', default=512) 139 | parser.add_argument('model', type=str, 140 | help='Could be either a directory containing the meta_file and ckpt_file or a model protobuf (.pb) file') 141 | parser.add_argument('--image_size', type=int, 142 | help='Image size in pixels.', default=224) 143 | parser.add_argument('--image_height', type=int, 144 | help='Image height in pixels.', default=112) 145 | parser.add_argument('--image_width', type=int, 146 | help='Image width in pixels.', default=96) 147 | parser.add_argument('--lfw_pairs', type=str, 148 | help='The file containing the pairs to use for validation.', default='data/pairs.txt') 149 | parser.add_argument('--lfw_file_ext', type=str, 150 | help='The file extension for the LFW dataset.', default='png', choices=['jpg', 'png']) 151 | parser.add_argument('--lfw_nrof_folds', type=int, 152 | help='Number of folds to use for cross validation. Mainly used for testing.', default=10) 153 | parser.add_argument('--model_def', type=str, 154 | help='Model definition. Points to a module containing the definition of the inference graph.', default='models.inception_resnet_v1')
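# NOTE: --model_def appears unused in this script and --image_size is only printed; the input placeholder is built from --image_height/--image_width, and the backbone is selected with --network_type.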
155 | return parser.parse_args(argv) 156 | 157 | if __name__ == '__main__': 158 | main(parse_arguments(sys.argv[1:])) 159 | -------------------------------------------------------------------------------- /train.sh: -------------------------------------------------------------------------------- 1 | NETWORK=sphere_network 2 | #NETWORK=resface 3 | #NETWORK=inception_net 4 | #NETWORK=resnet_v2 5 | 6 | CROP=112 7 | 8 | GPU=0 9 | #GPU=0,1,2,3 10 | NUM_GPUS=1 11 | ARGS="CUDA_VISIBLE_DEVICES=${GPU}" 12 | #WEIGHT_DECAY=1e-3 13 | WEIGHT_DECAY=1e-4 14 | LOSS_TYPE=cosface 15 | #LOSS_TYPE=softmax 16 | SCALE=64. 17 | #WEIGHT=3. 18 | #SCALE=32. 19 | WEIGHT=2. 20 | #WEIGHT=2.5 21 | ALPHA=0.35 22 | #ALPHA=0.25 23 | #ALPHA=0.2 24 | #ALPHA=0.3 25 | #LR_FILE=lr_coco.txt 26 | IMAGE_HEIGHT=112 27 | IMAGE_WIDTH=112 28 | EMBEDDING_SIZE=1024 29 | LR_FILE=lr_coco.txt 30 | OPT=ADAM 31 | #OPT=MOM 32 | FC_BN='--fc_bn' 33 | NAME=${NETWORK}_${LOSS_TYPE}_${CROP}_${GPU}_${SCALE}_${WEIGHT}_${ALPHA}_${OPT}_${FC_BN}_${IMAGE_WIDTH}_${EMBEDDING_SIZE} 34 | #CMD="python train_softmax_mult_gpu.py --logs_base_dir logs/${NAME}/ --models_base_dir models/$NAME/ --data_dir dataset/CASIA-maxpy-clean --image_size 160 --model_def models.inception_resnet_v1 --lfw_dir '' --optimizer MOM --learning_rate -1 --max_nrof_epochs 100 --random_flip --learning_rate_schedule_file learning_rate_schedule_classifier_resnet.txt --num_gpus 1 --weight_decay ${WEIGHT_DECAY} --loss_type ${LOSS_TYPE} --scale ${SCALE} --weight ${WEIGHT} --alpha ${ALPHA}" 35 | #CMD="python train/train_multi_gpu.py --logs_base_dir logs/${NAME}/ --models_base_dir models/$NAME/ --data_dir dataset/CASIA-maxpy-clean --image_size 160 --model_def models.inception_resnet_v1 --optimizer MOM --learning_rate -1 --max_nrof_epochs 100 --random_flip --learning_rate_schedule_file ${LR_FILE} --num_gpus 1 --weight_decay ${WEIGHT_DECAY} --loss_type ${LOSS_TYPE} --scale ${SCALE} --weight ${WEIGHT} --alpha ${ALPHA} --network ${NETWORK}" 36 | #CMD="python train/train_multi_gpu.py --logs_base_dir logs/${NAME}/ --models_base_dir models/$NAME/ --data_dir dataset/CASIA-WebFace-112X96 --model_def models.inception_resnet_v1 --optimizer MOM --learning_rate -1 --max_nrof_epochs 100 --random_flip --learning_rate_schedule_file ${LR_FILE} --num_gpus 1 --weight_decay ${WEIGHT_DECAY} --loss_type ${LOSS_TYPE} --scale ${SCALE} --weight ${WEIGHT} --alpha ${ALPHA} --network ${NETWORK}" 37 | #CMD="python train/train_multi_gpu.py --logs_base_dir logs/${NAME}/ --models_base_dir models/$NAME/ --data_dir dataset/CASIA-WebFace-112X96 --model_def models.inception_resnet_v1 --optimizer ${OPT} --learning_rate -1 --max_nrof_epochs 100 --random_flip --learning_rate_schedule_file ${LR_FILE} --num_gpus ${NUM_GPUS} --weight_decay ${WEIGHT_DECAY} --loss_type ${LOSS_TYPE} --scale ${SCALE} --weight ${WEIGHT} --alpha ${ALPHA} --network ${NETWORK} ${FC_BN}" 38 | CMD="python train/train_multi_gpu.py --logs_base_dir logs/${NAME}/ --models_base_dir models/$NAME/ --data_dir dataset/casia-112x112 --list_file dataset/cleaned_list.txt --model_def models.inception_resnet_v1 --optimizer ${OPT} --learning_rate -1 --max_nrof_epochs 100 --random_flip --learning_rate_schedule_file ${LR_FILE} --num_gpus ${NUM_GPUS} --weight_decay ${WEIGHT_DECAY} --loss_type ${LOSS_TYPE} --scale ${SCALE} --weight ${WEIGHT} --alpha ${ALPHA} --network ${NETWORK} ${FC_BN} --image_height ${IMAGE_HEIGHT} --image_width ${IMAGE_WIDTH} --embedding_size ${EMBEDDING_SIZE}"
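# ARGS only carries the CUDA_VISIBLE_DEVICES assignment; the eval below expands it so it acts as an environment prefix for the python command.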
${EMBEDDING_SIZE}" 39 | echo Run "$ARGS ${CMD}" 40 | eval "$ARGS ${CMD}" 41 | -------------------------------------------------------------------------------- /train/train_multi_gpu.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | 6 | from datetime import datetime 7 | import os.path 8 | import os 9 | import time 10 | import sys 11 | sys.path.insert(0,'lib') 12 | sys.path.insert(0,'networks') 13 | import tensorflow as tf 14 | from tensorflow.python.client import timeline 15 | from tensorflow.contrib import slim 16 | import tensorflow.contrib.data as tf_data 17 | from collections import Counter 18 | import numpy as np 19 | import importlib 20 | import itertools 21 | import tensorflow.contrib.slim as slim 22 | from tensorflow.contrib.slim.nets import resnet_v1, resnet_v2 23 | import argparse 24 | import utils 25 | import sphere_network as network 26 | import inception_resnet_v1 as inception_net 27 | import resface as resface 28 | #import lfw 29 | import pdb 30 | #import cv2 31 | #import pylab as plt 32 | 33 | debug = False 34 | softmax_ind = 0 35 | 36 | from tensorflow.python.ops import data_flow_ops 37 | 38 | def _from_tensor_slices(tensors_x,tensors_y): 39 | #return TensorSliceDataset((tensors_x,tensors_y)) 40 | return tf_data.Dataset.from_tensor_slices((tensors_x,tensors_y)) 41 | 42 | 43 | 44 | def main(args): 45 | 46 | #network = importlib.import_module(args.model_def) 47 | 48 | subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') 49 | log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) 50 | if not os.path.isdir(log_dir): # Create the log directory if it doesn't exist 51 | os.makedirs(log_dir) 52 | model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) 53 | if not os.path.isdir(model_dir): # Create the model directory if it doesn't exist 54 | os.makedirs(model_dir) 55 | 56 | # Write arguments to a text file 57 | utils.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) 58 | 59 | # Store some git revision info in a text file in the log directory 60 | src_path,_ = os.path.split(os.path.realpath(__file__)) 61 | utils.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) 62 | 63 | np.random.seed(seed=args.seed) 64 | 65 | #train_set = utils.get_dataset(args.data_dir) 66 | train_set = utils.dataset_from_list(args.data_dir,args.list_file) 67 | nrof_classes = len(train_set) 68 | print('nrof_classes: ',nrof_classes) 69 | image_list, label_list = utils.get_image_paths_and_labels(train_set) 70 | print('total images: ',len(image_list)) 71 | image_list = np.array(image_list) 72 | label_list = np.array(label_list,dtype=np.int32) 73 | 74 | dataset_size = len(image_list) 75 | single_batch_size = args.people_per_batch*args.images_per_person 76 | indices = range(dataset_size) 77 | np.random.shuffle(indices) 78 | 79 | def _sample_people_softmax(x): 80 | global softmax_ind 81 | if softmax_ind >= dataset_size: 82 | np.random.shuffle(indices) 83 | softmax_ind = 0 84 | true_num_batch = min(single_batch_size,dataset_size - softmax_ind) 85 | 86 | sample_paths = image_list[indices[softmax_ind:softmax_ind+true_num_batch]] 87 | sample_labels = label_list[indices[softmax_ind:softmax_ind+true_num_batch]] 88 | 89 | softmax_ind += true_num_batch 90 | 91 | return (np.array(sample_paths), np.array(sample_labels,dtype=np.int32)) 92 | 93 | def _sample_people(x): 94 | '''We sample people based 
95 | 96 | 97 | 98 | image_paths, num_per_class = sample_people(train_set,args.people_per_batch*(args.num_gpus-1),args.images_per_person) 99 | labels = [] 100 | for i in range(len(num_per_class)): 101 | labels.extend([i]*num_per_class[i]) 102 | return (np.array(image_paths),np.array(labels,dtype=np.int32)) 103 | 104 | def _parse_function(filename,label): 105 | file_contents = tf.read_file(filename) 106 | image = tf.image.decode_image(file_contents, channels=3) 107 | #image = tf.image.decode_jpeg(file_contents, channels=3) 108 | print(image.shape) 109 | 110 | if args.random_crop: 111 | print('Using random crop') 112 | image = tf.random_crop(image, [args.image_size, args.image_size, 3]) 113 | else: 114 | print('Not using random crop') 115 | #image.set_shape((args.image_size, args.image_size, 3)) 116 | image.set_shape((None,None, 3)) 117 | image = tf.image.resize_images(image, size=(args.image_height, args.image_width)) 118 | #print(image.shape) 119 | if args.random_flip: 120 | image = tf.image.random_flip_left_right(image) 121 | 122 | #pylint: disable=no-member 123 | #image.set_shape((args.image_size, args.image_size, 3)) 124 | image.set_shape((args.image_height, args.image_width, 3)) 125 | image = tf.cast(image,tf.float32) 126 | if not debug: # in debug mode keep raw pixel values 127 | image = tf.subtract(image,127.5) 128 | image = tf.div(image,128.) # scale pixels to roughly [-1, 1] 129 | 130 | 131 | #image = tf.image.per_image_standardization(image) 132 | return image, label 133 | 134 | 135 | print('Model directory: %s' % model_dir) 136 | print('Log directory: %s' % log_dir) 137 | if args.pretrained_model: 138 | print('Pre-trained model: %s' % os.path.expanduser(args.pretrained_model)) 139 | 140 | 141 | with tf.Graph().as_default(): 142 | tf.set_random_seed(args.seed) 143 | global_step = tf.Variable(0, trainable=False,name='global_step') 144 | 145 | # Placeholder for the learning rate 146 | learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') 147 | 148 | 149 | phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') 150 | 151 | 152 | 153 | # The input pipeline is pinned to the CPU; batches are generated sequentially via tf.py_func 154 | with tf.device("/cpu:0"): 155 | 156 | softmax_dataset = tf_data.Dataset.range(args.epoch_size*args.max_nrof_epochs*100) 157 | softmax_dataset = softmax_dataset.map(lambda x: tf.py_func(_sample_people_softmax,[x],[tf.string,tf.int32])) 158 | softmax_dataset = softmax_dataset.flat_map(_from_tensor_slices) 159 | softmax_dataset = softmax_dataset.map(_parse_function,num_threads=8,output_buffer_size=2000) 160 | softmax_dataset = softmax_dataset.batch(args.num_gpus*single_batch_size) 161 | softmax_iterator = softmax_dataset.make_initializable_iterator() 162 | softmax_next_element = softmax_iterator.get_next() 163 | softmax_next_element[0].set_shape((args.num_gpus*single_batch_size, args.image_height,args.image_width,3)) 164 | softmax_next_element[1].set_shape(args.num_gpus*single_batch_size) 165 | batch_image_split = tf.split(softmax_next_element[0],args.num_gpus) 166 | batch_label_split = tf.split(softmax_next_element[1],args.num_gpus) 167 | 168 | 169 | 170 | 171 | 172 | learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step, 173 | args.learning_rate_decay_epochs*args.epoch_size, args.learning_rate_decay_factor, staircase=True) 174 | tf.summary.scalar('learning_rate', learning_rate) 175 | 176 | print('Using optimizer: {}'.format(args.optimizer)) 177 | if args.optimizer == 'ADAGRAD': 178 | opt = tf.train.AdagradOptimizer(learning_rate) 179
| elif args.optimizer == 'MOM': 180 | opt = tf.train.MomentumOptimizer(learning_rate,0.9) 181 | elif args.optimizer == 'ADAM': 182 | opt = tf.train.AdamOptimizer(learning_rate, beta1=0.9, beta2=0.999, epsilon=0.1) 183 | else: 184 | raise Exception("Not supported optimizer: {}".format(args.optimizer)) 185 | tower_losses = [] 186 | tower_cross = [] 187 | tower_dist = [] 188 | tower_reg= [] 189 | for i in range(args.num_gpus): 190 | with tf.device("/gpu:" + str(i)): 191 | with tf.name_scope("tower_" + str(i)) as scope: 192 | with slim.arg_scope([slim.model_variable, slim.variable], device="/cpu:0"): 193 | with tf.variable_scope(tf.get_variable_scope()) as var_scope: 194 | reuse = False if i ==0 else True 195 | #with slim.arg_scope(resnet_v2.resnet_arg_scope(args.weight_decay)): 196 | #prelogits, end_points = resnet_v2.resnet_v2_50(batch_image_split[i],is_training=True, 197 | # output_stride=16,num_classes=args.embedding_size,reuse=reuse) 198 | #prelogits, end_points = network.inference(batch_image_split[i], args.keep_probability, 199 | # phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, 200 | # weight_decay=args.weight_decay, reuse=reuse) 201 | if args.network == 'sphere_network': 202 | prelogits = network.infer(batch_image_split[i],args.embedding_size) 203 | print(prelogits) 204 | elif args.network == 'resface': 205 | prelogits, _ = resface.inference(batch_image_split[i],1.0,bottleneck_layer_size=args.embedding_size,weight_decay=args.weight_decay,reuse=reuse) 206 | elif args.network == 'inception_net': 207 | prelogits, endpoints = inception_net.inference(batch_image_split[i],1,phase_train=True,bottleneck_layer_size=args.embedding_size,weight_decay=args.weight_decay,reuse=reuse) 208 | print(prelogits) 209 | 210 | elif args.network == 'resnet_v2': 211 | with slim.arg_scope(resnet_v2.resnet_arg_scope(args.weight_decay)): 212 | prelogits, end_points = resnet_v2.resnet_v2_50(batch_image_split[i],is_training=True, 213 | output_stride=16,num_classes=args.embedding_size,reuse=reuse) 214 | prelogits = tf.squeeze(prelogits,axis=[1,2]) 215 | 216 | else: 217 | raise Exception("Not supported network: {}".format(args.network)) 218 | if args.fc_bn: 219 | 220 | prelogits = slim.batch_norm(prelogits, is_training=True, decay=0.997,epsilon=1e-5,scale=True,updates_collections=tf.GraphKeys.UPDATE_OPS,reuse=reuse,scope='softmax_bn') 221 | if args.loss_type == 'softmax': 222 | cross_entropy_mean = utils.softmax_loss(prelogits,batch_label_split[i], len(train_set),args.weight_decay,reuse) 223 | regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) 224 | tower_cross.append(cross_entropy_mean) 225 | #loss = cross_entropy_mean + args.weight_decay*tf.add_n(regularization_losses) 226 | loss = cross_entropy_mean + tf.add_n(regularization_losses) 227 | #tower_dist.append(0) 228 | #tower_cross.append(cross_entropy_mean) 229 | #tower_th.append(0) 230 | tower_losses.append(loss) 231 | tower_reg.append(regularization_losses) 232 | elif args.loss_type == 'cosface': 233 | label_reshape = tf.reshape(batch_label_split[i],[single_batch_size]) 234 | label_reshape = tf.cast(label_reshape,tf.int64) 235 | coco_loss = utils.cos_loss(prelogits,label_reshape, len(train_set),reuse,alpha=args.alpha,scale=args.scale) 236 | #scatter_loss, _ = facenet.coco_loss(prelogits,label_reshape, len(train_set),reuse,alpha=args.alpha,scale=args.scale) 237 | #coco_loss = scatter_loss['loss_total'] 238 | regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES) 239 | if args.network 
== 'sphere_network': 240 | print('reg loss using weight_decay * tf.add_n') 241 | reg_loss = args.weight_decay*tf.add_n(regularization_losses) 242 | else: 243 | print('reg loss using tf.add_n') 244 | reg_loss = tf.add_n(regularization_losses) # tf.add_n sums the collected losses; the raw list returned by tf.get_collection cannot be added to a tensor 245 | loss = coco_loss + reg_loss 246 | 247 | tower_losses.append(loss) 248 | tower_reg.append(reg_loss) 249 | 250 | #loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') 251 | tf.get_variable_scope().reuse_variables() 252 | total_loss = tf.reduce_mean(tower_losses) 253 | total_reg = tf.reduce_mean(tower_reg) 254 | losses = {} 255 | losses['total_loss'] = total_loss 256 | losses['total_reg'] = total_reg 257 | 258 | grads = opt.compute_gradients(total_loss,tf.trainable_variables(),colocate_gradients_with_ops=True) 259 | apply_gradient_op = opt.apply_gradients(grads,global_step=global_step) 260 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 261 | with tf.control_dependencies(update_ops): 262 | train_op = tf.group(apply_gradient_op) 263 | 264 | save_vars = [var for var in tf.global_variables() if 'Adagrad' not in var.name and 'global_step' not in var.name] # NOTE: only Adagrad slot variables are filtered out; Adam/Momentum slots are still checkpointed 265 | 266 | #saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) 267 | saver = tf.train.Saver(save_vars, max_to_keep=3) 268 | 269 | # Build the summary operation based on the TF collection of Summaries. 270 | summary_op = tf.summary.merge_all() 271 | 272 | # Start running operations on the Graph. 273 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction) 274 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,allow_soft_placement=True)) 275 | 276 | # Initialize variables 277 | sess.run(tf.global_variables_initializer(), feed_dict={phase_train_placeholder:True}) 278 | sess.run(tf.local_variables_initializer(), feed_dict={phase_train_placeholder:True}) 279 | 280 | #sess.run(iterator.initializer) 281 | sess.run(softmax_iterator.initializer) 282 | 283 | summary_writer = tf.summary.FileWriter(log_dir, sess.graph) 284 | coord = tf.train.Coordinator() 285 | tf.train.start_queue_runners(coord=coord, sess=sess) 286 | 287 | with sess.as_default(): 288 | #pdb.set_trace() 289 | 290 | if args.pretrained_model: 291 | print('Restoring pretrained model: %s' % args.pretrained_model) 292 | saver.restore(sess, os.path.expanduser(args.pretrained_model)) 293 | 294 | # Training and validation loop 295 | epoch = 0 296 | while epoch < args.max_nrof_epochs: 297 | step = sess.run(global_step, feed_dict=None) 298 | epoch = step // args.epoch_size 299 | if debug: # NOTE: debug_train and the placeholders it references are not defined in this file; this branch is dead while debug = False 300 | debug_train(args, sess, train_set, epoch, image_batch_gather, enqueue_op,batch_size_placeholder, image_batch_split,image_paths_split,num_per_class_split, 301 | image_paths_placeholder,image_paths_split_placeholder, labels_placeholder, labels_batch, num_per_class_placeholder,num_per_class_split_placeholder,len(gpus)) 302 | # Train for one epoch 303 | train(args, sess, epoch, 304 | learning_rate_placeholder, phase_train_placeholder, global_step, 305 | losses, train_op, summary_op, summary_writer, args.learning_rate_schedule_file) 306 | 307 | # Save variables and the metagraph if it doesn't exist already 308 | save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, step) 309 | 310 | return model_dir 311 | 312 | def train(args, sess, epoch, 313 | learning_rate_placeholder, phase_train_placeholder, global_step, 314 | loss, train_op, summary_op, summary_writer, learning_rate_schedule_file): 315 | batch_number = 0 316 |
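# A positive --learning_rate is used unchanged for every epoch; a negative value (as set by train.sh) switches to the per-epoch schedule read from the schedule file.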
317 | if args.learning_rate > 0.0: 318 | lr = args.learning_rate 319 | else: 320 | lr = utils.get_learning_rate_from_file(learning_rate_schedule_file, epoch) 321 | while batch_number < args.epoch_size: 322 | 323 | 324 | print('Running training step: ', end='') 325 | feed_dict = {learning_rate_placeholder: lr, phase_train_placeholder: True} 326 | start_time = time.time() 327 | total_err, reg_err, _, step = sess.run([loss['total_loss'], loss['total_reg'], train_op, global_step ], feed_dict=feed_dict) 328 | duration = time.time() - start_time 329 | print('Epoch: [%d][%d/%d]\tTime %.3f\tTotal Loss %2.3f\tReg Loss %2.3f, lr %2.5f' % 330 | (epoch, batch_number+1, args.epoch_size, duration, total_err, reg_err, lr)) 331 | 332 | batch_number += 1 333 | return step 334 | 335 | 336 | def save_variables_and_metagraph(sess, saver, summary_writer, model_dir, model_name, step): 337 | # Save the model checkpoint 338 | print('Saving variables') 339 | start_time = time.time() 340 | checkpoint_path = os.path.join(model_dir, 'model-%s.ckpt' % model_name) 341 | saver.save(sess, checkpoint_path, global_step=step, write_meta_graph=False) 342 | save_time_variables = time.time() - start_time 343 | print('Variables saved in %.2f seconds' % save_time_variables) 344 | metagraph_filename = os.path.join(model_dir, 'model-%s.meta' % model_name) 345 | save_time_metagraph = 0 346 | if not os.path.exists(metagraph_filename): 347 | print('Saving metagraph') 348 | start_time = time.time() 349 | saver.export_meta_graph(metagraph_filename) 350 | save_time_metagraph = time.time() - start_time 351 | print('Metagraph saved in %.2f seconds' % save_time_metagraph) 352 | summary = tf.Summary() 353 | #pylint: disable=maybe-no-member 354 | summary.value.add(tag='time/save_variables', simple_value=save_time_variables) 355 | summary.value.add(tag='time/save_metagraph', simple_value=save_time_metagraph) 356 | summary_writer.add_summary(summary, step) 357 | 358 | 359 | def get_learning_rate_from_file(filename, epoch): # NOTE: train() above uses utils.get_learning_rate_from_file, so this local copy appears unused; schedule lines look like "<epoch>: <learning rate>", and '#' starts a comment 360 | with open(filename, 'r') as f: 361 | for line in f.readlines(): 362 | line = line.split('#', 1)[0] 363 | if line: 364 | par = line.strip().split(':') 365 | e = int(par[0]) 366 | lr = float(par[1]) 367 | if e <= epoch: 368 | learning_rate = lr 369 | else: 370 | return learning_rate 371 | 372 | 373 | def parse_arguments(argv): 374 | parser = argparse.ArgumentParser() 375 | 376 | parser.add_argument('--logs_base_dir', type=str, 377 | help='Directory where to write event logs.', default='logs/facenet_ms_mp') 378 | parser.add_argument('--models_base_dir', type=str, 379 | help='Directory where to write trained models and checkpoints.', default='models/facenet_ms_mp') 380 | parser.add_argument('--gpu_memory_fraction', type=float, 381 | help='Upper bound on the amount of GPU memory that will be used by the process.', default=.9) 382 | parser.add_argument('--pretrained_model', type=str, 383 | help='Load a pretrained model before training starts.') 384 | parser.add_argument('--loss_type', type=str, 385 | help='Which type of loss to use.',default='softmax') 386 | parser.add_argument('--network', type=str, 387 | help='Which network is used to extract features.',default='resnet50') 388 | parser.add_argument('--data_dir', type=str, 389 | help='Path to the data directory containing aligned face patches.
Multiple directories are separated by colons.', 390 | default='~/datasets/casia/casia_maxpy_mtcnnalign_182_160') 391 | parser.add_argument('--list_file', type=str, 392 | help='Image list file') 393 | parser.add_argument('--model_def', type=str, 394 | help='Model definition. Points to a module containing the definition of the inference graph.', default='models.inception_resnet_v1') 395 | parser.add_argument('--max_nrof_epochs', type=int, 396 | help='Number of epochs to run.', default=500) 397 | parser.add_argument('--batch_size', type=int, 398 | help='Number of images to process in a batch.', default=90) 399 | parser.add_argument('--image_size', type=int, 400 | help='Image size in pixels.', default=160) 401 | parser.add_argument('--image_src_size', type=int, 402 | help='Source image size in pixels.', default=256) 403 | parser.add_argument('--image_height', type=int, 404 | help='Image height in pixels.', default=112) 405 | parser.add_argument('--image_width', type=int, 406 | help='Image width in pixels.', default=96) 407 | parser.add_argument('--people_per_batch', type=int, 408 | help='Number of people per batch.', default=30) 409 | parser.add_argument('--num_gpus', type=int, 410 | help='Number of gpus.', default=4) 411 | parser.add_argument('--images_per_person', type=int, 412 | help='Number of images per person.', default=5) 413 | parser.add_argument('--epoch_size', type=int, 414 | help='Number of batches per epoch.', default=600) 415 | parser.add_argument('--alpha', type=float, 416 | help='Cosine margin (the m of CosFace).', default=0.15) 417 | parser.add_argument('--scale', type=float, 418 | help='Scale used as the fixed norm of weights and features.', default=64.) 419 | parser.add_argument('--weight', type=float, 420 | help='Weight to balance the dist and th losses.', default=3.) 421 | parser.add_argument('--embedding_size', type=int, 422 | help='Dimensionality of the embedding.', default=256) 423 | parser.add_argument('--random_crop', 424 | help='Performs random cropping of training images. If false, the center image_size pixels from the training images are used. ' + 425 | 'If the size of the images in the data directory is equal to image_size no cropping is performed', action='store_true') 426 | parser.add_argument('--random_flip', 427 | help='Performs random horizontal flipping of training images.', action='store_true') 428 | parser.add_argument('--fc_bn', 429 | help='Whether to apply batch norm after the fc layer.', action='store_true') 430 | parser.add_argument('--keep_probability', type=float, 431 | help='Keep probability of dropout for the fully connected layer(s).', default=1.0) 432 | parser.add_argument('--weight_decay', type=float, 433 | help='L2 weight regularization.', default=0.0) 434 | parser.add_argument('--optimizer', type=str, choices=['ADAGRAD', 'ADADELTA', 'ADAM', 'RMSPROP', 'MOM','SGD'], 435 | help='The optimization algorithm to use', default='ADAGRAD') 436 | parser.add_argument('--center_loss_factor', type=float, 437 | help='Center loss factor.', default=0.0) 438 | parser.add_argument('--center_loss_alfa', type=float, 439 | help='Center update rate for center loss.', default=0.95) 440 | parser.add_argument('--learning_rate', type=float, 441 | help='Initial learning rate.
If set to a negative value a learning rate ' + 442 | 'schedule can be specified in the file "learning_rate_schedule.txt"', default=0.1) 443 | parser.add_argument('--learning_rate_decay_epochs', type=int, 444 | help='Number of epochs between learning rate decay.', default=100) 445 | parser.add_argument('--learning_rate_decay_factor', type=float, 446 | help='Learning rate decay factor.', default=1.0) 447 | parser.add_argument('--moving_average_decay', type=float, 448 | help='Exponential decay for tracking of training parameters.', default=0.9999) 449 | parser.add_argument('--seed', type=int, 450 | help='Random seed.', default=666) 451 | parser.add_argument('--learning_rate_schedule_file', type=str, 452 | help='File containing the learning rate schedule that is used when learning_rate is set to -1.', default='data/learning_rate_schedule.txt') 453 | 454 | 455 | return parser.parse_args(argv) 456 | 457 | 458 | if __name__ == '__main__': 459 | main(parse_arguments(sys.argv[1:])) 460 | --------------------------------------------------------------------------------
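The cosface branch of the training code calls ```utils.cos_loss```, whose body is not included in this dump. For reference, here is a minimal sketch of the large-margin cosine loss (LMCL) from the CosFace paper, matching the ```alpha``` (margin) and ```scale``` arguments used above; the function and variable names are illustrative, not the repository's.

```python
import tensorflow as tf

def cos_loss_sketch(features, labels, num_classes, alpha=0.35, scale=64.0):
    """Sketch of the large-margin cosine loss: s * (cos(theta_y) - m) for the
    target class, s * cos(theta_j) for all others, fed into softmax cross-entropy."""
    feature_dim = features.get_shape()[1]
    weights = tf.get_variable('cos_loss_weight', shape=[feature_dim, num_classes],
                              initializer=tf.contrib.layers.xavier_initializer())
    # Normalizing both features and class weights turns the logits into cosines.
    features = tf.nn.l2_normalize(features, 1)
    weights = tf.nn.l2_normalize(weights, 0)
    cos_theta = tf.matmul(features, weights)   # [batch, num_classes]
    one_hot = tf.one_hot(labels, num_classes)
    # Subtract the margin alpha only from the target-class cosine, then rescale by s.
    logits = scale * (cos_theta - alpha * one_hot)
    return tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=one_hot, logits=logits))
```

With the defaults from train.sh (```SCALE=64.```, ```ALPHA=0.35```), a correct classification requires the target cosine to beat every other class cosine by the margin, which is what pushes the embeddings apart on the hypersphere.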