├── data ├── tmp1.py ├── generateTFRecord.py ├── mxrec2folders.py ├── tmp.py └── classificationDataTool.py ├── configs ├── config_ms1m_res101.yaml ├── config_ms1m_res50.yaml ├── config_finetune.yaml ├── config_ms1m_100.yaml └── config_ms1m_200.yaml ├── losses └── logit_loss.py ├── model.py ├── utils.py ├── backbones ├── utils.py ├── ResNet_v2.py ├── ResNet_v1.py └── modifiedResNet_v2.py ├── eval └── utils.py ├── get_embd.py ├── evaluate.py ├── README.md ├── train_softmax.py └── finetune_softmax.py /data/tmp1.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | from scipy import misc 4 | 5 | import numpy as np 6 | import pickle 7 | 8 | 9 | read_path = r'F:\FaceDataset\faces_vgg_112x112\lfw.bin' 10 | save_dir = r'F:\FaceDataset\faces_vgg_112x112\lfw_img_sample' 11 | 12 | 13 | bins, issame_list = pickle.load(open(read_path, 'rb'), encoding='bytes') 14 | cnt = 0 15 | for bin in bins: 16 | img = misc.imread(io.BytesIO(bin)) 17 | print('============================================') 18 | print(img.dtype) 19 | print(np.max(img)) 20 | print(np.min(img)) 21 | print('============================================') 22 | misc.imsave(os.path.join(save_dir, str(cnt)+'.jpg'), img) 23 | cnt += 1 24 | if cnt >= 10: 25 | break -------------------------------------------------------------------------------- /data/generateTFRecord.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from classificationDataTool import ClassificationImageData 4 | 5 | 6 | def get_args(): 7 | parser = argparse.ArgumentParser() 8 | 9 | parser.add_argument('--mode', type=str, help='from which to generate TFRecord, folders or mxrec', default='mxrec') 10 | parser.add_argument('--image_size', type=int, help='image size', default=112) 11 | parser.add_argument('--read_dir', type=str, help='directory to read data', default='') 12 | parser.add_argument('--save_path', type=str, help='path to save TFRecord file', default='') 13 | 14 | return parser.parse_args() 15 | 16 | 17 | if __name__ == "__main__": 18 | args = get_args() 19 | cid = ClassificationImageData(img_size=args.image_size) 20 | if args.mode == 'folders': 21 | cid.write_tfrecord_from_folders(args.read_dir, args.save_path) 22 | elif args.mode == 'mxrec': 23 | cid.write_tfrecord_from_mxrec(args.read_dir, args.save_path) 24 | else: 25 | raise('ERROR: wrong mode (only folders and mxrec are supported)') 26 | -------------------------------------------------------------------------------- /data/mxrec2folders.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import io 3 | import os 4 | from scipy import misc 5 | 6 | import numpy as np 7 | 8 | read_dir = r'F:\FaceDataset\faces_webface_112x112' 9 | save_dir = r'F:\FaceDataset\faces_webface_112x112_folders' 10 | 11 | idx_path = os.path.join(read_dir, 'train.idx') 12 | bin_path = os.path.join(read_dir, 'train.rec') 13 | imgrec = mx.recordio.MXIndexedRecordIO(idx_path, bin_path, 'r') 14 | s = imgrec.read_idx(0) 15 | header, _ = mx.recordio.unpack(s) 16 | imgidx = list(range(1, int(header.label[0]))) 17 | total = len(imgidx) 18 | cnt = 0 19 | for i in imgidx: 20 | img_info = imgrec.read_idx(i) 21 | header, img = mx.recordio.unpack(img_info) 22 | l = int(header.label) 23 | img = io.BytesIO(img) 24 | img = misc.imread(img) 25 | cur_save_dir = os.path.join(save_dir, str(l)) 26 | if not os.path.exists(cur_save_dir): 27 | os.makedirs(cur_save_dir) 28 | 
misc.imsave(os.path.join(cur_save_dir, str(cnt)+'.jpg'), img) 29 | cnt += 1 30 | print('%d/%d' % (cnt, total), end='\r') 31 | # if cnt >= 10: 32 | # break -------------------------------------------------------------------------------- /data/tmp.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import io 3 | import os 4 | from scipy import misc 5 | 6 | import numpy as np 7 | 8 | read_dir = r'F:\FaceDataset\faces_webface_112x112' 9 | save_dir = r'F:\FaceDataset\faces_webface_112x112\img_sample' 10 | 11 | if not os.path.exists(save_dir): 12 | os.makedirs(save_dir) 13 | 14 | idx_path = os.path.join(read_dir, 'train.idx') 15 | bin_path = os.path.join(read_dir, 'train.rec') 16 | imgrec = mx.recordio.MXIndexedRecordIO(idx_path, bin_path, 'r') 17 | s = imgrec.read_idx(0) 18 | header, _ = mx.recordio.unpack(s) 19 | imgidx = list(range(1, int(header.label[0]))) 20 | total = len(imgidx) 21 | cnt = 0 22 | for i in imgidx: 23 | img_info = imgrec.read_idx(i) 24 | header, img = mx.recordio.unpack(img_info) 25 | l = int(header.label) 26 | img = io.BytesIO(img) 27 | img = misc.imread(img) 28 | print('============================================') 29 | print(img.dtype) 30 | print(np.max(img)) 31 | print(np.min(img)) 32 | print('============================================') 33 | misc.imsave(os.path.join(save_dir, str(cnt)+'.jpg'), img) 34 | cnt += 1 35 | if cnt >= 100: 36 | break -------------------------------------------------------------------------------- /configs/config_ms1m_res101.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_101 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 | 20 | # run params 21 | val_bn_train: False 22 | augment_flag: True 23 | augment_margin: 16 24 | 25 | gpu_num: 1 26 | batch_size: 128 27 | epoch_num: 20 28 | step_per_epoch: 100000 29 | val_freq: 2000 30 | 31 | lr_steps: [40000, 60000, 80000] 32 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 33 | 34 | # paths 35 | pretrained_model: '' 36 | 37 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 38 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 39 | 40 | output_dir: './output' 41 | 42 | -------------------------------------------------------------------------------- /configs/config_ms1m_res50.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_50 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 
| 20 | # run params 21 | val_bn_train: False 22 | augment_flag: True 23 | augment_margin: 16 24 | 25 | gpu_num: 1 26 | batch_size: 256 27 | epoch_num: 20 28 | step_per_epoch: 100000 29 | val_freq: 2000 30 | 31 | lr_steps: [40000, 60000, 80000] 32 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 33 | 34 | 35 | # paths 36 | pretrained_model: '' 37 | 38 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 39 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 40 | 41 | output_dir: './output' 42 | 43 | -------------------------------------------------------------------------------- /configs/config_finetune.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_m_50 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 | 20 | # run params 21 | fixed_epoch_num: 1 22 | val_bn_train: False 23 | augment_flag: True 24 | augment_margin: 16 25 | 26 | gpu_num: 1 27 | batch_size: 100 28 | epoch_num: 20 29 | step_per_epoch: 100000 30 | val_freq: 2000 31 | 32 | lr_steps: [40000, 60000, 80000] 33 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 34 | 35 | 36 | # paths 37 | pretrained_model: '' 38 | 39 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 40 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 41 | 42 | output_dir: './output' 43 | 44 | -------------------------------------------------------------------------------- /configs/config_ms1m_100.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_m_50 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 | 20 | # run params 21 | val_bn_train: False 22 | augment_flag: True 23 | augment_margin: 16 24 | 25 | gpu_num: 1 26 | batch_size: 100 27 | epoch_num: 20 28 | step_per_epoch: 100000 29 | val_freq: 2000 30 | 31 | lr_steps: [40000, 60000, 80000] 32 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 33 | 34 | 35 | # paths 36 | pretrained_model: 
'/data/hhd/InsightFace-tensorflow/output/20190120-133421/checkpoints/ckpt-m-140000' 37 | 38 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 39 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 40 | 41 | output_dir: './output' 42 | 43 | -------------------------------------------------------------------------------- /configs/config_ms1m_200.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_m_50 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 | 20 | # run params 21 | val_bn_train: False 22 | augment_flag: True 23 | augment_margin: 16 24 | 25 | gpu_num: 2 26 | batch_size: 200 27 | epoch_num: 20 28 | step_per_epoch: 100000 29 | val_freq: 2000 30 | 31 | lr_steps: [40000, 60000, 80000] 32 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 33 | 34 | 35 | # paths 36 | pretrained_model: '/data/hhd/InsightFace-tensorflow/output/20190122-101014/checkpoints/ckpt-m-124000' 37 | 38 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 39 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 40 | 41 | output_dir: './output' 42 | 43 | -------------------------------------------------------------------------------- /losses/logit_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | import math 5 | 6 | 7 | W_INIT = tf.contrib.layers.xavier_initializer(uniform=False) 8 | 9 | 10 | def get_logits(embds, labels, config, w_init=W_INIT, reuse=False, scope='logits'): 11 | with tf.variable_scope(scope, reuse=reuse): 12 | weights = tf.get_variable(name='classify_weight', shape=[embds.get_shape().as_list()[-1], config['class_num']], dtype=tf.float32, initializer=w_init, regularizer=slim.l2_regularizer(config['weight_decay']), trainable=True) 13 | if config['loss_type'] == 'arcface': 14 | return calculate_arcface_logits(embds, weights, labels, config['class_num'], config['logits_scale'], config['logits_margin']) 15 | elif config['loss_type'] == 'softmax': 16 | return slim.fully_connected(embds, num_outputs=config['class_num'], activation_fn=None, normalizer_fn=None, weights_initializer=w_init, 
weights_regularizer=slim.l2_regularizer(config['weight_decay'])) 17 | else: 18 | raise ValueError('Invalid loss type.') 19 | 20 | 21 | def calculate_arcface_logits(embds, weights, labels, class_num, s, m): 22 | embds = tf.nn.l2_normalize(embds, axis=1, name='normed_embd') 23 | weights = tf.nn.l2_normalize(weights, axis=0) 24 | 25 | cos_m = math.cos(m) 26 | sin_m = math.sin(m) 27 | 28 | mm = sin_m * m 29 | 30 | threshold = math.cos(math.pi - m) 31 | 32 | cos_t = tf.matmul(embds, weights, name='cos_t') 33 | 34 | cos_t2 = tf.square(cos_t, name='cos_2') 35 | sin_t2 = tf.subtract(1., cos_t2, name='sin_2') 36 | sin_t = tf.sqrt(sin_t2, name='sin_t') 37 | cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt') 38 | cond_v = cos_t - threshold 39 | cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool) 40 | keep_val = s*(cos_t - mm) 41 | cos_mt_temp = tf.where(cond, cos_mt, keep_val) 42 | mask = tf.one_hot(labels, depth=class_num, name='one_hot_mask') 43 | inv_mask = tf.subtract(1., mask, name='inverse_mask') 44 | s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t') 45 | output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_logits') 46 | return output 47 | 48 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | from backbones import modifiedResNet_v2, ResNet_v2 5 | 6 | 7 | def get_embd(inputs, is_training_dropout, is_training_bn, config, reuse=False, scope='embd_extractor'): 8 | with tf.variable_scope(scope, reuse=reuse): 9 | net = inputs 10 | end_points = {} 11 | if config['backbone_type'].startswith('resnet_v2_m'): 12 | arg_sc = modifiedResNet_v2.resnet_arg_scope(weight_decay=config['weight_decay'], batch_norm_decay=config['bn_decay']) 13 | with slim.arg_scope(arg_sc): 14 | if config['backbone_type'] == 'resnet_v2_m_50': 15 | net, end_points = modifiedResNet_v2.resnet_v2_m_50(net, is_training=is_training_bn, return_raw=True) 16 | elif config['backbone_type'] == 'resnet_v2_m_101': 17 | net, end_points = modifiedResNet_v2.resnet_v2_m_101(net, is_training=is_training_bn, return_raw=True) 18 | elif config['backbone_type'] == 'resnet_v2_m_152': 19 | net, end_points = modifiedResNet_v2.resnet_v2_m_152(net, is_training=is_training_bn, return_raw=True) 20 | elif config['backbone_type'] == 'resnet_v2_m_200': 21 | net, end_points = modifiedResNet_v2.resnet_v2_m_200(net, is_training=is_training_bn, return_raw=True) 22 | else: 23 | raise ValueError('Invalid backbone type.') 24 | elif config['backbone_type'].startswith('resnet_v2'): 25 | arg_sc = ResNet_v2.resnet_arg_scope(weight_decay=config['weight_decay'], batch_norm_decay=config['bn_decay']) 26 | with slim.arg_scope(arg_sc): 27 | if config['backbone_type'] == 'resnet_v2_50': 28 | net, end_points = ResNet_v2.resnet_v2_50(net, is_training=is_training_bn, return_raw=True) 29 | elif config['backbone_type'] == 'resnet_v2_101': 30 | net, end_points = ResNet_v2.resnet_v2_101(net, is_training=is_training_bn, return_raw=True) 31 | elif config['backbone_type'] == 'resnet_v2_152': 32 | net, end_points = ResNet_v2.resnet_v2_152(net, is_training=is_training_bn, return_raw=True) 33 | elif config['backbone_type'] == 'resnet_v2_200': 34 | net, end_points = ResNet_v2.resnet_v2_200(net, is_training=is_training_bn, return_raw=True) 35 | else: 36 | raise ValueError('Invalid backbone type.') 
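        # Output head: with out_type 'E' (the setting used by all bundled configs, matching the
        # BN-Dropout-FC-BN output setting described in the ArcFace paper), the raw backbone feature
        # map below is passed through batch norm -> dropout -> flatten -> fully connected
        # (embd_size) -> batch norm to produce the final embedding. The batch-norm layers follow
        # is_training_bn while dropout follows is_training_dropout, so the two phases can be
        # toggled independently via the train_phase_bn / train_phase_dropout placeholders built in
        # get_embd.py and evaluate.py.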
37 | 38 | if config['out_type'] == 'E': 39 | with slim.arg_scope(arg_sc): 40 | net = slim.batch_norm(net, activation_fn=None, is_training=is_training_bn) 41 | net = slim.dropout(net, keep_prob=config['keep_prob'], is_training=is_training_dropout) 42 | net = slim.flatten(net) 43 | net = slim.fully_connected(net, config['embd_size'], normalizer_fn=None, activation_fn=None) 44 | net = slim.batch_norm(net, scale=False, activation_fn=None, is_training=is_training_bn) 45 | end_points['embds'] = net 46 | else: 47 | raise ValueError('Invalid out type.') 48 | 49 | return net, end_points 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def check_folders(paths): 7 | if isinstance(paths, str): 8 | paths = [paths] 9 | for path in paths: 10 | if not os.path.exists(path): 11 | os.makedirs(path) 12 | 13 | 14 | def average_gradients(tower_grads): 15 | """Calculate the average gradient for each shared variable across all towers. 16 | Note that this function provides a synchronization point across all towers. 17 | Args: 18 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 19 | is over individual gradients. The inner list is over the gradient 20 | calculation for each tower. 21 | Returns: 22 | List of pairs of (gradient, variable) where the gradient has been averaged 23 | across all towers. 24 | """ 25 | average_grads = [] 26 | for grad_and_vars in zip(*tower_grads): 27 | # Note that each grad_and_vars looks like the following: 28 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 29 | grads = [] 30 | for g, _ in grad_and_vars: 31 | # Add 0 dimension to the gradients to represent the tower. 32 | expanded_g = tf.expand_dims(g, 0) 33 | 34 | # Append on a 'tower' dimension which we will average over below. 35 | grads.append(expanded_g) 36 | 37 | # Average over the 'tower' dimension. 38 | grad = tf.concat(axis=0, values=grads) 39 | grad = tf.reduce_mean(grad, 0) 40 | 41 | # Keep in mind that the Variables are redundant because they are shared 42 | # across towers. So .. we will just return the first tower's pointer to 43 | # the Variable. 44 | v = grad_and_vars[0][1] 45 | grad_and_var = (grad, v) 46 | average_grads.append(grad_and_var) 47 | return average_grads 48 | 49 | 50 | def tensor_description(var): 51 | """Returns a compact and informative string about a tensor. 52 | Args: 53 | var: A tensor variable. 54 | Returns: 55 | a string with type and size, e.g.: (float32 1x8x8x1024). 56 | """ 57 | description = '(' + str(var.dtype.name) + ' ' 58 | sizes = var.get_shape() 59 | for i, size in enumerate(sizes): 60 | description += str(size) 61 | if i < len(sizes) - 1: 62 | description += 'x' 63 | description += ')' 64 | return description 65 | 66 | 67 | def analyze_vars(variables, path): 68 | """Prints the names and shapes of the variables. 69 | Args: 70 | variables: list of variables, for example tf.global_variables(). 71 | print_info: Optional, if true print variables and their shape. 72 | Returns: 73 | (total size of the variables, total bytes of the variables) 74 | """ 75 | f = open(path, 'w') 76 | f.write('---------\n') 77 | f.write('Variables: name (type shape) [size]\n') 78 | f.write('---------\n') 79 | total_size = 0 80 | total_bytes = 0 81 | for var in variables: 82 | # if var.num_elements() is None or [] assume size 0. 
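        # var.dtype.size is the byte width of a single element, so var_bytes below is the
        # variable's full in-memory footprint; these per-variable counts feed the totals
        # written at the end of the summary file.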
83 | var_size = var.get_shape().num_elements() or 0 84 | var_bytes = var_size * var.dtype.size 85 | total_size += var_size 86 | total_bytes += var_bytes 87 | f.write(var.name+' '+tensor_description(var)+' '+'[%d, bytes: %d]\n' % (var_size, var_bytes)) 88 | f.write('Total size of variables: %d\n' % total_size) 89 | f.write('Total bytes of variables: %d\n' % total_bytes) 90 | return total_size, total_bytes -------------------------------------------------------------------------------- /backbones/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from collections import namedtuple 6 | 7 | import tensorflow as tf 8 | import tensorflow.contrib.slim as slim 9 | 10 | 11 | class Block(namedtuple('Block', ['scope', 'unit_fn', 'args'])): 12 | """A named tuple describing a ResNet block. 13 | 14 | Its parts are: 15 | scope: The scope of the `Block`. 16 | unit_fn: The ResNet unit function which takes as input a `Tensor` and returns another `Tensor` with the output of the ResNet unit. 17 | args: A list of length equal to the number of units in the `Block`. The list contains one (depth, depth_bottleneck, stride) tuple for each unit in the block to serve as argument to unit_fn. 18 | """ 19 | 20 | 21 | def subsample(inputs, factor, scope=None): 22 | if factor == 1: 23 | return inputs 24 | else: 25 | return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) # padding='VALID' 26 | 27 | 28 | def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): 29 | if stride == 1: 30 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate, padding='SAME', scope=scope) 31 | else: 32 | kernel_size_effective = kernel_size+(kernel_size-1)*(rate-1) 33 | pad_total = kernel_size_effective-1 34 | pad_beg = pad_total//2 35 | pad_end = pad_total-pad_beg 36 | inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) # zero padding 37 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride, rate=rate, padding='VALID', scope=scope) 38 | 39 | 40 | @slim.add_arg_scope 41 | def stack_blocks_dense(net, blocks, output_stride=None, store_non_strided_activations=False, outputs_collections=None): 42 | current_stride = 1 43 | rate = 1 44 | 45 | for block in blocks: 46 | with tf.variable_scope(block.scope, 'block', [net]) as sc: 47 | block_stride = 1 48 | for i, unit in enumerate(block.args): 49 | if store_non_strided_activations and i == len(block.args)-1: 50 | block_stride = unit.get('stride', 1) 51 | unit = dict(unit, stride=1) 52 | with tf.variable_scope('unit_%d' % (i+1), values=[net]): 53 | if output_stride is not None and current_stride == output_stride: 54 | net = block.unit_fn(net, rate=rate, **dict(unit, stride=1)) 55 | rate *= unit.get('stride', 1) 56 | else: 57 | net = block.unit_fn(net, rate=1, **unit) 58 | current_stride *= unit.get('stride', 1) 59 | if output_stride is not None and current_stride > output_stride: 60 | raise ValueError('The target output_stride cannot be reached.') 61 | net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) 62 | 63 | if output_stride is not None and current_stride == output_stride: 64 | rate *= block_stride 65 | else: 66 | net = subsample(net, block_stride) 67 | current_stride *= block_stride 68 | if output_stride is not None and current_stride > output_stride: 69 | raise ValueError('The target output_stride cannot be 
reached.') 70 | if output_stride is not None and current_stride != output_stride: 71 | raise ValueError('The target output_stride cannot be reached.') 72 | return net 73 | 74 | 75 | def resnet_arg_scope(weight_decay=0.0001, 76 | batch_norm_decay=0.9, 77 | batch_norm_epsilon=2e-5, 78 | batch_norm_scale=True, 79 | activation_fn=tf.nn.leaky_relu, 80 | use_batch_norm=True, 81 | batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS): 82 | batch_norm_params = { 83 | 'decay': batch_norm_decay, 84 | 'epsilon': batch_norm_epsilon, 85 | 'scale': batch_norm_scale, 86 | 'updates_collections': batch_norm_updates_collections, 87 | 'fused': None, # Use fused batch norm if possible. 88 | 'param_regularizers': {'gamma': slim.l2_regularizer(weight_decay)}, 89 | } 90 | 91 | with slim.arg_scope( 92 | [slim.conv2d], 93 | weights_regularizer=slim.l2_regularizer(weight_decay), 94 | weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False), 95 | activation_fn=activation_fn, 96 | normalizer_fn=slim.batch_norm if use_batch_norm else None, 97 | normalizer_params=batch_norm_params): 98 | with slim.arg_scope([slim.batch_norm], **batch_norm_params): 99 | with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc: 100 | return arg_sc -------------------------------------------------------------------------------- /eval/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | 5 | from sklearn.model_selection import KFold 6 | from scipy import interpolate 7 | 8 | 9 | def distance(embeddings1, embeddings2, distance_metric=0): 10 | if distance_metric==0: 11 | # Euclidian distance 12 | embeddings1 = embeddings1/np.linalg.norm(embeddings1, axis=1, keepdims=True) 13 | embeddings2 = embeddings2/np.linalg.norm(embeddings2, axis=1, keepdims=True) 14 | diff = np.subtract(embeddings1, embeddings2) 15 | dist = np.sum(np.square(diff),1) 16 | elif distance_metric==1: 17 | # Distance based on cosine similarity 18 | dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) 19 | norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) 20 | similarity = dot/norm 21 | dist = np.arccos(similarity) / math.pi 22 | else: 23 | raise 'Undefined distance metric %d' % distance_metric 24 | 25 | return dist 26 | 27 | 28 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, distance_metric=0, nrof_folds=10): 29 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 30 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 31 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 32 | nrof_thresholds = len(thresholds) 33 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 34 | 35 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 36 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 37 | accuracy = np.zeros((nrof_folds)) 38 | 39 | dist = distance(embeddings1, embeddings2, distance_metric) 40 | indices = np.arange(nrof_pairs) 41 | 42 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 43 | 44 | # Find the best threshold for the fold 45 | acc_train = np.zeros((nrof_thresholds)) 46 | for threshold_idx, threshold in enumerate(thresholds): 47 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 48 | best_threshold_index = np.argmax(acc_train) 49 | for threshold_idx, threshold in enumerate(thresholds): 50 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], 
actual_issame[test_set]) 51 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 52 | 53 | tpr = np.mean(tprs,0) 54 | fpr = np.mean(fprs,0) 55 | return tpr, fpr, accuracy 56 | 57 | def calculate_accuracy(threshold, dist, actual_issame): 58 | predict_issame = np.less(dist, threshold) 59 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 60 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 61 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 62 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 63 | 64 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 65 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 66 | acc = float(tp+tn)/dist.size 67 | return tpr, fpr, acc 68 | 69 | 70 | def calculate_tar_far(threshold, dist, actual_issame): 71 | predict_issame = np.less(dist, threshold) 72 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 73 | false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 74 | n_same = np.sum(actual_issame) 75 | n_diff = np.sum(np.logical_not(actual_issame)) 76 | tar = float(true_accept) / float(n_same) 77 | far = float(false_accept) / float(n_diff) 78 | return tar, far 79 | 80 | 81 | def calculate_tar(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False): 82 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 83 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 84 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 85 | nrof_thresholds = len(thresholds) 86 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 87 | 88 | tar = np.zeros(nrof_folds) 89 | far = np.zeros(nrof_folds) 90 | 91 | indices = np.arange(nrof_pairs) 92 | 93 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 94 | if subtract_mean: 95 | mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) 96 | else: 97 | mean = 0.0 98 | dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) 99 | 100 | # Find the threshold that gives FAR = far_target 101 | far_train = np.zeros(nrof_thresholds) 102 | for threshold_idx, threshold in enumerate(thresholds): 103 | _, far_train[threshold_idx] = calculate_tar_far(threshold, dist[train_set], actual_issame[train_set]) 104 | if np.max(far_train)>=far_target: 105 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 106 | threshold = f(far_target) 107 | else: 108 | threshold = 0.0 109 | 110 | tar[fold_idx], far[fold_idx] = calculate_tar_far(threshold, dist[test_set], actual_issame[test_set]) 111 | 112 | tar_mean = np.mean(tar) 113 | far_mean = np.mean(far) 114 | tar_std = np.std(tar) 115 | return tar_mean, tar_std, far_mean -------------------------------------------------------------------------------- /data/classificationDataTool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | from scipy import misc 4 | import numpy as np 5 | import random 6 | import sys 7 | import io 8 | 9 | 10 | def to_rgb(img): 11 | if img.ndim < 3: 12 | h, w = img.shape 13 | ret = np.empty((h, w, 3), dtype=np.uint8) 14 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 15 | return ret 16 | else: 17 | return img 18 | 19 | 20 | def augmentation(image, aug_img_size): 21 | ori_image_shape = tf.shape(image) 22 | image = 
tf.image.random_flip_left_right(image) 23 | # image = tf.image.resize_images(image, [aug_img_size, aug_img_size]) 24 | # image = tf.random_crop(image, ori_image_shape) 25 | return image 26 | 27 | 28 | class ClassificationImageData: 29 | 30 | def __init__(self, img_size=112, augment_flag=True, augment_margin=16): 31 | self.img_size = img_size 32 | self.augment_flag = augment_flag 33 | self.augment_margin = augment_margin 34 | 35 | 36 | def get_path_label(self, root): 37 | ids = list(os.listdir(root)) 38 | ids.sort() 39 | self.cat_num = len(ids) 40 | id_dict = dict(zip(ids, list(range(self.cat_num)))) 41 | paths = [] 42 | labels = [] 43 | for i in ids: 44 | cur_dir = os.path.join(root, i) 45 | fns = os.listdir(cur_dir) 46 | paths += [os.path.join(cur_dir, fn) for fn in fns] 47 | labels += [id_dict[i]]*len(fns) 48 | return paths, labels 49 | 50 | 51 | def image_processing(self, img): 52 | img.set_shape([None, None, 3]) 53 | img = tf.image.resize_images(img, [self.img_size, self.img_size]) 54 | 55 | if self.augment_flag : 56 | augment_size = self.img_size + self.augment_margin 57 | img = augmentation(img, augment_size) 58 | 59 | img = tf.cast(img, tf.float32) / 127.5 - 1 60 | 61 | return img 62 | 63 | 64 | def add_record(self, img, label, writer): 65 | img = to_rgb(img) 66 | img = misc.imresize(img, [self.img_size, self.img_size]).astype(np.uint8) 67 | shape = img.shape 68 | tf_features = tf.train.Features(feature={ 69 | "img": tf.train.Feature(bytes_list=tf.train.BytesList(value=[img.tostring()])), 70 | "shape": tf.train.Feature(int64_list=tf.train.Int64List(value=list(shape))), 71 | "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])) 72 | }) 73 | tf_example = tf.train.Example(features = tf_features) 74 | tf_serialized = tf_example.SerializeToString() 75 | writer.write(tf_serialized) 76 | 77 | 78 | def write_tfrecord_from_folders(self, read_dir, write_path): 79 | print('write tfrecord from folders...') 80 | writer = tf.python_io.TFRecordWriter(write_path, options=None) 81 | paths, labels = self.get_path_label(read_dir) 82 | assert(len(paths) == len(labels)) 83 | total = len(paths) 84 | cnt = 0 85 | for p, l in zip(paths, labels): 86 | img = misc.imread(p).astype(np.uint8) 87 | self.add_record(img, l, writer) 88 | cnt += 1 89 | print('%d/%d' % (cnt, total), end='\r') 90 | writer.close() 91 | print('done![%d/%d]' % (cnt, total)) 92 | print('class num: %d' % self.cat_num) 93 | 94 | 95 | def write_tfrecord_from_mxrec(self, read_dir, write_path): 96 | import mxnet as mx 97 | print('write tfrecord from mxrec...') 98 | idx_path = os.path.join(read_dir, 'train.idx') 99 | bin_path = os.path.join(read_dir, 'train.rec') 100 | imgrec = mx.recordio.MXIndexedRecordIO(idx_path, bin_path, 'r') 101 | s = imgrec.read_idx(0) 102 | header, _ = mx.recordio.unpack(s) 103 | imgidx = list(range(1, int(header.label[0]))) 104 | writer = tf.python_io.TFRecordWriter(write_path, options=None) 105 | total = len(imgidx) 106 | cnt = 0 107 | labels = [] 108 | for i in imgidx: 109 | img_info = imgrec.read_idx(i) 110 | header, img = mx.recordio.unpack(img_info) 111 | l = int(header.label) 112 | labels.append(l) 113 | img = io.BytesIO(img) 114 | img = misc.imread(img).astype(np.uint8) 115 | self.add_record(img, l, writer) 116 | cnt += 1 117 | print('%d/%d' % (cnt, total), end='\r') 118 | writer.close() 119 | self.cat_num = len(set(labels)) 120 | print('done![%d/%d]' % (cnt, total)) 121 | print('class num: %d' % self.cat_num) 122 | 123 | 124 | def parse_function(self, example_proto): 125 | dics = { 126 | 
'img': tf.FixedLenFeature(shape=(), dtype=tf.string), 127 | 'shape': tf.FixedLenFeature(shape=(3,), dtype=tf.int64), 128 | 'label': tf.FixedLenFeature(shape=(), dtype=tf.int64) 129 | } 130 | parsed_example = tf.parse_single_example(example_proto, dics) 131 | parsed_example['img'] = tf.decode_raw(parsed_example['img'], tf.uint8) 132 | parsed_example['img'] = tf.reshape(parsed_example['img'], parsed_example['shape']) 133 | return self.image_processing(parsed_example['img']), parsed_example['label'] 134 | 135 | 136 | def read_TFRecord(self, filenames): 137 | dataset = tf.data.TFRecordDataset(filenames, buffer_size=256<<20) 138 | return dataset.map(self.parse_function, num_parallel_calls=8) 139 | 140 | -------------------------------------------------------------------------------- /get_embd.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import yaml 4 | import pickle 5 | import argparse 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from scipy import misc 10 | 11 | from model import get_embd 12 | from eval.utils import calculate_roc, calculate_tar 13 | 14 | 15 | def get_args(): 16 | parser = argparse.ArgumentParser() 17 | 18 | parser.add_argument('--mode', type=str, default='build', help='model mode: build') 19 | parser.add_argument('--config_path', type=str, default='./configs/config_ms1m_100.yaml', help='config path, used when mode is build') 20 | parser.add_argument('--model_path', type=str, default='/data/hhd/InsightFace-tensorflow/output/20190116-130753/checkpoints/ckpt-m-116000', help='model path') 21 | parser.add_argument('--read_path', type=str, default='', help='path to image file or directory to images') 22 | parser.add_argument('--save_path', type=str, default='embds.pkl', help='path to save embds') 23 | parser.add_argument('--train_mode', type=int, default=0, help='whether set train phase to True when getting embds. 
zero means False, one means True') 24 | 25 | return parser.parse_args() 26 | 27 | 28 | def load_image(path, image_size): 29 | print('reading %s' % path) 30 | if os.path.isdir(path): 31 | paths = list(os.listdir(path)) 32 | else: 33 | paths = [path] 34 | images = [] 35 | images_f = [] 36 | for path in paths: 37 | img = misc.imread(path) 38 | img = misc.imresize(img, [image_size, image_size]) 39 | # img = img[s:s+image_size, s:s+image_size, :] 40 | img_f = np.fliplr(img) 41 | img = img/127.5-1.0 42 | img_f = img_f/127.5-1.0 43 | images.append(img) 44 | images_f.append(img_f) 45 | fns = [os.path.basename(p) for p in paths] 46 | print('done!') 47 | return (np.array(images), np.array(images_f), fns) 48 | 49 | 50 | 51 | def evaluate(embeddings, actual_issame, far_target=1e-3, distance_metric=0, nrof_folds=10): 52 | thresholds = np.arange(0, 4, 0.01) 53 | if distance_metric == 1: 54 | thresholdes = np.arange(0, 1, 0.0025) 55 | embeddings1 = embeddings[0::2] 56 | embeddings2 = embeddings[1::2] 57 | tpr, fpr, accuracy = calculate_roc(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), distance_metric=distance_metric, nrof_folds=nrof_folds) 58 | tar, tar_std, far = calculate_tar(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), far_target=far_target, distance_metric=distance_metric, nrof_folds=nrof_folds) 59 | acc_mean = np.mean(accuracy) 60 | acc_std = np.std(accuracy) 61 | return tpr, fpr, acc_mean, acc_std, tar, tar_std, far 62 | 63 | 64 | def run_embds(sess, images, batch_size, image_size, train_mode, embds_ph, image_ph, train_ph_dropout, train_ph_bn): 65 | if train_mode >= 1: 66 | train = True 67 | else: 68 | train = False 69 | batch_num = len(images)//batch_size 70 | left = len(images)%batch_size 71 | embds = [] 72 | for i in range(batch_num): 73 | image_batch = images[i*batch_size: (i+1)*batch_size] 74 | cur_embd = sess.run(embds_ph, feed_dict={image_ph: image_batch, train_ph_dropout: train, train_ph_bn: train}) 75 | embds += list(cur_embd) 76 | print('%d/%d' % (i, batch_num), end='\r') 77 | if left > 0: 78 | image_batch = np.zeros([batch_size, image_size, image_size, 3]) 79 | image_batch[:left, :, :, :] = images[-left:] 80 | cur_embd = sess.run(embds_ph, feed_dict={image_ph: image_batch, train_ph_dropout: train, train_ph_bn: train}) 81 | embds += list(cur_embd)[:left] 82 | print() 83 | print('done!') 84 | return np.array(embds) 85 | 86 | 87 | if __name__ == '__main__': 88 | args = get_args() 89 | if args.mode == 'build': 90 | print('building...') 91 | config = yaml.load(open(args.config_path)) 92 | images = tf.placeholder(dtype=tf.float32, shape=[None, config['image_size'], config['image_size'], 3], name='input_image') 93 | train_phase_dropout = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase') 94 | train_phase_bn = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_last') 95 | embds, _ = get_embd(images, train_phase_dropout, train_phase_bn, config) 96 | print('done!') 97 | tf_config = tf.ConfigProto(allow_soft_placement=True) 98 | tf_config.gpu_options.allow_growth = True 99 | with tf.Session(config=tf_config) as sess: 100 | tf.global_variables_initializer().run() 101 | print('loading...') 102 | saver = tf.train.Saver(var_list=tf.trainable_variables()) 103 | saver.restore(sess, args.model_path) 104 | print('done!') 105 | 106 | batch_size = config['batch_size'] 107 | imgs, imgs_f, fns = load_image(args.read_path, config['image_size']) 108 | print('forward running...') 109 | embds_arr = run_embds(sess, imgs, batch_size, config['image_size'], 
args.train_mode, embds, images, train_phase_dropout, train_phase_bn) 110 | embds_f_arr = run_embds(sess, imgs_f, batch_size, config['image_size'], args.train_mode, embds, images, train_phase_dropout, train_phase_bn) 111 | embds_arr = embds_arr/np.linalg.norm(embds_arr, axis=1, keepdims=True)+embds_f_arr/np.linalg.norm(embds_f_arr, axis=1, keepdims=True) 112 | embds_arr = embds_arr/np.linalg.norm(embds_arr, axis=1, keepdims=True) 113 | print('done!') 114 | print('saving...') 115 | embds_dict = dict(*zip(fns, list(embds_arr))) 116 | pickle.dump(embds_dict, open(args.save_path, 'wb')) 117 | print('done!') 118 | 119 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import yaml 4 | import pickle 5 | import argparse 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from scipy import misc 10 | 11 | from model import get_embd 12 | from eval.utils import calculate_roc, calculate_tar 13 | 14 | 15 | def get_args(): 16 | parser = argparse.ArgumentParser() 17 | 18 | parser.add_argument('--mode', type=str, default='build', help='model mode: build') 19 | parser.add_argument('--config_path', type=str, default='./configs/config_ms1m_100.yaml', help='config path, used when mode is build') 20 | parser.add_argument('--model_path', type=str, default='/data/hhd/InsightFace-tensorflow/output/20190116-130753/checkpoints/ckpt-m-116000', help='model path') 21 | parser.add_argument('--val_data', type=str, default='', help='val data, a dict with key as data name, value as data path') 22 | parser.add_argument('--train_mode', type=int, default=0, help='whether set train phase to True when getting embds. zero means False, one means True') 23 | parser.add_argument('--target_far', type=float, default=1e-3, help='target far when calculate tar') 24 | 25 | return parser.parse_args() 26 | 27 | 28 | def load_bin(path, image_size): 29 | print('reading %s' % path) 30 | bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes') 31 | num = len(bins) 32 | images = np.zeros(shape=[num, image_size, image_size, 3], dtype=np.float32) 33 | images_f = np.zeros(shape=[num, image_size, image_size, 3], dtype=np.float32) 34 | # m = config['augment_margin'] 35 | # s = int(m/2) 36 | cnt = 0 37 | for bin in bins: 38 | img = misc.imread(io.BytesIO(bin)) 39 | img = misc.imresize(img, [image_size, image_size]) 40 | # img = img[s:s+image_size, s:s+image_size, :] 41 | img_f = np.fliplr(img) 42 | img = img/127.5-1.0 43 | img_f = img_f/127.5-1.0 44 | images[cnt] = img 45 | images_f[cnt] = img_f 46 | cnt += 1 47 | print('done!') 48 | return (images, images_f, issame_list) 49 | 50 | 51 | 52 | def evaluate(embeddings, actual_issame, far_target=1e-3, distance_metric=0, nrof_folds=10): 53 | thresholds = np.arange(0, 4, 0.01) 54 | if distance_metric == 1: 55 | thresholdes = np.arange(0, 1, 0.0025) 56 | embeddings1 = embeddings[0::2] 57 | embeddings2 = embeddings[1::2] 58 | tpr, fpr, accuracy = calculate_roc(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), distance_metric=distance_metric, nrof_folds=nrof_folds) 59 | tar, tar_std, far = calculate_tar(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), far_target=far_target, distance_metric=distance_metric, nrof_folds=nrof_folds) 60 | acc_mean = np.mean(accuracy) 61 | acc_std = np.std(accuracy) 62 | return tpr, fpr, acc_mean, acc_std, tar, tar_std, far 63 | 64 | 65 | def run_embds(sess, images, batch_size, 
image_size, train_mode, embds_ph, image_ph, train_ph_dropout, train_ph_bn): 66 | if train_mode >= 1: 67 | train = True 68 | else: 69 | train = False 70 | batch_num = len(images)//batch_size 71 | left = len(images)%batch_size 72 | embds = [] 73 | for i in range(batch_num): 74 | image_batch = images[i*batch_size: (i+1)*batch_size] 75 | cur_embd = sess.run(embds_ph, feed_dict={image_ph: image_batch, train_ph_dropout: train, train_ph_bn: train}) 76 | embds += list(cur_embd) 77 | print('%d/%d' % (i, batch_num), end='\r') 78 | if left > 0: 79 | image_batch = np.zeros([batch_size, image_size, image_size, 3]) 80 | image_batch[:left, :, :, :] = images[-left:] 81 | cur_embd = sess.run(embds_ph, feed_dict={image_ph: image_batch, train_ph_dropout: train, train_ph_bn: train}) 82 | embds += list(cur_embd)[:left] 83 | print() 84 | print('done!') 85 | return np.array(embds) 86 | 87 | 88 | if __name__ == '__main__': 89 | args = get_args() 90 | if args.mode == 'build': 91 | print('building...') 92 | config = yaml.load(open(args.config_path)) 93 | images = tf.placeholder(dtype=tf.float32, shape=[None, config['image_size'], config['image_size'], 3], name='input_image') 94 | train_phase_dropout = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase') 95 | train_phase_bn = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_last') 96 | embds, _ = get_embd(images, train_phase_dropout, train_phase_bn, config) 97 | print('done!') 98 | tf_config = tf.ConfigProto(allow_soft_placement=True) 99 | tf_config.gpu_options.allow_growth = True 100 | with tf.Session(config=tf_config) as sess: 101 | tf.global_variables_initializer().run() 102 | print('loading...') 103 | saver = tf.train.Saver() 104 | saver.restore(sess, args.model_path) 105 | print('done!') 106 | 107 | batch_size = config['batch_size'] 108 | # batch_size = 32 109 | print('evaluating...') 110 | val_data = {} 111 | if args.val_data == '': 112 | val_data = config['val_data'] 113 | else: 114 | val_data[os.path.basename(args.val_data)] = args.val_data 115 | for k, v in val_data.items(): 116 | imgs, imgs_f, issame = load_bin(v, config['image_size']) 117 | print('forward running...') 118 | embds_arr = run_embds(sess, imgs, batch_size, config['image_size'], args.train_mode, embds, images, train_phase_dropout, train_phase_bn) 119 | embds_f_arr = run_embds(sess, imgs_f, batch_size, config['image_size'], args.train_mode, embds, images, train_phase_dropout, train_phase_bn) 120 | embds_arr = embds_arr/np.linalg.norm(embds_arr, axis=1, keepdims=True)+embds_f_arr/np.linalg.norm(embds_f_arr, axis=1, keepdims=True) 121 | print('done!') 122 | tpr, fpr, acc_mean, acc_std, tar, tar_std, far = evaluate(embds_arr, issame, far_target=args.target_far, distance_metric=0) 123 | print('eval on %s: acc--%1.5f+-%1.5f, tar--%1.5f+-%1.5f@far=%1.5f' % (k, acc_mean, acc_std, tar, tar_std, far)) 124 | print('done!') 125 | else: 126 | raise ValueError("Invalid value for --mode.") 127 | 128 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # InsightFace-tensorflow 2 | 3 | This is a tensorflow implementation of paper "[ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)". This implementation aims at making both usage of pretrained model and training of your own model easier. 
Whether you just want to use pretrained model to do face recognition/verification or you want train/finetune your own model, this project can give you a favor. An introduction on face recognition losses can be found [here](https://luckycallor.xyz/20190123/FaceLosses.html)(in Chinese). 4 | 5 | The implementation referred to [the official implementation in mxnet](https://github.com/deepinsight/insightface) and [the previous third-party implementation in tensorflow](https://github.com/auroua/InsightFace_TF). 6 | 7 | - [InsightFace-tensorflow](#insightface-tensorflow) 8 | - [TODO List](#todo-list) 9 | - [Running Environment](#running-environment) 10 | - [Usage of Pretrained Model](#usage-of-pretrained-model) 11 | - [Pretrained Model](#pretrained-model) 12 | - [Model Evaluation](#model-evaluation) 13 | - [Extract Embedding with Pretrained Model](#extract-embedding-with-pretrained-model) 14 | - [Train Your Own Model](#train-your-own-model) 15 | - [Data Prepare](#data-prepare) 16 | - [Train with Softmax](#train-with-softmax) 17 | - [Finetune with Softmax](#finetune-with-softmax) 18 | 19 | ## TODO List 20 | 21 | 1. *Train with softmax [done!]* 22 | 2. *Model evaluation [done!]* 23 | 3. *Finetune with softmax [done!]* 24 | 4. *Get embedding with pretrained model [done!]* 25 | 5. **Train with triplet loss [todo]** 26 | 6. **Finetune with triplet loss [todo]** 27 | 7. Backbones 28 | 7.1 *ResNet [done!]* 29 | 7.2 **ResNeXt [todo]** 30 | 7.3 **DenseNet [todo]** 31 | 8. Losses 32 | 8.1 *Arcface loss [done!]* 33 | 8.2 **Cosface loss [todo]** 34 | 8.3 **Sphereface loss [todo]** 35 | 8.4 **Triplet loss [todo]** 36 | 9. **Face detection and alignment [todo]** 37 | 38 | ## Running Environment 39 | 40 | - python 3.6 41 | - scipy, numpy (Anaconda 3 recommended) 42 | - tensorflow 1.7.0 43 | - mxnet 1.3.1 (only needed when reading mxrec file) 44 | 45 | ## Usage of Pretrained Model 46 | 47 | Here we open our pretrained models for easier application of face recognition or verification. Codes on model evaluation and extracting embedding from face images are supplied. 
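For a quick picture of how the extracted embeddings are meant to be consumed, the sketch below verifies a pair of faces with a plain dot product. It assumes an `embd.pkl` file produced by [get_embd.py](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/get_embd.py) as described in [Extract Embedding with Pretrained Model](#extract-embedding-with-pretrained-model) below; the image file names and the 0.5 decision threshold are placeholders, not values shipped with this project.

```
import pickle

import numpy as np

# embd.pkl is written by get_embd.py: a dict mapping image file name -> embedding
# (512-d with the bundled configs), already L2-normalized before saving
with open('embd.pkl', 'rb') as f:
    embds = pickle.load(f)

# 'a.jpg' and 'b.jpg' are placeholder keys; use file names from your own --read_path
e1, e2 = embds['a.jpg'], embds['b.jpg']

# for unit-norm vectors, cosine similarity reduces to a dot product
similarity = float(np.dot(e1, e2))

# 0.5 is only an example threshold; tune it on your own validation pairs
print('same person' if similarity > 0.5 else 'different persons')
```

A higher threshold trades recall for precision; the validation `.bin` files used by evaluate.py are a convenient source of labeled pairs for picking it.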
48 | 49 | ### Pretrained Model 50 | 51 | Pretrained models and their accuracies on validation datasets are shown as following: 52 | 53 | |config|lfw|calfw|cplfw|agedb_30|cfp_ff|cfp_fp|vgg2_fp|steps|download| 54 | |:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:| 55 | |[ms1m_100](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/configs/config_ms1m_100.yaml)|99.53%|93.92%|87.85%|94.18%|99.29%|94.73%|93.9%|334k|[baidu](https://pan.baidu.com/s/1Zr91ZYWTXJDlG63XLqNdzQ)| 56 | |[ms1m_100](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/configs/config_ms1m_100.yaml)|99.53%|94.68%|89.75%|95.20%|99.54%|96.30%|94.84%|1006k|[baidu](https://pan.baidu.com/s/1v1L3c7cEs_GyqPYH9WhNKA), [google](https://drive.google.com/open?id=107Qu56o1IwQxH61Q6smZk-DO2-xU6EwE)| 57 | |[ms1m_200](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/configs/config_ms1m_200.yaml)|99.43%|94.40%|88.23%|94.58%|99.29%|94.77%|93.9%|200k|[baidu](https://pan.baidu.com/s/1q3kXkhjtclXD-eQgZC5gBA)| 58 | 59 | ### Model Evaluation 60 | 61 | You can evaluate a pretrained model with [evaluate.py](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/evaluate.py) by specifying the config path and model path, for example: 62 | 63 | ``` 64 | python evaluate.py 65 | --config_path=./configs/config_ms1m_100.yaml 66 | --model_path=$DIRECTORY_TO_PRETRAINED_MODEL$/best-m-150000 67 | ``` 68 | 69 | This will evaluate the pretrained model on validation datasets specified in the config file. If you want to evaluate the model on other validation dataset, you can specify it by --val_data as following: 70 | 71 | ``` 72 | python evaluate.py 73 | --config_path=./configs/config_ms1m_100.yaml 74 | --model_path=$DIRECTORY_TO_PRETRAINED_MODEL$/best-m-150000 75 | --val_data=$DIRECTORY_TO_VAL_DATA$/xxx.bin 76 | ``` 77 | 78 | ### Extract Embedding with Pretrained Model 79 | 80 | You can extract embedding from face images with [get_embd.py](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/get_embd.py) by the following script: 81 | 82 | ``` 83 | python get_embd.py 84 | --config_path=./configs/config_ms1m_100.yaml 85 | --model_path=$DIRECTORY_TO_PRETRAINED_MODEL$/best-m-150000 86 | --read_path=$PATH_TO_FACE_IMAGES$ 87 | --save_path=$SAVING_DIRECTORY$/embd.pkl 88 | ``` 89 | 90 | where config_path and model_path specify the config file and pretrained model respectively. read_path is path to face images, that can be a path to one image or a directory with only images in it. save_path specifies where to save the embedding. The saved file is a dict with image file name as key, the corresponding embedding as value, and can be loaded with pickle in python. Note that face images should be well cropped here. 91 | 92 | ## Train Your Own Model 93 | 94 | If you want train your own model from scratch, or finetune pretrained model with your own data, here is what you should do. 95 | 96 | ### Data Prepare 97 | 98 | The official InsightFace project open their training data in the [DataZoo](https://github.com/deepinsight/insightface/wiki/Dataset-Zoo). 
This data is in mxrec format, you can transform it to tfrecord format with [./data/generateTFRecord.py](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/data/generateTFRecord.py) by the following script: 99 | 100 | ``` 101 | python generateTFRecord.py 102 | --mode=mxrec 103 | --image_size=112 104 | --read_dir=$DIRECTORY_TO_THE_TRAINING_DATA$ 105 | --save_path=$DIRECTORY_TO_SAVE_TFRECORD_FILE$/xxx.tfrecord 106 | ``` 107 | 108 | Or, if you want to train the model with your own data, you can prepare the tfrecord file by the following script: 109 | 110 | ``` 111 | python generateTFRecord.py 112 | --mode=folders 113 | --image_size=112 114 | --read_dir=$DIRECTORY_TO_THE_TRAINING_DATA$ 115 | --save_path=$DIRECTORY_TO_SAVE_TFRECORD_FILE$/xxx.tfrecord 116 | ``` 117 | 118 | Here, the read_dir should be the directory to your own face images, where images to one person are saved in one folder. The directory should have a structure like this: 119 | 120 | ``` 121 | read_dir/ 122 | - id1/ 123 | -- id1_1.jpg 124 | ... 125 | - id2/ 126 | -- id2_1.jpg 127 | ... 128 | - id3/ 129 | -- id3_1.jpg 130 | -- id3_2.jpg 131 | ... 132 | ... 133 | ``` 134 | 135 | ### Train with Softmax 136 | 137 | To train your own model with softmax, firstly you should prepare a config file like those in [./configs](https://github.com/luckycallor/InsightFace-tensorflow/tree/master/configs). It is recommended to modify one example config file to your own config. Secondly, the following script starts training: 138 | 139 | ``` 140 | python train_softmax.py --config_path=./configs/config_ms1m_100.yaml 141 | ``` 142 | 143 | ### Finetune with Softmax 144 | 145 | To finetune a pretrained model with your own data, you should prepare a finetune config file like [./configs/config_finetune.yaml](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/configs/config_finetune.yaml), and start training by the following script: 146 | 147 | ``` 148 | python finetune_softmax.py --config_path=./configs/config_finetune.yaml 149 | ``` -------------------------------------------------------------------------------- /backbones/ResNet_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | 8 | from backbones import utils 9 | 10 | resnet_arg_scope = utils.resnet_arg_scope 11 | 12 | 13 | @slim.add_arg_scope 14 | def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, 15 | outputs_collections=None, scope=None): 16 | with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: 17 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 18 | preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact') 19 | if depth == depth_in: 20 | shortcut = utils.subsample(inputs, stride, 'shortcut') 21 | else: 22 | shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, normalizer_fn=None, activation_fn=None, scope='shortcut') 23 | 24 | residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1') 25 | residual = utils.conv2d_same(residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') 26 | residual = slim.conv2d(residual, depth, [1, 1], stride=1, normalizer_fn=None, activation_fn=None, scope='conv3') 27 | 28 | output = shortcut + residual 29 | 30 | return slim.utils.collect_named_outputs(outputs_collections, sc.name, output) 31 | 32 | 33 | def 
resnet_v2(inputs, 34 | blocks, 35 | num_classes=None, 36 | is_training=True, 37 | return_raw=True, 38 | global_pool=True, 39 | output_stride=None, 40 | include_root_block=True, 41 | spatial_squeeze=True, 42 | reuse=None, 43 | scope=None): 44 | with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: 45 | end_points_collection = sc.original_name_scope + '_end_points' 46 | with slim.arg_scope([slim.conv2d, bottleneck, utils.stack_blocks_dense], outputs_collections=end_points_collection): 47 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 48 | net = inputs 49 | if include_root_block: 50 | if output_stride is not None: 51 | if output_stride % 4 != 0: 52 | raise ValueError('The output_stride needs to be a multiple of 4.') 53 | output_stride /= 4 54 | with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None): 55 | net = utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') 56 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') 57 | net = utils.stack_blocks_dense(net, blocks, output_stride) 58 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 59 | if return_raw: 60 | return net, end_points 61 | 62 | net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') 63 | end_points[sc.name + '/postnorm'] = net 64 | 65 | if global_pool: 66 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) 67 | end_points['global_pool'] = net 68 | 69 | if num_classes: 70 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') 71 | end_points[sc.name + '/logits'] = net 72 | if spatial_squeeze: 73 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') 74 | end_points[sc.name + '/spatial_squeeze'] = net 75 | end_points['predictions'] = slim.softmax(net, scope='predictions') 76 | return net, end_points 77 | resnet_v2.default_image_size = 224 78 | 79 | 80 | def resnet_v2_block(scope, base_depth, num_units, stride): 81 | return utils.Block(scope, bottleneck, [{ 82 | 'depth': base_depth * 4, 83 | 'depth_bottleneck': base_depth, 84 | 'stride': 1 85 | }] * (num_units - 1) + [{ 86 | 'depth': base_depth * 4, 87 | 'depth_bottleneck': base_depth, 88 | 'stride': stride 89 | }]) 90 | resnet_v2.default_image_size = 224 91 | 92 | 93 | def resnet_v2_50(inputs, 94 | num_classes=None, 95 | is_training=True, 96 | return_raw=True, 97 | global_pool=True, 98 | output_stride=None, 99 | spatial_squeeze=True, 100 | reuse=None, 101 | scope='resnet_v2_50'): 102 | """ResNet-50 model of [1]. See resnet_v2() for arg and return description.""" 103 | blocks = [ 104 | resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), 105 | resnet_v2_block('block2', base_depth=128, num_units=4, stride=2), 106 | resnet_v2_block('block3', base_depth=256, num_units=6, stride=2), 107 | resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), 108 | ] 109 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 110 | resnet_v2_50.default_image_size = resnet_v2.default_image_size 111 | 112 | 113 | def resnet_v2_101(inputs, 114 | num_classes=None, 115 | is_training=True, 116 | return_raw=True, 117 | global_pool=True, 118 | output_stride=None, 119 | spatial_squeeze=True, 120 | reuse=None, 121 | scope='resnet_v2_101'): 122 | """ResNet-101 model of [1]. 
See resnet_v2() for arg and return description.""" 123 | blocks = [ 124 | resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), 125 | resnet_v2_block('block2', base_depth=128, num_units=4, stride=2), 126 | resnet_v2_block('block3', base_depth=256, num_units=23, stride=2), 127 | resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), 128 | ] 129 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 130 | resnet_v2_101.default_image_size = resnet_v2.default_image_size 131 | 132 | 133 | def resnet_v2_152(inputs, 134 | num_classes=None, 135 | is_training=True, 136 | return_raw=True, 137 | global_pool=True, 138 | output_stride=None, 139 | spatial_squeeze=True, 140 | reuse=None, 141 | scope='resnet_v2_152'): 142 | """ResNet-152 model of [1]. See resnet_v2() for arg and return description.""" 143 | blocks = [ 144 | resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), 145 | resnet_v2_block('block2', base_depth=128, num_units=8, stride=2), 146 | resnet_v2_block('block3', base_depth=256, num_units=36, stride=2), 147 | resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), 148 | ] 149 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 150 | resnet_v2_152.default_image_size = resnet_v2.default_image_size 151 | 152 | 153 | def resnet_v2_200(inputs, 154 | num_classes=None, 155 | is_training=True, 156 | return_raw=True, 157 | global_pool=True, 158 | output_stride=None, 159 | spatial_squeeze=True, 160 | reuse=None, 161 | scope='resnet_v2_200'): 162 | """ResNet-200 model of [2]. 
See resnet_v2() for arg and return description.""" 163 | blocks = [ 164 | resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), 165 | resnet_v2_block('block2', base_depth=128, num_units=24, stride=2), 166 | resnet_v2_block('block3', base_depth=256, num_units=36, stride=2), 167 | resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), 168 | ] 169 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 170 | resnet_v2_200.default_image_size = resnet_v2.default_image_size 171 | -------------------------------------------------------------------------------- /backbones/ResNet_v1.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | 8 | from backbones import utils 9 | 10 | 11 | resnet_arg_scope = utils.resnet_arg_scope 12 | 13 | 14 | class NoOpScope(object): 15 | """No-op context manager.""" 16 | 17 | def __enter__(self): 18 | return None 19 | 20 | def __exit__(self, exc_type, exc_value, traceback): 21 | return False 22 | 23 | 24 | @slim.add_arg_scope 25 | def bottleneck(inputs, 26 | depth, 27 | depth_bottleneck, 28 | stride, 29 | rate=1, 30 | outputs_collections=None, 31 | scope=None, 32 | use_bounded_activations=False): 33 | with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: 34 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 35 | 36 | if depth == depth_in: 37 | shortcut = utils.subsample(inputs, stride, 'shortcut') 38 | else: 39 | shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride, activation_fn=tf.nn.relu6 if use_bounded_activations else None, scope='shortcut') 40 | 41 | residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1') 42 | residual = utils.conv2d_same(residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') 43 | residual = slim.conv2d(residual, depth, [1, 1], stride=1, activation_fn=None, scope='conv3') 44 | 45 | if use_bounded_activations: 46 | # Use clip_by_value to simulate bandpass activation. 
47 | residual = tf.clip_by_value(residual, -6.0, 6.0) 48 | output = tf.nn.relu6(shortcut + residual) 49 | else: 50 | output = tf.nn.relu(shortcut + residual) 51 | 52 | return slim.utils.collect_named_outputs(outputs_collections, sc.name, output) 53 | 54 | 55 | def resnet_v1(inputs, 56 | blocks, 57 | num_classes=None, 58 | is_training=True, 59 | global_pool=True, 60 | output_stride=None, 61 | include_root_block=True, 62 | spatial_squeeze=True, 63 | store_non_strided_activations=False, 64 | reuse=None, 65 | scope=None): 66 | with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: 67 | end_points_collection = sc.original_name_scope + '_end_points' 68 | with slim.arg_scope([slim.conv2d, bottleneck, utils.stack_blocks_dense], outputs_collections=end_points_collection): 69 | with (slim.arg_scope([slim.batch_norm], is_training=is_training) if is_training is not None else NoOpScope()): 70 | net = inputs 71 | if include_root_block: 72 | if output_stride is not None: 73 | if output_stride % 4 != 0: 74 | raise ValueError('The output_stride needs to be a multiple of 4.') 75 | output_stride /= 4 76 | net = utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') 77 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') 78 | net = utils.stack_blocks_dense(net, blocks, output_stride, store_non_strided_activations) 79 | 80 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 81 | 82 | if global_pool: 83 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) 84 | end_points['global_pool'] = net 85 | if num_classes: 86 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') 87 | end_points[sc.name + '/logits'] = net 88 | if spatial_squeeze: 89 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') 90 | end_points[sc.name + '/spatial_squeeze'] = net 91 | end_points['predictions'] = slim.softmax(net, scope='predictions') 92 | return net, end_points 93 | resnet_v1.default_image_size = 224 94 | 95 | 96 | def resnet_v1_block(scope, base_depth, num_units, stride): 97 | return utils.Block(scope, bottleneck, [{ 98 | 'depth': base_depth * 4, 99 | 'depth_bottleneck': base_depth, 100 | 'stride': 1 101 | }] * (num_units - 1) + [{ 102 | 'depth': base_depth * 4, 103 | 'depth_bottleneck': base_depth, 104 | 'stride': stride 105 | }]) 106 | 107 | 108 | def resnet_v1_50(inputs, 109 | num_classes=None, 110 | is_training=True, 111 | global_pool=True, 112 | output_stride=None, 113 | spatial_squeeze=True, 114 | store_non_strided_activations=False, 115 | reuse=None, 116 | scope='resnet_v1_50'): 117 | """ResNet-50 model of [1]. 
See resnet_v1() for arg and return description.""" 118 | blocks = [ 119 | resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 120 | resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), 121 | resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), 122 | resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), 123 | ] 124 | return resnet_v1(inputs, blocks, num_classes, is_training, 125 | global_pool=global_pool, output_stride=output_stride, 126 | include_root_block=True, spatial_squeeze=spatial_squeeze, 127 | store_non_strided_activations=store_non_strided_activations, 128 | reuse=reuse, scope=scope) 129 | resnet_v1_50.default_image_size = resnet_v1.default_image_size 130 | 131 | 132 | def resnet_v1_101(inputs, 133 | num_classes=None, 134 | is_training=True, 135 | global_pool=True, 136 | output_stride=None, 137 | spatial_squeeze=True, 138 | store_non_strided_activations=False, 139 | reuse=None, 140 | scope='resnet_v1_101'): 141 | """ResNet-101 model of [1]. See resnet_v1() for arg and return description.""" 142 | blocks = [ 143 | resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 144 | resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), 145 | resnet_v1_block('block3', base_depth=256, num_units=23, stride=2), 146 | resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), 147 | ] 148 | return resnet_v1(inputs, blocks, num_classes, is_training, 149 | global_pool=global_pool, output_stride=output_stride, 150 | include_root_block=True, spatial_squeeze=spatial_squeeze, 151 | store_non_strided_activations=store_non_strided_activations, 152 | reuse=reuse, scope=scope) 153 | resnet_v1_101.default_image_size = resnet_v1.default_image_size 154 | 155 | 156 | def resnet_v1_152(inputs, 157 | num_classes=None, 158 | is_training=True, 159 | global_pool=True, 160 | output_stride=None, 161 | store_non_strided_activations=False, 162 | spatial_squeeze=True, 163 | reuse=None, 164 | scope='resnet_v1_152'): 165 | """ResNet-152 model of [1]. See resnet_v1() for arg and return description.""" 166 | blocks = [ 167 | resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 168 | resnet_v1_block('block2', base_depth=128, num_units=8, stride=2), 169 | resnet_v1_block('block3', base_depth=256, num_units=36, stride=2), 170 | resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), 171 | ] 172 | return resnet_v1(inputs, blocks, num_classes, is_training, 173 | global_pool=global_pool, output_stride=output_stride, 174 | include_root_block=True, spatial_squeeze=spatial_squeeze, 175 | store_non_strided_activations=store_non_strided_activations, 176 | reuse=reuse, scope=scope) 177 | resnet_v1_152.default_image_size = resnet_v1.default_image_size 178 | 179 | 180 | def resnet_v1_200(inputs, 181 | num_classes=None, 182 | is_training=True, 183 | global_pool=True, 184 | output_stride=None, 185 | store_non_strided_activations=False, 186 | spatial_squeeze=True, 187 | reuse=None, 188 | scope='resnet_v1_200'): 189 | """ResNet-200 model of [2]. 
See resnet_v1() for arg and return description.""" 190 | blocks = [ 191 | resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 192 | resnet_v1_block('block2', base_depth=128, num_units=24, stride=2), 193 | resnet_v1_block('block3', base_depth=256, num_units=36, stride=2), 194 | resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), 195 | ] 196 | return resnet_v1(inputs, blocks, num_classes, is_training, 197 | global_pool=global_pool, output_stride=output_stride, 198 | include_root_block=True, spatial_squeeze=spatial_squeeze, 199 | store_non_strided_activations=store_non_strided_activations, 200 | reuse=reuse, scope=scope) 201 | resnet_v1_200.default_image_size = resnet_v1.default_image_size 202 | -------------------------------------------------------------------------------- /backbones/modifiedResNet_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | 8 | from backbones import utils 9 | 10 | resnet_arg_scope = utils.resnet_arg_scope 11 | 12 | 13 | @slim.add_arg_scope 14 | def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, outputs_collections=None, scope=None): 15 | with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: 16 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 17 | preact = slim.batch_norm(inputs, activation_fn=tf.nn.leaky_relu, scope='preact') 18 | if depth == depth_in: 19 | shortcut = utils.subsample(inputs, stride, 'shortcut') 20 | else: 21 | shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, normalizer_fn=None, activation_fn=None, scope='shortcut') 22 | 23 | residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1') 24 | residual = utils.conv2d_same(residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') 25 | residual = slim.conv2d(residual, depth, [1, 1], stride=1, normalizer_fn=None, activation_fn=None, scope='conv3') 26 | 27 | output = shortcut + residual 28 | 29 | return slim.utils.collect_named_outputs(outputs_collections, sc.name, output) 30 | 31 | 32 | @slim.add_arg_scope 33 | def block(inputs, depth, stride, rate=1, outputs_collections=None, scope=None): 34 | with tf.variable_scope(scope, 'block_v2', [inputs]) as sc: 35 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 36 | preact = slim.batch_norm(inputs, activation_fn=tf.nn.leaky_relu, scope='preact') 37 | if depth == depth_in: 38 | shortcut = utils.subsample(inputs, stride, 'shortcut') 39 | else: 40 | shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, normalizer_fn=None, activation_fn=None, scope='shortcut') 41 | 42 | residual = utils.conv2d_same(preact, depth, 3, stride, rate=rate, scope='conv1') 43 | residual = slim.conv2d(residual, depth, [3, 3], stride=1, normalizer_fn=None, activation_fn=None, scope='conv2') 44 | # residual = slim.conv2d(residual, depth, [1, 1], stride=1, normalizer_fn=None, activation_fn=None, scope='conv3') 45 | 46 | output = shortcut + residual 47 | 48 | return slim.utils.collect_named_outputs(outputs_collections, sc.name, output) 49 | 50 | 51 | def resnet_v2_m(inputs, 52 | blocks, 53 | num_classes=None, 54 | is_training=True, 55 | return_raw=True, 56 | global_pool=True, 57 | output_stride=None, 58 | include_root_block=True, 59 | spatial_squeeze=True, 60 | reuse=None, 61 | scope=None): 62 | with tf.variable_scope(scope, 
'resnet_v2', [inputs], reuse=reuse) as sc: 63 | end_points_collection = sc.original_name_scope + '_end_points' 64 | with slim.arg_scope([slim.conv2d, bottleneck, utils.stack_blocks_dense], outputs_collections=end_points_collection): 65 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 66 | net = inputs 67 | if include_root_block: 68 | if output_stride is not None: 69 | if output_stride % 4 != 0: 70 | raise ValueError('The output_stride needs to be a multiple of 4.') 71 | output_stride /= 4 72 | with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None): 73 | net = utils.conv2d_same(net, 64, 3, stride=1, scope='conv1') 74 | # net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') 75 | net = utils.stack_blocks_dense(net, blocks, output_stride) 76 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 77 | if return_raw: 78 | return net, end_points 79 | net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') 80 | end_points[sc.name + '/postnorm'] = net 81 | 82 | if global_pool: 83 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) 84 | end_points['global_pool'] = net 85 | 86 | if num_classes: 87 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') 88 | end_points[sc.name + '/logits'] = net 89 | if spatial_squeeze: 90 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') 91 | end_points[sc.name + '/spatial_squeeze'] = net 92 | end_points['predictions'] = slim.softmax(net, scope='predictions') 93 | return net, end_points 94 | resnet_v2_m.default_image_size = 224 95 | 96 | 97 | def resnet_v2_bottleneck(scope, base_depth, num_units, stride): 98 | return utils.Block(scope, bottleneck, [{ 99 | 'depth': base_depth * 4, 100 | 'depth_bottleneck': base_depth, 101 | 'stride': stride 102 | }] + (num_units - 1) * [{ 103 | 'depth': base_depth * 4, 104 | 'depth_bottleneck': base_depth, 105 | 'stride': 1 106 | }]) 107 | resnet_v2_m.default_image_size = 224 108 | 109 | 110 | def resnet_v2_block(scope, base_depth, num_units, stride): 111 | return utils.Block(scope, block, [{ 112 | 'depth': base_depth * 4, 113 | 'stride': stride 114 | }] + (num_units - 1) * [{ 115 | 'depth': base_depth * 4, 116 | 'stride': 1 117 | }]) 118 | resnet_v2_m.default_image_size = 224 119 | 120 | 121 | def resnet_v2_m_50(inputs, 122 | num_classes=None, 123 | is_training=True, 124 | return_raw=True, 125 | global_pool=True, 126 | output_stride=None, 127 | spatial_squeeze=True, 128 | reuse=None, 129 | scope='resnet_v2_50'): 130 | """ResNet-50 model of [1]. See resnet_v2() for arg and return description.""" 131 | blocks = [ 132 | resnet_v2_block('block1', base_depth=16, num_units=3, stride=2), 133 | resnet_v2_block('block2', base_depth=32, num_units=4, stride=2), 134 | resnet_v2_block('block3', base_depth=64, num_units=14, stride=2), 135 | resnet_v2_block('block4', base_depth=128, num_units=3, stride=2), 136 | ] 137 | return resnet_v2_m(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 138 | resnet_v2_m_50.default_image_size = resnet_v2_m.default_image_size 139 | 140 | 141 | def resnet_v2_m_101(inputs, 142 | num_classes=None, 143 | is_training=True, 144 | return_raw=True, 145 | global_pool=True, 146 | output_stride=None, 147 | spatial_squeeze=True, 148 | reuse=None, 149 | scope='resnet_v2_101'): 150 | """ResNet-101 model of [1]. 
See resnet_v2() for arg and return description.""" 151 | blocks = [ 152 | resnet_v2_bottleneck('block1', base_depth=64, num_units=3, stride=2), 153 | resnet_v2_bottleneck('block2', base_depth=128, num_units=4, stride=2), 154 | resnet_v2_bottleneck('block3', base_depth=256, num_units=23, stride=2), 155 | resnet_v2_bottleneck('block4', base_depth=512, num_units=3, stride=2), 156 | ] 157 | return resnet_v2_m(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 158 | resnet_v2_m_101.default_image_size = resnet_v2_m.default_image_size 159 | 160 | 161 | def resnet_v2_m_152(inputs, 162 | num_classes=None, 163 | is_training=True, 164 | return_raw=True, 165 | global_pool=True, 166 | output_stride=None, 167 | spatial_squeeze=True, 168 | reuse=None, 169 | scope='resnet_v2_152'): 170 | """ResNet-152 model of [1]. See resnet_v2() for arg and return description.""" 171 | blocks = [ 172 | resnet_v2_bottleneck('block1', base_depth=64, num_units=3, stride=2), 173 | resnet_v2_bottleneck('block2', base_depth=128, num_units=8, stride=2), 174 | resnet_v2_bottleneck('block3', base_depth=256, num_units=36, stride=2), 175 | resnet_v2_bottleneck('block4', base_depth=512, num_units=3, stride=2), 176 | ] 177 | return resnet_v2_m(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 178 | resnet_v2_m_152.default_image_size = resnet_v2_m.default_image_size 179 | 180 | 181 | def resnet_v2_m_200(inputs, 182 | num_classes=None, 183 | is_training=True, 184 | return_raw=True, 185 | global_pool=True, 186 | output_stride=None, 187 | spatial_squeeze=True, 188 | reuse=None, 189 | scope='resnet_v2_200'): 190 | """ResNet-200 model of [2]. 
See resnet_v2() for arg and return description.""" 191 | blocks = [ 192 | resnet_v2_bottleneck('block1', base_depth=64, num_units=3, stride=2), 193 | resnet_v2_bottleneck('block2', base_depth=128, num_units=24, stride=2), 194 | resnet_v2_bottleneck('block3', base_depth=256, num_units=36, stride=2), 195 | resnet_v2_bottleneck('block4', base_depth=512, num_units=3, stride=2), 196 | ] 197 | return resnet_v2_m(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 198 | resnet_v2_m_200.default_image_size = resnet_v2_m.default_image_size 199 | -------------------------------------------------------------------------------- /train_softmax.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pickle 4 | import argparse 5 | import numpy as np 6 | 7 | import io 8 | import yaml 9 | from scipy import misc 10 | 11 | import tensorflow as tf 12 | import tensorflow.contrib.slim as slim 13 | 14 | from datetime import datetime 15 | 16 | from losses.logit_loss import get_logits 17 | from data.classificationDataTool import ClassificationImageData 18 | from model import get_embd 19 | from utils import average_gradients, check_folders, analyze_vars 20 | from evaluate import load_bin, evaluate 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser() 25 | 26 | parser.add_argument('--config_path', type=str, help='path to config file', default='./configs/config_ms1m_100.yaml') 27 | 28 | return parser.parse_args() 29 | 30 | 31 | def inference(images, labels, is_training_dropout, is_training_bn, config): 32 | embds, end_points = get_embd(images, is_training_dropout, is_training_bn, config) 33 | logits = get_logits(embds, labels, config) 34 | end_points['logits'] = logits 35 | return embds, logits, end_points 36 | 37 | 38 | class Trainer: 39 | def __init__(self, config): 40 | self.config = config 41 | subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') 42 | self.output_dir = os.path.join(config['output_dir'], subdir) 43 | self.model_dir = os.path.join(self.output_dir, 'models') 44 | self.log_dir = os.path.join(self.output_dir, 'log') 45 | self.checkpoint_dir = os.path.join(self.output_dir, 'checkpoints') 46 | self.debug_dir = os.path.join(self.output_dir, 'debug') 47 | check_folders([self.output_dir, self.model_dir, self.log_dir, self.checkpoint_dir, self.debug_dir]) 48 | self.val_log = os.path.join(self.output_dir, 'val_log.txt') 49 | 50 | self.batch_size = config['batch_size'] 51 | self.gpu_num = config['gpu_num'] 52 | if self.batch_size % self.gpu_num != 0: 53 | raise ValueError('batch_size must be a multiple of gpu_num') 54 | self.image_size = config['image_size'] 55 | self.epoch_num = config['epoch_num'] 56 | self.step_per_epoch = config['step_per_epoch'] 57 | self.val_freq = config['val_freq'] 58 | self.val_data = config['val_data'] 59 | self.val_bn_train = config['val_bn_train'] 60 | # for k, v in config['val_data'].items(): 61 | # self.val_data[k] = load_bin(v, self.image_size) 62 | # imgs = self.val_data[k][0] 63 | # np.save(os.path.join(self.debug_dir, k+'.npy'), imgs[:100]) 64 | 65 | with open(os.path.join(self.output_dir, 'config.yaml'), 'w') as f: 66 | f.write(yaml.dump(self.config)) 67 | 68 | 69 | def build(self): 70 | self.train_phase_dropout = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_dropout') 71 | self.train_phase_bn = 
tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_bn') 72 | self.global_step = tf.Variable(name='global_step', initial_value=0, trainable=False) 73 | self.inc_op = tf.assign_add(self.global_step, 1, name='increment_global_step') 74 | scale = int(512.0/self.batch_size) 75 | lr_steps = [scale*s for s in self.config['lr_steps']] 76 | lr_values = [v/scale for v in self.config['lr_values']] 77 | # lr_steps = self.config['lr_steps'] 78 | self.lr = tf.train.piecewise_constant(self.global_step, boundaries=lr_steps, values=lr_values, name='lr_schedule') 79 | 80 | cid = ClassificationImageData(img_size=self.image_size, augment_flag=self.config['augment_flag'], augment_margin=self.config['augment_margin']) 81 | train_dataset = cid.read_TFRecord(self.config['train_data']).shuffle(10000).repeat().batch(self.batch_size) 82 | train_iterator = train_dataset.make_one_shot_iterator() 83 | self.train_images, self.train_labels = train_iterator.get_next() 84 | self.train_images = tf.identity(self.train_images, 'input_images') 85 | self.train_labels = tf.identity(self.train_labels, 'labels') 86 | if self.gpu_num <= 1: 87 | self.embds, self.logits, self.end_points = inference(self.train_images, self.train_labels, self.train_phase_dropout, self.train_phase_bn, self.config) 88 | self.embds = tf.identity(self.embds, 'embeddings') 89 | self.inference_loss = slim.losses.sparse_softmax_cross_entropy(logits=self.logits, labels=self.train_labels) 90 | self.wd_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 91 | self.train_loss = self.inference_loss+self.wd_loss 92 | pred = tf.arg_max(tf.nn.softmax(self.logits), dimension=-1, output_type=tf.int64) 93 | self.train_acc = tf.reduce_mean(tf.cast(tf.equal(pred, self.train_labels), tf.float32)) 94 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 95 | with tf.control_dependencies(update_ops): 96 | self.train_op = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']).minimize(self.train_loss) 97 | else: 98 | self.embds = [] 99 | self.logits = [] 100 | self.inference_loss = [] 101 | self.wd_loss = [] 102 | self.train_loss = [] 103 | pred = [] 104 | tower_grads = [] 105 | update_ops = [] 106 | opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']) 107 | train_images = tf.split(self.train_images, self.gpu_num) 108 | train_labels = tf.split(self.train_labels, self.gpu_num) 109 | for i in range(self.gpu_num): 110 | sub_train_images = train_images[i] 111 | sub_train_labels = train_labels[i] 112 | with tf.device('/gpu:%d' % i): 113 | with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)): 114 | embds, logits, end_points = inference(sub_train_images, sub_train_labels, self.train_phase_dropout, self.train_phase_bn, self.config) 115 | inference_loss = slim.losses.sparse_softmax_cross_entropy(logits=logits, labels=sub_train_labels) 116 | wd_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 117 | train_loss = inference_loss+wd_loss 118 | pred.append(tf.arg_max(tf.nn.softmax(logits), dimension=-1, output_type=tf.int64)) 119 | tower_grads.append(opt.compute_gradients(train_loss)) 120 | update_ops.append(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) 121 | self.embds.append(embds) 122 | self.logits.append(logits) 123 | self.inference_loss.append(inference_loss) 124 | self.wd_loss.append(wd_loss) 125 | self.train_loss.append(train_loss) 126 | self.embds = tf.concat(self.embds, axis=0) 127 | self.logits = tf.concat(self.logits, axis=0) 128 | self.inference_loss = 
tf.add_n(self.inference_loss)/self.gpu_num 129 | self.wd_loss = tf.add_n(self.wd_loss)/self.gpu_num 130 | self.train_loss = tf.add_n(self.train_loss)/self.gpu_num 131 | pred = tf.concat(pred, axis=0) 132 | self.train_acc = tf.reduce_mean(tf.cast(tf.equal(pred, self.train_labels), tf.float32)) 133 | train_ops = [opt.apply_gradients(average_gradients(tower_grads))] 134 | train_ops.extend(update_ops) 135 | self.train_op = tf.group(*train_ops) 136 | 137 | 138 | self.train_summary = tf.summary.merge([ 139 | tf.summary.scalar('inference_loss', self.inference_loss), 140 | tf.summary.scalar('wd_loss', self.wd_loss), 141 | tf.summary.scalar('train_loss', self.train_loss), 142 | tf.summary.scalar('train_acc', self.train_acc) 143 | ]) 144 | 145 | def run_embds(self, sess, images): 146 | batch_num = len(images)//self.batch_size 147 | left = len(images)%self.batch_size 148 | embds = [] 149 | for i in range(batch_num): 150 | cur_embd = sess.run(self.embds, feed_dict={self.train_images: images[i*self.batch_size: (i+1)*self.batch_size], self.train_phase_dropout: False, self.train_phase_bn: self.val_bn_train}) 151 | embds += list(cur_embd) 152 | if left > 0: 153 | image_batch = np.zeros([self.batch_size, self.image_size, self.image_size, 3]) 154 | image_batch[:left, :, :, :] = images[-left:] 155 | cur_embd = sess.run(self.embds, feed_dict={self.train_images: image_batch, self.train_phase_dropout: False, self.train_phase_bn: self.val_bn_train}) 156 | embds += list(cur_embd)[:left] 157 | return np.array(embds) 158 | 159 | def save_image_label(self, images, labels, step): 160 | save_dir = os.path.join(self.debug_dir, 'image_by_label') 161 | for i in range(len(labels)): 162 | if(labels[i] < 10): 163 | cur_save_dir = os.path.join(save_dir, str(labels[i])) 164 | check_folders(cur_save_dir) 165 | misc.imsave(os.path.join(cur_save_dir, '%d_%d.jpg' % (step, i)), images[i]) 166 | 167 | 168 | def train(self): 169 | self.build() 170 | analyze_vars(tf.trainable_variables(), os.path.join(self.output_dir, 'model_vars.txt')) 171 | with open(os.path.join(self.output_dir, 'regularizers.txt'), 'w') as f: 172 | for v in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES): 173 | f.write(v.name+'\n') 174 | # exit(-1) 175 | tf_config = tf.ConfigProto(allow_soft_placement=True) 176 | tf_config.gpu_options.allow_growth = True 177 | with tf.Session(config=tf_config) as sess: 178 | tf.global_variables_initializer().run() 179 | saver_ckpt = tf.train.Saver() 180 | saver_best = tf.train.Saver() 181 | summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) 182 | start_time = time.time() 183 | best_acc = 0 184 | counter = 0 185 | if config['pretrained_model'] != '': 186 | saver_ckpt.restore(sess, config['pretrained_model']) 187 | step = int(os.path.basename(config['pretrained_model']).split('.')[0].split('-')[-1]) 188 | sess.run(tf.assign(self.global_step, step)) 189 | counter = self.global_step.eval(sess) 190 | print('start step: %d' % counter) 191 | debug = True 192 | for i in range(self.epoch_num): 193 | for j in range(self.step_per_epoch): 194 | _, l, l_wd, l_inf, acc, s, _ = sess.run([self.train_op, self.train_loss, self.wd_loss, self.inference_loss, self.train_acc, self.train_summary, self.inc_op], feed_dict={self.train_phase_dropout: True, self.train_phase_bn: True}) 195 | counter += 1 196 | 197 | # debug 198 | # self.save_image_label(train_img, train_lbl, counter) 199 | # if(debug): 200 | # if(len(train_imgs) < 100): 201 | # train_imgs.append(train_img[0]) 202 | # else: 203 | # np.save(os.path.join(self.debug_dir, 
'train_imgs.npy'), np.array(train_imgs)) 204 | # debug=False 205 | 206 | print("Epoch: [%2d/%2d] [%6d/%6d] time: %.2f, loss: %.3f (inference: %.3f, wd: %.3f), acc: %.3f" % (i, self.epoch_num, j, self.step_per_epoch, time.time() - start_time, l, l_inf, l_wd, acc)) 207 | start_time = time.time() 208 | if counter % self.val_freq == 0: 209 | saver_ckpt.save(sess, os.path.join(self.checkpoint_dir, 'ckpt-m'), global_step=counter) 210 | acc = [] 211 | with open(self.val_log, 'a') as f: 212 | f.write('step: %d\n' % counter) 213 | for k, v in self.val_data.items(): 214 | imgs, imgs_f, issame = load_bin(v, self.image_size) 215 | embds = self.run_embds(sess, imgs) 216 | embds_f = self.run_embds(sess, imgs_f) 217 | embds = embds/np.linalg.norm(embds, axis=1, keepdims=True)+embds_f/np.linalg.norm(embds_f, axis=1, keepdims=True) 218 | tpr, fpr, acc_mean, acc_std, tar, tar_std, far = evaluate(embds, issame, far_target=1e-3, distance_metric=0) 219 | f.write('eval on %s: acc--%1.5f+-%1.5f, tar--%1.5f+-%1.5f@far=%1.5f\n' % (k, acc_mean, acc_std, tar, tar_std, far)) 220 | acc.append(acc_mean) 221 | acc = np.mean(np.array(acc)) 222 | if acc > best_acc: 223 | saver_best.save(sess, os.path.join(self.model_dir, 'best-m'), global_step=counter) 224 | best_acc = acc 225 | 226 | 227 | if __name__ == '__main__': 228 | args = parse_args() 229 | config = yaml.load(open(args.config_path)) 230 | trainer = Trainer(config) 231 | trainer.train() 232 | 233 | 234 | -------------------------------------------------------------------------------- /finetune_softmax.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pickle 4 | import argparse 5 | import numpy as np 6 | 7 | import io 8 | import yaml 9 | from scipy import misc 10 | 11 | import tensorflow as tf 12 | import tensorflow.contrib.slim as slim 13 | 14 | from datetime import datetime 15 | 16 | from losses.logit_loss import get_logits 17 | from data.classificationDataTool import ClassificationImageData 18 | from model import get_embd 19 | from utils import average_gradients, check_folders, analyze_vars 20 | from evaluate import load_bin, evaluate 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser() 25 | 26 | parser.add_argument('--config_path', type=str, help='path to config file', default='./configs/config_finetune.yaml') 27 | 28 | return parser.parse_args() 29 | 30 | 31 | def inference(images, labels, is_training_dropout, is_training_bn, config): 32 | embds, end_points = get_embd(images, is_training_dropout, is_training_bn, config) 33 | logits = get_logits(embds, labels, config) 34 | end_points['logits'] = logits 35 | return embds, logits, end_points 36 | 37 | 38 | class Trainer: 39 | def __init__(self, config): 40 | self.config = config 41 | subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') 42 | self.output_dir = os.path.join(config['output_dir'], subdir) 43 | self.model_dir = os.path.join(self.output_dir, 'models') 44 | self.log_dir = os.path.join(self.output_dir, 'log') 45 | self.checkpoint_dir = os.path.join(self.output_dir, 'checkpoints') 46 | self.debug_dir = os.path.join(self.output_dir, 'debug') 47 | check_folders([self.output_dir, self.model_dir, self.log_dir, self.checkpoint_dir, self.debug_dir]) 48 | self.val_log = os.path.join(self.output_dir, 'val_log.txt') 49 | 50 | self.batch_size = config['batch_size'] 51 | self.gpu_num = config['gpu_num'] 52 | if self.batch_size % self.gpu_num != 0: 53 | raise ValueError('batch_size must be a multiple of gpu_num') 54 | 
self.image_size = config['image_size'] 55 | self.epoch_num = config['epoch_num'] 56 | self.step_per_epoch = config['step_per_epoch'] 57 | self.val_freq = config['val_freq'] 58 | self.val_data = config['val_data'] 59 | self.val_bn_train = config['val_bn_train'] 60 | # for k, v in config['val_data'].items(): 61 | # self.val_data[k] = load_bin(v, self.image_size) 62 | # imgs = self.val_data[k][0] 63 | # np.save(os.path.join(self.debug_dir, k+'.npy'), imgs[:100]) 64 | 65 | with open(os.path.join(self.output_dir, 'config.yaml'), 'w') as f: 66 | f.write(yaml.dump(self.config)) 67 | 68 | 69 | def build(self): 70 | self.train_phase_dropout = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_dropout') 71 | self.train_phase_bn = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_bn') 72 | self.global_step = tf.Variable(name='global_step', initial_value=0, trainable=False) 73 | self.inc_op = tf.assign_add(self.global_step, 1, name='increment_global_step') 74 | scale = int(512.0/self.batch_size) 75 | lr_steps = [scale*s for s in self.config['lr_steps']] 76 | lr_values = [v/scale for v in self.config['lr_values']] 77 | # lr_steps = self.config['lr_steps'] 78 | self.lr = tf.train.piecewise_constant(self.global_step, boundaries=lr_steps, values=lr_values, name='lr_schedule') 79 | 80 | cid = ClassificationImageData(img_size=self.image_size, augment_flag=self.config['augment_flag'], augment_margin=self.config['augment_margin']) 81 | train_dataset = cid.read_TFRecord(self.config['train_data']).shuffle(10000).repeat().batch(self.batch_size) 82 | train_iterator = train_dataset.make_one_shot_iterator() 83 | self.train_images, self.train_labels = train_iterator.get_next() 84 | self.train_images = tf.identity(self.train_images, 'input_images') 85 | self.train_labels = tf.identity(self.train_labels, 'labels') 86 | if self.gpu_num <= 1: 87 | self.embds, self.logits, self.end_points = inference(self.train_images, self.train_labels, self.train_phase_dropout, self.train_phase_bn, self.config) 88 | self.embds = tf.identity(self.embds, 'embeddings') 89 | self.inference_loss = slim.losses.sparse_softmax_cross_entropy(logits=self.logits, labels=self.train_labels) 90 | self.wd_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 91 | self.train_loss = self.inference_loss+self.wd_loss 92 | pred = tf.arg_max(tf.nn.softmax(self.logits), dimension=-1, output_type=tf.int64) 93 | self.train_acc = tf.reduce_mean(tf.cast(tf.equal(pred, self.train_labels), tf.float32)) 94 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 95 | vars_softmax = [v for v in tf.trainable_variables() if 'embd_extractor' not in v.name] 96 | with tf.control_dependencies(update_ops): 97 | self.train_op = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']).minimize(self.train_loss) 98 | self.train_op_softmax = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']).minimize(self.train_loss, var_list=vars_softmax) 99 | else: 100 | self.embds = [] 101 | self.logits = [] 102 | self.inference_loss = [] 103 | self.wd_loss = [] 104 | self.train_loss = [] 105 | pred = [] 106 | tower_grads = [] 107 | tower_grads_softmax = [] 108 | update_ops = [] 109 | opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']) 110 | train_images = tf.split(self.train_images, self.gpu_num) 111 | train_labels = tf.split(self.train_labels, self.gpu_num) 112 | for i in range(self.gpu_num): 113 | sub_train_images = train_images[i] 114 | sub_train_labels 
= train_labels[i] 115 | with tf.device('/gpu:%d' % i): 116 | with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)): 117 | embds, logits, end_points = inference(sub_train_images, sub_train_labels, self.train_phase_dropout, self.train_phase_bn, self.config) 118 | inference_loss = slim.losses.sparse_softmax_cross_entropy(logits=logits, labels=sub_train_labels) 119 | wd_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 120 | train_loss = inference_loss+wd_loss 121 | pred.append(tf.arg_max(tf.nn.softmax(logits), dimension=-1, output_type=tf.int64)) 122 | vars_softmax = [v for v in tf.trainable_variables() if 'embd_extractor' not in v.name] 123 | tower_grads.append(opt.compute_gradients(train_loss)) 124 | tower_grads_softmax.append(opt.compute_gradients(train_loss, var_list=vars_softmax)) 125 | update_ops.append(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) 126 | self.embds.append(embds) 127 | self.logits.append(logits) 128 | self.inference_loss.append(inference_loss) 129 | self.wd_loss.append(wd_loss) 130 | self.train_loss.append(train_loss) 131 | self.embds = tf.concat(self.embds, axis=0) 132 | self.logits = tf.concat(self.logits, axis=0) 133 | self.inference_loss = tf.add_n(self.inference_loss)/self.gpu_num 134 | self.wd_loss = tf.add_n(self.wd_loss)/self.gpu_num 135 | self.train_loss = tf.add_n(self.train_loss)/self.gpu_num 136 | pred = tf.concat(pred, axis=0) 137 | self.train_acc = tf.reduce_mean(tf.cast(tf.equal(pred, self.train_labels), tf.float32)) 138 | train_ops = [opt.apply_gradients(average_gradients(tower_grads))] 139 | train_ops_softmax = [opt.apply_gradients(average_gradients(tower_grads_softmax))] 140 | train_ops.extend(update_ops) 141 | train_ops_softmax.extend(update_ops) 142 | self.train_op = tf.group(*train_ops) 143 | self.train_op_softmax = tf.group(*train_ops_softmax) 144 | 145 | 146 | self.train_summary = tf.summary.merge([ 147 | tf.summary.scalar('inference_loss', self.inference_loss), 148 | tf.summary.scalar('wd_loss', self.wd_loss), 149 | tf.summary.scalar('train_loss', self.train_loss), 150 | tf.summary.scalar('train_acc', self.train_acc) 151 | ]) 152 | 153 | def run_embds(self, sess, images): 154 | batch_num = len(images)//self.batch_size 155 | left = len(images)%self.batch_size 156 | embds = [] 157 | for i in range(batch_num): 158 | cur_embd = sess.run(self.embds, feed_dict={self.train_images: images[i*self.batch_size: (i+1)*self.batch_size], self.train_phase_dropout: False, self.train_phase_bn: self.val_bn_train}) 159 | embds += list(cur_embd) 160 | if left > 0: 161 | image_batch = np.zeros([self.batch_size, self.image_size, self.image_size, 3]) 162 | image_batch[:left, :, :, :] = images[-left:] 163 | cur_embd = sess.run(self.embds, feed_dict={self.train_images: image_batch, self.train_phase_dropout: False, self.train_phase_bn: self.val_bn_train}) 164 | embds += list(cur_embd)[:left] 165 | return np.array(embds) 166 | 167 | def save_image_label(self, images, labels, step): 168 | save_dir = os.path.join(self.debug_dir, 'image_by_label') 169 | for i in range(len(labels)): 170 | if(labels[i] < 10): 171 | cur_save_dir = os.path.join(save_dir, str(labels[i])) 172 | check_folders(cur_save_dir) 173 | misc.imsave(os.path.join(cur_save_dir, '%d_%d.jpg' % (step, i)), images[i]) 174 | 175 | 176 | def train(self): 177 | self.build() 178 | analyze_vars(tf.trainable_variables(), os.path.join(self.output_dir, 'model_vars.txt')) 179 | with open(os.path.join(self.output_dir, 'regularizers.txt'), 'w') as f: 180 | for v in 
tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES): 181 | f.write(v.name+'\n') 182 | # exit(-1) 183 | tf_config = tf.ConfigProto(allow_soft_placement=True) 184 | tf_config.gpu_options.allow_growth = True 185 | with tf.Session(config=tf_config) as sess: 186 | tf.global_variables_initializer().run() 187 | saver_ckpt = tf.train.Saver() 188 | saver_best = tf.train.Saver() 189 | saver_embd = tf.train.Saver(var_list=[v for v in tf.trainable_variables() if 'embd_extractor' in v.name]) 190 | if config['pretrained_model'] != '': 191 | saver_embd.restore(sess, config['pretrained_model']) 192 | summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) 193 | start_time = time.time() 194 | best_acc = 0 195 | counter = 0 196 | debug = True 197 | for i in range(self.epoch_num): 198 | if i < config['fixed_epoch_num']: 199 | cur_train_op = self.train_op_softmax 200 | else: 201 | cur_train_op = self.train_op 202 | for j in range(self.step_per_epoch): 203 | _, l, l_wd, l_inf, acc, s, _ = sess.run([cur_train_op, self.train_loss, self.wd_loss, self.inference_loss, self.train_acc, self.train_summary, self.inc_op], feed_dict={self.train_phase_dropout: True, self.train_phase_bn: True}) 204 | counter += 1 205 | 206 | print("Epoch: [%2d/%2d] [%6d/%6d] time: %.2f, loss: %.3f (inference: %.3f, wd: %.3f), acc: %.3f" % (i, self.epoch_num, j, self.step_per_epoch, time.time() - start_time, l, l_inf, l_wd, acc)) 207 | start_time = time.time() 208 | if counter % self.val_freq == 0: 209 | saver_ckpt.save(sess, os.path.join(self.checkpoint_dir, 'ckpt-m'), global_step=counter) 210 | acc = [] 211 | with open(self.val_log, 'a') as f: 212 | f.write('step: %d\n' % counter) 213 | for k, v in self.val_data.items(): 214 | imgs, imgs_f, issame = load_bin(v, self.image_size) 215 | embds = self.run_embds(sess, imgs) 216 | embds_f = self.run_embds(sess, imgs_f) 217 | embds = embds/np.linalg.norm(embds, axis=1, keepdims=True)+embds_f/np.linalg.norm(embds_f, axis=1, keepdims=True) 218 | tpr, fpr, acc_mean, acc_std, tar, tar_std, far = evaluate(embds, issame, far_target=1e-3, distance_metric=0) 219 | f.write('eval on %s: acc--%1.5f+-%1.5f, tar--%1.5f+-%1.5f@far=%1.5f\n' % (k, acc_mean, acc_std, tar, tar_std, far)) 220 | acc.append(acc_mean) 221 | acc = np.mean(np.array(acc)) 222 | if acc > best_acc: 223 | saver_best.save(sess, os.path.join(self.model_dir, 'best-m'), global_step=counter) 224 | best_acc = acc 225 | 226 | 227 | if __name__ == '__main__': 228 | args = parse_args() 229 | config = yaml.load(open(args.config_path)) 230 | trainer = Trainer(config) 231 | trainer.train() 232 | 233 | 234 | --------------------------------------------------------------------------------
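After training, embeddings for new face crops are extracted with get_embd.py / evaluate.py. As a minimal sketch of the call pattern only (it mirrors how train_softmax.py builds its graph; the config path, checkpoint path, and dummy input batch below are placeholders, not values taken from this repository):

```
# Minimal embedding-extraction sketch, assuming a trained checkpoint.
# It follows the get_embd(images, is_training_dropout, is_training_bn, config)
# call pattern used in train_softmax.py; paths and the input batch are placeholders.
import yaml
import numpy as np
import tensorflow as tf

from model import get_embd

config = yaml.load(open('./configs/config_ms1m_100.yaml'))
size = config['image_size']

images = tf.placeholder(tf.float32, [None, size, size, 3], name='input_images')
embds, _ = get_embd(images, False, False, config)  # dropout and batch norm in inference mode

with tf.Session() as sess:
    tf.train.Saver().restore(sess, './output/xxx/checkpoints/ckpt-m-20000')  # placeholder checkpoint
    batch = np.zeros([1, size, size, 3], dtype=np.float32)  # replace with aligned 112x112 face crops
    vec = sess.run(embds, feed_dict={images: batch})
    vec = vec / np.linalg.norm(vec, axis=1, keepdims=True)  # L2-normalize, as done for validation
```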