├── data ├── tmp1.py ├── generateTFRecord.py ├── mxrec2folders.py ├── tmp.py └── classificationDataTool.py ├── configs ├── config_ms1m_res101.yaml ├── config_ms1m_res50.yaml ├── config_finetune.yaml ├── config_ms1m_100.yaml └── config_ms1m_200.yaml ├── losses └── logit_loss.py ├── model.py ├── utils.py ├── backbones ├── utils.py ├── ResNet_v2.py ├── ResNet_v1.py └── modifiedResNet_v2.py ├── eval └── utils.py ├── get_embd.py ├── evaluate.py ├── README.md ├── train_softmax.py └── finetune_softmax.py /data/tmp1.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | from scipy import misc 4 | 5 | import numpy as np 6 | import pickle 7 | 8 | 9 | read_path = r'F:\FaceDataset\faces_vgg_112x112\lfw.bin' 10 | save_dir = r'F:\FaceDataset\faces_vgg_112x112\lfw_img_sample' 11 | 12 | 13 | bins, issame_list = pickle.load(open(read_path, 'rb'), encoding='bytes') 14 | cnt = 0 15 | for bin in bins: 16 | img = misc.imread(io.BytesIO(bin)) 17 | print('============================================') 18 | print(img.dtype) 19 | print(np.max(img)) 20 | print(np.min(img)) 21 | print('============================================') 22 | misc.imsave(os.path.join(save_dir, str(cnt)+'.jpg'), img) 23 | cnt += 1 24 | if cnt >= 10: 25 | break -------------------------------------------------------------------------------- /data/generateTFRecord.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | 3 | from classificationDataTool import ClassificationImageData 4 | 5 | 6 | def get_args(): 7 | parser = argparse.ArgumentParser() 8 | 9 | parser.add_argument('--mode', type=str, help='from which to generate TFRecord, folders or mxrec', default='mxrec') 10 | parser.add_argument('--image_size', type=int, help='image size', default=112) 11 | parser.add_argument('--read_dir', type=str, help='directory to read data', default='') 12 | parser.add_argument('--save_path', type=str, help='path to save TFRecord file', default='') 13 | 14 | return parser.parse_args() 15 | 16 | 17 | if __name__ == "__main__": 18 | args = get_args() 19 | cid = ClassificationImageData(img_size=args.image_size) 20 | if args.mode == 'folders': 21 | cid.write_tfrecord_from_folders(args.read_dir, args.save_path) 22 | elif args.mode == 'mxrec': 23 | cid.write_tfrecord_from_mxrec(args.read_dir, args.save_path) 24 | else: 25 | raise('ERROR: wrong mode (only folders and mxrec are supported)') 26 | -------------------------------------------------------------------------------- /data/mxrec2folders.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import io 3 | import os 4 | from scipy import misc 5 | 6 | import numpy as np 7 | 8 | read_dir = r'F:\FaceDataset\faces_webface_112x112' 9 | save_dir = r'F:\FaceDataset\faces_webface_112x112_folders' 10 | 11 | idx_path = os.path.join(read_dir, 'train.idx') 12 | bin_path = os.path.join(read_dir, 'train.rec') 13 | imgrec = mx.recordio.MXIndexedRecordIO(idx_path, bin_path, 'r') 14 | s = imgrec.read_idx(0) 15 | header, _ = mx.recordio.unpack(s) 16 | imgidx = list(range(1, int(header.label[0]))) 17 | total = len(imgidx) 18 | cnt = 0 19 | for i in imgidx: 20 | img_info = imgrec.read_idx(i) 21 | header, img = mx.recordio.unpack(img_info) 22 | l = int(header.label) 23 | img = io.BytesIO(img) 24 | img = misc.imread(img) 25 | cur_save_dir = os.path.join(save_dir, str(l)) 26 | if not os.path.exists(cur_save_dir): 27 | os.makedirs(cur_save_dir) 28 | 
misc.imsave(os.path.join(cur_save_dir, str(cnt)+'.jpg'), img) 29 | cnt += 1 30 | print('%d/%d' % (cnt, total), end='\r') 31 | # if cnt >= 10: 32 | # break -------------------------------------------------------------------------------- /data/tmp.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import io 3 | import os 4 | from scipy import misc 5 | 6 | import numpy as np 7 | 8 | read_dir = r'F:\FaceDataset\faces_webface_112x112' 9 | save_dir = r'F:\FaceDataset\faces_webface_112x112\img_sample' 10 | 11 | if not os.path.exists(save_dir): 12 | os.makedirs(save_dir) 13 | 14 | idx_path = os.path.join(read_dir, 'train.idx') 15 | bin_path = os.path.join(read_dir, 'train.rec') 16 | imgrec = mx.recordio.MXIndexedRecordIO(idx_path, bin_path, 'r') 17 | s = imgrec.read_idx(0) 18 | header, _ = mx.recordio.unpack(s) 19 | imgidx = list(range(1, int(header.label[0]))) 20 | total = len(imgidx) 21 | cnt = 0 22 | for i in imgidx: 23 | img_info = imgrec.read_idx(i) 24 | header, img = mx.recordio.unpack(img_info) 25 | l = int(header.label) 26 | img = io.BytesIO(img) 27 | img = misc.imread(img) 28 | print('============================================') 29 | print(img.dtype) 30 | print(np.max(img)) 31 | print(np.min(img)) 32 | print('============================================') 33 | misc.imsave(os.path.join(save_dir, str(cnt)+'.jpg'), img) 34 | cnt += 1 35 | if cnt >= 100: 36 | break -------------------------------------------------------------------------------- /configs/config_ms1m_res101.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_101 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 | 20 | # run params 21 | val_bn_train: False 22 | augment_flag: True 23 | augment_margin: 16 24 | 25 | gpu_num: 1 26 | batch_size: 128 27 | epoch_num: 20 28 | step_per_epoch: 100000 29 | val_freq: 2000 30 | 31 | lr_steps: [40000, 60000, 80000] 32 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 33 | 34 | # paths 35 | pretrained_model: '' 36 | 37 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 38 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 39 | 40 | output_dir: './output' 41 | 42 | -------------------------------------------------------------------------------- /configs/config_ms1m_res50.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_50 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 
| 20 | # run params 21 | val_bn_train: False 22 | augment_flag: True 23 | augment_margin: 16 24 | 25 | gpu_num: 1 26 | batch_size: 256 27 | epoch_num: 20 28 | step_per_epoch: 100000 29 | val_freq: 2000 30 | 31 | lr_steps: [40000, 60000, 80000] 32 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 33 | 34 | 35 | # paths 36 | pretrained_model: '' 37 | 38 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 39 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 40 | 41 | output_dir: './output' 42 | 43 | -------------------------------------------------------------------------------- /configs/config_finetune.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_m_50 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 | 20 | # run params 21 | fixed_epoch_num: 1 22 | val_bn_train: False 23 | augment_flag: True 24 | augment_margin: 16 25 | 26 | gpu_num: 1 27 | batch_size: 100 28 | epoch_num: 20 29 | step_per_epoch: 100000 30 | val_freq: 2000 31 | 32 | lr_steps: [40000, 60000, 80000] 33 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 34 | 35 | 36 | # paths 37 | pretrained_model: '' 38 | 39 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 40 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 41 | 42 | output_dir: './output' 43 | 44 | -------------------------------------------------------------------------------- /configs/config_ms1m_100.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_m_50 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 | 20 | # run params 21 | val_bn_train: False 22 | augment_flag: True 23 | augment_margin: 16 24 | 25 | gpu_num: 1 26 | batch_size: 100 27 | epoch_num: 20 28 | step_per_epoch: 100000 29 | val_freq: 2000 30 | 31 | lr_steps: [40000, 60000, 80000] 32 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 33 | 34 | 35 | # paths 36 | pretrained_model: 
'/data/hhd/InsightFace-tensorflow/output/20190120-133421/checkpoints/ckpt-m-140000' 37 | 38 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 39 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 40 | 41 | output_dir: './output' 42 | 43 | -------------------------------------------------------------------------------- /configs/config_ms1m_200.yaml: -------------------------------------------------------------------------------- 1 | # model params 2 | backbone_type: resnet_v2_m_50 3 | loss_type: arcface 4 | out_type: E 5 | 6 | image_size: 112 7 | embd_size: 512 8 | class_num: 85742 9 | 10 | 11 | # hyper params 12 | bn_decay: 0.9 13 | keep_prob: 0.4 14 | weight_decay: !!float 5e-4 15 | logits_scale: 64.0 16 | logits_margin: 0.5 17 | momentum: 0.9 18 | 19 | 20 | # run params 21 | val_bn_train: False 22 | augment_flag: True 23 | augment_margin: 16 24 | 25 | gpu_num: 2 26 | batch_size: 200 27 | epoch_num: 20 28 | step_per_epoch: 100000 29 | val_freq: 2000 30 | 31 | lr_steps: [40000, 60000, 80000] 32 | lr_values: [0.004, 0.002, 0.0012, 0.0004] 33 | 34 | 35 | # paths 36 | pretrained_model: '/data/hhd/InsightFace-tensorflow/output/20190122-101014/checkpoints/ckpt-m-124000' 37 | 38 | train_data: ['/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface.tfrecord'] 39 | val_data: {'agedb_30': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/agedb_30.bin', 'lfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/lfw.bin', 'cfp_ff': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_ff.bin', 'cfp_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cfp_fp.bin', 'calfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/calfw.bin', 'cplfw': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/cplfw.bin', 'vgg2_fp': '/data/hhd/dataset/FaceData/InsightFace/faces_ms1m_arcface/vgg2_fp.bin'} 40 | 41 | output_dir: './output' 42 | 43 | -------------------------------------------------------------------------------- /losses/logit_loss.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | import math 5 | 6 | 7 | W_INIT = tf.contrib.layers.xavier_initializer(uniform=False) 8 | 9 | 10 | def get_logits(embds, labels, config, w_init=W_INIT, reuse=False, scope='logits'): 11 | with tf.variable_scope(scope, reuse=reuse): 12 | weights = tf.get_variable(name='classify_weight', shape=[embds.get_shape().as_list()[-1], config['class_num']], dtype=tf.float32, initializer=w_init, regularizer=slim.l2_regularizer(config['weight_decay']), trainable=True) 13 | if config['loss_type'] == 'arcface': 14 | return calculate_arcface_logits(embds, weights, labels, config['class_num'], config['logits_scale'], config['logits_margin']) 15 | elif config['loss_type'] == 'softmax': 16 | return slim.fully_connected(embds, num_outputs=config['class_num'], activation_fn=None, normalizer_fn=None, weights_initializer=w_init, 
weights_regularizer=slim.l2_regularizer(config['weight_decay'])) 17 | else: 18 | raise ValueError('Invalid loss type.') 19 | 20 | 21 | def calculate_arcface_logits(embds, weights, labels, class_num, s, m): 22 | embds = tf.nn.l2_normalize(embds, axis=1, name='normed_embd') 23 | weights = tf.nn.l2_normalize(weights, axis=0) 24 | 25 | cos_m = math.cos(m) 26 | sin_m = math.sin(m) 27 | 28 | mm = sin_m * m 29 | 30 | threshold = math.cos(math.pi - m) 31 | 32 | cos_t = tf.matmul(embds, weights, name='cos_t') 33 | 34 | cos_t2 = tf.square(cos_t, name='cos_2') 35 | sin_t2 = tf.subtract(1., cos_t2, name='sin_2') 36 | sin_t = tf.sqrt(sin_t2, name='sin_t') 37 | cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt') 38 | cond_v = cos_t - threshold 39 | cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool) 40 | keep_val = s*(cos_t - mm) 41 | cos_mt_temp = tf.where(cond, cos_mt, keep_val) 42 | mask = tf.one_hot(labels, depth=class_num, name='one_hot_mask') 43 | inv_mask = tf.subtract(1., mask, name='inverse_mask') 44 | s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t') 45 | output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_logits') 46 | return output 47 | 48 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim as slim 3 | 4 | from backbones import modifiedResNet_v2, ResNet_v2 5 | 6 | 7 | def get_embd(inputs, is_training_dropout, is_training_bn, config, reuse=False, scope='embd_extractor'): 8 | with tf.variable_scope(scope, reuse=reuse): 9 | net = inputs 10 | end_points = {} 11 | if config['backbone_type'].startswith('resnet_v2_m'): 12 | arg_sc = modifiedResNet_v2.resnet_arg_scope(weight_decay=config['weight_decay'], batch_norm_decay=config['bn_decay']) 13 | with slim.arg_scope(arg_sc): 14 | if config['backbone_type'] == 'resnet_v2_m_50': 15 | net, end_points = modifiedResNet_v2.resnet_v2_m_50(net, is_training=is_training_bn, return_raw=True) 16 | elif config['backbone_type'] == 'resnet_v2_m_101': 17 | net, end_points = modifiedResNet_v2.resnet_v2_m_101(net, is_training=is_training_bn, return_raw=True) 18 | elif config['backbone_type'] == 'resnet_v2_m_152': 19 | net, end_points = modifiedResNet_v2.resnet_v2_m_152(net, is_training=is_training_bn, return_raw=True) 20 | elif config['backbone_type'] == 'resnet_v2_m_200': 21 | net, end_points = modifiedResNet_v2.resnet_v2_m_200(net, is_training=is_training_bn, return_raw=True) 22 | else: 23 | raise ValueError('Invalid backbone type.') 24 | elif config['backbone_type'].startswith('resnet_v2'): 25 | arg_sc = ResNet_v2.resnet_arg_scope(weight_decay=config['weight_decay'], batch_norm_decay=config['bn_decay']) 26 | with slim.arg_scope(arg_sc): 27 | if config['backbone_type'] == 'resnet_v2_50': 28 | net, end_points = ResNet_v2.resnet_v2_50(net, is_training=is_training_bn, return_raw=True) 29 | elif config['backbone_type'] == 'resnet_v2_101': 30 | net, end_points = ResNet_v2.resnet_v2_101(net, is_training=is_training_bn, return_raw=True) 31 | elif config['backbone_type'] == 'resnet_v2_152': 32 | net, end_points = ResNet_v2.resnet_v2_152(net, is_training=is_training_bn, return_raw=True) 33 | elif config['backbone_type'] == 'resnet_v2_200': 34 | net, end_points = ResNet_v2.resnet_v2_200(net, is_training=is_training_bn, return_raw=True) 35 | else: 36 | raise ValueError('Invalid backbone type.') 
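        # Output head: with out_type 'E' (the setting used by all bundled configs, matching the
        # BN-Dropout-FC-BN output setting described in the ArcFace paper), the raw backbone feature
        # map below is passed through batch norm -> dropout -> flatten -> fully connected
        # (embd_size) -> batch norm to produce the final embedding. The batch-norm layers follow
        # is_training_bn while dropout follows is_training_dropout, so the two phases can be
        # toggled independently via the train_phase_bn / train_phase_dropout placeholders built in
        # get_embd.py and evaluate.py.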
37 | 38 | if config['out_type'] == 'E': 39 | with slim.arg_scope(arg_sc): 40 | net = slim.batch_norm(net, activation_fn=None, is_training=is_training_bn) 41 | net = slim.dropout(net, keep_prob=config['keep_prob'], is_training=is_training_dropout) 42 | net = slim.flatten(net) 43 | net = slim.fully_connected(net, config['embd_size'], normalizer_fn=None, activation_fn=None) 44 | net = slim.batch_norm(net, scale=False, activation_fn=None, is_training=is_training_bn) 45 | end_points['embds'] = net 46 | else: 47 | raise ValueError('Invalid out type.') 48 | 49 | return net, end_points 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import tensorflow as tf 4 | 5 | 6 | def check_folders(paths): 7 | if isinstance(paths, str): 8 | paths = [paths] 9 | for path in paths: 10 | if not os.path.exists(path): 11 | os.makedirs(path) 12 | 13 | 14 | def average_gradients(tower_grads): 15 | """Calculate the average gradient for each shared variable across all towers. 16 | Note that this function provides a synchronization point across all towers. 17 | Args: 18 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 19 | is over individual gradients. The inner list is over the gradient 20 | calculation for each tower. 21 | Returns: 22 | List of pairs of (gradient, variable) where the gradient has been averaged 23 | across all towers. 24 | """ 25 | average_grads = [] 26 | for grad_and_vars in zip(*tower_grads): 27 | # Note that each grad_and_vars looks like the following: 28 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 29 | grads = [] 30 | for g, _ in grad_and_vars: 31 | # Add 0 dimension to the gradients to represent the tower. 32 | expanded_g = tf.expand_dims(g, 0) 33 | 34 | # Append on a 'tower' dimension which we will average over below. 35 | grads.append(expanded_g) 36 | 37 | # Average over the 'tower' dimension. 38 | grad = tf.concat(axis=0, values=grads) 39 | grad = tf.reduce_mean(grad, 0) 40 | 41 | # Keep in mind that the Variables are redundant because they are shared 42 | # across towers. So .. we will just return the first tower's pointer to 43 | # the Variable. 44 | v = grad_and_vars[0][1] 45 | grad_and_var = (grad, v) 46 | average_grads.append(grad_and_var) 47 | return average_grads 48 | 49 | 50 | def tensor_description(var): 51 | """Returns a compact and informative string about a tensor. 52 | Args: 53 | var: A tensor variable. 54 | Returns: 55 | a string with type and size, e.g.: (float32 1x8x8x1024). 56 | """ 57 | description = '(' + str(var.dtype.name) + ' ' 58 | sizes = var.get_shape() 59 | for i, size in enumerate(sizes): 60 | description += str(size) 61 | if i < len(sizes) - 1: 62 | description += 'x' 63 | description += ')' 64 | return description 65 | 66 | 67 | def analyze_vars(variables, path): 68 | """Prints the names and shapes of the variables. 69 | Args: 70 | variables: list of variables, for example tf.global_variables(). 71 | print_info: Optional, if true print variables and their shape. 72 | Returns: 73 | (total size of the variables, total bytes of the variables) 74 | """ 75 | f = open(path, 'w') 76 | f.write('---------\n') 77 | f.write('Variables: name (type shape) [size]\n') 78 | f.write('---------\n') 79 | total_size = 0 80 | total_bytes = 0 81 | for var in variables: 82 | # if var.num_elements() is None or [] assume size 0. 
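        # var.dtype.size is the byte width of a single element, so var_bytes below is the
        # variable's full in-memory footprint; these per-variable counts feed the totals
        # written at the end of the summary file.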
83 | var_size = var.get_shape().num_elements() or 0 84 | var_bytes = var_size * var.dtype.size 85 | total_size += var_size 86 | total_bytes += var_bytes 87 | f.write(var.name+' '+tensor_description(var)+' '+'[%d, bytes: %d]\n' % (var_size, var_bytes)) 88 | f.write('Total size of variables: %d\n' % total_size) 89 | f.write('Total bytes of variables: %d\n' % total_bytes) 90 | return total_size, total_bytes -------------------------------------------------------------------------------- /backbones/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | from collections import namedtuple 6 | 7 | import tensorflow as tf 8 | import tensorflow.contrib.slim as slim 9 | 10 | 11 | class Block(namedtuple('Block', ['scope', 'unit_fn', 'args'])): 12 | """A named tuple describing a ResNet block. 13 | 14 | Its parts are: 15 | scope: The scope of the `Block`. 16 | unit_fn: The ResNet unit function which takes as input a `Tensor` and returns another `Tensor` with the output of the ResNet unit. 17 | args: A list of length equal to the number of units in the `Block`. The list contains one (depth, depth_bottleneck, stride) tuple for each unit in the block to serve as argument to unit_fn. 18 | """ 19 | 20 | 21 | def subsample(inputs, factor, scope=None): 22 | if factor == 1: 23 | return inputs 24 | else: 25 | return slim.max_pool2d(inputs, [1, 1], stride=factor, scope=scope) # padding='VALID' 26 | 27 | 28 | def conv2d_same(inputs, num_outputs, kernel_size, stride, rate=1, scope=None): 29 | if stride == 1: 30 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=1, rate=rate, padding='SAME', scope=scope) 31 | else: 32 | kernel_size_effective = kernel_size+(kernel_size-1)*(rate-1) 33 | pad_total = kernel_size_effective-1 34 | pad_beg = pad_total//2 35 | pad_end = pad_total-pad_beg 36 | inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]]) # zero padding 37 | return slim.conv2d(inputs, num_outputs, kernel_size, stride=stride, rate=rate, padding='VALID', scope=scope) 38 | 39 | 40 | @slim.add_arg_scope 41 | def stack_blocks_dense(net, blocks, output_stride=None, store_non_strided_activations=False, outputs_collections=None): 42 | current_stride = 1 43 | rate = 1 44 | 45 | for block in blocks: 46 | with tf.variable_scope(block.scope, 'block', [net]) as sc: 47 | block_stride = 1 48 | for i, unit in enumerate(block.args): 49 | if store_non_strided_activations and i == len(block.args)-1: 50 | block_stride = unit.get('stride', 1) 51 | unit = dict(unit, stride=1) 52 | with tf.variable_scope('unit_%d' % (i+1), values=[net]): 53 | if output_stride is not None and current_stride == output_stride: 54 | net = block.unit_fn(net, rate=rate, **dict(unit, stride=1)) 55 | rate *= unit.get('stride', 1) 56 | else: 57 | net = block.unit_fn(net, rate=1, **unit) 58 | current_stride *= unit.get('stride', 1) 59 | if output_stride is not None and current_stride > output_stride: 60 | raise ValueError('The target output_stride cannot be reached.') 61 | net = slim.utils.collect_named_outputs(outputs_collections, sc.name, net) 62 | 63 | if output_stride is not None and current_stride == output_stride: 64 | rate *= block_stride 65 | else: 66 | net = subsample(net, block_stride) 67 | current_stride *= block_stride 68 | if output_stride is not None and current_stride > output_stride: 69 | raise ValueError('The target output_stride cannot be 
reached.') 70 | if output_stride is not None and current_stride != output_stride: 71 | raise ValueError('The target output_stride cannot be reached.') 72 | return net 73 | 74 | 75 | def resnet_arg_scope(weight_decay=0.0001, 76 | batch_norm_decay=0.9, 77 | batch_norm_epsilon=2e-5, 78 | batch_norm_scale=True, 79 | activation_fn=tf.nn.leaky_relu, 80 | use_batch_norm=True, 81 | batch_norm_updates_collections=tf.GraphKeys.UPDATE_OPS): 82 | batch_norm_params = { 83 | 'decay': batch_norm_decay, 84 | 'epsilon': batch_norm_epsilon, 85 | 'scale': batch_norm_scale, 86 | 'updates_collections': batch_norm_updates_collections, 87 | 'fused': None, # Use fused batch norm if possible. 88 | 'param_regularizers': {'gamma': slim.l2_regularizer(weight_decay)}, 89 | } 90 | 91 | with slim.arg_scope( 92 | [slim.conv2d], 93 | weights_regularizer=slim.l2_regularizer(weight_decay), 94 | weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False), 95 | activation_fn=activation_fn, 96 | normalizer_fn=slim.batch_norm if use_batch_norm else None, 97 | normalizer_params=batch_norm_params): 98 | with slim.arg_scope([slim.batch_norm], **batch_norm_params): 99 | with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc: 100 | return arg_sc -------------------------------------------------------------------------------- /eval/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | import numpy as np 4 | 5 | from sklearn.model_selection import KFold 6 | from scipy import interpolate 7 | 8 | 9 | def distance(embeddings1, embeddings2, distance_metric=0): 10 | if distance_metric==0: 11 | # Euclidian distance 12 | embeddings1 = embeddings1/np.linalg.norm(embeddings1, axis=1, keepdims=True) 13 | embeddings2 = embeddings2/np.linalg.norm(embeddings2, axis=1, keepdims=True) 14 | diff = np.subtract(embeddings1, embeddings2) 15 | dist = np.sum(np.square(diff),1) 16 | elif distance_metric==1: 17 | # Distance based on cosine similarity 18 | dot = np.sum(np.multiply(embeddings1, embeddings2), axis=1) 19 | norm = np.linalg.norm(embeddings1, axis=1) * np.linalg.norm(embeddings2, axis=1) 20 | similarity = dot/norm 21 | dist = np.arccos(similarity) / math.pi 22 | else: 23 | raise 'Undefined distance metric %d' % distance_metric 24 | 25 | return dist 26 | 27 | 28 | def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, distance_metric=0, nrof_folds=10): 29 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 30 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 31 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 32 | nrof_thresholds = len(thresholds) 33 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 34 | 35 | tprs = np.zeros((nrof_folds,nrof_thresholds)) 36 | fprs = np.zeros((nrof_folds,nrof_thresholds)) 37 | accuracy = np.zeros((nrof_folds)) 38 | 39 | dist = distance(embeddings1, embeddings2, distance_metric) 40 | indices = np.arange(nrof_pairs) 41 | 42 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 43 | 44 | # Find the best threshold for the fold 45 | acc_train = np.zeros((nrof_thresholds)) 46 | for threshold_idx, threshold in enumerate(thresholds): 47 | _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set]) 48 | best_threshold_index = np.argmax(acc_train) 49 | for threshold_idx, threshold in enumerate(thresholds): 50 | tprs[fold_idx,threshold_idx], fprs[fold_idx,threshold_idx], _ = calculate_accuracy(threshold, dist[test_set], 
actual_issame[test_set]) 51 | _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set], actual_issame[test_set]) 52 | 53 | tpr = np.mean(tprs,0) 54 | fpr = np.mean(fprs,0) 55 | return tpr, fpr, accuracy 56 | 57 | def calculate_accuracy(threshold, dist, actual_issame): 58 | predict_issame = np.less(dist, threshold) 59 | tp = np.sum(np.logical_and(predict_issame, actual_issame)) 60 | fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 61 | tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame))) 62 | fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame)) 63 | 64 | tpr = 0 if (tp+fn==0) else float(tp) / float(tp+fn) 65 | fpr = 0 if (fp+tn==0) else float(fp) / float(fp+tn) 66 | acc = float(tp+tn)/dist.size 67 | return tpr, fpr, acc 68 | 69 | 70 | def calculate_tar_far(threshold, dist, actual_issame): 71 | predict_issame = np.less(dist, threshold) 72 | true_accept = np.sum(np.logical_and(predict_issame, actual_issame)) 73 | false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame))) 74 | n_same = np.sum(actual_issame) 75 | n_diff = np.sum(np.logical_not(actual_issame)) 76 | tar = float(true_accept) / float(n_same) 77 | far = float(false_accept) / float(n_diff) 78 | return tar, far 79 | 80 | 81 | def calculate_tar(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10, distance_metric=0, subtract_mean=False): 82 | assert(embeddings1.shape[0] == embeddings2.shape[0]) 83 | assert(embeddings1.shape[1] == embeddings2.shape[1]) 84 | nrof_pairs = min(len(actual_issame), embeddings1.shape[0]) 85 | nrof_thresholds = len(thresholds) 86 | k_fold = KFold(n_splits=nrof_folds, shuffle=False) 87 | 88 | tar = np.zeros(nrof_folds) 89 | far = np.zeros(nrof_folds) 90 | 91 | indices = np.arange(nrof_pairs) 92 | 93 | for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)): 94 | if subtract_mean: 95 | mean = np.mean(np.concatenate([embeddings1[train_set], embeddings2[train_set]]), axis=0) 96 | else: 97 | mean = 0.0 98 | dist = distance(embeddings1-mean, embeddings2-mean, distance_metric) 99 | 100 | # Find the threshold that gives FAR = far_target 101 | far_train = np.zeros(nrof_thresholds) 102 | for threshold_idx, threshold in enumerate(thresholds): 103 | _, far_train[threshold_idx] = calculate_tar_far(threshold, dist[train_set], actual_issame[train_set]) 104 | if np.max(far_train)>=far_target: 105 | f = interpolate.interp1d(far_train, thresholds, kind='slinear') 106 | threshold = f(far_target) 107 | else: 108 | threshold = 0.0 109 | 110 | tar[fold_idx], far[fold_idx] = calculate_tar_far(threshold, dist[test_set], actual_issame[test_set]) 111 | 112 | tar_mean = np.mean(tar) 113 | far_mean = np.mean(far) 114 | tar_std = np.std(tar) 115 | return tar_mean, tar_std, far_mean -------------------------------------------------------------------------------- /data/classificationDataTool.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | from scipy import misc 4 | import numpy as np 5 | import random 6 | import sys 7 | import io 8 | 9 | 10 | def to_rgb(img): 11 | if img.ndim < 3: 12 | h, w = img.shape 13 | ret = np.empty((h, w, 3), dtype=np.uint8) 14 | ret[:, :, 0] = ret[:, :, 1] = ret[:, :, 2] = img 15 | return ret 16 | else: 17 | return img 18 | 19 | 20 | def augmentation(image, aug_img_size): 21 | ori_image_shape = tf.shape(image) 22 | image = 
tf.image.random_flip_left_right(image) 23 | # image = tf.image.resize_images(image, [aug_img_size, aug_img_size]) 24 | # image = tf.random_crop(image, ori_image_shape) 25 | return image 26 | 27 | 28 | class ClassificationImageData: 29 | 30 | def __init__(self, img_size=112, augment_flag=True, augment_margin=16): 31 | self.img_size = img_size 32 | self.augment_flag = augment_flag 33 | self.augment_margin = augment_margin 34 | 35 | 36 | def get_path_label(self, root): 37 | ids = list(os.listdir(root)) 38 | ids.sort() 39 | self.cat_num = len(ids) 40 | id_dict = dict(zip(ids, list(range(self.cat_num)))) 41 | paths = [] 42 | labels = [] 43 | for i in ids: 44 | cur_dir = os.path.join(root, i) 45 | fns = os.listdir(cur_dir) 46 | paths += [os.path.join(cur_dir, fn) for fn in fns] 47 | labels += [id_dict[i]]*len(fns) 48 | return paths, labels 49 | 50 | 51 | def image_processing(self, img): 52 | img.set_shape([None, None, 3]) 53 | img = tf.image.resize_images(img, [self.img_size, self.img_size]) 54 | 55 | if self.augment_flag : 56 | augment_size = self.img_size + self.augment_margin 57 | img = augmentation(img, augment_size) 58 | 59 | img = tf.cast(img, tf.float32) / 127.5 - 1 60 | 61 | return img 62 | 63 | 64 | def add_record(self, img, label, writer): 65 | img = to_rgb(img) 66 | img = misc.imresize(img, [self.img_size, self.img_size]).astype(np.uint8) 67 | shape = img.shape 68 | tf_features = tf.train.Features(feature={ 69 | "img": tf.train.Feature(bytes_list=tf.train.BytesList(value=[img.tostring()])), 70 | "shape": tf.train.Feature(int64_list=tf.train.Int64List(value=list(shape))), 71 | "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])) 72 | }) 73 | tf_example = tf.train.Example(features = tf_features) 74 | tf_serialized = tf_example.SerializeToString() 75 | writer.write(tf_serialized) 76 | 77 | 78 | def write_tfrecord_from_folders(self, read_dir, write_path): 79 | print('write tfrecord from folders...') 80 | writer = tf.python_io.TFRecordWriter(write_path, options=None) 81 | paths, labels = self.get_path_label(read_dir) 82 | assert(len(paths) == len(labels)) 83 | total = len(paths) 84 | cnt = 0 85 | for p, l in zip(paths, labels): 86 | img = misc.imread(p).astype(np.uint8) 87 | self.add_record(img, l, writer) 88 | cnt += 1 89 | print('%d/%d' % (cnt, total), end='\r') 90 | writer.close() 91 | print('done![%d/%d]' % (cnt, total)) 92 | print('class num: %d' % self.cat_num) 93 | 94 | 95 | def write_tfrecord_from_mxrec(self, read_dir, write_path): 96 | import mxnet as mx 97 | print('write tfrecord from mxrec...') 98 | idx_path = os.path.join(read_dir, 'train.idx') 99 | bin_path = os.path.join(read_dir, 'train.rec') 100 | imgrec = mx.recordio.MXIndexedRecordIO(idx_path, bin_path, 'r') 101 | s = imgrec.read_idx(0) 102 | header, _ = mx.recordio.unpack(s) 103 | imgidx = list(range(1, int(header.label[0]))) 104 | writer = tf.python_io.TFRecordWriter(write_path, options=None) 105 | total = len(imgidx) 106 | cnt = 0 107 | labels = [] 108 | for i in imgidx: 109 | img_info = imgrec.read_idx(i) 110 | header, img = mx.recordio.unpack(img_info) 111 | l = int(header.label) 112 | labels.append(l) 113 | img = io.BytesIO(img) 114 | img = misc.imread(img).astype(np.uint8) 115 | self.add_record(img, l, writer) 116 | cnt += 1 117 | print('%d/%d' % (cnt, total), end='\r') 118 | writer.close() 119 | self.cat_num = len(set(labels)) 120 | print('done![%d/%d]' % (cnt, total)) 121 | print('class num: %d' % self.cat_num) 122 | 123 | 124 | def parse_function(self, example_proto): 125 | dics = { 126 | 
'img': tf.FixedLenFeature(shape=(), dtype=tf.string), 127 | 'shape': tf.FixedLenFeature(shape=(3,), dtype=tf.int64), 128 | 'label': tf.FixedLenFeature(shape=(), dtype=tf.int64) 129 | } 130 | parsed_example = tf.parse_single_example(example_proto, dics) 131 | parsed_example['img'] = tf.decode_raw(parsed_example['img'], tf.uint8) 132 | parsed_example['img'] = tf.reshape(parsed_example['img'], parsed_example['shape']) 133 | return self.image_processing(parsed_example['img']), parsed_example['label'] 134 | 135 | 136 | def read_TFRecord(self, filenames): 137 | dataset = tf.data.TFRecordDataset(filenames, buffer_size=256<<20) 138 | return dataset.map(self.parse_function, num_parallel_calls=8) 139 | 140 | -------------------------------------------------------------------------------- /get_embd.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import yaml 4 | import pickle 5 | import argparse 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from scipy import misc 10 | 11 | from model import get_embd 12 | from eval.utils import calculate_roc, calculate_tar 13 | 14 | 15 | def get_args(): 16 | parser = argparse.ArgumentParser() 17 | 18 | parser.add_argument('--mode', type=str, default='build', help='model mode: build') 19 | parser.add_argument('--config_path', type=str, default='./configs/config_ms1m_100.yaml', help='config path, used when mode is build') 20 | parser.add_argument('--model_path', type=str, default='/data/hhd/InsightFace-tensorflow/output/20190116-130753/checkpoints/ckpt-m-116000', help='model path') 21 | parser.add_argument('--read_path', type=str, default='', help='path to image file or directory to images') 22 | parser.add_argument('--save_path', type=str, default='embds.pkl', help='path to save embds') 23 | parser.add_argument('--train_mode', type=int, default=0, help='whether set train phase to True when getting embds. 
zero means False, one means True') 24 | 25 | return parser.parse_args() 26 | 27 | 28 | def load_image(path, image_size): 29 | print('reading %s' % path) 30 | if os.path.isdir(path): 31 | paths = list(os.listdir(path)) 32 | else: 33 | paths = [path] 34 | images = [] 35 | images_f = [] 36 | for path in paths: 37 | img = misc.imread(path) 38 | img = misc.imresize(img, [image_size, image_size]) 39 | # img = img[s:s+image_size, s:s+image_size, :] 40 | img_f = np.fliplr(img) 41 | img = img/127.5-1.0 42 | img_f = img_f/127.5-1.0 43 | images.append(img) 44 | images_f.append(img_f) 45 | fns = [os.path.basename(p) for p in paths] 46 | print('done!') 47 | return (np.array(images), np.array(images_f), fns) 48 | 49 | 50 | 51 | def evaluate(embeddings, actual_issame, far_target=1e-3, distance_metric=0, nrof_folds=10): 52 | thresholds = np.arange(0, 4, 0.01) 53 | if distance_metric == 1: 54 | thresholdes = np.arange(0, 1, 0.0025) 55 | embeddings1 = embeddings[0::2] 56 | embeddings2 = embeddings[1::2] 57 | tpr, fpr, accuracy = calculate_roc(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), distance_metric=distance_metric, nrof_folds=nrof_folds) 58 | tar, tar_std, far = calculate_tar(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), far_target=far_target, distance_metric=distance_metric, nrof_folds=nrof_folds) 59 | acc_mean = np.mean(accuracy) 60 | acc_std = np.std(accuracy) 61 | return tpr, fpr, acc_mean, acc_std, tar, tar_std, far 62 | 63 | 64 | def run_embds(sess, images, batch_size, image_size, train_mode, embds_ph, image_ph, train_ph_dropout, train_ph_bn): 65 | if train_mode >= 1: 66 | train = True 67 | else: 68 | train = False 69 | batch_num = len(images)//batch_size 70 | left = len(images)%batch_size 71 | embds = [] 72 | for i in range(batch_num): 73 | image_batch = images[i*batch_size: (i+1)*batch_size] 74 | cur_embd = sess.run(embds_ph, feed_dict={image_ph: image_batch, train_ph_dropout: train, train_ph_bn: train}) 75 | embds += list(cur_embd) 76 | print('%d/%d' % (i, batch_num), end='\r') 77 | if left > 0: 78 | image_batch = np.zeros([batch_size, image_size, image_size, 3]) 79 | image_batch[:left, :, :, :] = images[-left:] 80 | cur_embd = sess.run(embds_ph, feed_dict={image_ph: image_batch, train_ph_dropout: train, train_ph_bn: train}) 81 | embds += list(cur_embd)[:left] 82 | print() 83 | print('done!') 84 | return np.array(embds) 85 | 86 | 87 | if __name__ == '__main__': 88 | args = get_args() 89 | if args.mode == 'build': 90 | print('building...') 91 | config = yaml.load(open(args.config_path)) 92 | images = tf.placeholder(dtype=tf.float32, shape=[None, config['image_size'], config['image_size'], 3], name='input_image') 93 | train_phase_dropout = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase') 94 | train_phase_bn = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_last') 95 | embds, _ = get_embd(images, train_phase_dropout, train_phase_bn, config) 96 | print('done!') 97 | tf_config = tf.ConfigProto(allow_soft_placement=True) 98 | tf_config.gpu_options.allow_growth = True 99 | with tf.Session(config=tf_config) as sess: 100 | tf.global_variables_initializer().run() 101 | print('loading...') 102 | saver = tf.train.Saver(var_list=tf.trainable_variables()) 103 | saver.restore(sess, args.model_path) 104 | print('done!') 105 | 106 | batch_size = config['batch_size'] 107 | imgs, imgs_f, fns = load_image(args.read_path, config['image_size']) 108 | print('forward running...') 109 | embds_arr = run_embds(sess, imgs, batch_size, config['image_size'], 
args.train_mode, embds, images, train_phase_dropout, train_phase_bn) 110 | embds_f_arr = run_embds(sess, imgs_f, batch_size, config['image_size'], args.train_mode, embds, images, train_phase_dropout, train_phase_bn) 111 | embds_arr = embds_arr/np.linalg.norm(embds_arr, axis=1, keepdims=True)+embds_f_arr/np.linalg.norm(embds_f_arr, axis=1, keepdims=True) 112 | embds_arr = embds_arr/np.linalg.norm(embds_arr, axis=1, keepdims=True) 113 | print('done!') 114 | print('saving...') 115 | embds_dict = dict(*zip(fns, list(embds_arr))) 116 | pickle.dump(embds_dict, open(args.save_path, 'wb')) 117 | print('done!') 118 | 119 | -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | import yaml 4 | import pickle 5 | import argparse 6 | import numpy as np 7 | import tensorflow as tf 8 | 9 | from scipy import misc 10 | 11 | from model import get_embd 12 | from eval.utils import calculate_roc, calculate_tar 13 | 14 | 15 | def get_args(): 16 | parser = argparse.ArgumentParser() 17 | 18 | parser.add_argument('--mode', type=str, default='build', help='model mode: build') 19 | parser.add_argument('--config_path', type=str, default='./configs/config_ms1m_100.yaml', help='config path, used when mode is build') 20 | parser.add_argument('--model_path', type=str, default='/data/hhd/InsightFace-tensorflow/output/20190116-130753/checkpoints/ckpt-m-116000', help='model path') 21 | parser.add_argument('--val_data', type=str, default='', help='val data, a dict with key as data name, value as data path') 22 | parser.add_argument('--train_mode', type=int, default=0, help='whether set train phase to True when getting embds. zero means False, one means True') 23 | parser.add_argument('--target_far', type=float, default=1e-3, help='target far when calculate tar') 24 | 25 | return parser.parse_args() 26 | 27 | 28 | def load_bin(path, image_size): 29 | print('reading %s' % path) 30 | bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes') 31 | num = len(bins) 32 | images = np.zeros(shape=[num, image_size, image_size, 3], dtype=np.float32) 33 | images_f = np.zeros(shape=[num, image_size, image_size, 3], dtype=np.float32) 34 | # m = config['augment_margin'] 35 | # s = int(m/2) 36 | cnt = 0 37 | for bin in bins: 38 | img = misc.imread(io.BytesIO(bin)) 39 | img = misc.imresize(img, [image_size, image_size]) 40 | # img = img[s:s+image_size, s:s+image_size, :] 41 | img_f = np.fliplr(img) 42 | img = img/127.5-1.0 43 | img_f = img_f/127.5-1.0 44 | images[cnt] = img 45 | images_f[cnt] = img_f 46 | cnt += 1 47 | print('done!') 48 | return (images, images_f, issame_list) 49 | 50 | 51 | 52 | def evaluate(embeddings, actual_issame, far_target=1e-3, distance_metric=0, nrof_folds=10): 53 | thresholds = np.arange(0, 4, 0.01) 54 | if distance_metric == 1: 55 | thresholdes = np.arange(0, 1, 0.0025) 56 | embeddings1 = embeddings[0::2] 57 | embeddings2 = embeddings[1::2] 58 | tpr, fpr, accuracy = calculate_roc(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), distance_metric=distance_metric, nrof_folds=nrof_folds) 59 | tar, tar_std, far = calculate_tar(thresholds, embeddings1, embeddings2, np.asarray(actual_issame), far_target=far_target, distance_metric=distance_metric, nrof_folds=nrof_folds) 60 | acc_mean = np.mean(accuracy) 61 | acc_std = np.std(accuracy) 62 | return tpr, fpr, acc_mean, acc_std, tar, tar_std, far 63 | 64 | 65 | def run_embds(sess, images, batch_size, 
image_size, train_mode, embds_ph, image_ph, train_ph_dropout, train_ph_bn): 66 | if train_mode >= 1: 67 | train = True 68 | else: 69 | train = False 70 | batch_num = len(images)//batch_size 71 | left = len(images)%batch_size 72 | embds = [] 73 | for i in range(batch_num): 74 | image_batch = images[i*batch_size: (i+1)*batch_size] 75 | cur_embd = sess.run(embds_ph, feed_dict={image_ph: image_batch, train_ph_dropout: train, train_ph_bn: train}) 76 | embds += list(cur_embd) 77 | print('%d/%d' % (i, batch_num), end='\r') 78 | if left > 0: 79 | image_batch = np.zeros([batch_size, image_size, image_size, 3]) 80 | image_batch[:left, :, :, :] = images[-left:] 81 | cur_embd = sess.run(embds_ph, feed_dict={image_ph: image_batch, train_ph_dropout: train, train_ph_bn: train}) 82 | embds += list(cur_embd)[:left] 83 | print() 84 | print('done!') 85 | return np.array(embds) 86 | 87 | 88 | if __name__ == '__main__': 89 | args = get_args() 90 | if args.mode == 'build': 91 | print('building...') 92 | config = yaml.load(open(args.config_path)) 93 | images = tf.placeholder(dtype=tf.float32, shape=[None, config['image_size'], config['image_size'], 3], name='input_image') 94 | train_phase_dropout = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase') 95 | train_phase_bn = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_last') 96 | embds, _ = get_embd(images, train_phase_dropout, train_phase_bn, config) 97 | print('done!') 98 | tf_config = tf.ConfigProto(allow_soft_placement=True) 99 | tf_config.gpu_options.allow_growth = True 100 | with tf.Session(config=tf_config) as sess: 101 | tf.global_variables_initializer().run() 102 | print('loading...') 103 | saver = tf.train.Saver() 104 | saver.restore(sess, args.model_path) 105 | print('done!') 106 | 107 | batch_size = config['batch_size'] 108 | # batch_size = 32 109 | print('evaluating...') 110 | val_data = {} 111 | if args.val_data == '': 112 | val_data = config['val_data'] 113 | else: 114 | val_data[os.path.basename(args.val_data)] = args.val_data 115 | for k, v in val_data.items(): 116 | imgs, imgs_f, issame = load_bin(v, config['image_size']) 117 | print('forward running...') 118 | embds_arr = run_embds(sess, imgs, batch_size, config['image_size'], args.train_mode, embds, images, train_phase_dropout, train_phase_bn) 119 | embds_f_arr = run_embds(sess, imgs_f, batch_size, config['image_size'], args.train_mode, embds, images, train_phase_dropout, train_phase_bn) 120 | embds_arr = embds_arr/np.linalg.norm(embds_arr, axis=1, keepdims=True)+embds_f_arr/np.linalg.norm(embds_f_arr, axis=1, keepdims=True) 121 | print('done!') 122 | tpr, fpr, acc_mean, acc_std, tar, tar_std, far = evaluate(embds_arr, issame, far_target=args.target_far, distance_metric=0) 123 | print('eval on %s: acc--%1.5f+-%1.5f, tar--%1.5f+-%1.5f@far=%1.5f' % (k, acc_mean, acc_std, tar, tar_std, far)) 124 | print('done!') 125 | else: 126 | raise ValueError("Invalid value for --mode.") 127 | 128 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # InsightFace-tensorflow 2 | 3 | This is a tensorflow implementation of paper "[ArcFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)". This implementation aims at making both usage of pretrained model and training of your own model easier. 
Whether you just want to use pretrained model to do face recognition/verification or you want train/finetune your own model, this project can give you a favor. An introduction on face recognition losses can be found [here](https://luckycallor.xyz/20190123/FaceLosses.html)(in Chinese). 4 | 5 | The implementation referred to [the official implementation in mxnet](https://github.com/deepinsight/insightface) and [the previous third-party implementation in tensorflow](https://github.com/auroua/InsightFace_TF). 6 | 7 | - [InsightFace-tensorflow](#insightface-tensorflow) 8 | - [TODO List](#todo-list) 9 | - [Running Environment](#running-environment) 10 | - [Usage of Pretrained Model](#usage-of-pretrained-model) 11 | - [Pretrained Model](#pretrained-model) 12 | - [Model Evaluation](#model-evaluation) 13 | - [Extract Embedding with Pretrained Model](#extract-embedding-with-pretrained-model) 14 | - [Train Your Own Model](#train-your-own-model) 15 | - [Data Prepare](#data-prepare) 16 | - [Train with Softmax](#train-with-softmax) 17 | - [Finetune with Softmax](#finetune-with-softmax) 18 | 19 | ## TODO List 20 | 21 | 1. *Train with softmax [done!]* 22 | 2. *Model evaluation [done!]* 23 | 3. *Finetune with softmax [done!]* 24 | 4. *Get embedding with pretrained model [done!]* 25 | 5. **Train with triplet loss [todo]** 26 | 6. **Finetune with triplet loss [todo]** 27 | 7. Backbones 28 | 7.1 *ResNet [done!]* 29 | 7.2 **ResNeXt [todo]** 30 | 7.3 **DenseNet [todo]** 31 | 8. Losses 32 | 8.1 *Arcface loss [done!]* 33 | 8.2 **Cosface loss [todo]** 34 | 8.3 **Sphereface loss [todo]** 35 | 8.4 **Triplet loss [todo]** 36 | 9. **Face detection and alignment [todo]** 37 | 38 | ## Running Environment 39 | 40 | - python 3.6 41 | - scipy, numpy (Anaconda 3 recommended) 42 | - tensorflow 1.7.0 43 | - mxnet 1.3.1 (only needed when reading mxrec file) 44 | 45 | ## Usage of Pretrained Model 46 | 47 | Here we open our pretrained models for easier application of face recognition or verification. Codes on model evaluation and extracting embedding from face images are supplied. 
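For a quick picture of how the extracted embeddings are meant to be consumed, the sketch below verifies a pair of faces with a plain dot product. It assumes an `embd.pkl` file produced by [get_embd.py](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/get_embd.py) as described in [Extract Embedding with Pretrained Model](#extract-embedding-with-pretrained-model) below; the image file names and the 0.5 decision threshold are placeholders, not values shipped with this project.

```
import pickle

import numpy as np

# embd.pkl is written by get_embd.py: a dict mapping image file name -> embedding
# (512-d with the bundled configs), already L2-normalized before saving
with open('embd.pkl', 'rb') as f:
    embds = pickle.load(f)

# 'a.jpg' and 'b.jpg' are placeholder keys; use file names from your own --read_path
e1, e2 = embds['a.jpg'], embds['b.jpg']

# for unit-norm vectors, cosine similarity reduces to a dot product
similarity = float(np.dot(e1, e2))

# 0.5 is only an example threshold; tune it on your own validation pairs
print('same person' if similarity > 0.5 else 'different persons')
```

A higher threshold trades recall for precision; the validation `.bin` files used by evaluate.py are a convenient source of labeled pairs for picking it.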
48 | 49 | ### Pretrained Model 50 | 51 | Pretrained models and their accuracies on validation datasets are shown as following: 52 | 53 | |config|lfw|calfw|cplfw|agedb_30|cfp_ff|cfp_fp|vgg2_fp|steps|download| 54 | |:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:|:----:| 55 | |[ms1m_100](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/configs/config_ms1m_100.yaml)|99.53%|93.92%|87.85%|94.18%|99.29%|94.73%|93.9%|334k|[baidu](https://pan.baidu.com/s/1Zr91ZYWTXJDlG63XLqNdzQ)| 56 | |[ms1m_100](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/configs/config_ms1m_100.yaml)|99.53%|94.68%|89.75%|95.20%|99.54%|96.30%|94.84%|1006k|[baidu](https://pan.baidu.com/s/1v1L3c7cEs_GyqPYH9WhNKA), [google](https://drive.google.com/open?id=107Qu56o1IwQxH61Q6smZk-DO2-xU6EwE)| 57 | |[ms1m_200](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/configs/config_ms1m_200.yaml)|99.43%|94.40%|88.23%|94.58%|99.29%|94.77%|93.9%|200k|[baidu](https://pan.baidu.com/s/1q3kXkhjtclXD-eQgZC5gBA)| 58 | 59 | ### Model Evaluation 60 | 61 | You can evaluate a pretrained model with [evaluate.py](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/evaluate.py) by specifying the config path and model path, for example: 62 | 63 | ``` 64 | python evaluate.py 65 | --config_path=./configs/config_ms1m_100.yaml 66 | --model_path=$DIRECTORY_TO_PRETRAINED_MODEL$/best-m-150000 67 | ``` 68 | 69 | This will evaluate the pretrained model on validation datasets specified in the config file. If you want to evaluate the model on other validation dataset, you can specify it by --val_data as following: 70 | 71 | ``` 72 | python evaluate.py 73 | --config_path=./configs/config_ms1m_100.yaml 74 | --model_path=$DIRECTORY_TO_PRETRAINED_MODEL$/best-m-150000 75 | --val_data=$DIRECTORY_TO_VAL_DATA$/xxx.bin 76 | ``` 77 | 78 | ### Extract Embedding with Pretrained Model 79 | 80 | You can extract embedding from face images with [get_embd.py](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/get_embd.py) by the following script: 81 | 82 | ``` 83 | python get_embd.py 84 | --config_path=./configs/config_ms1m_100.yaml 85 | --model_path=$DIRECTORY_TO_PRETRAINED_MODEL$/best-m-150000 86 | --read_path=$PATH_TO_FACE_IMAGES$ 87 | --save_path=$SAVING_DIRECTORY$/embd.pkl 88 | ``` 89 | 90 | where config_path and model_path specify the config file and pretrained model respectively. read_path is path to face images, that can be a path to one image or a directory with only images in it. save_path specifies where to save the embedding. The saved file is a dict with image file name as key, the corresponding embedding as value, and can be loaded with pickle in python. Note that face images should be well cropped here. 91 | 92 | ## Train Your Own Model 93 | 94 | If you want train your own model from scratch, or finetune pretrained model with your own data, here is what you should do. 95 | 96 | ### Data Prepare 97 | 98 | The official InsightFace project open their training data in the [DataZoo](https://github.com/deepinsight/insightface/wiki/Dataset-Zoo). 
This data is in mxrec format, you can transform it to tfrecord format with [./data/generateTFRecord.py](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/data/generateTFRecord.py) by the following script: 99 | 100 | ``` 101 | python generateTFRecord.py 102 | --mode=mxrec 103 | --image_size=112 104 | --read_dir=$DIRECTORY_TO_THE_TRAINING_DATA$ 105 | --save_path=$DIRECTORY_TO_SAVE_TFRECORD_FILE$/xxx.tfrecord 106 | ``` 107 | 108 | Or, if you want to train the model with your own data, you can prepare the tfrecord file by the following script: 109 | 110 | ``` 111 | python generateTFRecord.py 112 | --mode=folders 113 | --image_size=112 114 | --read_dir=$DIRECTORY_TO_THE_TRAINING_DATA$ 115 | --save_path=$DIRECTORY_TO_SAVE_TFRECORD_FILE$/xxx.tfrecord 116 | ``` 117 | 118 | Here, the read_dir should be the directory to your own face images, where images to one person are saved in one folder. The directory should have a structure like this: 119 | 120 | ``` 121 | read_dir/ 122 | - id1/ 123 | -- id1_1.jpg 124 | ... 125 | - id2/ 126 | -- id2_1.jpg 127 | ... 128 | - id3/ 129 | -- id3_1.jpg 130 | -- id3_2.jpg 131 | ... 132 | ... 133 | ``` 134 | 135 | ### Train with Softmax 136 | 137 | To train your own model with softmax, firstly you should prepare a config file like those in [./configs](https://github.com/luckycallor/InsightFace-tensorflow/tree/master/configs). It is recommended to modify one example config file to your own config. Secondly, the following script starts training: 138 | 139 | ``` 140 | python train_softmax.py --config_path=./configs/config_ms1m_100.yaml 141 | ``` 142 | 143 | ### Finetune with Softmax 144 | 145 | To finetune a pretrained model with your own data, you should prepare a finetune config file like [./configs/config_finetune.yaml](https://github.com/luckycallor/InsightFace-tensorflow/blob/master/configs/config_finetune.yaml), and start training by the following script: 146 | 147 | ``` 148 | python finetune_softmax.py --config_path=./configs/config_finetune.yaml 149 | ``` -------------------------------------------------------------------------------- /backbones/ResNet_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | 8 | from backbones import utils 9 | 10 | resnet_arg_scope = utils.resnet_arg_scope 11 | 12 | 13 | @slim.add_arg_scope 14 | def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, 15 | outputs_collections=None, scope=None): 16 | with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: 17 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 18 | preact = slim.batch_norm(inputs, activation_fn=tf.nn.relu, scope='preact') 19 | if depth == depth_in: 20 | shortcut = utils.subsample(inputs, stride, 'shortcut') 21 | else: 22 | shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, normalizer_fn=None, activation_fn=None, scope='shortcut') 23 | 24 | residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1') 25 | residual = utils.conv2d_same(residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') 26 | residual = slim.conv2d(residual, depth, [1, 1], stride=1, normalizer_fn=None, activation_fn=None, scope='conv3') 27 | 28 | output = shortcut + residual 29 | 30 | return slim.utils.collect_named_outputs(outputs_collections, sc.name, output) 31 | 32 | 33 | def 
resnet_v2(inputs, 34 | blocks, 35 | num_classes=None, 36 | is_training=True, 37 | return_raw=True, 38 | global_pool=True, 39 | output_stride=None, 40 | include_root_block=True, 41 | spatial_squeeze=True, 42 | reuse=None, 43 | scope=None): 44 | with tf.variable_scope(scope, 'resnet_v2', [inputs], reuse=reuse) as sc: 45 | end_points_collection = sc.original_name_scope + '_end_points' 46 | with slim.arg_scope([slim.conv2d, bottleneck, utils.stack_blocks_dense], outputs_collections=end_points_collection): 47 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 48 | net = inputs 49 | if include_root_block: 50 | if output_stride is not None: 51 | if output_stride % 4 != 0: 52 | raise ValueError('The output_stride needs to be a multiple of 4.') 53 | output_stride /= 4 54 | with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None): 55 | net = utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') 56 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') 57 | net = utils.stack_blocks_dense(net, blocks, output_stride) 58 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 59 | if return_raw: 60 | return net, end_points 61 | 62 | net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') 63 | end_points[sc.name + '/postnorm'] = net 64 | 65 | if global_pool: 66 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) 67 | end_points['global_pool'] = net 68 | 69 | if num_classes: 70 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') 71 | end_points[sc.name + '/logits'] = net 72 | if spatial_squeeze: 73 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') 74 | end_points[sc.name + '/spatial_squeeze'] = net 75 | end_points['predictions'] = slim.softmax(net, scope='predictions') 76 | return net, end_points 77 | resnet_v2.default_image_size = 224 78 | 79 | 80 | def resnet_v2_block(scope, base_depth, num_units, stride): 81 | return utils.Block(scope, bottleneck, [{ 82 | 'depth': base_depth * 4, 83 | 'depth_bottleneck': base_depth, 84 | 'stride': 1 85 | }] * (num_units - 1) + [{ 86 | 'depth': base_depth * 4, 87 | 'depth_bottleneck': base_depth, 88 | 'stride': stride 89 | }]) 90 | resnet_v2.default_image_size = 224 91 | 92 | 93 | def resnet_v2_50(inputs, 94 | num_classes=None, 95 | is_training=True, 96 | return_raw=True, 97 | global_pool=True, 98 | output_stride=None, 99 | spatial_squeeze=True, 100 | reuse=None, 101 | scope='resnet_v2_50'): 102 | """ResNet-50 model of [1]. See resnet_v2() for arg and return description.""" 103 | blocks = [ 104 | resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), 105 | resnet_v2_block('block2', base_depth=128, num_units=4, stride=2), 106 | resnet_v2_block('block3', base_depth=256, num_units=6, stride=2), 107 | resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), 108 | ] 109 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 110 | resnet_v2_50.default_image_size = resnet_v2.default_image_size 111 | 112 | 113 | def resnet_v2_101(inputs, 114 | num_classes=None, 115 | is_training=True, 116 | return_raw=True, 117 | global_pool=True, 118 | output_stride=None, 119 | spatial_squeeze=True, 120 | reuse=None, 121 | scope='resnet_v2_101'): 122 | """ResNet-101 model of [1]. 
See resnet_v2() for arg and return description.""" 123 | blocks = [ 124 | resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), 125 | resnet_v2_block('block2', base_depth=128, num_units=4, stride=2), 126 | resnet_v2_block('block3', base_depth=256, num_units=23, stride=2), 127 | resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), 128 | ] 129 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 130 | resnet_v2_101.default_image_size = resnet_v2.default_image_size 131 | 132 | 133 | def resnet_v2_152(inputs, 134 | num_classes=None, 135 | is_training=True, 136 | return_raw=True, 137 | global_pool=True, 138 | output_stride=None, 139 | spatial_squeeze=True, 140 | reuse=None, 141 | scope='resnet_v2_152'): 142 | """ResNet-152 model of [1]. See resnet_v2() for arg and return description.""" 143 | blocks = [ 144 | resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), 145 | resnet_v2_block('block2', base_depth=128, num_units=8, stride=2), 146 | resnet_v2_block('block3', base_depth=256, num_units=36, stride=2), 147 | resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), 148 | ] 149 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 150 | resnet_v2_152.default_image_size = resnet_v2.default_image_size 151 | 152 | 153 | def resnet_v2_200(inputs, 154 | num_classes=None, 155 | is_training=True, 156 | return_raw=True, 157 | global_pool=True, 158 | output_stride=None, 159 | spatial_squeeze=True, 160 | reuse=None, 161 | scope='resnet_v2_200'): 162 | """ResNet-200 model of [2]. 
See resnet_v2() for arg and return description.""" 163 | blocks = [ 164 | resnet_v2_block('block1', base_depth=64, num_units=3, stride=2), 165 | resnet_v2_block('block2', base_depth=128, num_units=24, stride=2), 166 | resnet_v2_block('block3', base_depth=256, num_units=36, stride=2), 167 | resnet_v2_block('block4', base_depth=512, num_units=3, stride=1), 168 | ] 169 | return resnet_v2(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 170 | resnet_v2_200.default_image_size = resnet_v2.default_image_size 171 | -------------------------------------------------------------------------------- /backbones/ResNet_v1.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | 8 | from backbones import utils 9 | 10 | 11 | resnet_arg_scope = utils.resnet_arg_scope 12 | 13 | 14 | class NoOpScope(object): 15 | """No-op context manager.""" 16 | 17 | def __enter__(self): 18 | return None 19 | 20 | def __exit__(self, exc_type, exc_value, traceback): 21 | return False 22 | 23 | 24 | @slim.add_arg_scope 25 | def bottleneck(inputs, 26 | depth, 27 | depth_bottleneck, 28 | stride, 29 | rate=1, 30 | outputs_collections=None, 31 | scope=None, 32 | use_bounded_activations=False): 33 | with tf.variable_scope(scope, 'bottleneck_v1', [inputs]) as sc: 34 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 35 | 36 | if depth == depth_in: 37 | shortcut = utils.subsample(inputs, stride, 'shortcut') 38 | else: 39 | shortcut = slim.conv2d(inputs, depth, [1, 1], stride=stride, activation_fn=tf.nn.relu6 if use_bounded_activations else None, scope='shortcut') 40 | 41 | residual = slim.conv2d(inputs, depth_bottleneck, [1, 1], stride=1, scope='conv1') 42 | residual = utils.conv2d_same(residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') 43 | residual = slim.conv2d(residual, depth, [1, 1], stride=1, activation_fn=None, scope='conv3') 44 | 45 | if use_bounded_activations: 46 | # Use clip_by_value to simulate bandpass activation. 
47 | residual = tf.clip_by_value(residual, -6.0, 6.0) 48 | output = tf.nn.relu6(shortcut + residual) 49 | else: 50 | output = tf.nn.relu(shortcut + residual) 51 | 52 | return slim.utils.collect_named_outputs(outputs_collections, sc.name, output) 53 | 54 | 55 | def resnet_v1(inputs, 56 | blocks, 57 | num_classes=None, 58 | is_training=True, 59 | global_pool=True, 60 | output_stride=None, 61 | include_root_block=True, 62 | spatial_squeeze=True, 63 | store_non_strided_activations=False, 64 | reuse=None, 65 | scope=None): 66 | with tf.variable_scope(scope, 'resnet_v1', [inputs], reuse=reuse) as sc: 67 | end_points_collection = sc.original_name_scope + '_end_points' 68 | with slim.arg_scope([slim.conv2d, bottleneck, utils.stack_blocks_dense], outputs_collections=end_points_collection): 69 | with (slim.arg_scope([slim.batch_norm], is_training=is_training) if is_training is not None else NoOpScope()): 70 | net = inputs 71 | if include_root_block: 72 | if output_stride is not None: 73 | if output_stride % 4 != 0: 74 | raise ValueError('The output_stride needs to be a multiple of 4.') 75 | output_stride /= 4 76 | net = utils.conv2d_same(net, 64, 7, stride=2, scope='conv1') 77 | net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') 78 | net = utils.stack_blocks_dense(net, blocks, output_stride, store_non_strided_activations) 79 | 80 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 81 | 82 | if global_pool: 83 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) 84 | end_points['global_pool'] = net 85 | if num_classes: 86 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') 87 | end_points[sc.name + '/logits'] = net 88 | if spatial_squeeze: 89 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') 90 | end_points[sc.name + '/spatial_squeeze'] = net 91 | end_points['predictions'] = slim.softmax(net, scope='predictions') 92 | return net, end_points 93 | resnet_v1.default_image_size = 224 94 | 95 | 96 | def resnet_v1_block(scope, base_depth, num_units, stride): 97 | return utils.Block(scope, bottleneck, [{ 98 | 'depth': base_depth * 4, 99 | 'depth_bottleneck': base_depth, 100 | 'stride': 1 101 | }] * (num_units - 1) + [{ 102 | 'depth': base_depth * 4, 103 | 'depth_bottleneck': base_depth, 104 | 'stride': stride 105 | }]) 106 | 107 | 108 | def resnet_v1_50(inputs, 109 | num_classes=None, 110 | is_training=True, 111 | global_pool=True, 112 | output_stride=None, 113 | spatial_squeeze=True, 114 | store_non_strided_activations=False, 115 | reuse=None, 116 | scope='resnet_v1_50'): 117 | """ResNet-50 model of [1]. 
See resnet_v1() for arg and return description.""" 118 | blocks = [ 119 | resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 120 | resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), 121 | resnet_v1_block('block3', base_depth=256, num_units=6, stride=2), 122 | resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), 123 | ] 124 | return resnet_v1(inputs, blocks, num_classes, is_training, 125 | global_pool=global_pool, output_stride=output_stride, 126 | include_root_block=True, spatial_squeeze=spatial_squeeze, 127 | store_non_strided_activations=store_non_strided_activations, 128 | reuse=reuse, scope=scope) 129 | resnet_v1_50.default_image_size = resnet_v1.default_image_size 130 | 131 | 132 | def resnet_v1_101(inputs, 133 | num_classes=None, 134 | is_training=True, 135 | global_pool=True, 136 | output_stride=None, 137 | spatial_squeeze=True, 138 | store_non_strided_activations=False, 139 | reuse=None, 140 | scope='resnet_v1_101'): 141 | """ResNet-101 model of [1]. See resnet_v1() for arg and return description.""" 142 | blocks = [ 143 | resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 144 | resnet_v1_block('block2', base_depth=128, num_units=4, stride=2), 145 | resnet_v1_block('block3', base_depth=256, num_units=23, stride=2), 146 | resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), 147 | ] 148 | return resnet_v1(inputs, blocks, num_classes, is_training, 149 | global_pool=global_pool, output_stride=output_stride, 150 | include_root_block=True, spatial_squeeze=spatial_squeeze, 151 | store_non_strided_activations=store_non_strided_activations, 152 | reuse=reuse, scope=scope) 153 | resnet_v1_101.default_image_size = resnet_v1.default_image_size 154 | 155 | 156 | def resnet_v1_152(inputs, 157 | num_classes=None, 158 | is_training=True, 159 | global_pool=True, 160 | output_stride=None, 161 | store_non_strided_activations=False, 162 | spatial_squeeze=True, 163 | reuse=None, 164 | scope='resnet_v1_152'): 165 | """ResNet-152 model of [1]. See resnet_v1() for arg and return description.""" 166 | blocks = [ 167 | resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 168 | resnet_v1_block('block2', base_depth=128, num_units=8, stride=2), 169 | resnet_v1_block('block3', base_depth=256, num_units=36, stride=2), 170 | resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), 171 | ] 172 | return resnet_v1(inputs, blocks, num_classes, is_training, 173 | global_pool=global_pool, output_stride=output_stride, 174 | include_root_block=True, spatial_squeeze=spatial_squeeze, 175 | store_non_strided_activations=store_non_strided_activations, 176 | reuse=reuse, scope=scope) 177 | resnet_v1_152.default_image_size = resnet_v1.default_image_size 178 | 179 | 180 | def resnet_v1_200(inputs, 181 | num_classes=None, 182 | is_training=True, 183 | global_pool=True, 184 | output_stride=None, 185 | store_non_strided_activations=False, 186 | spatial_squeeze=True, 187 | reuse=None, 188 | scope='resnet_v1_200'): 189 | """ResNet-200 model of [2]. 
See resnet_v1() for arg and return description.""" 190 | blocks = [ 191 | resnet_v1_block('block1', base_depth=64, num_units=3, stride=2), 192 | resnet_v1_block('block2', base_depth=128, num_units=24, stride=2), 193 | resnet_v1_block('block3', base_depth=256, num_units=36, stride=2), 194 | resnet_v1_block('block4', base_depth=512, num_units=3, stride=1), 195 | ] 196 | return resnet_v1(inputs, blocks, num_classes, is_training, 197 | global_pool=global_pool, output_stride=output_stride, 198 | include_root_block=True, spatial_squeeze=spatial_squeeze, 199 | store_non_strided_activations=store_non_strided_activations, 200 | reuse=reuse, scope=scope) 201 | resnet_v1_200.default_image_size = resnet_v1.default_image_size 202 | -------------------------------------------------------------------------------- /backbones/modifiedResNet_v2.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | import tensorflow.contrib.slim as slim 7 | 8 | from backbones import utils 9 | 10 | resnet_arg_scope = utils.resnet_arg_scope 11 | 12 | 13 | @slim.add_arg_scope 14 | def bottleneck(inputs, depth, depth_bottleneck, stride, rate=1, outputs_collections=None, scope=None): 15 | with tf.variable_scope(scope, 'bottleneck_v2', [inputs]) as sc: 16 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 17 | preact = slim.batch_norm(inputs, activation_fn=tf.nn.leaky_relu, scope='preact') 18 | if depth == depth_in: 19 | shortcut = utils.subsample(inputs, stride, 'shortcut') 20 | else: 21 | shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, normalizer_fn=None, activation_fn=None, scope='shortcut') 22 | 23 | residual = slim.conv2d(preact, depth_bottleneck, [1, 1], stride=1, scope='conv1') 24 | residual = utils.conv2d_same(residual, depth_bottleneck, 3, stride, rate=rate, scope='conv2') 25 | residual = slim.conv2d(residual, depth, [1, 1], stride=1, normalizer_fn=None, activation_fn=None, scope='conv3') 26 | 27 | output = shortcut + residual 28 | 29 | return slim.utils.collect_named_outputs(outputs_collections, sc.name, output) 30 | 31 | 32 | @slim.add_arg_scope 33 | def block(inputs, depth, stride, rate=1, outputs_collections=None, scope=None): 34 | with tf.variable_scope(scope, 'block_v2', [inputs]) as sc: 35 | depth_in = slim.utils.last_dimension(inputs.get_shape(), min_rank=4) 36 | preact = slim.batch_norm(inputs, activation_fn=tf.nn.leaky_relu, scope='preact') 37 | if depth == depth_in: 38 | shortcut = utils.subsample(inputs, stride, 'shortcut') 39 | else: 40 | shortcut = slim.conv2d(preact, depth, [1, 1], stride=stride, normalizer_fn=None, activation_fn=None, scope='shortcut') 41 | 42 | residual = utils.conv2d_same(preact, depth, 3, stride, rate=rate, scope='conv1') 43 | residual = slim.conv2d(residual, depth, [3, 3], stride=1, normalizer_fn=None, activation_fn=None, scope='conv2') 44 | # residual = slim.conv2d(residual, depth, [1, 1], stride=1, normalizer_fn=None, activation_fn=None, scope='conv3') 45 | 46 | output = shortcut + residual 47 | 48 | return slim.utils.collect_named_outputs(outputs_collections, sc.name, output) 49 | 50 | 51 | def resnet_v2_m(inputs, 52 | blocks, 53 | num_classes=None, 54 | is_training=True, 55 | return_raw=True, 56 | global_pool=True, 57 | output_stride=None, 58 | include_root_block=True, 59 | spatial_squeeze=True, 60 | reuse=None, 61 | scope=None): 62 | with tf.variable_scope(scope, 
'resnet_v2', [inputs], reuse=reuse) as sc: 63 | end_points_collection = sc.original_name_scope + '_end_points' 64 | with slim.arg_scope([slim.conv2d, bottleneck, utils.stack_blocks_dense], outputs_collections=end_points_collection): 65 | with slim.arg_scope([slim.batch_norm], is_training=is_training): 66 | net = inputs 67 | if include_root_block: 68 | if output_stride is not None: 69 | if output_stride % 4 != 0: 70 | raise ValueError('The output_stride needs to be a multiple of 4.') 71 | output_stride /= 4 72 | with slim.arg_scope([slim.conv2d], activation_fn=None, normalizer_fn=None): 73 | net = utils.conv2d_same(net, 64, 3, stride=1, scope='conv1') 74 | # net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1') 75 | net = utils.stack_blocks_dense(net, blocks, output_stride) 76 | end_points = slim.utils.convert_collection_to_dict(end_points_collection) 77 | if return_raw: 78 | return net, end_points 79 | net = slim.batch_norm(net, activation_fn=tf.nn.relu, scope='postnorm') 80 | end_points[sc.name + '/postnorm'] = net 81 | 82 | if global_pool: 83 | net = tf.reduce_mean(net, [1, 2], name='pool5', keep_dims=True) 84 | end_points['global_pool'] = net 85 | 86 | if num_classes: 87 | net = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None, scope='logits') 88 | end_points[sc.name + '/logits'] = net 89 | if spatial_squeeze: 90 | net = tf.squeeze(net, [1, 2], name='SpatialSqueeze') 91 | end_points[sc.name + '/spatial_squeeze'] = net 92 | end_points['predictions'] = slim.softmax(net, scope='predictions') 93 | return net, end_points 94 | resnet_v2_m.default_image_size = 224 95 | 96 | 97 | def resnet_v2_bottleneck(scope, base_depth, num_units, stride): 98 | return utils.Block(scope, bottleneck, [{ 99 | 'depth': base_depth * 4, 100 | 'depth_bottleneck': base_depth, 101 | 'stride': stride 102 | }] + (num_units - 1) * [{ 103 | 'depth': base_depth * 4, 104 | 'depth_bottleneck': base_depth, 105 | 'stride': 1 106 | }]) 107 | resnet_v2_m.default_image_size = 224 108 | 109 | 110 | def resnet_v2_block(scope, base_depth, num_units, stride): 111 | return utils.Block(scope, block, [{ 112 | 'depth': base_depth * 4, 113 | 'stride': stride 114 | }] + (num_units - 1) * [{ 115 | 'depth': base_depth * 4, 116 | 'stride': 1 117 | }]) 118 | resnet_v2_m.default_image_size = 224 119 | 120 | 121 | def resnet_v2_m_50(inputs, 122 | num_classes=None, 123 | is_training=True, 124 | return_raw=True, 125 | global_pool=True, 126 | output_stride=None, 127 | spatial_squeeze=True, 128 | reuse=None, 129 | scope='resnet_v2_50'): 130 | """ResNet-50 model of [1]. See resnet_v2() for arg and return description.""" 131 | blocks = [ 132 | resnet_v2_block('block1', base_depth=16, num_units=3, stride=2), 133 | resnet_v2_block('block2', base_depth=32, num_units=4, stride=2), 134 | resnet_v2_block('block3', base_depth=64, num_units=14, stride=2), 135 | resnet_v2_block('block4', base_depth=128, num_units=3, stride=2), 136 | ] 137 | return resnet_v2_m(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 138 | resnet_v2_m_50.default_image_size = resnet_v2_m.default_image_size 139 | 140 | 141 | def resnet_v2_m_101(inputs, 142 | num_classes=None, 143 | is_training=True, 144 | return_raw=True, 145 | global_pool=True, 146 | output_stride=None, 147 | spatial_squeeze=True, 148 | reuse=None, 149 | scope='resnet_v2_101'): 150 | """ResNet-101 model of [1]. 
See resnet_v2() for arg and return description.""" 151 | blocks = [ 152 | resnet_v2_bottleneck('block1', base_depth=64, num_units=3, stride=2), 153 | resnet_v2_bottleneck('block2', base_depth=128, num_units=4, stride=2), 154 | resnet_v2_bottleneck('block3', base_depth=256, num_units=23, stride=2), 155 | resnet_v2_bottleneck('block4', base_depth=512, num_units=3, stride=2), 156 | ] 157 | return resnet_v2_m(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 158 | resnet_v2_m_101.default_image_size = resnet_v2_m.default_image_size 159 | 160 | 161 | def resnet_v2_m_152(inputs, 162 | num_classes=None, 163 | is_training=True, 164 | return_raw=True, 165 | global_pool=True, 166 | output_stride=None, 167 | spatial_squeeze=True, 168 | reuse=None, 169 | scope='resnet_v2_152'): 170 | """ResNet-152 model of [1]. See resnet_v2() for arg and return description.""" 171 | blocks = [ 172 | resnet_v2_bottleneck('block1', base_depth=64, num_units=3, stride=2), 173 | resnet_v2_bottleneck('block2', base_depth=128, num_units=8, stride=2), 174 | resnet_v2_bottleneck('block3', base_depth=256, num_units=36, stride=2), 175 | resnet_v2_bottleneck('block4', base_depth=512, num_units=3, stride=2), 176 | ] 177 | return resnet_v2_m(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 178 | resnet_v2_m_152.default_image_size = resnet_v2_m.default_image_size 179 | 180 | 181 | def resnet_v2_m_200(inputs, 182 | num_classes=None, 183 | is_training=True, 184 | return_raw=True, 185 | global_pool=True, 186 | output_stride=None, 187 | spatial_squeeze=True, 188 | reuse=None, 189 | scope='resnet_v2_200'): 190 | """ResNet-200 model of [2]. 
See resnet_v2() for arg and return description.""" 191 | blocks = [ 192 | resnet_v2_bottleneck('block1', base_depth=64, num_units=3, stride=2), 193 | resnet_v2_bottleneck('block2', base_depth=128, num_units=24, stride=2), 194 | resnet_v2_bottleneck('block3', base_depth=256, num_units=36, stride=2), 195 | resnet_v2_bottleneck('block4', base_depth=512, num_units=3, stride=2), 196 | ] 197 | return resnet_v2_m(inputs, blocks, num_classes, is_training=is_training, return_raw=return_raw, global_pool=global_pool, output_stride=output_stride, include_root_block=True, spatial_squeeze=spatial_squeeze, reuse=reuse, scope=scope) 198 | resnet_v2_m_200.default_image_size = resnet_v2_m.default_image_size 199 | -------------------------------------------------------------------------------- /train_softmax.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pickle 4 | import argparse 5 | import numpy as np 6 | 7 | import io 8 | import yaml 9 | from scipy import misc 10 | 11 | import tensorflow as tf 12 | import tensorflow.contrib.slim as slim 13 | 14 | from datetime import datetime 15 | 16 | from losses.logit_loss import get_logits 17 | from data.classificationDataTool import ClassificationImageData 18 | from model import get_embd 19 | from utils import average_gradients, check_folders, analyze_vars 20 | from evaluate import load_bin, evaluate 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser() 25 | 26 | parser.add_argument('--config_path', type=str, help='path to config file', default='./configs/config_ms1m_100.yaml') 27 | 28 | return parser.parse_args() 29 | 30 | 31 | def inference(images, labels, is_training_dropout, is_training_bn, config): 32 | embds, end_points = get_embd(images, is_training_dropout, is_training_bn, config) 33 | logits = get_logits(embds, labels, config) 34 | end_points['logits'] = logits 35 | return embds, logits, end_points 36 | 37 | 38 | class Trainer: 39 | def __init__(self, config): 40 | self.config = config 41 | subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') 42 | self.output_dir = os.path.join(config['output_dir'], subdir) 43 | self.model_dir = os.path.join(self.output_dir, 'models') 44 | self.log_dir = os.path.join(self.output_dir, 'log') 45 | self.checkpoint_dir = os.path.join(self.output_dir, 'checkpoints') 46 | self.debug_dir = os.path.join(self.output_dir, 'debug') 47 | check_folders([self.output_dir, self.model_dir, self.log_dir, self.checkpoint_dir, self.debug_dir]) 48 | self.val_log = os.path.join(self.output_dir, 'val_log.txt') 49 | 50 | self.batch_size = config['batch_size'] 51 | self.gpu_num = config['gpu_num'] 52 | if self.batch_size % self.gpu_num != 0: 53 | raise ValueError('batch_size must be a multiple of gpu_num') 54 | self.image_size = config['image_size'] 55 | self.epoch_num = config['epoch_num'] 56 | self.step_per_epoch = config['step_per_epoch'] 57 | self.val_freq = config['val_freq'] 58 | self.val_data = config['val_data'] 59 | self.val_bn_train = config['val_bn_train'] 60 | # for k, v in config['val_data'].items(): 61 | # self.val_data[k] = load_bin(v, self.image_size) 62 | # imgs = self.val_data[k][0] 63 | # np.save(os.path.join(self.debug_dir, k+'.npy'), imgs[:100]) 64 | 65 | with open(os.path.join(self.output_dir, 'config.yaml'), 'w') as f: 66 | f.write(yaml.dump(self.config)) 67 | 68 | 69 | def build(self): 70 | self.train_phase_dropout = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_dropout') 71 | self.train_phase_bn = 
tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_bn') 72 | self.global_step = tf.Variable(name='global_step', initial_value=0, trainable=False) 73 | self.inc_op = tf.assign_add(self.global_step, 1, name='increment_global_step') 74 | scale = int(512.0/self.batch_size) 75 | lr_steps = [scale*s for s in self.config['lr_steps']] 76 | lr_values = [v/scale for v in self.config['lr_values']] 77 | # lr_steps = self.config['lr_steps'] 78 | self.lr = tf.train.piecewise_constant(self.global_step, boundaries=lr_steps, values=lr_values, name='lr_schedule') 79 | 80 | cid = ClassificationImageData(img_size=self.image_size, augment_flag=self.config['augment_flag'], augment_margin=self.config['augment_margin']) 81 | train_dataset = cid.read_TFRecord(self.config['train_data']).shuffle(10000).repeat().batch(self.batch_size) 82 | train_iterator = train_dataset.make_one_shot_iterator() 83 | self.train_images, self.train_labels = train_iterator.get_next() 84 | self.train_images = tf.identity(self.train_images, 'input_images') 85 | self.train_labels = tf.identity(self.train_labels, 'labels') 86 | if self.gpu_num <= 1: 87 | self.embds, self.logits, self.end_points = inference(self.train_images, self.train_labels, self.train_phase_dropout, self.train_phase_bn, self.config) 88 | self.embds = tf.identity(self.embds, 'embeddings') 89 | self.inference_loss = slim.losses.sparse_softmax_cross_entropy(logits=self.logits, labels=self.train_labels) 90 | self.wd_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 91 | self.train_loss = self.inference_loss+self.wd_loss 92 | pred = tf.arg_max(tf.nn.softmax(self.logits), dimension=-1, output_type=tf.int64) 93 | self.train_acc = tf.reduce_mean(tf.cast(tf.equal(pred, self.train_labels), tf.float32)) 94 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 95 | with tf.control_dependencies(update_ops): 96 | self.train_op = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']).minimize(self.train_loss) 97 | else: 98 | self.embds = [] 99 | self.logits = [] 100 | self.inference_loss = [] 101 | self.wd_loss = [] 102 | self.train_loss = [] 103 | pred = [] 104 | tower_grads = [] 105 | update_ops = [] 106 | opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']) 107 | train_images = tf.split(self.train_images, self.gpu_num) 108 | train_labels = tf.split(self.train_labels, self.gpu_num) 109 | for i in range(self.gpu_num): 110 | sub_train_images = train_images[i] 111 | sub_train_labels = train_labels[i] 112 | with tf.device('/gpu:%d' % i): 113 | with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)): 114 | embds, logits, end_points = inference(sub_train_images, sub_train_labels, self.train_phase_dropout, self.train_phase_bn, self.config) 115 | inference_loss = slim.losses.sparse_softmax_cross_entropy(logits=logits, labels=sub_train_labels) 116 | wd_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 117 | train_loss = inference_loss+wd_loss 118 | pred.append(tf.arg_max(tf.nn.softmax(logits), dimension=-1, output_type=tf.int64)) 119 | tower_grads.append(opt.compute_gradients(train_loss)) 120 | update_ops.append(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) 121 | self.embds.append(embds) 122 | self.logits.append(logits) 123 | self.inference_loss.append(inference_loss) 124 | self.wd_loss.append(wd_loss) 125 | self.train_loss.append(train_loss) 126 | self.embds = tf.concat(self.embds, axis=0) 127 | self.logits = tf.concat(self.logits, axis=0) 128 | self.inference_loss = 
tf.add_n(self.inference_loss)/self.gpu_num 129 | self.wd_loss = tf.add_n(self.wd_loss)/self.gpu_num 130 | self.train_loss = tf.add_n(self.train_loss)/self.gpu_num 131 | pred = tf.concat(pred, axis=0) 132 | self.train_acc = tf.reduce_mean(tf.cast(tf.equal(pred, self.train_labels), tf.float32)) 133 | train_ops = [opt.apply_gradients(average_gradients(tower_grads))] 134 | train_ops.extend(update_ops) 135 | self.train_op = tf.group(*train_ops) 136 | 137 | 138 | self.train_summary = tf.summary.merge([ 139 | tf.summary.scalar('inference_loss', self.inference_loss), 140 | tf.summary.scalar('wd_loss', self.wd_loss), 141 | tf.summary.scalar('train_loss', self.train_loss), 142 | tf.summary.scalar('train_acc', self.train_acc) 143 | ]) 144 | 145 | def run_embds(self, sess, images): 146 | batch_num = len(images)//self.batch_size 147 | left = len(images)%self.batch_size 148 | embds = [] 149 | for i in range(batch_num): 150 | cur_embd = sess.run(self.embds, feed_dict={self.train_images: images[i*self.batch_size: (i+1)*self.batch_size], self.train_phase_dropout: False, self.train_phase_bn: self.val_bn_train}) 151 | embds += list(cur_embd) 152 | if left > 0: 153 | image_batch = np.zeros([self.batch_size, self.image_size, self.image_size, 3]) 154 | image_batch[:left, :, :, :] = images[-left:] 155 | cur_embd = sess.run(self.embds, feed_dict={self.train_images: image_batch, self.train_phase_dropout: False, self.train_phase_bn: self.val_bn_train}) 156 | embds += list(cur_embd)[:left] 157 | return np.array(embds) 158 | 159 | def save_image_label(self, images, labels, step): 160 | save_dir = os.path.join(self.debug_dir, 'image_by_label') 161 | for i in range(len(labels)): 162 | if(labels[i] < 10): 163 | cur_save_dir = os.path.join(save_dir, str(labels[i])) 164 | check_folders(cur_save_dir) 165 | misc.imsave(os.path.join(cur_save_dir, '%d_%d.jpg' % (step, i)), images[i]) 166 | 167 | 168 | def train(self): 169 | self.build() 170 | analyze_vars(tf.trainable_variables(), os.path.join(self.output_dir, 'model_vars.txt')) 171 | with open(os.path.join(self.output_dir, 'regularizers.txt'), 'w') as f: 172 | for v in tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES): 173 | f.write(v.name+'\n') 174 | # exit(-1) 175 | tf_config = tf.ConfigProto(allow_soft_placement=True) 176 | tf_config.gpu_options.allow_growth = True 177 | with tf.Session(config=tf_config) as sess: 178 | tf.global_variables_initializer().run() 179 | saver_ckpt = tf.train.Saver() 180 | saver_best = tf.train.Saver() 181 | summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) 182 | start_time = time.time() 183 | best_acc = 0 184 | counter = 0 185 | if config['pretrained_model'] != '': 186 | saver_ckpt.restore(sess, config['pretrained_model']) 187 | step = int(os.path.basename(config['pretrained_model']).split('.')[0].split('-')[-1]) 188 | sess.run(tf.assign(self.global_step, step)) 189 | counter = self.global_step.eval(sess) 190 | print('start step: %d' % counter) 191 | debug = True 192 | for i in range(self.epoch_num): 193 | for j in range(self.step_per_epoch): 194 | _, l, l_wd, l_inf, acc, s, _ = sess.run([self.train_op, self.train_loss, self.wd_loss, self.inference_loss, self.train_acc, self.train_summary, self.inc_op], feed_dict={self.train_phase_dropout: True, self.train_phase_bn: True}) 195 | counter += 1 196 | 197 | # debug 198 | # self.save_image_label(train_img, train_lbl, counter) 199 | # if(debug): 200 | # if(len(train_imgs) < 100): 201 | # train_imgs.append(train_img[0]) 202 | # else: 203 | # np.save(os.path.join(self.debug_dir, 
'train_imgs.npy'), np.array(train_imgs)) 204 | # debug=False 205 | 206 | print("Epoch: [%2d/%2d] [%6d/%6d] time: %.2f, loss: %.3f (inference: %.3f, wd: %.3f), acc: %.3f" % (i, self.epoch_num, j, self.step_per_epoch, time.time() - start_time, l, l_inf, l_wd, acc)) 207 | start_time = time.time() 208 | if counter % self.val_freq == 0: 209 | saver_ckpt.save(sess, os.path.join(self.checkpoint_dir, 'ckpt-m'), global_step=counter) 210 | acc = [] 211 | with open(self.val_log, 'a') as f: 212 | f.write('step: %d\n' % counter) 213 | for k, v in self.val_data.items(): 214 | imgs, imgs_f, issame = load_bin(v, self.image_size) 215 | embds = self.run_embds(sess, imgs) 216 | embds_f = self.run_embds(sess, imgs_f) 217 | embds = embds/np.linalg.norm(embds, axis=1, keepdims=True)+embds_f/np.linalg.norm(embds_f, axis=1, keepdims=True) 218 | tpr, fpr, acc_mean, acc_std, tar, tar_std, far = evaluate(embds, issame, far_target=1e-3, distance_metric=0) 219 | f.write('eval on %s: acc--%1.5f+-%1.5f, tar--%1.5f+-%1.5f@far=%1.5f\n' % (k, acc_mean, acc_std, tar, tar_std, far)) 220 | acc.append(acc_mean) 221 | acc = np.mean(np.array(acc)) 222 | if acc > best_acc: 223 | saver_best.save(sess, os.path.join(self.model_dir, 'best-m'), global_step=counter) 224 | best_acc = acc 225 | 226 | 227 | if __name__ == '__main__': 228 | args = parse_args() 229 | config = yaml.load(open(args.config_path)) 230 | trainer = Trainer(config) 231 | trainer.train() 232 | 233 | 234 | -------------------------------------------------------------------------------- /finetune_softmax.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pickle 4 | import argparse 5 | import numpy as np 6 | 7 | import io 8 | import yaml 9 | from scipy import misc 10 | 11 | import tensorflow as tf 12 | import tensorflow.contrib.slim as slim 13 | 14 | from datetime import datetime 15 | 16 | from losses.logit_loss import get_logits 17 | from data.classificationDataTool import ClassificationImageData 18 | from model import get_embd 19 | from utils import average_gradients, check_folders, analyze_vars 20 | from evaluate import load_bin, evaluate 21 | 22 | 23 | def parse_args(): 24 | parser = argparse.ArgumentParser() 25 | 26 | parser.add_argument('--config_path', type=str, help='path to config file', default='./configs/config_finetune.yaml') 27 | 28 | return parser.parse_args() 29 | 30 | 31 | def inference(images, labels, is_training_dropout, is_training_bn, config): 32 | embds, end_points = get_embd(images, is_training_dropout, is_training_bn, config) 33 | logits = get_logits(embds, labels, config) 34 | end_points['logits'] = logits 35 | return embds, logits, end_points 36 | 37 | 38 | class Trainer: 39 | def __init__(self, config): 40 | self.config = config 41 | subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') 42 | self.output_dir = os.path.join(config['output_dir'], subdir) 43 | self.model_dir = os.path.join(self.output_dir, 'models') 44 | self.log_dir = os.path.join(self.output_dir, 'log') 45 | self.checkpoint_dir = os.path.join(self.output_dir, 'checkpoints') 46 | self.debug_dir = os.path.join(self.output_dir, 'debug') 47 | check_folders([self.output_dir, self.model_dir, self.log_dir, self.checkpoint_dir, self.debug_dir]) 48 | self.val_log = os.path.join(self.output_dir, 'val_log.txt') 49 | 50 | self.batch_size = config['batch_size'] 51 | self.gpu_num = config['gpu_num'] 52 | if self.batch_size % self.gpu_num != 0: 53 | raise ValueError('batch_size must be a multiple of gpu_num') 54 | 
self.image_size = config['image_size'] 55 | self.epoch_num = config['epoch_num'] 56 | self.step_per_epoch = config['step_per_epoch'] 57 | self.val_freq = config['val_freq'] 58 | self.val_data = config['val_data'] 59 | self.val_bn_train = config['val_bn_train'] 60 | # for k, v in config['val_data'].items(): 61 | # self.val_data[k] = load_bin(v, self.image_size) 62 | # imgs = self.val_data[k][0] 63 | # np.save(os.path.join(self.debug_dir, k+'.npy'), imgs[:100]) 64 | 65 | with open(os.path.join(self.output_dir, 'config.yaml'), 'w') as f: 66 | f.write(yaml.dump(self.config)) 67 | 68 | 69 | def build(self): 70 | self.train_phase_dropout = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_dropout') 71 | self.train_phase_bn = tf.placeholder(dtype=tf.bool, shape=None, name='train_phase_bn') 72 | self.global_step = tf.Variable(name='global_step', initial_value=0, trainable=False) 73 | self.inc_op = tf.assign_add(self.global_step, 1, name='increment_global_step') 74 | scale = int(512.0/self.batch_size) 75 | lr_steps = [scale*s for s in self.config['lr_steps']] 76 | lr_values = [v/scale for v in self.config['lr_values']] 77 | # lr_steps = self.config['lr_steps'] 78 | self.lr = tf.train.piecewise_constant(self.global_step, boundaries=lr_steps, values=lr_values, name='lr_schedule') 79 | 80 | cid = ClassificationImageData(img_size=self.image_size, augment_flag=self.config['augment_flag'], augment_margin=self.config['augment_margin']) 81 | train_dataset = cid.read_TFRecord(self.config['train_data']).shuffle(10000).repeat().batch(self.batch_size) 82 | train_iterator = train_dataset.make_one_shot_iterator() 83 | self.train_images, self.train_labels = train_iterator.get_next() 84 | self.train_images = tf.identity(self.train_images, 'input_images') 85 | self.train_labels = tf.identity(self.train_labels, 'labels') 86 | if self.gpu_num <= 1: 87 | self.embds, self.logits, self.end_points = inference(self.train_images, self.train_labels, self.train_phase_dropout, self.train_phase_bn, self.config) 88 | self.embds = tf.identity(self.embds, 'embeddings') 89 | self.inference_loss = slim.losses.sparse_softmax_cross_entropy(logits=self.logits, labels=self.train_labels) 90 | self.wd_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 91 | self.train_loss = self.inference_loss+self.wd_loss 92 | pred = tf.arg_max(tf.nn.softmax(self.logits), dimension=-1, output_type=tf.int64) 93 | self.train_acc = tf.reduce_mean(tf.cast(tf.equal(pred, self.train_labels), tf.float32)) 94 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 95 | vars_softmax = [v for v in tf.trainable_variables() if 'embd_extractor' not in v.name] 96 | with tf.control_dependencies(update_ops): 97 | self.train_op = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']).minimize(self.train_loss) 98 | self.train_op_softmax = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']).minimize(self.train_loss, var_list=vars_softmax) 99 | else: 100 | self.embds = [] 101 | self.logits = [] 102 | self.inference_loss = [] 103 | self.wd_loss = [] 104 | self.train_loss = [] 105 | pred = [] 106 | tower_grads = [] 107 | tower_grads_softmax = [] 108 | update_ops = [] 109 | opt = tf.train.MomentumOptimizer(learning_rate=self.lr, momentum=self.config['momentum']) 110 | train_images = tf.split(self.train_images, self.gpu_num) 111 | train_labels = tf.split(self.train_labels, self.gpu_num) 112 | for i in range(self.gpu_num): 113 | sub_train_images = train_images[i] 114 | sub_train_labels 
= train_labels[i] 115 | with tf.device('/gpu:%d' % i): 116 | with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)): 117 | embds, logits, end_points = inference(sub_train_images, sub_train_labels, self.train_phase_dropout, self.train_phase_bn, self.config) 118 | inference_loss = slim.losses.sparse_softmax_cross_entropy(logits=logits, labels=sub_train_labels) 119 | wd_loss = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) 120 | train_loss = inference_loss+wd_loss 121 | pred.append(tf.arg_max(tf.nn.softmax(logits), dimension=-1, output_type=tf.int64)) 122 | vars_softmax = [v for v in tf.trainable_variables() if 'embd_extractor' not in v.name] 123 | tower_grads.append(opt.compute_gradients(train_loss)) 124 | tower_grads_softmax.append(opt.compute_gradients(train_loss, var_list=vars_softmax)) 125 | update_ops.append(tf.get_collection(tf.GraphKeys.UPDATE_OPS)) 126 | self.embds.append(embds) 127 | self.logits.append(logits) 128 | self.inference_loss.append(inference_loss) 129 | self.wd_loss.append(wd_loss) 130 | self.train_loss.append(train_loss) 131 | self.embds = tf.concat(self.embds, axis=0) 132 | self.logits = tf.concat(self.logits, axis=0) 133 | self.inference_loss = tf.add_n(self.inference_loss)/self.gpu_num 134 | self.wd_loss = tf.add_n(self.wd_loss)/self.gpu_num 135 | self.train_loss = tf.add_n(self.train_loss)/self.gpu_num 136 | pred = tf.concat(pred, axis=0) 137 | self.train_acc = tf.reduce_mean(tf.cast(tf.equal(pred, self.train_labels), tf.float32)) 138 | train_ops = [opt.apply_gradients(average_gradients(tower_grads))] 139 | train_ops_softmax = [opt.apply_gradients(average_gradients(tower_grads_softmax))] 140 | train_ops.extend(update_ops) 141 | train_ops_softmax.extend(update_ops) 142 | self.train_op = tf.group(*train_ops) 143 | self.train_op_softmax = tf.group(*train_ops_softmax) 144 | 145 | 146 | self.train_summary = tf.summary.merge([ 147 | tf.summary.scalar('inference_loss', self.inference_loss), 148 | tf.summary.scalar('wd_loss', self.wd_loss), 149 | tf.summary.scalar('train_loss', self.train_loss), 150 | tf.summary.scalar('train_acc', self.train_acc) 151 | ]) 152 | 153 | def run_embds(self, sess, images): 154 | batch_num = len(images)//self.batch_size 155 | left = len(images)%self.batch_size 156 | embds = [] 157 | for i in range(batch_num): 158 | cur_embd = sess.run(self.embds, feed_dict={self.train_images: images[i*self.batch_size: (i+1)*self.batch_size], self.train_phase_dropout: False, self.train_phase_bn: self.val_bn_train}) 159 | embds += list(cur_embd) 160 | if left > 0: 161 | image_batch = np.zeros([self.batch_size, self.image_size, self.image_size, 3]) 162 | image_batch[:left, :, :, :] = images[-left:] 163 | cur_embd = sess.run(self.embds, feed_dict={self.train_images: image_batch, self.train_phase_dropout: False, self.train_phase_bn: self.val_bn_train}) 164 | embds += list(cur_embd)[:left] 165 | return np.array(embds) 166 | 167 | def save_image_label(self, images, labels, step): 168 | save_dir = os.path.join(self.debug_dir, 'image_by_label') 169 | for i in range(len(labels)): 170 | if(labels[i] < 10): 171 | cur_save_dir = os.path.join(save_dir, str(labels[i])) 172 | check_folders(cur_save_dir) 173 | misc.imsave(os.path.join(cur_save_dir, '%d_%d.jpg' % (step, i)), images[i]) 174 | 175 | 176 | def train(self): 177 | self.build() 178 | analyze_vars(tf.trainable_variables(), os.path.join(self.output_dir, 'model_vars.txt')) 179 | with open(os.path.join(self.output_dir, 'regularizers.txt'), 'w') as f: 180 | for v in 
tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES): 181 | f.write(v.name+'\n') 182 | # exit(-1) 183 | tf_config = tf.ConfigProto(allow_soft_placement=True) 184 | tf_config.gpu_options.allow_growth = True 185 | with tf.Session(config=tf_config) as sess: 186 | tf.global_variables_initializer().run() 187 | saver_ckpt = tf.train.Saver() 188 | saver_best = tf.train.Saver() 189 | saver_embd = tf.train.Saver(var_list=[v for v in tf.trainable_variables() if 'embd_extractor' in v.name]) 190 | if config['pretrained_model'] != '': 191 | saver_embd.restore(sess, config['pretrained_model']) 192 | summary_writer = tf.summary.FileWriter(self.log_dir, sess.graph) 193 | start_time = time.time() 194 | best_acc = 0 195 | counter = 0 196 | debug = True 197 | for i in range(self.epoch_num): 198 | if i < config['fixed_epoch_num']: 199 | cur_train_op = self.train_op_softmax 200 | else: 201 | cur_train_op = self.train_op 202 | for j in range(self.step_per_epoch): 203 | _, l, l_wd, l_inf, acc, s, _ = sess.run([cur_train_op, self.train_loss, self.wd_loss, self.inference_loss, self.train_acc, self.train_summary, self.inc_op], feed_dict={self.train_phase_dropout: True, self.train_phase_bn: True}) 204 | counter += 1 205 | 206 | print("Epoch: [%2d/%2d] [%6d/%6d] time: %.2f, loss: %.3f (inference: %.3f, wd: %.3f), acc: %.3f" % (i, self.epoch_num, j, self.step_per_epoch, time.time() - start_time, l, l_inf, l_wd, acc)) 207 | start_time = time.time() 208 | if counter % self.val_freq == 0: 209 | saver_ckpt.save(sess, os.path.join(self.checkpoint_dir, 'ckpt-m'), global_step=counter) 210 | acc = [] 211 | with open(self.val_log, 'a') as f: 212 | f.write('step: %d\n' % counter) 213 | for k, v in self.val_data.items(): 214 | imgs, imgs_f, issame = load_bin(v, self.image_size) 215 | embds = self.run_embds(sess, imgs) 216 | embds_f = self.run_embds(sess, imgs_f) 217 | embds = embds/np.linalg.norm(embds, axis=1, keepdims=True)+embds_f/np.linalg.norm(embds_f, axis=1, keepdims=True) 218 | tpr, fpr, acc_mean, acc_std, tar, tar_std, far = evaluate(embds, issame, far_target=1e-3, distance_metric=0) 219 | f.write('eval on %s: acc--%1.5f+-%1.5f, tar--%1.5f+-%1.5f@far=%1.5f\n' % (k, acc_mean, acc_std, tar, tar_std, far)) 220 | acc.append(acc_mean) 221 | acc = np.mean(np.array(acc)) 222 | if acc > best_acc: 223 | saver_best.save(sess, os.path.join(self.model_dir, 'best-m'), global_step=counter) 224 | best_acc = acc 225 | 226 | 227 | if __name__ == '__main__': 228 | args = parse_args() 229 | config = yaml.load(open(args.config_path)) 230 | trainer = Trainer(config) 231 | trainer.train() 232 | 233 | 234 | --------------------------------------------------------------------------------
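After training, embeddings for new face crops are extracted with get_embd.py / evaluate.py. As a minimal sketch of the call pattern only (it mirrors how train_softmax.py builds its graph; the config path, checkpoint path, and dummy input batch below are placeholders, not values taken from this repository):

```
# Minimal embedding-extraction sketch, assuming a trained checkpoint.
# It follows the get_embd(images, is_training_dropout, is_training_bn, config)
# call pattern used in train_softmax.py; paths and the input batch are placeholders.
import yaml
import numpy as np
import tensorflow as tf

from model import get_embd

config = yaml.load(open('./configs/config_ms1m_100.yaml'))
size = config['image_size']

images = tf.placeholder(tf.float32, [None, size, size, 3], name='input_images')
embds, _ = get_embd(images, False, False, config)  # dropout and batch norm in inference mode

with tf.Session() as sess:
    tf.train.Saver().restore(sess, './output/xxx/checkpoints/ckpt-m-20000')  # placeholder checkpoint
    batch = np.zeros([1, size, size, 3], dtype=np.float32)  # replace with aligned 112x112 face crops
    vec = sess.run(embds, feed_dict={images: batch})
    vec = vec / np.linalg.norm(vec, axis=1, keepdims=True)  # L2-normalize, as done for validation
```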