├── LICENSE
├── README.md
├── data
│   ├── __init__.py
│   ├── eval_data_reader.py
│   └── mx2tfrecords.py
├── eval_ckpt_file.py
├── figures
│   ├── lfw_250k.png
│   ├── lfw_310k.png
│   ├── lfw_730k.png
│   └── model_a_170k.png
├── losses
│   ├── __init__.py
│   └── face_losses.py
├── nets
│   ├── L_Resnet_E_IR.py
│   ├── L_Resnet_E_IR_GBN.py
│   ├── L_Resnet_E_IR_MGPU.py
│   ├── L_Resnet_E_IR_RBN.py
│   ├── L_Resnet_E_IR_fix_issue9.py
│   ├── __init__.py
│   ├── imagenet_classes.py
│   ├── nets_utils.py
│   ├── networks.py
│   ├── readme.md
│   ├── resnet.py
│   ├── tl_layers_modify.py
│   ├── vgg16.py
│   └── vgg19.py
├── test
│   ├── benchmark
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── gluon_batchsize_test.py
│   │   ├── mxnet_batchsize_test.py
│   │   ├── resnet_slim_benchmark.py
│   │   ├── resnet_tl_benchmark.py
│   │   ├── tensorlayer_batchsize_test.py
│   │   ├── utils_final.py
│   │   ├── vgg19_slim_benchmark.py
│   │   └── vgg19_tl_benchmark.py
│   ├── memory_usage_test.py
│   ├── multiple_gpu_test
│   │   ├── __init__.py
│   │   ├── test_mgpu_mnist.py
│   │   └── test_tensorlayer.py
│   ├── resnet_test_static.py
│   └── test_losses.py
├── train_nets.py
├── train_nets_mgpu.py
├── train_nets_mgpu_new.py
└── verification.py
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Jiankang Deng and Jia Guo
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Insight Face in TensorFlow
2 | 
3 | #### Tasks
4 | * ~~mxnet dataset to tfrecords~~
5 | * ~~backbone network architectures [vgg16, vgg19, resnet]~~
6 | * ~~backbone network architectures [resnet-se, resnext]~~
7 | * ~~LResNet50E-IR~~
8 | * ~~LResNet100E-IR~~
9 | * ~~Additive Angular Margin Loss~~
10 | * ~~CosineFace Loss~~
11 | * ~~train network code~~
12 | * ~~add validation during training~~
13 | * ~~multi-gpu training~~
14 | * ~~combine losses~~ contributed by RogerLo.
15 | * evaluation code
16 | 
17 | 
18 | #### Training Tips (continual updates)
19 | * If you can't use a large batch size (>128), you should use a smaller learning rate.
20 | * If you can't use a large batch size (>128), you can try batch renormalization (file `L_Resnet_E_IR_RBN.py`).
21 | * If you use multiple GPUs, keep at least 16 images per GPU.
22 | * Try [Group Normalization](https://arxiv.org/pdf/1803.08494.pdf); the corresponding code is `L_Resnet_E_IR_GBN.py` (a minimal reference sketch follows this list).
23 | * Using the current model and the lr schedule in `train_nets.py`, you can reproduce the results of `model c`.
24 | * The bug that inflated the model size to 1.6 GB has been fixed (see issue #9). If you want a small model, use `L_Resnet_E_IR_fix_issue9.py`.
25 | * The bug in the multi-gpu training code has been fixed. To use the corrected version, run `train_nets_mgpu_new.py`.
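For reference, group normalization is only a few lines of plain TensorFlow. The sketch below is a minimal NHWC implementation with the paper's default of 32 groups; it is illustrative only and is **not** the `GroupNormLayer` from `tl_layers_modify.py`, which may differ in details.

```python
import tensorflow as tf

def group_norm(x, groups=32, eps=1e-5, scope='group_norm'):
    # x: [N, H, W, C] feature map; normalize within channel groups
    with tf.variable_scope(scope):
        _, h, w, c = x.get_shape().as_list()
        g = min(groups, c)
        x = tf.reshape(x, [-1, h, w, g, c // g])
        mean, var = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
        x = (x - mean) / tf.sqrt(var + eps)
        x = tf.reshape(x, [-1, h, w, c])
        # learned per-channel affine, as in batch norm
        gamma = tf.get_variable('gamma', [c], initializer=tf.ones_initializer())
        beta = tf.get_variable('beta', [c], initializer=tf.zeros_initializer())
        return x * gamma + beta
```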
26 | 
27 | 
28 | #### Training models (continual updates)
29 | 
30 | ##### model A
31 | | model name | depth | normalization layer | batch size | total_steps | download | password |
32 | | ----- |:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
33 | | model A | 50 | group normalization | 16 | 1060k | [model a](https://pan.baidu.com/s/1qWrDCTFlQXlFcBR-dqR-6A) | 2q72 |
34 | 
35 | ###### accuracy
36 | | dbname | accuracy |
37 | | ----- |:-----:|
38 | | lfw | 0.9897 |
39 | | cfp_ff | 0.9876 |
40 | | cfp_fp | 0.84357 |
41 | | age_db30 | 0.914 |
42 | 
43 | 
44 | ##### model B
45 | | model name | depth | normalization layer | batch size | total_steps | download | password |
46 | | ----- |:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
47 | | model B | 50 | batch normalization | 16 | 1100k | [model_b](https://pan.baidu.com/s/11KDqOkF4ThO7mnQQaNO9bA) | h6ai |
48 | 
49 | ###### accuracy
50 | | dbname | accuracy |
51 | | ----- |:-----:|
52 | | lfw | 0.9933 |
53 | | cfp_ff | 0.99357 |
54 | | cfp_fp | 0.8766 |
55 | | age_db30 | 0.9342 |
56 | 
57 | 
58 | 
59 | ##### model C
60 | | model name | depth | normalization layer | batch size | total_steps | download | password |
61 | | ----- |:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
62 | | model C | 50 | batch normalization | 16 | 1950k | [model_c](https://pan.baidu.com/s/1ZlDcQPBh0znduSH6vQ_Q8Q) | 8mdi |
63 | 
64 | ###### accuracy
65 | | dbname | accuracy |
66 | | ----- |:-----:|
67 | | lfw | 0.9963 |
68 | | cfp_ff | 0.99586 |
69 | | cfp_fp | 0.9087 |
70 | | age_db30 | 0.96367 |
71 | 
72 | 
73 | ##### model D
74 | | model name | depth | normalization layer | batch size | total_steps | model_size | download | password |
75 | | ----- |:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
76 | | model D | 50 | batch normalization | 136 | 710k | 348.9MB | [model_d](https://pan.baidu.com/s/1tQYMqfbG36wg1cXKHVoMVw) | amdt |
77 | 
78 | ###### accuracy
79 | | dbname | accuracy |
80 | | ----- |:-----:|
81 | | lfw | 0.9968 |
82 | | cfp_ff | 0.9973 |
83 | | cfp_fp | 0.9271 |
84 | | age_db30 | 0.9725 |
85 | 
86 | 
87 | 
88 | #### Requirements
89 | 1. TensorFlow 1.4–1.6
90 | 2. TensorLayer 1.7
91 | 3. CUDA 8 & cuDNN 6, or CUDA 9 & cuDNN 7
92 | 4. Python 3
93 | 
94 | 
95 | #### Max Batch Size Test
96 | ###### Environment
97 | 
98 | | GPU | cuda | cudnn | TensorFlow | TensorLayer | MXNet | Gluon |
99 | | ----- |:-----:|:-----:|:------:|:---:|:------:|:---:|
100 | | Titan Xp | 9.0 | 7.0 | 1.6 | 1.7 | 1.1.0 | 1.1.0 |
101 | 
102 | ###### Results
103 | 
104 | | DL Tools | Max BatchSize (without bn and prelu) | Max BatchSize (with bn only) | Max BatchSize (with prelu only) | Max BatchSize (with bn and prelu) |
105 | | ------------- |:-------------:|:--------------:|:------------:|:------------:|
106 | | TensorLayer | (8000, 9000) | (5000, 6000) | (3000, 4000) | (2000, 3000) |
107 | | MXNet | (40000, 50000) | (20000, 30000) | (20000, 30000) | (10000, 20000) |
108 | | Gluon | (7000, 8000) | (3000, 4000) | no official method | None |
109 | 
110 | > (8000, 9000): a batch size of 8000 ran without OOM, while 9000 raised an OOM error
111 | 
112 | ###### Test Code
113 | 
114 | | TensorLayer | MXNet | Gluon |
115 | | ----- |:-----:|:-----:|
116 | | [tensorlayer_batchsize_test.py](https://github.com/auroua/InsightFace_TF/blob/master/test/benchmark/tensorlayer_batchsize_test.py) | [mxnet_batchsize_test.py](https://github.com/auroua/InsightFace_TF/blob/master/test/benchmark/mxnet_batchsize_test.py) | [gluon_batchsize_test.py](https://github.com/auroua/InsightFace_TF/blob/master/test/benchmark/gluon_batchsize_test.py) |
117 | 
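The bracketed pairs above were found by trial. For reference, a hedged sketch of such a probing loop — illustrative only, not the benchmark code linked above; `build_train_step(batch_size)` is a hypothetical callback that builds a graph and runs one training step:

```python
import tensorflow as tf

def probe_max_batch_size(build_train_step, candidates=(1000, 2000, 3000, 4000, 5000)):
    """Return the largest candidate batch size that runs without OOM."""
    largest = None
    for bs in candidates:
        tf.reset_default_graph()
        try:
            build_train_step(bs)   # build the graph and run one training step
            largest = bs
        except tf.errors.ResourceExhaustedError:
            break                  # OOM: stop at the previous candidate
    return largest
```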
118 | 
119 | 
120 | #### pretrained model download links
121 | * [resnet_v1_50](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)
122 | * [resnet_v1_101](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz)
123 | * [resnet_v1_152](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz)
124 | * [vgg16](http://www.cs.toronto.edu/~frossard/post/vgg16/)
125 | * [vgg19](https://github.com/machrisaa/tensorflow-vgg)
126 | 
127 | 
128 | #### References
129 | 1. [InsightFace mxnet](https://github.com/deepinsight/insightface)
130 | 2. [InsightFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
131 | 3. [Group Normalization](https://arxiv.org/pdf/1803.08494.pdf)
132 | 4. [tensorlayer_vgg16](https://github.com/tensorlayer/tensorlayer/blob/master/example/tutorial_vgg16.py)
133 | 5. [tensorlayer_vgg19](https://github.com/tensorlayer/tensorlayer/blob/master/example/tutorial_vgg19.py)
134 | 6. [tf_slim](https://github.com/tensorflow/models/tree/master/research/slim)
135 | 7. [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
136 | 8. [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
137 | 9. [Squeeze-and-Excitation Networks](https://arxiv.org/pdf/1709.01507.pdf)
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/data/__init__.py
--------------------------------------------------------------------------------
/data/eval_data_reader.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import pickle
4 | import argparse
5 | import os
6 | import mxnet as mx
7 | import cv2
8 | import io
9 | import PIL.Image
10 | import mxnet.ndarray as nd
11 | 
12 | 
13 | def get_parser():
14 |     parser = argparse.ArgumentParser(description='evaluation data parser')
15 |     parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets')
16 |     # parser.add_argument('--eval_datasets', default=['cfp_fp'], help='evaluation datasets')
17 |     parser.add_argument('--eval_db_path', default='../datasets/faces_ms1m_112x112', help='evaluation datasets base path')
18 |     parser.add_argument('--image_size', default=[112, 112], help='the image size')
19 |     parser.add_argument('--tfrecords_file_path', default='../datasets/tfrecords/eval', help='output tfrecords file path')
20 |     parser.add_argument('--db_base_path', default='../datasets/faces_ms1m_112x112', help='evaluation .bin files base path')
21 |     args = parser.parse_args()
22 |     return args
23 | 
24 | # NOTE: this debug version of load_bin is shadowed by the second load_bin defined at the bottom of this file.
25 | def load_bin(path, image_size):
26 |     '''
27 |     :param path: the input file path
28 |     :param image_size: the input image size
29 |     :return: the returned datasets are in opencv BGR format, [112, 112, 3]
30 |     '''
31 |     bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes')
32 |     issame_list_int = list(map(int, issame_list))
33 |     data_list = []
34 |     for _ in [0, 1]:
35 |         data = np.zeros(shape=[len(issame_list)*2, *image_size, 3])
36 |         data_list.append(data)
37 |     for i in range(len(issame_list)*2):
38 |         _bin = bins[i]
39 |         tf_images = tf.image.decode_jpeg(_bin)
40 |         tf_images = tf.reshape(tf_images, shape=(112, 112, 3))
41 |         sess = tf.Session()
42 |         images = sess.run(tf_images)
43 |         img_cv = cv2.cvtColor(images, cv2.COLOR_RGB2BGR)
44 |         print(np.min(img_cv), np.max(img_cv), img_cv.dtype)
45 |         cv2.imshow('test', img_cv)
46 |         cv2.waitKey(0)
47 |         for flip in [0, 1]:
48 |             if flip == 1:
49 |                 # print(i, flip)
50 |                 img_cv = np.fliplr(img_cv)
51 |                 # cv2.imshow('test', img_cv)
52 |                 # cv2.waitKey(0)
53 |             data_list[flip][i][:] = img_cv
54 |         i += 1
55 |         if i % 1000 == 0:
56 |             print('loading bin', i)
57 |     print(data_list[0].shape)
58 |     return data_list, issame_list
59 | 
60 | 
61 | def mx2tfrecords(imgidx, imgrec, args):
62 |     output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
63 |     writer = tf.python_io.TFRecordWriter(output_path)
64 |     for i in imgidx:
65 |         img_info = imgrec.read_idx(i)
66 |         header, img = mx.recordio.unpack(img_info)
67 |         encoded_jpg_io = io.BytesIO(img)
68 |         image = PIL.Image.open(encoded_jpg_io)
69 |         np_img = np.array(image)
70 |         img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
71 |         img_raw = img.tobytes()
72 |         label = int(header.label)
73 |         example = tf.train.Example(features=tf.train.Features(feature={
74 |             'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
75 |             "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
76 |         }))
77 |         writer.write(example.SerializeToString())  # Serialize To String
78 |         if i % 10000 == 0:
79 |             print('%d num image processed' % i)
80 |     writer.close()
81 | 
82 | 
83 | def mx2tfrecords_eval_data(args, db_name):
84 |     '''
85 |     Convert evaluation data to tfrecords
86 |     :param args:
87 |     :param db_name: lfw, ......
88 |     :return:
89 |     '''
90 |     bins, issame_list = pickle.load(open(os.path.join(args.db_base_path, db_name+'.bin'), 'rb'), encoding='bytes')
91 |     output_image_path = os.path.join(args.tfrecords_file_path, db_name+'_eval_data.tfrecords')
92 |     writer_img = tf.python_io.TFRecordWriter(output_image_path)
93 |     for i in range(len(bins)):
94 |         img_info = bins[i]
95 |         img = mx.image.imdecode(img_info).asnumpy()
96 |         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
97 |         img_b = img.tobytes()
98 |         # # decode test
99 |         # sess = tf.Session()
100 |         # img_2 = tf.decode_raw(img_b, out_type=tf.uint8)
101 |         # img_2 = tf.reshape(img_2, shape=(112, 112, 3))
102 |         # img_2 = tf.image.flip_left_right(img_2)
103 |         # img_2_np = sess.run(img_2)
104 |         # print(img_2_np.shape)
105 |         # cv2.imshow('test', img_2_np)
106 |         # cv2.waitKey(0)
107 |         example = tf.train.Example(features=tf.train.Features(feature={
108 |             'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_b]))
109 |         }))
110 |         writer_img.write(example.SerializeToString())  # Serialize To String
111 |         if i % 1000 == 0:
112 |             print('%d num image processed' % i)
113 |     writer_img.close()
114 | 
115 | 
116 | def load_bin(db_name, image_size, args):
117 |     bins, issame_list = pickle.load(open(os.path.join(args.eval_db_path, db_name+'.bin'), 'rb'), encoding='bytes')
118 |     data_list = []
119 |     for _ in [0, 1]:
120 |         data = np.empty((len(issame_list)*2, image_size[0], image_size[1], 3))
121 |         data_list.append(data)
122 |     for i in range(len(issame_list)*2):
123 |         _bin = bins[i]
124 |         img = mx.image.imdecode(_bin).asnumpy()
125 |         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
126 |         for flip in [0, 1]:
127 |             if flip == 1:
128 |                 img = np.fliplr(img)
129 |             data_list[flip][i, ...] = img
130 |         i += 1
131 |         if i % 1000 == 0:
132 |             print('loading bin', i)
133 |     print(data_list[0].shape)
134 |     return data_list, issame_list
135 | 
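A note on the structure `load_bin` returns (annotation, derived from the code above):

```python
# data_list[0][i]  -> i-th evaluation image (BGR, 112x112x3)
# data_list[1][i]  -> its horizontally flipped copy
# images come in pairs: issame_list[k] says whether images 2k and 2k+1
# show the same identity, so len(data_list[0]) == 2 * len(issame_list)
```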
136 | 
137 | if __name__ == '__main__':
138 |     args = get_parser()
139 |     ver_list = []
140 |     ver_name_list = []
141 |     for db in args.eval_datasets:
142 |         print('begin db %s convert.' % db)
143 |         # mx2tfrecords_eval_data(args, db)
144 |         data_set = load_bin(db, args.image_size, args)  # args was missing here; this load_bin requires it
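After converting an evaluation set it is worth checking that a record decodes back to a valid image. A minimal hedged sketch (the path and the `lfw` name are assumptions based on the defaults above):

```python
import numpy as np
import tensorflow as tf
import cv2

# read the first record of the converted lfw eval set (path assumed from the defaults above)
record_path = '../datasets/tfrecords/eval/lfw_eval_data.tfrecords'
record = next(tf.python_io.tf_record_iterator(record_path))
example = tf.train.Example()
example.ParseFromString(record)
img_bytes = example.features.feature['image_raw'].bytes_list.value[0]

# images were stored as raw BGR uint8 bytes of shape (112, 112, 3)
img = np.frombuffer(img_bytes, dtype=np.uint8).reshape(112, 112, 3)
cv2.imwrite('check.png', img)  # should look like an aligned face crop
```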
--------------------------------------------------------------------------------
/data/mx2tfrecords.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import argparse
3 | import PIL.Image
4 | import io
5 | import numpy as np
6 | import cv2
7 | import tensorflow as tf
8 | import os
9 | 
10 | 
11 | def parse_args():
12 |     parser = argparse.ArgumentParser(
13 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
14 |         description='data path information'
15 |     )
16 |     parser.add_argument('--bin_path', default='../datasets/faces_ms1m_112x112/train.rec', type=str,
17 |                         help='path to the binary image file')
18 |     parser.add_argument('--idx_path', default='../datasets/faces_ms1m_112x112/train.idx', type=str,
19 |                         help='path to the image index file')
20 |     parser.add_argument('--tfrecords_file_path', default='../datasets/tfrecords', type=str,
21 |                         help='path to the output tfrecords file')
22 |     args = parser.parse_args()
23 |     return args
24 | 
25 | 
26 | def mx2tfrecords_old(imgidx, imgrec, args):
27 |     output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
28 |     writer = tf.python_io.TFRecordWriter(output_path)
29 |     for i in imgidx:
30 |         img_info = imgrec.read_idx(i)
31 |         header, img = mx.recordio.unpack(img_info)
32 |         encoded_jpg_io = io.BytesIO(img)
33 |         image = PIL.Image.open(encoded_jpg_io)
34 |         np_img = np.array(image)
35 |         img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
36 |         img_raw = img.tobytes()
37 |         label = int(header.label)
38 |         example = tf.train.Example(features=tf.train.Features(feature={
39 |             'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
40 |             "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
41 |         }))
42 |         writer.write(example.SerializeToString())  # Serialize To String
43 |         if i % 10000 == 0:
44 |             print('%d num image processed' % i)
45 |     writer.close()
46 | 
47 | 
48 | def mx2tfrecords(imgidx, imgrec, args):
49 |     output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
50 |     writer = tf.python_io.TFRecordWriter(output_path)
51 |     for i in imgidx:
52 |         img_info = imgrec.read_idx(i)
53 |         header, img = mx.recordio.unpack(img_info)
54 |         label = int(header.label)
55 |         example = tf.train.Example(features=tf.train.Features(feature={
56 |             'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img])),
57 |             "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
58 |         }))
59 |         writer.write(example.SerializeToString())  # Serialize To String
60 |         if i % 10000 == 0:
61 |             print('%d num image processed' % i)
62 |     writer.close()
63 | 
64 | 
65 | def parse_function(example_proto):
66 |     features = {'image_raw': tf.FixedLenFeature([], tf.string),
67 |                 'label': tf.FixedLenFeature([], tf.int64)}
68 |     features = tf.parse_single_example(example_proto, features)
69 |     # You can do more image distortion here for training data
70 |     img = tf.image.decode_jpeg(features['image_raw'])
71 |     img = tf.reshape(img, shape=(112, 112, 3))
72 |     r, g, b = tf.split(img, num_or_size_splits=3, axis=-1)
73 |     img = tf.concat([b, g, r], axis=-1)
74 |     img = tf.cast(img, dtype=tf.float32)
75 |     img = tf.subtract(img, 127.5)
76 |     img = tf.multiply(img, 0.0078125)
77 |     img = tf.image.random_flip_left_right(img)
78 |     label = tf.cast(features['label'], tf.int64)
79 |     return img, label
80 | 
81 | 
82 | if __name__ == '__main__':
83 |     # # define parameters
84 |     # id2range = {}
85 |     # data_shape = (3, 112, 112)
86 |     args = parse_args()
87 |     # imgrec = mx.recordio.MXIndexedRecordIO(args.idx_path, args.bin_path, 'r')
88 |     # s = imgrec.read_idx(0)
89 |     # header, _ = mx.recordio.unpack(s)
90 |     # print(header.label)
91 |     # imgidx = list(range(1, int(header.label[0])))
92 |     # seq_identity = range(int(header.label[0]), int(header.label[1]))
93 |     # for identity in seq_identity:
94 |     #     s = imgrec.read_idx(identity)
95 |     #     header, _ = mx.recordio.unpack(s)
96 |     #     a, b = int(header.label[0]), int(header.label[1])
97 |     #     id2range[identity] = (a, b)
98 |     # print('id2range', len(id2range))
99 | 
100 |     # # generate tfrecords
101 |     # mx2tfrecords(imgidx, imgrec, args)
102 | 
103 |     config = tf.ConfigProto(allow_soft_placement=True)
104 |     sess = tf.Session(config=config)
105 |     # training datasets api config
106 |     tfrecords_f = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
107 |     dataset = tf.data.TFRecordDataset(tfrecords_f)
108 |     dataset = dataset.map(parse_function)
109 |     dataset = dataset.shuffle(buffer_size=30000)
110 |     dataset = dataset.batch(32)
111 |     iterator = dataset.make_initializable_iterator()
112 |     next_element = iterator.get_next()
113 |     # begin iteration
114 |     for i in range(1000):
115 |         sess.run(iterator.initializer)
116 |         while True:
117 |             try:
118 |                 images, labels = sess.run(next_element)
119 |                 # note: images are float32 in roughly [-1, 1]; scale back for display
120 |                 cv2.imshow('test', images[1, ...])
121 |                 cv2.waitKey(0)
122 |             except tf.errors.OutOfRangeError:
123 |                 print("End of dataset")
124 |                 break  # without this break the inner while-loop never terminates
125 | 
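The `__main__` block above re-initializes the iterator once per epoch and displays batches for inspection. For training, an equivalent pipeline can let `tf.data` handle the epochs itself; a hedged sketch reusing `parse_function`:

```python
# sketch: let tf.data handle epochs instead of re-initializing per epoch
dataset = tf.data.TFRecordDataset(tfrecords_f)
dataset = dataset.map(parse_function)
dataset = dataset.shuffle(buffer_size=30000).batch(32).repeat(1000)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()
# sess.run(next_element) until tf.errors.OutOfRangeError is raised once, at the very end
```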
--------------------------------------------------------------------------------
/eval_ckpt_file.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import argparse
3 | from data.eval_data_reader import load_bin
4 | from losses.face_losses import arcface_loss
5 | from nets.L_Resnet_E_IR import get_resnet
6 | import tensorlayer as tl
7 | from verification import ver_test
8 | 
9 | 
10 | def get_args():
11 |     parser = argparse.ArgumentParser(description='input information')
12 |     parser.add_argument('--ckpt_file', default='/home/aurora/workspaces2018/InsightFace_TF/output/ckpt_model_c/InsightFace_iter_best_',
13 |                         type=str, help='the ckpt file path')
14 |     # parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets')
15 |     parser.add_argument('--eval_datasets', default=['agedb_30'], help='evaluation datasets')
16 |     parser.add_argument('--eval_db_path', default='./datasets/faces_ms1m_112x112', help='evaluation datasets base path')
17 |     parser.add_argument('--image_size', default=[112, 112], help='the image size')
18 |     parser.add_argument('--net_depth', default=50, help='resnet depth, default is 50')
19 |     parser.add_argument('--num_output', default=85164, help='number of output classes')
20 |     parser.add_argument('--batch_size', default=32, help='batch size used for evaluation')
21 |     parser.add_argument('--ckpt_index_list',
22 |                         default=['1950000.ckpt'], help='ckpt file indexes')
23 |     args = parser.parse_args()
24 |     return args
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     args = get_args()
29 |     ver_list = []
30 |     ver_name_list = []
31 |     for db in args.eval_datasets:
32 |         print('begin db %s load.' % db)
33 |         data_set = load_bin(db, args.image_size, args)
34 |         ver_list.append(data_set)
35 |         ver_name_list.append(db)
36 | 
37 |     images = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32)
38 |     labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64)
39 |     dropout_rate = tf.placeholder(name='dropout_rate', dtype=tf.float32)
40 | 
41 |     w_init_method = tf.contrib.layers.xavier_initializer(uniform=False)
42 |     net = get_resnet(images, args.net_depth, type='ir', w_init=w_init_method, trainable=False, keep_rate=dropout_rate)
43 |     embedding_tensor = net.outputs
44 |     # mv_mean = tl.layers.get_variables_with_name('resnet_v1_50/bn0/moving_mean', False, True)[0]
45 |     # 3.2 get arcface loss
46 |     logit = arcface_loss(embedding=net.outputs, labels=labels, w_init=w_init_method, out_num=args.num_output)
47 | 
48 |     sess = tf.Session()
49 |     saver = tf.train.Saver()
50 | 
51 |     result_index = []
52 |     for file_index in args.ckpt_index_list:
53 |         feed_dict_test = {}
54 |         path = args.ckpt_file + file_index
55 |         saver.restore(sess, path)
56 |         print('ckpt file %s restored!' % file_index)
57 |         feed_dict_test.update(tl.utils.dict_to_one(net.all_drop))
58 |         feed_dict_test[dropout_rate] = 1.0
59 |         results = ver_test(ver_list=ver_list, ver_name_list=ver_name_list, nbatch=0, sess=sess,
60 |                            embedding_tensor=embedding_tensor, batch_size=args.batch_size, feed_dict=feed_dict_test,
61 |                            input_placeholder=images)
62 |         result_index.append(results)
63 |     print(result_index)
64 | 
--------------------------------------------------------------------------------
/figures/lfw_250k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/figures/lfw_250k.png
--------------------------------------------------------------------------------
/figures/lfw_310k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/figures/lfw_310k.png
--------------------------------------------------------------------------------
/figures/lfw_730k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/figures/lfw_730k.png
--------------------------------------------------------------------------------
/figures/model_a_170k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/figures/model_a_170k.png
--------------------------------------------------------------------------------
/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/losses/__init__.py
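The next file, `losses/face_losses.py`, implements the margin losses used by `eval_ckpt_file.py` above. As a usage reference, a minimal hedged sketch of wiring `arcface_loss` into a cross-entropy objective (the 512-d embedding size and 85164 classes mirror the defaults used elsewhere in this repo):

```python
import tensorflow as tf
from losses.face_losses import arcface_loss

embedding = tf.placeholder(tf.float32, [None, 512], name='embedding')
labels = tf.placeholder(tf.int64, [None], name='labels')
w_init = tf.contrib.layers.xavier_initializer(uniform=False)
# arcface_loss returns *logits* (s * cos(theta + m) on the target class), not a scalar loss
logits = arcface_loss(embedding=embedding, labels=labels, w_init=w_init, out_num=85164)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
```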
--------------------------------------------------------------------------------
/losses/face_losses.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import math
3 | 
4 | 
5 | def arcface_loss(embedding, labels, out_num, w_init=None, s=64., m=0.5):
6 |     '''
7 |     :param embedding: the input embedding vectors
8 |     :param labels: the input labels, shape should be e.g. (batch_size,)
9 |     :param s: scale factor, default is 64
10 |     :param out_num: output class num
11 |     :param m: the margin value, default is 0.5
12 |     :return: the final calculated output; this output is sent into tf.nn.softmax directly
13 |     '''
14 |     cos_m = math.cos(m)
15 |     sin_m = math.sin(m)
16 |     mm = sin_m * m  # issue 1
17 |     threshold = math.cos(math.pi - m)
18 |     with tf.variable_scope('arcface_loss'):
19 |         # inputs and weights norm
20 |         embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
21 |         embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
22 |         weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
23 |                                   initializer=w_init, dtype=tf.float32)
24 |         weights_norm = tf.norm(weights, axis=0, keep_dims=True)
25 |         weights = tf.div(weights, weights_norm, name='norm_weights')
26 |         # cos(theta+m)
27 |         cos_t = tf.matmul(embedding, weights, name='cos_t')
28 |         cos_t2 = tf.square(cos_t, name='cos_2')
29 |         sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
30 |         sin_t = tf.sqrt(sin_t2, name='sin_t')
31 |         cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')
32 | 
33 |         # this condition controls that theta+m stays in range [0, pi]
34 |         #     0 <= theta+m <= pi
35 |         #     -m <= theta <= pi-m
36 |         cond_v = cos_t - threshold
37 |         cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)
38 | 
39 |         keep_val = s*(cos_t - mm)
40 |         cos_mt_temp = tf.where(cond, cos_mt, keep_val)
41 | 
42 |         mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
43 |         # mask = tf.squeeze(mask, 1)
44 |         inv_mask = tf.subtract(1., mask, name='inverse_mask')
45 | 
46 |         s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')
47 | 
48 |         output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_loss_output')
49 |     return output
50 | 
51 | 
52 | def cosineface_losses(embedding, labels, out_num, w_init=None, s=30., m=0.4):
53 |     '''
54 |     :param embedding: the input embedding vectors
55 |     :param labels: the input labels, shape should be e.g. (batch_size,)
56 |     :param s: scale factor, default is 30
57 |     :param out_num: output class num
58 |     :param m: the margin value, default is 0.4
59 |     :return: the final calculated output; this output is sent into tf.nn.softmax directly
60 |     '''
61 |     with tf.variable_scope('cosineface_loss'):
62 |         # inputs and weights norm
63 |         embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
64 |         embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
65 |         weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
66 |                                   initializer=w_init, dtype=tf.float32)
67 |         weights_norm = tf.norm(weights, axis=0, keep_dims=True)
68 |         weights = tf.div(weights, weights_norm, name='norm_weights')
69 |         # cos_theta - m
70 |         cos_t = tf.matmul(embedding, weights, name='cos_t')
71 |         cos_t_m = tf.subtract(cos_t, m, name='cos_t_m')
72 | 
73 |         mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
74 |         inv_mask = tf.subtract(1., mask, name='inverse_mask')
75 | 
76 |         output = tf.add(s * tf.multiply(cos_t, inv_mask), s * tf.multiply(cos_t_m, mask), name='cosineface_loss_output')
77 |     return output
78 | 
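For clarity (annotation, not part of the original file): the two losses above differ only in where the margin enters the target logit.

```
ArcFace:     logit_yi = s * cos(theta_yi + m)       # additive angular margin, m = 0.5
CosineFace:  logit_yi = s * (cos(theta_yi) - m)     # additive cosine margin,  m = 0.4
Non-target:  logit_j  = s * cos(theta_j)            # identical in both losses
```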
79 | 
80 | def combine_loss_val(embedding, labels, w_init, out_num, margin_a, margin_m, margin_b, s):
81 |     '''
82 |     This code is contributed by RogerLo. Thanks for your contribution.
83 | 
84 |     :param embedding: the input embedding vectors
85 |     :param labels: the input labels, shape should be e.g. (batch_size,)
86 |     :param s: scale factor, default is 64
87 |     :param out_num: output class num
88 |     :param margin_a, margin_m, margin_b: combined-margin parameters for cos(margin_a*theta + margin_m) - margin_b; (1.0, 0.5, 0.0) recovers ArcFace and (1.0, 0.0, m) recovers CosineFace
89 |     :return: the final calculated output; this output is sent into tf.nn.softmax directly
90 |     '''
91 |     weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
92 |                               initializer=w_init, dtype=tf.float32)
93 |     weights_unit = tf.nn.l2_normalize(weights, axis=0)
94 |     embedding_unit = tf.nn.l2_normalize(embedding, axis=1)
95 |     cos_t = tf.matmul(embedding_unit, weights_unit)
96 |     ordinal = tf.constant(list(range(0, embedding.get_shape().as_list()[0])), tf.int64)
97 |     ordinal_y = tf.stack([ordinal, labels], axis=1)
98 |     zy = cos_t * s
99 |     sel_cos_t = tf.gather_nd(zy, ordinal_y)
100 |     if margin_a != 1.0 or margin_m != 0.0 or margin_b != 0.0:  # note: if all margins are trivial, new_zy below stays undefined
101 |         if margin_a == 1.0 and margin_m == 0.0:
102 |             s_m = s * margin_b
103 |             new_zy = sel_cos_t - s_m
104 |         else:
105 |             cos_value = sel_cos_t / s
106 |             t = tf.acos(cos_value)
107 |             if margin_a != 1.0:
108 |                 t = t * margin_a
109 |             if margin_m > 0.0:
110 |                 t = t + margin_m
111 |             body = tf.cos(t)
112 |             if margin_b > 0.0:
113 |                 body = body - margin_b
114 |             new_zy = body * s
115 |     updated_logits = tf.add(zy, tf.scatter_nd(ordinal_y, tf.subtract(new_zy, sel_cos_t), zy.get_shape()))
116 |     loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=updated_logits))
117 |     predict_cls = tf.argmax(updated_logits, 1)
118 |     accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(predict_cls, tf.int64), tf.cast(labels, tf.int64)), 'float'))
119 |     predict_cls_s = tf.argmax(zy, 1)
120 |     accuracy_s = tf.reduce_mean(tf.cast(tf.equal(tf.cast(predict_cls_s, tf.int64), tf.cast(labels, tf.int64)), 'float'))
121 |     return zy, loss, accuracy, accuracy_s, predict_cls_s
--------------------------------------------------------------------------------
/nets/L_Resnet_E_IR_GBN.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorlayer as tl
3 | from tensorflow.contrib.layers.python.layers import utils
4 | import collections
5 | from tensorlayer.layers import Layer, list_remove_repeat
6 | from tl_layers_modify import GroupNormLayer
7 | 
8 | 
9 | class ElementwiseLayer(Layer):
10 |     """
11 |     The :class:`ElementwiseLayer` class combines multiple :class:`Layer` outputs that have the same shape by a given element-wise operation.
12 | 
13 |     Parameters
14 |     ----------
15 |     layer : a list of :class:`Layer` instances
16 |         The `Layer` class feeding into this layer.
17 |     combine_fn : a TensorFlow element-wise merge function
18 |         e.g. AND is ``tf.minimum`` ; OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.multiply`` and so on.
19 |         See the TensorFlow Math API for the full list.
20 |     name : a string or None
21 |         An optional name to attach to this layer.
22 | """ 23 | def __init__( 24 | self, 25 | layer = [], 26 | combine_fn = tf.minimum, 27 | name ='elementwise_layer', 28 | act = None, 29 | ): 30 | Layer.__init__(self, name=name) 31 | 32 | if act: 33 | print(" [TL] ElementwiseLayer %s: size:%s fn:%s, act:%s" % ( 34 | self.name, layer[0].outputs.get_shape(), combine_fn.__name__, act.__name__)) 35 | else: 36 | print(" [TL] ElementwiseLayer %s: size:%s fn:%s" % ( 37 | self.name, layer[0].outputs.get_shape(), combine_fn.__name__)) 38 | 39 | self.outputs = layer[0].outputs 40 | # print(self.outputs._shape, type(self.outputs._shape)) 41 | for l in layer[1:]: 42 | # assert str(self.outputs.get_shape()) == str(l.outputs.get_shape()), "Hint: the input shapes should be the same. %s != %s" % (self.outputs.get_shape() , str(l.outputs.get_shape())) 43 | self.outputs = combine_fn(self.outputs, l.outputs, name=name) 44 | if act: 45 | self.outputs = act(self.outputs) 46 | self.all_layers = list(layer[0].all_layers) 47 | self.all_params = list(layer[0].all_params) 48 | self.all_drop = dict(layer[0].all_drop) 49 | 50 | for i in range(1, len(layer)): 51 | self.all_layers.extend(list(layer[i].all_layers)) 52 | self.all_params.extend(list(layer[i].all_params)) 53 | self.all_drop.update(dict(layer[i].all_drop)) 54 | 55 | self.all_layers = list_remove_repeat(self.all_layers) 56 | self.all_params = list_remove_repeat(self.all_params) 57 | 58 | 59 | def subsample(inputs, factor, scope=None): 60 | if factor == 1: 61 | return inputs 62 | else: 63 | return tl.layers.MaxPool2d(inputs, [1, 1], strides=(factor, factor), name=scope) 64 | 65 | 66 | def conv2d_same(inputs, num_outputs, kernel_size, strides, rate=1, w_init=None, scope=None, trainable=None): 67 | ''' 68 | Reference slim resnet 69 | :param inputs: 70 | :param num_outputs: 71 | :param kernel_size: 72 | :param strides: 73 | :param rate: 74 | :param scope: 75 | :return: 76 | ''' 77 | if strides == 1: 78 | if rate == 1: 79 | nets = tl.layers.Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 80 | strides=(strides, strides), W_init=w_init, act=None, padding='SAME', name=scope, 81 | use_cudnn_on_gpu=True) 82 | nets = GroupNormLayer(layer=nets, act=tf.identity, name=scope+'_bn/GroupNorm') 83 | else: 84 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), 85 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 86 | nets = GroupNormLayer(layer=nets, act=tf.identity, name=scope+'_bn/GroupNorm') 87 | return nets 88 | else: 89 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) 90 | pad_total = kernel_size_effective - 1 91 | pad_beg = pad_total // 2 92 | pad_end = pad_total - pad_beg 93 | inputs = tl.layers.PadLayer(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]], name='padding_%s' % scope) 94 | if rate == 1: 95 | nets = tl.layers.Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 96 | strides=(strides, strides), W_init=w_init, act=None, padding='VALID', name=scope, 97 | use_cudnn_on_gpu=True) 98 | nets = GroupNormLayer(layer=nets, act=tf.identity, name=scope+'_bn/GroupNorm') 99 | else: 100 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 101 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 102 | nets = GroupNormLayer(layer=nets, act=tf.identity, name=scope+'_bn/GroupNorm') 103 | return nets 104 | 105 | 106 | def bottleneck(inputs, depth, depth_bottleneck, 
stride, rate=1, scope=None): 107 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 108 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 109 | if depth == depth_in: 110 | shortcut = subsample(inputs, stride, 'shortcut') 111 | else: 112 | shortcut = tl.layers.Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 113 | b_init=None, name='shortcut_conv') 114 | shortcut = GroupNormLayer(layer=shortcut, act=tf.identity, name='shortcut_bn/BatchNorm') 115 | # bottleneck layer 1 116 | residual = tl.layers.Conv2d(inputs, depth_bottleneck, filter_size=(1, 1), strides=(1, 1), act=None, b_init=None, 117 | name='conv1') 118 | residual = GroupNormLayer(layer=residual, act=tf.nn.relu, name='conv1_bn/BatchNorm') 119 | 120 | # bottleneck layer 2 121 | residual = conv2d_same(residual, depth_bottleneck, kernel_size=3, strides= stride, rate=rate, scope='conv2') 122 | 123 | # bottleneck layer 3 124 | residual = tl.layers.Conv2d(residual, depth, filter_size=(1, 1), strides=(1, 1), act=None, b_init=None, 125 | name='conv3') 126 | residual = GroupNormLayer(layer=residual, act=tf.identity, name='conv3_bn/BatchNorm', 127 | scale_init=tf.constant_initializer(0.0)) 128 | output = ElementwiseLayer(layer=[shortcut, residual], 129 | combine_fn=tf.add, 130 | name='combine_layer', 131 | act=tf.nn.relu) 132 | return output 133 | 134 | 135 | def bottleneck_IR(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 136 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 137 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 138 | if depth == depth_in: 139 | shortcut = subsample(inputs, stride, 'shortcut') 140 | else: 141 | shortcut = tl.layers.Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 142 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 143 | shortcut = GroupNormLayer(layer=shortcut, act=tf.identity, name='shortcut_bn/BatchNorm') 144 | # bottleneck layer 1 145 | residual = GroupNormLayer(layer=inputs, act=tf.identity, name='conv1_bn1') 146 | residual = tl.layers.Conv2d(residual, depth_bottleneck, filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 147 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 148 | residual = GroupNormLayer(layer=residual, act=tf.identity, name='conv1_bn2') 149 | # bottleneck prelu 150 | residual = tl.layers.PReluLayer(residual) 151 | # bottleneck layer 2 152 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 153 | output = ElementwiseLayer(layer=[shortcut, residual], 154 | combine_fn=tf.add, 155 | name='combine_layer', 156 | act=None) 157 | return output 158 | 159 | 160 | def bottleneck_IR_SE(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 161 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 162 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 163 | if depth == depth_in: 164 | shortcut = subsample(inputs, stride, 'shortcut') 165 | else: 166 | shortcut = tl.layers.Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 167 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 168 | shortcut = GroupNormLayer(layer=shortcut, act=tf.identity, name='shortcut_bn/BatchNorm') 169 | residual = GroupNormLayer(layer=inputs, act=tf.identity, name='conv1_bn1') 170 | residual = tl.layers.Conv2d(residual, depth_bottleneck, 
filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 171 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 172 | residual = GroupNormLayer(layer=residual, act=tf.identity, name='conv1_bn2') 173 | # bottleneck prelu 174 | residual = tl.layers.PReluLayer(residual) 175 | # bottleneck layer 2 176 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 177 | # squeeze 178 | squeeze = tl.layers.InputLayer(tf.reduce_mean(residual.outputs, axis=[1, 2]), name='squeeze_layer') 179 | # excitation 180 | excitation1 = tl.layers.DenseLayer(squeeze, n_units=int(depth/16.0), act=tf.nn.relu, 181 | W_init=w_init, name='excitation_1') 182 | # excitation1 = tl.layers.PReluLayer(excitation1, name='excitation_prelu') 183 | excitation2 = tl.layers.DenseLayer(excitation1, n_units=depth, act=tf.nn.sigmoid, 184 | W_init=w_init, name='excitation_2') 185 | # scale 186 | scale = tl.layers.ReshapeLayer(excitation2, shape=[tf.shape(excitation2.outputs)[0], 1, 1, depth], name='excitation_reshape') 187 | 188 | residual_se = ElementwiseLayer(layer=[residual, scale], 189 | combine_fn=tf.multiply, 190 | name='scale_layer', 191 | act=None) 192 | 193 | output = ElementwiseLayer(layer=[shortcut, residual_se], 194 | combine_fn=tf.add, 195 | name='combine_layer', 196 | act=tf.nn.relu) 197 | return output 198 | 199 | 200 | def resnet(inputs, bottle_neck, blocks, w_init=None, trainable=None, scope=None): 201 | with tf.variable_scope(scope): 202 | net_inputs = tl.layers.InputLayer(inputs, name='input_layer') 203 | if bottle_neck: 204 | net = tl.layers.Conv2d(net_inputs, n_filter=64, filter_size=(3, 3), strides=(1, 1), 205 | act=None, W_init=w_init, b_init=None, name='conv1', use_cudnn_on_gpu=True) 206 | net = GroupNormLayer(layer=net, act=tf.identity, name='group_norm_0') 207 | net = tl.layers.PReluLayer(net, name='prelu0') 208 | else: 209 | raise ValueError('The standard resnet must support the bottleneck layer') 210 | for block in blocks: 211 | with tf.variable_scope(block.scope): 212 | for i, var in enumerate(block.args): 213 | with tf.variable_scope('unit_%d' % (i+1)): 214 | net = block.unit_fn(net, depth=var['depth'], depth_bottleneck=var['depth_bottleneck'], 215 | w_init=w_init, stride=var['stride'], rate=var['rate'], scope=None, 216 | trainable=trainable) 217 | net = GroupNormLayer(layer=net, act=tf.identity, name='E_GN_0') 218 | net = tl.layers.DropoutLayer(net, keep=0.4, name='E_Dropout') 219 | net_shape = net.outputs.get_shape() 220 | net = tl.layers.ReshapeLayer(net, shape=[-1, net_shape[1]*net_shape[2]*net_shape[3]], name='E_Reshapelayer') 221 | net = tl.layers.DenseLayer(net, n_units=512, W_init=w_init, name='E_DenseLayer') 222 | # net = GroupNormLayer(layer=net, act=tf.identity, name='E_GN_1') 223 | return net 224 | 225 | 226 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 227 | """A named tuple describing a ResNet block. 228 | 229 | Its parts are: 230 | scope: The scope of the `Block`. 231 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 232 | returns another `Tensor` with the output of the ResNet unit. 233 | args: A list of length equal to the number of units in the `Block`. The list 234 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 235 | block to serve as argument to unit_fn. 
236 | """ 237 | 238 | 239 | def resnetse_v1_block(scope, base_depth, num_units, stride, rate=1, unit_fn=None): 240 | """Helper function for creating a resnet_v1 bottleneck block. 241 | 242 | Args: 243 | scope: The scope of the block. 244 | base_depth: The depth of the bottleneck layer for each unit. 245 | num_units: The number of units in the block. 246 | stride: The stride of the block, implemented as a stride in the last unit. 247 | All other units have stride=1. 248 | 249 | Returns: 250 | A resnet_v1 bottleneck block. 251 | """ 252 | return Block(scope, unit_fn, [{ 253 | 'depth': base_depth * 4, 254 | 'depth_bottleneck': base_depth, 255 | 'stride': stride, 256 | 'rate': rate 257 | }] + [{ 258 | 'depth': base_depth * 4, 259 | 'depth_bottleneck': base_depth, 260 | 'stride': 1, 261 | 'rate': rate 262 | }] * (num_units - 1)) 263 | 264 | 265 | def resnetse_v1_block_2(scope, base_depth, num_units, stride, rate=1, unit_fn=None): 266 | """Helper function for creating a resnet_v1 bottleneck block. 267 | 268 | Args: 269 | scope: The scope of the block. 270 | base_depth: The depth of the bottleneck layer for each unit. 271 | num_units: The number of units in the block. 272 | stride: The stride of the block, implemented as a stride in the last unit. 273 | All other units have stride=1. 274 | 275 | Returns: 276 | A resnet_v1 bottleneck block. 277 | """ 278 | return Block(scope, unit_fn, [{ 279 | 'depth': base_depth * 4, 280 | 'depth_bottleneck': base_depth, 281 | 'stride': 1, 282 | 'rate': rate 283 | }] * (num_units - 1) + [{ 284 | 'depth': base_depth * 4, 285 | 'depth_bottleneck': base_depth, 286 | 'stride': stride, 287 | 'rate': rate 288 | }]) 289 | 290 | 291 | def get_resnet(inputs, num_layers, type=None, w_init=None, trainable=None, sess=None): 292 | if type == 'ir': 293 | unit_fn = bottleneck_IR 294 | elif type == 'se_ir': 295 | unit_fn = bottleneck_IR_SE 296 | # elif type == 'resnet': 297 | # unit_fn = bottleneck 298 | # blocks = [ 299 | # resnetse_v1_block_2('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 300 | # resnetse_v1_block_2('block2', base_depth=128, num_units=4, stride=2, rate=1, unit_fn=unit_fn), 301 | # resnetse_v1_block_2('block3', base_depth=256, num_units=6, stride=2, rate=1, unit_fn=unit_fn), 302 | # resnetse_v1_block_2('block4', base_depth=512, num_units=3, stride=1, rate=1, unit_fn=unit_fn) 303 | # ] 304 | else: 305 | raise ValueError('the input fn is unknown') 306 | 307 | if num_layers == 50: 308 | blocks = [ 309 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 310 | resnetse_v1_block('block2', base_depth=128, num_units=4, stride=2, rate=1, unit_fn=unit_fn), 311 | resnetse_v1_block('block3', base_depth=256, num_units=14, stride=2, rate=1, unit_fn=unit_fn), 312 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 313 | ] 314 | elif num_layers == 101: 315 | blocks = [ 316 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 317 | resnetse_v1_block('block2', base_depth=128, num_units=13, stride=2, rate=1, unit_fn=unit_fn), 318 | resnetse_v1_block('block3', base_depth=256, num_units=30, stride=2, rate=1, unit_fn=unit_fn), 319 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 320 | ] 321 | elif num_layers == 152: 322 | blocks = [ 323 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 324 | resnetse_v1_block('block2', base_depth=128, 
num_units=8, stride=2, rate=1, unit_fn=unit_fn), 325 | resnetse_v1_block('block3', base_depth=256, num_units=36, stride=2, rate=1, unit_fn=unit_fn), 326 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 327 | ] 328 | else: 329 | raise ValueError('Resnet layer %d is not supported now.' % num_layers) 330 | net = resnet(inputs=inputs, 331 | bottle_neck=True, 332 | blocks=blocks, 333 | w_init=w_init, 334 | trainable=trainable, 335 | scope='resnet_v1_%d' % num_layers) 336 | return net 337 | 338 | 339 | if __name__ == '__main__': 340 | x = tf.placeholder(dtype=tf.float32, shape=[None, 112, 112, 3], name='input_place') 341 | sess = tf.Session() 342 | # w_init = tf.truncated_normal_initializer(mean=10, stddev=5e-2) 343 | w_init = tf.contrib.layers.xavier_initializer(uniform=False) 344 | # test resnetse 345 | nets = get_resnet(x, 50, type='ir', w_init=w_init, sess=sess) 346 | tl.layers.initialize_global_variables(sess) 347 | 348 | for p in tl.layers.get_variables_with_name('W_conv2d', True, True): 349 | print(p.op.name) 350 | print('##############'*30) 351 | with sess: 352 | nets.print_params() 353 | -------------------------------------------------------------------------------- /nets/L_Resnet_E_IR_MGPU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | from tensorflow.contrib.layers.python.layers import utils 4 | import collections 5 | from tl_layers_modify import ElementwiseLayer, BatchNormLayer, Conv2d, PReluLayer, DenseLayer 6 | 7 | 8 | def subsample(inputs, factor, scope=None): 9 | if factor == 1: 10 | return inputs 11 | else: 12 | return tl.layers.MaxPool2d(inputs, [1, 1], strides=(factor, factor), name=scope) 13 | 14 | 15 | def conv2d_same(inputs, num_outputs, kernel_size, strides, rate=1, w_init=None, scope=None, trainable=None): 16 | ''' 17 | Reference slim resnet 18 | :param inputs: 19 | :param num_outputs: 20 | :param kernel_size: 21 | :param strides: 22 | :param rate: 23 | :param scope: 24 | :return: 25 | ''' 26 | if strides == 1: 27 | if rate == 1: 28 | nets = Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 29 | strides=(strides, strides), W_init=w_init, act=None, padding='SAME', name=scope, 30 | use_cudnn_on_gpu=True) 31 | nets = BatchNormLayer(nets, act=tf.identity, is_train=True, trainable=trainable, name=scope+'_bn/BatchNorm') 32 | else: 33 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), 34 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 35 | nets = BatchNormLayer(nets, act=tf.identity, is_train=True, trainable=trainable, name=scope+'_bn/BatchNorm') 36 | return nets 37 | else: 38 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) 39 | pad_total = kernel_size_effective - 1 40 | pad_beg = pad_total // 2 41 | pad_end = pad_total - pad_beg 42 | inputs = tl.layers.PadLayer(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]], name='padding_%s' % scope) 43 | if rate == 1: 44 | nets = Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 45 | strides=(strides, strides), W_init=w_init, act=None, padding='VALID', name=scope, 46 | use_cudnn_on_gpu=True) 47 | nets = BatchNormLayer(nets, act=tf.identity, is_train=True, trainable=trainable, name=scope+'_bn/BatchNorm') 48 | else: 49 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, 
kernel_size), b_init=None, 50 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 51 | nets = BatchNormLayer(nets, act=tf.identity, is_train=True, trainable=trainable, name=scope+'_bn/BatchNorm') 52 | return nets 53 | 54 | 55 | def bottleneck_IR(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 56 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 57 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 58 | if depth == depth_in: 59 | shortcut = subsample(inputs, stride, 'shortcut') 60 | else: 61 | shortcut = Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 62 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 63 | shortcut = BatchNormLayer(shortcut, act=tf.identity, is_train=True, trainable=trainable, name='shortcut_bn/BatchNorm') 64 | # bottleneck layer 1 65 | residual = BatchNormLayer(inputs, act=tf.identity, is_train=True, trainable=trainable, name='conv1_bn1') 66 | residual = Conv2d(residual, depth_bottleneck, filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 67 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 68 | residual = BatchNormLayer(residual, act=tf.identity, is_train=True, trainable=trainable, name='conv1_bn2') 69 | # bottleneck prelu 70 | residual = PReluLayer(residual) 71 | # bottleneck layer 2 72 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 73 | output = ElementwiseLayer(layer=[shortcut, residual], 74 | combine_fn=tf.add, 75 | name='combine_layer', 76 | act=None) 77 | return output 78 | 79 | 80 | def bottleneck_IR_SE(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 81 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 82 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 83 | if depth == depth_in: 84 | shortcut = subsample(inputs, stride, 'shortcut') 85 | else: 86 | shortcut = Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 87 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 88 | shortcut = BatchNormLayer(shortcut, act=tf.identity, is_train=True, trainable=trainable, name='shortcut_bn/BatchNorm') 89 | # bottleneck layer 1 90 | residual = BatchNormLayer(inputs, act=tf.identity, is_train=True, trainable=trainable, name='conv1_bn1') 91 | residual = Conv2d(residual, depth_bottleneck, filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 92 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 93 | residual = BatchNormLayer(residual, act=tf.identity, is_train=True, trainable=trainable, name='conv1_bn2') 94 | # bottleneck prelu 95 | residual = PReluLayer(residual) 96 | # bottleneck layer 2 97 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 98 | # squeeze 99 | squeeze = tl.layers.InputLayer(tf.reduce_mean(residual.outputs, axis=[1, 2]), name='squeeze_layer') 100 | # excitation 101 | excitation1 = DenseLayer(squeeze, n_units=int(depth/16.0), act=tf.nn.relu, 102 | W_init=w_init, name='excitation_1') 103 | # excitation1 = tl.layers.PReluLayer(excitation1, name='excitation_prelu') 104 | excitation2 = DenseLayer(excitation1, n_units=depth, act=tf.nn.sigmoid, 105 | W_init=w_init, name='excitation_2') 106 | # scale 107 | scale = tl.layers.ReshapeLayer(excitation2, shape=[tf.shape(excitation2.outputs)[0], 1, 1, depth], 
name='excitation_reshape') 108 | 109 | residual_se = ElementwiseLayer(layer=[residual, scale], 110 | combine_fn=tf.multiply, 111 | name='scale_layer', 112 | act=None) 113 | 114 | output = ElementwiseLayer(layer=[shortcut, residual_se], 115 | combine_fn=tf.add, 116 | name='combine_layer', 117 | act=tf.nn.relu) 118 | return output 119 | 120 | 121 | def resnet(inputs, bottle_neck, blocks, w_init=None, trainable=None, keep_rate=None, scope=None): 122 | with tf.variable_scope(scope): 123 | net_inputs = tl.layers.InputLayer(inputs, name='input_layer') 124 | if bottle_neck: 125 | net = Conv2d(net_inputs, n_filter=64, filter_size=(3, 3), strides=(1, 1), 126 | act=None, W_init=w_init, b_init=None, name='conv1', use_cudnn_on_gpu=True) 127 | net = BatchNormLayer(net, act=tf.identity, name='bn0', is_train=True, trainable=trainable) 128 | net = PReluLayer(net, name='prelu0') 129 | else: 130 | raise ValueError('The standard resnet must support the bottleneck layer') 131 | for block in blocks: 132 | with tf.variable_scope(block.scope): 133 | for i, var in enumerate(block.args): 134 | with tf.variable_scope('unit_%d' % (i+1)): 135 | net = block.unit_fn(net, depth=var['depth'], depth_bottleneck=var['depth_bottleneck'], 136 | w_init=w_init, stride=var['stride'], rate=var['rate'], scope=None, 137 | trainable=trainable) 138 | net = BatchNormLayer(net, act=tf.identity, is_train=True, name='E_BN1', trainable=trainable) 139 | net = tl.layers.DropoutLayer(net, keep=keep_rate, name='E_Dropout') 140 | net_shape = net.outputs.get_shape() 141 | net = tl.layers.ReshapeLayer(net, shape=[-1, net_shape[1]*net_shape[2]*net_shape[3]], name='E_Reshapelayer') 142 | net = DenseLayer(net, n_units=512, W_init=w_init, name='E_DenseLayer') 143 | net = BatchNormLayer(net, act=tf.identity, is_train=True, fix_gamma=False, trainable=trainable, name='E_BN2') 144 | return net 145 | 146 | 147 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 148 | """A named tuple describing a ResNet block. 149 | 150 | Its parts are: 151 | scope: The scope of the `Block`. 152 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 153 | returns another `Tensor` with the output of the ResNet unit. 154 | args: A list of length equal to the number of units in the `Block`. The list 155 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 156 | block to serve as argument to unit_fn. 157 | """ 158 | 159 | 160 | def resnetse_v1_block(scope, base_depth, num_units, stride, rate=1, unit_fn=None): 161 | """Helper function for creating a resnet_v1 bottleneck block. 162 | 163 | Args: 164 | scope: The scope of the block. 165 | base_depth: The depth of the bottleneck layer for each unit. 166 | num_units: The number of units in the block. 167 | stride: The stride of the block, implemented as a stride in the last unit. 168 | All other units have stride=1. 169 | 170 | Returns: 171 | A resnet_v1 bottleneck block. 
172 |     """
173 |     return Block(scope, unit_fn, [{
174 |         'depth': base_depth,
175 |         'depth_bottleneck': base_depth,
176 |         'stride': stride,
177 |         'rate': rate
178 |     }] + [{
179 |         'depth': base_depth,
180 |         'depth_bottleneck': base_depth,
181 |         'stride': 1,
182 |         'rate': rate
183 |     }] * (num_units - 1))
184 | 
185 | 
186 | def get_resnet(inputs, num_layers, type=None, w_init=None, trainable=None, keep_rate=None, sess=None):
187 |     if type == 'ir':
188 |         unit_fn = bottleneck_IR
189 |     elif type == 'se_ir':
190 |         unit_fn = bottleneck_IR_SE
191 |     else:
192 |         raise ValueError('the input fn is unknown')
193 | 
194 |     if num_layers == 50:
195 |         blocks = [
196 |             resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn),
197 |             resnetse_v1_block('block2', base_depth=128, num_units=4, stride=2, rate=1, unit_fn=unit_fn),
198 |             resnetse_v1_block('block3', base_depth=256, num_units=14, stride=2, rate=1, unit_fn=unit_fn),
199 |             resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn)
200 |         ]
201 |     elif num_layers == 100:
202 |         blocks = [
203 |             resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn),
204 |             resnetse_v1_block('block2', base_depth=128, num_units=13, stride=2, rate=1, unit_fn=unit_fn),
205 |             resnetse_v1_block('block3', base_depth=256, num_units=30, stride=2, rate=1, unit_fn=unit_fn),
206 |             resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn)
207 |         ]
208 |     elif num_layers == 152:
209 |         blocks = [
210 |             resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn),
211 |             resnetse_v1_block('block2', base_depth=128, num_units=8, stride=2, rate=1, unit_fn=unit_fn),
212 |             resnetse_v1_block('block3', base_depth=256, num_units=36, stride=2, rate=1, unit_fn=unit_fn),
213 |             resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn)
214 |         ]
215 |     else:
216 |         raise ValueError('Resnet layer %d is not supported now.' % num_layers)
217 |     net = resnet(inputs=inputs,
218 |                  bottle_neck=True,
219 |                  blocks=blocks,
220 |                  w_init=w_init,
221 |                  trainable=trainable,
222 |                  keep_rate=keep_rate,
223 |                  scope='resnet_v1_%d' % num_layers)
224 |     return net
225 | 
226 | 
227 | if __name__ == '__main__':
228 |     x = tf.placeholder(dtype=tf.float32, shape=[None, 112, 112, 3], name='input_place')
229 |     sess = tf.Session()
230 |     # w_init = tf.truncated_normal_initializer(mean=10, stddev=5e-2)
231 |     w_init = tf.contrib.layers.xavier_initializer(uniform=False)
232 |     # test resnetse
233 |     nets = get_resnet(x, 50, type='ir', w_init=w_init, keep_rate=0.4, sess=sess)  # keep_rate was missing; DropoutLayer needs a numeric keep value
234 |     tl.layers.initialize_global_variables(sess)
235 | 
236 |     for p in tl.layers.get_variables_with_name('W_conv2d', True, True):
237 |         print(p.op.name)
238 |     print('##############'*30)
239 |     with sess:
240 |         nets.print_params()
241 | 
--------------------------------------------------------------------------------
/nets/L_Resnet_E_IR_RBN.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorlayer as tl
3 | from tensorflow.contrib.layers.python.layers import utils
4 | import collections
5 | from tensorlayer.layers import Layer, list_remove_repeat
6 | 
7 | 
8 | class ElementwiseLayer(Layer):
9 |     """
10 |     The :class:`ElementwiseLayer` class combines multiple :class:`Layer` outputs that have the same shape by a given element-wise operation.
11 | 
12 |     Parameters
13 |     ----------
14 |     layer : a list of :class:`Layer` instances
15 |         The `Layer` class feeding into this layer.
16 |     combine_fn : a TensorFlow element-wise merge function
17 |         e.g. AND is ``tf.minimum`` ; OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.multiply`` and so on.
18 |         See the TensorFlow Math API for the full list.
19 |     name : a string or None
20 |         An optional name to attach to this layer.
21 |     """
22 |     def __init__(
23 |             self,
24 |             layer = [],
25 |             combine_fn = tf.minimum,
26 |             name ='elementwise_layer',
27 |             act = None,
28 |     ):
29 |         Layer.__init__(self, name=name)
30 | 
31 |         if act:
32 |             print("  [TL] ElementwiseLayer %s: size:%s fn:%s, act:%s" % (
33 |                 self.name, layer[0].outputs.get_shape(), combine_fn.__name__, act.__name__))
34 |         else:
35 |             print("  [TL] ElementwiseLayer %s: size:%s fn:%s" % (
36 |                 self.name, layer[0].outputs.get_shape(), combine_fn.__name__))
37 | 
38 |         self.outputs = layer[0].outputs
39 |         # print(self.outputs._shape, type(self.outputs._shape))
40 |         for l in layer[1:]:
41 |             # assert str(self.outputs.get_shape()) == str(l.outputs.get_shape()), "Hint: the input shapes should be the same. %s != %s" % (self.outputs.get_shape(), str(l.outputs.get_shape()))
42 |             self.outputs = combine_fn(self.outputs, l.outputs, name=name)
43 |         if act:
44 |             self.outputs = act(self.outputs)
45 |         self.all_layers = list(layer[0].all_layers)
46 |         self.all_params = list(layer[0].all_params)
47 |         self.all_drop = dict(layer[0].all_drop)
48 | 
49 |         for i in range(1, len(layer)):
50 |             self.all_layers.extend(list(layer[i].all_layers))
51 |             self.all_params.extend(list(layer[i].all_params))
52 |             self.all_drop.update(dict(layer[i].all_drop))
53 | 
54 |         self.all_layers = list_remove_repeat(self.all_layers)
55 |         self.all_params = list_remove_repeat(self.all_params)
56 | 
57 | 
58 | def subsample(inputs, factor, scope=None):
59 |     if factor == 1:
60 |         return inputs
61 |     else:
62 |         return tl.layers.MaxPool2d(inputs, [1, 1], strides=(factor, factor), name=scope)
63 | 
64 | 
65 | def conv2d_same(inputs, num_outputs, kernel_size, strides, rate=1, w_init=None, scope=None, trainable=None):
66 |     '''
67 |     Reference slim resnet
68 |     :param inputs:
69 |     :param num_outputs:
70 |     :param kernel_size:
71 |     :param strides:
72 |     :param rate:
73 |     :param scope:
74 |     :return:
75 |     '''
76 |     if strides == 1:
77 |         if rate == 1:
78 |             nets = tl.layers.Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None,
79 |                                     strides=(strides, strides), W_init=w_init, act=None, padding='SAME', name=scope,
80 |                                     use_cudnn_on_gpu=True)
81 |             nets.outputs = tf.layers.batch_normalization(inputs=nets.outputs,
82 |                                                          momentum=0.9,
83 |                                                          training=trainable,
84 |                                                          renorm=True,
85 |                                                          renorm_clipping={'rmax': 3, 'rmin': 0.3333,
86 |                                                                           'dmax': 5},
87 |                                                          renorm_momentum=0.9,
88 |                                                          name=scope+'_bn/BatchNorm')
89 |         else:
90 |             nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size),
91 |                                                rate=rate, act=None, W_init=w_init, padding='SAME', name=scope)
92 |             nets.outputs = tf.layers.batch_normalization(inputs=nets.outputs,
93 |                                                          momentum=0.9,
94 |                                                          training=trainable,
95 |                                                          renorm=True,
96 |                                                          renorm_clipping={'rmax': 3, 'rmin': 0.3333,
97 |                                                                           'dmax': 5},
98 |                                                          renorm_momentum=0.9,
99 |                                                          name=scope+'_bn/BatchNorm')
100 |         return nets
101 |     else:
102 |         kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
103 |         pad_total = kernel_size_effective - 1
104 |         pad_beg = pad_total // 2
105 |         pad_end = pad_total - pad_beg
106 |         inputs = tl.layers.PadLayer(inputs, [[0, 0], [pad_beg, pad_end],
[pad_beg, pad_end], [0, 0]], name='padding_%s' % scope) 107 | if rate == 1: 108 | nets = tl.layers.Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 109 | strides=(strides, strides), W_init=w_init, act=None, padding='VALID', name=scope, 110 | use_cudnn_on_gpu=True) 111 | nets.outputs = tf.layers.batch_normalization(inputs=nets.outputs, 112 | momentum=0.9, 113 | training=trainable, 114 | renorm=True, 115 | renorm_clipping={'rmax':3, 'rmin':0.3333, 116 | 'dmax':5}, 117 | renorm_momentum=0.9, 118 | name=scope+'_bn/BatchNorm') 119 | else: 120 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 121 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 122 | nets.outputs = tf.layers.batch_normalization(inputs=nets.outputs, 123 | momentum=0.9, 124 | training=trainable, 125 | renorm=True, 126 | renorm_clipping={'rmax':3, 'rmin':0.3333, 127 | 'dmax':5}, 128 | renorm_momentum=0.9, 129 | name=scope+'_bn/BatchNorm') 130 | return nets 131 | 132 | 133 | def bottleneck_IR(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 134 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 135 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 136 | if depth == depth_in: 137 | shortcut = subsample(inputs, stride, 'shortcut') 138 | else: 139 | shortcut = tl.layers.Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 140 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 141 | shortcut.outputs = tf.layers.batch_normalization(inputs=shortcut.outputs, 142 | momentum=0.9, 143 | training=trainable, 144 | renorm=True, 145 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 146 | 'dmax': 5}, 147 | renorm_momentum=0.9, 148 | name='shortcut_bn/BatchNorm') 149 | # bottleneck layer 1 150 | inputs.outputs = tf.layers.batch_normalization(inputs=inputs.outputs, 151 | momentum=0.9, 152 | training=trainable, 153 | renorm=True, 154 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 155 | 'dmax': 5}, 156 | renorm_momentum=0.9, 157 | name='conv1_bn1') 158 | residual = tl.layers.Conv2d(inputs, depth_bottleneck, filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 159 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 160 | residual.outputs = tf.layers.batch_normalization(inputs=residual.outputs, 161 | momentum=0.9, 162 | training=trainable, 163 | renorm=True, 164 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 165 | 'dmax': 5}, 166 | renorm_momentum=0.9, 167 | name='conv1_bn2') 168 | # bottleneck prelu 169 | residual = tl.layers.PReluLayer(residual) 170 | # bottleneck layer 2 171 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 172 | output = ElementwiseLayer(layer=[shortcut, residual], 173 | combine_fn=tf.add, 174 | name='combine_layer', 175 | act=None) 176 | return output 177 | 178 | 179 | def resnet(inputs, bottle_neck, blocks, w_init=None, trainable=None, scope=None): 180 | with tf.variable_scope(scope): 181 | # inputs = tf.subtract(inputs, 127.5) 182 | # inputs = tf.multiply(inputs, 0.0078125) 183 | net_inputs = tl.layers.InputLayer(inputs, name='input_layer') 184 | if bottle_neck: 185 | net = tl.layers.Conv2d(net_inputs, n_filter=64, filter_size=(3, 3), strides=(1, 1), 186 | act=None, W_init=w_init, b_init=None, name='conv1', use_cudnn_on_gpu=True) 187 | net.outputs = tf.layers.batch_normalization(inputs=net.outputs, 
188 | momentum=0.9, 189 | training=trainable, 190 | renorm=True, 191 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 192 | 'dmax': 5}, 193 | renorm_momentum=0.9, 194 | name='bn0') 195 | net = tl.layers.PReluLayer(net, name='prelu0') 196 | else: 197 | raise ValueError('The standard resnet must support the bottleneck layer') 198 | for block in blocks: 199 | with tf.variable_scope(block.scope): 200 | for i, var in enumerate(block.args): 201 | with tf.variable_scope('unit_%d' % (i+1)): 202 | net = block.unit_fn(net, depth=var['depth'], depth_bottleneck=var['depth_bottleneck'], 203 | w_init=w_init, stride=var['stride'], rate=var['rate'], scope=None, 204 | trainable=trainable) 205 | net.outputs = tf.layers.batch_normalization(inputs=net.outputs, 206 | momentum=0.9, 207 | training=trainable, 208 | renorm=True, 209 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 210 | 'dmax': 5}, 211 | renorm_momentum=0.9, 212 | name='E_BN1') 213 | net = tl.layers.DropoutLayer(net, keep=0.4, name='E_Dropout') 214 | net_shape = net.outputs.get_shape() 215 | net = tl.layers.ReshapeLayer(net, shape=[-1, net_shape[1]*net_shape[2]*net_shape[3]], name='E_Reshapelayer') 216 | net = tl.layers.DenseLayer(net, n_units=512, W_init=w_init, name='E_DenseLayer') 217 | net.outputs = tf.layers.batch_normalization(inputs=net.outputs, 218 | momentum=0.9, 219 | training=trainable, 220 | renorm=True, 221 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 222 | 'dmax': 5}, 223 | renorm_momentum=0.9, 224 | name='E_BN2') 225 | return net 226 | 227 | 228 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 229 | """A named tuple describing a ResNet block. 230 | 231 | Its parts are: 232 | scope: The scope of the `Block`. 233 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 234 | returns another `Tensor` with the output of the ResNet unit. 235 | args: A list of length equal to the number of units in the `Block`. The list 236 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 237 | block to serve as argument to unit_fn. 238 | """ 239 | 240 | 241 | def resnetse_v1_block(scope, base_depth, num_units, stride, rate=1, unit_fn=None): 242 | """Helper function for creating a resnet_v1 bottleneck block. 243 | 244 | Args: 245 | scope: The scope of the block. 246 | base_depth: The depth of the bottleneck layer for each unit. 247 | num_units: The number of units in the block. 248 | stride: The stride of the block, implemented as a stride in the last unit. 249 | All other units have stride=1. 250 | 251 | Returns: 252 | A resnet_v1 bottleneck block. 
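Note: in this RBN variant each unit's 'depth' is 4 * base_depth (see the dicts below), unlike some of the other L_Resnet_E_IR variants in this repo, where depth equals base_depth.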
253 | """ 254 | return Block(scope, unit_fn, [{ 255 | 'depth': base_depth * 4, 256 | 'depth_bottleneck': base_depth, 257 | 'stride': stride, 258 | 'rate': rate 259 | }] + [{ 260 | 'depth': base_depth * 4, 261 | 'depth_bottleneck': base_depth, 262 | 'stride': 1, 263 | 'rate': rate 264 | }] * (num_units - 1)) 265 | 266 | 267 | def get_resnet(inputs, num_layers, type=None, w_init=None, trainable=None, sess=None): 268 | if type == 'ir': 269 | unit_fn = bottleneck_IR 270 | # elif type == 'se_ir': 271 | # unit_fn = bottleneck_IR_SE 272 | else: 273 | raise ValueError('the input fn is unknown') 274 | 275 | if num_layers == 50: 276 | blocks = [ 277 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 278 | resnetse_v1_block('block2', base_depth=128, num_units=4, stride=2, rate=1, unit_fn=unit_fn), 279 | resnetse_v1_block('block3', base_depth=256, num_units=14, stride=2, rate=1, unit_fn=unit_fn), 280 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 281 | ] 282 | elif num_layers == 101: 283 | blocks = [ 284 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 285 | resnetse_v1_block('block2', base_depth=128, num_units=13, stride=2, rate=1, unit_fn=unit_fn), 286 | resnetse_v1_block('block3', base_depth=256, num_units=30, stride=2, rate=1, unit_fn=unit_fn), 287 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 288 | ] 289 | elif num_layers == 152: 290 | blocks = [ 291 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 292 | resnetse_v1_block('block2', base_depth=128, num_units=8, stride=2, rate=1, unit_fn=unit_fn), 293 | resnetse_v1_block('block3', base_depth=256, num_units=36, stride=2, rate=1, unit_fn=unit_fn), 294 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 295 | ] 296 | else: 297 | raise ValueError('Resnet layer %d is not supported now.' 
% num_layers)
298 |     net = resnet(inputs=inputs,
299 |                  bottle_neck=True,
300 |                  blocks=blocks,
301 |                  w_init=w_init,
302 |                  trainable=trainable,
303 |                  scope='resnet_v1_%d' % num_layers)
304 |     return net
305 | 
306 | 
307 | if __name__ == '__main__':
308 |     x = tf.placeholder(dtype=tf.float32, shape=[None, 112, 112, 3], name='input_place')
309 |     sess = tf.Session()
310 |     # w_init = tf.truncated_normal_initializer(mean=10, stddev=5e-2)
311 |     w_init = tf.contrib.layers.xavier_initializer(uniform=False)
312 |     # test resnetse
313 |     nets = get_resnet(x, 50, type='ir', w_init=w_init, sess=sess)
314 |     tl.layers.initialize_global_variables(sess)
315 | 
316 |     for p in tl.layers.get_variables_with_name('W_conv2d', True, True):
317 |         print(p.op.name)
318 |     print('##############'*30)
319 |     with sess:
320 |         nets.print_params()
321 | -------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/nets/__init__.py -------------------------------------------------------------------------------- /nets/nets_utils.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python import pywrap_tensorflow
2 | import collections
3 | import numpy as np
4 | 
5 | 
6 | var_stat = collections.namedtuple('stats', ['mean', 'median', 'std'])
7 | 
8 | 
9 | def get_variables_in_checkpoint_file(file_name):
10 |     try:
11 |         reader = pywrap_tensorflow.NewCheckpointReader(file_name)
12 |         var_to_shape_map = reader.get_variable_to_shape_map()
13 |         return var_to_shape_map
14 |     except Exception as e:  # pylint: disable=broad-except
15 |         print(str(e))
16 |         if "corrupted compressed block contents" in str(e):
17 |             print("It's likely that your checkpoint file has been compressed "
18 |                   "with SNAPPY.")
19 | 
20 | 
21 | def get_tensor_static_val(file_name, all_tensors, all_tensor_names):
22 |     reader = pywrap_tensorflow.NewCheckpointReader(file_name)
23 |     vars_dict = {}
24 |     if all_tensors or all_tensor_names:
25 |         var_to_shape_map = reader.get_variable_to_shape_map()
26 |         for key in sorted(var_to_shape_map):
27 |             if all_tensors:
28 |                 vars_dict[key] = var_stat(np.mean(reader.get_tensor(key)), np.median(reader.get_tensor(key)),
29 |                                           np.std(reader.get_tensor(key)))
30 |     return vars_dict -------------------------------------------------------------------------------- /nets/networks.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf
2 | from vgg16 import get_vgg16
3 | from vgg19 import get_vgg19
4 | 
5 | 
6 | def get_model(inputs, sess, type, pretrained=True):
7 |     if type == 'vgg16':
8 |         return get_vgg16(inputs, sess, pretrained)
9 |     elif type == 'vgg19':
10 |         return get_vgg19(inputs, sess, pretrained)
11 |     else:
12 |         # fail loudly instead of silently returning None for an unknown type
13 |         raise ValueError('unknown model type: %s' % type)
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     tfconfig = tf.ConfigProto(allow_soft_placement=True)
18 |     x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3], name='inputs')
19 |     with tf.Session(config=tfconfig) as sess:
20 |         network = get_model(x, sess, type='vgg19', pretrained=True)
21 |         network.print_params()
22 |         network.print_layers() -------------------------------------------------------------------------------- /nets/readme.md: -------------------------------------------------------------------------------- 1 | 1. `vgg16.py`
2 |    the VGG-16 model. The input should first be resized
to 224*224 and then have the mean subtracted. The channels of the input image are `RGB`.
3 | 2. `vgg19.py`
4 |    the VGG-19 model. The input should first be normalized to [0, 1], then resized to 224*224, and then have the mean subtracted. The channels of the input should be `RGB`.
5 | 3. `resnet`
6 | -------------------------------------------------------------------------------- /nets/vgg16.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """
4 | VGG-16 for ImageNet.
5 | Introduction
6 | ----------------
7 | VGG is a convolutional neural network model proposed by K. Simonyan and A. Zisserman
8 | from the University of Oxford in the paper “Very Deep Convolutional Networks for
9 | Large-Scale Image Recognition”. The model achieves 92.7% top-5 test accuracy on ImageNet,
10 | which is a dataset of over 14 million images belonging to 1000 classes.
11 | Download Pre-trained Model
12 | ----------------------------
13 | - Model weights in this example - vgg16_weights.npz : http://www.cs.toronto.edu/~frossard/post/vgg16/
14 | - Caffe VGG 16 model : https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
15 | - Tool to convert the Caffe models to TensorFlow's : https://github.com/ethereon/caffe-tensorflow
16 | Note
17 | ------
18 | - For a simplified CNN layer see "Convolutional layer (Simplified)"
19 | on the Read the Docs website.
20 | - The input image has three channels in `RGB` order, for example:
21 |     from scipy.misc import imread, imresize
22 |     img1 = imread('data/laska.png', mode='RGB')  # test data in github
23 |     img1 = imresize(img1, (224, 224))
24 | - When feeding other images to the model be sure to properly resize or crop them
25 | beforehand. Distorted images might end up being misclassified. One way of safely
26 | feeding images of multiple sizes is by doing center cropping, as shown in the
27 | following snippet:
28 | # >>> image_h, image_w, _ = np.shape(img)
29 | # >>> shorter_side = min(image_h, image_w)
30 | # >>> scale = 224. / shorter_side
31 | # >>> image_h, image_w = np.ceil([scale * image_h, scale * image_w]).astype('int32')
32 | # >>> img = imresize(img, (image_h, image_w))
33 | # >>> crop_x = (image_w - 224) / 2
34 | # >>> crop_y = (image_h - 224) / 2
35 | # >>> img = img[crop_y:crop_y+224,crop_x:crop_x+224,:]
36 | """
37 | 
38 | import os
39 | import time
40 | import numpy as np
41 | import tensorflow as tf
42 | import tensorlayer as tl
43 | from tensorlayer.layers import *
44 | from scipy.misc import imread, imresize
45 | from nets.imagenet_classes import *
46 | 
47 | 
48 | def _conv_layers(net_in):
49 |     with tf.name_scope('preprocess'):
50 |         # Notice that we include a preprocessing layer that takes the RGB image
51 |         # with pixel values in the range of 0-255 and subtracts the mean image
52 |         # values (calculated over the entire ImageNet training set).
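# For example, an all-grey input pixel (128, 128, 128) becomes roughly (4.3, 11.2, 24.1) after the mean below is subtracted.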
53 | mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean') 54 | net_in.outputs = net_in.outputs - mean 55 | 56 | # conv1 57 | network = Conv2dLayer( 58 | net_in, 59 | act=tf.nn.relu, 60 | shape=[3, 3, 3, 64], # 64 features for each 3x3 patch 61 | strides=[1, 1, 1, 1], 62 | padding='SAME', 63 | name='conv1_1') 64 | network = Conv2dLayer( 65 | network, 66 | act=tf.nn.relu, 67 | shape=[3, 3, 64, 64], # 64 features for each 3x3 patch 68 | strides=[1, 1, 1, 1], 69 | padding='SAME', 70 | name='conv1_2') 71 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') 72 | 73 | # conv2 74 | network = Conv2dLayer( 75 | network, 76 | act=tf.nn.relu, 77 | shape=[3, 3, 64, 128], # 128 features for each 3x3 patch 78 | strides=[1, 1, 1, 1], 79 | padding='SAME', 80 | name='conv2_1') 81 | network = Conv2dLayer( 82 | network, 83 | act=tf.nn.relu, 84 | shape=[3, 3, 128, 128], # 128 features for each 3x3 patch 85 | strides=[1, 1, 1, 1], 86 | padding='SAME', 87 | name='conv2_2') 88 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') 89 | 90 | # conv3 91 | network = Conv2dLayer( 92 | network, 93 | act=tf.nn.relu, 94 | shape=[3, 3, 128, 256], # 256 features for each 3x3 patch 95 | strides=[1, 1, 1, 1], 96 | padding='SAME', 97 | name='conv3_1') 98 | network = Conv2dLayer( 99 | network, 100 | act=tf.nn.relu, 101 | shape=[3, 3, 256, 256], # 256 features for each 3x3 patch 102 | strides=[1, 1, 1, 1], 103 | padding='SAME', 104 | name='conv3_2') 105 | network = Conv2dLayer( 106 | network, 107 | act=tf.nn.relu, 108 | shape=[3, 3, 256, 256], # 256 features for each 3x3 patch 109 | strides=[1, 1, 1, 1], 110 | padding='SAME', 111 | name='conv3_3') 112 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') 113 | 114 | # conv4 115 | network = Conv2dLayer( 116 | network, 117 | act=tf.nn.relu, 118 | shape=[3, 3, 256, 512], # 512 features for each 3x3 patch 119 | strides=[1, 1, 1, 1], 120 | padding='SAME', 121 | name='conv4_1') 122 | network = Conv2dLayer( 123 | network, 124 | act=tf.nn.relu, 125 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 126 | strides=[1, 1, 1, 1], 127 | padding='SAME', 128 | name='conv4_2') 129 | network = Conv2dLayer( 130 | network, 131 | act=tf.nn.relu, 132 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 133 | strides=[1, 1, 1, 1], 134 | padding='SAME', 135 | name='conv4_3') 136 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') 137 | 138 | # conv5 139 | network = Conv2dLayer( 140 | network, 141 | act=tf.nn.relu, 142 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 143 | strides=[1, 1, 1, 1], 144 | padding='SAME', 145 | name='conv5_1') 146 | network = Conv2dLayer( 147 | network, 148 | act=tf.nn.relu, 149 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 150 | strides=[1, 1, 1, 1], 151 | padding='SAME', 152 | name='conv5_2') 153 | network = Conv2dLayer( 154 | network, 155 | act=tf.nn.relu, 156 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 157 | strides=[1, 1, 1, 1], 158 | padding='SAME', 159 | name='conv5_3') 160 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') 161 | return network 162 | 163 | 164 | def _fc_layers(net): 165 | network = 
FlattenLayer(net, name='flatten') 166 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc1_relu') 167 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc2_relu') 168 | network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc3_relu') 169 | return network 170 | 171 | 172 | def get_vgg16(x, sess=None, pretrained=True): 173 | net_in = InputLayer(x, name='input') 174 | net_cnn = _conv_layers(net_in) # simplified CNN APIs 175 | network = _fc_layers(net_cnn) 176 | 177 | if pretrained: 178 | npz = np.load('../model_weights/vgg16_weights.npz') 179 | params = [] 180 | for val in sorted(npz.items()): 181 | print(" Loading %s" % str(val[1].shape)) 182 | params.append(val[1]) 183 | tl.files.assign_params(sess, params, network) 184 | return network 185 | else: 186 | tl.layers.initialize_global_variables(sess) 187 | return network 188 | 189 | 190 | if __name__ == '__main__': 191 | DATA_PATH = '/home/aurora/workspaces2/PycharmProjects/tensorflow/tensorlayer/example/data' 192 | 193 | x = tf.placeholder(tf.float32, [None, 224, 224, 3]) 194 | tfconfig = tf.ConfigProto(allow_soft_placement=True) 195 | with tf.Session(config=tfconfig) as sess: 196 | network = get_vgg16(x, sess, pretrained=True) 197 | y = network.outputs 198 | network.print_params() 199 | network.print_layers() 200 | img1 = imread(os.path.join(DATA_PATH, 'laska.png'), mode='RGB') # test data in github 201 | img1 = imresize(img1, (224, 224)) 202 | probs = tf.nn.softmax(y) 203 | start_time = time.time() 204 | prob = sess.run(probs, feed_dict={x: [img1]})[0] 205 | print(" End time : %.5ss" % (time.time() - start_time)) 206 | preds = (np.argsort(prob)[::-1])[0:5] 207 | for p in preds: 208 | print(class_names[p], prob[p]) -------------------------------------------------------------------------------- /nets/vgg19.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | VGG-19 for ImageNet. 5 | Pre-trained model in this example - VGG19 NPZ and 6 | trainable examples of VGG16/19 in TensorFlow can be found here: 7 | https://github.com/machrisaa/tensorflow-vgg 8 | For simplified CNN layer see "Convolutional layer (Simplified)" 9 | in read the docs website. 
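The model expects `RGB` images with values scaled to [0, 1]; inside the network they are converted to BGR and the per-channel VGG mean is subtracted (see _Vgg19 below).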
10 | """ 11 | 12 | import os 13 | import time 14 | 15 | import numpy as np 16 | import skimage 17 | import skimage.io 18 | import skimage.transform 19 | import tensorflow as tf 20 | from scipy.misc import imread, imresize 21 | import tensorlayer as tl 22 | from tensorlayer.layers import * 23 | from imagenet_classes import * 24 | 25 | 26 | DATA_PATH = '/home/aurora/workspaces2/PycharmProjects/tensorflow/tensorlayer/example/data' 27 | VGG_MEAN = [103.939, 116.779, 123.68] 28 | 29 | 30 | def load_image(path): 31 | # load image 32 | img = skimage.io.imread(path) 33 | img = img / 255.0 34 | if ((0 <= img).all() and (img <= 1.0).all()) is False: 35 | raise Exception("image value should be [0, 1]") 36 | # print "Original Image Shape: ", img.shape 37 | # we crop image from center 38 | short_edge = min(img.shape[:2]) 39 | yy = int((img.shape[0] - short_edge) / 2) 40 | xx = int((img.shape[1] - short_edge) / 2) 41 | crop_img = img[yy:yy + short_edge, xx:xx + short_edge] 42 | # resize to 224, 224 43 | resized_img = skimage.transform.resize(crop_img, (224, 224)) 44 | return resized_img 45 | 46 | 47 | def print_prob(prob): 48 | synset = class_names 49 | # print prob 50 | pred = np.argsort(prob)[::-1] 51 | # Get top1 label 52 | top1 = synset[pred[0]] 53 | print("Top1: ", top1, prob[pred[0]]) 54 | # Get top5 label 55 | top5 = [(synset[pred[i]], prob[pred[i]]) for i in range(5)] 56 | print("Top5: ", top5) 57 | return top1 58 | 59 | 60 | def _Vgg19(rgb): 61 | """ 62 | Build the VGG 19 Model 63 | Parameters 64 | ----------- 65 | rgb : rgb image placeholder [batch, height, width, 3] values scaled [0, 1] 66 | """ 67 | start_time = time.time() 68 | print("build model started") 69 | rgb_scaled = rgb * 255.0 70 | # Convert RGB to BGR 71 | if tf.__version__ <= '0.11': 72 | red, green, blue = tf.split(3, 3, rgb_scaled) 73 | else: # TF 1.0 74 | print(rgb_scaled) 75 | red, green, blue = tf.split(rgb_scaled, 3, 3) 76 | if red.get_shape().as_list()[1:] != [224, 224, 1]: 77 | raise Exception("image size unmatch") 78 | if green.get_shape().as_list()[1:] != [224, 224, 1]: 79 | raise Exception("image size unmatch") 80 | if blue.get_shape().as_list()[1:] != [224, 224, 1]: 81 | raise Exception("image size unmatch") 82 | if tf.__version__ <= '0.11': 83 | bgr = tf.concat(3, [ 84 | blue - VGG_MEAN[0], 85 | green - VGG_MEAN[1], 86 | red - VGG_MEAN[2], 87 | ]) 88 | else: 89 | bgr = tf.concat( 90 | [ 91 | blue - VGG_MEAN[0], 92 | green - VGG_MEAN[1], 93 | red - VGG_MEAN[2], 94 | ], axis=3) 95 | if bgr.get_shape().as_list()[1:] != [224, 224, 3]: 96 | raise Exception("image size unmatch") 97 | # input layer 98 | net_in = InputLayer(bgr, name='input') 99 | # conv1 100 | network = Conv2dLayer(net_in, act=tf.nn.relu, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_1') 101 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_2') 102 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') 103 | # conv2 104 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_1') 105 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_2') 106 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') 107 | # conv3 108 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 128, 256], 
strides=[1, 1, 1, 1], padding='SAME', name='conv3_1') 109 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_2') 110 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_3') 111 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_4') 112 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') 113 | # conv4 114 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_1') 115 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_2') 116 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_3') 117 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_4') 118 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') 119 | # conv5 120 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_1') 121 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_2') 122 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_3') 123 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_4') 124 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') 125 | # fc 6~8 126 | network = FlattenLayer(network, name='flatten') 127 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc6') 128 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc7') 129 | network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc8') 130 | print("build model finished: %fs" % (time.time() - start_time)) 131 | return network 132 | 133 | 134 | def _Vgg19_simple_api(rgb): 135 | """ 136 | Build the VGG 19 Model 137 | Parameters 138 | ----------- 139 | rgb : rgb image placeholder [batch, height, width, 3] values scaled [0, 1] 140 | """ 141 | start_time = time.time() 142 | print("build model started") 143 | rgb_scaled = rgb * 255.0 144 | # Convert RGB to BGR 145 | if tf.__version__ <= '0.11': 146 | red, green, blue = tf.split(3, 3, rgb_scaled) 147 | else: # TF 1.0 148 | print(rgb_scaled) 149 | red, green, blue = tf.split(rgb_scaled, 3, 3) 150 | if red.get_shape().as_list()[1:] != [224, 224, 1]: 151 | raise Exception("image size unmatch") 152 | if green.get_shape().as_list()[1:] != [224, 224, 1]: 153 | raise Exception("image size unmatch") 154 | if blue.get_shape().as_list()[1:] != [224, 224, 1]: 155 | raise Exception("image size unmatch") 156 | if tf.__version__ <= '0.11': 157 | bgr = tf.concat(3, [ 158 | blue - VGG_MEAN[0], 159 | green - VGG_MEAN[1], 160 | red - VGG_MEAN[2], 161 | ]) 162 | else: 163 | bgr = tf.concat( 164 | [ 165 | blue - VGG_MEAN[0], 166 | green - VGG_MEAN[1], 167 | red - VGG_MEAN[2], 168 | ], axis=3) 169 | if bgr.get_shape().as_list()[1:] != [224, 224, 3]: 170 | raise Exception("image size unmatch") 171 | # input layer 172 | net_in = 
InputLayer(bgr, name='input') 173 | # conv1 174 | network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1') 175 | network = Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2') 176 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') 177 | # conv2 178 | network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1') 179 | network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2') 180 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') 181 | # conv3 182 | network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1') 183 | network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2') 184 | network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3') 185 | network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_4') 186 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') 187 | # conv4 188 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1') 189 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2') 190 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3') 191 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_4') 192 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') 193 | # conv5 194 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1') 195 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2') 196 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3') 197 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_4') 198 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5') 199 | # fc 6~8 200 | network = FlattenLayer(network, name='flatten') 201 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc6') 202 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc7') 203 | network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc8') 204 | print("build model finished: %fs" % (time.time() - start_time)) 205 | return network 206 | 207 | 208 | def get_vgg19(inputs, sess=None, pretrained=True): 209 | network = _Vgg19(inputs) 210 | if pretrained: 211 | vgg19_npy_path = "../model_weights/vgg19.npy" 212 | npz = np.load(vgg19_npy_path, encoding='latin1').item() 213 | params = [] 214 | for val in sorted(npz.items()): 215 | W = np.asarray(val[1][0]) 216 | b = np.asarray(val[1][1]) 217 | print(" Loading %s: %s, %s" % (val[0], W.shape, b.shape)) 218 | params.extend([W, b]) 219 | print("Restoring model 
from npz file") 220 | tl.files.assign_params(sess, params, network) 221 | return network 222 | else: 223 | tl.layers.initialize_global_variables(sess) 224 | return network 225 | 226 | 227 | if __name__ == '__main__': 228 | sess = tf.InteractiveSession() 229 | x = tf.placeholder("float", [None, 224, 224, 3]) 230 | network = get_vgg19(x, sess) 231 | y = network.outputs 232 | probs = tf.nn.softmax(y, name="prob") 233 | img1 = load_image(os.path.join(DATA_PATH, "tiger.jpeg")) # test data in github 234 | img1 = img1.reshape((1, 224, 224, 3)) 235 | start_time = time.time() 236 | prob = sess.run(probs, feed_dict={x: img1}) 237 | print("End time : %.5ss" % (time.time() - start_time)) 238 | 239 | print_prob(prob[0]) -------------------------------------------------------------------------------- /test/benchmark/README.md: -------------------------------------------------------------------------------- 1 | ##### Test max batch size -------------------------------------------------------------------------------- /test/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/test/benchmark/__init__.py -------------------------------------------------------------------------------- /test/benchmark/gluon_batchsize_test.py: -------------------------------------------------------------------------------- 1 | from mxnet import gluon 2 | import mxnet as mx 3 | from mxnet import ndarray as nd 4 | import utils_final as utils 5 | import mxnet.gluon.nn as nn 6 | from mxnet import init 7 | import os 8 | from mxnet import initializer 9 | from mxnet.gluon.block import HybridBlock 10 | 11 | 12 | def prelu(): 13 | pass 14 | 15 | 16 | def inference(): 17 | net = gluon.nn.Sequential() 18 | with net.name_scope(): 19 | net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1)) 20 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 21 | # net.add(mx.sym.LeakyReLU(data=net, act_type='prelu', name='prelu1')) 22 | net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1)) 23 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 24 | net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1, strides=2)) 25 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 26 | 27 | net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1)) 28 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 29 | net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1)) 30 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 31 | net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1, strides=2)) 32 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 33 | 34 | net.add(nn.Conv2D(channels=256, kernel_size=3, padding=1)) 35 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 36 | net.add(nn.Conv2D(channels=256, kernel_size=3, padding=1)) 37 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 38 | net.add(nn.Conv2D(channels=256, kernel_size=3, padding=1, strides=2)) 39 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 40 | 41 | net.add(nn.Flatten()) 42 | net.add(nn.Dense(10)) 43 | return net 44 | 45 | 46 | if __name__ == '__main__': 47 | # without prelu and bn 7000< max batch size <8000 48 | # with bn only 3000< max batch size <4000 49 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 50 | batch_size = 3000 51 | train_data, test_data = utils.load_data_mnist(batch_size=batch_size) 52 | ctx = utils.try_gpu() 53 | net = inference() 54 | print(net) 55 | net.initialize(ctx=ctx, 
init=init.Xavier())
56 |     softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
57 |     trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01})
58 |     utils.train(train_data, test_data, net, softmax_cross_entropy, trainer, ctx, num_epochs=10)
59 | 
60 | 
61 | -------------------------------------------------------------------------------- /test/benchmark/mxnet_batchsize_test.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx
2 | import mxnet.ndarray as nd
3 | import os
4 | 
5 | 
6 | if __name__ == '__main__':
7 |     # without bn and prelu max batchsize (40000, 50000)
8 |     # with bn max batchsize (20000, 30000)
9 |     # with prelu batchsize (20000, 30000)
10 |     # with bn and prelu max batchsize (10000, 20000)
11 |     os.environ["CUDA_VISIBLE_DEVICES"] = "0"
12 |     batch_size = 10000
13 |     mnist = mx.test_utils.get_mnist()
14 |     print(mnist['train_data'].shape)
15 |     train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
16 | 
17 |     # inference
18 |     data = mx.sym.var('data')
19 |     # first conv layer; the later layers are chained through `net`
20 |     net = mx.sym.Convolution(data=data, kernel=(3, 3), num_filter=64)
21 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn1')
22 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu1')
23 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=64)
24 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn2')
25 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu2')
26 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), stride=(2, 2), num_filter=64)
27 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn3')
28 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu3')
29 | 
30 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=128)
31 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn4')
32 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu4')
33 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=128)
34 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn5')
35 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu5')
36 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), stride=(2, 2), num_filter=128)
37 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn6')
38 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu6')
39 | 
40 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=256)
41 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn7')
42 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu7')
43 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=256)
44 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn8')
45 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu8')
46 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), stride=(2, 2), num_filter=256)
47 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn9')
48 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu9')
49 | 
50 |     flatten = mx.sym.flatten(data=net)
51 |     # MNIST has 10 classes
52 |     fc3 = mx.sym.FullyConnected(data=flatten, num_hidden=10)
53 |     # Softmax with cross entropy loss
54 |     mlp = mx.sym.SoftmaxOutput(data=fc3, name='softmax')
55 | 
56 |     import logging
57 | 
58 |     logging.getLogger().setLevel(logging.DEBUG)  # logging to stdout
59 |     # create a trainable module on GPU
60 |     mlp_model = 
mx.mod.Module(symbol=mlp, context=mx.gpu()) 61 | mlp_model.fit(train_iter, # train data 62 | optimizer='sgd', # use SGD to train 63 | optimizer_params={'learning_rate': 0.1}, # use fixed learning rate 64 | eval_metric='acc', # report accuracy during training 65 | batch_end_callback=mx.callback.Speedometer(batch_size, 100), 66 | # output progress for each 100 data batches 67 | num_epoch=10) # train for at most 10 dataset passes -------------------------------------------------------------------------------- /test/benchmark/resnet_slim_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim.nets as nets 3 | import numpy as np 4 | 5 | 6 | slim = tf.contrib.slim 7 | resnet = nets.resnet_v1 8 | 9 | if __name__ == '__main__': 10 | output_shape = 85164 11 | batch_size = 64 12 | image = tf.placeholder(name='input_x', shape=[None, 224, 224, 3], dtype=tf.float32) 13 | labels = tf.placeholder(name='input_label', shape=[None, output_shape], dtype=tf.float32) 14 | with slim.arg_scope(nets.resnet_utils.resnet_arg_scope()): 15 | resnet_50, end_points = resnet.resnet_v1_50(inputs=image, num_classes=output_shape, scope='resnet_v1_50') 16 | prob = tf.squeeze(resnet_50, axis=[1, 2]) 17 | probabilities = tf.reduce_mean(tf.nn.softmax(prob, dim=-1)) 18 | losses = tf.norm(tf.subtract(probabilities, labels)) 19 | train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(losses) 20 | sess = tf.Session() 21 | saver = tf.train.Saver() 22 | sess.run(tf.global_variables_initializer()) 23 | while True: 24 | datasets = np.random.randn(batch_size, 224, 224, 3).astype(np.float32) 25 | datasets_labels = np.random.randn(batch_size, output_shape).astype(np.float32) 26 | losses_val, _ = sess.run([losses, train_op], feed_dict={image: datasets, labels: datasets_labels}) 27 | print(losses_val) -------------------------------------------------------------------------------- /test/benchmark/resnet_tl_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim.nets as nets 3 | import numpy as np 4 | from nets.resnet import get_resnet 5 | 6 | 7 | slim = tf.contrib.slim 8 | resnet = nets.resnet_v1 9 | 10 | if __name__ == '__main__': 11 | output_shape = 85164 12 | batch_size = 128 13 | image = tf.placeholder(name='input_x', shape=[None, 224, 224, 3], dtype=tf.float32) 14 | labels = tf.placeholder(name='input_label', shape=[None, output_shape], dtype=tf.float32) 15 | with slim.arg_scope(nets.resnet_utils.resnet_arg_scope()): 16 | nets = get_resnet(image, output_shape, 50, type='resnet', sess=None, pretrained=False) 17 | print(nets.outputs) 18 | probabilities = tf.reduce_mean(tf.nn.softmax(nets.outputs, dim=-1)) 19 | print(probabilities) 20 | losses = tf.norm(tf.subtract(probabilities, labels)) 21 | train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(losses) 22 | sess = tf.Session() 23 | saver = tf.train.Saver() 24 | sess.run(tf.global_variables_initializer()) 25 | while True: 26 | datasets = np.random.randn(batch_size, 224, 224, 3).astype(np.float32) 27 | datasets_labels = np.random.randn(batch_size, output_shape).astype(np.float32) 28 | losses_val, _ = sess.run([losses, train_op], feed_dict={image: datasets, labels: datasets_labels}) 29 | print(losses_val) -------------------------------------------------------------------------------- /test/benchmark/tensorlayer_batchsize_test.py: 
-------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | import os 4 | 5 | 6 | def inference(x): 7 | w_init_method = tf.contrib.layers.xavier_initializer(uniform=True) 8 | # define the network 9 | network = tl.layers.InputLayer(x, name='input') 10 | network = tl.layers.Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 11 | W_init=w_init_method, name='conv1_1') 12 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn1') 13 | network = tl.layers.PReluLayer(network, name='prelu1') 14 | network = tl.layers.Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 15 | W_init=w_init_method, name='conv1_2') 16 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn2') 17 | network = tl.layers.PReluLayer(network, name='prelu2') 18 | network = tl.layers.Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(2, 2), padding='SAME', act=None, 19 | W_init=w_init_method, name='conv1_3') 20 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn3') 21 | network = tl.layers.PReluLayer(network, name='prelu3') 22 | 23 | network = tl.layers.Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 24 | W_init=w_init_method, name='conv2_1') 25 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn4') 26 | network = tl.layers.PReluLayer(network, name='prelu4') 27 | 28 | network = tl.layers.Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 29 | W_init=w_init_method, name='conv2_2') 30 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn5') 31 | network = tl.layers.PReluLayer(network, name='prelu5') 32 | network = tl.layers.Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(2, 2), padding='SAME', act=None, 33 | W_init=w_init_method, name='conv2_3') 34 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn6') 35 | network = tl.layers.PReluLayer(network, name='prelu6') 36 | 37 | network = tl.layers.Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 38 | W_init=w_init_method, name='conv3_1') 39 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn7') 40 | network = tl.layers.PReluLayer(network, name='prelu7') 41 | network = tl.layers.Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 42 | W_init=w_init_method, name='conv3_2') 43 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn8') 44 | network = tl.layers.PReluLayer(network, name='prelu8') 45 | network = tl.layers.Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(2, 2), padding='SAME', act=None, 46 | W_init=w_init_method, name='conv3_3') 47 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn9') 48 | network = tl.layers.PReluLayer(network, name='prelu9') 49 | 50 | network = tl.layers.FlattenLayer(network, name='flatten') 51 | network = tl.layers.DenseLayer(network, 10) 52 | 53 | return network.outputs 54 | 55 | 56 | if __name__ == '__main__': 57 | # without bn prelu 8000< max batch size <9000 58 | # with bn only 5000< max batch size <6000 59 | # with prelu only 3000< max batch size <4000 60 | # with bn and prelu 2000< max batch size <3000 61 
| os.environ["CUDA_VISIBLE_DEVICES"] = "0"
62 |     batch_size = 2000
63 |     n_epoch = 10
64 |     # prepare data
65 |     X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
66 |     # define placeholder
67 |     x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name='x')
68 |     y_ = tf.placeholder(tf.int64, shape=[None], name='y_')
69 | 
70 |     output = inference(x)
71 |     cost = tl.cost.cross_entropy(output, y_, 'cost')
72 |     train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
73 | 
74 |     sess = tf.Session()
75 |     tl.layers.initialize_global_variables(sess)
76 | 
77 |     correct_prediction = tf.equal(tf.argmax(output, 1), y_)
78 |     acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
79 | 
80 |     for epoch in range(n_epoch):
81 |         train_loss, train_acc, n_batch = 0, 0, 0
82 |         for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
83 |             feed_dict = {x: X_train_a, y_: y_train_a}
84 |             _, err, ac = sess.run([train_op, cost, acc], feed_dict=feed_dict)
85 |             train_loss += err
86 |             train_acc += ac
87 |             n_batch += 1
88 |         print("epoch %d, train acc: %f" % (epoch, (train_acc / n_batch))) -------------------------------------------------------------------------------- /test/benchmark/utils_final.py: -------------------------------------------------------------------------------- 1 | from math import exp
2 | from mxnet import gluon
3 | from mxnet import autograd
4 | from mxnet import nd
5 | from mxnet import image
6 | from mxnet.gluon import nn
7 | import mxnet as mx
8 | import numpy as np
9 | from time import time
10 | import matplotlib.pyplot as plt
11 | import random
12 | 
13 | 
14 | class DataLoader(object):
15 |     """Similar to gluon.data.DataLoader, but might be faster.
16 | 
17 |     The main difference is that this data loader tries to read more examples each
18 |     time. But the limits are 1) all examples in the dataset must have the same shape, and 2)
19 |     the data transformer needs to process multiple examples at a time.
20 |     """
21 | 
22 |     def __init__(self, dataset, batch_size, shuffle, transform=None):
23 |         self.dataset = dataset
24 |         self.batch_size = batch_size
25 |         self.shuffle = shuffle
26 |         self.transform = transform
27 | 
28 |     def __iter__(self):
29 |         data = self.dataset[:]
30 |         X = data[0]
31 |         y = nd.array(data[1])
32 |         n = X.shape[0]
33 |         if self.shuffle:
34 |             idx = np.arange(n)
35 |             np.random.shuffle(idx)
36 |             X = nd.array(X.asnumpy()[idx])
37 |             y = nd.array(y.asnumpy()[idx])
38 | 
39 |         for i in range(n // self.batch_size):
40 |             if self.transform is not None:
41 |                 yield self.transform(X[i * self.batch_size:(i + 1) * self.batch_size],
42 |                                      y[i * self.batch_size:(i + 1) * self.batch_size])
43 |             else:
44 |                 yield (X[i * self.batch_size:(i + 1) * self.batch_size],
45 |                        y[i * self.batch_size:(i + 1) * self.batch_size])
46 | 
47 |     def __len__(self):
48 |         return len(self.dataset) // self.batch_size
49 | 
50 | 
51 | def load_data_fashion_mnist(batch_size, resize=None, root="~/.mxnet/datasets/fashion-mnist"):
52 |     """download the fashion mnist dataset and then load into memory"""
53 | 
54 |     def transform_mnist(data, label):
55 |         # Transform a batch of examples.
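# (the transform runs lazily, one batch at a time, so the whole dataset is never materialized in transformed form)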
56 |         if resize:
57 |             n = data.shape[0]
58 |             new_data = nd.zeros((n, resize, resize, data.shape[3]))
59 |             for i in range(n):
60 |                 new_data[i] = image.imresize(data[i], resize, resize)
61 |             data = new_data
62 |         # change data from batch x height x width x channel to batch x channel x height x width
63 |         return nd.transpose(data.astype('float32'), (0, 3, 1, 2)) / 255, label.astype('float32')
64 | 
65 |     mnist_train = gluon.data.vision.FashionMNIST(root=root, train=True, transform=None)
66 |     mnist_test = gluon.data.vision.FashionMNIST(root=root, train=False, transform=None)
67 |     # Transform later to avoid memory explosion.
68 |     train_data = DataLoader(mnist_train, batch_size, shuffle=True, transform=transform_mnist)
69 |     test_data = DataLoader(mnist_test, batch_size, shuffle=False, transform=transform_mnist)
70 |     return (train_data, test_data)
71 | 
72 | 
73 | def load_data_mnist(batch_size, resize=None, root="~/.mxnet/datasets/mnist"):
74 |     """download the mnist dataset and then load into memory"""
75 | 
76 |     def transform_mnist(data, label):
77 |         # Transform a batch of examples.
78 |         if resize:
79 |             n = data.shape[0]
80 |             new_data = nd.zeros((n, resize, resize, data.shape[3]))
81 |             for i in range(n):
82 |                 new_data[i] = image.imresize(data[i], resize, resize)
83 |             data = new_data
84 |         # change data from batch x height x width x channel to batch x channel x height x width
85 |         return nd.transpose(data.astype('float32'), (0, 3, 1, 2)) / 255, label.astype('float32')
86 | 
87 |     mnist_train = gluon.data.vision.MNIST(root=root, train=True, transform=None)
88 |     mnist_test = gluon.data.vision.MNIST(root=root, train=False, transform=None)
89 |     # Transform later to avoid memory explosion.
90 |     train_data = DataLoader(mnist_train, batch_size, shuffle=True, transform=transform_mnist)
91 |     test_data = DataLoader(mnist_test, batch_size, shuffle=False, transform=transform_mnist)
92 |     return (train_data, test_data)
93 | 
94 | 
95 | def try_gpu():
96 |     """If GPU is available, return mx.gpu(0); else return mx.cpu()"""
97 |     try:
98 |         ctx = mx.gpu()
99 |         _ = nd.array([0], ctx=ctx)
100 |     except:
101 |         ctx = mx.cpu()
102 |     return ctx
103 | 
104 | 
105 | def try_all_gpus():
106 |     """Return all available GPUs, or [mx.cpu()] if there is no GPU"""
107 |     ctx_list = []
108 |     try:
109 |         for i in range(16):
110 |             ctx = mx.gpu(i)
111 |             _ = nd.array([0], ctx=ctx)
112 |             ctx_list.append(ctx)
113 |     except:
114 |         pass
115 |     if not ctx_list:
116 |         ctx_list = [mx.cpu()]
117 |     return ctx_list
118 | 
119 | 
120 | def SGD(params, lr):
121 |     for param in params:
122 |         param[:] = param - lr * param.grad
123 | 
124 | 
125 | def accuracy(output, label):
126 |     return nd.mean(output.argmax(axis=1) == label).asscalar()
127 | 
128 | 
129 | def _get_batch(batch, ctx):
130 |     """return data and label on ctx"""
131 |     if isinstance(batch, mx.io.DataBatch):
132 |         data = batch.data[0]
133 |         label = batch.label[0]
134 |     else:
135 |         data, label = batch
136 |     return (gluon.utils.split_and_load(data, ctx),
137 |             gluon.utils.split_and_load(label, ctx),
138 |             data.shape[0])
139 | 
140 | 
141 | def evaluate_accuracy(data_iterator, net, ctx=[mx.cpu()]):
142 |     if isinstance(ctx, mx.Context):
143 |         ctx = [ctx]
144 |     acc = nd.array([0])
145 |     n = 0.
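# acc accumulates the number of correct predictions across devices; n counts the labels seen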
146 | if isinstance(data_iterator, mx.io.MXDataIter): 147 | data_iterator.reset() 148 | for batch in data_iterator: 149 | data, label, batch_size = _get_batch(batch, ctx) 150 | for X, y in zip(data, label): 151 | acc += nd.sum(net(X).argmax(axis=1) == y).copyto(mx.cpu()) 152 | n += y.size 153 | acc.wait_to_read() # don't push too many operators into backend 154 | return acc.asscalar() / n 155 | 156 | 157 | def train(train_data, test_data, net, loss, trainer, ctx, num_epochs, print_batches=None): 158 | """Train a network""" 159 | print("Start training on ", ctx) 160 | if isinstance(ctx, mx.Context): 161 | ctx = [ctx] 162 | for epoch in range(num_epochs): 163 | train_loss, train_acc, n, m = 0.0, 0.0, 0.0, 0.0 164 | if isinstance(train_data, mx.io.MXDataIter): 165 | train_data.reset() 166 | start = time() 167 | for i, batch in enumerate(train_data): 168 | data, label, batch_size = _get_batch(batch, ctx) 169 | losses = [] 170 | with autograd.record(): 171 | outputs = [net(X) for X in data] 172 | losses = [loss(yhat, y) for yhat, y in zip(outputs, label)] 173 | for l in losses: 174 | l.backward() 175 | train_acc += sum([(yhat.argmax(axis=1) == y).sum().asscalar() 176 | for yhat, y in zip(outputs, label)]) 177 | train_loss += sum([l.sum().asscalar() for l in losses]) 178 | trainer.step(batch_size) 179 | n += batch_size 180 | m += sum([y.size for y in label]) 181 | if print_batches and (i + 1) % print_batches == 0: 182 | print("Batch %d. Loss: %f, Train acc %f" % ( 183 | n, train_loss / n, train_acc / m 184 | )) 185 | 186 | test_acc = evaluate_accuracy(test_data, net, ctx) 187 | print("Epoch %d. Loss: %.3f, Train acc %.2f, Test acc %.2f, Time %.1f sec" % ( 188 | epoch, train_loss / n, train_acc / m, test_acc, time() - start 189 | )) 190 | 191 | 192 | class Residual(nn.HybridBlock): 193 | def __init__(self, channels, same_shape=True, **kwargs): 194 | super(Residual, self).__init__(**kwargs) 195 | self.same_shape = same_shape 196 | with self.name_scope(): 197 | strides = 1 if same_shape else 2 198 | self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1, 199 | strides=strides) 200 | self.bn1 = nn.BatchNorm() 201 | self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1) 202 | self.bn2 = nn.BatchNorm() 203 | if not same_shape: 204 | self.conv3 = nn.Conv2D(channels, kernel_size=1, 205 | strides=strides) 206 | 207 | def hybrid_forward(self, F, x): 208 | out = F.relu(self.bn1(self.conv1(x))) 209 | out = self.bn2(self.conv2(out)) 210 | if not self.same_shape: 211 | x = self.conv3(x) 212 | return F.relu(out + x) 213 | 214 | 215 | def resnet18(num_classes): 216 | net = nn.HybridSequential() 217 | with net.name_scope(): 218 | net.add( 219 | nn.BatchNorm(), 220 | nn.Conv2D(64, kernel_size=3, strides=1), 221 | nn.MaxPool2D(pool_size=3, strides=2), 222 | Residual(64), 223 | Residual(64), 224 | Residual(128, same_shape=False), 225 | Residual(128), 226 | Residual(256, same_shape=False), 227 | Residual(256), 228 | nn.GlobalAvgPool2D(), 229 | nn.Dense(num_classes) 230 | ) 231 | return net 232 | 233 | 234 | def show_images(imgs, nrows, ncols, figsize=None): 235 | """plot a list of images""" 236 | if not figsize: 237 | figsize = (ncols, nrows) 238 | _, figs = plt.subplots(nrows, ncols, figsize=figsize) 239 | for i in range(nrows): 240 | for j in range(ncols): 241 | figs[i][j].imshow(imgs[i * ncols + j].asnumpy()) 242 | figs[i][j].axes.get_xaxis().set_visible(False) 243 | figs[i][j].axes.get_yaxis().set_visible(False) 244 | plt.show() 245 | 246 | 247 | def data_iter_random(corpus_indices, batch_size, num_steps, 
ctx=None): 248 | """Sample mini-batches in a random order from sequential data.""" 249 | # Subtract 1 because label indices are corresponding input indices + 1. 250 | num_examples = (len(corpus_indices) - 1) // num_steps 251 | epoch_size = num_examples // batch_size 252 | # Randomize samples. 253 | example_indices = list(range(num_examples)) 254 | random.shuffle(example_indices) 255 | 256 | def _data(pos): 257 | return corpus_indices[pos: pos + num_steps] 258 | 259 | for i in range(epoch_size): 260 | # Read batch_size random samples each time. 261 | i = i * batch_size 262 | batch_indices = example_indices[i: i + batch_size] 263 | data = nd.array( 264 | [_data(j * num_steps) for j in batch_indices], ctx=ctx) 265 | label = nd.array( 266 | [_data(j * num_steps + 1) for j in batch_indices], ctx=ctx) 267 | yield data, label 268 | 269 | 270 | def data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx=None): 271 | """Sample mini-batches in a consecutive order from sequential data.""" 272 | corpus_indices = nd.array(corpus_indices, ctx=ctx) 273 | data_len = len(corpus_indices) 274 | batch_len = data_len // batch_size 275 | 276 | indices = corpus_indices[0: batch_size * batch_len].reshape(( 277 | batch_size, batch_len)) 278 | # Subtract 1 because label indices are corresponding input indices + 1. 279 | epoch_size = (batch_len - 1) // num_steps 280 | 281 | for i in range(epoch_size): 282 | i = i * num_steps 283 | data = indices[:, i: i + num_steps] 284 | label = indices[:, i + 1: i + num_steps + 1] 285 | yield data, label 286 | 287 | 288 | def grad_clipping(params, clipping_norm, ctx): 289 | """Gradient clipping.""" 290 | if clipping_norm is not None: 291 | norm = nd.array([0.0], ctx) 292 | for p in params: 293 | norm += nd.sum(p.grad ** 2) 294 | norm = nd.sqrt(norm).asscalar() 295 | if norm > clipping_norm: 296 | for p in params: 297 | p.grad[:] *= clipping_norm / norm 298 | 299 | 300 | def predict_rnn(rnn, prefix, num_chars, params, hidden_dim, ctx, idx_to_char, 301 | char_to_idx, get_inputs, is_lstm=False): 302 | """Predict the next chars given the prefix.""" 303 | prefix = prefix.lower() 304 | state_h = nd.zeros(shape=(1, hidden_dim), ctx=ctx) 305 | if is_lstm: 306 | state_c = nd.zeros(shape=(1, hidden_dim), ctx=ctx) 307 | output = [char_to_idx[prefix[0]]] 308 | for i in range(num_chars + len(prefix)): 309 | X = nd.array([output[-1]], ctx=ctx) 310 | if is_lstm: 311 | Y, state_h, state_c = rnn(get_inputs(X), state_h, state_c, *params) 312 | else: 313 | Y, state_h = rnn(get_inputs(X), state_h, *params) 314 | if i < len(prefix) - 1: 315 | next_input = char_to_idx[prefix[i + 1]] 316 | else: 317 | next_input = int(Y[0].argmax(axis=1).asscalar()) 318 | output.append(next_input) 319 | return ''.join([idx_to_char[i] for i in output]) 320 | 321 | 322 | def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim, 323 | learning_rate, clipping_norm, batch_size, 324 | pred_period, pred_len, seqs, get_params, get_inputs, 325 | ctx, corpus_indices, idx_to_char, char_to_idx, 326 | is_lstm=False): 327 | """Train an RNN model and predict the next item in the sequence.""" 328 | if is_random_iter: 329 | data_iter = data_iter_random 330 | else: 331 | data_iter = data_iter_consecutive 332 | params = get_params() 333 | 334 | softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() 335 | 336 | for e in range(1, epochs + 1): 337 | # If consecutive sampling is used, in the same epoch, the hidden state 338 | # is initialized only at the beginning of the epoch. 
339 | if not is_random_iter: 340 | state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx) 341 | if is_lstm: 342 | state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx) 343 | train_loss, num_examples = 0, 0 344 | for data, label in data_iter(corpus_indices, batch_size, num_steps, 345 | ctx): 346 | # If random sampling is used, the hidden state has to be 347 | # initialized for each mini-batch. 348 | if is_random_iter: 349 | state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx) 350 | if is_lstm: 351 | state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx) 352 | with autograd.record(): 353 | # outputs shape: (batch_size, vocab_size) 354 | if is_lstm: 355 | outputs, state_h, state_c = rnn(get_inputs(data), state_h, 356 | state_c, *params) 357 | else: 358 | outputs, state_h = rnn(get_inputs(data), state_h, *params) 359 | # Let t_ib_j be the j-th element of the mini-batch at time i. 360 | # label shape: (batch_size * num_steps) 361 | # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]. 362 | label = label.T.reshape((-1,)) 363 | # Concatenate outputs: 364 | # shape: (batch_size * num_steps, vocab_size). 365 | outputs = nd.concat(*outputs, dim=0) 366 | # Now outputs and label are aligned. 367 | loss = softmax_cross_entropy(outputs, label) 368 | loss.backward() 369 | 370 | grad_clipping(params, clipping_norm, ctx) 371 | SGD(params, learning_rate) 372 | 373 | train_loss += nd.sum(loss).asscalar() 374 | num_examples += loss.size 375 | 376 | if e % pred_period == 0: 377 | print("Epoch %d. Training perplexity %f" % (e, 378 | exp(train_loss / num_examples))) 379 | for seq in seqs: 380 | print(' - ', predict_rnn(rnn, seq, pred_len, params, 381 | hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs, 382 | is_lstm)) 383 | print() 384 | 385 | -------------------------------------------------------------------------------- /test/benchmark/vgg19_slim_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim.nets as nets 3 | import numpy as np 4 | 5 | slim = tf.contrib.slim 6 | 7 | if __name__ == '__main__': 8 | output_shape = 1000 9 | batch_size = 128 10 | image = tf.placeholder(name='input_x', shape=[None, 224, 224, 3], dtype=tf.float32) 11 | labels = tf.placeholder(name='input_label', shape=[None, output_shape], dtype=tf.float32) 12 | with slim.arg_scope(nets.vgg.vgg_arg_scope()): 13 | vgg_19, end_points = nets.vgg.vgg_19(inputs=image, num_classes=output_shape, scope='vgg_19') 14 | probabilities = tf.reduce_mean(tf.nn.softmax(vgg_19, dim=-1)) 15 | losses = tf.norm(tf.subtract(probabilities, labels)) 16 | train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(losses) 17 | sess = tf.Session() 18 | saver = tf.train.Saver() 19 | sess.run(tf.global_variables_initializer()) 20 | while True: 21 | datasets = np.random.randn(batch_size, 224, 224, 3).astype(np.float32) 22 | datasets_labels = np.random.randn(batch_size, output_shape).astype(np.float32) 23 | losses_val, _ = sess.run([losses, train_op], feed_dict={image: datasets, labels: datasets_labels}) 24 | print(losses_val) -------------------------------------------------------------------------------- /test/benchmark/vgg19_tl_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from nets.vgg19 import get_vgg19 3 | import numpy as np 4 | 5 | 6 | if __name__ == '__main__': 7 | sess = tf.Session() 8 | x = tf.placeholder(name="inputs_x", shape=[None, 224, 224, 
3], dtype=tf.float32) 9 | y = tf.placeholder(name='inputs_y', shape=[None, 1000], dtype=tf.float32) 10 | network = get_vgg19(x, sess, pretrained=False) 11 | outputs_y = network.outputs 12 | probs = tf.nn.softmax(outputs_y, name="prob") 13 | loss = tf.reduce_mean(tf.subtract(probs, y)) 14 | 15 | while True: 16 | batch_size = 128 17 | datasets_x = np.random.randn(batch_size, 224, 224, 3).astype(np.float32) 18 | datasets_y = np.random.randn(batch_size, 1000).astype(np.float32) 19 | feed_dict = {x: datasets_x, y: datasets_y} 20 | loss_val = sess.run(loss, feed_dict=feed_dict) 21 | print('batch size %d, loss value is %.2f' % (batch_size, loss_val)) 22 | -------------------------------------------------------------------------------- /test/memory_usage_test.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import argparse 3 | import PIL.Image 4 | import io 5 | import numpy as np 6 | import cv2 7 | import tensorflow as tf 8 | import os 9 | import sys 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 15 | description='data path information' 16 | ) 17 | parser.add_argument('--bin_path', default='../datasets/faces_ms1m_112x112/train.rec', type=str, 18 | help='path to the binary image file') 19 | parser.add_argument('--idx_path', default='../datasets/faces_ms1m_112x112/train.idx', type=str, 20 | help='path to the image index path') 21 | parser.add_argument('--tfrecords_file_path', default='../datasets/tfrecords', type=str, 22 | help='path to the output of tfrecords file path') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def mx2tfrecords_mem_test(imgidx, imgrec, args): 28 | output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords') 29 | writer = tf.python_io.TFRecordWriter(output_path) 30 | for i in imgidx: 31 | img_info = imgrec.read_idx(i) 32 | header, img = mx.recordio.unpack(img_info) 33 | print(type(img)) 34 | print(img) 35 | print(sys.getsizeof(img)) 36 | print('#####################') 37 | img_mx = mx.image.imdecode(img) 38 | print(type(img_mx)) 39 | print(sys.getsizeof(img_mx)) 40 | print(img_mx.size) 41 | print(img_mx.dtype) 42 | print(img_mx.context) 43 | print(img_mx.stype) 44 | print(img_mx) 45 | print('#####################') 46 | img_mx_np = img_mx.asnumpy() 47 | print(type(img_mx_np)) 48 | print(sys.getsizeof(img_mx_np)) 49 | print('#####################') 50 | back_mx_ndarray = mx.nd.array(img_mx_np) 51 | print(type(back_mx_ndarray)) 52 | print(sys.getsizeof(back_mx_ndarray)) 53 | encoded_jpg_io = io.BytesIO(img) 54 | print(sys.getsizeof(encoded_jpg_io)) 55 | image = PIL.Image.open(encoded_jpg_io) 56 | np_img = np.array(image) 57 | img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR) 58 | print(sys.getsizeof(img)) 59 | print('#####################') 60 | img_raw = img.tobytes() 61 | print(sys.getsizeof(img)) 62 | print('#####################') 63 | writer.close() 64 | 65 | 66 | def mx2tfrecords(imgidx, imgrec, args): 67 | output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords') 68 | writer = tf.python_io.TFRecordWriter(output_path) 69 | for i in imgidx: 70 | img_info = imgrec.read_idx(i) 71 | header, img = mx.recordio.unpack(img_info) 72 | # encoded_jpg_io = io.BytesIO(img) 73 | # image = PIL.Image.open(encoded_jpg_io) 74 | # np_img = np.array(image) 75 | # img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR) 76 | # img_raw = img.tobytes() 77 | # images = tf.image.decode_jpeg(img) 78 | # images = 
tf.reshape(images, shape=(112, 112, 3)) 79 | # r, g, b = tf.split(images, num_or_size_splits=3, axis=-1) 80 | # images = tf.concat([b, g, r], axis=-1) 81 | # sess = tf.Session() 82 | # np_images = sess.run(images) 83 | # print(images.shape) 84 | # print(type(np_images)) 85 | # print(sys.getsizeof(np_images)) 86 | # cv2.imshow('test', np_images) 87 | # cv2.waitKey(0) 88 | label = int(header.label) 89 | example = tf.train.Example(features=tf.train.Features(feature={ 90 | 'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img])), 91 | "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])) 92 | })) 93 | writer.write(example.SerializeToString()) # Serialize To String 94 | if i % 10000 == 0: 95 | print('%d num image processed' % i) 96 | writer.close() 97 | 98 | 99 | if __name__ == '__main__': 100 | # define parameters 101 | id2range = {} 102 | data_shape = (3, 112, 112) 103 | args = parse_args() 104 | imgrec = mx.recordio.MXIndexedRecordIO(args.idx_path, args.bin_path, 'r') 105 | s = imgrec.read_idx(0) 106 | header, _ = mx.recordio.unpack(s) 107 | print(header.label) 108 | imgidx = list(range(1, int(header.label[0]))) 109 | seq_identity = range(int(header.label[0]), int(header.label[1])) 110 | for identity in seq_identity: 111 | s = imgrec.read_idx(identity) 112 | header, _ = mx.recordio.unpack(s) 113 | a, b = int(header.label[0]), int(header.label[1]) 114 | id2range[identity] = (a, b) 115 | print('id2range', len(id2range)) 116 | 117 | # generate tfrecords 118 | mx2tfrecords_mem_test(imgidx, imgrec, args) 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /test/multiple_gpu_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/test/multiple_gpu_test/__init__.py -------------------------------------------------------------------------------- /test/multiple_gpu_test/test_mgpu_mnist.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | import os 4 | 5 | Layer = tl.layers.Layer 6 | D_TYPE = tf.float32 7 | 8 | class DenseLayer(Layer): 9 | """ 10 | The :class:`DenseLayer` class is a fully connected layer. 11 | 12 | Parameters 13 | ---------- 14 | layer : a :class:`Layer` instance 15 | The `Layer` class feeding into this layer. 16 | n_units : int 17 | The number of units of the layer. 18 | act : activation function 19 | The function that is applied to the layer activations. 20 | W_init : weights initializer 21 | The initializer for initializing the weight matrix. 22 | b_init : biases initializer or None 23 | The initializer for initializing the bias vector. If None, skip biases. 24 | W_init_args : dictionary 25 | The arguments for the weights tf.get_variable. 26 | b_init_args : dictionary 27 | The arguments for the biases tf.get_variable. 28 | name : a string or None 29 | An optional name to attach to this layer. 30 | 31 | Examples 32 | -------- 33 | >>> network = tl.layers.InputLayer(x, name='input_layer') 34 | >>> network = tl.layers.DenseLayer( 35 | ... network, 36 | ... n_units=800, 37 | ... act = tf.nn.relu, 38 | ... W_init=tf.truncated_normal_initializer(stddev=0.1), 39 | ... name ='relu_layer' 40 | ... ) 41 | 42 | >>> Without TensorLayer, you can do as follow. 43 | >>> W = tf.Variable( 44 | ... 
tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W') 45 | >>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b') 46 | >>> y = tf.nn.relu(tf.matmul(inputs, W) + b) 47 | 48 | Notes 49 | ----- 50 | If the input to this layer has more than two axes, it need to flatten the 51 | input by using :class:`FlattenLayer` in this case. 52 | """ 53 | 54 | def __init__( 55 | self, 56 | layer=None, 57 | n_units=100, 58 | act=tf.identity, 59 | W_init=tf.truncated_normal_initializer(stddev=0.1), 60 | b_init=tf.constant_initializer(value=0.0), 61 | W_init_args={}, 62 | b_init_args={}, 63 | name='dense_layer', 64 | ): 65 | Layer.__init__(self, name=name) 66 | self.inputs = layer.outputs 67 | if self.inputs.get_shape().ndims != 2: 68 | raise Exception("The input dimension must be rank 2, please reshape or flatten it") 69 | 70 | n_in = int(self.inputs.get_shape()[-1]) 71 | self.n_units = n_units 72 | print(" [TL] DenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__)) 73 | with tf.variable_scope(name) as vs: 74 | with tf.device('/cpu:0'): 75 | W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args) 76 | if b_init is not None: 77 | try: 78 | with tf.device('/cpu:0'): 79 | b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args) 80 | except: # If initializer is a constant, do not specify shape. 81 | with tf.device('/cpu:0'): 82 | b = tf.get_variable(name='b', initializer=b_init, dtype=D_TYPE, **b_init_args) 83 | self.outputs = act(tf.matmul(self.inputs, W) + b) 84 | else: 85 | self.outputs = act(tf.matmul(self.inputs, W)) 86 | 87 | # Hint : list(), dict() is pass by value (shallow), without them, it is 88 | # pass by reference. 89 | self.all_layers = list(layer.all_layers) 90 | self.all_params = list(layer.all_params) 91 | self.all_drop = dict(layer.all_drop) 92 | self.all_layers.extend([self.outputs]) 93 | if b_init is not None: 94 | self.all_params.extend([W, b]) 95 | else: 96 | self.all_params.extend([W]) 97 | 98 | 99 | def inference(x): 100 | network = tl.layers.InputLayer(x, name='input') 101 | network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1') 102 | network = DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu1') 103 | network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2') 104 | network = DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu2') 105 | network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3') 106 | network = DenseLayer(network, n_units=10, act=tf.identity, name='output') 107 | y = network.outputs 108 | return y 109 | 110 | 111 | def load_data(): 112 | X_train, y_train, X_val, y_val, X_test, y_test = \ 113 | tl.files.load_mnist_dataset(shape=(-1, 784), path='/home/aurora/workspaces/data') 114 | print('X_train.shape', X_train.shape) 115 | print('y_train.shape', y_train.shape) 116 | print('X_val.shape', X_val.shape) 117 | print('y_val.shape', y_val.shape) 118 | print('X_test.shape', X_test.shape) 119 | print('y_test.shape', y_test.shape) 120 | print('X %s y %s' % (X_test.dtype, y_test.dtype)) 121 | return X_train, y_train 122 | 123 | 124 | def tower_losses(inputs, labels): 125 | logit = inference(inputs) 126 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels, name='cross_entropy') 127 | return loss 128 | 129 | 130 | def average_gradients(tower_grads): 131 | """Calculate the average gradient for each shared variable across all towers. 
132 | 133 | Note that this function provides a synchronization point across all towers. 134 | 135 | Args: 136 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 137 | is over individual gradients. The inner list is over the gradient 138 | calculation for each tower. 139 | Returns: 140 | List of pairs of (gradient, variable) where the gradient has been averaged 141 | across all towers. 142 | """ 143 | average_grads = [] 144 | 145 | for grad_and_vars in zip(*tower_grads): 146 | # Note that each grad_and_vars looks like the following: 147 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 148 | grads = [] 149 | for g, g1 in grad_and_vars: 150 | # Add 0 dimension to the gradients to represent the tower. 151 | expanded_g = tf.expand_dims(g, 0) 152 | 153 | # Append on a 'tower' dimension which we will average over below. 154 | grads.append(expanded_g) 155 | 156 | # Average over the 'tower' dimension. 157 | grad = tf.concat(axis=0, values=grads) 158 | grad = tf.reduce_mean(grad, 0) 159 | 160 | # Keep in mind that the Variables are redundant because they are shared 161 | # across towers. So .. we will just return the first tower's pointer to 162 | # the Variable. 163 | v = grad_and_vars[0][1] 164 | grad_and_var = (grad, v) 165 | average_grads.append(grad_and_var) 166 | return average_grads 167 | 168 | 169 | def train(): 170 | with tf.Graph().as_default(), tf.device('/cpu:0'): 171 | global_step = tf.get_variable( 172 | 'global_step', [], 173 | initializer=tf.constant_initializer(0), trainable=False) 174 | # Decay the learning rate exponentially based on the number of steps. 175 | lr = tf.train.exponential_decay(0.01, 176 | global_step, 177 | 10000, 178 | 0.99, 179 | staircase=True) 180 | # Create an optimizer that performs gradient descent. 181 | opt = tf.train.GradientDescentOptimizer(lr) 182 | tower_grads = [] 183 | x = tf.placeholder(tf.float32, shape=[None, 784], name='x') 184 | y_ = tf.placeholder(tf.int64, shape=[None, ], name='y_') 185 | with tf.variable_scope(tf.get_variable_scope()): 186 | for i in range(1): 187 | with tf.device('/gpu:%d' % i): 188 | with tf.name_scope('%s_%d' % ('tower', i)) as scope: 189 | tl.layers.set_name_reuse(True) 190 | # Dequeues one batch for the GPU 191 | # Calculate the loss for one tower of the CIFAR model. This function 192 | # constructs the entire CIFAR model but shares the variables across 193 | # all towers. 194 | summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) 195 | loss = tower_losses(x, y_) 196 | # Reuse variables for the next tower. 197 | tf.get_variable_scope().reuse_variables() 198 | # Calculate the gradients for the batch of data on this CIFAR tower. 199 | grads = opt.compute_gradients(loss) 200 | # Keep track of the gradients across all towers. 201 | tower_grads.append(grads) 202 | # We must calculate the mean of each gradient. Note that this is the 203 | # synchronization point across all towers. 204 | grads = average_gradients(tower_grads) 205 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 206 | 207 | # Track the moving averages of all trainable variables. 208 | variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step) 209 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 210 | 211 | train_op = tf.group(apply_gradient_op, variables_averages_op) 212 | # Build an initialization operation to run below. 
213 | init = tf.global_variables_initializer() 214 | sess = tf.Session(config=tf.ConfigProto( 215 | allow_soft_placement=True, 216 | log_device_placement=True)) 217 | sess.run(init) 218 | 219 | 220 | if __name__ == '__main__': 221 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 222 | train() 223 | 224 | -------------------------------------------------------------------------------- /test/multiple_gpu_test/test_tensorlayer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | 4 | Layer = tl.layers.Layer 5 | D_TYPE = tf.float32 6 | 7 | 8 | class DenseLayer(Layer): 9 | """ 10 | The :class:`DenseLayer` class is a fully connected layer. 11 | 12 | Parameters 13 | ---------- 14 | layer : a :class:`Layer` instance 15 | The `Layer` class feeding into this layer. 16 | n_units : int 17 | The number of units of the layer. 18 | act : activation function 19 | The function that is applied to the layer activations. 20 | W_init : weights initializer 21 | The initializer for initializing the weight matrix. 22 | b_init : biases initializer or None 23 | The initializer for initializing the bias vector. If None, skip biases. 24 | W_init_args : dictionary 25 | The arguments for the weights tf.get_variable. 26 | b_init_args : dictionary 27 | The arguments for the biases tf.get_variable. 28 | name : a string or None 29 | An optional name to attach to this layer. 30 | 31 | Examples 32 | -------- 33 | >>> network = tl.layers.InputLayer(x, name='input_layer') 34 | >>> network = tl.layers.DenseLayer( 35 | ... network, 36 | ... n_units=800, 37 | ... act = tf.nn.relu, 38 | ... W_init=tf.truncated_normal_initializer(stddev=0.1), 39 | ... name ='relu_layer' 40 | ... ) 41 | 42 | >>> Without TensorLayer, you can do as follow. 43 | >>> W = tf.Variable( 44 | ... tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W') 45 | >>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b') 46 | >>> y = tf.nn.relu(tf.matmul(inputs, W) + b) 47 | 48 | Notes 49 | ----- 50 | If the input to this layer has more than two axes, it need to flatten the 51 | input by using :class:`FlattenLayer` in this case. 52 | """ 53 | 54 | def __init__( 55 | self, 56 | layer=None, 57 | n_units=100, 58 | act=tf.identity, 59 | W_init=tf.truncated_normal_initializer(stddev=0.1), 60 | b_init=tf.constant_initializer(value=0.0), 61 | W_init_args={}, 62 | b_init_args={}, 63 | name='dense_layer', 64 | ): 65 | Layer.__init__(self, name=name) 66 | self.inputs = layer.outputs 67 | if self.inputs.get_shape().ndims != 2: 68 | raise Exception("The input dimension must be rank 2, please reshape or flatten it") 69 | 70 | n_in = int(self.inputs.get_shape()[-1]) 71 | self.n_units = n_units 72 | print(" [TL] DenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__)) 73 | with tf.variable_scope(name) as vs: 74 | with tf.device('/cpu:0'): 75 | W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args) 76 | if b_init is not None: 77 | try: 78 | with tf.device('/cpu:0'): 79 | b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args) 80 | except: # If initializer is a constant, do not specify shape. 
81 | with tf.device('/cpu:0'): 82 | b = tf.get_variable(name='b', initializer=b_init, dtype=D_TYPE, **b_init_args) 83 | self.outputs = act(tf.matmul(self.inputs, W) + b) 84 | else: 85 | self.outputs = act(tf.matmul(self.inputs, W)) 86 | 87 | # Hint : list(), dict() is pass by value (shallow), without them, it is 88 | # pass by reference. 89 | self.all_layers = list(layer.all_layers) 90 | self.all_params = list(layer.all_params) 91 | self.all_drop = dict(layer.all_drop) 92 | self.all_layers.extend([self.outputs]) 93 | if b_init is not None: 94 | self.all_params.extend([W, b]) 95 | else: 96 | self.all_params.extend([W]) 97 | 98 | 99 | def inference(): 100 | x = tf.placeholder(tf.float32, shape=[None, 784], name='x') 101 | network = tl.layers.InputLayer(x, name='input') 102 | network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1') 103 | network = DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu1') 104 | network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2') 105 | network = DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu2') 106 | network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3') 107 | network = DenseLayer(network, n_units=10, act=tf.identity, name='output') 108 | return network 109 | 110 | 111 | if __name__ == '__main__': 112 | with tf.device('/gpu:0'): 113 | network = inference() 114 | network.print_layers() 115 | sess = tf.Session(config=tf.ConfigProto( 116 | allow_soft_placement=True, 117 | log_device_placement=True)) 118 | tl.layers.initialize_global_variables(sess) -------------------------------------------------------------------------------- /test/resnet_test_static.py: -------------------------------------------------------------------------------- 1 | from resnet import get_resnet 2 | import tensorflow as tf 3 | from nets_utils import get_tensor_static_val 4 | import numpy as np 5 | 6 | 7 | def resnet_diff_test(layers_num): 8 | ckpt_file_path = '../model_weights/resnet_v1_'+str(layers_num)+'.ckpt' 9 | x = tf.placeholder(dtype=tf.float32, shape=[1, 224, 224, 3], name='input_place') 10 | tfconfig = tf.ConfigProto(allow_soft_placement=True) 11 | sess = tf.Session(config=tfconfig) 12 | nets = get_resnet(x, 1000, layers_num, sess) 13 | ckpt_static = get_tensor_static_val(ckpt_file_path, all_tensors=True, all_tensor_names=True) 14 | 15 | print('###########'*30) 16 | vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) 17 | 18 | total_count = 0 19 | mean_avg = 0.0 20 | median_avg = 0.0 21 | std_avg = 0.0 22 | 23 | for var in vars: 24 | var_name = var.op.name 25 | var_name_new = var_name 26 | if '_bn' in var_name: 27 | var_name_new = var_name_new.replace('_bn', '') 28 | if 'W_conv2d' in var_name: 29 | var_name_new = var_name_new.replace('W_conv2d', 'weights') 30 | if 'b_conv2d' in var_name: 31 | var_name_new = var_name_new.replace('b_conv2d', 'biases') 32 | if 'shortcut_conv' in var_name: 33 | var_name_new = var_name_new.replace('shortcut_conv', 'shortcut') 34 | 35 | if var_name_new in ckpt_static: 36 | print(var_name_new, end=', ') 37 | total_count += 1 38 | ckpt_s = ckpt_static[var_name_new] 39 | var_val = sess.run(var) 40 | mean_diff = np.mean(var_val) - ckpt_s.mean 41 | mean_avg += mean_diff 42 | median_diff = np.median(var_val) - ckpt_s.median 43 | median_avg += median_diff 44 | std_diff = np.std(var_val) - ckpt_s.std 45 | std_avg += std_diff 46 | print('mean_diff: ', mean_diff, 'median_diff: ', median_diff, 'std_diff: ', std_diff) 47 | 48 | print('total_mean_diff', mean_avg/total_count, 'total_median_diff', 
median_avg/total_count, 49 | 'total_std_diff', std_avg/total_count) 50 | 51 | 52 | if __name__ == '__main__': 53 | with tf.device('/device:GPU:1'): 54 | resnet_diff_test(50) 55 | -------------------------------------------------------------------------------- /test/test_losses.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from losses.face_losses import cosineface_losses 4 | import mxnet as mx 5 | import math 6 | 7 | 8 | def arcface_loss_val(embedding, labels, weights, out_num, s=64., m=0.5): 9 | ''' 10 | :param embedding: the input embedding vectors 11 | :param labels: the input labels, the shape should be eg: (batch_size, 1) 12 | :param s: scalar value default is 64 13 | :param out_num: output class num 14 | :param m: the margin value, default is 0.5 15 | :return: the final cacualted output, this output is send into the tf.nn.softmax directly 16 | ''' 17 | cos_m = math.cos(m) 18 | sin_m = math.sin(m) 19 | mm = sin_m * m * s 20 | threshold = math.cos(math.pi - m) 21 | with tf.variable_scope('arcface_loss'): 22 | # inputs and weights norm 23 | embedding_norm = tf.norm(embedding, axis=1, keep_dims=True) 24 | embedding = tf.div(embedding, embedding_norm, name='norm_embedding') 25 | weights_norm = tf.norm(weights, axis=0, keep_dims=True) 26 | weights = tf.div(weights, weights_norm, name='norm_weights') 27 | # cos(theta+m) 28 | cos_t = tf.matmul(embedding, weights, name='cos_t') 29 | cos_t2 = tf.square(cos_t, name='cos_2') 30 | sin_t2 = tf.subtract(1., cos_t2, name='sin_2') 31 | sin_t = tf.sqrt(sin_t2, name='sin_t') 32 | cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt') 33 | 34 | # this condition controls the theta+m should in range [0, pi] 35 | # 0<=theta+m<=pi 36 | # -m<=theta<=pi-m 37 | cond_v = cos_t - threshold 38 | cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool) 39 | 40 | keep_val = s * (cos_t - mm) 41 | cos_mt_temp = tf.where(cond, cos_mt, keep_val) 42 | 43 | mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask') 44 | inv_mask = tf.subtract(1., mask, name='inverse_mask') 45 | 46 | s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t') 47 | 48 | output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_loss_output') 49 | return output 50 | 51 | 52 | def test_arcface_losses(np_embedding, np_weights): 53 | tf_embedding = tf.constant(np_embedding, name='embedding', dtype=tf.float32) 54 | labels = tf.constant([1, 3, 2, 1, 1], name='input_labels', dtype=tf.int64) 55 | print(labels) 56 | tf_weights = tf.constant(np_weights, name='weights') 57 | output = arcface_loss_val(embedding=tf_embedding, labels=labels, out_num=10, weights=tf_weights) 58 | print(output) 59 | sess = tf.Session() 60 | sess.run(tf.global_variables_initializer()) 61 | results1 = sess.run(output) 62 | print(results1) 63 | return results1 64 | 65 | 66 | def test_cosineface_losses(): 67 | np_embedding = np.random.randn(5, 512).astype(dtype=np.float32) 68 | tf_embedding = tf.constant(np_embedding, name='embedding', dtype=tf.float32) 69 | labels = tf.constant([1, 3, 2, 1, 1], name='input_labels', dtype=tf.int64) 70 | output = cosineface_losses(embedding=tf_embedding, labels=labels, out_num=10) 71 | sess = tf.Session() 72 | sess.run(tf.global_variables_initializer()) 73 | print(sess.run(output)) 74 | 75 | 76 | def test_mxnet_losses(np_embedding, np_weights): 77 | labels = np.array([1, 3, 2, 1, 1]).astype(dtype=np.float32) 78 | return 
mxnet_arcface_val(np_embedding, labels, np_weights) 79 | 80 | 81 | def mxnet_arcface_val(embedding, gt_label, weights): 82 | s = 64 83 | m = 0.5 84 | _weight = mx.symbol.Variable("fc7_weight", shape=(10, 512), lr_mult=1.0) 85 | _weight = mx.symbol.L2Normalization(_weight, mode='instance') 86 | _embedding = mx.symbol.Variable('mx_embedding', shape=(5, 512), lr_mult=1.0) 87 | nembedding = mx.symbol.L2Normalization(_embedding, mode='instance', name='fc1n')*s 88 | fc7 = mx.sym.FullyConnected(data=nembedding, weight=_weight, no_bias=True, num_hidden=10, name='fc7') 89 | 90 | _labels = mx.symbol.Variable('labels', shape=(5, ), lr_mult=1.0) 91 | zy = mx.sym.pick(fc7, _labels, axis=1) 92 | cos_t = zy/s 93 | 94 | cos_m = math.cos(m) 95 | sin_m = math.sin(m) 96 | mm = math.sin(math.pi - m) * m 97 | # threshold = 0.0 98 | threshold = math.cos(math.pi - m) 99 | 100 | cond_v = cos_t - threshold 101 | cond = mx.symbol.Activation(data=cond_v, act_type='relu') 102 | 103 | body = cos_t * cos_t 104 | body = 1.0 - body 105 | sin_t = mx.sym.sqrt(body) 106 | new_zy = cos_t * cos_m 107 | b = sin_t * sin_m 108 | new_zy = new_zy - b 109 | new_zy = new_zy * s 110 | 111 | zy_keep = zy - s * mm 112 | new_zy = mx.sym.where(cond, new_zy, zy_keep) 113 | 114 | diff = new_zy - zy 115 | diff = mx.sym.expand_dims(diff, 1) 116 | gt_one_hot = mx.sym.one_hot(_labels, depth = 10, on_value = 1.0, off_value = 0.0) 117 | body = mx.sym.broadcast_mul(gt_one_hot, diff) 118 | fc7 = fc7+body 119 | executor = fc7.bind(mx.cpu(), {'fc7_weight': mx.nd.array(weights.T), 'mx_embedding': mx.nd.array(embedding), 120 | 'labels': mx.nd.array(gt_label)}) 121 | output = executor.forward() 122 | print(output) 123 | return output 124 | 125 | 126 | if __name__ == '__main__': 127 | np_embedding = np.random.randn(5, 512).astype(dtype=np.float32) 128 | np_weights = np.random.randn(512, 10).astype(dtype=np.float32) 129 | # test arcface_losses output 130 | result1 = test_arcface_losses(np_embedding, np_weights) 131 | # print('########'*30) 132 | print('################') 133 | result2 = test_mxnet_losses(np_embedding, np_weights) 134 | print(len(result2[0])) 135 | print(type(result1)) 136 | print(type(result2[0].asnumpy())) 137 | print(np.mean(result1 - result2[0].asnumpy())) # 1.26362e-07 -------------------------------------------------------------------------------- /train_nets.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | import argparse 4 | from data.mx2tfrecords import parse_function 5 | import os 6 | # from nets.L_Resnet_E_IR import get_resnet 7 | # from nets.L_Resnet_E_IR_GBN import get_resnet 8 | from nets.L_Resnet_E_IR_fix_issue9 import get_resnet 9 | from losses.face_losses import arcface_loss 10 | from tensorflow.core.protobuf import config_pb2 11 | import time 12 | from data.eval_data_reader import load_bin 13 | from verification import ver_test 14 | 15 | 16 | def get_parser(): 17 | parser = argparse.ArgumentParser(description='parameters to train net') 18 | parser.add_argument('--net_depth', default=100, help='resnet depth, default is 50') 19 | parser.add_argument('--epoch', default=100000, help='epoch to train the network') 20 | parser.add_argument('--batch_size', default=32, help='batch size to train network') 21 | parser.add_argument('--lr_steps', default=[40000, 60000, 80000], help='learning rate to train network') 22 | parser.add_argument('--momentum', default=0.9, help='learning alg momentum') 23 | parser.add_argument('--weight_deacy', 
default=5e-4, help='weight decay factor') 24 | # parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets') 25 | parser.add_argument('--eval_datasets', default=['lfw'], help='evaluation datasets') 26 | parser.add_argument('--eval_db_path', default='./datasets/faces_ms1m_112x112', help='evaluation datasets base path') 27 | parser.add_argument('--image_size', default=[112, 112], help='the image size') 28 | parser.add_argument('--num_output', default=85164, help='number of output classes (identities)') 29 | parser.add_argument('--tfrecords_file_path', default='./datasets/tfrecords', type=str, 30 | help='path to the output of tfrecords file path') 31 | parser.add_argument('--summary_path', default='./output/summary', help='the summary file save path') 32 | parser.add_argument('--ckpt_path', default='./output/ckpt', help='the ckpt file save path') 33 | parser.add_argument('--log_file_path', default='./output/logs', help='the log file save path') 34 | parser.add_argument('--saver_maxkeep', default=100, help='tf.train.Saver max keep ckpt files') 35 | parser.add_argument('--buffer_size', default=10000, help='tf dataset api buffer size') 36 | parser.add_argument('--log_device_mapping', default=False, help='show device placement log') 37 | parser.add_argument('--summary_interval', default=300, help='interval to save summary') 38 | parser.add_argument('--ckpt_interval', default=10000, help='intervals to save ckpt file') 39 | parser.add_argument('--validate_interval', default=2000, help='intervals to run validation') 40 | parser.add_argument('--show_info_interval', default=20, help='intervals to show information') 41 | args = parser.parse_args() 42 | return args 43 | 44 | 45 | if __name__ == '__main__': 46 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 47 | # 1. define global parameters 48 | args = get_parser() 49 | global_step = tf.Variable(name='global_step', initial_value=0, trainable=False) 50 | inc_op = tf.assign_add(global_step, 1, name='increment_global_step') 51 | images = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32) 52 | labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64) 53 | # trainable = tf.placeholder(name='trainable_bn', dtype=tf.bool) 54 | dropout_rate = tf.placeholder(name='dropout_rate', dtype=tf.float32) 55 | # 2 prepare train datasets and test datasets by using tensorflow dataset api 56 | # 2.1 train datasets 57 | # the image is subtracted by 127.5 and multiplied by 1/128. 58 | # random flip left right 59 | tfrecords_f = os.path.join(args.tfrecords_file_path, 'tran.tfrecords') 60 | dataset = tf.data.TFRecordDataset(tfrecords_f) 61 | dataset = dataset.map(parse_function) 62 | dataset = dataset.shuffle(buffer_size=args.buffer_size) 63 | dataset = dataset.batch(args.batch_size) 64 | iterator = dataset.make_initializable_iterator() 65 | next_element = iterator.get_next() 66 | # 2.2 prepare validate datasets 67 | ver_list = [] 68 | ver_name_list = [] 69 | for db in args.eval_datasets: 70 | print('begin db %s convert.' % db) 71 | data_set = load_bin(db, args.image_size, args) 72 | ver_list.append(data_set) 73 | ver_name_list.append(db) 74 | # 3. 
define network, loss, optimize method, learning rate schedule, summary writer, saver 75 | # 3.1 inference phase 76 | w_init_method = tf.contrib.layers.xavier_initializer(uniform=False) 77 | net = get_resnet(images, args.net_depth, type='ir', w_init=w_init_method, trainable=True, keep_rate=dropout_rate) 78 | # 3.2 get arcface loss 79 | logit = arcface_loss(embedding=net.outputs, labels=labels, w_init=w_init_method, out_num=args.num_output) 80 | # test net because of batch normal layer 81 | tl.layers.set_name_reuse(True) 82 | test_net = get_resnet(images, args.net_depth, type='ir', w_init=w_init_method, trainable=False, reuse=True, keep_rate=dropout_rate) 83 | embedding_tensor = test_net.outputs 84 | # 3.3 define the cross entropy 85 | inference_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels)) 86 | # inference_loss_avg = tf.reduce_mean(inference_loss) 87 | # 3.4 define weight deacy losses 88 | # for var in tf.trainable_variables(): 89 | # print(var.name) 90 | # print('##########'*30) 91 | wd_loss = 0 92 | for weights in tl.layers.get_variables_with_name('W_conv2d', True, True): 93 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(weights) 94 | for W in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/W', True, True): 95 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(W) 96 | for weights in tl.layers.get_variables_with_name('embedding_weights', True, True): 97 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(weights) 98 | for gamma in tl.layers.get_variables_with_name('gamma', True, True): 99 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(gamma) 100 | # for beta in tl.layers.get_variables_with_name('beta', True, True): 101 | # wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(beta) 102 | for alphas in tl.layers.get_variables_with_name('alphas', True, True): 103 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(alphas) 104 | # for bias in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/b', True, True): 105 | # wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(bias) 106 | 107 | # 3.5 total losses 108 | total_loss = inference_loss + wd_loss 109 | # 3.6 define the learning rate schedule 110 | p = int(512.0/args.batch_size) 111 | lr_steps = [p*val for val in args.lr_steps] 112 | print(lr_steps) 113 | lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=[0.001, 0.0005, 0.0003, 0.0001], name='lr_schedule') 114 | # 3.7 define the optimize method 115 | opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=args.momentum) 116 | # 3.8 get train op 117 | grads = opt.compute_gradients(total_loss) 118 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 119 | with tf.control_dependencies(update_ops): 120 | train_op = opt.apply_gradients(grads, global_step=global_step) 121 | # train_op = opt.minimize(total_loss, global_step=global_step) 122 | # 3.9 define the inference accuracy used during validate or test 123 | pred = tf.nn.softmax(logit) 124 | acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels), dtype=tf.float32)) 125 | # 3.10 define sess 126 | config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=args.log_device_mapping) 127 | config.gpu_options.allow_growth = True 128 | 129 | sess = tf.Session(config=config) 130 | # 3.11 summary writer 131 | summary = tf.summary.FileWriter(args.summary_path, sess.graph) 132 | summaries = [] 133 | # # 3.11.1 add grad 
histogram op 134 | for grad, var in grads: 135 | if grad is not None: 136 | summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad)) 137 | # 3.11.2 add trainabel variable gradients 138 | for var in tf.trainable_variables(): 139 | summaries.append(tf.summary.histogram(var.op.name, var)) 140 | # 3.11.3 add loss summary 141 | summaries.append(tf.summary.scalar('inference_loss', inference_loss)) 142 | summaries.append(tf.summary.scalar('wd_loss', wd_loss)) 143 | summaries.append(tf.summary.scalar('total_loss', total_loss)) 144 | # 3.11.4 add learning rate 145 | summaries.append(tf.summary.scalar('leraning_rate', lr)) 146 | summary_op = tf.summary.merge(summaries) 147 | # 3.12 saver 148 | saver = tf.train.Saver(max_to_keep=args.saver_maxkeep) 149 | # 3.13 init all variables 150 | sess.run(tf.global_variables_initializer()) 151 | 152 | # restore_saver = tf.train.Saver() 153 | # restore_saver.restore(sess, '/home/aurora/workspaces2018/InsightFace_TF/output/ckpt/InsightFace_iter_1110000.ckpt') 154 | # 4 begin iteration 155 | if not os.path.exists(args.log_file_path): 156 | os.makedirs(args.log_file_path) 157 | log_file_path = args.log_file_path + '/train' + time.strftime('_%Y-%m-%d-%H-%M', time.localtime(time.time())) + '.log' 158 | log_file = open(log_file_path, 'w') 159 | # 4 begin iteration 160 | count = 0 161 | total_accuracy = {} 162 | 163 | for i in range(args.epoch): 164 | sess.run(iterator.initializer) 165 | while True: 166 | try: 167 | images_train, labels_train = sess.run(next_element) 168 | feed_dict = {images: images_train, labels: labels_train, dropout_rate: 0.4} 169 | feed_dict.update(net.all_drop) 170 | start = time.time() 171 | _, total_loss_val, inference_loss_val, wd_loss_val, _, acc_val = \ 172 | sess.run([train_op, total_loss, inference_loss, wd_loss, inc_op, acc], 173 | feed_dict=feed_dict, 174 | options=config_pb2.RunOptions(report_tensor_allocations_upon_oom=True)) 175 | end = time.time() 176 | pre_sec = args.batch_size/(end - start) 177 | # print training information 178 | if count > 0 and count % args.show_info_interval == 0: 179 | print('epoch %d, total_step %d, total loss is %.2f , inference loss is %.2f, weight deacy ' 180 | 'loss is %.2f, training accuracy is %.6f, time %.3f samples/sec' % 181 | (i, count, total_loss_val, inference_loss_val, wd_loss_val, acc_val, pre_sec)) 182 | count += 1 183 | 184 | # save summary 185 | if count > 0 and count % args.summary_interval == 0: 186 | feed_dict = {images: images_train, labels: labels_train, dropout_rate: 0.4} 187 | feed_dict.update(net.all_drop) 188 | summary_op_val = sess.run(summary_op, feed_dict=feed_dict) 189 | summary.add_summary(summary_op_val, count) 190 | 191 | # save ckpt files 192 | if count > 0 and count % args.ckpt_interval == 0: 193 | filename = 'InsightFace_iter_{:d}'.format(count) + '.ckpt' 194 | filename = os.path.join(args.ckpt_path, filename) 195 | saver.save(sess, filename) 196 | 197 | # validate 198 | if count > 0 and count % args.validate_interval == 0: 199 | feed_dict_test ={dropout_rate: 1.0} 200 | feed_dict_test.update(tl.utils.dict_to_one(net.all_drop)) 201 | results = ver_test(ver_list=ver_list, ver_name_list=ver_name_list, nbatch=count, sess=sess, 202 | embedding_tensor=embedding_tensor, batch_size=args.batch_size, feed_dict=feed_dict_test, 203 | input_placeholder=images) 204 | print('test accuracy is: ', str(results[0])) 205 | total_accuracy[str(count)] = results[0] 206 | log_file.write('########'*10+'\n') 207 | log_file.write(','.join(list(total_accuracy.keys())) + '\n') 208 | 
log_file.write(','.join([str(val) for val in list(total_accuracy.values())])+'\n') 209 | log_file.flush() 210 | if max(results) > 0.996: 211 | print('best accuracy is %.5f' % max(results)) 212 | filename = 'InsightFace_iter_best_{:d}'.format(count) + '.ckpt' 213 | filename = os.path.join(args.ckpt_path, filename) 214 | saver.save(sess, filename) 215 | log_file.write('######Best Accuracy######'+'\n') 216 | log_file.write(str(max(results))+'\n') 217 | log_file.write(filename+'\n') 218 | 219 | log_file.flush() 220 | except tf.errors.OutOfRangeError: 221 | print("End of epoch %d" % i) 222 | break 223 | log_file.write('\n') 224 | log_file.close() -------------------------------------------------------------------------------- /train_nets_mgpu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | import argparse 4 | from data.mx2tfrecords import parse_function 5 | import os 6 | from nets.L_Resnet_E_IR_MGPU import get_resnet 7 | from losses.face_losses import arcface_loss 8 | import time 9 | from data.eval_data_reader import load_bin 10 | from verification import ver_test 11 | 12 | 13 | def get_parser(): 14 | parser = argparse.ArgumentParser(description='parameters to train net') 15 | parser.add_argument('--net_depth', default=50, help='resnet depth, default is 50') 16 | parser.add_argument('--epoch', default=100000, help='epoch to train the network') 17 | parser.add_argument('--batch_size', default=32, help='batch size to train network') 18 | parser.add_argument('--lr_steps', default=[40000, 60000, 80000], help='learning rate to train network') 19 | parser.add_argument('--momentum', default=0.9, help='learning alg momentum') 20 | parser.add_argument('--weight_deacy', default=5e-4, help='weight decay factor') 21 | # parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets') 22 | parser.add_argument('--eval_datasets', default=['lfw', 'cfp_fp'], help='evaluation datasets') 23 | parser.add_argument('--eval_db_path', default='./datasets/faces_ms1m_112x112', help='evaluation datasets base path') 24 | parser.add_argument('--image_size', default=[112, 112], help='the image size') 25 | parser.add_argument('--num_output', default=85164, help='number of output classes (identities)') 26 | parser.add_argument('--tfrecords_file_path', default='./datasets/tfrecords', type=str, 27 | help='path to the output of tfrecords file path') 28 | parser.add_argument('--summary_path', default='./output/summary', help='the summary file save path') 29 | parser.add_argument('--ckpt_path', default='./output/ckpt', help='the ckpt file save path') 30 | parser.add_argument('--saver_maxkeep', default=100, help='tf.train.Saver max keep ckpt files') 31 | parser.add_argument('--buffer_size', default=50000, help='tf dataset api buffer size') 32 | parser.add_argument('--log_device_mapping', default=False, help='show device placement log') 33 | parser.add_argument('--summary_interval', default=300, help='interval to save summary') 34 | parser.add_argument('--ckpt_interval', default=5000, help='intervals to save ckpt file') 35 | parser.add_argument('--validate_interval', default=2000, help='intervals to run validation') 36 | parser.add_argument('--show_info_interval', default=20, help='intervals to show information') 37 | parser.add_argument('--num_gpus', default=2, help='the num of gpus') 38 | parser.add_argument('--tower_name', default='tower', help='tower name') 39 | args = parser.parse_args() 40 | return args 41 
| 42 | 43 | def average_gradients(tower_grads): 44 | """Calculate the average gradient for each shared variable across all towers. 45 | 46 | Note that this function provides a synchronization point across all towers. 47 | 48 | Args: 49 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 50 | is over individual gradients. The inner list is over the gradient 51 | calculation for each tower. 52 | Returns: 53 | List of pairs of (gradient, variable) where the gradient has been averaged 54 | across all towers. 55 | """ 56 | average_grads = [] 57 | for grad_and_vars in zip(*tower_grads): 58 | # Note that each grad_and_vars looks like the following: 59 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 60 | grads = [] 61 | for g, _ in grad_and_vars: 62 | # Add 0 dimension to the gradients to represent the tower. 63 | expanded_g = tf.expand_dims(g, 0) 64 | 65 | # Append on a 'tower' dimension which we will average over below. 66 | grads.append(expanded_g) 67 | 68 | # Average over the 'tower' dimension. 69 | grad = tf.concat(axis=0, values=grads) 70 | grad = tf.reduce_mean(grad, 0) 71 | 72 | # Keep in mind that the Variables are redundant because they are shared 73 | # across towers. So .. we will just return the first tower's pointer to 74 | # the Variable. 75 | v = grad_and_vars[0][1] 76 | grad_and_var = (grad, v) 77 | average_grads.append(grad_and_var) 78 | return average_grads 79 | 80 | 81 | if __name__ == '__main__': 82 | # os.environ["CUDA_VISIBLE_DEVICES"] = "0" 83 | # 1. define global parameters 84 | args = get_parser() 85 | global_step = tf.Variable(name='global_step', initial_value=0, trainable=False) 86 | inc_op = tf.assign_add(global_step, 1, name='increment_global_step') 87 | trainable = tf.placeholder(name='trainable_bn', dtype=tf.bool) 88 | images = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32) 89 | labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64) 90 | # splits input to different gpu 91 | images_s = tf.split(images, num_or_size_splits=args.num_gpus, axis=0) 92 | labels_s = tf.split(labels, num_or_size_splits=args.num_gpus, axis=0) 93 | # 2 prepare train datasets and test datasets by using tensorflow dataset api 94 | # 2.1 train datasets 95 | # the image is substracted 127.5 and multiplied 1/128. 96 | # random flip left right 97 | tfrecords_f = os.path.join(args.tfrecords_file_path, 'tran.tfrecords') 98 | dataset = tf.data.TFRecordDataset(tfrecords_f) 99 | dataset = dataset.map(parse_function) 100 | dataset = dataset.shuffle(buffer_size=args.buffer_size) 101 | dataset = dataset.batch(args.batch_size) 102 | iterator = dataset.make_initializable_iterator() 103 | next_element = iterator.get_next() 104 | # 2.2 prepare validate datasets 105 | ver_list = [] 106 | ver_name_list = [] 107 | for db in args.eval_datasets: 108 | print('begin db %s convert.' % db) 109 | data_set = load_bin(db, args.image_size, args) 110 | ver_list.append(data_set) 111 | ver_name_list.append(db) 112 | 113 | # 3. 
define network, loss, optimize method, learning rate schedule, summary writer, saver 114 | # 3.1 inference phase 115 | w_init_method = tf.contrib.layers.xavier_initializer(uniform=False) 116 | # 3.2 define the learning rate schedule 117 | p = int(512.0/args.batch_size) 118 | lr_steps = [p*val for val in args.lr_steps] 119 | print('learning rate steps: ', lr_steps) 120 | lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=[0.001, 0.0001, 0.00005, 0.00001], name='lr_schedule') 121 | # 3.3 define the optimize method 122 | opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=args.momentum) 123 | 124 | # Calculate the gradients for each model tower. 125 | tower_grads = [] 126 | tl.layers.set_name_reuse(True) 127 | loss_dict = {} 128 | drop_dict = {} 129 | loss_keys = [] 130 | with tf.variable_scope(tf.get_variable_scope()): 131 | for i in range(args.num_gpus): 132 | with tf.device('/gpu:%d' % i): 133 | with tf.name_scope('%s_%d' % (args.tower_name, i)) as scope: 134 | net = get_resnet(images_s[i], args.net_depth, type='ir', w_init=w_init_method, trainable=trainable) 135 | logit = arcface_loss(embedding=net.outputs, labels=labels_s[i], w_init=w_init_method, out_num=args.num_output) 136 | # Reuse variables for the next tower. 137 | tf.get_variable_scope().reuse_variables() 138 | # define the cross entropy 139 | inference_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels_s[i])) 140 | # define weight decay losses 141 | wd_loss = 0 142 | for weights in tl.layers.get_variables_with_name('W_conv2d', True, True): 143 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(weights) 144 | for W in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/W', True, True): 145 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(W) 146 | for weights in tl.layers.get_variables_with_name('embedding_weights', True, True): 147 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(weights) 148 | for gamma in tl.layers.get_variables_with_name('gamma', True, True): 149 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(gamma) 150 | for beta in tl.layers.get_variables_with_name('beta', True, True): 151 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(beta) 152 | for alphas in tl.layers.get_variables_with_name('alphas', True, True): 153 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(alphas) 154 | for bias in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/b', True, True): 155 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(bias) 156 | total_loss = inference_loss + wd_loss 157 | 158 | loss_dict[('inference_loss_%s_%d' % ('gpu', i))] = inference_loss 159 | loss_keys.append(('inference_loss_%s_%d' % ('gpu', i))) 160 | loss_dict[('wd_loss_%s_%d' % ('gpu', i))] = wd_loss 161 | loss_keys.append(('wd_loss_%s_%d' % ('gpu', i))) 162 | loss_dict[('total_loss_%s_%d' % ('gpu', i))] = total_loss 163 | loss_keys.append(('total_loss_%s_%d' % ('gpu', i))) 164 | grads = opt.compute_gradients(total_loss) 165 | tower_grads.append(grads) 166 | drop_dict.update(net.all_drop) 167 | if i == 0: 168 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 169 | pred = tf.nn.softmax(logit) 170 | acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels_s[i]), dtype=tf.float32)) 171 | embedding_tensor_gpu0 = net.outputs 172 | 173 | grads = average_gradients(tower_grads) 174 | with tf.control_dependencies(update_ops): 175 | # Apply the gradients 
to adjust the shared variables. 176 | train_op = opt.apply_gradients(grads, global_step=global_step) 177 | 178 | config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=args.log_device_mapping) 179 | config.gpu_options.allow_growth = True 180 | sess = tf.Session(config=config) 181 | # summary writer 182 | summary = tf.summary.FileWriter(args.summary_path, sess.graph) 183 | summaries = [] 184 | # add grad histogram op 185 | for grad, var in grads: 186 | if grad is not None: 187 | summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad)) 188 | # add trainabel variable gradients 189 | for var in tf.trainable_variables(): 190 | summaries.append(tf.summary.histogram(var.op.name, var)) 191 | # add loss summary 192 | for keys, val in loss_dict.items(): 193 | summaries.append(tf.summary.scalar(keys, val)) 194 | # add learning rate 195 | summaries.append(tf.summary.scalar('leraning_rate', lr)) 196 | summary_op = tf.summary.merge(summaries) 197 | 198 | # Create a saver. 199 | saver = tf.train.Saver(tf.global_variables()) 200 | # init all variables 201 | sess.run(tf.global_variables_initializer()) 202 | 203 | drop_dict_test = {keys: 1 for keys in drop_dict.keys()} 204 | # begin iteration 205 | count = 0 206 | for i in range(args.epoch): 207 | sess.run(iterator.initializer) 208 | while True: 209 | try: 210 | images_train, labels_train = sess.run(next_element) 211 | feed_dict = {images: images_train, labels: labels_train, trainable: True} 212 | feed_dict.update(drop_dict) 213 | start = time.time() 214 | _, _, inference_loss_val_gpu_1, wd_loss_val_gpu_1, total_loss_gpu_1, inference_loss_val_gpu_2, \ 215 | wd_loss_val_gpu_2, total_loss_gpu_2, acc_val = sess.run([train_op, inc_op, loss_dict[loss_keys[0]], 216 | loss_dict[loss_keys[1]], 217 | loss_dict[loss_keys[2]], 218 | loss_dict[loss_keys[3]], 219 | loss_dict[loss_keys[4]], 220 | loss_dict[loss_keys[5]], acc], 221 | feed_dict=feed_dict) 222 | end = time.time() 223 | pre_sec = args.batch_size/(end - start) 224 | # print training information 225 | if count > 0 and count % args.show_info_interval == 0: 226 | print('epoch %d, total_step %d, total loss gpu 1 is %.2f , inference loss gpu 1 is %.2f, weight deacy ' 227 | 'loss gpu 1 is %.2f, total loss gpu 2 is %.2f , inference loss gpu 2 is %.2f, weight deacy ' 228 | 'loss gpu 2 is %.2f, training accuracy is %.6f, time %.3f samples/sec' % 229 | (i, count, total_loss_gpu_1, inference_loss_val_gpu_1, wd_loss_val_gpu_1, total_loss_gpu_2, 230 | inference_loss_val_gpu_2, wd_loss_val_gpu_2, acc_val, pre_sec)) 231 | count += 1 232 | 233 | # save summary 234 | if count > 0 and count % args.summary_interval == 0: 235 | feed_dict = {images: images_train, labels: labels_train, trainable: True} 236 | feed_dict.update(drop_dict) 237 | summary_op_val = sess.run(summary_op, feed_dict=feed_dict) 238 | summary.add_summary(summary_op_val, count) 239 | 240 | # save ckpt files 241 | if count > 0 and count % args.ckpt_interval == 0: 242 | filename = 'InsightFace_iter_{:d}'.format(count) + '.ckpt' 243 | filename = os.path.join(args.ckpt_path, filename) 244 | saver.save(sess, filename) 245 | # # validate 246 | if count > 0 and count % args.validate_interval == 0: 247 | feed_dict_test ={trainable: False} 248 | feed_dict_test.update(drop_dict_test) 249 | results = ver_test(ver_list=ver_list, ver_name_list=ver_name_list, nbatch=count, sess=sess, 250 | embedding_tensor=embedding_tensor_gpu0, batch_size=args.batch_size//args.num_gpus, feed_dict=feed_dict_test, 251 | input_placeholder=images_s[0]) 252 | if 
                    if max(results) > 0.99:
                        print('best accuracy is %.5f' % max(results))
                        filename = 'InsightFace_iter_best_{:d}'.format(count) + '.ckpt'
                        filename = os.path.join(args.ckpt_path, filename)
                        saver.save(sess, filename)
            except tf.errors.OutOfRangeError:
                print("End of epoch %d" % i)
                break
-------------------------------------------------------------------------------- /train_nets_mgpu_new.py: --------------------------------------------------------------------------------
import tensorflow as tf
import tensorlayer as tl
import argparse
from data.mx2tfrecords import parse_function
import os
from nets.L_Resnet_E_IR_MGPU import get_resnet
from losses.face_losses import arcface_loss
import time
from data.eval_data_reader import load_bin
from verification import ver_test


def get_parser():
    parser = argparse.ArgumentParser(description='parameters to train net')
    parser.add_argument('--net_depth', default=100, type=int, help='resnet depth, default is 100')
    parser.add_argument('--epoch', default=100000, type=int, help='number of epochs to train the network')
    parser.add_argument('--batch_size', default=64, type=int, help='batch size to train the network')
    parser.add_argument('--lr_steps', default=[40000, 60000, 80000], help='base steps at which to decay the learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, help='optimizer momentum')
    parser.add_argument('--weight_decay', default=5e-4, type=float, help='weight decay coefficient')
    # parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets')
    parser.add_argument('--eval_datasets', default=['lfw', 'cfp_fp'], help='evaluation datasets')
    parser.add_argument('--eval_db_path', default='./datasets/faces_ms1m_112x112', help='evaluation datasets base path')
    parser.add_argument('--image_size', default=[112, 112], help='the image size')
    parser.add_argument('--num_output', default=85164, type=int, help='number of identities (softmax output size)')
    parser.add_argument('--tfrecords_file_path', default='./datasets/tfrecords', type=str,
                        help='path to the tfrecords files')
    parser.add_argument('--summary_path', default='./output/summary', help='the summary file save path')
    parser.add_argument('--ckpt_path', default='./output/ckpt', help='the ckpt file save path')
    parser.add_argument('--saver_maxkeep', default=100, type=int, help='tf.train.Saver max keep ckpt files')
    parser.add_argument('--buffer_size', default=100000, type=int, help='tf dataset api buffer size')
    parser.add_argument('--log_device_mapping', default=False, help='show device placement log')
    parser.add_argument('--summary_interval', default=300, type=int, help='interval to save summary')
    parser.add_argument('--ckpt_interval', default=5000, type=int, help='intervals to save ckpt file')
    parser.add_argument('--validate_interval', default=2000, type=int, help='intervals to run validation')
    parser.add_argument('--show_info_interval', default=20, type=int, help='intervals to show information')
    parser.add_argument('--num_gpus', default=2, type=int, help='the number of gpus')
    parser.add_argument('--tower_name', default='tower', help='tower name')
    args = parser.parse_args()
    return args
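

# A hypothetical invocation using the flags defined above (the paths are
# placeholders for wherever the tfrecords and output directories actually live):
#
#   python train_nets_mgpu_new.py --net_depth 50 --batch_size 64 --num_gpus 2 \
#       --tfrecords_file_path ./datasets/tfrecords --ckpt_path ./output/ckpt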


def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer list
            is over individual gradients. The inner list is over the gradient
            calculation for each tower.
    Returns:
        List of pairs of (gradient, variable) where the gradient has been averaged
        across all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
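
# A minimal shape sketch of average_gradients (illustrative only): with two
# towers that each contribute one (gradient, variable) pair,
#     tower_grads = [[(g0_gpu0, v0)], [(g0_gpu1, v0)]]
# zip(*tower_grads) pairs the per-tower gradients of the same variable, the
# gradients are stacked on a new leading 'tower' axis and averaged over it,
# and the result is [(mean_grad, v0)] -- the first tower's variable reference
# is reused because the variables are shared across towers.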


if __name__ == '__main__':
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # 1. define global parameters
    args = get_parser()
    global_step = tf.Variable(name='global_step', initial_value=0, trainable=False)
    inc_op = tf.assign_add(global_step, 1, name='increment_global_step')
    images = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32)
    images_test = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32)
    labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64)
    dropout_rate = tf.placeholder(name='dropout_rate', dtype=tf.float32)
    # split the inputs across the gpus
    images_s = tf.split(images, num_or_size_splits=args.num_gpus, axis=0)
    labels_s = tf.split(labels, num_or_size_splits=args.num_gpus, axis=0)
    # 2. prepare train datasets and test datasets by using the tensorflow dataset api
    # 2.1 train datasets
    # each image has 127.5 subtracted and is then scaled by 1/128
    # random flip left right
    tfrecords_f = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
    dataset = tf.data.TFRecordDataset(tfrecords_f)
    dataset = dataset.map(parse_function)
    dataset = dataset.shuffle(buffer_size=args.buffer_size)
    dataset = dataset.batch(args.batch_size)
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    # 2.2 prepare validate datasets
    ver_list = []
    ver_name_list = []
    for db in args.eval_datasets:
        print('begin db %s convert.' % db)
        data_set = load_bin(db, args.image_size, args)
        ver_list.append(data_set)
        ver_name_list.append(db)
    # 3. define network, loss, optimize method, learning rate schedule, summary writer, saver
    # 3.1 inference phase
    w_init_method = tf.contrib.layers.xavier_initializer(uniform=False)
    # 3.2 define the learning rate schedule
    p = int(512.0/args.batch_size)
    lr_steps = [p*val for val in args.lr_steps]
    print('learning rate steps: ', lr_steps)
    lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=[0.001, 0.0005, 0.0003, 0.0001],
                                     name='lr_schedule')
    # 3.3 define the optimize method
    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=args.momentum)

    # Calculate the gradients for each model tower.
    tower_grads = []
    tl.layers.set_name_reuse(True)
    loss_dict = {}
    loss_keys = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(args.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (args.tower_name, i)) as scope:
                    net = get_resnet(images_s[i], args.net_depth, type='ir', w_init=w_init_method, trainable=True,
                                     keep_rate=dropout_rate)
                    logit = arcface_loss(embedding=net.outputs, labels=labels_s[i], w_init=w_init_method,
                                         out_num=args.num_output)
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # define the cross entropy loss
                    inference_loss = tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels_s[i]))
                    # define the weight decay losses
                    wd_loss = 0
                    for weights in tl.layers.get_variables_with_name('W_conv2d', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(weights)
                    for W in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/W', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(W)
                    for weights in tl.layers.get_variables_with_name('embedding_weights', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(weights)
                    for gamma in tl.layers.get_variables_with_name('gamma', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(gamma)
                    for alphas in tl.layers.get_variables_with_name('alphas', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(alphas)
                    total_loss = inference_loss + wd_loss

                    loss_dict['inference_loss_%s_%d' % ('gpu', i)] = inference_loss
                    loss_keys.append('inference_loss_%s_%d' % ('gpu', i))
                    loss_dict['wd_loss_%s_%d' % ('gpu', i)] = wd_loss
                    loss_keys.append('wd_loss_%s_%d' % ('gpu', i))
                    loss_dict['total_loss_%s_%d' % ('gpu', i)] = total_loss
                    loss_keys.append('total_loss_%s_%d' % ('gpu', i))
                    grads = opt.compute_gradients(total_loss)
                    tower_grads.append(grads)
                    if i == 0:
                        test_net = get_resnet(images_test, args.net_depth, type='ir', w_init=w_init_method,
                                              trainable=False, keep_rate=dropout_rate)
                        embedding_tensor = test_net.outputs
                        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                        pred = tf.nn.softmax(logit)
                        acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels_s[i]), dtype=tf.float32))

    grads = average_gradients(tower_grads)
    with tf.control_dependencies(update_ops):
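        # update_ops holds the batch-norm moving-average update ops collected
        # from the first tower above; wrapping apply_gradients in this control
        # dependency makes every training step refresh those moving statistics,
        # which sess.run(train_op) would otherwise never execute.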
        # Apply the gradients to adjust the shared variables.
        train_op = opt.apply_gradients(grads, global_step=global_step)

    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=args.log_device_mapping)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # summary writer
    summary = tf.summary.FileWriter(args.summary_path, sess.graph)
    summaries = []
    # add grad histogram op
    for grad, var in grads:
        if grad is not None:
            summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad))
    # add histograms for trainable variables
    for var in tf.trainable_variables():
        summaries.append(tf.summary.histogram(var.op.name, var))
    # add loss summary
    for keys, val in loss_dict.items():
        summaries.append(tf.summary.scalar(keys, val))
    # add learning rate
    summaries.append(tf.summary.scalar('learning_rate', lr))
    summary_op = tf.summary.merge(summaries)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())
    # init all variables
    sess.run(tf.global_variables_initializer())
    # begin iteration
    count = 0
    for i in range(args.epoch):
        sess.run(iterator.initializer)
        while True:
            try:
                images_train, labels_train = sess.run(next_element)
                feed_dict = {images: images_train, labels: labels_train, dropout_rate: 0.4}
                start = time.time()
                _, _, inference_loss_val_gpu_1, wd_loss_val_gpu_1, total_loss_gpu_1, inference_loss_val_gpu_2, \
                    wd_loss_val_gpu_2, total_loss_gpu_2, acc_val = sess.run([train_op, inc_op, loss_dict[loss_keys[0]],
                                                                             loss_dict[loss_keys[1]],
                                                                             loss_dict[loss_keys[2]],
                                                                             loss_dict[loss_keys[3]],
                                                                             loss_dict[loss_keys[4]],
                                                                             loss_dict[loss_keys[5]], acc],
                                                                            feed_dict=feed_dict)
                end = time.time()
                pre_sec = args.batch_size/(end - start)
                # print training information
                if count > 0 and count % args.show_info_interval == 0:
                    print('epoch %d, total_step %d, total loss: [%.2f, %.2f], inference loss: [%.2f, %.2f], '
                          'weight decay loss: [%.2f, %.2f], training accuracy is %.6f, time %.3f samples/sec' %
                          (i, count, total_loss_gpu_1, total_loss_gpu_2, inference_loss_val_gpu_1,
                           inference_loss_val_gpu_2, wd_loss_val_gpu_1, wd_loss_val_gpu_2, acc_val, pre_sec))
                count += 1

                # save summary
                if count > 0 and count % args.summary_interval == 0:
                    feed_dict = {images: images_train, labels: labels_train, dropout_rate: 0.4}
                    summary_op_val = sess.run(summary_op, feed_dict=feed_dict)
                    summary.add_summary(summary_op_val, count)

                # save ckpt files
                if count > 0 and count % args.ckpt_interval == 0:
                    filename = 'InsightFace_iter_{:d}'.format(count) + '.ckpt'
                    filename = os.path.join(args.ckpt_path, filename)
                    saver.save(sess, filename)
                # validate
                if count >= 0 and count % args.validate_interval == 0:
                    feed_dict_test = {dropout_rate: 1.0}
                    results = ver_test(ver_list=ver_list, ver_name_list=ver_name_list, nbatch=count, sess=sess,
                                       embedding_tensor=embedding_tensor,
                                       batch_size=args.batch_size//args.num_gpus, feed_dict=feed_dict_test,
                                       input_placeholder=images_test)
                    if max(results) > 0.998:
                        print('best accuracy is %.5f' % max(results))
                        filename = 'InsightFace_iter_best_{:d}'.format(count) + '.ckpt'
                        filename = os.path.join(args.ckpt_path, filename)
                        saver.save(sess, filename)
            except tf.errors.OutOfRangeError:
                print("End of epoch %d" % i)
                break
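
    # A sketch of restoring one of the checkpoints saved above in a separate
    # evaluation session (the iteration number 100000 is a hypothetical example
    # of the 'InsightFace_iter_{:d}.ckpt' naming used here):
    #   saver = tf.train.Saver()
    #   saver.restore(sess, './output/ckpt/InsightFace_iter_100000.ckpt')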
-------------------------------------------------------------------------------- /verification.py: --------------------------------------------------------------------------------
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""

# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import tensorflow as tf
import numpy as np
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
import sklearn.preprocessing
from scipy import interpolate
import datetime
import mxnet as mx


def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, pca=0):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)
    # print('pca', pca)

    if pca == 0:
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # print('train_set', train_set)
        # print('test_set', test_set)
        if pca > 0:
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            # print(_embed_train.shape)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            # print(embed1.shape, embed2.shape)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index])
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold,
                                                                                                 dist[test_set],
                                                                                                 actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set],
                                                      actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc
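
# A small worked example of calculate_accuracy (illustrative values only):
# with dist = [0.5, 1.2, 0.3, 2.0], actual_issame = [True, True, False, False]
# and threshold = 1.0, predict_issame is [True, False, True, False], so
# tp = 1, fp = 1, tn = 1, fn = 1, giving tpr = 0.5, fpr = 0.5 and acc = 0.5.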


def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    '''
    Copied from [insightface](https://github.com/deepinsight/insightface)
    :param thresholds: candidate decision thresholds on the squared distance
    :param embeddings1: embeddings of the first image in each pair
    :param embeddings2: embeddings of the second image in each pair
    :param actual_issame: ground-truth same/different label for each pair
    :param far_target: target false accept rate
    :param nrof_folds: number of cross-validation folds
    :return: mean validation rate, its std over folds, and the mean FAR
    '''
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    # Calculate evaluation metrics
    thresholds = np.arange(0, 4, 0.01)
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    tpr, fpr, accuracy = calculate_roc(thresholds, embeddings1, embeddings2,
                                       np.asarray(actual_issame), nrof_folds=nrof_folds, pca=pca)
    thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2,
                                      np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds)
    return tpr, fpr, accuracy, val, val_std, far


def data_iter(datasets, batch_size):
    data_num = datasets.shape[0]
    for i in range(0, data_num, batch_size):
        yield datasets[i:min(i+batch_size, data_num), ...]
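
# data_iter batching sketch: for datasets with datasets.shape[0] == 10 and
# batch_size == 4 the generator yields slices of 4, 4 and 2 rows, so the last
# batch may be smaller than batch_size (which test() below accounts for with
# the min(...) bounds when writing embeddings back).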


def test(data_set, sess, embedding_tensor, batch_size, label_shape=None, feed_dict=None, input_placeholder=None):
    '''
    References the official implementation from [insightface](https://github.com/deepinsight/insightface)
    :param data_set: tuple of (data_list, issame_list) produced by load_bin
    :param sess: the tf.Session to run the embedding computation in
    :param embedding_tensor: tensor that outputs the face embeddings
    :param batch_size: batch size used when feeding images
    :param label_shape: unused, kept for interface compatibility
    :param feed_dict: extra feeds (e.g. the dropout keep rate)
    :param input_placeholder: placeholder that receives the image batches
    :return: accuracy statistics, the mean embedding norm and the embeddings
    '''
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        datas = data_list[i]
        embeddings = None
        feed_dict.setdefault(input_placeholder, None)
        for idx, data in enumerate(data_iter(datas, batch_size)):
            data_tmp = data.copy()  # fix issue #4
            data_tmp -= 127.5
            data_tmp *= 0.0078125
            feed_dict[input_placeholder] = data_tmp
            time0 = datetime.datetime.now()
            _embeddings = sess.run(embedding_tensor, feed_dict)
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((datas.shape[0], _embeddings.shape[1]))
            try:
                embeddings[idx*batch_size:min((idx+1)*batch_size, datas.shape[0]), ...] = _embeddings
            except ValueError:
                print('idx*batch_size is %d, min((idx+1)*batch_size, datas.shape[0]) is %d, batch_size is %d, '
                      'datas.shape[0] is %d' %
                      (idx*batch_size, min((idx+1)*batch_size, datas.shape[0]), batch_size, datas.shape[0]))
                print('embedding shape is ', _embeddings.shape)
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _em = embed[i]
            _norm = np.linalg.norm(_em)
            # print(_em.shape, _norm)
            _xnorm += _norm
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    acc1 = 0.0
    std1 = 0.0
    # combine the embeddings of the two copies (original and flipped) of each image
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list


def ver_test(ver_list, ver_name_list, nbatch, sess, embedding_tensor, batch_size, feed_dict, input_placeholder):
    results = []
    for i in range(len(ver_list)):
        acc1, std1, acc2, std2, xnorm, embeddings_list = test(data_set=ver_list[i], sess=sess,
                                                              embedding_tensor=embedding_tensor,
                                                              batch_size=batch_size, feed_dict=feed_dict,
                                                              input_placeholder=input_placeholder)
        print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
        print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2))
        results.append(acc2)
    return results
--------------------------------------------------------------------------------