├── LICENSE
├── README.md
├── data
│   ├── __init__.py
│   ├── eval_data_reader.py
│   └── mx2tfrecords.py
├── eval_ckpt_file.py
├── figures
│   ├── lfw_250k.png
│   ├── lfw_310k.png
│   ├── lfw_730k.png
│   └── model_a_170k.png
├── losses
│   ├── __init__.py
│   └── face_losses.py
├── nets
│   ├── L_Resnet_E_IR.py
│   ├── L_Resnet_E_IR_GBN.py
│   ├── L_Resnet_E_IR_MGPU.py
│   ├── L_Resnet_E_IR_RBN.py
│   ├── L_Resnet_E_IR_fix_issue9.py
│   ├── __init__.py
│   ├── imagenet_classes.py
│   ├── nets_utils.py
│   ├── networks.py
│   ├── readme.md
│   ├── resnet.py
│   ├── tl_layers_modify.py
│   ├── vgg16.py
│   └── vgg19.py
├── test
│   ├── benchmark
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── gluon_batchsize_test.py
│   │   ├── mxnet_batchsize_test.py
│   │   ├── resnet_slim_benchmark.py
│   │   ├── resnet_tl_benchmark.py
│   │   ├── tensorlayer_batchsize_test.py
│   │   ├── utils_final.py
│   │   ├── vgg19_slim_benchmark.py
│   │   └── vgg19_tl_benchmark.py
│   ├── memory_usage_test.py
│   ├── multiple_gpu_test
│   │   ├── __init__.py
│   │   ├── test_mgpu_mnist.py
│   │   └── test_tensorlayer.py
│   ├── resnet_test_static.py
│   └── test_losses.py
├── train_nets.py
├── train_nets_mgpu.py
├── train_nets_mgpu_new.py
└── verification.py
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2018 Jiankang Deng and Jia Guo
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Insight Face in TensorFlow
2 | 
3 | #### Tasks
4 | * ~~mxnet dataset to tfrecords~~
5 | * ~~backbone network architectures [vgg16, vgg19, resnet]~~
6 | * ~~backbone network architectures [resnet-se, resnext]~~
7 | * ~~LResNet50E-IR~~
8 | * ~~LResNet100E-IR~~
9 | * ~~Additive Angular Margin Loss~~
10 | * ~~CosineFace Loss~~
11 | * ~~train network code~~
12 | * ~~add validation during training~~
13 | * ~~multi-gpu training~~
14 | * ~~combine losses~~ contributed by RogerLo.
15 | * evaluation code
16 | 
17 | 
18 | #### Training Tips (continual updates)
19 | * If you can't use a large batch size (>128), you should use a smaller learning rate.
20 | * If you can't use a large batch size (>128), you can try batch renormalization (file `L_Resnet_E_IR_RBN.py`).
21 | * If you use multiple GPUs, keep at least 16 images per GPU.
22 | * Try [Group Normalization](https://arxiv.org/pdf/1803.08494.pdf); the corresponding code is `L_Resnet_E_IR_GBN.py` (a minimal reference sketch follows this list).
23 | * Using the current model and the lr schedule in `train_nets.py`, you can reproduce the results of `model c`.
24 | * The bug that inflated the model size to 1.6 GB has been fixed (see issue #9). If you want a small model, use `L_Resnet_E_IR_fix_issue9.py`.
25 | * The bug in the multi-gpu training code has been fixed. To use the corrected version, run `train_nets_mgpu_new.py`.
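For reference, group normalization is only a few lines of plain TensorFlow. The sketch below is a minimal NHWC implementation with the paper's default of 32 groups; it is illustrative only and is **not** the `GroupNormLayer` from `tl_layers_modify.py`, which may differ in details.

```python
import tensorflow as tf

def group_norm(x, groups=32, eps=1e-5, scope='group_norm'):
    # x: [N, H, W, C] feature map; normalize within channel groups
    with tf.variable_scope(scope):
        _, h, w, c = x.get_shape().as_list()
        g = min(groups, c)
        x = tf.reshape(x, [-1, h, w, g, c // g])
        mean, var = tf.nn.moments(x, [1, 2, 4], keep_dims=True)
        x = (x - mean) / tf.sqrt(var + eps)
        x = tf.reshape(x, [-1, h, w, c])
        # learned per-channel affine, as in batch norm
        gamma = tf.get_variable('gamma', [c], initializer=tf.ones_initializer())
        beta = tf.get_variable('beta', [c], initializer=tf.zeros_initializer())
        return x * gamma + beta
```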
26 | 
27 | 
28 | #### Training models (continual updates)
29 | 
30 | ##### model A
31 | | model name | depth | normalization layer | batch size | total_steps | download | password |
32 | | ----- |:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
33 | | model A | 50 | group normalization | 16 | 1060k | [model a](https://pan.baidu.com/s/1qWrDCTFlQXlFcBR-dqR-6A) | 2q72 |
34 | 
35 | ###### accuracy
36 | | dbname | accuracy |
37 | | ----- |:-----:|
38 | | lfw | 0.9897 |
39 | | cfp_ff | 0.9876 |
40 | | cfp_fp | 0.84357 |
41 | | age_db30 | 0.914 |
42 | 
43 | 
44 | ##### model B
45 | | model name | depth | normalization layer | batch size | total_steps | download | password |
46 | | ----- |:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
47 | | model B | 50 | batch normalization | 16 | 1100k | [model_b](https://pan.baidu.com/s/11KDqOkF4ThO7mnQQaNO9bA) | h6ai |
48 | 
49 | ###### accuracy
50 | | dbname | accuracy |
51 | | ----- |:-----:|
52 | | lfw | 0.9933 |
53 | | cfp_ff | 0.99357 |
54 | | cfp_fp | 0.8766 |
55 | | age_db30 | 0.9342 |
56 | 
57 | 
58 | 
59 | ##### model C
60 | | model name | depth | normalization layer | batch size | total_steps | download | password |
61 | | ----- |:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
62 | | model C | 50 | batch normalization | 16 | 1950k | [model_c](https://pan.baidu.com/s/1ZlDcQPBh0znduSH6vQ_Q8Q) | 8mdi |
63 | 
64 | ###### accuracy
65 | | dbname | accuracy |
66 | | ----- |:-----:|
67 | | lfw | 0.9963 |
68 | | cfp_ff | 0.99586 |
69 | | cfp_fp | 0.9087 |
70 | | age_db30 | 0.96367 |
71 | 
72 | 
73 | ##### model D
74 | | model name | depth | normalization layer | batch size | total_steps | model_size | download | password |
75 | | ----- |:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|:-----:|
76 | | model D | 50 | batch normalization | 136 | 710k | 348.9MB | [model_d](https://pan.baidu.com/s/1tQYMqfbG36wg1cXKHVoMVw) | amdt |
77 | 
78 | ###### accuracy
79 | | dbname | accuracy |
80 | | ----- |:-----:|
81 | | lfw | 0.9968 |
82 | | cfp_ff | 0.9973 |
83 | | cfp_fp | 0.9271 |
84 | | age_db30 | 0.9725 |
85 | 
86 | 
87 | 
88 | #### Requirements
89 | 1. TensorFlow 1.4–1.6
90 | 2. TensorLayer 1.7
91 | 3. CUDA 8 & cuDNN 6, or CUDA 9 & cuDNN 7
92 | 4. Python 3
93 | 
94 | 
95 | #### Max Batch Size Test
96 | ###### Environment
97 | 
98 | | GPU | cuda | cudnn | TensorFlow | TensorLayer | MXNet | Gluon |
99 | | ----- |:-----:|:-----:|:------:|:---:|:------:|:---:|
100 | | Titan Xp | 9.0 | 7.0 | 1.6 | 1.7 | 1.1.0 | 1.1.0 |
101 | 
102 | ###### Results
103 | 
104 | | DL Tools | Max BatchSize (without bn and prelu) | Max BatchSize (with bn only) | Max BatchSize (with prelu only) | Max BatchSize (with bn and prelu) |
105 | | ------------- |:-------------:|:--------------:|:------------:|:------------:|
106 | | TensorLayer | (8000, 9000) | (5000, 6000) | (3000, 4000) | (2000, 3000) |
107 | | MXNet | (40000, 50000) | (20000, 30000) | (20000, 30000) | (10000, 20000) |
108 | | Gluon | (7000, 8000) | (3000, 4000) | no official method | None |
109 | 
110 | > (8000, 9000): a batch size of 8000 ran without OOM, while 9000 raised an OOM error
111 | 
112 | ###### Test Code
113 | 
114 | | TensorLayer | MXNet | Gluon |
115 | | ----- |:-----:|:-----:|
116 | | [tensorlayer_batchsize_test.py](https://github.com/auroua/InsightFace_TF/blob/master/test/benchmark/tensorlayer_batchsize_test.py) | [mxnet_batchsize_test.py](https://github.com/auroua/InsightFace_TF/blob/master/test/benchmark/mxnet_batchsize_test.py) | [gluon_batchsize_test.py](https://github.com/auroua/InsightFace_TF/blob/master/test/benchmark/gluon_batchsize_test.py) |
117 | 
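The bracketed pairs above were found by trial. For reference, a hedged sketch of such a probing loop — illustrative only, not the benchmark code linked above; `build_train_step(batch_size)` is a hypothetical callback that builds a graph and runs one training step:

```python
import tensorflow as tf

def probe_max_batch_size(build_train_step, candidates=(1000, 2000, 3000, 4000, 5000)):
    """Return the largest candidate batch size that runs without OOM."""
    largest = None
    for bs in candidates:
        tf.reset_default_graph()
        try:
            build_train_step(bs)   # build the graph and run one training step
            largest = bs
        except tf.errors.ResourceExhaustedError:
            break                  # OOM: stop at the previous candidate
    return largest
```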
118 | 
119 | 
120 | #### pretrained model download links
121 | * [resnet_v1_50](http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz)
122 | * [resnet_v1_101](http://download.tensorflow.org/models/resnet_v1_101_2016_08_28.tar.gz)
123 | * [resnet_v1_152](http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz)
124 | * [vgg16](http://www.cs.toronto.edu/~frossard/post/vgg16/)
125 | * [vgg19](https://github.com/machrisaa/tensorflow-vgg)
126 | 
127 | 
128 | #### References
129 | 1. [InsightFace mxnet](https://github.com/deepinsight/insightface)
130 | 2. [InsightFace: Additive Angular Margin Loss for Deep Face Recognition](https://arxiv.org/abs/1801.07698)
131 | 3. [Group Normalization](https://arxiv.org/pdf/1803.08494.pdf)
132 | 4. [tensorlayer_vgg16](https://github.com/tensorlayer/tensorlayer/blob/master/example/tutorial_vgg16.py)
133 | 5. [tensorlayer_vgg19](https://github.com/tensorlayer/tensorlayer/blob/master/example/tutorial_vgg19.py)
134 | 6. [tf_slim](https://github.com/tensorflow/models/tree/master/research/slim)
135 | 7. [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
136 | 8. [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556)
137 | 9. [Squeeze-and-Excitation Networks](https://arxiv.org/pdf/1709.01507.pdf)
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/data/__init__.py
--------------------------------------------------------------------------------
/data/eval_data_reader.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import pickle
4 | import argparse
5 | import os
6 | import mxnet as mx
7 | import cv2
8 | import io
9 | import PIL.Image
10 | import mxnet.ndarray as nd
11 | 
12 | 
13 | def get_parser():
14 |     parser = argparse.ArgumentParser(description='evaluation data parser')
15 |     parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets')
16 |     # parser.add_argument('--eval_datasets', default=['cfp_fp'], help='evaluation datasets')
17 |     parser.add_argument('--eval_db_path', default='../datasets/faces_ms1m_112x112', help='evaluation datasets base path')
18 |     parser.add_argument('--image_size', default=[112, 112], help='the image size')
19 |     parser.add_argument('--tfrecords_file_path', default='../datasets/tfrecords/eval', help='output tfrecords file path')
20 |     parser.add_argument('--db_base_path', default='../datasets/faces_ms1m_112x112', help='evaluation .bin files base path')
21 |     args = parser.parse_args()
22 |     return args
23 | 
24 | # NOTE: this debug version of load_bin is shadowed by the second load_bin defined at the bottom of this file.
25 | def load_bin(path, image_size):
26 |     '''
27 |     :param path: the input file path
28 |     :param image_size: the input image size
29 |     :return: the returned datasets are in opencv BGR format, [112, 112, 3]
30 |     '''
31 |     bins, issame_list = pickle.load(open(path, 'rb'), encoding='bytes')
32 |     issame_list_int = list(map(int, issame_list))
33 |     data_list = []
34 |     for _ in [0, 1]:
35 |         data = np.zeros(shape=[len(issame_list)*2, *image_size, 3])
36 |         data_list.append(data)
37 |     for i in range(len(issame_list)*2):
38 |         _bin = bins[i]
39 |         tf_images = tf.image.decode_jpeg(_bin)
40 |         tf_images = tf.reshape(tf_images, shape=(112, 112, 3))
41 |         sess = tf.Session()
42 |         images = sess.run(tf_images)
43 |         img_cv = cv2.cvtColor(images, cv2.COLOR_RGB2BGR)
44 |         print(np.min(img_cv), np.max(img_cv), img_cv.dtype)
45 |         cv2.imshow('test', img_cv)
46 |         cv2.waitKey(0)
47 |         for flip in [0, 1]:
48 |             if flip == 1:
49 |                 # print(i, flip)
50 |                 img_cv = np.fliplr(img_cv)
51 |                 # cv2.imshow('test', img_cv)
52 |                 # cv2.waitKey(0)
53 |             data_list[flip][i][:] = img_cv
54 |         i += 1
55 |         if i % 1000 == 0:
56 |             print('loading bin', i)
57 |     print(data_list[0].shape)
58 |     return data_list, issame_list
59 | 
60 | 
61 | def mx2tfrecords(imgidx, imgrec, args):
62 |     output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
63 |     writer = tf.python_io.TFRecordWriter(output_path)
64 |     for i in imgidx:
65 |         img_info = imgrec.read_idx(i)
66 |         header, img = mx.recordio.unpack(img_info)
67 |         encoded_jpg_io = io.BytesIO(img)
68 |         image = PIL.Image.open(encoded_jpg_io)
69 |         np_img = np.array(image)
70 |         img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
71 |         img_raw = img.tobytes()
72 |         label = int(header.label)
73 |         example = tf.train.Example(features=tf.train.Features(feature={
74 |             'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
75 |             "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
76 |         }))
77 |         writer.write(example.SerializeToString())  # Serialize To String
78 |         if i % 10000 == 0:
79 |             print('%d num image processed' % i)
80 |     writer.close()
81 | 
82 | 
83 | def mx2tfrecords_eval_data(args, db_name):
84 |     '''
85 |     Convert evaluation data to tfrecords
86 |     :param args:
87 |     :param db_name: lfw, ......
88 |     :return:
89 |     '''
90 |     bins, issame_list = pickle.load(open(os.path.join(args.db_base_path, db_name+'.bin'), 'rb'), encoding='bytes')
91 |     output_image_path = os.path.join(args.tfrecords_file_path, db_name+'_eval_data.tfrecords')
92 |     writer_img = tf.python_io.TFRecordWriter(output_image_path)
93 |     for i in range(len(bins)):
94 |         img_info = bins[i]
95 |         img = mx.image.imdecode(img_info).asnumpy()
96 |         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
97 |         img_b = img.tobytes()
98 |         # # decode test
99 |         # sess = tf.Session()
100 |         # img_2 = tf.decode_raw(img_b, out_type=tf.uint8)
101 |         # img_2 = tf.reshape(img_2, shape=(112, 112, 3))
102 |         # img_2 = tf.image.flip_left_right(img_2)
103 |         # img_2_np = sess.run(img_2)
104 |         # print(img_2_np.shape)
105 |         # cv2.imshow('test', img_2_np)
106 |         # cv2.waitKey(0)
107 |         example = tf.train.Example(features=tf.train.Features(feature={
108 |             'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_b]))
109 |         }))
110 |         writer_img.write(example.SerializeToString())  # Serialize To String
111 |         if i % 1000 == 0:
112 |             print('%d num image processed' % i)
113 |     writer_img.close()
114 | 
115 | 
116 | def load_bin(db_name, image_size, args):
117 |     bins, issame_list = pickle.load(open(os.path.join(args.eval_db_path, db_name+'.bin'), 'rb'), encoding='bytes')
118 |     data_list = []
119 |     for _ in [0, 1]:
120 |         data = np.empty((len(issame_list)*2, image_size[0], image_size[1], 3))
121 |         data_list.append(data)
122 |     for i in range(len(issame_list)*2):
123 |         _bin = bins[i]
124 |         img = mx.image.imdecode(_bin).asnumpy()
125 |         img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
126 |         for flip in [0, 1]:
127 |             if flip == 1:
128 |                 img = np.fliplr(img)
129 |             data_list[flip][i, ...] = img
130 |         i += 1
131 |         if i % 1000 == 0:
132 |             print('loading bin', i)
133 |     print(data_list[0].shape)
134 |     return data_list, issame_list
135 | 
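A note on the structure `load_bin` returns (annotation, derived from the code above):

```python
# data_list[0][i]  -> i-th evaluation image (BGR, 112x112x3)
# data_list[1][i]  -> its horizontally flipped copy
# images come in pairs: issame_list[k] says whether images 2k and 2k+1
# show the same identity, so len(data_list[0]) == 2 * len(issame_list)
```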
136 | 
137 | if __name__ == '__main__':
138 |     args = get_parser()
139 |     ver_list = []
140 |     ver_name_list = []
141 |     for db in args.eval_datasets:
142 |         print('begin db %s convert.' % db)
143 |         # mx2tfrecords_eval_data(args, db)
144 |         data_set = load_bin(db, args.image_size, args)  # args was missing here; this load_bin requires it
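After converting an evaluation set it is worth checking that a record decodes back to a valid image. A minimal hedged sketch (the path and the `lfw` name are assumptions based on the defaults above):

```python
import numpy as np
import tensorflow as tf
import cv2

# read the first record of the converted lfw eval set (path assumed from the defaults above)
record_path = '../datasets/tfrecords/eval/lfw_eval_data.tfrecords'
record = next(tf.python_io.tf_record_iterator(record_path))
example = tf.train.Example()
example.ParseFromString(record)
img_bytes = example.features.feature['image_raw'].bytes_list.value[0]

# images were stored as raw BGR uint8 bytes of shape (112, 112, 3)
img = np.frombuffer(img_bytes, dtype=np.uint8).reshape(112, 112, 3)
cv2.imwrite('check.png', img)  # should look like an aligned face crop
```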
--------------------------------------------------------------------------------
/data/mx2tfrecords.py:
--------------------------------------------------------------------------------
1 | import mxnet as mx
2 | import argparse
3 | import PIL.Image
4 | import io
5 | import numpy as np
6 | import cv2
7 | import tensorflow as tf
8 | import os
9 | 
10 | 
11 | def parse_args():
12 |     parser = argparse.ArgumentParser(
13 |         formatter_class=argparse.ArgumentDefaultsHelpFormatter,
14 |         description='data path information'
15 |     )
16 |     parser.add_argument('--bin_path', default='../datasets/faces_ms1m_112x112/train.rec', type=str,
17 |                         help='path to the binary image file')
18 |     parser.add_argument('--idx_path', default='../datasets/faces_ms1m_112x112/train.idx', type=str,
19 |                         help='path to the image index file')
20 |     parser.add_argument('--tfrecords_file_path', default='../datasets/tfrecords', type=str,
21 |                         help='path to the output tfrecords file')
22 |     args = parser.parse_args()
23 |     return args
24 | 
25 | 
26 | def mx2tfrecords_old(imgidx, imgrec, args):
27 |     output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
28 |     writer = tf.python_io.TFRecordWriter(output_path)
29 |     for i in imgidx:
30 |         img_info = imgrec.read_idx(i)
31 |         header, img = mx.recordio.unpack(img_info)
32 |         encoded_jpg_io = io.BytesIO(img)
33 |         image = PIL.Image.open(encoded_jpg_io)
34 |         np_img = np.array(image)
35 |         img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR)
36 |         img_raw = img.tobytes()
37 |         label = int(header.label)
38 |         example = tf.train.Example(features=tf.train.Features(feature={
39 |             'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
40 |             "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
41 |         }))
42 |         writer.write(example.SerializeToString())  # Serialize To String
43 |         if i % 10000 == 0:
44 |             print('%d num image processed' % i)
45 |     writer.close()
46 | 
47 | 
48 | def mx2tfrecords(imgidx, imgrec, args):
49 |     output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
50 |     writer = tf.python_io.TFRecordWriter(output_path)
51 |     for i in imgidx:
52 |         img_info = imgrec.read_idx(i)
53 |         header, img = mx.recordio.unpack(img_info)
54 |         label = int(header.label)
55 |         example = tf.train.Example(features=tf.train.Features(feature={
56 |             'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img])),
57 |             "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
58 |         }))
59 |         writer.write(example.SerializeToString())  # Serialize To String
60 |         if i % 10000 == 0:
61 |             print('%d num image processed' % i)
62 |     writer.close()
63 | 
64 | 
65 | def parse_function(example_proto):
66 |     features = {'image_raw': tf.FixedLenFeature([], tf.string),
67 |                 'label': tf.FixedLenFeature([], tf.int64)}
68 |     features = tf.parse_single_example(example_proto, features)
69 |     # You can do more image distortion here for training data
70 |     img = tf.image.decode_jpeg(features['image_raw'])
71 |     img = tf.reshape(img, shape=(112, 112, 3))
72 |     r, g, b = tf.split(img, num_or_size_splits=3, axis=-1)
73 |     img = tf.concat([b, g, r], axis=-1)
74 |     img = tf.cast(img, dtype=tf.float32)
75 |     img = tf.subtract(img, 127.5)
76 |     img = tf.multiply(img, 0.0078125)
77 |     img = tf.image.random_flip_left_right(img)
78 |     label = tf.cast(features['label'], tf.int64)
79 |     return img, label
80 | 
81 | 
82 | if __name__ == '__main__':
83 |     # # define parameters
84 |     # id2range = {}
85 |     # data_shape = (3, 112, 112)
86 |     args = parse_args()
87 |     # imgrec = mx.recordio.MXIndexedRecordIO(args.idx_path, args.bin_path, 'r')
88 |     # s = imgrec.read_idx(0)
89 |     # header, _ = mx.recordio.unpack(s)
90 |     # print(header.label)
91 |     # imgidx = list(range(1, int(header.label[0])))
92 |     # seq_identity = range(int(header.label[0]), int(header.label[1]))
93 |     # for identity in seq_identity:
94 |     #     s = imgrec.read_idx(identity)
95 |     #     header, _ = mx.recordio.unpack(s)
96 |     #     a, b = int(header.label[0]), int(header.label[1])
97 |     #     id2range[identity] = (a, b)
98 |     # print('id2range', len(id2range))
99 | 
100 |     # # generate tfrecords
101 |     # mx2tfrecords(imgidx, imgrec, args)
102 | 
103 |     config = tf.ConfigProto(allow_soft_placement=True)
104 |     sess = tf.Session(config=config)
105 |     # training datasets api config
106 |     tfrecords_f = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
107 |     dataset = tf.data.TFRecordDataset(tfrecords_f)
108 |     dataset = dataset.map(parse_function)
109 |     dataset = dataset.shuffle(buffer_size=30000)
110 |     dataset = dataset.batch(32)
111 |     iterator = dataset.make_initializable_iterator()
112 |     next_element = iterator.get_next()
113 |     # begin iteration
114 |     for i in range(1000):
115 |         sess.run(iterator.initializer)
116 |         while True:
117 |             try:
118 |                 images, labels = sess.run(next_element)
119 |                 # note: images are float32 in roughly [-1, 1]; scale back for display
120 |                 cv2.imshow('test', images[1, ...])
121 |                 cv2.waitKey(0)
122 |             except tf.errors.OutOfRangeError:
123 |                 print("End of dataset")
124 |                 break  # without this break the inner while-loop never terminates
125 | 
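The `__main__` block above re-initializes the iterator once per epoch and displays batches for inspection. For training, an equivalent pipeline can let `tf.data` handle the epochs itself; a hedged sketch reusing `parse_function`:

```python
# sketch: let tf.data handle epochs instead of re-initializing per epoch
dataset = tf.data.TFRecordDataset(tfrecords_f)
dataset = dataset.map(parse_function)
dataset = dataset.shuffle(buffer_size=30000).batch(32).repeat(1000)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()
# sess.run(next_element) until tf.errors.OutOfRangeError is raised once, at the very end
```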
--------------------------------------------------------------------------------
/eval_ckpt_file.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import argparse
3 | from data.eval_data_reader import load_bin
4 | from losses.face_losses import arcface_loss
5 | from nets.L_Resnet_E_IR import get_resnet
6 | import tensorlayer as tl
7 | from verification import ver_test
8 | 
9 | 
10 | def get_args():
11 |     parser = argparse.ArgumentParser(description='input information')
12 |     parser.add_argument('--ckpt_file', default='/home/aurora/workspaces2018/InsightFace_TF/output/ckpt_model_c/InsightFace_iter_best_',
13 |                         type=str, help='the ckpt file path')
14 |     # parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets')
15 |     parser.add_argument('--eval_datasets', default=['agedb_30'], help='evaluation datasets')
16 |     parser.add_argument('--eval_db_path', default='./datasets/faces_ms1m_112x112', help='evaluation datasets base path')
17 |     parser.add_argument('--image_size', default=[112, 112], help='the image size')
18 |     parser.add_argument('--net_depth', default=50, help='resnet depth, default is 50')
19 |     parser.add_argument('--num_output', default=85164, help='number of output classes')
20 |     parser.add_argument('--batch_size', default=32, help='batch size used for evaluation')
21 |     parser.add_argument('--ckpt_index_list',
22 |                         default=['1950000.ckpt'], help='ckpt file indexes')
23 |     args = parser.parse_args()
24 |     return args
25 | 
26 | 
27 | if __name__ == '__main__':
28 |     args = get_args()
29 |     ver_list = []
30 |     ver_name_list = []
31 |     for db in args.eval_datasets:
32 |         print('begin db %s load.' % db)
33 |         data_set = load_bin(db, args.image_size, args)
34 |         ver_list.append(data_set)
35 |         ver_name_list.append(db)
36 | 
37 |     images = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32)
38 |     labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64)
39 |     dropout_rate = tf.placeholder(name='dropout_rate', dtype=tf.float32)
40 | 
41 |     w_init_method = tf.contrib.layers.xavier_initializer(uniform=False)
42 |     net = get_resnet(images, args.net_depth, type='ir', w_init=w_init_method, trainable=False, keep_rate=dropout_rate)
43 |     embedding_tensor = net.outputs
44 |     # mv_mean = tl.layers.get_variables_with_name('resnet_v1_50/bn0/moving_mean', False, True)[0]
45 |     # 3.2 get arcface loss
46 |     logit = arcface_loss(embedding=net.outputs, labels=labels, w_init=w_init_method, out_num=args.num_output)
47 | 
48 |     sess = tf.Session()
49 |     saver = tf.train.Saver()
50 | 
51 |     result_index = []
52 |     for file_index in args.ckpt_index_list:
53 |         feed_dict_test = {}
54 |         path = args.ckpt_file + file_index
55 |         saver.restore(sess, path)
56 |         print('ckpt file %s restored!' % file_index)
57 |         feed_dict_test.update(tl.utils.dict_to_one(net.all_drop))
58 |         feed_dict_test[dropout_rate] = 1.0
59 |         results = ver_test(ver_list=ver_list, ver_name_list=ver_name_list, nbatch=0, sess=sess,
60 |                            embedding_tensor=embedding_tensor, batch_size=args.batch_size, feed_dict=feed_dict_test,
61 |                            input_placeholder=images)
62 |         result_index.append(results)
63 |     print(result_index)
64 | 
--------------------------------------------------------------------------------
/figures/lfw_250k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/figures/lfw_250k.png
--------------------------------------------------------------------------------
/figures/lfw_310k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/figures/lfw_310k.png
--------------------------------------------------------------------------------
/figures/lfw_730k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/figures/lfw_730k.png
--------------------------------------------------------------------------------
/figures/model_a_170k.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/figures/model_a_170k.png
--------------------------------------------------------------------------------
/losses/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/losses/__init__.py
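The next file, `losses/face_losses.py`, implements the margin losses used by `eval_ckpt_file.py` above. As a usage reference, a minimal hedged sketch of wiring `arcface_loss` into a cross-entropy objective (the 512-d embedding size and 85164 classes mirror the defaults used elsewhere in this repo):

```python
import tensorflow as tf
from losses.face_losses import arcface_loss

embedding = tf.placeholder(tf.float32, [None, 512], name='embedding')
labels = tf.placeholder(tf.int64, [None], name='labels')
w_init = tf.contrib.layers.xavier_initializer(uniform=False)
# arcface_loss returns *logits* (s * cos(theta + m) on the target class), not a scalar loss
logits = arcface_loss(embedding=embedding, labels=labels, w_init=w_init, out_num=85164)
loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits))
```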
--------------------------------------------------------------------------------
/losses/face_losses.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import math
3 | 
4 | 
5 | def arcface_loss(embedding, labels, out_num, w_init=None, s=64., m=0.5):
6 |     '''
7 |     :param embedding: the input embedding vectors
8 |     :param labels: the input labels, shape should be e.g. (batch_size,)
9 |     :param s: scale factor, default is 64
10 |     :param out_num: output class num
11 |     :param m: the margin value, default is 0.5
12 |     :return: the final calculated output; this output is sent into tf.nn.softmax directly
13 |     '''
14 |     cos_m = math.cos(m)
15 |     sin_m = math.sin(m)
16 |     mm = sin_m * m  # issue 1
17 |     threshold = math.cos(math.pi - m)
18 |     with tf.variable_scope('arcface_loss'):
19 |         # inputs and weights norm
20 |         embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
21 |         embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
22 |         weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
23 |                                   initializer=w_init, dtype=tf.float32)
24 |         weights_norm = tf.norm(weights, axis=0, keep_dims=True)
25 |         weights = tf.div(weights, weights_norm, name='norm_weights')
26 |         # cos(theta+m)
27 |         cos_t = tf.matmul(embedding, weights, name='cos_t')
28 |         cos_t2 = tf.square(cos_t, name='cos_2')
29 |         sin_t2 = tf.subtract(1., cos_t2, name='sin_2')
30 |         sin_t = tf.sqrt(sin_t2, name='sin_t')
31 |         cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt')
32 | 
33 |         # this condition controls that theta+m stays in range [0, pi]
34 |         #     0 <= theta+m <= pi
35 |         #     -m <= theta <= pi-m
36 |         cond_v = cos_t - threshold
37 |         cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool)
38 | 
39 |         keep_val = s*(cos_t - mm)
40 |         cos_mt_temp = tf.where(cond, cos_mt, keep_val)
41 | 
42 |         mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
43 |         # mask = tf.squeeze(mask, 1)
44 |         inv_mask = tf.subtract(1., mask, name='inverse_mask')
45 | 
46 |         s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t')
47 | 
48 |         output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_loss_output')
49 |     return output
50 | 
51 | 
52 | def cosineface_losses(embedding, labels, out_num, w_init=None, s=30., m=0.4):
53 |     '''
54 |     :param embedding: the input embedding vectors
55 |     :param labels: the input labels, shape should be e.g. (batch_size,)
56 |     :param s: scale factor, default is 30
57 |     :param out_num: output class num
58 |     :param m: the margin value, default is 0.4
59 |     :return: the final calculated output; this output is sent into tf.nn.softmax directly
60 |     '''
61 |     with tf.variable_scope('cosineface_loss'):
62 |         # inputs and weights norm
63 |         embedding_norm = tf.norm(embedding, axis=1, keep_dims=True)
64 |         embedding = tf.div(embedding, embedding_norm, name='norm_embedding')
65 |         weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
66 |                                   initializer=w_init, dtype=tf.float32)
67 |         weights_norm = tf.norm(weights, axis=0, keep_dims=True)
68 |         weights = tf.div(weights, weights_norm, name='norm_weights')
69 |         # cos_theta - m
70 |         cos_t = tf.matmul(embedding, weights, name='cos_t')
71 |         cos_t_m = tf.subtract(cos_t, m, name='cos_t_m')
72 | 
73 |         mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask')
74 |         inv_mask = tf.subtract(1., mask, name='inverse_mask')
75 | 
76 |         output = tf.add(s * tf.multiply(cos_t, inv_mask), s * tf.multiply(cos_t_m, mask), name='cosineface_loss_output')
77 |     return output
78 | 
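For clarity (annotation, not part of the original file): the two losses above differ only in where the margin enters the target logit.

```
ArcFace:     logit_yi = s * cos(theta_yi + m)       # additive angular margin, m = 0.5
CosineFace:  logit_yi = s * (cos(theta_yi) - m)     # additive cosine margin,  m = 0.4
Non-target:  logit_j  = s * cos(theta_j)            # identical in both losses
```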
79 | 
80 | def combine_loss_val(embedding, labels, w_init, out_num, margin_a, margin_m, margin_b, s):
81 |     '''
82 |     This code is contributed by RogerLo. Thanks for your contribution.
83 | 
84 |     :param embedding: the input embedding vectors
85 |     :param labels: the input labels, shape should be e.g. (batch_size,)
86 |     :param s: scale factor, default is 64
87 |     :param out_num: output class num
88 |     :param margin_a, margin_m, margin_b: combined-margin parameters for cos(margin_a*theta + margin_m) - margin_b; (1.0, 0.5, 0.0) recovers ArcFace and (1.0, 0.0, m) recovers CosineFace
89 |     :return: the final calculated output; this output is sent into tf.nn.softmax directly
90 |     '''
91 |     weights = tf.get_variable(name='embedding_weights', shape=(embedding.get_shape().as_list()[-1], out_num),
92 |                               initializer=w_init, dtype=tf.float32)
93 |     weights_unit = tf.nn.l2_normalize(weights, axis=0)
94 |     embedding_unit = tf.nn.l2_normalize(embedding, axis=1)
95 |     cos_t = tf.matmul(embedding_unit, weights_unit)
96 |     ordinal = tf.constant(list(range(0, embedding.get_shape().as_list()[0])), tf.int64)
97 |     ordinal_y = tf.stack([ordinal, labels], axis=1)
98 |     zy = cos_t * s
99 |     sel_cos_t = tf.gather_nd(zy, ordinal_y)
100 |     if margin_a != 1.0 or margin_m != 0.0 or margin_b != 0.0:  # note: if all margins are trivial, new_zy below stays undefined
101 |         if margin_a == 1.0 and margin_m == 0.0:
102 |             s_m = s * margin_b
103 |             new_zy = sel_cos_t - s_m
104 |         else:
105 |             cos_value = sel_cos_t / s
106 |             t = tf.acos(cos_value)
107 |             if margin_a != 1.0:
108 |                 t = t * margin_a
109 |             if margin_m > 0.0:
110 |                 t = t + margin_m
111 |             body = tf.cos(t)
112 |             if margin_b > 0.0:
113 |                 body = body - margin_b
114 |             new_zy = body * s
115 |     updated_logits = tf.add(zy, tf.scatter_nd(ordinal_y, tf.subtract(new_zy, sel_cos_t), zy.get_shape()))
116 |     loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=updated_logits))
117 |     predict_cls = tf.argmax(updated_logits, 1)
118 |     accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.cast(predict_cls, tf.int64), tf.cast(labels, tf.int64)), 'float'))
119 |     predict_cls_s = tf.argmax(zy, 1)
120 |     accuracy_s = tf.reduce_mean(tf.cast(tf.equal(tf.cast(predict_cls_s, tf.int64), tf.cast(labels, tf.int64)), 'float'))
121 |     return zy, loss, accuracy, accuracy_s, predict_cls_s
--------------------------------------------------------------------------------
/nets/L_Resnet_E_IR_GBN.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorlayer as tl
3 | from tensorflow.contrib.layers.python.layers import utils
4 | import collections
5 | from tensorlayer.layers import Layer, list_remove_repeat
6 | from tl_layers_modify import GroupNormLayer
7 | 
8 | 
9 | class ElementwiseLayer(Layer):
10 |     """
11 |     The :class:`ElementwiseLayer` class combines multiple :class:`Layer` outputs that have the same shape by a given element-wise operation.
12 | 
13 |     Parameters
14 |     ----------
15 |     layer : a list of :class:`Layer` instances
16 |         The `Layer` class feeding into this layer.
17 |     combine_fn : a TensorFlow element-wise merge function
18 |         e.g. AND is ``tf.minimum`` ; OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.multiply`` and so on.
19 |         See the TensorFlow Math API for the full list.
20 |     name : a string or None
21 |         An optional name to attach to this layer.
22 | """ 23 | def __init__( 24 | self, 25 | layer = [], 26 | combine_fn = tf.minimum, 27 | name ='elementwise_layer', 28 | act = None, 29 | ): 30 | Layer.__init__(self, name=name) 31 | 32 | if act: 33 | print(" [TL] ElementwiseLayer %s: size:%s fn:%s, act:%s" % ( 34 | self.name, layer[0].outputs.get_shape(), combine_fn.__name__, act.__name__)) 35 | else: 36 | print(" [TL] ElementwiseLayer %s: size:%s fn:%s" % ( 37 | self.name, layer[0].outputs.get_shape(), combine_fn.__name__)) 38 | 39 | self.outputs = layer[0].outputs 40 | # print(self.outputs._shape, type(self.outputs._shape)) 41 | for l in layer[1:]: 42 | # assert str(self.outputs.get_shape()) == str(l.outputs.get_shape()), "Hint: the input shapes should be the same. %s != %s" % (self.outputs.get_shape() , str(l.outputs.get_shape())) 43 | self.outputs = combine_fn(self.outputs, l.outputs, name=name) 44 | if act: 45 | self.outputs = act(self.outputs) 46 | self.all_layers = list(layer[0].all_layers) 47 | self.all_params = list(layer[0].all_params) 48 | self.all_drop = dict(layer[0].all_drop) 49 | 50 | for i in range(1, len(layer)): 51 | self.all_layers.extend(list(layer[i].all_layers)) 52 | self.all_params.extend(list(layer[i].all_params)) 53 | self.all_drop.update(dict(layer[i].all_drop)) 54 | 55 | self.all_layers = list_remove_repeat(self.all_layers) 56 | self.all_params = list_remove_repeat(self.all_params) 57 | 58 | 59 | def subsample(inputs, factor, scope=None): 60 | if factor == 1: 61 | return inputs 62 | else: 63 | return tl.layers.MaxPool2d(inputs, [1, 1], strides=(factor, factor), name=scope) 64 | 65 | 66 | def conv2d_same(inputs, num_outputs, kernel_size, strides, rate=1, w_init=None, scope=None, trainable=None): 67 | ''' 68 | Reference slim resnet 69 | :param inputs: 70 | :param num_outputs: 71 | :param kernel_size: 72 | :param strides: 73 | :param rate: 74 | :param scope: 75 | :return: 76 | ''' 77 | if strides == 1: 78 | if rate == 1: 79 | nets = tl.layers.Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 80 | strides=(strides, strides), W_init=w_init, act=None, padding='SAME', name=scope, 81 | use_cudnn_on_gpu=True) 82 | nets = GroupNormLayer(layer=nets, act=tf.identity, name=scope+'_bn/GroupNorm') 83 | else: 84 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), 85 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 86 | nets = GroupNormLayer(layer=nets, act=tf.identity, name=scope+'_bn/GroupNorm') 87 | return nets 88 | else: 89 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) 90 | pad_total = kernel_size_effective - 1 91 | pad_beg = pad_total // 2 92 | pad_end = pad_total - pad_beg 93 | inputs = tl.layers.PadLayer(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]], name='padding_%s' % scope) 94 | if rate == 1: 95 | nets = tl.layers.Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 96 | strides=(strides, strides), W_init=w_init, act=None, padding='VALID', name=scope, 97 | use_cudnn_on_gpu=True) 98 | nets = GroupNormLayer(layer=nets, act=tf.identity, name=scope+'_bn/GroupNorm') 99 | else: 100 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 101 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 102 | nets = GroupNormLayer(layer=nets, act=tf.identity, name=scope+'_bn/GroupNorm') 103 | return nets 104 | 105 | 106 | def bottleneck(inputs, depth, depth_bottleneck, 
stride, rate=1, scope=None): 107 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 108 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 109 | if depth == depth_in: 110 | shortcut = subsample(inputs, stride, 'shortcut') 111 | else: 112 | shortcut = tl.layers.Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 113 | b_init=None, name='shortcut_conv') 114 | shortcut = GroupNormLayer(layer=shortcut, act=tf.identity, name='shortcut_bn/BatchNorm') 115 | # bottleneck layer 1 116 | residual = tl.layers.Conv2d(inputs, depth_bottleneck, filter_size=(1, 1), strides=(1, 1), act=None, b_init=None, 117 | name='conv1') 118 | residual = GroupNormLayer(layer=residual, act=tf.nn.relu, name='conv1_bn/BatchNorm') 119 | 120 | # bottleneck layer 2 121 | residual = conv2d_same(residual, depth_bottleneck, kernel_size=3, strides= stride, rate=rate, scope='conv2') 122 | 123 | # bottleneck layer 3 124 | residual = tl.layers.Conv2d(residual, depth, filter_size=(1, 1), strides=(1, 1), act=None, b_init=None, 125 | name='conv3') 126 | residual = GroupNormLayer(layer=residual, act=tf.identity, name='conv3_bn/BatchNorm', 127 | scale_init=tf.constant_initializer(0.0)) 128 | output = ElementwiseLayer(layer=[shortcut, residual], 129 | combine_fn=tf.add, 130 | name='combine_layer', 131 | act=tf.nn.relu) 132 | return output 133 | 134 | 135 | def bottleneck_IR(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 136 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 137 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 138 | if depth == depth_in: 139 | shortcut = subsample(inputs, stride, 'shortcut') 140 | else: 141 | shortcut = tl.layers.Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 142 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 143 | shortcut = GroupNormLayer(layer=shortcut, act=tf.identity, name='shortcut_bn/BatchNorm') 144 | # bottleneck layer 1 145 | residual = GroupNormLayer(layer=inputs, act=tf.identity, name='conv1_bn1') 146 | residual = tl.layers.Conv2d(residual, depth_bottleneck, filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 147 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 148 | residual = GroupNormLayer(layer=residual, act=tf.identity, name='conv1_bn2') 149 | # bottleneck prelu 150 | residual = tl.layers.PReluLayer(residual) 151 | # bottleneck layer 2 152 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 153 | output = ElementwiseLayer(layer=[shortcut, residual], 154 | combine_fn=tf.add, 155 | name='combine_layer', 156 | act=None) 157 | return output 158 | 159 | 160 | def bottleneck_IR_SE(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 161 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 162 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 163 | if depth == depth_in: 164 | shortcut = subsample(inputs, stride, 'shortcut') 165 | else: 166 | shortcut = tl.layers.Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 167 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 168 | shortcut = GroupNormLayer(layer=shortcut, act=tf.identity, name='shortcut_bn/BatchNorm') 169 | residual = GroupNormLayer(layer=inputs, act=tf.identity, name='conv1_bn1') 170 | residual = tl.layers.Conv2d(residual, depth_bottleneck, 
filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 171 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 172 | residual = GroupNormLayer(layer=residual, act=tf.identity, name='conv1_bn2') 173 | # bottleneck prelu 174 | residual = tl.layers.PReluLayer(residual) 175 | # bottleneck layer 2 176 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 177 | # squeeze 178 | squeeze = tl.layers.InputLayer(tf.reduce_mean(residual.outputs, axis=[1, 2]), name='squeeze_layer') 179 | # excitation 180 | excitation1 = tl.layers.DenseLayer(squeeze, n_units=int(depth/16.0), act=tf.nn.relu, 181 | W_init=w_init, name='excitation_1') 182 | # excitation1 = tl.layers.PReluLayer(excitation1, name='excitation_prelu') 183 | excitation2 = tl.layers.DenseLayer(excitation1, n_units=depth, act=tf.nn.sigmoid, 184 | W_init=w_init, name='excitation_2') 185 | # scale 186 | scale = tl.layers.ReshapeLayer(excitation2, shape=[tf.shape(excitation2.outputs)[0], 1, 1, depth], name='excitation_reshape') 187 | 188 | residual_se = ElementwiseLayer(layer=[residual, scale], 189 | combine_fn=tf.multiply, 190 | name='scale_layer', 191 | act=None) 192 | 193 | output = ElementwiseLayer(layer=[shortcut, residual_se], 194 | combine_fn=tf.add, 195 | name='combine_layer', 196 | act=tf.nn.relu) 197 | return output 198 | 199 | 200 | def resnet(inputs, bottle_neck, blocks, w_init=None, trainable=None, scope=None): 201 | with tf.variable_scope(scope): 202 | net_inputs = tl.layers.InputLayer(inputs, name='input_layer') 203 | if bottle_neck: 204 | net = tl.layers.Conv2d(net_inputs, n_filter=64, filter_size=(3, 3), strides=(1, 1), 205 | act=None, W_init=w_init, b_init=None, name='conv1', use_cudnn_on_gpu=True) 206 | net = GroupNormLayer(layer=net, act=tf.identity, name='group_norm_0') 207 | net = tl.layers.PReluLayer(net, name='prelu0') 208 | else: 209 | raise ValueError('The standard resnet must support the bottleneck layer') 210 | for block in blocks: 211 | with tf.variable_scope(block.scope): 212 | for i, var in enumerate(block.args): 213 | with tf.variable_scope('unit_%d' % (i+1)): 214 | net = block.unit_fn(net, depth=var['depth'], depth_bottleneck=var['depth_bottleneck'], 215 | w_init=w_init, stride=var['stride'], rate=var['rate'], scope=None, 216 | trainable=trainable) 217 | net = GroupNormLayer(layer=net, act=tf.identity, name='E_GN_0') 218 | net = tl.layers.DropoutLayer(net, keep=0.4, name='E_Dropout') 219 | net_shape = net.outputs.get_shape() 220 | net = tl.layers.ReshapeLayer(net, shape=[-1, net_shape[1]*net_shape[2]*net_shape[3]], name='E_Reshapelayer') 221 | net = tl.layers.DenseLayer(net, n_units=512, W_init=w_init, name='E_DenseLayer') 222 | # net = GroupNormLayer(layer=net, act=tf.identity, name='E_GN_1') 223 | return net 224 | 225 | 226 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 227 | """A named tuple describing a ResNet block. 228 | 229 | Its parts are: 230 | scope: The scope of the `Block`. 231 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 232 | returns another `Tensor` with the output of the ResNet unit. 233 | args: A list of length equal to the number of units in the `Block`. The list 234 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 235 | block to serve as argument to unit_fn. 
236 | """ 237 | 238 | 239 | def resnetse_v1_block(scope, base_depth, num_units, stride, rate=1, unit_fn=None): 240 | """Helper function for creating a resnet_v1 bottleneck block. 241 | 242 | Args: 243 | scope: The scope of the block. 244 | base_depth: The depth of the bottleneck layer for each unit. 245 | num_units: The number of units in the block. 246 | stride: The stride of the block, implemented as a stride in the last unit. 247 | All other units have stride=1. 248 | 249 | Returns: 250 | A resnet_v1 bottleneck block. 251 | """ 252 | return Block(scope, unit_fn, [{ 253 | 'depth': base_depth * 4, 254 | 'depth_bottleneck': base_depth, 255 | 'stride': stride, 256 | 'rate': rate 257 | }] + [{ 258 | 'depth': base_depth * 4, 259 | 'depth_bottleneck': base_depth, 260 | 'stride': 1, 261 | 'rate': rate 262 | }] * (num_units - 1)) 263 | 264 | 265 | def resnetse_v1_block_2(scope, base_depth, num_units, stride, rate=1, unit_fn=None): 266 | """Helper function for creating a resnet_v1 bottleneck block. 267 | 268 | Args: 269 | scope: The scope of the block. 270 | base_depth: The depth of the bottleneck layer for each unit. 271 | num_units: The number of units in the block. 272 | stride: The stride of the block, implemented as a stride in the last unit. 273 | All other units have stride=1. 274 | 275 | Returns: 276 | A resnet_v1 bottleneck block. 277 | """ 278 | return Block(scope, unit_fn, [{ 279 | 'depth': base_depth * 4, 280 | 'depth_bottleneck': base_depth, 281 | 'stride': 1, 282 | 'rate': rate 283 | }] * (num_units - 1) + [{ 284 | 'depth': base_depth * 4, 285 | 'depth_bottleneck': base_depth, 286 | 'stride': stride, 287 | 'rate': rate 288 | }]) 289 | 290 | 291 | def get_resnet(inputs, num_layers, type=None, w_init=None, trainable=None, sess=None): 292 | if type == 'ir': 293 | unit_fn = bottleneck_IR 294 | elif type == 'se_ir': 295 | unit_fn = bottleneck_IR_SE 296 | # elif type == 'resnet': 297 | # unit_fn = bottleneck 298 | # blocks = [ 299 | # resnetse_v1_block_2('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 300 | # resnetse_v1_block_2('block2', base_depth=128, num_units=4, stride=2, rate=1, unit_fn=unit_fn), 301 | # resnetse_v1_block_2('block3', base_depth=256, num_units=6, stride=2, rate=1, unit_fn=unit_fn), 302 | # resnetse_v1_block_2('block4', base_depth=512, num_units=3, stride=1, rate=1, unit_fn=unit_fn) 303 | # ] 304 | else: 305 | raise ValueError('the input fn is unknown') 306 | 307 | if num_layers == 50: 308 | blocks = [ 309 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 310 | resnetse_v1_block('block2', base_depth=128, num_units=4, stride=2, rate=1, unit_fn=unit_fn), 311 | resnetse_v1_block('block3', base_depth=256, num_units=14, stride=2, rate=1, unit_fn=unit_fn), 312 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 313 | ] 314 | elif num_layers == 101: 315 | blocks = [ 316 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 317 | resnetse_v1_block('block2', base_depth=128, num_units=13, stride=2, rate=1, unit_fn=unit_fn), 318 | resnetse_v1_block('block3', base_depth=256, num_units=30, stride=2, rate=1, unit_fn=unit_fn), 319 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 320 | ] 321 | elif num_layers == 152: 322 | blocks = [ 323 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 324 | resnetse_v1_block('block2', base_depth=128, 
num_units=8, stride=2, rate=1, unit_fn=unit_fn), 325 | resnetse_v1_block('block3', base_depth=256, num_units=36, stride=2, rate=1, unit_fn=unit_fn), 326 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 327 | ] 328 | else: 329 | raise ValueError('Resnet layer %d is not supported now.' % num_layers) 330 | net = resnet(inputs=inputs, 331 | bottle_neck=True, 332 | blocks=blocks, 333 | w_init=w_init, 334 | trainable=trainable, 335 | scope='resnet_v1_%d' % num_layers) 336 | return net 337 | 338 | 339 | if __name__ == '__main__': 340 | x = tf.placeholder(dtype=tf.float32, shape=[None, 112, 112, 3], name='input_place') 341 | sess = tf.Session() 342 | # w_init = tf.truncated_normal_initializer(mean=10, stddev=5e-2) 343 | w_init = tf.contrib.layers.xavier_initializer(uniform=False) 344 | # test resnetse 345 | nets = get_resnet(x, 50, type='ir', w_init=w_init, sess=sess) 346 | tl.layers.initialize_global_variables(sess) 347 | 348 | for p in tl.layers.get_variables_with_name('W_conv2d', True, True): 349 | print(p.op.name) 350 | print('##############'*30) 351 | with sess: 352 | nets.print_params() 353 | -------------------------------------------------------------------------------- /nets/L_Resnet_E_IR_MGPU.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | from tensorflow.contrib.layers.python.layers import utils 4 | import collections 5 | from tl_layers_modify import ElementwiseLayer, BatchNormLayer, Conv2d, PReluLayer, DenseLayer 6 | 7 | 8 | def subsample(inputs, factor, scope=None): 9 | if factor == 1: 10 | return inputs 11 | else: 12 | return tl.layers.MaxPool2d(inputs, [1, 1], strides=(factor, factor), name=scope) 13 | 14 | 15 | def conv2d_same(inputs, num_outputs, kernel_size, strides, rate=1, w_init=None, scope=None, trainable=None): 16 | ''' 17 | Reference slim resnet 18 | :param inputs: 19 | :param num_outputs: 20 | :param kernel_size: 21 | :param strides: 22 | :param rate: 23 | :param scope: 24 | :return: 25 | ''' 26 | if strides == 1: 27 | if rate == 1: 28 | nets = Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 29 | strides=(strides, strides), W_init=w_init, act=None, padding='SAME', name=scope, 30 | use_cudnn_on_gpu=True) 31 | nets = BatchNormLayer(nets, act=tf.identity, is_train=True, trainable=trainable, name=scope+'_bn/BatchNorm') 32 | else: 33 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), 34 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 35 | nets = BatchNormLayer(nets, act=tf.identity, is_train=True, trainable=trainable, name=scope+'_bn/BatchNorm') 36 | return nets 37 | else: 38 | kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1) 39 | pad_total = kernel_size_effective - 1 40 | pad_beg = pad_total // 2 41 | pad_end = pad_total - pad_beg 42 | inputs = tl.layers.PadLayer(inputs, [[0, 0], [pad_beg, pad_end], [pad_beg, pad_end], [0, 0]], name='padding_%s' % scope) 43 | if rate == 1: 44 | nets = Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 45 | strides=(strides, strides), W_init=w_init, act=None, padding='VALID', name=scope, 46 | use_cudnn_on_gpu=True) 47 | nets = BatchNormLayer(nets, act=tf.identity, is_train=True, trainable=trainable, name=scope+'_bn/BatchNorm') 48 | else: 49 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, 
kernel_size), b_init=None, 50 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 51 | nets = BatchNormLayer(nets, act=tf.identity, is_train=True, trainable=trainable, name=scope+'_bn/BatchNorm') 52 | return nets 53 | 54 | 55 | def bottleneck_IR(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 56 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 57 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 58 | if depth == depth_in: 59 | shortcut = subsample(inputs, stride, 'shortcut') 60 | else: 61 | shortcut = Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 62 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 63 | shortcut = BatchNormLayer(shortcut, act=tf.identity, is_train=True, trainable=trainable, name='shortcut_bn/BatchNorm') 64 | # bottleneck layer 1 65 | residual = BatchNormLayer(inputs, act=tf.identity, is_train=True, trainable=trainable, name='conv1_bn1') 66 | residual = Conv2d(residual, depth_bottleneck, filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 67 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 68 | residual = BatchNormLayer(residual, act=tf.identity, is_train=True, trainable=trainable, name='conv1_bn2') 69 | # bottleneck prelu 70 | residual = PReluLayer(residual) 71 | # bottleneck layer 2 72 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 73 | output = ElementwiseLayer(layer=[shortcut, residual], 74 | combine_fn=tf.add, 75 | name='combine_layer', 76 | act=None) 77 | return output 78 | 79 | 80 | def bottleneck_IR_SE(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 81 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 82 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 83 | if depth == depth_in: 84 | shortcut = subsample(inputs, stride, 'shortcut') 85 | else: 86 | shortcut = Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 87 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 88 | shortcut = BatchNormLayer(shortcut, act=tf.identity, is_train=True, trainable=trainable, name='shortcut_bn/BatchNorm') 89 | # bottleneck layer 1 90 | residual = BatchNormLayer(inputs, act=tf.identity, is_train=True, trainable=trainable, name='conv1_bn1') 91 | residual = Conv2d(residual, depth_bottleneck, filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 92 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 93 | residual = BatchNormLayer(residual, act=tf.identity, is_train=True, trainable=trainable, name='conv1_bn2') 94 | # bottleneck prelu 95 | residual = PReluLayer(residual) 96 | # bottleneck layer 2 97 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 98 | # squeeze 99 | squeeze = tl.layers.InputLayer(tf.reduce_mean(residual.outputs, axis=[1, 2]), name='squeeze_layer') 100 | # excitation 101 | excitation1 = DenseLayer(squeeze, n_units=int(depth/16.0), act=tf.nn.relu, 102 | W_init=w_init, name='excitation_1') 103 | # excitation1 = tl.layers.PReluLayer(excitation1, name='excitation_prelu') 104 | excitation2 = DenseLayer(excitation1, n_units=depth, act=tf.nn.sigmoid, 105 | W_init=w_init, name='excitation_2') 106 | # scale 107 | scale = tl.layers.ReshapeLayer(excitation2, shape=[tf.shape(excitation2.outputs)[0], 1, 1, depth], 
name='excitation_reshape') 108 | 109 | residual_se = ElementwiseLayer(layer=[residual, scale], 110 | combine_fn=tf.multiply, 111 | name='scale_layer', 112 | act=None) 113 | 114 | output = ElementwiseLayer(layer=[shortcut, residual_se], 115 | combine_fn=tf.add, 116 | name='combine_layer', 117 | act=tf.nn.relu) 118 | return output 119 | 120 | 121 | def resnet(inputs, bottle_neck, blocks, w_init=None, trainable=None, keep_rate=None, scope=None): 122 | with tf.variable_scope(scope): 123 | net_inputs = tl.layers.InputLayer(inputs, name='input_layer') 124 | if bottle_neck: 125 | net = Conv2d(net_inputs, n_filter=64, filter_size=(3, 3), strides=(1, 1), 126 | act=None, W_init=w_init, b_init=None, name='conv1', use_cudnn_on_gpu=True) 127 | net = BatchNormLayer(net, act=tf.identity, name='bn0', is_train=True, trainable=trainable) 128 | net = PReluLayer(net, name='prelu0') 129 | else: 130 | raise ValueError('The standard resnet must support the bottleneck layer') 131 | for block in blocks: 132 | with tf.variable_scope(block.scope): 133 | for i, var in enumerate(block.args): 134 | with tf.variable_scope('unit_%d' % (i+1)): 135 | net = block.unit_fn(net, depth=var['depth'], depth_bottleneck=var['depth_bottleneck'], 136 | w_init=w_init, stride=var['stride'], rate=var['rate'], scope=None, 137 | trainable=trainable) 138 | net = BatchNormLayer(net, act=tf.identity, is_train=True, name='E_BN1', trainable=trainable) 139 | net = tl.layers.DropoutLayer(net, keep=keep_rate, name='E_Dropout') 140 | net_shape = net.outputs.get_shape() 141 | net = tl.layers.ReshapeLayer(net, shape=[-1, net_shape[1]*net_shape[2]*net_shape[3]], name='E_Reshapelayer') 142 | net = DenseLayer(net, n_units=512, W_init=w_init, name='E_DenseLayer') 143 | net = BatchNormLayer(net, act=tf.identity, is_train=True, fix_gamma=False, trainable=trainable, name='E_BN2') 144 | return net 145 | 146 | 147 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 148 | """A named tuple describing a ResNet block. 149 | 150 | Its parts are: 151 | scope: The scope of the `Block`. 152 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 153 | returns another `Tensor` with the output of the ResNet unit. 154 | args: A list of length equal to the number of units in the `Block`. The list 155 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 156 | block to serve as argument to unit_fn. 157 | """ 158 | 159 | 160 | def resnetse_v1_block(scope, base_depth, num_units, stride, rate=1, unit_fn=None): 161 | """Helper function for creating a resnet_v1 bottleneck block. 162 | 163 | Args: 164 | scope: The scope of the block. 165 | base_depth: The depth of the bottleneck layer for each unit. 166 | num_units: The number of units in the block. 167 | stride: The stride of the block, implemented as a stride in the last unit. 168 | All other units have stride=1. 169 | 170 | Returns: 171 | A resnet_v1 bottleneck block. 
172 |     """
173 |     return Block(scope, unit_fn, [{
174 |         'depth': base_depth,
175 |         'depth_bottleneck': base_depth,
176 |         'stride': stride,
177 |         'rate': rate
178 |     }] + [{
179 |         'depth': base_depth,
180 |         'depth_bottleneck': base_depth,
181 |         'stride': 1,
182 |         'rate': rate
183 |     }] * (num_units - 1))
184 | 
185 | 
186 | def get_resnet(inputs, num_layers, type=None, w_init=None, trainable=None, keep_rate=None, sess=None):
187 |     if type == 'ir':
188 |         unit_fn = bottleneck_IR
189 |     elif type == 'se_ir':
190 |         unit_fn = bottleneck_IR_SE
191 |     else:
192 |         raise ValueError('the input fn is unknown')
193 | 
194 |     if num_layers == 50:
195 |         blocks = [
196 |             resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn),
197 |             resnetse_v1_block('block2', base_depth=128, num_units=4, stride=2, rate=1, unit_fn=unit_fn),
198 |             resnetse_v1_block('block3', base_depth=256, num_units=14, stride=2, rate=1, unit_fn=unit_fn),
199 |             resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn)
200 |         ]
201 |     elif num_layers == 100:
202 |         blocks = [
203 |             resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn),
204 |             resnetse_v1_block('block2', base_depth=128, num_units=13, stride=2, rate=1, unit_fn=unit_fn),
205 |             resnetse_v1_block('block3', base_depth=256, num_units=30, stride=2, rate=1, unit_fn=unit_fn),
206 |             resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn)
207 |         ]
208 |     elif num_layers == 152:
209 |         blocks = [
210 |             resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn),
211 |             resnetse_v1_block('block2', base_depth=128, num_units=8, stride=2, rate=1, unit_fn=unit_fn),
212 |             resnetse_v1_block('block3', base_depth=256, num_units=36, stride=2, rate=1, unit_fn=unit_fn),
213 |             resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn)
214 |         ]
215 |     else:
216 |         raise ValueError('Resnet layer %d is not supported now.' % num_layers)
217 |     net = resnet(inputs=inputs,
218 |                  bottle_neck=True,
219 |                  blocks=blocks,
220 |                  w_init=w_init,
221 |                  trainable=trainable,
222 |                  keep_rate=keep_rate,
223 |                  scope='resnet_v1_%d' % num_layers)
224 |     return net
225 | 
226 | 
227 | if __name__ == '__main__':
228 |     x = tf.placeholder(dtype=tf.float32, shape=[None, 112, 112, 3], name='input_place')
229 |     sess = tf.Session()
230 |     # w_init = tf.truncated_normal_initializer(mean=10, stddev=5e-2)
231 |     w_init = tf.contrib.layers.xavier_initializer(uniform=False)
232 |     # test resnetse
233 |     nets = get_resnet(x, 50, type='ir', w_init=w_init, keep_rate=0.4, sess=sess)  # keep_rate was missing; DropoutLayer needs a numeric keep value
234 |     tl.layers.initialize_global_variables(sess)
235 | 
236 |     for p in tl.layers.get_variables_with_name('W_conv2d', True, True):
237 |         print(p.op.name)
238 |     print('##############'*30)
239 |     with sess:
240 |         nets.print_params()
241 | 
--------------------------------------------------------------------------------
/nets/L_Resnet_E_IR_RBN.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import tensorlayer as tl
3 | from tensorflow.contrib.layers.python.layers import utils
4 | import collections
5 | from tensorlayer.layers import Layer, list_remove_repeat
6 | 
7 | 
8 | class ElementwiseLayer(Layer):
9 |     """
10 |     The :class:`ElementwiseLayer` class combines multiple :class:`Layer` outputs that have the same shape by a given element-wise operation.
11 | 
12 |     Parameters
13 |     ----------
14 |     layer : a list of :class:`Layer` instances
15 |         The `Layer` class feeding into this layer.
16 |     combine_fn : a TensorFlow element-wise merge function
17 |         e.g. AND is ``tf.minimum`` ; OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.multiply`` and so on.
18 |         See the TensorFlow Math API for the full list.
19 |     name : a string or None
20 |         An optional name to attach to this layer.
21 |     """
22 |     def __init__(
23 |             self,
24 |             layer = [],
25 |             combine_fn = tf.minimum,
26 |             name ='elementwise_layer',
27 |             act = None,
28 |     ):
29 |         Layer.__init__(self, name=name)
30 | 
31 |         if act:
32 |             print("  [TL] ElementwiseLayer %s: size:%s fn:%s, act:%s" % (
33 |                 self.name, layer[0].outputs.get_shape(), combine_fn.__name__, act.__name__))
34 |         else:
35 |             print("  [TL] ElementwiseLayer %s: size:%s fn:%s" % (
36 |                 self.name, layer[0].outputs.get_shape(), combine_fn.__name__))
37 | 
38 |         self.outputs = layer[0].outputs
39 |         # print(self.outputs._shape, type(self.outputs._shape))
40 |         for l in layer[1:]:
41 |             # assert str(self.outputs.get_shape()) == str(l.outputs.get_shape()), "Hint: the input shapes should be the same. %s != %s" % (self.outputs.get_shape(), str(l.outputs.get_shape()))
42 |             self.outputs = combine_fn(self.outputs, l.outputs, name=name)
43 |         if act:
44 |             self.outputs = act(self.outputs)
45 |         self.all_layers = list(layer[0].all_layers)
46 |         self.all_params = list(layer[0].all_params)
47 |         self.all_drop = dict(layer[0].all_drop)
48 | 
49 |         for i in range(1, len(layer)):
50 |             self.all_layers.extend(list(layer[i].all_layers))
51 |             self.all_params.extend(list(layer[i].all_params))
52 |             self.all_drop.update(dict(layer[i].all_drop))
53 | 
54 |         self.all_layers = list_remove_repeat(self.all_layers)
55 |         self.all_params = list_remove_repeat(self.all_params)
56 | 
57 | 
58 | def subsample(inputs, factor, scope=None):
59 |     if factor == 1:
60 |         return inputs
61 |     else:
62 |         return tl.layers.MaxPool2d(inputs, [1, 1], strides=(factor, factor), name=scope)
63 | 
64 | 
65 | def conv2d_same(inputs, num_outputs, kernel_size, strides, rate=1, w_init=None, scope=None, trainable=None):
66 |     '''
67 |     Reference slim resnet
68 |     :param inputs:
69 |     :param num_outputs:
70 |     :param kernel_size:
71 |     :param strides:
72 |     :param rate:
73 |     :param scope:
74 |     :return:
75 |     '''
76 |     if strides == 1:
77 |         if rate == 1:
78 |             nets = tl.layers.Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None,
79 |                                     strides=(strides, strides), W_init=w_init, act=None, padding='SAME', name=scope,
80 |                                     use_cudnn_on_gpu=True)
81 |             nets.outputs = tf.layers.batch_normalization(inputs=nets.outputs,
82 |                                                          momentum=0.9,
83 |                                                          training=trainable,
84 |                                                          renorm=True,
85 |                                                          renorm_clipping={'rmax': 3, 'rmin': 0.3333,
86 |                                                                           'dmax': 5},
87 |                                                          renorm_momentum=0.9,
88 |                                                          name=scope+'_bn/BatchNorm')
89 |         else:
90 |             nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size),
91 |                                                rate=rate, act=None, W_init=w_init, padding='SAME', name=scope)
92 |             nets.outputs = tf.layers.batch_normalization(inputs=nets.outputs,
93 |                                                          momentum=0.9,
94 |                                                          training=trainable,
95 |                                                          renorm=True,
96 |                                                          renorm_clipping={'rmax': 3, 'rmin': 0.3333,
97 |                                                                           'dmax': 5},
98 |                                                          renorm_momentum=0.9,
99 |                                                          name=scope+'_bn/BatchNorm')
100 |         return nets
101 |     else:
102 |         kernel_size_effective = kernel_size + (kernel_size - 1) * (rate - 1)
103 |         pad_total = kernel_size_effective - 1
104 |         pad_beg = pad_total // 2
105 |         pad_end = pad_total - pad_beg
106 |         inputs = tl.layers.PadLayer(inputs, [[0, 0], [pad_beg, pad_end],
[pad_beg, pad_end], [0, 0]], name='padding_%s' % scope) 107 | if rate == 1: 108 | nets = tl.layers.Conv2d(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 109 | strides=(strides, strides), W_init=w_init, act=None, padding='VALID', name=scope, 110 | use_cudnn_on_gpu=True) 111 | nets.outputs = tf.layers.batch_normalization(inputs=nets.outputs, 112 | momentum=0.9, 113 | training=trainable, 114 | renorm=True, 115 | renorm_clipping={'rmax':3, 'rmin':0.3333, 116 | 'dmax':5}, 117 | renorm_momentum=0.9, 118 | name=scope+'_bn/BatchNorm') 119 | else: 120 | nets = tl.layers.AtrousConv2dLayer(inputs, n_filter=num_outputs, filter_size=(kernel_size, kernel_size), b_init=None, 121 | rate=rate, act=None, W_init=w_init, padding='SAME', name=scope) 122 | nets.outputs = tf.layers.batch_normalization(inputs=nets.outputs, 123 | momentum=0.9, 124 | training=trainable, 125 | renorm=True, 126 | renorm_clipping={'rmax':3, 'rmin':0.3333, 127 | 'dmax':5}, 128 | renorm_momentum=0.9, 129 | name=scope+'_bn/BatchNorm') 130 | return nets 131 | 132 | 133 | def bottleneck_IR(inputs, depth, depth_bottleneck, stride, rate=1, w_init=None, scope=None, trainable=None): 134 | with tf.variable_scope(scope, 'bottleneck_v1') as sc: 135 | depth_in = utils.last_dimension(inputs.outputs.get_shape(), min_rank=4) 136 | if depth == depth_in: 137 | shortcut = subsample(inputs, stride, 'shortcut') 138 | else: 139 | shortcut = tl.layers.Conv2d(inputs, depth, filter_size=(1, 1), strides=(stride, stride), act=None, 140 | W_init=w_init, b_init=None, name='shortcut_conv', use_cudnn_on_gpu=True) 141 | shortcut.outputs = tf.layers.batch_normalization(inputs=shortcut.outputs, 142 | momentum=0.9, 143 | training=trainable, 144 | renorm=True, 145 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 146 | 'dmax': 5}, 147 | renorm_momentum=0.9, 148 | name='shortcut_bn/BatchNorm') 149 | # bottleneck layer 1 150 | inputs.outputs = tf.layers.batch_normalization(inputs=inputs.outputs, 151 | momentum=0.9, 152 | training=trainable, 153 | renorm=True, 154 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 155 | 'dmax': 5}, 156 | renorm_momentum=0.9, 157 | name='conv1_bn1') 158 | residual = tl.layers.Conv2d(inputs, depth_bottleneck, filter_size=(3, 3), strides=(1, 1), act=None, b_init=None, 159 | W_init=w_init, name='conv1', use_cudnn_on_gpu=True) 160 | residual.outputs = tf.layers.batch_normalization(inputs=residual.outputs, 161 | momentum=0.9, 162 | training=trainable, 163 | renorm=True, 164 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 165 | 'dmax': 5}, 166 | renorm_momentum=0.9, 167 | name='conv1_bn2') 168 | # bottleneck prelu 169 | residual = tl.layers.PReluLayer(residual) 170 | # bottleneck layer 2 171 | residual = conv2d_same(residual, depth, kernel_size=3, strides=stride, rate=rate, w_init=w_init, scope='conv2', trainable=trainable) 172 | output = ElementwiseLayer(layer=[shortcut, residual], 173 | combine_fn=tf.add, 174 | name='combine_layer', 175 | act=None) 176 | return output 177 | 178 | 179 | def resnet(inputs, bottle_neck, blocks, w_init=None, trainable=None, scope=None): 180 | with tf.variable_scope(scope): 181 | # inputs = tf.subtract(inputs, 127.5) 182 | # inputs = tf.multiply(inputs, 0.0078125) 183 | net_inputs = tl.layers.InputLayer(inputs, name='input_layer') 184 | if bottle_neck: 185 | net = tl.layers.Conv2d(net_inputs, n_filter=64, filter_size=(3, 3), strides=(1, 1), 186 | act=None, W_init=w_init, b_init=None, name='conv1', use_cudnn_on_gpu=True) 187 | net.outputs = tf.layers.batch_normalization(inputs=net.outputs, 
188 | momentum=0.9, 189 | training=trainable, 190 | renorm=True, 191 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 192 | 'dmax': 5}, 193 | renorm_momentum=0.9, 194 | name='bn0') 195 | net = tl.layers.PReluLayer(net, name='prelu0') 196 | else: 197 | raise ValueError('The standard resnet must support the bottleneck layer') 198 | for block in blocks: 199 | with tf.variable_scope(block.scope): 200 | for i, var in enumerate(block.args): 201 | with tf.variable_scope('unit_%d' % (i+1)): 202 | net = block.unit_fn(net, depth=var['depth'], depth_bottleneck=var['depth_bottleneck'], 203 | w_init=w_init, stride=var['stride'], rate=var['rate'], scope=None, 204 | trainable=trainable) 205 | net.outputs = tf.layers.batch_normalization(inputs=net.outputs, 206 | momentum=0.9, 207 | training=trainable, 208 | renorm=True, 209 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 210 | 'dmax': 5}, 211 | renorm_momentum=0.9, 212 | name='E_BN1') 213 | net = tl.layers.DropoutLayer(net, keep=0.4, name='E_Dropout') 214 | net_shape = net.outputs.get_shape() 215 | net = tl.layers.ReshapeLayer(net, shape=[-1, net_shape[1]*net_shape[2]*net_shape[3]], name='E_Reshapelayer') 216 | net = tl.layers.DenseLayer(net, n_units=512, W_init=w_init, name='E_DenseLayer') 217 | net.outputs = tf.layers.batch_normalization(inputs=net.outputs, 218 | momentum=0.9, 219 | training=trainable, 220 | renorm=True, 221 | renorm_clipping={'rmax': 3, 'rmin': 0.3333, 222 | 'dmax': 5}, 223 | renorm_momentum=0.9, 224 | name='E_BN2') 225 | return net 226 | 227 | 228 | class Block(collections.namedtuple('Block', ['scope', 'unit_fn', 'args'])): 229 | """A named tuple describing a ResNet block. 230 | 231 | Its parts are: 232 | scope: The scope of the `Block`. 233 | unit_fn: The ResNet unit function which takes as input a `Tensor` and 234 | returns another `Tensor` with the output of the ResNet unit. 235 | args: A list of length equal to the number of units in the `Block`. The list 236 | contains one (depth, depth_bottleneck, stride) tuple for each unit in the 237 | block to serve as argument to unit_fn. 238 | """ 239 | 240 | 241 | def resnetse_v1_block(scope, base_depth, num_units, stride, rate=1, unit_fn=None): 242 | """Helper function for creating a resnet_v1 bottleneck block. 243 | 244 | Args: 245 | scope: The scope of the block. 246 | base_depth: The depth of the bottleneck layer for each unit. 247 | num_units: The number of units in the block. 248 | stride: The stride of the block, implemented as a stride in the last unit. 249 | All other units have stride=1. 250 | 251 | Returns: 252 | A resnet_v1 bottleneck block. 
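Note: in this RBN variant each unit's 'depth' is 4 * base_depth (see the dicts below), unlike some of the other L_Resnet_E_IR variants in this repo, where depth equals base_depth.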
253 | """ 254 | return Block(scope, unit_fn, [{ 255 | 'depth': base_depth * 4, 256 | 'depth_bottleneck': base_depth, 257 | 'stride': stride, 258 | 'rate': rate 259 | }] + [{ 260 | 'depth': base_depth * 4, 261 | 'depth_bottleneck': base_depth, 262 | 'stride': 1, 263 | 'rate': rate 264 | }] * (num_units - 1)) 265 | 266 | 267 | def get_resnet(inputs, num_layers, type=None, w_init=None, trainable=None, sess=None): 268 | if type == 'ir': 269 | unit_fn = bottleneck_IR 270 | # elif type == 'se_ir': 271 | # unit_fn = bottleneck_IR_SE 272 | else: 273 | raise ValueError('the input fn is unknown') 274 | 275 | if num_layers == 50: 276 | blocks = [ 277 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 278 | resnetse_v1_block('block2', base_depth=128, num_units=4, stride=2, rate=1, unit_fn=unit_fn), 279 | resnetse_v1_block('block3', base_depth=256, num_units=14, stride=2, rate=1, unit_fn=unit_fn), 280 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 281 | ] 282 | elif num_layers == 101: 283 | blocks = [ 284 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 285 | resnetse_v1_block('block2', base_depth=128, num_units=13, stride=2, rate=1, unit_fn=unit_fn), 286 | resnetse_v1_block('block3', base_depth=256, num_units=30, stride=2, rate=1, unit_fn=unit_fn), 287 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 288 | ] 289 | elif num_layers == 152: 290 | blocks = [ 291 | resnetse_v1_block('block1', base_depth=64, num_units=3, stride=2, rate=1, unit_fn=unit_fn), 292 | resnetse_v1_block('block2', base_depth=128, num_units=8, stride=2, rate=1, unit_fn=unit_fn), 293 | resnetse_v1_block('block3', base_depth=256, num_units=36, stride=2, rate=1, unit_fn=unit_fn), 294 | resnetse_v1_block('block4', base_depth=512, num_units=3, stride=2, rate=1, unit_fn=unit_fn) 295 | ] 296 | else: 297 | raise ValueError('Resnet layer %d is not supported now.' 
% num_layers)
298 |     net = resnet(inputs=inputs,
299 |                  bottle_neck=True,
300 |                  blocks=blocks,
301 |                  w_init=w_init,
302 |                  trainable=trainable,
303 |                  scope='resnet_v1_%d' % num_layers)
304 |     return net
305 | 
306 | 
307 | if __name__ == '__main__':
308 |     x = tf.placeholder(dtype=tf.float32, shape=[None, 112, 112, 3], name='input_place')
309 |     sess = tf.Session()
310 |     # w_init = tf.truncated_normal_initializer(mean=10, stddev=5e-2)
311 |     w_init = tf.contrib.layers.xavier_initializer(uniform=False)
312 |     # test resnetse
313 |     nets = get_resnet(x, 50, type='ir', w_init=w_init, sess=sess)
314 |     tl.layers.initialize_global_variables(sess)
315 | 
316 |     for p in tl.layers.get_variables_with_name('W_conv2d', True, True):
317 |         print(p.op.name)
318 |     print('##############'*30)
319 |     with sess:
320 |         nets.print_params()
321 | -------------------------------------------------------------------------------- /nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/nets/__init__.py -------------------------------------------------------------------------------- /nets/nets_utils.py: -------------------------------------------------------------------------------- 1 | from tensorflow.python import pywrap_tensorflow
2 | import collections
3 | import numpy as np
4 | 
5 | 
6 | var_stat = collections.namedtuple('stats', ['mean', 'median', 'std'])
7 | 
8 | 
9 | def get_variables_in_checkpoint_file(file_name):
10 |     try:
11 |         reader = pywrap_tensorflow.NewCheckpointReader(file_name)
12 |         var_to_shape_map = reader.get_variable_to_shape_map()
13 |         return var_to_shape_map
14 |     except Exception as e:  # pylint: disable=broad-except
15 |         print(str(e))
16 |         if "corrupted compressed block contents" in str(e):
17 |             print("It's likely that your checkpoint file has been compressed "
18 |                   "with SNAPPY.")
19 | 
20 | 
21 | def get_tensor_static_val(file_name, all_tensors, all_tensor_names):
22 |     reader = pywrap_tensorflow.NewCheckpointReader(file_name)
23 |     vars_dict = {}
24 |     if all_tensors or all_tensor_names:
25 |         var_to_shape_map = reader.get_variable_to_shape_map()
26 |         for key in sorted(var_to_shape_map):
27 |             if all_tensors:
28 |                 vars_dict[key] = var_stat(np.mean(reader.get_tensor(key)), np.median(reader.get_tensor(key)),
29 |                                           np.std(reader.get_tensor(key)))
30 |     return vars_dict -------------------------------------------------------------------------------- /nets/networks.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf
2 | from vgg16 import get_vgg16
3 | from vgg19 import get_vgg19
4 | 
5 | 
6 | def get_model(inputs, sess, type, pretrained=True):
7 |     if type == 'vgg16':
8 |         return get_vgg16(inputs, sess, pretrained)
9 |     elif type == 'vgg19':
10 |         return get_vgg19(inputs, sess, pretrained)
11 |     else:
12 |         # fail loudly instead of silently returning None for an unknown type
13 |         raise ValueError('unknown model type: %s' % type)
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     tfconfig = tf.ConfigProto(allow_soft_placement=True)
18 |     x = tf.placeholder(dtype=tf.float32, shape=[None, 224, 224, 3], name='inputs')
19 |     with tf.Session(config=tfconfig) as sess:
20 |         network = get_model(x, sess, type='vgg19', pretrained=True)
21 |         network.print_params()
22 |         network.print_layers() -------------------------------------------------------------------------------- /nets/readme.md: -------------------------------------------------------------------------------- 1 | 1. `vgg16.py`
2 |    the VGG-16 model. The input should first be resized
to 224*224 and then have the mean subtracted. The channels of the input image are `RGB`.
3 | 2. `vgg19.py`
4 |    the VGG-19 model. The input should first be normalized to [0, 1], then resized to 224*224, and then have the mean subtracted. The channels of the input should be `RGB`.
5 | 3. `resnet`
6 | -------------------------------------------------------------------------------- /nets/vgg16.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | """
4 | VGG-16 for ImageNet.
5 | Introduction
6 | ----------------
7 | VGG is a convolutional neural network model proposed by K. Simonyan and A. Zisserman
8 | from the University of Oxford in the paper “Very Deep Convolutional Networks for
9 | Large-Scale Image Recognition”. The model achieves 92.7% top-5 test accuracy on ImageNet,
10 | which is a dataset of over 14 million images belonging to 1000 classes.
11 | Download Pre-trained Model
12 | ----------------------------
13 | - Model weights in this example - vgg16_weights.npz : http://www.cs.toronto.edu/~frossard/post/vgg16/
14 | - Caffe VGG 16 model : https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md
15 | - Tool to convert the Caffe models to TensorFlow's : https://github.com/ethereon/caffe-tensorflow
16 | Note
17 | ------
18 | - For a simplified CNN layer see "Convolutional layer (Simplified)"
19 | on the Read the Docs website.
20 | - The input image has three channels in `RGB` order, for example:
21 |     from scipy.misc import imread, imresize
22 |     img1 = imread('data/laska.png', mode='RGB')  # test data in github
23 |     img1 = imresize(img1, (224, 224))
24 | - When feeding other images to the model be sure to properly resize or crop them
25 | beforehand. Distorted images might end up being misclassified. One way of safely
26 | feeding images of multiple sizes is by doing center cropping, as shown in the
27 | following snippet:
28 | # >>> image_h, image_w, _ = np.shape(img)
29 | # >>> shorter_side = min(image_h, image_w)
30 | # >>> scale = 224. / shorter_side
31 | # >>> image_h, image_w = np.ceil([scale * image_h, scale * image_w]).astype('int32')
32 | # >>> img = imresize(img, (image_h, image_w))
33 | # >>> crop_x = (image_w - 224) / 2
34 | # >>> crop_y = (image_h - 224) / 2
35 | # >>> img = img[crop_y:crop_y+224,crop_x:crop_x+224,:]
36 | """
37 | 
38 | import os
39 | import time
40 | import numpy as np
41 | import tensorflow as tf
42 | import tensorlayer as tl
43 | from tensorlayer.layers import *
44 | from scipy.misc import imread, imresize
45 | from nets.imagenet_classes import *
46 | 
47 | 
48 | def _conv_layers(net_in):
49 |     with tf.name_scope('preprocess'):
50 |         # Notice that we include a preprocessing layer that takes the RGB image
51 |         # with pixel values in the range of 0-255 and subtracts the mean image
52 |         # values (calculated over the entire ImageNet training set).
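# For example, an all-grey input pixel (128, 128, 128) becomes roughly (4.3, 11.2, 24.1) after the mean below is subtracted.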
53 | mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean') 54 | net_in.outputs = net_in.outputs - mean 55 | 56 | # conv1 57 | network = Conv2dLayer( 58 | net_in, 59 | act=tf.nn.relu, 60 | shape=[3, 3, 3, 64], # 64 features for each 3x3 patch 61 | strides=[1, 1, 1, 1], 62 | padding='SAME', 63 | name='conv1_1') 64 | network = Conv2dLayer( 65 | network, 66 | act=tf.nn.relu, 67 | shape=[3, 3, 64, 64], # 64 features for each 3x3 patch 68 | strides=[1, 1, 1, 1], 69 | padding='SAME', 70 | name='conv1_2') 71 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') 72 | 73 | # conv2 74 | network = Conv2dLayer( 75 | network, 76 | act=tf.nn.relu, 77 | shape=[3, 3, 64, 128], # 128 features for each 3x3 patch 78 | strides=[1, 1, 1, 1], 79 | padding='SAME', 80 | name='conv2_1') 81 | network = Conv2dLayer( 82 | network, 83 | act=tf.nn.relu, 84 | shape=[3, 3, 128, 128], # 128 features for each 3x3 patch 85 | strides=[1, 1, 1, 1], 86 | padding='SAME', 87 | name='conv2_2') 88 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') 89 | 90 | # conv3 91 | network = Conv2dLayer( 92 | network, 93 | act=tf.nn.relu, 94 | shape=[3, 3, 128, 256], # 256 features for each 3x3 patch 95 | strides=[1, 1, 1, 1], 96 | padding='SAME', 97 | name='conv3_1') 98 | network = Conv2dLayer( 99 | network, 100 | act=tf.nn.relu, 101 | shape=[3, 3, 256, 256], # 256 features for each 3x3 patch 102 | strides=[1, 1, 1, 1], 103 | padding='SAME', 104 | name='conv3_2') 105 | network = Conv2dLayer( 106 | network, 107 | act=tf.nn.relu, 108 | shape=[3, 3, 256, 256], # 256 features for each 3x3 patch 109 | strides=[1, 1, 1, 1], 110 | padding='SAME', 111 | name='conv3_3') 112 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') 113 | 114 | # conv4 115 | network = Conv2dLayer( 116 | network, 117 | act=tf.nn.relu, 118 | shape=[3, 3, 256, 512], # 512 features for each 3x3 patch 119 | strides=[1, 1, 1, 1], 120 | padding='SAME', 121 | name='conv4_1') 122 | network = Conv2dLayer( 123 | network, 124 | act=tf.nn.relu, 125 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 126 | strides=[1, 1, 1, 1], 127 | padding='SAME', 128 | name='conv4_2') 129 | network = Conv2dLayer( 130 | network, 131 | act=tf.nn.relu, 132 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 133 | strides=[1, 1, 1, 1], 134 | padding='SAME', 135 | name='conv4_3') 136 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') 137 | 138 | # conv5 139 | network = Conv2dLayer( 140 | network, 141 | act=tf.nn.relu, 142 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 143 | strides=[1, 1, 1, 1], 144 | padding='SAME', 145 | name='conv5_1') 146 | network = Conv2dLayer( 147 | network, 148 | act=tf.nn.relu, 149 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 150 | strides=[1, 1, 1, 1], 151 | padding='SAME', 152 | name='conv5_2') 153 | network = Conv2dLayer( 154 | network, 155 | act=tf.nn.relu, 156 | shape=[3, 3, 512, 512], # 512 features for each 3x3 patch 157 | strides=[1, 1, 1, 1], 158 | padding='SAME', 159 | name='conv5_3') 160 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') 161 | return network 162 | 163 | 164 | def _fc_layers(net): 165 | network = 
FlattenLayer(net, name='flatten') 166 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc1_relu') 167 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc2_relu') 168 | network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc3_relu') 169 | return network 170 | 171 | 172 | def get_vgg16(x, sess=None, pretrained=True): 173 | net_in = InputLayer(x, name='input') 174 | net_cnn = _conv_layers(net_in) # simplified CNN APIs 175 | network = _fc_layers(net_cnn) 176 | 177 | if pretrained: 178 | npz = np.load('../model_weights/vgg16_weights.npz') 179 | params = [] 180 | for val in sorted(npz.items()): 181 | print(" Loading %s" % str(val[1].shape)) 182 | params.append(val[1]) 183 | tl.files.assign_params(sess, params, network) 184 | return network 185 | else: 186 | tl.layers.initialize_global_variables(sess) 187 | return network 188 | 189 | 190 | if __name__ == '__main__': 191 | DATA_PATH = '/home/aurora/workspaces2/PycharmProjects/tensorflow/tensorlayer/example/data' 192 | 193 | x = tf.placeholder(tf.float32, [None, 224, 224, 3]) 194 | tfconfig = tf.ConfigProto(allow_soft_placement=True) 195 | with tf.Session(config=tfconfig) as sess: 196 | network = get_vgg16(x, sess, pretrained=True) 197 | y = network.outputs 198 | network.print_params() 199 | network.print_layers() 200 | img1 = imread(os.path.join(DATA_PATH, 'laska.png'), mode='RGB') # test data in github 201 | img1 = imresize(img1, (224, 224)) 202 | probs = tf.nn.softmax(y) 203 | start_time = time.time() 204 | prob = sess.run(probs, feed_dict={x: [img1]})[0] 205 | print(" End time : %.5ss" % (time.time() - start_time)) 206 | preds = (np.argsort(prob)[::-1])[0:5] 207 | for p in preds: 208 | print(class_names[p], prob[p]) -------------------------------------------------------------------------------- /nets/vgg19.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | """ 4 | VGG-19 for ImageNet. 5 | Pre-trained model in this example - VGG19 NPZ and 6 | trainable examples of VGG16/19 in TensorFlow can be found here: 7 | https://github.com/machrisaa/tensorflow-vgg 8 | For simplified CNN layer see "Convolutional layer (Simplified)" 9 | in read the docs website. 
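The model expects `RGB` images with values scaled to [0, 1]; inside the network they are converted to BGR and the per-channel VGG mean is subtracted (see _Vgg19 below).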
10 | """ 11 | 12 | import os 13 | import time 14 | 15 | import numpy as np 16 | import skimage 17 | import skimage.io 18 | import skimage.transform 19 | import tensorflow as tf 20 | from scipy.misc import imread, imresize 21 | import tensorlayer as tl 22 | from tensorlayer.layers import * 23 | from imagenet_classes import * 24 | 25 | 26 | DATA_PATH = '/home/aurora/workspaces2/PycharmProjects/tensorflow/tensorlayer/example/data' 27 | VGG_MEAN = [103.939, 116.779, 123.68] 28 | 29 | 30 | def load_image(path): 31 | # load image 32 | img = skimage.io.imread(path) 33 | img = img / 255.0 34 | if ((0 <= img).all() and (img <= 1.0).all()) is False: 35 | raise Exception("image value should be [0, 1]") 36 | # print "Original Image Shape: ", img.shape 37 | # we crop image from center 38 | short_edge = min(img.shape[:2]) 39 | yy = int((img.shape[0] - short_edge) / 2) 40 | xx = int((img.shape[1] - short_edge) / 2) 41 | crop_img = img[yy:yy + short_edge, xx:xx + short_edge] 42 | # resize to 224, 224 43 | resized_img = skimage.transform.resize(crop_img, (224, 224)) 44 | return resized_img 45 | 46 | 47 | def print_prob(prob): 48 | synset = class_names 49 | # print prob 50 | pred = np.argsort(prob)[::-1] 51 | # Get top1 label 52 | top1 = synset[pred[0]] 53 | print("Top1: ", top1, prob[pred[0]]) 54 | # Get top5 label 55 | top5 = [(synset[pred[i]], prob[pred[i]]) for i in range(5)] 56 | print("Top5: ", top5) 57 | return top1 58 | 59 | 60 | def _Vgg19(rgb): 61 | """ 62 | Build the VGG 19 Model 63 | Parameters 64 | ----------- 65 | rgb : rgb image placeholder [batch, height, width, 3] values scaled [0, 1] 66 | """ 67 | start_time = time.time() 68 | print("build model started") 69 | rgb_scaled = rgb * 255.0 70 | # Convert RGB to BGR 71 | if tf.__version__ <= '0.11': 72 | red, green, blue = tf.split(3, 3, rgb_scaled) 73 | else: # TF 1.0 74 | print(rgb_scaled) 75 | red, green, blue = tf.split(rgb_scaled, 3, 3) 76 | if red.get_shape().as_list()[1:] != [224, 224, 1]: 77 | raise Exception("image size unmatch") 78 | if green.get_shape().as_list()[1:] != [224, 224, 1]: 79 | raise Exception("image size unmatch") 80 | if blue.get_shape().as_list()[1:] != [224, 224, 1]: 81 | raise Exception("image size unmatch") 82 | if tf.__version__ <= '0.11': 83 | bgr = tf.concat(3, [ 84 | blue - VGG_MEAN[0], 85 | green - VGG_MEAN[1], 86 | red - VGG_MEAN[2], 87 | ]) 88 | else: 89 | bgr = tf.concat( 90 | [ 91 | blue - VGG_MEAN[0], 92 | green - VGG_MEAN[1], 93 | red - VGG_MEAN[2], 94 | ], axis=3) 95 | if bgr.get_shape().as_list()[1:] != [224, 224, 3]: 96 | raise Exception("image size unmatch") 97 | # input layer 98 | net_in = InputLayer(bgr, name='input') 99 | # conv1 100 | network = Conv2dLayer(net_in, act=tf.nn.relu, shape=[3, 3, 3, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_1') 101 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 64, 64], strides=[1, 1, 1, 1], padding='SAME', name='conv1_2') 102 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool1') 103 | # conv2 104 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 64, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_1') 105 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 128, 128], strides=[1, 1, 1, 1], padding='SAME', name='conv2_2') 106 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool2') 107 | # conv3 108 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 128, 256], 
strides=[1, 1, 1, 1], padding='SAME', name='conv3_1') 109 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_2') 110 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_3') 111 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 256], strides=[1, 1, 1, 1], padding='SAME', name='conv3_4') 112 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool3') 113 | # conv4 114 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 256, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_1') 115 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_2') 116 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_3') 117 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv4_4') 118 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool4') 119 | # conv5 120 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_1') 121 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_2') 122 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_3') 123 | network = Conv2dLayer(network, act=tf.nn.relu, shape=[3, 3, 512, 512], strides=[1, 1, 1, 1], padding='SAME', name='conv5_4') 124 | network = PoolLayer(network, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', pool=tf.nn.max_pool, name='pool5') 125 | # fc 6~8 126 | network = FlattenLayer(network, name='flatten') 127 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc6') 128 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc7') 129 | network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc8') 130 | print("build model finished: %fs" % (time.time() - start_time)) 131 | return network 132 | 133 | 134 | def _Vgg19_simple_api(rgb): 135 | """ 136 | Build the VGG 19 Model 137 | Parameters 138 | ----------- 139 | rgb : rgb image placeholder [batch, height, width, 3] values scaled [0, 1] 140 | """ 141 | start_time = time.time() 142 | print("build model started") 143 | rgb_scaled = rgb * 255.0 144 | # Convert RGB to BGR 145 | if tf.__version__ <= '0.11': 146 | red, green, blue = tf.split(3, 3, rgb_scaled) 147 | else: # TF 1.0 148 | print(rgb_scaled) 149 | red, green, blue = tf.split(rgb_scaled, 3, 3) 150 | if red.get_shape().as_list()[1:] != [224, 224, 1]: 151 | raise Exception("image size unmatch") 152 | if green.get_shape().as_list()[1:] != [224, 224, 1]: 153 | raise Exception("image size unmatch") 154 | if blue.get_shape().as_list()[1:] != [224, 224, 1]: 155 | raise Exception("image size unmatch") 156 | if tf.__version__ <= '0.11': 157 | bgr = tf.concat(3, [ 158 | blue - VGG_MEAN[0], 159 | green - VGG_MEAN[1], 160 | red - VGG_MEAN[2], 161 | ]) 162 | else: 163 | bgr = tf.concat( 164 | [ 165 | blue - VGG_MEAN[0], 166 | green - VGG_MEAN[1], 167 | red - VGG_MEAN[2], 168 | ], axis=3) 169 | if bgr.get_shape().as_list()[1:] != [224, 224, 3]: 170 | raise Exception("image size unmatch") 171 | # input layer 172 | net_in = 
InputLayer(bgr, name='input') 173 | # conv1 174 | network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1') 175 | network = Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2') 176 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') 177 | # conv2 178 | network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1') 179 | network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2') 180 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') 181 | # conv3 182 | network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1') 183 | network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2') 184 | network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3') 185 | network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_4') 186 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') 187 | # conv4 188 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1') 189 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2') 190 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3') 191 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_4') 192 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') 193 | # conv5 194 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1') 195 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2') 196 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3') 197 | network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_4') 198 | network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5') 199 | # fc 6~8 200 | network = FlattenLayer(network, name='flatten') 201 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc6') 202 | network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc7') 203 | network = DenseLayer(network, n_units=1000, act=tf.identity, name='fc8') 204 | print("build model finished: %fs" % (time.time() - start_time)) 205 | return network 206 | 207 | 208 | def get_vgg19(inputs, sess=None, pretrained=True): 209 | network = _Vgg19(inputs) 210 | if pretrained: 211 | vgg19_npy_path = "../model_weights/vgg19.npy" 212 | npz = np.load(vgg19_npy_path, encoding='latin1').item() 213 | params = [] 214 | for val in sorted(npz.items()): 215 | W = np.asarray(val[1][0]) 216 | b = np.asarray(val[1][1]) 217 | print(" Loading %s: %s, %s" % (val[0], W.shape, b.shape)) 218 | params.extend([W, b]) 219 | print("Restoring model 
from npz file") 220 | tl.files.assign_params(sess, params, network) 221 | return network 222 | else: 223 | tl.layers.initialize_global_variables(sess) 224 | return network 225 | 226 | 227 | if __name__ == '__main__': 228 | sess = tf.InteractiveSession() 229 | x = tf.placeholder("float", [None, 224, 224, 3]) 230 | network = get_vgg19(x, sess) 231 | y = network.outputs 232 | probs = tf.nn.softmax(y, name="prob") 233 | img1 = load_image(os.path.join(DATA_PATH, "tiger.jpeg")) # test data in github 234 | img1 = img1.reshape((1, 224, 224, 3)) 235 | start_time = time.time() 236 | prob = sess.run(probs, feed_dict={x: img1}) 237 | print("End time : %.5ss" % (time.time() - start_time)) 238 | 239 | print_prob(prob[0]) -------------------------------------------------------------------------------- /test/benchmark/README.md: -------------------------------------------------------------------------------- 1 | ##### Test max batch size -------------------------------------------------------------------------------- /test/benchmark/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/test/benchmark/__init__.py -------------------------------------------------------------------------------- /test/benchmark/gluon_batchsize_test.py: -------------------------------------------------------------------------------- 1 | from mxnet import gluon 2 | import mxnet as mx 3 | from mxnet import ndarray as nd 4 | import utils_final as utils 5 | import mxnet.gluon.nn as nn 6 | from mxnet import init 7 | import os 8 | from mxnet import initializer 9 | from mxnet.gluon.block import HybridBlock 10 | 11 | 12 | def prelu(): 13 | pass 14 | 15 | 16 | def inference(): 17 | net = gluon.nn.Sequential() 18 | with net.name_scope(): 19 | net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1)) 20 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 21 | # net.add(mx.sym.LeakyReLU(data=net, act_type='prelu', name='prelu1')) 22 | net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1)) 23 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 24 | net.add(nn.Conv2D(channels=64, kernel_size=3, padding=1, strides=2)) 25 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 26 | 27 | net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1)) 28 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 29 | net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1)) 30 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 31 | net.add(nn.Conv2D(channels=128, kernel_size=3, padding=1, strides=2)) 32 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 33 | 34 | net.add(nn.Conv2D(channels=256, kernel_size=3, padding=1)) 35 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 36 | net.add(nn.Conv2D(channels=256, kernel_size=3, padding=1)) 37 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 38 | net.add(nn.Conv2D(channels=256, kernel_size=3, padding=1, strides=2)) 39 | net.add(nn.BatchNorm(axis=1, center=True, scale=True)) 40 | 41 | net.add(nn.Flatten()) 42 | net.add(nn.Dense(10)) 43 | return net 44 | 45 | 46 | if __name__ == '__main__': 47 | # without prelu and bn 7000< max batch size <8000 48 | # with bn only 3000< max batch size <4000 49 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 50 | batch_size = 3000 51 | train_data, test_data = utils.load_data_mnist(batch_size=batch_size) 52 | ctx = utils.try_gpu() 53 | net = inference() 54 | print(net) 55 | net.initialize(ctx=ctx, 
init=init.Xavier())
56 |     softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()
57 |     trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.01})
58 |     utils.train(train_data, test_data, net, softmax_cross_entropy, trainer, ctx, num_epochs=10)
59 | 
60 | 
61 | -------------------------------------------------------------------------------- /test/benchmark/mxnet_batchsize_test.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx
2 | import mxnet.ndarray as nd
3 | import os
4 | 
5 | 
6 | if __name__ == '__main__':
7 |     # without bn and prelu max batchsize (40000, 50000)
8 |     # with bn max batchsize (20000, 30000)
9 |     # with prelu batchsize (20000, 30000)
10 |     # with bn and prelu max batchsize (10000, 20000)
11 |     os.environ["CUDA_VISIBLE_DEVICES"] = "0"
12 |     batch_size = 10000
13 |     mnist = mx.test_utils.get_mnist()
14 |     print(mnist['train_data'].shape)
15 |     train_iter = mx.io.NDArrayIter(mnist['train_data'], mnist['train_label'], batch_size, shuffle=True)
16 | 
17 |     # inference
18 |     data = mx.sym.var('data')
19 |     # first conv layer; the later layers are chained through `net`
20 |     net = mx.sym.Convolution(data=data, kernel=(3, 3), num_filter=64)
21 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn1')
22 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu1')
23 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=64)
24 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn2')
25 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu2')
26 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), stride=(2, 2), num_filter=64)
27 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn3')
28 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu3')
29 | 
30 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=128)
31 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn4')
32 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu4')
33 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=128)
34 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn5')
35 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu5')
36 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), stride=(2, 2), num_filter=128)
37 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn6')
38 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu6')
39 | 
40 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=256)
41 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn7')
42 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu7')
43 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), num_filter=256)
44 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn8')
45 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu8')
46 |     net = mx.sym.Convolution(data=net, kernel=(3, 3), stride=(2, 2), num_filter=256)
47 |     net = mx.sym.BatchNorm(data=net, fix_gamma=False, eps=2e-5, name='_bn9')
48 |     net = mx.sym.LeakyReLU(data=net, act_type='prelu', name='_prelu9')
49 | 
50 |     flatten = mx.sym.flatten(data=net)
51 |     # MNIST has 10 classes
52 |     fc3 = mx.sym.FullyConnected(data=flatten, num_hidden=10)
53 |     # Softmax with cross entropy loss
54 |     mlp = mx.sym.SoftmaxOutput(data=fc3, name='softmax')
55 | 
56 |     import logging
57 | 
58 |     logging.getLogger().setLevel(logging.DEBUG)  # logging to stdout
59 |     # create a trainable module on GPU
60 |     mlp_model = 
mx.mod.Module(symbol=mlp, context=mx.gpu()) 61 | mlp_model.fit(train_iter, # train data 62 | optimizer='sgd', # use SGD to train 63 | optimizer_params={'learning_rate': 0.1}, # use fixed learning rate 64 | eval_metric='acc', # report accuracy during training 65 | batch_end_callback=mx.callback.Speedometer(batch_size, 100), 66 | # output progress for each 100 data batches 67 | num_epoch=10) # train for at most 10 dataset passes -------------------------------------------------------------------------------- /test/benchmark/resnet_slim_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim.nets as nets 3 | import numpy as np 4 | 5 | 6 | slim = tf.contrib.slim 7 | resnet = nets.resnet_v1 8 | 9 | if __name__ == '__main__': 10 | output_shape = 85164 11 | batch_size = 64 12 | image = tf.placeholder(name='input_x', shape=[None, 224, 224, 3], dtype=tf.float32) 13 | labels = tf.placeholder(name='input_label', shape=[None, output_shape], dtype=tf.float32) 14 | with slim.arg_scope(nets.resnet_utils.resnet_arg_scope()): 15 | resnet_50, end_points = resnet.resnet_v1_50(inputs=image, num_classes=output_shape, scope='resnet_v1_50') 16 | prob = tf.squeeze(resnet_50, axis=[1, 2]) 17 | probabilities = tf.reduce_mean(tf.nn.softmax(prob, dim=-1)) 18 | losses = tf.norm(tf.subtract(probabilities, labels)) 19 | train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(losses) 20 | sess = tf.Session() 21 | saver = tf.train.Saver() 22 | sess.run(tf.global_variables_initializer()) 23 | while True: 24 | datasets = np.random.randn(batch_size, 224, 224, 3).astype(np.float32) 25 | datasets_labels = np.random.randn(batch_size, output_shape).astype(np.float32) 26 | losses_val, _ = sess.run([losses, train_op], feed_dict={image: datasets, labels: datasets_labels}) 27 | print(losses_val) -------------------------------------------------------------------------------- /test/benchmark/resnet_tl_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim.nets as nets 3 | import numpy as np 4 | from nets.resnet import get_resnet 5 | 6 | 7 | slim = tf.contrib.slim 8 | resnet = nets.resnet_v1 9 | 10 | if __name__ == '__main__': 11 | output_shape = 85164 12 | batch_size = 128 13 | image = tf.placeholder(name='input_x', shape=[None, 224, 224, 3], dtype=tf.float32) 14 | labels = tf.placeholder(name='input_label', shape=[None, output_shape], dtype=tf.float32) 15 | with slim.arg_scope(nets.resnet_utils.resnet_arg_scope()): 16 | nets = get_resnet(image, output_shape, 50, type='resnet', sess=None, pretrained=False) 17 | print(nets.outputs) 18 | probabilities = tf.reduce_mean(tf.nn.softmax(nets.outputs, dim=-1)) 19 | print(probabilities) 20 | losses = tf.norm(tf.subtract(probabilities, labels)) 21 | train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(losses) 22 | sess = tf.Session() 23 | saver = tf.train.Saver() 24 | sess.run(tf.global_variables_initializer()) 25 | while True: 26 | datasets = np.random.randn(batch_size, 224, 224, 3).astype(np.float32) 27 | datasets_labels = np.random.randn(batch_size, output_shape).astype(np.float32) 28 | losses_val, _ = sess.run([losses, train_op], feed_dict={image: datasets, labels: datasets_labels}) 29 | print(losses_val) -------------------------------------------------------------------------------- /test/benchmark/tensorlayer_batchsize_test.py: 
-------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | import os 4 | 5 | 6 | def inference(x): 7 | w_init_method = tf.contrib.layers.xavier_initializer(uniform=True) 8 | # define the network 9 | network = tl.layers.InputLayer(x, name='input') 10 | network = tl.layers.Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 11 | W_init=w_init_method, name='conv1_1') 12 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn1') 13 | network = tl.layers.PReluLayer(network, name='prelu1') 14 | network = tl.layers.Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 15 | W_init=w_init_method, name='conv1_2') 16 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn2') 17 | network = tl.layers.PReluLayer(network, name='prelu2') 18 | network = tl.layers.Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(2, 2), padding='SAME', act=None, 19 | W_init=w_init_method, name='conv1_3') 20 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn3') 21 | network = tl.layers.PReluLayer(network, name='prelu3') 22 | 23 | network = tl.layers.Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 24 | W_init=w_init_method, name='conv2_1') 25 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn4') 26 | network = tl.layers.PReluLayer(network, name='prelu4') 27 | 28 | network = tl.layers.Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 29 | W_init=w_init_method, name='conv2_2') 30 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn5') 31 | network = tl.layers.PReluLayer(network, name='prelu5') 32 | network = tl.layers.Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(2, 2), padding='SAME', act=None, 33 | W_init=w_init_method, name='conv2_3') 34 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn6') 35 | network = tl.layers.PReluLayer(network, name='prelu6') 36 | 37 | network = tl.layers.Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 38 | W_init=w_init_method, name='conv3_1') 39 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn7') 40 | network = tl.layers.PReluLayer(network, name='prelu7') 41 | network = tl.layers.Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), padding='SAME', act=None, 42 | W_init=w_init_method, name='conv3_2') 43 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn8') 44 | network = tl.layers.PReluLayer(network, name='prelu8') 45 | network = tl.layers.Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(2, 2), padding='SAME', act=None, 46 | W_init=w_init_method, name='conv3_3') 47 | network = tl.layers.BatchNormLayer(network, act=tf.identity, is_train=True, name='bn9') 48 | network = tl.layers.PReluLayer(network, name='prelu9') 49 | 50 | network = tl.layers.FlattenLayer(network, name='flatten') 51 | network = tl.layers.DenseLayer(network, 10) 52 | 53 | return network.outputs 54 | 55 | 56 | if __name__ == '__main__': 57 | # without bn prelu 8000< max batch size <9000 58 | # with bn only 5000< max batch size <6000 59 | # with prelu only 3000< max batch size <4000 60 | # with bn and prelu 2000< max batch size <3000 61 
| os.environ["CUDA_VISIBLE_DEVICES"] = "0"
62 |     batch_size = 2000
63 |     n_epoch = 10
64 |     # prepare data
65 |     X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
66 |     # define placeholder
67 |     x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1], name='x')
68 |     y_ = tf.placeholder(tf.int64, shape=[None], name='y_')
69 | 
70 |     output = inference(x)
71 |     cost = tl.cost.cross_entropy(output, y_, 'cost')
72 |     train_op = tf.train.GradientDescentOptimizer(learning_rate=0.1).minimize(cost)
73 | 
74 |     sess = tf.Session()
75 |     tl.layers.initialize_global_variables(sess)
76 | 
77 |     correct_prediction = tf.equal(tf.argmax(output, 1), y_)
78 |     acc = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
79 | 
80 |     for epoch in range(n_epoch):
81 |         train_loss, train_acc, n_batch = 0, 0, 0
82 |         for X_train_a, y_train_a in tl.iterate.minibatches(X_train, y_train, batch_size, shuffle=True):
83 |             feed_dict = {x: X_train_a, y_: y_train_a}
84 |             _, err, ac = sess.run([train_op, cost, acc], feed_dict=feed_dict)
85 |             train_loss += err
86 |             train_acc += ac
87 |             n_batch += 1
88 |         print("epoch %d, train acc: %f" % (epoch, (train_acc / n_batch))) -------------------------------------------------------------------------------- /test/benchmark/utils_final.py: -------------------------------------------------------------------------------- 1 | from math import exp
2 | from mxnet import gluon
3 | from mxnet import autograd
4 | from mxnet import nd
5 | from mxnet import image
6 | from mxnet.gluon import nn
7 | import mxnet as mx
8 | import numpy as np
9 | from time import time
10 | import matplotlib.pyplot as plt
11 | import random
12 | 
13 | 
14 | class DataLoader(object):
15 |     """Similar to gluon.data.DataLoader, but might be faster.
16 | 
17 |     The main difference is that this data loader tries to read more examples each
18 |     time. But the limits are 1) all examples in the dataset must have the same shape, and 2)
19 |     the data transformer needs to process multiple examples at a time.
20 |     """
21 | 
22 |     def __init__(self, dataset, batch_size, shuffle, transform=None):
23 |         self.dataset = dataset
24 |         self.batch_size = batch_size
25 |         self.shuffle = shuffle
26 |         self.transform = transform
27 | 
28 |     def __iter__(self):
29 |         data = self.dataset[:]
30 |         X = data[0]
31 |         y = nd.array(data[1])
32 |         n = X.shape[0]
33 |         if self.shuffle:
34 |             idx = np.arange(n)
35 |             np.random.shuffle(idx)
36 |             X = nd.array(X.asnumpy()[idx])
37 |             y = nd.array(y.asnumpy()[idx])
38 | 
39 |         for i in range(n // self.batch_size):
40 |             if self.transform is not None:
41 |                 yield self.transform(X[i * self.batch_size:(i + 1) * self.batch_size],
42 |                                      y[i * self.batch_size:(i + 1) * self.batch_size])
43 |             else:
44 |                 yield (X[i * self.batch_size:(i + 1) * self.batch_size],
45 |                        y[i * self.batch_size:(i + 1) * self.batch_size])
46 | 
47 |     def __len__(self):
48 |         return len(self.dataset) // self.batch_size
49 | 
50 | 
51 | def load_data_fashion_mnist(batch_size, resize=None, root="~/.mxnet/datasets/fashion-mnist"):
52 |     """download the fashion mnist dataset and then load into memory"""
53 | 
54 |     def transform_mnist(data, label):
55 |         # Transform a batch of examples.
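# (the transform runs lazily, one batch at a time, so the whole dataset is never materialized in transformed form)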
56 |         if resize:
57 |             n = data.shape[0]
58 |             new_data = nd.zeros((n, resize, resize, data.shape[3]))
59 |             for i in range(n):
60 |                 new_data[i] = image.imresize(data[i], resize, resize)
61 |             data = new_data
62 |         # change data from batch x height x width x channel to batch x channel x height x width
63 |         return nd.transpose(data.astype('float32'), (0, 3, 1, 2)) / 255, label.astype('float32')
64 | 
65 |     mnist_train = gluon.data.vision.FashionMNIST(root=root, train=True, transform=None)
66 |     mnist_test = gluon.data.vision.FashionMNIST(root=root, train=False, transform=None)
67 |     # Transform later to avoid memory explosion.
68 |     train_data = DataLoader(mnist_train, batch_size, shuffle=True, transform=transform_mnist)
69 |     test_data = DataLoader(mnist_test, batch_size, shuffle=False, transform=transform_mnist)
70 |     return (train_data, test_data)
71 | 
72 | 
73 | def load_data_mnist(batch_size, resize=None, root="~/.mxnet/datasets/mnist"):
74 |     """download the mnist dataset and then load into memory"""
75 | 
76 |     def transform_mnist(data, label):
77 |         # Transform a batch of examples.
78 |         if resize:
79 |             n = data.shape[0]
80 |             new_data = nd.zeros((n, resize, resize, data.shape[3]))
81 |             for i in range(n):
82 |                 new_data[i] = image.imresize(data[i], resize, resize)
83 |             data = new_data
84 |         # change data from batch x height x width x channel to batch x channel x height x width
85 |         return nd.transpose(data.astype('float32'), (0, 3, 1, 2)) / 255, label.astype('float32')
86 | 
87 |     mnist_train = gluon.data.vision.MNIST(root=root, train=True, transform=None)
88 |     mnist_test = gluon.data.vision.MNIST(root=root, train=False, transform=None)
89 |     # Transform later to avoid memory explosion.
90 |     train_data = DataLoader(mnist_train, batch_size, shuffle=True, transform=transform_mnist)
91 |     test_data = DataLoader(mnist_test, batch_size, shuffle=False, transform=transform_mnist)
92 |     return (train_data, test_data)
93 | 
94 | 
95 | def try_gpu():
96 |     """If GPU is available, return mx.gpu(0); else return mx.cpu()"""
97 |     try:
98 |         ctx = mx.gpu()
99 |         _ = nd.array([0], ctx=ctx)
100 |     except:
101 |         ctx = mx.cpu()
102 |     return ctx
103 | 
104 | 
105 | def try_all_gpus():
106 |     """Return all available GPUs, or [mx.cpu()] if there is no GPU"""
107 |     ctx_list = []
108 |     try:
109 |         for i in range(16):
110 |             ctx = mx.gpu(i)
111 |             _ = nd.array([0], ctx=ctx)
112 |             ctx_list.append(ctx)
113 |     except:
114 |         pass
115 |     if not ctx_list:
116 |         ctx_list = [mx.cpu()]
117 |     return ctx_list
118 | 
119 | 
120 | def SGD(params, lr):
121 |     for param in params:
122 |         param[:] = param - lr * param.grad
123 | 
124 | 
125 | def accuracy(output, label):
126 |     return nd.mean(output.argmax(axis=1) == label).asscalar()
127 | 
128 | 
129 | def _get_batch(batch, ctx):
130 |     """return data and label on ctx"""
131 |     if isinstance(batch, mx.io.DataBatch):
132 |         data = batch.data[0]
133 |         label = batch.label[0]
134 |     else:
135 |         data, label = batch
136 |     return (gluon.utils.split_and_load(data, ctx),
137 |             gluon.utils.split_and_load(label, ctx),
138 |             data.shape[0])
139 | 
140 | 
141 | def evaluate_accuracy(data_iterator, net, ctx=[mx.cpu()]):
142 |     if isinstance(ctx, mx.Context):
143 |         ctx = [ctx]
144 |     acc = nd.array([0])
145 |     n = 0.
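# acc accumulates the number of correct predictions across devices; n counts the labels seen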
146 | if isinstance(data_iterator, mx.io.MXDataIter): 147 | data_iterator.reset() 148 | for batch in data_iterator: 149 | data, label, batch_size = _get_batch(batch, ctx) 150 | for X, y in zip(data, label): 151 | acc += nd.sum(net(X).argmax(axis=1) == y).copyto(mx.cpu()) 152 | n += y.size 153 | acc.wait_to_read() # don't push too many operators into backend 154 | return acc.asscalar() / n 155 | 156 | 157 | def train(train_data, test_data, net, loss, trainer, ctx, num_epochs, print_batches=None): 158 | """Train a network""" 159 | print("Start training on ", ctx) 160 | if isinstance(ctx, mx.Context): 161 | ctx = [ctx] 162 | for epoch in range(num_epochs): 163 | train_loss, train_acc, n, m = 0.0, 0.0, 0.0, 0.0 164 | if isinstance(train_data, mx.io.MXDataIter): 165 | train_data.reset() 166 | start = time() 167 | for i, batch in enumerate(train_data): 168 | data, label, batch_size = _get_batch(batch, ctx) 169 | losses = [] 170 | with autograd.record(): 171 | outputs = [net(X) for X in data] 172 | losses = [loss(yhat, y) for yhat, y in zip(outputs, label)] 173 | for l in losses: 174 | l.backward() 175 | train_acc += sum([(yhat.argmax(axis=1) == y).sum().asscalar() 176 | for yhat, y in zip(outputs, label)]) 177 | train_loss += sum([l.sum().asscalar() for l in losses]) 178 | trainer.step(batch_size) 179 | n += batch_size 180 | m += sum([y.size for y in label]) 181 | if print_batches and (i + 1) % print_batches == 0: 182 | print("Batch %d. Loss: %f, Train acc %f" % ( 183 | n, train_loss / n, train_acc / m 184 | )) 185 | 186 | test_acc = evaluate_accuracy(test_data, net, ctx) 187 | print("Epoch %d. Loss: %.3f, Train acc %.2f, Test acc %.2f, Time %.1f sec" % ( 188 | epoch, train_loss / n, train_acc / m, test_acc, time() - start 189 | )) 190 | 191 | 192 | class Residual(nn.HybridBlock): 193 | def __init__(self, channels, same_shape=True, **kwargs): 194 | super(Residual, self).__init__(**kwargs) 195 | self.same_shape = same_shape 196 | with self.name_scope(): 197 | strides = 1 if same_shape else 2 198 | self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1, 199 | strides=strides) 200 | self.bn1 = nn.BatchNorm() 201 | self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1) 202 | self.bn2 = nn.BatchNorm() 203 | if not same_shape: 204 | self.conv3 = nn.Conv2D(channels, kernel_size=1, 205 | strides=strides) 206 | 207 | def hybrid_forward(self, F, x): 208 | out = F.relu(self.bn1(self.conv1(x))) 209 | out = self.bn2(self.conv2(out)) 210 | if not self.same_shape: 211 | x = self.conv3(x) 212 | return F.relu(out + x) 213 | 214 | 215 | def resnet18(num_classes): 216 | net = nn.HybridSequential() 217 | with net.name_scope(): 218 | net.add( 219 | nn.BatchNorm(), 220 | nn.Conv2D(64, kernel_size=3, strides=1), 221 | nn.MaxPool2D(pool_size=3, strides=2), 222 | Residual(64), 223 | Residual(64), 224 | Residual(128, same_shape=False), 225 | Residual(128), 226 | Residual(256, same_shape=False), 227 | Residual(256), 228 | nn.GlobalAvgPool2D(), 229 | nn.Dense(num_classes) 230 | ) 231 | return net 232 | 233 | 234 | def show_images(imgs, nrows, ncols, figsize=None): 235 | """plot a list of images""" 236 | if not figsize: 237 | figsize = (ncols, nrows) 238 | _, figs = plt.subplots(nrows, ncols, figsize=figsize) 239 | for i in range(nrows): 240 | for j in range(ncols): 241 | figs[i][j].imshow(imgs[i * ncols + j].asnumpy()) 242 | figs[i][j].axes.get_xaxis().set_visible(False) 243 | figs[i][j].axes.get_yaxis().set_visible(False) 244 | plt.show() 245 | 246 | 247 | def data_iter_random(corpus_indices, batch_size, num_steps, 
ctx=None): 248 | """Sample mini-batches in a random order from sequential data.""" 249 | # Subtract 1 because label indices are corresponding input indices + 1. 250 | num_examples = (len(corpus_indices) - 1) // num_steps 251 | epoch_size = num_examples // batch_size 252 | # Randomize samples. 253 | example_indices = list(range(num_examples)) 254 | random.shuffle(example_indices) 255 | 256 | def _data(pos): 257 | return corpus_indices[pos: pos + num_steps] 258 | 259 | for i in range(epoch_size): 260 | # Read batch_size random samples each time. 261 | i = i * batch_size 262 | batch_indices = example_indices[i: i + batch_size] 263 | data = nd.array( 264 | [_data(j * num_steps) for j in batch_indices], ctx=ctx) 265 | label = nd.array( 266 | [_data(j * num_steps + 1) for j in batch_indices], ctx=ctx) 267 | yield data, label 268 | 269 | 270 | def data_iter_consecutive(corpus_indices, batch_size, num_steps, ctx=None): 271 | """Sample mini-batches in a consecutive order from sequential data.""" 272 | corpus_indices = nd.array(corpus_indices, ctx=ctx) 273 | data_len = len(corpus_indices) 274 | batch_len = data_len // batch_size 275 | 276 | indices = corpus_indices[0: batch_size * batch_len].reshape(( 277 | batch_size, batch_len)) 278 | # Subtract 1 because label indices are corresponding input indices + 1. 279 | epoch_size = (batch_len - 1) // num_steps 280 | 281 | for i in range(epoch_size): 282 | i = i * num_steps 283 | data = indices[:, i: i + num_steps] 284 | label = indices[:, i + 1: i + num_steps + 1] 285 | yield data, label 286 | 287 | 288 | def grad_clipping(params, clipping_norm, ctx): 289 | """Gradient clipping.""" 290 | if clipping_norm is not None: 291 | norm = nd.array([0.0], ctx) 292 | for p in params: 293 | norm += nd.sum(p.grad ** 2) 294 | norm = nd.sqrt(norm).asscalar() 295 | if norm > clipping_norm: 296 | for p in params: 297 | p.grad[:] *= clipping_norm / norm 298 | 299 | 300 | def predict_rnn(rnn, prefix, num_chars, params, hidden_dim, ctx, idx_to_char, 301 | char_to_idx, get_inputs, is_lstm=False): 302 | """Predict the next chars given the prefix.""" 303 | prefix = prefix.lower() 304 | state_h = nd.zeros(shape=(1, hidden_dim), ctx=ctx) 305 | if is_lstm: 306 | state_c = nd.zeros(shape=(1, hidden_dim), ctx=ctx) 307 | output = [char_to_idx[prefix[0]]] 308 | for i in range(num_chars + len(prefix)): 309 | X = nd.array([output[-1]], ctx=ctx) 310 | if is_lstm: 311 | Y, state_h, state_c = rnn(get_inputs(X), state_h, state_c, *params) 312 | else: 313 | Y, state_h = rnn(get_inputs(X), state_h, *params) 314 | if i < len(prefix) - 1: 315 | next_input = char_to_idx[prefix[i + 1]] 316 | else: 317 | next_input = int(Y[0].argmax(axis=1).asscalar()) 318 | output.append(next_input) 319 | return ''.join([idx_to_char[i] for i in output]) 320 | 321 | 322 | def train_and_predict_rnn(rnn, is_random_iter, epochs, num_steps, hidden_dim, 323 | learning_rate, clipping_norm, batch_size, 324 | pred_period, pred_len, seqs, get_params, get_inputs, 325 | ctx, corpus_indices, idx_to_char, char_to_idx, 326 | is_lstm=False): 327 | """Train an RNN model and predict the next item in the sequence.""" 328 | if is_random_iter: 329 | data_iter = data_iter_random 330 | else: 331 | data_iter = data_iter_consecutive 332 | params = get_params() 333 | 334 | softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss() 335 | 336 | for e in range(1, epochs + 1): 337 | # If consecutive sampling is used, in the same epoch, the hidden state 338 | # is initialized only at the beginning of the epoch. 
339 | if not is_random_iter: 340 | state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx) 341 | if is_lstm: 342 | state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx) 343 | train_loss, num_examples = 0, 0 344 | for data, label in data_iter(corpus_indices, batch_size, num_steps, 345 | ctx): 346 | # If random sampling is used, the hidden state has to be 347 | # initialized for each mini-batch. 348 | if is_random_iter: 349 | state_h = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx) 350 | if is_lstm: 351 | state_c = nd.zeros(shape=(batch_size, hidden_dim), ctx=ctx) 352 | with autograd.record(): 353 | # outputs shape: (batch_size, vocab_size) 354 | if is_lstm: 355 | outputs, state_h, state_c = rnn(get_inputs(data), state_h, 356 | state_c, *params) 357 | else: 358 | outputs, state_h = rnn(get_inputs(data), state_h, *params) 359 | # Let t_ib_j be the j-th element of the mini-batch at time i. 360 | # label shape: (batch_size * num_steps) 361 | # label = [t_0b_0, t_0b_1, ..., t_1b_0, t_1b_1, ..., ]. 362 | label = label.T.reshape((-1,)) 363 | # Concatenate outputs: 364 | # shape: (batch_size * num_steps, vocab_size). 365 | outputs = nd.concat(*outputs, dim=0) 366 | # Now outputs and label are aligned. 367 | loss = softmax_cross_entropy(outputs, label) 368 | loss.backward() 369 | 370 | grad_clipping(params, clipping_norm, ctx) 371 | SGD(params, learning_rate) 372 | 373 | train_loss += nd.sum(loss).asscalar() 374 | num_examples += loss.size 375 | 376 | if e % pred_period == 0: 377 | print("Epoch %d. Training perplexity %f" % (e, 378 | exp(train_loss / num_examples))) 379 | for seq in seqs: 380 | print(' - ', predict_rnn(rnn, seq, pred_len, params, 381 | hidden_dim, ctx, idx_to_char, char_to_idx, get_inputs, 382 | is_lstm)) 383 | print() 384 | 385 | -------------------------------------------------------------------------------- /test/benchmark/vgg19_slim_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.slim.nets as nets 3 | import numpy as np 4 | 5 | slim = tf.contrib.slim 6 | 7 | if __name__ == '__main__': 8 | output_shape = 1000 9 | batch_size = 128 10 | image = tf.placeholder(name='input_x', shape=[None, 224, 224, 3], dtype=tf.float32) 11 | labels = tf.placeholder(name='input_label', shape=[None, output_shape], dtype=tf.float32) 12 | with slim.arg_scope(nets.vgg.vgg_arg_scope()): 13 | vgg_19, end_points = nets.vgg.vgg_19(inputs=image, num_classes=output_shape, scope='vgg_19') 14 | probabilities = tf.reduce_mean(tf.nn.softmax(vgg_19, dim=-1)) 15 | losses = tf.norm(tf.subtract(probabilities, labels)) 16 | train_op = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(losses) 17 | sess = tf.Session() 18 | saver = tf.train.Saver() 19 | sess.run(tf.global_variables_initializer()) 20 | while True: 21 | datasets = np.random.randn(batch_size, 224, 224, 3).astype(np.float32) 22 | datasets_labels = np.random.randn(batch_size, output_shape).astype(np.float32) 23 | losses_val, _ = sess.run([losses, train_op], feed_dict={image: datasets, labels: datasets_labels}) 24 | print(losses_val) -------------------------------------------------------------------------------- /test/benchmark/vgg19_tl_benchmark.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from nets.vgg19 import get_vgg19 3 | import numpy as np 4 | 5 | 6 | if __name__ == '__main__': 7 | sess = tf.Session() 8 | x = tf.placeholder(name="inputs_x", shape=[None, 224, 224, 
3], dtype=tf.float32) 9 | y = tf.placeholder(name='inputs_y', shape=[None, 1000], dtype=tf.float32) 10 | network = get_vgg19(x, sess, pretrained=False) 11 | outputs_y = network.outputs 12 | probs = tf.nn.softmax(outputs_y, name="prob") 13 | loss = tf.reduce_mean(tf.subtract(probs, y)) 14 | 15 | while True: 16 | batch_size = 128 17 | datasets_x = np.random.randn(batch_size, 224, 224, 3).astype(np.float32) 18 | datasets_y = np.random.randn(batch_size, 1000).astype(np.float32) 19 | feed_dict = {x: datasets_x, y: datasets_y} 20 | loss_val = sess.run(loss, feed_dict=feed_dict) 21 | print('batch size %d, loss value is %.2f' % (batch_size, loss_val)) 22 | -------------------------------------------------------------------------------- /test/memory_usage_test.py: -------------------------------------------------------------------------------- 1 | import mxnet as mx 2 | import argparse 3 | import PIL.Image 4 | import io 5 | import numpy as np 6 | import cv2 7 | import tensorflow as tf 8 | import os 9 | import sys 10 | 11 | 12 | def parse_args(): 13 | parser = argparse.ArgumentParser( 14 | formatter_class=argparse.ArgumentDefaultsHelpFormatter, 15 | description='data path information' 16 | ) 17 | parser.add_argument('--bin_path', default='../datasets/faces_ms1m_112x112/train.rec', type=str, 18 | help='path to the binary image file') 19 | parser.add_argument('--idx_path', default='../datasets/faces_ms1m_112x112/train.idx', type=str, 20 | help='path to the image index path') 21 | parser.add_argument('--tfrecords_file_path', default='../datasets/tfrecords', type=str, 22 | help='path to the output of tfrecords file path') 23 | args = parser.parse_args() 24 | return args 25 | 26 | 27 | def mx2tfrecords_mem_test(imgidx, imgrec, args): 28 | output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords') 29 | writer = tf.python_io.TFRecordWriter(output_path) 30 | for i in imgidx: 31 | img_info = imgrec.read_idx(i) 32 | header, img = mx.recordio.unpack(img_info) 33 | print(type(img)) 34 | print(img) 35 | print(sys.getsizeof(img)) 36 | print('#####################') 37 | img_mx = mx.image.imdecode(img) 38 | print(type(img_mx)) 39 | print(sys.getsizeof(img_mx)) 40 | print(img_mx.size) 41 | print(img_mx.dtype) 42 | print(img_mx.context) 43 | print(img_mx.stype) 44 | print(img_mx) 45 | print('#####################') 46 | img_mx_np = img_mx.asnumpy() 47 | print(type(img_mx_np)) 48 | print(sys.getsizeof(img_mx_np)) 49 | print('#####################') 50 | back_mx_ndarray = mx.nd.array(img_mx_np) 51 | print(type(back_mx_ndarray)) 52 | print(sys.getsizeof(back_mx_ndarray)) 53 | encoded_jpg_io = io.BytesIO(img) 54 | print(sys.getsizeof(encoded_jpg_io)) 55 | image = PIL.Image.open(encoded_jpg_io) 56 | np_img = np.array(image) 57 | img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR) 58 | print(sys.getsizeof(img)) 59 | print('#####################') 60 | img_raw = img.tobytes() 61 | print(sys.getsizeof(img)) 62 | print('#####################') 63 | writer.close() 64 | 65 | 66 | def mx2tfrecords(imgidx, imgrec, args): 67 | output_path = os.path.join(args.tfrecords_file_path, 'tran.tfrecords') 68 | writer = tf.python_io.TFRecordWriter(output_path) 69 | for i in imgidx: 70 | img_info = imgrec.read_idx(i) 71 | header, img = mx.recordio.unpack(img_info) 72 | # encoded_jpg_io = io.BytesIO(img) 73 | # image = PIL.Image.open(encoded_jpg_io) 74 | # np_img = np.array(image) 75 | # img = cv2.cvtColor(np_img, cv2.COLOR_RGB2BGR) 76 | # img_raw = img.tobytes() 77 | # images = tf.image.decode_jpeg(img) 78 | # images = 
tf.reshape(images, shape=(112, 112, 3)) 79 | # r, g, b = tf.split(images, num_or_size_splits=3, axis=-1) 80 | # images = tf.concat([b, g, r], axis=-1) 81 | # sess = tf.Session() 82 | # np_images = sess.run(images) 83 | # print(images.shape) 84 | # print(type(np_images)) 85 | # print(sys.getsizeof(np_images)) 86 | # cv2.imshow('test', np_images) 87 | # cv2.waitKey(0) 88 | label = int(header.label) 89 | example = tf.train.Example(features=tf.train.Features(feature={ 90 | 'image_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img])), 91 | "label": tf.train.Feature(int64_list=tf.train.Int64List(value=[label])) 92 | })) 93 | writer.write(example.SerializeToString()) # Serialize To String 94 | if i % 10000 == 0: 95 | print('%d num image processed' % i) 96 | writer.close() 97 | 98 | 99 | if __name__ == '__main__': 100 | # define parameters 101 | id2range = {} 102 | data_shape = (3, 112, 112) 103 | args = parse_args() 104 | imgrec = mx.recordio.MXIndexedRecordIO(args.idx_path, args.bin_path, 'r') 105 | s = imgrec.read_idx(0) 106 | header, _ = mx.recordio.unpack(s) 107 | print(header.label) 108 | imgidx = list(range(1, int(header.label[0]))) 109 | seq_identity = range(int(header.label[0]), int(header.label[1])) 110 | for identity in seq_identity: 111 | s = imgrec.read_idx(identity) 112 | header, _ = mx.recordio.unpack(s) 113 | a, b = int(header.label[0]), int(header.label[1]) 114 | id2range[identity] = (a, b) 115 | print('id2range', len(id2range)) 116 | 117 | # generate tfrecords 118 | mx2tfrecords_mem_test(imgidx, imgrec, args) 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /test/multiple_gpu_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/auroua/InsightFace_TF/6ffe4296460bdfea56f91521db6d6412a89249d9/test/multiple_gpu_test/__init__.py -------------------------------------------------------------------------------- /test/multiple_gpu_test/test_mgpu_mnist.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | import os 4 | 5 | Layer = tl.layers.Layer 6 | D_TYPE = tf.float32 7 | 8 | class DenseLayer(Layer): 9 | """ 10 | The :class:`DenseLayer` class is a fully connected layer. 11 | 12 | Parameters 13 | ---------- 14 | layer : a :class:`Layer` instance 15 | The `Layer` class feeding into this layer. 16 | n_units : int 17 | The number of units of the layer. 18 | act : activation function 19 | The function that is applied to the layer activations. 20 | W_init : weights initializer 21 | The initializer for initializing the weight matrix. 22 | b_init : biases initializer or None 23 | The initializer for initializing the bias vector. If None, skip biases. 24 | W_init_args : dictionary 25 | The arguments for the weights tf.get_variable. 26 | b_init_args : dictionary 27 | The arguments for the biases tf.get_variable. 28 | name : a string or None 29 | An optional name to attach to this layer. 30 | 31 | Examples 32 | -------- 33 | >>> network = tl.layers.InputLayer(x, name='input_layer') 34 | >>> network = tl.layers.DenseLayer( 35 | ... network, 36 | ... n_units=800, 37 | ... act = tf.nn.relu, 38 | ... W_init=tf.truncated_normal_initializer(stddev=0.1), 39 | ... name ='relu_layer' 40 | ... ) 41 | 42 | >>> Without TensorLayer, you can do as follow. 43 | >>> W = tf.Variable( 44 | ... 
tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W') 45 | >>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b') 46 | >>> y = tf.nn.relu(tf.matmul(inputs, W) + b) 47 | 48 | Notes 49 | ----- 50 | If the input to this layer has more than two axes, it need to flatten the 51 | input by using :class:`FlattenLayer` in this case. 52 | """ 53 | 54 | def __init__( 55 | self, 56 | layer=None, 57 | n_units=100, 58 | act=tf.identity, 59 | W_init=tf.truncated_normal_initializer(stddev=0.1), 60 | b_init=tf.constant_initializer(value=0.0), 61 | W_init_args={}, 62 | b_init_args={}, 63 | name='dense_layer', 64 | ): 65 | Layer.__init__(self, name=name) 66 | self.inputs = layer.outputs 67 | if self.inputs.get_shape().ndims != 2: 68 | raise Exception("The input dimension must be rank 2, please reshape or flatten it") 69 | 70 | n_in = int(self.inputs.get_shape()[-1]) 71 | self.n_units = n_units 72 | print(" [TL] DenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__)) 73 | with tf.variable_scope(name) as vs: 74 | with tf.device('/cpu:0'): 75 | W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args) 76 | if b_init is not None: 77 | try: 78 | with tf.device('/cpu:0'): 79 | b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args) 80 | except: # If initializer is a constant, do not specify shape. 81 | with tf.device('/cpu:0'): 82 | b = tf.get_variable(name='b', initializer=b_init, dtype=D_TYPE, **b_init_args) 83 | self.outputs = act(tf.matmul(self.inputs, W) + b) 84 | else: 85 | self.outputs = act(tf.matmul(self.inputs, W)) 86 | 87 | # Hint : list(), dict() is pass by value (shallow), without them, it is 88 | # pass by reference. 89 | self.all_layers = list(layer.all_layers) 90 | self.all_params = list(layer.all_params) 91 | self.all_drop = dict(layer.all_drop) 92 | self.all_layers.extend([self.outputs]) 93 | if b_init is not None: 94 | self.all_params.extend([W, b]) 95 | else: 96 | self.all_params.extend([W]) 97 | 98 | 99 | def inference(x): 100 | network = tl.layers.InputLayer(x, name='input') 101 | network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1') 102 | network = DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu1') 103 | network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2') 104 | network = DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu2') 105 | network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3') 106 | network = DenseLayer(network, n_units=10, act=tf.identity, name='output') 107 | y = network.outputs 108 | return y 109 | 110 | 111 | def load_data(): 112 | X_train, y_train, X_val, y_val, X_test, y_test = \ 113 | tl.files.load_mnist_dataset(shape=(-1, 784), path='/home/aurora/workspaces/data') 114 | print('X_train.shape', X_train.shape) 115 | print('y_train.shape', y_train.shape) 116 | print('X_val.shape', X_val.shape) 117 | print('y_val.shape', y_val.shape) 118 | print('X_test.shape', X_test.shape) 119 | print('y_test.shape', y_test.shape) 120 | print('X %s y %s' % (X_test.dtype, y_test.dtype)) 121 | return X_train, y_train 122 | 123 | 124 | def tower_losses(inputs, labels): 125 | logit = inference(inputs) 126 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels, name='cross_entropy') 127 | return loss 128 | 129 | 130 | def average_gradients(tower_grads): 131 | """Calculate the average gradient for each shared variable across all towers. 
132 | 133 | Note that this function provides a synchronization point across all towers. 134 | 135 | Args: 136 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 137 | is over individual gradients. The inner list is over the gradient 138 | calculation for each tower. 139 | Returns: 140 | List of pairs of (gradient, variable) where the gradient has been averaged 141 | across all towers. 142 | """ 143 | average_grads = [] 144 | 145 | for grad_and_vars in zip(*tower_grads): 146 | # Note that each grad_and_vars looks like the following: 147 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 148 | grads = [] 149 | for g, g1 in grad_and_vars: 150 | # Add 0 dimension to the gradients to represent the tower. 151 | expanded_g = tf.expand_dims(g, 0) 152 | 153 | # Append on a 'tower' dimension which we will average over below. 154 | grads.append(expanded_g) 155 | 156 | # Average over the 'tower' dimension. 157 | grad = tf.concat(axis=0, values=grads) 158 | grad = tf.reduce_mean(grad, 0) 159 | 160 | # Keep in mind that the Variables are redundant because they are shared 161 | # across towers. So .. we will just return the first tower's pointer to 162 | # the Variable. 163 | v = grad_and_vars[0][1] 164 | grad_and_var = (grad, v) 165 | average_grads.append(grad_and_var) 166 | return average_grads 167 | 168 | 169 | def train(): 170 | with tf.Graph().as_default(), tf.device('/cpu:0'): 171 | global_step = tf.get_variable( 172 | 'global_step', [], 173 | initializer=tf.constant_initializer(0), trainable=False) 174 | # Decay the learning rate exponentially based on the number of steps. 175 | lr = tf.train.exponential_decay(0.01, 176 | global_step, 177 | 10000, 178 | 0.99, 179 | staircase=True) 180 | # Create an optimizer that performs gradient descent. 181 | opt = tf.train.GradientDescentOptimizer(lr) 182 | tower_grads = [] 183 | x = tf.placeholder(tf.float32, shape=[None, 784], name='x') 184 | y_ = tf.placeholder(tf.int64, shape=[None, ], name='y_') 185 | with tf.variable_scope(tf.get_variable_scope()): 186 | for i in range(1): 187 | with tf.device('/gpu:%d' % i): 188 | with tf.name_scope('%s_%d' % ('tower', i)) as scope: 189 | tl.layers.set_name_reuse(True) 190 | # Dequeues one batch for the GPU 191 | # Calculate the loss for one tower of the CIFAR model. This function 192 | # constructs the entire CIFAR model but shares the variables across 193 | # all towers. 194 | summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope) 195 | loss = tower_losses(x, y_) 196 | # Reuse variables for the next tower. 197 | tf.get_variable_scope().reuse_variables() 198 | # Calculate the gradients for the batch of data on this CIFAR tower. 199 | grads = opt.compute_gradients(loss) 200 | # Keep track of the gradients across all towers. 201 | tower_grads.append(grads) 202 | # We must calculate the mean of each gradient. Note that this is the 203 | # synchronization point across all towers. 204 | grads = average_gradients(tower_grads) 205 | apply_gradient_op = opt.apply_gradients(grads, global_step=global_step) 206 | 207 | # Track the moving averages of all trainable variables. 208 | variable_averages = tf.train.ExponentialMovingAverage(0.9999, global_step) 209 | variables_averages_op = variable_averages.apply(tf.trainable_variables()) 210 | 211 | train_op = tf.group(apply_gradient_op, variables_averages_op) 212 | # Build an initialization operation to run below. 
213 | init = tf.global_variables_initializer() 214 | sess = tf.Session(config=tf.ConfigProto( 215 | allow_soft_placement=True, 216 | log_device_placement=True)) 217 | sess.run(init) 218 | 219 | 220 | if __name__ == '__main__': 221 | os.environ["CUDA_VISIBLE_DEVICES"] = "1" 222 | train() 223 | 224 | -------------------------------------------------------------------------------- /test/multiple_gpu_test/test_tensorlayer.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | 4 | Layer = tl.layers.Layer 5 | D_TYPE = tf.float32 6 | 7 | 8 | class DenseLayer(Layer): 9 | """ 10 | The :class:`DenseLayer` class is a fully connected layer. 11 | 12 | Parameters 13 | ---------- 14 | layer : a :class:`Layer` instance 15 | The `Layer` class feeding into this layer. 16 | n_units : int 17 | The number of units of the layer. 18 | act : activation function 19 | The function that is applied to the layer activations. 20 | W_init : weights initializer 21 | The initializer for initializing the weight matrix. 22 | b_init : biases initializer or None 23 | The initializer for initializing the bias vector. If None, skip biases. 24 | W_init_args : dictionary 25 | The arguments for the weights tf.get_variable. 26 | b_init_args : dictionary 27 | The arguments for the biases tf.get_variable. 28 | name : a string or None 29 | An optional name to attach to this layer. 30 | 31 | Examples 32 | -------- 33 | >>> network = tl.layers.InputLayer(x, name='input_layer') 34 | >>> network = tl.layers.DenseLayer( 35 | ... network, 36 | ... n_units=800, 37 | ... act = tf.nn.relu, 38 | ... W_init=tf.truncated_normal_initializer(stddev=0.1), 39 | ... name ='relu_layer' 40 | ... ) 41 | 42 | >>> Without TensorLayer, you can do as follow. 43 | >>> W = tf.Variable( 44 | ... tf.random_uniform([n_in, n_units], -1.0, 1.0), name='W') 45 | >>> b = tf.Variable(tf.zeros(shape=[n_units]), name='b') 46 | >>> y = tf.nn.relu(tf.matmul(inputs, W) + b) 47 | 48 | Notes 49 | ----- 50 | If the input to this layer has more than two axes, it need to flatten the 51 | input by using :class:`FlattenLayer` in this case. 52 | """ 53 | 54 | def __init__( 55 | self, 56 | layer=None, 57 | n_units=100, 58 | act=tf.identity, 59 | W_init=tf.truncated_normal_initializer(stddev=0.1), 60 | b_init=tf.constant_initializer(value=0.0), 61 | W_init_args={}, 62 | b_init_args={}, 63 | name='dense_layer', 64 | ): 65 | Layer.__init__(self, name=name) 66 | self.inputs = layer.outputs 67 | if self.inputs.get_shape().ndims != 2: 68 | raise Exception("The input dimension must be rank 2, please reshape or flatten it") 69 | 70 | n_in = int(self.inputs.get_shape()[-1]) 71 | self.n_units = n_units 72 | print(" [TL] DenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__)) 73 | with tf.variable_scope(name) as vs: 74 | with tf.device('/cpu:0'): 75 | W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args) 76 | if b_init is not None: 77 | try: 78 | with tf.device('/cpu:0'): 79 | b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args) 80 | except: # If initializer is a constant, do not specify shape. 
81 | with tf.device('/cpu:0'): 82 | b = tf.get_variable(name='b', initializer=b_init, dtype=D_TYPE, **b_init_args) 83 | self.outputs = act(tf.matmul(self.inputs, W) + b) 84 | else: 85 | self.outputs = act(tf.matmul(self.inputs, W)) 86 | 87 | # Hint : list(), dict() is pass by value (shallow), without them, it is 88 | # pass by reference. 89 | self.all_layers = list(layer.all_layers) 90 | self.all_params = list(layer.all_params) 91 | self.all_drop = dict(layer.all_drop) 92 | self.all_layers.extend([self.outputs]) 93 | if b_init is not None: 94 | self.all_params.extend([W, b]) 95 | else: 96 | self.all_params.extend([W]) 97 | 98 | 99 | def inference(): 100 | x = tf.placeholder(tf.float32, shape=[None, 784], name='x') 101 | network = tl.layers.InputLayer(x, name='input') 102 | network = tl.layers.DropoutLayer(network, keep=0.8, name='drop1') 103 | network = DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu1') 104 | network = tl.layers.DropoutLayer(network, keep=0.5, name='drop2') 105 | network = DenseLayer(network, n_units=800, act=tf.nn.relu, name='relu2') 106 | network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3') 107 | network = DenseLayer(network, n_units=10, act=tf.identity, name='output') 108 | return network 109 | 110 | 111 | if __name__ == '__main__': 112 | with tf.device('/gpu:0'): 113 | network = inference() 114 | network.print_layers() 115 | sess = tf.Session(config=tf.ConfigProto( 116 | allow_soft_placement=True, 117 | log_device_placement=True)) 118 | tl.layers.initialize_global_variables(sess) -------------------------------------------------------------------------------- /test/resnet_test_static.py: -------------------------------------------------------------------------------- 1 | from resnet import get_resnet 2 | import tensorflow as tf 3 | from nets_utils import get_tensor_static_val 4 | import numpy as np 5 | 6 | 7 | def resnet_diff_test(layers_num): 8 | ckpt_file_path = '../model_weights/resnet_v1_'+str(layers_num)+'.ckpt' 9 | x = tf.placeholder(dtype=tf.float32, shape=[1, 224, 224, 3], name='input_place') 10 | tfconfig = tf.ConfigProto(allow_soft_placement=True) 11 | sess = tf.Session(config=tfconfig) 12 | nets = get_resnet(x, 1000, layers_num, sess) 13 | ckpt_static = get_tensor_static_val(ckpt_file_path, all_tensors=True, all_tensor_names=True) 14 | 15 | print('###########'*30) 16 | vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES) 17 | 18 | total_count = 0 19 | mean_avg = 0.0 20 | median_avg = 0.0 21 | std_avg = 0.0 22 | 23 | for var in vars: 24 | var_name = var.op.name 25 | var_name_new = var_name 26 | if '_bn' in var_name: 27 | var_name_new = var_name_new.replace('_bn', '') 28 | if 'W_conv2d' in var_name: 29 | var_name_new = var_name_new.replace('W_conv2d', 'weights') 30 | if 'b_conv2d' in var_name: 31 | var_name_new = var_name_new.replace('b_conv2d', 'biases') 32 | if 'shortcut_conv' in var_name: 33 | var_name_new = var_name_new.replace('shortcut_conv', 'shortcut') 34 | 35 | if var_name_new in ckpt_static: 36 | print(var_name_new, end=', ') 37 | total_count += 1 38 | ckpt_s = ckpt_static[var_name_new] 39 | var_val = sess.run(var) 40 | mean_diff = np.mean(var_val) - ckpt_s.mean 41 | mean_avg += mean_diff 42 | median_diff = np.median(var_val) - ckpt_s.median 43 | median_avg += median_diff 44 | std_diff = np.std(var_val) - ckpt_s.std 45 | std_avg += std_diff 46 | print('mean_diff: ', mean_diff, 'median_diff: ', median_diff, 'std_diff: ', std_diff) 47 | 48 | print('total_mean_diff', mean_avg/total_count, 'total_median_diff', 
median_avg/total_count, 49 | 'total_std_diff', std_avg/total_count) 50 | 51 | 52 | if __name__ == '__main__': 53 | with tf.device('/device:GPU:1'): 54 | resnet_diff_test(50) 55 | -------------------------------------------------------------------------------- /test/test_losses.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | from losses.face_losses import cosineface_losses 4 | import mxnet as mx 5 | import math 6 | 7 | 8 | def arcface_loss_val(embedding, labels, weights, out_num, s=64., m=0.5): 9 | ''' 10 | :param embedding: the input embedding vectors 11 | :param labels: the input labels, the shape should be eg: (batch_size, 1) 12 | :param s: scalar value default is 64 13 | :param out_num: output class num 14 | :param m: the margin value, default is 0.5 15 | :return: the final cacualted output, this output is send into the tf.nn.softmax directly 16 | ''' 17 | cos_m = math.cos(m) 18 | sin_m = math.sin(m) 19 | mm = sin_m * m * s 20 | threshold = math.cos(math.pi - m) 21 | with tf.variable_scope('arcface_loss'): 22 | # inputs and weights norm 23 | embedding_norm = tf.norm(embedding, axis=1, keep_dims=True) 24 | embedding = tf.div(embedding, embedding_norm, name='norm_embedding') 25 | weights_norm = tf.norm(weights, axis=0, keep_dims=True) 26 | weights = tf.div(weights, weights_norm, name='norm_weights') 27 | # cos(theta+m) 28 | cos_t = tf.matmul(embedding, weights, name='cos_t') 29 | cos_t2 = tf.square(cos_t, name='cos_2') 30 | sin_t2 = tf.subtract(1., cos_t2, name='sin_2') 31 | sin_t = tf.sqrt(sin_t2, name='sin_t') 32 | cos_mt = s * tf.subtract(tf.multiply(cos_t, cos_m), tf.multiply(sin_t, sin_m), name='cos_mt') 33 | 34 | # this condition controls the theta+m should in range [0, pi] 35 | # 0<=theta+m<=pi 36 | # -m<=theta<=pi-m 37 | cond_v = cos_t - threshold 38 | cond = tf.cast(tf.nn.relu(cond_v, name='if_else'), dtype=tf.bool) 39 | 40 | keep_val = s * (cos_t - mm) 41 | cos_mt_temp = tf.where(cond, cos_mt, keep_val) 42 | 43 | mask = tf.one_hot(labels, depth=out_num, name='one_hot_mask') 44 | inv_mask = tf.subtract(1., mask, name='inverse_mask') 45 | 46 | s_cos_t = tf.multiply(s, cos_t, name='scalar_cos_t') 47 | 48 | output = tf.add(tf.multiply(s_cos_t, inv_mask), tf.multiply(cos_mt_temp, mask), name='arcface_loss_output') 49 | return output 50 | 51 | 52 | def test_arcface_losses(np_embedding, np_weights): 53 | tf_embedding = tf.constant(np_embedding, name='embedding', dtype=tf.float32) 54 | labels = tf.constant([1, 3, 2, 1, 1], name='input_labels', dtype=tf.int64) 55 | print(labels) 56 | tf_weights = tf.constant(np_weights, name='weights') 57 | output = arcface_loss_val(embedding=tf_embedding, labels=labels, out_num=10, weights=tf_weights) 58 | print(output) 59 | sess = tf.Session() 60 | sess.run(tf.global_variables_initializer()) 61 | results1 = sess.run(output) 62 | print(results1) 63 | return results1 64 | 65 | 66 | def test_cosineface_losses(): 67 | np_embedding = np.random.randn(5, 512).astype(dtype=np.float32) 68 | tf_embedding = tf.constant(np_embedding, name='embedding', dtype=tf.float32) 69 | labels = tf.constant([1, 3, 2, 1, 1], name='input_labels', dtype=tf.int64) 70 | output = cosineface_losses(embedding=tf_embedding, labels=labels, out_num=10) 71 | sess = tf.Session() 72 | sess.run(tf.global_variables_initializer()) 73 | print(sess.run(output)) 74 | 75 | 76 | def test_mxnet_losses(np_embedding, np_weights): 77 | labels = np.array([1, 3, 2, 1, 1]).astype(dtype=np.float32) 78 | return 
mxnet_arcface_val(np_embedding, labels, np_weights) 79 | 80 | 81 | def mxnet_arcface_val(embedding, gt_label, weights): 82 | s = 64 83 | m = 0.5 84 | _weight = mx.symbol.Variable("fc7_weight", shape=(10, 512), lr_mult=1.0) 85 | _weight = mx.symbol.L2Normalization(_weight, mode='instance') 86 | _embedding = mx.symbol.Variable('mx_embedding', shape=(5, 512), lr_mult=1.0) 87 | nembedding = mx.symbol.L2Normalization(_embedding, mode='instance', name='fc1n')*s 88 | fc7 = mx.sym.FullyConnected(data=nembedding, weight=_weight, no_bias=True, num_hidden=10, name='fc7') 89 | 90 | _labels = mx.symbol.Variable('labels', shape=(5, ), lr_mult=1.0) 91 | zy = mx.sym.pick(fc7, _labels, axis=1) 92 | cos_t = zy/s 93 | 94 | cos_m = math.cos(m) 95 | sin_m = math.sin(m) 96 | mm = math.sin(math.pi - m) * m 97 | # threshold = 0.0 98 | threshold = math.cos(math.pi - m) 99 | 100 | cond_v = cos_t - threshold 101 | cond = mx.symbol.Activation(data=cond_v, act_type='relu') 102 | 103 | body = cos_t * cos_t 104 | body = 1.0 - body 105 | sin_t = mx.sym.sqrt(body) 106 | new_zy = cos_t * cos_m 107 | b = sin_t * sin_m 108 | new_zy = new_zy - b 109 | new_zy = new_zy * s 110 | 111 | zy_keep = zy - s * mm 112 | new_zy = mx.sym.where(cond, new_zy, zy_keep) 113 | 114 | diff = new_zy - zy 115 | diff = mx.sym.expand_dims(diff, 1) 116 | gt_one_hot = mx.sym.one_hot(_labels, depth = 10, on_value = 1.0, off_value = 0.0) 117 | body = mx.sym.broadcast_mul(gt_one_hot, diff) 118 | fc7 = fc7+body 119 | executor = fc7.bind(mx.cpu(), {'fc7_weight': mx.nd.array(weights.T), 'mx_embedding': mx.nd.array(embedding), 120 | 'labels': mx.nd.array(gt_label)}) 121 | output = executor.forward() 122 | print(output) 123 | return output 124 | 125 | 126 | if __name__ == '__main__': 127 | np_embedding = np.random.randn(5, 512).astype(dtype=np.float32) 128 | np_weights = np.random.randn(512, 10).astype(dtype=np.float32) 129 | # test arcface_losses output 130 | result1 = test_arcface_losses(np_embedding, np_weights) 131 | # print('########'*30) 132 | print('################') 133 | result2 = test_mxnet_losses(np_embedding, np_weights) 134 | print(len(result2[0])) 135 | print(type(result1)) 136 | print(type(result2[0].asnumpy())) 137 | print(np.mean(result1 - result2[0].asnumpy())) # 1.26362e-07 -------------------------------------------------------------------------------- /train_nets.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | import argparse 4 | from data.mx2tfrecords import parse_function 5 | import os 6 | # from nets.L_Resnet_E_IR import get_resnet 7 | # from nets.L_Resnet_E_IR_GBN import get_resnet 8 | from nets.L_Resnet_E_IR_fix_issue9 import get_resnet 9 | from losses.face_losses import arcface_loss 10 | from tensorflow.core.protobuf import config_pb2 11 | import time 12 | from data.eval_data_reader import load_bin 13 | from verification import ver_test 14 | 15 | 16 | def get_parser(): 17 | parser = argparse.ArgumentParser(description='parameters to train net') 18 | parser.add_argument('--net_depth', default=100, help='resnet depth, default is 50') 19 | parser.add_argument('--epoch', default=100000, help='epoch to train the network') 20 | parser.add_argument('--batch_size', default=32, help='batch size to train network') 21 | parser.add_argument('--lr_steps', default=[40000, 60000, 80000], help='learning rate to train network') 22 | parser.add_argument('--momentum', default=0.9, help='learning alg momentum') 23 | parser.add_argument('--weight_deacy', 
default=5e-4, help='weight decay factor') 24 | # parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets') 25 | parser.add_argument('--eval_datasets', default=['lfw'], help='evaluation datasets') 26 | parser.add_argument('--eval_db_path', default='./datasets/faces_ms1m_112x112', help='evaluation datasets base path') 27 | parser.add_argument('--image_size', default=[112, 112], help='the image size') 28 | parser.add_argument('--num_output', default=85164, help='number of output classes (identities)') 29 | parser.add_argument('--tfrecords_file_path', default='./datasets/tfrecords', type=str, 30 | help='path to the output of tfrecords file path') 31 | parser.add_argument('--summary_path', default='./output/summary', help='the summary file save path') 32 | parser.add_argument('--ckpt_path', default='./output/ckpt', help='the ckpt file save path') 33 | parser.add_argument('--log_file_path', default='./output/logs', help='the log file save path') 34 | parser.add_argument('--saver_maxkeep', default=100, help='tf.train.Saver max keep ckpt files') 35 | parser.add_argument('--buffer_size', default=10000, help='tf dataset api buffer size') 36 | parser.add_argument('--log_device_mapping', default=False, help='show device placement log') 37 | parser.add_argument('--summary_interval', default=300, help='interval to save summary') 38 | parser.add_argument('--ckpt_interval', default=10000, help='intervals to save ckpt file') 39 | parser.add_argument('--validate_interval', default=2000, help='intervals to run validation') 40 | parser.add_argument('--show_info_interval', default=20, help='intervals to show information') 41 | args = parser.parse_args() 42 | return args 43 | 44 | 45 | if __name__ == '__main__': 46 | os.environ["CUDA_VISIBLE_DEVICES"] = "0" 47 | # 1. define global parameters 48 | args = get_parser() 49 | global_step = tf.Variable(name='global_step', initial_value=0, trainable=False) 50 | inc_op = tf.assign_add(global_step, 1, name='increment_global_step') 51 | images = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32) 52 | labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64) 53 | # trainable = tf.placeholder(name='trainable_bn', dtype=tf.bool) 54 | dropout_rate = tf.placeholder(name='dropout_rate', dtype=tf.float32) 55 | # 2 prepare train datasets and test datasets by using tensorflow dataset api 56 | # 2.1 train datasets 57 | # the image is subtracted by 127.5 and multiplied by 1/128. 58 | # random flip left right 59 | tfrecords_f = os.path.join(args.tfrecords_file_path, 'tran.tfrecords') 60 | dataset = tf.data.TFRecordDataset(tfrecords_f) 61 | dataset = dataset.map(parse_function) 62 | dataset = dataset.shuffle(buffer_size=args.buffer_size) 63 | dataset = dataset.batch(args.batch_size) 64 | iterator = dataset.make_initializable_iterator() 65 | next_element = iterator.get_next() 66 | # 2.2 prepare validate datasets 67 | ver_list = [] 68 | ver_name_list = [] 69 | for db in args.eval_datasets: 70 | print('begin db %s convert.' % db) 71 | data_set = load_bin(db, args.image_size, args) 72 | ver_list.append(data_set) 73 | ver_name_list.append(db) 74 | # 3. 
define network, loss, optimize method, learning rate schedule, summary writer, saver 75 | # 3.1 inference phase 76 | w_init_method = tf.contrib.layers.xavier_initializer(uniform=False) 77 | net = get_resnet(images, args.net_depth, type='ir', w_init=w_init_method, trainable=True, keep_rate=dropout_rate) 78 | # 3.2 get arcface loss 79 | logit = arcface_loss(embedding=net.outputs, labels=labels, w_init=w_init_method, out_num=args.num_output) 80 | # test net because of batch normal layer 81 | tl.layers.set_name_reuse(True) 82 | test_net = get_resnet(images, args.net_depth, type='ir', w_init=w_init_method, trainable=False, reuse=True, keep_rate=dropout_rate) 83 | embedding_tensor = test_net.outputs 84 | # 3.3 define the cross entropy 85 | inference_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels)) 86 | # inference_loss_avg = tf.reduce_mean(inference_loss) 87 | # 3.4 define weight deacy losses 88 | # for var in tf.trainable_variables(): 89 | # print(var.name) 90 | # print('##########'*30) 91 | wd_loss = 0 92 | for weights in tl.layers.get_variables_with_name('W_conv2d', True, True): 93 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(weights) 94 | for W in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/W', True, True): 95 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(W) 96 | for weights in tl.layers.get_variables_with_name('embedding_weights', True, True): 97 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(weights) 98 | for gamma in tl.layers.get_variables_with_name('gamma', True, True): 99 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(gamma) 100 | # for beta in tl.layers.get_variables_with_name('beta', True, True): 101 | # wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(beta) 102 | for alphas in tl.layers.get_variables_with_name('alphas', True, True): 103 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(alphas) 104 | # for bias in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/b', True, True): 105 | # wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(bias) 106 | 107 | # 3.5 total losses 108 | total_loss = inference_loss + wd_loss 109 | # 3.6 define the learning rate schedule 110 | p = int(512.0/args.batch_size) 111 | lr_steps = [p*val for val in args.lr_steps] 112 | print(lr_steps) 113 | lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=[0.001, 0.0005, 0.0003, 0.0001], name='lr_schedule') 114 | # 3.7 define the optimize method 115 | opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=args.momentum) 116 | # 3.8 get train op 117 | grads = opt.compute_gradients(total_loss) 118 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 119 | with tf.control_dependencies(update_ops): 120 | train_op = opt.apply_gradients(grads, global_step=global_step) 121 | # train_op = opt.minimize(total_loss, global_step=global_step) 122 | # 3.9 define the inference accuracy used during validate or test 123 | pred = tf.nn.softmax(logit) 124 | acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels), dtype=tf.float32)) 125 | # 3.10 define sess 126 | config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=args.log_device_mapping) 127 | config.gpu_options.allow_growth = True 128 | 129 | sess = tf.Session(config=config) 130 | # 3.11 summary writer 131 | summary = tf.summary.FileWriter(args.summary_path, sess.graph) 132 | summaries = [] 133 | # # 3.11.1 add grad 
histogram op 134 | for grad, var in grads: 135 | if grad is not None: 136 | summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad)) 137 | # 3.11.2 add trainabel variable gradients 138 | for var in tf.trainable_variables(): 139 | summaries.append(tf.summary.histogram(var.op.name, var)) 140 | # 3.11.3 add loss summary 141 | summaries.append(tf.summary.scalar('inference_loss', inference_loss)) 142 | summaries.append(tf.summary.scalar('wd_loss', wd_loss)) 143 | summaries.append(tf.summary.scalar('total_loss', total_loss)) 144 | # 3.11.4 add learning rate 145 | summaries.append(tf.summary.scalar('leraning_rate', lr)) 146 | summary_op = tf.summary.merge(summaries) 147 | # 3.12 saver 148 | saver = tf.train.Saver(max_to_keep=args.saver_maxkeep) 149 | # 3.13 init all variables 150 | sess.run(tf.global_variables_initializer()) 151 | 152 | # restore_saver = tf.train.Saver() 153 | # restore_saver.restore(sess, '/home/aurora/workspaces2018/InsightFace_TF/output/ckpt/InsightFace_iter_1110000.ckpt') 154 | # 4 begin iteration 155 | if not os.path.exists(args.log_file_path): 156 | os.makedirs(args.log_file_path) 157 | log_file_path = args.log_file_path + '/train' + time.strftime('_%Y-%m-%d-%H-%M', time.localtime(time.time())) + '.log' 158 | log_file = open(log_file_path, 'w') 159 | # 4 begin iteration 160 | count = 0 161 | total_accuracy = {} 162 | 163 | for i in range(args.epoch): 164 | sess.run(iterator.initializer) 165 | while True: 166 | try: 167 | images_train, labels_train = sess.run(next_element) 168 | feed_dict = {images: images_train, labels: labels_train, dropout_rate: 0.4} 169 | feed_dict.update(net.all_drop) 170 | start = time.time() 171 | _, total_loss_val, inference_loss_val, wd_loss_val, _, acc_val = \ 172 | sess.run([train_op, total_loss, inference_loss, wd_loss, inc_op, acc], 173 | feed_dict=feed_dict, 174 | options=config_pb2.RunOptions(report_tensor_allocations_upon_oom=True)) 175 | end = time.time() 176 | pre_sec = args.batch_size/(end - start) 177 | # print training information 178 | if count > 0 and count % args.show_info_interval == 0: 179 | print('epoch %d, total_step %d, total loss is %.2f , inference loss is %.2f, weight deacy ' 180 | 'loss is %.2f, training accuracy is %.6f, time %.3f samples/sec' % 181 | (i, count, total_loss_val, inference_loss_val, wd_loss_val, acc_val, pre_sec)) 182 | count += 1 183 | 184 | # save summary 185 | if count > 0 and count % args.summary_interval == 0: 186 | feed_dict = {images: images_train, labels: labels_train, dropout_rate: 0.4} 187 | feed_dict.update(net.all_drop) 188 | summary_op_val = sess.run(summary_op, feed_dict=feed_dict) 189 | summary.add_summary(summary_op_val, count) 190 | 191 | # save ckpt files 192 | if count > 0 and count % args.ckpt_interval == 0: 193 | filename = 'InsightFace_iter_{:d}'.format(count) + '.ckpt' 194 | filename = os.path.join(args.ckpt_path, filename) 195 | saver.save(sess, filename) 196 | 197 | # validate 198 | if count > 0 and count % args.validate_interval == 0: 199 | feed_dict_test ={dropout_rate: 1.0} 200 | feed_dict_test.update(tl.utils.dict_to_one(net.all_drop)) 201 | results = ver_test(ver_list=ver_list, ver_name_list=ver_name_list, nbatch=count, sess=sess, 202 | embedding_tensor=embedding_tensor, batch_size=args.batch_size, feed_dict=feed_dict_test, 203 | input_placeholder=images) 204 | print('test accuracy is: ', str(results[0])) 205 | total_accuracy[str(count)] = results[0] 206 | log_file.write('########'*10+'\n') 207 | log_file.write(','.join(list(total_accuracy.keys())) + '\n') 208 | 
log_file.write(','.join([str(val) for val in list(total_accuracy.values())])+'\n') 209 | log_file.flush() 210 | if max(results) > 0.996: 211 | print('best accuracy is %.5f' % max(results)) 212 | filename = 'InsightFace_iter_best_{:d}'.format(count) + '.ckpt' 213 | filename = os.path.join(args.ckpt_path, filename) 214 | saver.save(sess, filename) 215 | log_file.write('######Best Accuracy######'+'\n') 216 | log_file.write(str(max(results))+'\n') 217 | log_file.write(filename+'\n') 218 | 219 | log_file.flush() 220 | except tf.errors.OutOfRangeError: 221 | print("End of epoch %d" % i) 222 | break 223 | log_file.write('\n') 224 | log_file.close() -------------------------------------------------------------------------------- /train_nets_mgpu.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorlayer as tl 3 | import argparse 4 | from data.mx2tfrecords import parse_function 5 | import os 6 | from nets.L_Resnet_E_IR_MGPU import get_resnet 7 | from losses.face_losses import arcface_loss 8 | import time 9 | from data.eval_data_reader import load_bin 10 | from verification import ver_test 11 | 12 | 13 | def get_parser(): 14 | parser = argparse.ArgumentParser(description='parameters to train net') 15 | parser.add_argument('--net_depth', default=50, help='resnet depth, default is 50') 16 | parser.add_argument('--epoch', default=100000, help='epoch to train the network') 17 | parser.add_argument('--batch_size', default=32, help='batch size to train network') 18 | parser.add_argument('--lr_steps', default=[40000, 60000, 80000], help='learning rate to train network') 19 | parser.add_argument('--momentum', default=0.9, help='learning alg momentum') 20 | parser.add_argument('--weight_deacy', default=5e-4, help='weight decay factor') 21 | # parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets') 22 | parser.add_argument('--eval_datasets', default=['lfw', 'cfp_fp'], help='evaluation datasets') 23 | parser.add_argument('--eval_db_path', default='./datasets/faces_ms1m_112x112', help='evaluation datasets base path') 24 | parser.add_argument('--image_size', default=[112, 112], help='the image size') 25 | parser.add_argument('--num_output', default=85164, help='number of output classes (identities)') 26 | parser.add_argument('--tfrecords_file_path', default='./datasets/tfrecords', type=str, 27 | help='path to the output of tfrecords file path') 28 | parser.add_argument('--summary_path', default='./output/summary', help='the summary file save path') 29 | parser.add_argument('--ckpt_path', default='./output/ckpt', help='the ckpt file save path') 30 | parser.add_argument('--saver_maxkeep', default=100, help='tf.train.Saver max keep ckpt files') 31 | parser.add_argument('--buffer_size', default=50000, help='tf dataset api buffer size') 32 | parser.add_argument('--log_device_mapping', default=False, help='show device placement log') 33 | parser.add_argument('--summary_interval', default=300, help='interval to save summary') 34 | parser.add_argument('--ckpt_interval', default=5000, help='intervals to save ckpt file') 35 | parser.add_argument('--validate_interval', default=2000, help='intervals to run validation') 36 | parser.add_argument('--show_info_interval', default=20, help='intervals to show information') 37 | parser.add_argument('--num_gpus', default=2, help='the num of gpus') 38 | parser.add_argument('--tower_name', default='tower', help='tower name') 39 | args = parser.parse_args() 40 | return args 41 
| 42 | 43 | def average_gradients(tower_grads): 44 | """Calculate the average gradient for each shared variable across all towers. 45 | 46 | Note that this function provides a synchronization point across all towers. 47 | 48 | Args: 49 | tower_grads: List of lists of (gradient, variable) tuples. The outer list 50 | is over individual gradients. The inner list is over the gradient 51 | calculation for each tower. 52 | Returns: 53 | List of pairs of (gradient, variable) where the gradient has been averaged 54 | across all towers. 55 | """ 56 | average_grads = [] 57 | for grad_and_vars in zip(*tower_grads): 58 | # Note that each grad_and_vars looks like the following: 59 | # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN)) 60 | grads = [] 61 | for g, _ in grad_and_vars: 62 | # Add 0 dimension to the gradients to represent the tower. 63 | expanded_g = tf.expand_dims(g, 0) 64 | 65 | # Append on a 'tower' dimension which we will average over below. 66 | grads.append(expanded_g) 67 | 68 | # Average over the 'tower' dimension. 69 | grad = tf.concat(axis=0, values=grads) 70 | grad = tf.reduce_mean(grad, 0) 71 | 72 | # Keep in mind that the Variables are redundant because they are shared 73 | # across towers. So .. we will just return the first tower's pointer to 74 | # the Variable. 75 | v = grad_and_vars[0][1] 76 | grad_and_var = (grad, v) 77 | average_grads.append(grad_and_var) 78 | return average_grads 79 | 80 | 81 | if __name__ == '__main__': 82 | # os.environ["CUDA_VISIBLE_DEVICES"] = "0" 83 | # 1. define global parameters 84 | args = get_parser() 85 | global_step = tf.Variable(name='global_step', initial_value=0, trainable=False) 86 | inc_op = tf.assign_add(global_step, 1, name='increment_global_step') 87 | trainable = tf.placeholder(name='trainable_bn', dtype=tf.bool) 88 | images = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32) 89 | labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64) 90 | # splits input to different gpu 91 | images_s = tf.split(images, num_or_size_splits=args.num_gpus, axis=0) 92 | labels_s = tf.split(labels, num_or_size_splits=args.num_gpus, axis=0) 93 | # 2 prepare train datasets and test datasets by using tensorflow dataset api 94 | # 2.1 train datasets 95 | # the image is substracted 127.5 and multiplied 1/128. 96 | # random flip left right 97 | tfrecords_f = os.path.join(args.tfrecords_file_path, 'tran.tfrecords') 98 | dataset = tf.data.TFRecordDataset(tfrecords_f) 99 | dataset = dataset.map(parse_function) 100 | dataset = dataset.shuffle(buffer_size=args.buffer_size) 101 | dataset = dataset.batch(args.batch_size) 102 | iterator = dataset.make_initializable_iterator() 103 | next_element = iterator.get_next() 104 | # 2.2 prepare validate datasets 105 | ver_list = [] 106 | ver_name_list = [] 107 | for db in args.eval_datasets: 108 | print('begin db %s convert.' % db) 109 | data_set = load_bin(db, args.image_size, args) 110 | ver_list.append(data_set) 111 | ver_name_list.append(db) 112 | 113 | # 3. 
define network, loss, optimize method, learning rate schedule, summary writer, saver 114 | # 3.1 inference phase 115 | w_init_method = tf.contrib.layers.xavier_initializer(uniform=False) 116 | # 3.2 define the learning rate schedule 117 | p = int(512.0/args.batch_size) 118 | lr_steps = [p*val for val in args.lr_steps] 119 | print('learning rate steps: ', lr_steps) 120 | lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=[0.001, 0.0001, 0.00005, 0.00001], name='lr_schedule') 121 | # 3.3 define the optimize method 122 | opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=args.momentum) 123 | 124 | # Calculate the gradients for each model tower. 125 | tower_grads = [] 126 | tl.layers.set_name_reuse(True) 127 | loss_dict = {} 128 | drop_dict = {} 129 | loss_keys = [] 130 | with tf.variable_scope(tf.get_variable_scope()): 131 | for i in range(args.num_gpus): 132 | with tf.device('/gpu:%d' % i): 133 | with tf.name_scope('%s_%d' % (args.tower_name, i)) as scope: 134 | net = get_resnet(images_s[i], args.net_depth, type='ir', w_init=w_init_method, trainable=trainable) 135 | logit = arcface_loss(embedding=net.outputs, labels=labels_s[i], w_init=w_init_method, out_num=args.num_output) 136 | # Reuse variables for the next tower. 137 | tf.get_variable_scope().reuse_variables() 138 | # define the cross entropy 139 | inference_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels_s[i])) 140 | # define weight decay losses 141 | wd_loss = 0 142 | for weights in tl.layers.get_variables_with_name('W_conv2d', True, True): 143 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(weights) 144 | for W in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/W', True, True): 145 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(W) 146 | for weights in tl.layers.get_variables_with_name('embedding_weights', True, True): 147 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(weights) 148 | for gamma in tl.layers.get_variables_with_name('gamma', True, True): 149 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(gamma) 150 | for beta in tl.layers.get_variables_with_name('beta', True, True): 151 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(beta) 152 | for alphas in tl.layers.get_variables_with_name('alphas', True, True): 153 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(alphas) 154 | for bias in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/b', True, True): 155 | wd_loss += tf.contrib.layers.l2_regularizer(args.weight_deacy)(bias) 156 | total_loss = inference_loss + wd_loss 157 | 158 | loss_dict[('inference_loss_%s_%d' % ('gpu', i))] = inference_loss 159 | loss_keys.append(('inference_loss_%s_%d' % ('gpu', i))) 160 | loss_dict[('wd_loss_%s_%d' % ('gpu', i))] = wd_loss 161 | loss_keys.append(('wd_loss_%s_%d' % ('gpu', i))) 162 | loss_dict[('total_loss_%s_%d' % ('gpu', i))] = total_loss 163 | loss_keys.append(('total_loss_%s_%d' % ('gpu', i))) 164 | grads = opt.compute_gradients(total_loss) 165 | tower_grads.append(grads) 166 | drop_dict.update(net.all_drop) 167 | if i == 0: 168 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 169 | pred = tf.nn.softmax(logit) 170 | acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels_s[i]), dtype=tf.float32)) 171 | embedding_tensor_gpu0 = net.outputs 172 | 173 | grads = average_gradients(tower_grads) 174 | with tf.control_dependencies(update_ops): 175 | # Apply the gradients 
to adjust the shared variables. 176 | train_op = opt.apply_gradients(grads, global_step=global_step) 177 | 178 | config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=args.log_device_mapping) 179 | config.gpu_options.allow_growth = True 180 | sess = tf.Session(config=config) 181 | # summary writer 182 | summary = tf.summary.FileWriter(args.summary_path, sess.graph) 183 | summaries = [] 184 | # add grad histogram op 185 | for grad, var in grads: 186 | if grad is not None: 187 | summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad)) 188 | # add trainabel variable gradients 189 | for var in tf.trainable_variables(): 190 | summaries.append(tf.summary.histogram(var.op.name, var)) 191 | # add loss summary 192 | for keys, val in loss_dict.items(): 193 | summaries.append(tf.summary.scalar(keys, val)) 194 | # add learning rate 195 | summaries.append(tf.summary.scalar('leraning_rate', lr)) 196 | summary_op = tf.summary.merge(summaries) 197 | 198 | # Create a saver. 199 | saver = tf.train.Saver(tf.global_variables()) 200 | # init all variables 201 | sess.run(tf.global_variables_initializer()) 202 | 203 | drop_dict_test = {keys: 1 for keys in drop_dict.keys()} 204 | # begin iteration 205 | count = 0 206 | for i in range(args.epoch): 207 | sess.run(iterator.initializer) 208 | while True: 209 | try: 210 | images_train, labels_train = sess.run(next_element) 211 | feed_dict = {images: images_train, labels: labels_train, trainable: True} 212 | feed_dict.update(drop_dict) 213 | start = time.time() 214 | _, _, inference_loss_val_gpu_1, wd_loss_val_gpu_1, total_loss_gpu_1, inference_loss_val_gpu_2, \ 215 | wd_loss_val_gpu_2, total_loss_gpu_2, acc_val = sess.run([train_op, inc_op, loss_dict[loss_keys[0]], 216 | loss_dict[loss_keys[1]], 217 | loss_dict[loss_keys[2]], 218 | loss_dict[loss_keys[3]], 219 | loss_dict[loss_keys[4]], 220 | loss_dict[loss_keys[5]], acc], 221 | feed_dict=feed_dict) 222 | end = time.time() 223 | pre_sec = args.batch_size/(end - start) 224 | # print training information 225 | if count > 0 and count % args.show_info_interval == 0: 226 | print('epoch %d, total_step %d, total loss gpu 1 is %.2f , inference loss gpu 1 is %.2f, weight deacy ' 227 | 'loss gpu 1 is %.2f, total loss gpu 2 is %.2f , inference loss gpu 2 is %.2f, weight deacy ' 228 | 'loss gpu 2 is %.2f, training accuracy is %.6f, time %.3f samples/sec' % 229 | (i, count, total_loss_gpu_1, inference_loss_val_gpu_1, wd_loss_val_gpu_1, total_loss_gpu_2, 230 | inference_loss_val_gpu_2, wd_loss_val_gpu_2, acc_val, pre_sec)) 231 | count += 1 232 | 233 | # save summary 234 | if count > 0 and count % args.summary_interval == 0: 235 | feed_dict = {images: images_train, labels: labels_train, trainable: True} 236 | feed_dict.update(drop_dict) 237 | summary_op_val = sess.run(summary_op, feed_dict=feed_dict) 238 | summary.add_summary(summary_op_val, count) 239 | 240 | # save ckpt files 241 | if count > 0 and count % args.ckpt_interval == 0: 242 | filename = 'InsightFace_iter_{:d}'.format(count) + '.ckpt' 243 | filename = os.path.join(args.ckpt_path, filename) 244 | saver.save(sess, filename) 245 | # # validate 246 | if count > 0 and count % args.validate_interval == 0: 247 | feed_dict_test ={trainable: False} 248 | feed_dict_test.update(drop_dict_test) 249 | results = ver_test(ver_list=ver_list, ver_name_list=ver_name_list, nbatch=count, sess=sess, 250 | embedding_tensor=embedding_tensor_gpu0, batch_size=args.batch_size//args.num_gpus, feed_dict=feed_dict_test, 251 | input_placeholder=images_s[0]) 252 | if 
                    if max(results) > 0.99:
                        print('best accuracy is %.5f' % max(results))
                        filename = 'InsightFace_iter_best_{:d}'.format(count) + '.ckpt'
                        filename = os.path.join(args.ckpt_path, filename)
                        saver.save(sess, filename)
            except tf.errors.OutOfRangeError:
                print("End of epoch %d" % i)
                break
-------------------------------------------------------------------------------- /train_nets_mgpu_new.py: --------------------------------------------------------------------------------
import tensorflow as tf
import tensorlayer as tl
import argparse
from data.mx2tfrecords import parse_function
import os
from nets.L_Resnet_E_IR_MGPU import get_resnet
from losses.face_losses import arcface_loss
import time
from data.eval_data_reader import load_bin
from verification import ver_test


def get_parser():
    parser = argparse.ArgumentParser(description='parameters to train net')
    parser.add_argument('--net_depth', default=100, type=int, help='resnet depth, default is 100')
    parser.add_argument('--epoch', default=100000, type=int, help='number of epochs to train the network')
    parser.add_argument('--batch_size', default=64, type=int, help='batch size to train the network')
    parser.add_argument('--lr_steps', default=[40000, 60000, 80000], help='base steps at which to decay the learning rate')
    parser.add_argument('--momentum', default=0.9, type=float, help='optimizer momentum')
    parser.add_argument('--weight_decay', default=5e-4, type=float, help='weight decay coefficient')
    # parser.add_argument('--eval_datasets', default=['lfw', 'cfp_ff', 'cfp_fp', 'agedb_30'], help='evaluation datasets')
    parser.add_argument('--eval_datasets', default=['lfw', 'cfp_fp'], help='evaluation datasets')
    parser.add_argument('--eval_db_path', default='./datasets/faces_ms1m_112x112', help='evaluation datasets base path')
    parser.add_argument('--image_size', default=[112, 112], help='the image size')
    parser.add_argument('--num_output', default=85164, type=int, help='number of identities (softmax output size)')
    parser.add_argument('--tfrecords_file_path', default='./datasets/tfrecords', type=str,
                        help='path to the tfrecords files')
    parser.add_argument('--summary_path', default='./output/summary', help='the summary file save path')
    parser.add_argument('--ckpt_path', default='./output/ckpt', help='the ckpt file save path')
    parser.add_argument('--saver_maxkeep', default=100, type=int, help='tf.train.Saver max keep ckpt files')
    parser.add_argument('--buffer_size', default=100000, type=int, help='tf dataset api buffer size')
    parser.add_argument('--log_device_mapping', default=False, help='show device placement log')
    parser.add_argument('--summary_interval', default=300, type=int, help='interval to save summary')
    parser.add_argument('--ckpt_interval', default=5000, type=int, help='intervals to save ckpt file')
    parser.add_argument('--validate_interval', default=2000, type=int, help='intervals to run validation')
    parser.add_argument('--show_info_interval', default=20, type=int, help='intervals to show information')
    parser.add_argument('--num_gpus', default=2, type=int, help='the number of gpus')
    parser.add_argument('--tower_name', default='tower', help='tower name')
    args = parser.parse_args()
    return args
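

# A hypothetical invocation using the flags defined above (the paths are
# placeholders for wherever the tfrecords and output directories actually live):
#
#   python train_nets_mgpu_new.py --net_depth 50 --batch_size 64 --num_gpus 2 \
#       --tfrecords_file_path ./datasets/tfrecords --ckpt_path ./output/ckpt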


def average_gradients(tower_grads):
    """Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    Args:
        tower_grads: List of lists of (gradient, variable) tuples. The outer list
            is over individual gradients. The inner list is over the gradient
            calculation for each tower.
    Returns:
        List of pairs of (gradient, variable) where the gradient has been averaged
        across all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(axis=0, values=grads)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads
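
# A minimal shape sketch of average_gradients (illustrative only): with two
# towers that each contribute one (gradient, variable) pair,
#     tower_grads = [[(g0_gpu0, v0)], [(g0_gpu1, v0)]]
# zip(*tower_grads) pairs the per-tower gradients of the same variable, the
# gradients are stacked on a new leading 'tower' axis and averaged over it,
# and the result is [(mean_grad, v0)] -- the first tower's variable reference
# is reused because the variables are shared across towers.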


if __name__ == '__main__':
    # os.environ["CUDA_VISIBLE_DEVICES"] = "0"
    # 1. define global parameters
    args = get_parser()
    global_step = tf.Variable(name='global_step', initial_value=0, trainable=False)
    inc_op = tf.assign_add(global_step, 1, name='increment_global_step')
    images = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32)
    images_test = tf.placeholder(name='img_inputs', shape=[None, *args.image_size, 3], dtype=tf.float32)
    labels = tf.placeholder(name='img_labels', shape=[None, ], dtype=tf.int64)
    dropout_rate = tf.placeholder(name='dropout_rate', dtype=tf.float32)
    # split the inputs across the gpus
    images_s = tf.split(images, num_or_size_splits=args.num_gpus, axis=0)
    labels_s = tf.split(labels, num_or_size_splits=args.num_gpus, axis=0)
    # 2. prepare train datasets and test datasets by using the tensorflow dataset api
    # 2.1 train datasets
    # each image has 127.5 subtracted and is then scaled by 1/128
    # random flip left right
    tfrecords_f = os.path.join(args.tfrecords_file_path, 'tran.tfrecords')
    dataset = tf.data.TFRecordDataset(tfrecords_f)
    dataset = dataset.map(parse_function)
    dataset = dataset.shuffle(buffer_size=args.buffer_size)
    dataset = dataset.batch(args.batch_size)
    iterator = dataset.make_initializable_iterator()
    next_element = iterator.get_next()
    # 2.2 prepare validate datasets
    ver_list = []
    ver_name_list = []
    for db in args.eval_datasets:
        print('begin db %s convert.' % db)
        data_set = load_bin(db, args.image_size, args)
        ver_list.append(data_set)
        ver_name_list.append(db)
    # 3. define network, loss, optimize method, learning rate schedule, summary writer, saver
    # 3.1 inference phase
    w_init_method = tf.contrib.layers.xavier_initializer(uniform=False)
    # 3.2 define the learning rate schedule
    p = int(512.0/args.batch_size)
    lr_steps = [p*val for val in args.lr_steps]
    print('learning rate steps: ', lr_steps)
    lr = tf.train.piecewise_constant(global_step, boundaries=lr_steps, values=[0.001, 0.0005, 0.0003, 0.0001],
                                     name='lr_schedule')
    # 3.3 define the optimize method
    opt = tf.train.MomentumOptimizer(learning_rate=lr, momentum=args.momentum)

    # Calculate the gradients for each model tower.
    tower_grads = []
    tl.layers.set_name_reuse(True)
    loss_dict = {}
    loss_keys = []
    with tf.variable_scope(tf.get_variable_scope()):
        for i in range(args.num_gpus):
            with tf.device('/gpu:%d' % i):
                with tf.name_scope('%s_%d' % (args.tower_name, i)) as scope:
                    net = get_resnet(images_s[i], args.net_depth, type='ir', w_init=w_init_method, trainable=True,
                                     keep_rate=dropout_rate)
                    logit = arcface_loss(embedding=net.outputs, labels=labels_s[i], w_init=w_init_method,
                                         out_num=args.num_output)
                    # Reuse variables for the next tower.
                    tf.get_variable_scope().reuse_variables()
                    # define the cross entropy loss
                    inference_loss = tf.reduce_mean(
                        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logit, labels=labels_s[i]))
                    # define the weight decay losses
                    wd_loss = 0
                    for weights in tl.layers.get_variables_with_name('W_conv2d', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(weights)
                    for W in tl.layers.get_variables_with_name('resnet_v1_50/E_DenseLayer/W', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(W)
                    for weights in tl.layers.get_variables_with_name('embedding_weights', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(weights)
                    for gamma in tl.layers.get_variables_with_name('gamma', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(gamma)
                    for alphas in tl.layers.get_variables_with_name('alphas', True, True):
                        wd_loss += tf.contrib.layers.l2_regularizer(args.weight_decay)(alphas)
                    total_loss = inference_loss + wd_loss

                    loss_dict['inference_loss_%s_%d' % ('gpu', i)] = inference_loss
                    loss_keys.append('inference_loss_%s_%d' % ('gpu', i))
                    loss_dict['wd_loss_%s_%d' % ('gpu', i)] = wd_loss
                    loss_keys.append('wd_loss_%s_%d' % ('gpu', i))
                    loss_dict['total_loss_%s_%d' % ('gpu', i)] = total_loss
                    loss_keys.append('total_loss_%s_%d' % ('gpu', i))
                    grads = opt.compute_gradients(total_loss)
                    tower_grads.append(grads)
                    if i == 0:
                        test_net = get_resnet(images_test, args.net_depth, type='ir', w_init=w_init_method,
                                              trainable=False, keep_rate=dropout_rate)
                        embedding_tensor = test_net.outputs
                        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
                        pred = tf.nn.softmax(logit)
                        acc = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(pred, axis=1), labels_s[i]), dtype=tf.float32))

    grads = average_gradients(tower_grads)
    with tf.control_dependencies(update_ops):
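        # update_ops holds the batch-norm moving-average update ops collected
        # from the first tower above; wrapping apply_gradients in this control
        # dependency makes every training step refresh those moving statistics,
        # which sess.run(train_op) would otherwise never execute.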
        # Apply the gradients to adjust the shared variables.
        train_op = opt.apply_gradients(grads, global_step=global_step)

    config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=args.log_device_mapping)
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)
    # summary writer
    summary = tf.summary.FileWriter(args.summary_path, sess.graph)
    summaries = []
    # add grad histogram op
    for grad, var in grads:
        if grad is not None:
            summaries.append(tf.summary.histogram(var.op.name + '/gradients', grad))
    # add histograms for trainable variables
    for var in tf.trainable_variables():
        summaries.append(tf.summary.histogram(var.op.name, var))
    # add loss summary
    for keys, val in loss_dict.items():
        summaries.append(tf.summary.scalar(keys, val))
    # add learning rate
    summaries.append(tf.summary.scalar('learning_rate', lr))
    summary_op = tf.summary.merge(summaries)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables())
    # init all variables
    sess.run(tf.global_variables_initializer())
    # begin iteration
    count = 0
    for i in range(args.epoch):
        sess.run(iterator.initializer)
        while True:
            try:
                images_train, labels_train = sess.run(next_element)
                feed_dict = {images: images_train, labels: labels_train, dropout_rate: 0.4}
                start = time.time()
                _, _, inference_loss_val_gpu_1, wd_loss_val_gpu_1, total_loss_gpu_1, inference_loss_val_gpu_2, \
                    wd_loss_val_gpu_2, total_loss_gpu_2, acc_val = sess.run([train_op, inc_op, loss_dict[loss_keys[0]],
                                                                             loss_dict[loss_keys[1]],
                                                                             loss_dict[loss_keys[2]],
                                                                             loss_dict[loss_keys[3]],
                                                                             loss_dict[loss_keys[4]],
                                                                             loss_dict[loss_keys[5]], acc],
                                                                            feed_dict=feed_dict)
                end = time.time()
                pre_sec = args.batch_size/(end - start)
                # print training information
                if count > 0 and count % args.show_info_interval == 0:
                    print('epoch %d, total_step %d, total loss: [%.2f, %.2f], inference loss: [%.2f, %.2f], '
                          'weight decay loss: [%.2f, %.2f], training accuracy is %.6f, time %.3f samples/sec' %
                          (i, count, total_loss_gpu_1, total_loss_gpu_2, inference_loss_val_gpu_1,
                           inference_loss_val_gpu_2, wd_loss_val_gpu_1, wd_loss_val_gpu_2, acc_val, pre_sec))
                count += 1

                # save summary
                if count > 0 and count % args.summary_interval == 0:
                    feed_dict = {images: images_train, labels: labels_train, dropout_rate: 0.4}
                    summary_op_val = sess.run(summary_op, feed_dict=feed_dict)
                    summary.add_summary(summary_op_val, count)

                # save ckpt files
                if count > 0 and count % args.ckpt_interval == 0:
                    filename = 'InsightFace_iter_{:d}'.format(count) + '.ckpt'
                    filename = os.path.join(args.ckpt_path, filename)
                    saver.save(sess, filename)
                # validate
                if count >= 0 and count % args.validate_interval == 0:
                    feed_dict_test = {dropout_rate: 1.0}
                    results = ver_test(ver_list=ver_list, ver_name_list=ver_name_list, nbatch=count, sess=sess,
                                       embedding_tensor=embedding_tensor,
                                       batch_size=args.batch_size//args.num_gpus, feed_dict=feed_dict_test,
                                       input_placeholder=images_test)
                    if max(results) > 0.998:
                        print('best accuracy is %.5f' % max(results))
                        filename = 'InsightFace_iter_best_{:d}'.format(count) + '.ckpt'
                        filename = os.path.join(args.ckpt_path, filename)
                        saver.save(sess, filename)
            except tf.errors.OutOfRangeError:
                print("End of epoch %d" % i)
                break
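
    # A sketch of restoring one of the checkpoints saved above in a separate
    # evaluation session (the iteration number 100000 is a hypothetical example
    # of the 'InsightFace_iter_{:d}.ckpt' naming used here):
    #   saver = tf.train.Saver()
    #   saver.restore(sess, './output/ckpt/InsightFace_iter_100000.ckpt')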
-------------------------------------------------------------------------------- /verification.py: --------------------------------------------------------------------------------
"""Helper for evaluation on the Labeled Faces in the Wild dataset
"""

# MIT License
#
# Copyright (c) 2016 David Sandberg
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import tensorflow as tf
import numpy as np
from sklearn.model_selection import KFold
from sklearn.decomposition import PCA
import sklearn.preprocessing
from scipy import interpolate
import datetime
import mxnet as mx


def calculate_roc(thresholds, embeddings1, embeddings2, actual_issame, nrof_folds=10, pca=0):
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    tprs = np.zeros((nrof_folds, nrof_thresholds))
    fprs = np.zeros((nrof_folds, nrof_thresholds))
    accuracy = np.zeros((nrof_folds))
    indices = np.arange(nrof_pairs)
    # print('pca', pca)

    if pca == 0:
        diff = np.subtract(embeddings1, embeddings2)
        dist = np.sum(np.square(diff), 1)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):
        # print('train_set', train_set)
        # print('test_set', test_set)
        if pca > 0:
            print('doing pca on', fold_idx)
            embed1_train = embeddings1[train_set]
            embed2_train = embeddings2[train_set]
            _embed_train = np.concatenate((embed1_train, embed2_train), axis=0)
            # print(_embed_train.shape)
            pca_model = PCA(n_components=pca)
            pca_model.fit(_embed_train)
            embed1 = pca_model.transform(embeddings1)
            embed2 = pca_model.transform(embeddings2)
            embed1 = sklearn.preprocessing.normalize(embed1)
            embed2 = sklearn.preprocessing.normalize(embed2)
            # print(embed1.shape, embed2.shape)
            diff = np.subtract(embed1, embed2)
            dist = np.sum(np.square(diff), 1)

        # Find the best threshold for the fold
        acc_train = np.zeros((nrof_thresholds))
        for threshold_idx, threshold in enumerate(thresholds):
            _, _, acc_train[threshold_idx] = calculate_accuracy(threshold, dist[train_set], actual_issame[train_set])
        best_threshold_index = np.argmax(acc_train)
        print('best_threshold_index', best_threshold_index, acc_train[best_threshold_index])
        for threshold_idx, threshold in enumerate(thresholds):
            tprs[fold_idx, threshold_idx], fprs[fold_idx, threshold_idx], _ = calculate_accuracy(threshold,
                                                                                                 dist[test_set],
                                                                                                 actual_issame[test_set])
        _, _, accuracy[fold_idx] = calculate_accuracy(thresholds[best_threshold_index], dist[test_set],
                                                      actual_issame[test_set])

    tpr = np.mean(tprs, 0)
    fpr = np.mean(fprs, 0)
    return tpr, fpr, accuracy


def calculate_accuracy(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    tp = np.sum(np.logical_and(predict_issame, actual_issame))
    fp = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    tn = np.sum(np.logical_and(np.logical_not(predict_issame), np.logical_not(actual_issame)))
    fn = np.sum(np.logical_and(np.logical_not(predict_issame), actual_issame))

    tpr = 0 if (tp + fn == 0) else float(tp) / float(tp + fn)
    fpr = 0 if (fp + tn == 0) else float(fp) / float(fp + tn)
    acc = float(tp + tn) / dist.size
    return tpr, fpr, acc
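
# A small worked example of calculate_accuracy (illustrative values only):
# with dist = [0.5, 1.2, 0.3, 2.0], actual_issame = [True, True, False, False]
# and threshold = 1.0, predict_issame is [True, False, True, False], so
# tp = 1, fp = 1, tn = 1, fn = 1, giving tpr = 0.5, fpr = 0.5 and acc = 0.5.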


def calculate_val(thresholds, embeddings1, embeddings2, actual_issame, far_target, nrof_folds=10):
    '''
    Copied from [insightface](https://github.com/deepinsight/insightface)
    :param thresholds: candidate decision thresholds on the squared distance
    :param embeddings1: embeddings of the first image in each pair
    :param embeddings2: embeddings of the second image in each pair
    :param actual_issame: ground-truth same/different label for each pair
    :param far_target: target false accept rate
    :param nrof_folds: number of cross-validation folds
    :return: mean validation rate, its std over folds, and the mean FAR
    '''
    assert (embeddings1.shape[0] == embeddings2.shape[0])
    assert (embeddings1.shape[1] == embeddings2.shape[1])
    nrof_pairs = min(len(actual_issame), embeddings1.shape[0])
    nrof_thresholds = len(thresholds)
    k_fold = KFold(n_splits=nrof_folds, shuffle=False)

    val = np.zeros(nrof_folds)
    far = np.zeros(nrof_folds)

    diff = np.subtract(embeddings1, embeddings2)
    dist = np.sum(np.square(diff), 1)
    indices = np.arange(nrof_pairs)

    for fold_idx, (train_set, test_set) in enumerate(k_fold.split(indices)):

        # Find the threshold that gives FAR = far_target
        far_train = np.zeros(nrof_thresholds)
        for threshold_idx, threshold in enumerate(thresholds):
            _, far_train[threshold_idx] = calculate_val_far(threshold, dist[train_set], actual_issame[train_set])
        if np.max(far_train) >= far_target:
            f = interpolate.interp1d(far_train, thresholds, kind='slinear')
            threshold = f(far_target)
        else:
            threshold = 0.0

        val[fold_idx], far[fold_idx] = calculate_val_far(threshold, dist[test_set], actual_issame[test_set])

    val_mean = np.mean(val)
    far_mean = np.mean(far)
    val_std = np.std(val)
    return val_mean, val_std, far_mean


def calculate_val_far(threshold, dist, actual_issame):
    predict_issame = np.less(dist, threshold)
    true_accept = np.sum(np.logical_and(predict_issame, actual_issame))
    false_accept = np.sum(np.logical_and(predict_issame, np.logical_not(actual_issame)))
    n_same = np.sum(actual_issame)
    n_diff = np.sum(np.logical_not(actual_issame))
    val = float(true_accept) / float(n_same)
    far = float(false_accept) / float(n_diff)
    return val, far


def evaluate(embeddings, actual_issame, nrof_folds=10, pca=0):
    # Calculate evaluation metrics
    thresholds = np.arange(0, 4, 0.01)
    embeddings1 = embeddings[0::2]
    embeddings2 = embeddings[1::2]
    tpr, fpr, accuracy = calculate_roc(thresholds, embeddings1, embeddings2,
                                       np.asarray(actual_issame), nrof_folds=nrof_folds, pca=pca)
    thresholds = np.arange(0, 4, 0.001)
    val, val_std, far = calculate_val(thresholds, embeddings1, embeddings2,
                                      np.asarray(actual_issame), 1e-3, nrof_folds=nrof_folds)
    return tpr, fpr, accuracy, val, val_std, far


def data_iter(datasets, batch_size):
    data_num = datasets.shape[0]
    for i in range(0, data_num, batch_size):
        yield datasets[i:min(i+batch_size, data_num), ...]
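
# data_iter batching sketch: for datasets with datasets.shape[0] == 10 and
# batch_size == 4 the generator yields slices of 4, 4 and 2 rows, so the last
# batch may be smaller than batch_size (which test() below accounts for with
# the min(...) bounds when writing embeddings back).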


def test(data_set, sess, embedding_tensor, batch_size, label_shape=None, feed_dict=None, input_placeholder=None):
    '''
    References the official implementation from [insightface](https://github.com/deepinsight/insightface)
    :param data_set: tuple of (data_list, issame_list) produced by load_bin
    :param sess: the tf.Session to run the embedding computation in
    :param embedding_tensor: tensor that outputs the face embeddings
    :param batch_size: batch size used when feeding images
    :param label_shape: unused, kept for interface compatibility
    :param feed_dict: extra feeds (e.g. the dropout keep rate)
    :param input_placeholder: placeholder that receives the image batches
    :return: accuracy statistics, the mean embedding norm and the embeddings
    '''
    print('testing verification..')
    data_list = data_set[0]
    issame_list = data_set[1]
    embeddings_list = []
    time_consumed = 0.0
    for i in range(len(data_list)):
        datas = data_list[i]
        embeddings = None
        feed_dict.setdefault(input_placeholder, None)
        for idx, data in enumerate(data_iter(datas, batch_size)):
            data_tmp = data.copy()  # fix issue #4
            data_tmp -= 127.5
            data_tmp *= 0.0078125
            feed_dict[input_placeholder] = data_tmp
            time0 = datetime.datetime.now()
            _embeddings = sess.run(embedding_tensor, feed_dict)
            time_now = datetime.datetime.now()
            diff = time_now - time0
            time_consumed += diff.total_seconds()
            if embeddings is None:
                embeddings = np.zeros((datas.shape[0], _embeddings.shape[1]))
            try:
                embeddings[idx*batch_size:min((idx+1)*batch_size, datas.shape[0]), ...] = _embeddings
            except ValueError:
                print('idx*batch_size is %d, min((idx+1)*batch_size, datas.shape[0]) is %d, batch_size is %d, '
                      'datas.shape[0] is %d' %
                      (idx*batch_size, min((idx+1)*batch_size, datas.shape[0]), batch_size, datas.shape[0]))
                print('embedding shape is ', _embeddings.shape)
        embeddings_list.append(embeddings)

    _xnorm = 0.0
    _xnorm_cnt = 0
    for embed in embeddings_list:
        for i in range(embed.shape[0]):
            _em = embed[i]
            _norm = np.linalg.norm(_em)
            # print(_em.shape, _norm)
            _xnorm += _norm
            _xnorm_cnt += 1
    _xnorm /= _xnorm_cnt

    acc1 = 0.0
    std1 = 0.0
    # combine the embeddings of the two copies (original and flipped) of each image
    embeddings = embeddings_list[0] + embeddings_list[1]
    embeddings = sklearn.preprocessing.normalize(embeddings)
    print(embeddings.shape)
    print('infer time', time_consumed)
    _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
    acc2, std2 = np.mean(accuracy), np.std(accuracy)
    return acc1, std1, acc2, std2, _xnorm, embeddings_list


def ver_test(ver_list, ver_name_list, nbatch, sess, embedding_tensor, batch_size, feed_dict, input_placeholder):
    results = []
    for i in range(len(ver_list)):
        acc1, std1, acc2, std2, xnorm, embeddings_list = test(data_set=ver_list[i], sess=sess,
                                                              embedding_tensor=embedding_tensor,
                                                              batch_size=batch_size, feed_dict=feed_dict,
                                                              input_placeholder=input_placeholder)
        print('[%s][%d]XNorm: %f' % (ver_name_list[i], nbatch, xnorm))
        print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (ver_name_list[i], nbatch, acc2, std2))
        results.append(acc2)
    return results
--------------------------------------------------------------------------------