├── .vscode └── settings.json ├── assests ├── ratio.JPG ├── resnet.jpg ├── benefit.JPG ├── inception.jpg ├── senet_block.JPG ├── incorporation.JPG └── state_of_art.JPG ├── data_dump.py ├── LICENSE ├── .gitignore ├── README.md ├── scene_eval.py ├── dataflow_input.py ├── cifar10.py ├── eval.py ├── SE_ResNeXt.py ├── pre_train.py ├── train.py ├── SE_Inception_v4.py ├── SE_Inception_resnet_v2.py └── resnet_model.py /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": false 3 | } -------------------------------------------------------------------------------- /assests/ratio.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/ratio.JPG -------------------------------------------------------------------------------- /assests/resnet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/resnet.jpg -------------------------------------------------------------------------------- /assests/benefit.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/benefit.JPG -------------------------------------------------------------------------------- /assests/inception.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/inception.jpg -------------------------------------------------------------------------------- /assests/senet_block.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/senet_block.JPG -------------------------------------------------------------------------------- /assests/incorporation.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/incorporation.JPG -------------------------------------------------------------------------------- /assests/state_of_art.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/state_of_art.JPG -------------------------------------------------------------------------------- /data_dump.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | import os 4 | from IPython import embed 5 | 6 | label_path = '/data0/AIChallenger/places_devkit/categories_places365.txt' 7 | data_path = '/data0/AIChallenger/data_256' 8 | 9 | result = [] 10 | with open(label_path, 'r') as f: 11 | lines = (line.strip() for line in f) 12 | for line in lines: 13 | path, label_id = line.split() 14 | path = path[1:] 15 | for filename in os.listdir(os.path.join(data_path, path)): 16 | image = {} 17 | image['image_id'] = os.path.join(path, filename) 18 | image['label_id'] = label_id 19 | result.append(image) 20 | 21 | with open('/data0/AIChallenger/data_256.json', 'w') as f: 22 | json.dump(result, f) 23 | print('write result json, num is %d' % len(result)) 24 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Junho Kim (1993.01.12) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *model*/ 2 | *log*/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SENet-Tensorflow 2 | Simple Tensorflow implementation of [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507) using **Cifar10** 3 | 4 | I implemented SENet on top of the following architectures: 5 | * [ResNeXt paper](https://arxiv.org/abs/1611.05431) 6 | * [Inception-v4, Inception-resnet-v2 paper](https://arxiv.org/abs/1602.07261) 7 | 8 | If you want to see the ***original author's code***, please refer to this [link](https://github.com/hujie-frank/SENet) 9 | 10 | 11 | 12 | ## Requirements 13 | * Tensorflow 1.x 14 | * Python 3.x 15 | * tflearn (install ***tflearn*** if you want a convenient ***global average pooling***) 16 | 17 | ## Issue 18 | ### Image_size 19 | * The paper's experiments use *ImageNet* 20 | * However, because the ***Inception network*** needs a larger **image size**, I used ***zero padding*** for Cifar10 21 | ```python 22 | input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]]) # size 32x32 -> 96x96 23 | ``` 24 | ### NOT ENOUGH GPU Memory 25 | * If you run out of GPU memory, please edit the code 26 | ```python 27 | with tf.Session() as sess : NO 28 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess : OK 29 | ``` 30 | 31 | ## Idea 32 | ### What is the "SE block" ? 33 | ![senet](./assests/senet_block.JPG) 34 | ```python 35 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name): 36 | with tf.name_scope(layer_name) : 37 | squeeze = Global_Average_Pooling(input_x) 38 | 39 | excitation = Fully_connected(squeeze, units=out_dim / ratio, layer_name=layer_name+'_fully_connected1') 40 | excitation = Relu(excitation) 41 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2') 42 | excitation = Sigmoid(excitation) 43 | 44 | excitation = tf.reshape(excitation, [-1,1,1,out_dim]) 45 | 46 | scale = input_x * excitation 47 | 48 | return scale 49 | ``` 50 | 51 | ### How is it applied? (Inception, Residual) 52 | <div align="center">
53 |   <img src="./assests/inception.jpg" width="420"> 54 |   <img src="./assests/resnet.jpg" width="420"> 55 | </div>
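In the residual case, the SE block recalibrates the channels of the residual branch right before it merges with the identity shortcut. A minimal sketch using the `Squeeze_excitation_layer` above (`residual_branch` is a placeholder for the usual conv-BN-ReLU stack, not a function from this repo):
```python
def se_residual_block(self, input_x, out_dim, ratio, layer_name):
    # residual branch, e.g. conv -> BN -> ReLU -> conv -> BN (placeholder)
    residual = residual_branch(input_x, out_dim)
    # channel-wise recalibration with the SE block defined above
    residual = self.Squeeze_excitation_layer(residual, out_dim, ratio, layer_name + '_se')
    # merge with the identity shortcut, as in the SE-ResNet module
    return Relu(input_x + residual)
```
The Inception case is the same idea, with the whole Inception module playing the role of the branch.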
56 | 57 | ### How should the *"Reduction ratio"* be set? 58 | ![reduction](./assests/ratio.JPG) 59 | * **original** refers to ***ResNet-50*** 60 | 61 | ## ImageNet Results 62 | ### Benefits against Network Depth 63 | ![depth](./assests/benefit.JPG) 64 | 65 | ### Incorporation with Modern Architecture 66 | ![incorporation](./assests/incorporation.JPG) 67 | 68 | ### Comparison with State-of-the-art 69 | ![compare](./assests/state_of_art.JPG) 70 | 71 | ## Cifar10 Results 72 | Coming soon 73 | 74 | ## Related works 75 | * [Densenet-Tensorflow](https://github.com/taki0112/Densenet-Tensorflow) 76 | * [ResNeXt-Tensorflow](https://github.com/taki0112/ResNeXt-Tensorflow) 77 | 78 | ## Reference 79 | * [Inception_korean](https://norman3.github.io/papers/docs/google_inception.html) 80 | 81 | ## Author 82 | Junho Kim 83 | -------------------------------------------------------------------------------- /scene_eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2017 challenger.ai 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """ 18 | Scene classification is a task of AI Challenger (全球AI挑战赛, Global AI Challenge) 19 | 20 | This python script calculates the accuracy of the test result, 21 | 22 | based on your submitted file and the reference file containing the ground truth. 23 | 24 | Usage: 25 | 26 | python scene_eval.py --submit SUBMIT_FILEPATH --ref REF_FILEPATH 27 | 28 | A test case is provided: the submitted file is submit.json, the reference file is ref.json; test it by: 29 | 30 | python scene_eval.py --submit ./submit.json --ref ./ref.json 31 | 32 | The accuracy of the submitted result, error messages and warning messages will be printed. 33 | """
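# Format note (inferred from __load_data/__eval_result below; the file name and
# label values here are hypothetical examples, not shipped test data): both
# files are JSON lists of {"image_id", "label_id"} records. The reference
# "label_id" is a single class id, while the submitted "label_id" is a
# prediction list of which only the first three entries are scored, e.g.
#   [{"image_id": "00001.jpg", "label_id": [3, 1, 7]}, ...]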
33 | """ 34 | 35 | import json 36 | import argparse 37 | import time 38 | 39 | 40 | def __load_data(submit_file, reference_file): 41 | # load submit result and reference result 42 | 43 | with open(submit_file, 'r') as file1: 44 | submit_data = json.load(file1) 45 | with open(reference_file, 'r') as file1: 46 | ref_data = json.load(file1) 47 | if len(submit_data) != len(ref_data): 48 | result['warning'].append('Inconsistent number of images between submission and reference data \n') 49 | submit_dict = {} 50 | ref_dict = {} 51 | for item in submit_data: 52 | submit_dict[item['image_id']] = item['label_id'] 53 | for item in ref_data: 54 | ref_dict[item['image_id']] = int(item['label_id']) 55 | return submit_dict, ref_dict 56 | 57 | 58 | def __eval_result(submit_dict, ref_dict): 59 | # eval accuracy 60 | 61 | right_count = 0 62 | for (key, value) in ref_dict.items(): 63 | 64 | if key not in set(submit_dict.keys()): 65 | result['warning'].append('lacking image %s in your submission file \n' % key) 66 | print('warnning: lacking image %s in your submission file' % key) 67 | continue 68 | 69 | if value in submit_dict[key][:3]: 70 | right_count += 1 71 | 72 | result['score'] = str(float(right_count)/max(len(ref_dict), 1e-5)) 73 | return result 74 | 75 | 76 | if __name__ == '__main__': 77 | 78 | PARSER = argparse.ArgumentParser() 79 | 80 | PARSER.add_argument( 81 | '--submit', 82 | type=str, 83 | default='./submit.json', 84 | help="""\ 85 | Path to submission file\ 86 | """ 87 | ) 88 | 89 | PARSER.add_argument( 90 | '--ref', 91 | type=str, 92 | default='./ref.json', 93 | help="""\ 94 | Path to reference file\ 95 | """ 96 | ) 97 | 98 | FLAGS = PARSER.parse_args() 99 | 100 | result = {'error': [], 'warning': [], 'score': None} 101 | 102 | START_TIME = time.time() 103 | SUBMIT = {} 104 | REF = {} 105 | 106 | try: 107 | SUBMIT, REF = __load_data(FLAGS.submit, FLAGS.ref) 108 | except Exception as error: 109 | result['error'].append(str(error)) 110 | try: 111 | result = __eval_result(SUBMIT, REF) 112 | except Exception as error: 113 | result['error'].append(str(error)) 114 | print('Evaluation time of your result: %f s' % (time.time() - START_TIME)) 115 | 116 | print(result) 117 | -------------------------------------------------------------------------------- /dataflow_input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import json 4 | import os 5 | import random 6 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 7 | from tensorpack.dataflow import ( 8 | AugmentImageComponent, PrefetchDataZMQ, 9 | BatchData, MultiThreadMapData, DataFlow) 10 | from IPython import embed 11 | 12 | class GoogleNetResize(imgaug.ImageAugmentor): 13 | """ 14 | crop 8%~100% of the original image 15 | See `Going Deeper with Convolutions` by Google. 
16 | """ 17 | def __init__(self, crop_area_fraction=0.08, 18 | aspect_ratio_low=0.75, aspect_ratio_high=1.333, 19 | target_shape=224): 20 | self._init(locals()) 21 | 22 | def _augment(self, img, _): 23 | h, w = img.shape[:2] 24 | area = h * w 25 | for _ in range(10): 26 | targetArea = self.rng.uniform(self.crop_area_fraction, 1.0) * area 27 | aspectR = self.rng.uniform(self.aspect_ratio_low, self.aspect_ratio_high) 28 | ww = int(np.sqrt(targetArea * aspectR) + 0.5) 29 | hh = int(np.sqrt(targetArea / aspectR) + 0.5) 30 | if self.rng.uniform() < 0.5: 31 | ww, hh = hh, ww 32 | if hh <= h and ww <= w: 33 | x1 = 0 if w == ww else self.rng.randint(0, w - ww) 34 | y1 = 0 if h == hh else self.rng.randint(0, h - hh) 35 | out = img[y1:y1 + hh, x1:x1 + ww] 36 | out = cv2.resize(out, (self.target_shape, self.target_shape), interpolation=cv2.INTER_CUBIC) 37 | return out 38 | out = imgaug.ResizeShortestEdge(self.target_shape, interp=cv2.INTER_CUBIC).augment(img) 39 | out = imgaug.CenterCrop(self.target_shape).augment(out) 40 | return out 41 | 42 | def fbresnet_augmentor(isTrain, target_shape=224): 43 | """ 44 | Augmentor used in fb.resnet.torch, for BGR images in range [0,255]. 45 | """ 46 | if isTrain: 47 | augmentors = [ 48 | GoogleNetResize(crop_area_fraction=0.32, target_shape=target_shape), 49 | # GoogleNetResize(target_shape=target_shape), 50 | imgaug.RandomOrderAug( 51 | [# imgaug.BrightnessScale((0.6, 1.4), clip=False), 52 | # imgaug.Contrast((0.6, 1.4), clip=False), 53 | # imgaug.Saturation(0.4, rgb=False), 54 | # rgb-bgr conversion for the constants copied from fb.resnet.torch 55 | imgaug.Lighting(0.1, 56 | eigval=np.asarray( 57 | [0.2175, 0.0188, 0.0045][::-1]) * 255.0, 58 | eigvec=np.array( 59 | [[-0.5675, 0.7192, 0.4009], 60 | [-0.5808, -0.0045, -0.8140], 61 | [-0.5836, -0.6948, 0.4203]], 62 | dtype='float32')[::-1, ::-1] 63 | )]), 64 | imgaug.Flip(horiz=True), 65 | ] 66 | else: 67 | augmentors = [ 68 | imgaug.ResizeShortestEdge(int(256 / 224 * target_shape), cv2.INTER_CUBIC), 69 | imgaug.CenterCrop((target_shape, target_shape)), 70 | ] 71 | return augmentors 72 | 73 | def data_augmentation(im, augmentors): 74 | """ 75 | See explanations in the tutorial: 76 | http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html 77 | """ 78 | assert isinstance(augmentors, list) 79 | aug = imgaug.AugmentorList(augmentors) 80 | im = aug.augment(im) 81 | return im 82 | 83 | class MyDataFlow(DataFlow): 84 | def __init__(self, image_path, label_path, is_training=True, batch_size=64, img_size=224): 85 | # get all the image name and its label 86 | self.data_dict = {} 87 | with open(label_path, 'r') as f: 88 | label_list = json.load(f) 89 | for image in label_list: 90 | self.data_dict[image['image_id']] = int(image['label_id']) 91 | self.img_name = list(self.data_dict.keys()) 92 | self.image_path = image_path 93 | self.is_training = is_training 94 | self.batch_size = batch_size 95 | self.img_size = img_size 96 | self.augmentors = fbresnet_augmentor(isTrain=is_training, target_shape=img_size) 97 | 98 | def get_data(self): 99 | np.random.seed() 100 | img_batch = np.random.choice(self.img_name, self.batch_size) 101 | img_data = [] 102 | img_label = [] 103 | for item in img_batch: 104 | im = cv2.imread(os.path.join(self.image_path, item), cv2.IMREAD_COLOR) 105 | im = data_augmentation(im, self.augmentors) 106 | img_data.append(im) 107 | img_label.append(self.data_dict[item]) 108 | yield {'data': np.array(img_data), 'label': np.array(img_label)} 109 | 110 | 111 | class MyDataFlowEval(DataFlow): 112 
| def __init__(self, image_path, label_path, img_size=224): 113 | # map every image name to its integer label 114 | self.data_dict = {} 115 | with open(label_path, 'r') as f: 116 | label_list = json.load(f) 117 | for image in label_list: 118 | self.data_dict[image['image_id']] = int(image['label_id']) 119 | self.img_name = list(self.data_dict.keys()) 120 | self.image_path = image_path 121 | self.img_size = img_size 122 | self.Length = len(self.data_dict) 123 | self.augmentors = fbresnet_augmentor(isTrain=False, target_shape=img_size) 124 | 125 | def get_data(self): 126 | for index, item in enumerate(self.img_name): 127 | im = cv2.imread(os.path.join(self.image_path, item), cv2.IMREAD_COLOR) 128 | im = data_augmentation(im, self.augmentors) 129 | label = self.data_dict[item] 130 | yield { 131 | 'name': item, 132 | 'data': np.expand_dims(np.array(im), axis=0), 133 | 'label': np.array(label), 134 | 'epoch': (index+1) == self.Length  # True for the last image 135 | } -------------------------------------------------------------------------------- /cifar10.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import os 4 | import sys 5 | import time 6 | import pickle 7 | import random 8 | import numpy as np 9 | 10 | class_num = 10 11 | image_size = 32 12 | img_channels = 3 13 | 14 | 15 | # ========================================================== # 16 | # ├─ prepare_data() 17 | # ├─ download training data via download_data() if absent 18 | # ├─ load data via load_data() 19 | # └─ shuffle and return data 20 | # ========================================================== # 21 | 22 | 23 | 24 | def download_data(): 25 | dirname = 'cifar-10-batches-py' 26 | origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 27 | fname = 'cifar-10-python.tar.gz' 28 | fpath = './' + dirname 29 | 30 | download = False 31 | if os.path.exists(fpath) or os.path.isfile(fname): 32 | download = False 33 | print("DataSet already exists!") 34 | else: 35 | download = True 36 | if download: 37 | print('Downloading data from', origin) 38 | import urllib.request 39 | import tarfile 40 | 41 | def reporthook(count, block_size, total_size): 42 | global start_time 43 | if count == 0: 44 | start_time = time.time() 45 | return 46 | duration = time.time() - start_time 47 | progress_size = int(count * block_size) 48 | speed = int(progress_size / (1024 * duration)) 49 | percent = min(int(count * block_size * 100 / total_size), 100) 50 | sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" % 51 | (percent, progress_size / (1024 * 1024), speed, duration)) 52 | sys.stdout.flush() 53 | 54 | urllib.request.urlretrieve(origin, fname, reporthook) 55 | print('Download finished. Starting extraction of', origin) 56 | if (fname.endswith("tar.gz")): 57 | tar = tarfile.open(fname, "r:gz") 58 | tar.extractall() 59 | tar.close() 60 | elif (fname.endswith("tar")): 61 | tar = tarfile.open(fname, "r:") 62 | tar.extractall() 63 | tar.close() 64 | 65 | 66 | def unpickle(file): 67 | with open(file, 'rb') as fo: 68 | dict = pickle.load(fo, encoding='bytes') 69 | return dict 70 | 71 | 72 | def load_data_one(file): 73 | batch = unpickle(file) 74 | data = batch[b'data'] 75 | labels = batch[b'labels'] 76 | print("Loading %s : %d."
% (file, len(data))) 77 | return data, labels 78 | 79 | 80 | def load_data(files, data_dir, label_count): 81 | global image_size, img_channels 82 | data, labels = load_data_one(data_dir + '/' + files[0]) 83 | for f in files[1:]: 84 | data_n, labels_n = load_data_one(data_dir + '/' + f) 85 | data = np.append(data, data_n, axis=0) 86 | labels = np.append(labels, labels_n, axis=0) 87 | labels = np.array([[float(i == label) for i in range(label_count)] for label in labels]) 88 | data = data.reshape([-1, img_channels, image_size, image_size]) 89 | data = data.transpose([0, 2, 3, 1]) 90 | return data, labels 91 | 92 | 93 | def prepare_data(): 94 | print("======Loading data======") 95 | download_data() 96 | data_dir = './cifar-10-batches-py' 97 | image_dim = image_size * image_size * img_channels 98 | meta = unpickle(data_dir + '/batches.meta') 99 | 100 | label_names = meta[b'label_names'] 101 | label_count = len(label_names) 102 | train_files = ['data_batch_%d' % d for d in range(1, 6)] 103 | train_data, train_labels = load_data(train_files, data_dir, label_count) 104 | test_data, test_labels = load_data(['test_batch'], data_dir, label_count) 105 | 106 | print("Train data:", np.shape(train_data), np.shape(train_labels)) 107 | print("Test data :", np.shape(test_data), np.shape(test_labels)) 108 | print("======Load finished======") 109 | 110 | print("======Shuffling data======") 111 | indices = np.random.permutation(len(train_data)) 112 | train_data = train_data[indices] 113 | train_labels = train_labels[indices] 114 | print("======Prepare Finished======") 115 | 116 | return train_data, train_labels, test_data, test_labels 117 | 118 | 119 | # ========================================================== # 120 | # ├─ _random_crop() 121 | # ├─ _random_flip_leftright() 122 | # ├─ data_augmentation() 123 | # └─ color_preprocessing() 124 | # ========================================================== # 125 | 126 | def _random_crop(batch, crop_shape, padding=None): 127 | oshape = np.shape(batch[0]) 128 | 129 | if padding: 130 | oshape = (oshape[0] + 2 * padding, oshape[1] + 2 * padding) 131 | new_batch = [] 132 | npad = ((padding, padding), (padding, padding), (0, 0)) 133 | for i in range(len(batch)): 134 | new_batch.append(batch[i]) 135 | if padding: 136 | new_batch[i] = np.lib.pad(batch[i], pad_width=npad, 137 | mode='constant', constant_values=0) 138 | nh = random.randint(0, oshape[0] - crop_shape[0]) 139 | nw = random.randint(0, oshape[1] - crop_shape[1]) 140 | new_batch[i] = new_batch[i][nh:nh + crop_shape[0], 141 | nw:nw + crop_shape[1]] 142 | return new_batch 143 | 144 | 145 | def _random_flip_leftright(batch): 146 | for i in range(len(batch)): 147 | if bool(random.getrandbits(1)): 148 | batch[i] = np.fliplr(batch[i]) 149 | return batch 150 | 151 | 152 | def color_preprocessing(x_train, x_test): 153 | x_train = x_train.astype('float32') 154 | x_test = x_test.astype('float32') 155 | x_train[:, :, :, 0] = (x_train[:, :, :, 0] - np.mean(x_train[:, :, :, 0])) / np.std(x_train[:, :, :, 0]) 156 | x_train[:, :, :, 1] = (x_train[:, :, :, 1] - np.mean(x_train[:, :, :, 1])) / np.std(x_train[:, :, :, 1]) 157 | x_train[:, :, :, 2] = (x_train[:, :, :, 2] - np.mean(x_train[:, :, :, 2])) / np.std(x_train[:, :, :, 2]) 158 | 159 | x_test[:, :, :, 0] = (x_test[:, :, :, 0] - np.mean(x_test[:, :, :, 0])) / np.std(x_test[:, :, :, 0]) 160 | x_test[:, :, :, 1] = (x_test[:, :, :, 1] - np.mean(x_test[:, :, :, 1])) / np.std(x_test[:, :, :, 1]) 161 | x_test[:, :, :, 2] = (x_test[:, :, :, 2] - np.mean(x_test[:, :, :, 2])) / 
np.std(x_test[:, :, :, 2]) 162 | 163 | return x_train, x_test 164 | 165 | 166 | def data_augmentation(batch): 167 | batch = _random_flip_leftright(batch) 168 | batch = _random_crop(batch, [32, 32], 4) 169 | return batch -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | import json 5 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 6 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData) 7 | from dataflow_input import MyDataFlowEval 8 | import resnet_model 9 | from IPython import embed 10 | 11 | os.environ['CUDA_VISIBLE_DEVICES']= '2' 12 | 13 | init_learning_rate = 0.1 14 | batch_size = 64 15 | image_size = 224 16 | img_channels = 3 17 | class_num = 80 18 | 19 | weight_decay = 1e-4 20 | momentum = 0.9 21 | 22 | total_epochs = 100 23 | iteration = 421 24 | # 128 * 421 ~ 53,879 25 | test_iteration = 10 26 | 27 | def dist_top_k(feat, centers): 28 | feat = feat[0, ]  # single feature vector 29 | diff = centers - feat 30 | diff = - tf.reduce_sum(diff*diff, axis=1)  # negative squared distance, so top_k picks the nearest centers 31 | _, predictions = tf.nn.top_k(diff, 3) 32 | return predictions 33 | 34 | def get_tensor_by_name(save_file, var_name): 35 | reader = tf.train.NewCheckpointReader(save_file) 36 | return reader.get_tensor(var_name) 37 | 38 | def center_loss(features, label, alfa, nrof_classes): 39 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 40 | (http://ydwen.github.io/papers/WenECCV16.pdf) 41 | """ 42 | nrof_features = features.get_shape()[1] 43 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 44 | initializer=tf.constant_initializer(0), trainable=False) 45 | label = tf.reshape(label, [-1]) 46 | centers_batch = tf.gather(centers, label) 47 | diff = (1 - alfa) * (centers_batch - features) 48 | centers = tf.scatter_sub(centers, label, diff) 49 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm') 50 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 51 | return loss, centers 52 | 53 | def focal_loss(onehot_labels, cls_preds, 54 | alpha=0.25, gamma=2.0, name=None, scope=None): 55 | """Compute sigmoid focal loss between logits and onehot labels 56 | logits and onehot_labels must have same shape [batchsize, num_classes] and 57 | the same data type (float16, 32, 64) 58 | Args: 59 | onehot_labels: Each row labels[i] must be a valid probability distribution 60 | cls_preds: Unscaled log probabilities 61 | alpha: The hyperparameter for adjusting biased samples, default is 0.25 62 | gamma: The hyperparameter for penalizing the easy labeled samples 63 | name: A name for the operation (optional) 64 | Returns: 65 | A 1-D tensor of length batch_size of same type as logits with sigmoid focal loss 66 | """ 67 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc: 68 | logits = tf.convert_to_tensor(cls_preds) 69 | onehot_labels = tf.convert_to_tensor(onehot_labels) 70 | 71 | precise_logits = tf.cast(logits, tf.float32) if ( 72 | logits.dtype == tf.float16) else logits 73 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype) 74 | predictions = tf.nn.sigmoid(logits) 75 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions)  # p_t: probability assigned to the true class 76 | # add small value to avoid log(0); per class, FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t) 77 | epsilon = 1e-8 78 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32)) 79 | alpha_t =
tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t) 80 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon), 81 | name=name, axis=1) 82 | return losses 83 | 84 | def Evaluate(sess): 85 | test_acc = 0.0 86 | test_loss = 0.0 87 | 88 | for it in range(test_iteration): 89 | batch_data = next(scene_data_val) 90 | test_batch_x = batch_data['data'] 91 | test_batch_y = batch_data['label'] 92 | 93 | test_feed_dict = { 94 | x: test_batch_x, 95 | label: test_batch_y, 96 | learning_rate: epoch_learning_rate, 97 | training_flag: False 98 | } 99 | 100 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict) 101 | 102 | test_loss += loss_ 103 | test_acc += acc_ 104 | 105 | test_loss /= test_iteration # average loss 106 | test_acc /= test_iteration # average accuracy 107 | 108 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 109 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 110 | 111 | return test_acc, test_loss, summary 112 | 113 | def resnet_model_fn(inputs, training): 114 | """Our model_fn for ResNet to be used with our Estimator.""" 115 | 116 | network = resnet_model.imagenet_resnet_v2( 117 | resnet_size=18, num_classes=class_num, mode='se', data_format=None) 118 | inputs= network(inputs=inputs, is_training=training) 119 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat') 120 | inputs = tf.layers.dense(inputs=inputs, units=class_num) 121 | # inputs = tf.layers.dense(inputs=feat, units=class_num) 122 | inputs = tf.identity(inputs, 'final_dense') 123 | 124 | return inputs, feat 125 | 126 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10 127 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels]) 128 | label = tf.placeholder(tf.float32, shape=[None,]) 129 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num) 130 | 131 | training_flag = tf.placeholder(tf.bool) 132 | learning_rate = tf.placeholder(tf.float32, name='learning_rate') 133 | 134 | logits, feat = resnet_model_fn(x, training=training_flag) 135 | 136 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)) 137 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5)) 138 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 139 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num) 140 | Total_loss = Focal_loss + l2_loss + Center_loss 141 | 142 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True) 143 | # Batch norm requires update_ops to be added as a train_op dependency. 144 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 145 | with tf.control_dependencies(update_ops): 146 | train_op = optimizer.minimize(Total_loss) 147 | 148 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1)) 149 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 150 | 151 | values, indices = tf.nn.top_k(logits, 3) 152 | 153 | val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/' 154 | annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json' 155 | # a DataFlow you implement to produce [tensor1, tensor2, ..] 
lists from whatever sources: 156 | df = MyDataFlowEval(val_dir, annotations, img_size=image_size) 157 | # start 3 processes to run the dataflow in parallel 158 | df = PrefetchDataZMQ(df, nr_proc=1) 159 | df.reset_state() 160 | scene_data_val = df.get_data() 161 | 162 | centers_class = np.load("centers.npy") 163 | centers_class = tf.convert_to_tensor(centers_class) 164 | indices_Center = dist_top_k(feat, centers_class) 165 | 166 | saver = tf.train.Saver(tf.global_variables()) 167 | 168 | with tf.Session() as sess: 169 | ckpt = tf.train.get_checkpoint_state('./model_release') 170 | print("loading checkpoint...") 171 | saver.restore(sess, ckpt.model_checkpoint_path) 172 | 173 | result = [] 174 | for it in scene_data_val: 175 | temp_dict = {} 176 | feed_dict = {x: it['data'], training_flag: False} 177 | predictions, predictions_Center = sess.run([indices, indices_Center], feed_dict=feed_dict) 178 | predictions = np.squeeze(predictions, axis=0) 179 | 180 | predictions = predictions_Center 181 | 182 | temp_dict['image_id'] = it['name'] 183 | temp_dict['label_id'] = predictions.tolist() 184 | result.append(temp_dict) 185 | print('image %s is %d,%d,%d, label: %d' % (it['name'], predictions[0], predictions[1], predictions[2], it['label'])) 186 | if it['epoch']: 187 | break 188 | 189 | with open('submit.json', 'w') as f: 190 | json.dump(result, f) 191 | print('write result json, num is %d' % len(result)) 192 | 193 | -------------------------------------------------------------------------------- /SE_ResNeXt.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tflearn.layers.conv import global_avg_pool 3 | from tensorflow.contrib.layers import batch_norm, flatten 4 | from tensorflow.contrib.framework import arg_scope 5 | import numpy as np 6 | import os 7 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 8 | from tensorpack.dataflow import ( 9 | AugmentImageComponent, PrefetchDataZMQ, 10 | BatchData, MultiThreadMapData, DataFlow) 11 | from dataflow_input import (MyDataFlow, data_augmentation) 12 | from IPython import embed 13 | 14 | os.environ['CUDA_VISIBLE_DEVICES']= '3' 15 | 16 | weight_decay = 0.0005 17 | momentum = 0.9 18 | 19 | init_learning_rate = 0.1 * 5 20 | cardinality = 2 # how many split ? 21 | blocks = 3 # res_block ! 
(split + transition) 22 | depth = 64 # out channel 23 | 24 | """ 25 | So the total number of layers is (3*blocks)*residual_layer_num + 2, 26 | because each block = split (2 conv layers) + transition (1 conv layer) = 3 layers, 27 | plus the first conv layer and the last dense layer. 28 | Thus, total number of layers = (3*blocks)*residual_layer_num + 2 29 | """ 30 | 31 | reduction_ratio = 4 32 | 33 | total_epochs = 100 34 | 35 | batch_size = 64 36 | image_size = 224 37 | img_channels = 3 38 | class_num = 80 39 | 40 | iteration = 421 41 | # 128 * 421 ~ 53,879 42 | 43 | test_iteration = 10 44 | 45 | def conv_layer(input, filter, kernel, stride, padding='SAME', layer_name="conv"): 46 | with tf.name_scope(layer_name): 47 | network = tf.layers.conv2d(inputs=input, use_bias=False, filters=filter, kernel_size=kernel, strides=stride, padding=padding) 48 | return network 49 | 50 | def deconv_layer(input, filter, kernel, stride, padding='SAME', layer_name="deconv"): 51 | with tf.name_scope(layer_name): 52 | network = tf.layers.conv2d_transpose(inputs=input, use_bias=False, filters=filter, kernel_size=kernel, strides=stride, padding=padding) 53 | return network 54 | 55 | def Global_Average_Pooling(x): 56 | return global_avg_pool(x, name='Global_avg_pooling') 57 | 58 | def Average_pooling(x, pool_size=[2,2], stride=2, padding='SAME'): 59 | return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding) 60 | 61 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') : 62 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding) 63 | 64 | def Batch_Normalization(x, training, scope): 65 | with arg_scope([batch_norm], 66 | scope=scope, 67 | updates_collections=None, 68 | decay=0.9, 69 | center=True, 70 | scale=True, 71 | zero_debias_moving_mean=True) : 72 | return tf.cond(training, 73 | lambda : batch_norm(inputs=x, is_training=training, reuse=None), 74 | lambda : batch_norm(inputs=x, is_training=training, reuse=True)) 75 | 76 | def Relu(x): 77 | return tf.nn.relu(x) 78 | 79 | def Sigmoid(x) : 80 | return tf.nn.sigmoid(x) 81 | 82 | def tanh(x): 83 | return tf.tanh(x) 84 | 85 | def Concatenation(layers) : 86 | return tf.concat(layers, axis=3) 87 | 88 | def Fully_connected(x, units=class_num, layer_name='fully_connected') : 89 | with tf.name_scope(layer_name) : 90 | return tf.layers.dense(inputs=x, use_bias=False, units=units) 91 | 92 | def center_loss(features, label, alfa, nrof_classes): 93 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 94 | (http://ydwen.github.io/papers/WenECCV16.pdf) 95 | """ 96 | nrof_features = features.get_shape()[1] 97 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 98 | initializer=tf.constant_initializer(0), trainable=False) 99 | label = tf.reshape(label, [-1]) 100 | centers_batch = tf.gather(centers, label) 101 | diff = (1 - alfa) * (centers_batch - features) 102 | centers = tf.scatter_sub(centers, label, diff) 103 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 104 | return loss, centers 105 | 106 | def Evaluate(sess): 107 | test_acc = 0.0 108 | test_loss = 0.0 109 | 110 | for it in range(test_iteration): 111 | batch_data = next(scene_data_val) 112 | test_batch_x = batch_data['data'] 113 | test_batch_y = batch_data['label'] 114 | 115 | test_feed_dict = { 116 | x: test_batch_x, 117 | label: test_batch_y, 118 | learning_rate: epoch_learning_rate, 119 | training_flag: False 120 | } 121 | 122 | loss_, acc_ =
sess.run([cost, accuracy], feed_dict=test_feed_dict) 123 | 124 | test_loss += loss_ 125 | test_acc += acc_ 126 | 127 | test_loss /= test_iteration # average loss 128 | test_acc /= test_iteration # average accuracy 129 | 130 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 131 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 132 | 133 | return test_acc, test_loss, summary 134 | 135 | class SE_ResNeXt(): 136 | def __init__(self, x, training): 137 | self.training = training 138 | self.model = self.Build_SEnet(x) 139 | 140 | def first_layer(self, x, scope): 141 | with tf.name_scope(scope) : 142 | x = conv_layer(x, filter=64, kernel=[7, 7], stride=2, layer_name=scope+'_conv1') 143 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 144 | x = Relu(x) 145 | x = Max_pooling(x) 146 | 147 | return x 148 | 149 | def transform_layer(self, x, stride, scope): 150 | with tf.name_scope(scope) : 151 | x = conv_layer(x, filter=depth, kernel=[1,1], stride=1, layer_name=scope+'_conv1') 152 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 153 | x = Relu(x) 154 | 155 | x = conv_layer(x, filter=depth, kernel=[3,3], stride=stride, layer_name=scope+'_conv2') 156 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch2') 157 | x = Relu(x) 158 | return x 159 | 160 | def transition_layer(self, x, out_dim, scope): 161 | with tf.name_scope(scope): 162 | x = conv_layer(x, filter=out_dim, kernel=[1,1], stride=1, layer_name=scope+'_conv1') 163 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 164 | # x = Relu(x) 165 | 166 | return x 167 | 168 | def split_layer(self, input_x, stride, layer_name): 169 | with tf.name_scope(layer_name) : 170 | layers_split = list() 171 | for i in range(cardinality) : 172 | splits = self.transform_layer(input_x, stride=stride, scope=layer_name + '_splitN_' + str(i)) 173 | layers_split.append(splits) 174 | 175 | return Concatenation(layers_split) 176 | 177 | def squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name): 178 | with tf.name_scope(layer_name) : 179 | squeeze = Global_Average_Pooling(input_x) 180 | 181 | excitation = Fully_connected(squeeze, units=out_dim / ratio, layer_name=layer_name+'_fully_connected1') 182 | excitation = Relu(excitation) 183 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2') 184 | excitation = Sigmoid(excitation) 185 | 186 | excitation = tf.reshape(excitation, [-1,1,1,out_dim]) 187 | scale = input_x * excitation 188 | 189 | return scale 190 | 191 | def residual_layer(self, input_x, out_dim, layer_num, res_block=blocks): 192 | # split + transform(bottleneck) + transition + merge 193 | # input_dim = input_x.get_shape().as_list()[-1] 194 | 195 | for i in range(res_block): 196 | input_dim = int(np.shape(input_x)[-1]) 197 | 198 | if input_dim * 2 == out_dim: 199 | flag = True 200 | stride = 2 201 | channel = input_dim // 2 202 | else: 203 | flag = False 204 | stride = 1 205 | x = self.split_layer(input_x, stride=stride, layer_name='split_layer_'+layer_num+'_'+str(i)) 206 | x = self.transition_layer(x, out_dim=out_dim, scope='trans_layer_'+layer_num+'_'+str(i)) 207 | x = self.squeeze_excitation_layer(x, out_dim=out_dim, ratio=reduction_ratio, layer_name='squeeze_layer_'+layer_num+'_'+str(i)) 208 | 209 | if flag is True : 210 | pad_input_x = Average_pooling(input_x) 211 | pad_input_x = tf.pad(pad_input_x, [[0, 0], [0, 0], [0, 0], [channel, channel]]) # [?, 
height, width, channel] 212 | else : 213 | pad_input_x = input_x 214 | 215 | input_x = Relu(x + pad_input_x) 216 | 217 | return input_x  # output of the last block's residual merge 218 | 219 | def generator(self, x, scope="generator"): 220 | with tf.variable_scope(scope): 221 | n_downsampling = 5 222 | for i in range(n_downsampling): 223 | mult = pow(2, (n_downsampling - i)) 224 | x = deconv_layer(x, filter=int((32 * mult) / 2), kernel=[3, 3], stride=2, layer_name='deconv' + str(i)) 225 | x = Relu(x) 226 | 227 | x = conv_layer(x, filter=3, kernel=[7,7], stride=1, layer_name='conv1') 228 | x = 128 * Batch_Normalization(x, training=self.training, scope=scope+'_batch1') + 128 229 | 230 | return x 231 | 232 | def Build_SEnet(self, input_x): 233 | # stem + 4 SE-ResNeXt stages (adapted from the cifar10 architecture) 234 | 235 | input_x = self.first_layer(input_x, scope='first_layer') 236 | 237 | x = self.residual_layer(input_x, out_dim=64, layer_num='1') 238 | x = self.residual_layer(x, out_dim=128, layer_num='2') 239 | x = self.residual_layer(x, out_dim=256, layer_num='3') 240 | x = self.residual_layer(x, out_dim=512, layer_num='4') 241 | 242 | recon_x = self.generator(x) 243 | # recon_x = tf.cast(recon_x, dtype=tf.uint8) 244 | 245 | x = Global_Average_Pooling(x) 246 | x = flatten(x) 247 | 248 | feat = tf.nn.l2_normalize(x, 1, 1e-10, name='feat') 249 | 250 | x = Fully_connected(x, layer_name='final_fully_connected') 251 | return x, recon_x, feat -------------------------------------------------------------------------------- /pre_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 5 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData) 6 | from dataflow_input import MyDataFlow 7 | import resnet_model 8 | from IPython import embed 9 | 10 | os.environ['CUDA_VISIBLE_DEVICES']= '0' 11 | 12 | init_learning_rate = 0.1 13 | batch_size = 128 14 | image_size = 224 15 | img_channels = 3 16 | class_num = 365 17 | 18 | weight_decay = 1e-4 19 | momentum = 0.9 20 | 21 | total_epochs = 30 22 | iteration = 14089 // 1 23 | # 128 * 14089 ~ 1,803,460 24 | test_iteration = 10 25 | 26 | def center_loss(features, label, alfa, nrof_classes): 27 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 28 | (http://ydwen.github.io/papers/WenECCV16.pdf) 29 | """ 30 | nrof_features = features.get_shape()[1] 31 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 32 | initializer=tf.constant_initializer(0), trainable=False) 33 | label = tf.reshape(label, [-1]) 34 | centers_batch = tf.gather(centers, label) 35 | diff = (1 - alfa) * (centers_batch - features) 36 | centers = tf.scatter_sub(centers, label, diff) 37 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm') 38 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 39 | return loss, centers 40 | 41 | def focal_loss(onehot_labels, cls_preds, 42 | alpha=0.25, gamma=2.0, name=None, scope=None): 43 | """Compute softmax focal loss between logits and onehot labels 44 | logits and onehot_labels must have same shape [batchsize, num_classes] and 45 | the same data type (float16, 32, 64) 46 | Args: 47 | onehot_labels: Each row labels[i] must be a valid probability distribution 48 | cls_preds: Unscaled log probabilities 49 | alpha: The hyperparameter for adjusting biased samples, default is 0.25 50 | gamma: The hyperparameter for penalizing the easy labeled samples 51 | name: A name
for the operation (optional) 52 | Returns: 53 | A 1-D tensor of length batch_size of same type as logits with softmax focal loss 54 | """ 55 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc: 56 | logits = tf.convert_to_tensor(cls_preds) 57 | onehot_labels = tf.convert_to_tensor(onehot_labels) 58 | 59 | precise_logits = tf.cast(logits, tf.float32) if ( 60 | logits.dtype == tf.float16) else logits 61 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype) 62 | predictions = tf.nn.sigmoid(logits) 63 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions) 64 | # add small value to avoid 0 65 | epsilon = 1e-8 66 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32)) 67 | alpha_t = tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t) 68 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon), 69 | name=name, axis=1) 70 | return losses 71 | 72 | def Evaluate(sess): 73 | test_acc = 0.0 74 | test_loss = 0.0 75 | 76 | for it in range(test_iteration): 77 | batch_data = next(scene_data_val) 78 | test_batch_x = batch_data['data'] 79 | test_batch_y = batch_data['label'] 80 | 81 | test_feed_dict = { 82 | x: test_batch_x, 83 | label: test_batch_y, 84 | learning_rate: epoch_learning_rate, 85 | training_flag: False 86 | } 87 | 88 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict) 89 | 90 | test_loss += loss_ 91 | test_acc += acc_ 92 | 93 | test_loss /= test_iteration # average loss 94 | test_acc /= test_iteration # average accuracy 95 | 96 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 97 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 98 | 99 | return test_acc, test_loss, summary 100 | 101 | def resnet_model_fn(inputs, training): 102 | """Our model_fn for ResNet to be used with our Estimator.""" 103 | 104 | network = resnet_model.imagenet_resnet_v2( 105 | resnet_size=18, num_classes=class_num, mode='se', data_format=None) 106 | inputs= network(inputs=inputs, is_training=training) 107 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat') 108 | inputs = tf.layers.dense(inputs=inputs, units=class_num) 109 | # inputs = tf.layers.dense(inputs=feat, units=class_num) 110 | inputs = tf.identity(inputs, 'final_dense') 111 | 112 | return inputs, feat 113 | 114 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10 115 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels]) 116 | label = tf.placeholder(tf.float32, shape=[None,]) 117 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num) 118 | 119 | training_flag = tf.placeholder(tf.bool) 120 | learning_rate = tf.placeholder(tf.float32, name='learning_rate') 121 | 122 | logits, feat = resnet_model_fn(x, training=training_flag) 123 | 124 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)) 125 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5)) 126 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 127 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num) 128 | Total_loss = cost + l2_loss 129 | 130 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True) 131 | # Batch norm requires update_ops to be added as a train_op dependency. 
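# tf.layers batch normalization registers its moving-mean/variance update ops
# in tf.GraphKeys.UPDATE_OPS; wrapping optimizer.minimize in
# tf.control_dependencies below makes every training step run those updates
# first, so the statistics used at inference time actually get updated.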
132 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 133 | with tf.control_dependencies(update_ops): 134 | train_op = optimizer.minimize(Total_loss) 135 | 136 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1)) 137 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 138 | 139 | # val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/' 140 | # annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json' 141 | # # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources: 142 | # df = MyDataFlow(val_dir, annotations, is_training=False, batch_size=batch_size, img_size=image_size) 143 | # # start 3 processes to run the dataflow in parallel 144 | # df = PrefetchDataZMQ(df, nr_proc=10) 145 | # df.reset_state() 146 | # scene_data_val = df.get_data() 147 | 148 | train_dir = '/data0/AIChallenger/data_256' 149 | annotations = '/data0/AIChallenger/data_256.json' 150 | # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources: 151 | df = MyDataFlow(train_dir, annotations, is_training=True, batch_size=batch_size, img_size=image_size) 152 | # start 3 processes to run the dataflow in parallel 153 | df = PrefetchDataZMQ(df, nr_proc=10) 154 | df.reset_state() 155 | scene_data = df.get_data() 156 | 157 | saver = tf.train.Saver(tf.global_variables()) 158 | 159 | with tf.Session() as sess: 160 | ckpt = tf.train.get_checkpoint_state('./model_pretrain') 161 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): 162 | print("loading checkpoint...") 163 | saver.restore(sess, ckpt.model_checkpoint_path) 164 | else: 165 | sess.run(tf.global_variables_initializer()) 166 | 167 | summary_writer = tf.summary.FileWriter('./logs_pretrain', sess.graph) 168 | 169 | _x = x[:, :, :, ::-1] 170 | tf.summary.image('x', _x, 4) 171 | 172 | summary_op = tf.summary.merge_all() 173 | 174 | epoch_learning_rate = init_learning_rate 175 | for epoch in range(1, total_epochs + 1): 176 | if epoch % 10 == 0 : 177 | epoch_learning_rate = epoch_learning_rate / 10 178 | 179 | train_acc = 0.0 180 | train_loss = 0.0 181 | 182 | for step in range(1, iteration + 1): 183 | batch_data = next(scene_data) 184 | batch_x = batch_data['data'] 185 | batch_y = batch_data['label'] 186 | 187 | train_feed_dict = { 188 | x: batch_x, 189 | label: batch_y, 190 | learning_rate: epoch_learning_rate, 191 | training_flag: True 192 | } 193 | 194 | _, batch_loss = sess.run([train_op, Total_loss], feed_dict=train_feed_dict) 195 | batch_acc = accuracy.eval(feed_dict=train_feed_dict) 196 | 197 | print("epoch: %d/%d, iter: %d/%d, batch_loss: %.4f, batch_acc: %.4f \n" % ( 198 | epoch, total_epochs, step, iteration, batch_loss, batch_acc)) 199 | 200 | train_loss += batch_loss 201 | train_acc += batch_acc 202 | 203 | if step % 30 == 0 : 204 | summary_str = sess.run(summary_op, feed_dict=train_feed_dict) 205 | summary_writer.add_summary(summary=summary_str, global_step=epoch) 206 | summary_writer.flush() 207 | 208 | 209 | train_loss /= iteration # average loss 210 | train_acc /= iteration # average accuracy 211 | 212 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss), 213 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)]) 214 | 215 | # test_acc, test_loss, test_summary = Evaluate(sess) 216 | 217 | summary_writer.add_summary(summary=train_summary, global_step=epoch) 218 | # 
summary_writer.add_summary(summary=test_summary, global_step=epoch) 219 | summary_writer.flush() 220 | 221 | # line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % ( 222 | # epoch, total_epochs, train_loss, train_acc, test_loss, test_acc) 223 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f \n" % ( 224 | epoch, total_epochs, train_loss, train_acc) 225 | print(line) 226 | 227 | with open('./logs_pretrain/logs.txt', 'a') as f: 228 | f.write(line) 229 | 230 | saver.save(sess=sess, save_path='./model_pretrain/model.ckpt') 231 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 5 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData) 6 | from dataflow_input import MyDataFlow 7 | import resnet_model 8 | from IPython import embed 9 | 10 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 11 | 12 | init_learning_rate = 0.01 13 | batch_size = 128 14 | image_size = 224 15 | img_channels = 3 16 | class_num = 80 17 | 18 | weight_decay = 1e-4 19 | momentum = 0.9 20 | 21 | total_epochs = 30 22 | iteration = 1*421 23 | # 128 * 421 ~ 53,879 24 | test_iteration = 10 25 | 26 | def optimistic_restore(session, save_file): 27 | reader = tf.train.NewCheckpointReader(save_file) 28 | saved_shapes = reader.get_variable_to_shape_map() 29 | var_names = sorted([(var.name, var.name.split(':')[0]) for var in tf.global_variables() if var.name.split(':')[0] in saved_shapes]) 30 | restore_vars = [] 31 | name2var = dict(zip(map(lambda x:x.name.split(':')[0], tf.global_variables()), tf.global_variables())) 32 | with tf.variable_scope('', reuse=True): 33 | for var_name, saved_var_name in var_names: 34 | curr_var = name2var[saved_var_name] 35 | var_shape = curr_var.get_shape().as_list() 36 | if var_shape == saved_shapes[saved_var_name]: 37 | restore_vars.append(curr_var) 38 | saver = tf.train.Saver(restore_vars) 39 | saver.restore(session, save_file) 40 | 41 | def center_loss(features, label, alfa, nrof_classes): 42 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 43 | (http://ydwen.github.io/papers/WenECCV16.pdf) 44 | """ 45 | nrof_features = features.get_shape()[1] 46 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 47 | initializer=tf.constant_initializer(0), trainable=False) 48 | label = tf.reshape(label, [-1]) 49 | centers_batch = tf.gather(centers, label) 50 | diff = (1 - alfa) * (centers_batch - features) 51 | centers = tf.scatter_sub(centers, label, diff) 52 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm') 53 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 54 | return loss, centers 55 | 56 | def focal_loss(onehot_labels, cls_preds, 57 | alpha=0.25, gamma=2.0, name=None, scope=None): 58 | """Compute softmax focal loss between logits and onehot labels 59 | logits and onehot_labels must have same shape [batchsize, num_classes] and 60 | the same data type (float16, 32, 64) 61 | Args: 62 | onehot_labels: Each row labels[i] must be a valid probability distribution 63 | cls_preds: Unscaled log probabilities 64 | alpha: The hyperparameter for adjusting biased samples, default is 0.25 65 | gamma: The hyperparameter for penalizing the easy labeled samples 66 | name: A name for the operation 
(optional) 67 | Returns: 68 | A 1-D tensor of length batch_size of same type as logits with softmax focal loss 69 | """ 70 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc: 71 | logits = tf.convert_to_tensor(cls_preds) 72 | onehot_labels = tf.convert_to_tensor(onehot_labels) 73 | 74 | precise_logits = tf.cast(logits, tf.float32) if ( 75 | logits.dtype == tf.float16) else logits 76 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype) 77 | predictions = tf.nn.sigmoid(logits) 78 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions) 79 | # add small value to avoid 0 80 | epsilon = 1e-8 81 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32)) 82 | alpha_t = tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t) 83 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon), 84 | name=name, axis=1) 85 | return losses 86 | 87 | def Evaluate(sess): 88 | test_acc = 0.0 89 | test_loss = 0.0 90 | 91 | for it in range(test_iteration): 92 | batch_data = next(scene_data_val) 93 | test_batch_x = batch_data['data'] 94 | test_batch_y = batch_data['label'] 95 | 96 | test_feed_dict = { 97 | x: test_batch_x, 98 | label: test_batch_y, 99 | learning_rate: epoch_learning_rate, 100 | training_flag: False 101 | } 102 | 103 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict) 104 | 105 | test_loss += loss_ 106 | test_acc += acc_ 107 | 108 | test_loss /= test_iteration # average loss 109 | test_acc /= test_iteration # average accuracy 110 | 111 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 112 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 113 | 114 | return test_acc, test_loss, summary 115 | 116 | def resnet_model_fn(inputs, training): 117 | """Our model_fn for ResNet to be used with our Estimator.""" 118 | 119 | network = resnet_model.imagenet_resnet_v2( 120 | resnet_size=18, num_classes=class_num, mode='se', data_format=None) 121 | inputs= network(inputs=inputs, is_training=training) 122 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat') 123 | inputs = tf.layers.dense(inputs=inputs, units=class_num) 124 | # inputs = tf.layers.dense(inputs=feat, units=class_num) 125 | inputs = tf.identity(inputs, 'final_dense') 126 | 127 | return inputs, feat 128 | 129 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10 130 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels]) 131 | label = tf.placeholder(tf.float32, shape=[None,]) 132 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num) 133 | 134 | training_flag = tf.placeholder(tf.bool) 135 | learning_rate = tf.placeholder(tf.float32, name='learning_rate') 136 | 137 | logits, feat = resnet_model_fn(x, training=training_flag) 138 | 139 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)) 140 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5)) 141 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 142 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num) 143 | Total_loss = cost + l2_loss + Center_loss 144 | 145 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True) 146 | # Batch norm requires update_ops to be added as a train_op dependency. 
147 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
148 | with tf.control_dependencies(update_ops):
149 | train_op = optimizer.minimize(Total_loss)
150 | 
151 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1))
152 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
153 | 
154 | val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/'
155 | annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json'
156 | # a custom DataFlow that yields batches as {'data': images, 'label': labels} dicts:
157 | df = MyDataFlow(val_dir, annotations, is_training=False, batch_size=batch_size, img_size=image_size)
158 | # run the validation dataflow in a single separate process
159 | df = PrefetchDataZMQ(df, nr_proc=1)
160 | df.reset_state()
161 | scene_data_val = df.get_data()
162 | 
163 | train_dir = '/data0/AIChallenger/ai_challenger_scene_train_20170904/scene_train_images_20170904/'
164 | annotations = '/data0/AIChallenger/ai_challenger_scene_train_20170904/scene_train_annotations_20170904.json'
165 | # the same DataFlow over the training set:
166 | df = MyDataFlow(train_dir, annotations, is_training=True, batch_size=batch_size, img_size=image_size)
167 | # start 10 processes to run the training dataflow in parallel
168 | df = PrefetchDataZMQ(df, nr_proc=10)
169 | df.reset_state()
170 | scene_data = df.get_data()
171 | 
172 | saver = tf.train.Saver(tf.global_variables())
173 | 
174 | with tf.Session() as sess:
175 | ckpt = tf.train.get_checkpoint_state('./model')
176 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
177 | print("loading checkpoint...")
178 | sess.run(tf.global_variables_initializer())
179 | optimistic_restore(sess, ckpt.model_checkpoint_path)
180 | # saver.restore(sess, ckpt.model_checkpoint_path)
181 | else:
182 | sess.run(tf.global_variables_initializer())
183 | 
184 | summary_writer = tf.summary.FileWriter('./logs', sess.graph)
185 | 
186 | _x = x[:, :, :, ::-1] # reverse the channel order for the image summary
187 | tf.summary.image('x', _x, 4)
188 | 
189 | summary_op = tf.summary.merge_all()
190 | 
191 | epoch_learning_rate = init_learning_rate
192 | for epoch in range(1, total_epochs + 1):
193 | if epoch % 20 == 0 :
194 | epoch_learning_rate = epoch_learning_rate / 10
195 | 
196 | train_acc = 0.0
197 | train_loss = 0.0
198 | 
199 | for step in range(1, iteration + 1):
200 | batch_data = next(scene_data)
201 | batch_x = batch_data['data']
202 | batch_y = batch_data['label']
203 | 
204 | train_feed_dict = {
205 | x: batch_x,
206 | label: batch_y,
207 | learning_rate: epoch_learning_rate,
208 | training_flag: True
209 | }
210 | 
211 | _, batch_loss, centers_class = sess.run([train_op, Total_loss, Centers], feed_dict=train_feed_dict)
212 | batch_acc = accuracy.eval(feed_dict=train_feed_dict)
213 | 
214 | print("epoch: %d/%d, iter: %d/%d, batch_loss: %.4f, batch_acc: %.4f \n" % (
215 | epoch, total_epochs, step, iteration, batch_loss, batch_acc))
216 | 
217 | train_loss += batch_loss
218 | train_acc += batch_acc
219 | 
220 | if step % 30 == 0 :
221 | summary_str = sess.run(summary_op, feed_dict=train_feed_dict)
222 | summary_writer.add_summary(summary=summary_str, global_step=epoch)
223 | summary_writer.flush()
224 | 
225 | 
226 | train_loss /= iteration # average loss
227 | train_acc /= iteration # average accuracy
228 | 
229 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss),
230 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)])
231 | 
232 | test_acc, test_loss, test_summary = Evaluate(sess)
233 | 
234 | summary_writer.add_summary(summary=train_summary, global_step=epoch)
235 | summary_writer.add_summary(summary=test_summary, global_step=epoch)
236 | summary_writer.flush()
237 | 
238 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % (
239 | epoch, total_epochs, train_loss, train_acc, test_loss, test_acc)
240 | print(line)
241 | 
242 | with open('./logs/logs.txt', 'a') as f:
243 | f.write(line)
244 | 
245 | saver.save(sess=sess, save_path='./model/model.ckpt')
246 | np.save("centers.npy", centers_class) # persist the class centers learned by center loss
247 | 
--------------------------------------------------------------------------------
/SE_Inception_v4.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tflearn.layers.conv import global_avg_pool
3 | from tensorflow.contrib.layers import batch_norm, flatten
4 | from tensorflow.contrib.framework import arg_scope
5 | import numpy as np
6 | import scene_input
7 | import os
8 | 
9 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
10 | 
11 | weight_decay = 0.0005
12 | momentum = 0.9
13 | 
14 | init_learning_rate = 0.1
15 | reduction_ratio = 4
16 | 
17 | batch_size = 32
18 | image_size = 96
19 | img_channels = 3
20 | class_num = 80
21 | 
22 | iteration = 391
23 | # 32 * 391 ~ 12,500 images per epoch
24 | 
25 | test_iteration = 10
26 | 
27 | total_epochs = 100
28 | 
29 | def conv_layer(input, filter, kernel, stride=1, padding='SAME', layer_name="conv"):
30 | with tf.name_scope(layer_name):
31 | network = tf.layers.conv2d(inputs=input, use_bias=True, filters=filter, kernel_size=kernel, strides=stride, padding=padding)
32 | network = Relu(network)
33 | return network
34 | 
35 | def Fully_connected(x, units=class_num, layer_name='fully_connected') :
36 | with tf.name_scope(layer_name) :
37 | return tf.layers.dense(inputs=x, use_bias=True, units=units)
38 | 
39 | def Relu(x):
40 | return tf.nn.relu(x)
41 | 
42 | def Sigmoid(x):
43 | return tf.nn.sigmoid(x)
44 | 
45 | def Global_Average_Pooling(x):
46 | return global_avg_pool(x, name='Global_avg_pooling')
47 | 
48 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') :
49 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
50 | 
51 | def Avg_pooling(x, pool_size=[3,3], stride=1, padding='SAME') :
52 | return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
53 | 
54 | def Batch_Normalization(x, training, scope):
55 | with arg_scope([batch_norm],
56 | scope=scope,
57 | updates_collections=None,
58 | decay=0.9,
59 | center=True,
60 | scale=True,
61 | zero_debias_moving_mean=True) :
62 | return tf.cond(training,
63 | lambda : batch_norm(inputs=x, is_training=training, reuse=None),
64 | lambda : batch_norm(inputs=x, is_training=training, reuse=True))
65 | 
66 | def Concatenation(layers) :
67 | return tf.concat(layers, axis=3)
68 | 
69 | def Dropout(x, rate, training) :
70 | return tf.layers.dropout(inputs=x, rate=rate, training=training)
71 | 
72 | def Evaluate(sess): # expects x, label, cost, accuracy, learning_rate, training_flag and scene_data_val as module-level globals
73 | test_acc = 0.0
74 | test_loss = 0.0
75 | 
76 | for it in range(test_iteration):
77 | test_batch_x, test_batch_y = scene_data_val.next_batch(batch_size, image_size)
78 | 
79 | test_feed_dict = {
80 | x: test_batch_x,
81 | label: test_batch_y,
82 | learning_rate: epoch_learning_rate,
83 | training_flag: False
84 | }
85 | 
86 | loss_, acc_ = sess.run([cost, accuracy],
feed_dict=test_feed_dict) 87 | 88 | test_loss += loss_ 89 | test_acc += acc_ 90 | 91 | test_loss /= test_iteration # average loss 92 | test_acc /= test_iteration # average accuracy 93 | 94 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 95 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 96 | 97 | return test_acc, test_loss, summary 98 | 99 | class SE_Inception_v4(): 100 | def __init__(self, x, training): 101 | self.training = training 102 | self.model = self.Build_SEnet(x) 103 | 104 | def Stem(self, x, scope): 105 | with tf.name_scope(scope) : 106 | x = conv_layer(x, filter=32, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_conv1') 107 | x = conv_layer(x, filter=32, kernel=[3,3], padding='VALID', layer_name=scope+'_conv2') 108 | block_1 = conv_layer(x, filter=64, kernel=[3,3], layer_name=scope+'_conv3') 109 | 110 | split_max_x = Max_pooling(block_1) 111 | split_conv_x = conv_layer(block_1, filter=96, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1') 112 | x = Concatenation([split_max_x,split_conv_x]) 113 | 114 | split_conv_x1 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv2') 115 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv3') 116 | 117 | split_conv_x2 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv4') 118 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[7,1], layer_name=scope+'_split_conv5') 119 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[1,7], layer_name=scope+'_split_conv6') 120 | split_conv_x2 = conv_layer(split_conv_x2, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv7') 121 | 122 | x = Concatenation([split_conv_x1,split_conv_x2]) 123 | 124 | split_conv_x = conv_layer(x, filter=192, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv8') 125 | split_max_x = Max_pooling(x) 126 | 127 | x = Concatenation([split_conv_x, split_max_x]) 128 | 129 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 130 | x = Relu(x) 131 | 132 | return x 133 | 134 | def Inception_A(self, x, scope): 135 | with tf.name_scope(scope) : 136 | split_conv_x1 = Avg_pooling(x) 137 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[1,1], layer_name=scope+'_split_conv1') 138 | 139 | split_conv_x2 = conv_layer(x, filter=96, kernel=[1,1], layer_name=scope+'_split_conv2') 140 | 141 | split_conv_x3 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv3') 142 | split_conv_x3 = conv_layer(split_conv_x3, filter=96, kernel=[3,3], layer_name=scope+'_split_conv4') 143 | 144 | split_conv_x4 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv5') 145 | split_conv_x4 = conv_layer(split_conv_x4, filter=96, kernel=[3,3], layer_name=scope+'_split_conv6') 146 | split_conv_x4 = conv_layer(split_conv_x4, filter=96, kernel=[3,3], layer_name=scope+'_split_conv7') 147 | 148 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3, split_conv_x4]) 149 | 150 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 151 | x = Relu(x) 152 | 153 | return x 154 | 155 | def Inception_B(self, x, scope): 156 | with tf.name_scope(scope) : 157 | init = x 158 | 159 | split_conv_x1 = Avg_pooling(x) 160 | split_conv_x1 = conv_layer(split_conv_x1, filter=128, kernel=[1,1], layer_name=scope+'_split_conv1') 161 | 162 | split_conv_x2 = conv_layer(x, filter=384, kernel=[1,1], 
layer_name=scope+'_split_conv2')
163 | 
164 | split_conv_x3 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv3')
165 | split_conv_x3 = conv_layer(split_conv_x3, filter=224, kernel=[1,7], layer_name=scope+'_split_conv4')
166 | split_conv_x3 = conv_layer(split_conv_x3, filter=256, kernel=[1,7], layer_name=scope+'_split_conv5') # note: the Inception-v4 paper uses a [7,1] conv here
167 | 
168 | split_conv_x4 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv6')
169 | split_conv_x4 = conv_layer(split_conv_x4, filter=192, kernel=[1,7], layer_name=scope+'_split_conv7')
170 | split_conv_x4 = conv_layer(split_conv_x4, filter=224, kernel=[7,1], layer_name=scope+'_split_conv8')
171 | split_conv_x4 = conv_layer(split_conv_x4, filter=224, kernel=[1,7], layer_name=scope+'_split_conv9')
172 | split_conv_x4 = conv_layer(split_conv_x4, filter=256, kernel=[7,1], layer_name=scope+'_split_conv10')
173 | 
174 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3, split_conv_x4])
175 | 
176 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
177 | x = Relu(x)
178 | 
179 | return x
180 | 
181 | def Inception_C(self, x, scope):
182 | with tf.name_scope(scope) :
183 | split_conv_x1 = Avg_pooling(x)
184 | split_conv_x1 = conv_layer(split_conv_x1, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
185 | 
186 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv2')
187 | 
188 | split_conv_x3 = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_split_conv3')
189 | split_conv_x3_1 = conv_layer(split_conv_x3, filter=256, kernel=[1,3], layer_name=scope+'_split_conv4')
190 | split_conv_x3_2 = conv_layer(split_conv_x3, filter=256, kernel=[3,1], layer_name=scope+'_split_conv5')
191 | 
192 | split_conv_x4 = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_split_conv6')
193 | split_conv_x4 = conv_layer(split_conv_x4, filter=448, kernel=[1,3], layer_name=scope+'_split_conv7')
194 | split_conv_x4 = conv_layer(split_conv_x4, filter=512, kernel=[3,1], layer_name=scope+'_split_conv8')
195 | split_conv_x4_1 = conv_layer(split_conv_x4, filter=256, kernel=[3,1], layer_name=scope+'_split_conv9')
196 | split_conv_x4_2 = conv_layer(split_conv_x4, filter=256, kernel=[1,3], layer_name=scope+'_split_conv10')
197 | 
198 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3_1, split_conv_x3_2, split_conv_x4_1, split_conv_x4_2])
199 | 
200 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
201 | x = Relu(x)
202 | 
203 | return x
204 | 
205 | def Reduction_A(self, x, scope):
206 | with tf.name_scope(scope) :
207 | k = 256
208 | l = 256
209 | m = 384
210 | n = 384
211 | 
212 | split_max_x = Max_pooling(x)
213 | 
214 | split_conv_x1 = conv_layer(x, filter=n, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1')
215 | 
216 | split_conv_x2 = conv_layer(x, filter=k, kernel=[1,1], layer_name=scope+'_split_conv2')
217 | split_conv_x2 = conv_layer(split_conv_x2, filter=l, kernel=[3,3], layer_name=scope+'_split_conv3')
218 | split_conv_x2 = conv_layer(split_conv_x2, filter=m, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
219 | 
220 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2])
221 | 
222 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
223 | x = Relu(x)
224 | 
225 | return x
226 | 
227 | def Reduction_B(self, x, scope):
228 | with tf.name_scope(scope) :
229 | split_max_x = Max_pooling(x)
230 | 
231 | split_conv_x1 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
232 | split_conv_x1 = conv_layer(split_conv_x1, filter=384, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv2')
233 | 
234 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv3')
235 | split_conv_x2 = conv_layer(split_conv_x2, filter=288, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
236 | 
237 | split_conv_x3 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv5')
238 | split_conv_x3 = conv_layer(split_conv_x3, filter=288, kernel=[3,3], layer_name=scope+'_split_conv6')
239 | split_conv_x3 = conv_layer(split_conv_x3, filter=320, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv7')
240 | 
241 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2, split_conv_x3])
242 | 
243 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
244 | x = Relu(x)
245 | 
246 | return x
247 | 
248 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
249 | with tf.name_scope(layer_name) :
250 | squeeze = Global_Average_Pooling(input_x)
251 | 
252 | excitation = Fully_connected(squeeze, units=out_dim // ratio, layer_name=layer_name+'_fully_connected1') # // so that units is an integer
253 | excitation = Relu(excitation)
254 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2')
255 | excitation = Sigmoid(excitation)
256 | 
257 | excitation = tf.reshape(excitation, [-1,1,1,out_dim])
258 | 
259 | scale = input_x * excitation
260 | 
261 | return scale
262 | 
263 | def Build_SEnet(self, input_x):
264 | # input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]])
265 | # the cifar10 variant pads 32x32 inputs up to 96x96;
266 | # scene images are fed in at 96x96 already, so the pad stays disabled
267 | 
268 | x = self.Stem(input_x, scope='stem')
269 | 
270 | for i in range(4) :
271 | x = self.Inception_A(x, scope='Inception_A'+str(i))
272 | channel = int(np.shape(x)[-1])
273 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_A'+str(i))
274 | 
275 | x = self.Reduction_A(x, scope='Reduction_A')
276 | 
277 | for i in range(7) :
278 | x = self.Inception_B(x, scope='Inception_B'+str(i))
279 | channel = int(np.shape(x)[-1])
280 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_B'+str(i))
281 | 
282 | x = self.Reduction_B(x, scope='Reduction_B')
283 | 
284 | for i in range(3) :
285 | x = self.Inception_C(x, scope='Inception_C'+str(i))
286 | channel = int(np.shape(x)[-1])
287 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_C'+str(i))
288 | 
289 | x = Global_Average_Pooling(x)
290 | x = Dropout(x, rate=0.2, training=self.training)
291 | x = flatten(x)
292 | 
293 | x = Fully_connected(x, layer_name='final_fully_connected')
294 | return x
295 | 
--------------------------------------------------------------------------------
/SE_Inception_resnet_v2.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tflearn.layers.conv import global_avg_pool
3 | from tensorflow.contrib.layers import batch_norm, flatten
4 | from tensorflow.contrib.framework import arg_scope
5 | from cifar10 import *
6 | import numpy as np
7 | 
8 | weight_decay = 0.0005
9 | momentum = 0.9
10 | 
11 | init_learning_rate = 0.1
12 | 
13 | reduction_ratio = 4
14 | 
15 | batch_size = 128
16 | iteration = 391
17 | # 128 * 391 ~ 50,000
18 | 
19 | test_iteration = 10
20 | 
21 | total_epochs = 100
22 | 
23 | def conv_layer(input, filter, kernel, stride=1,
padding='SAME', layer_name="conv", activation=True): 24 | with tf.name_scope(layer_name): 25 | network = tf.layers.conv2d(inputs=input, use_bias=True, filters=filter, kernel_size=kernel, strides=stride, padding=padding) 26 | if activation : 27 | network = Relu(network) 28 | return network 29 | 30 | def Fully_connected(x, units=class_num, layer_name='fully_connected') : 31 | with tf.name_scope(layer_name) : 32 | return tf.layers.dense(inputs=x, use_bias=True, units=units) 33 | 34 | def Relu(x): 35 | return tf.nn.relu(x) 36 | 37 | def Sigmoid(x): 38 | return tf.nn.sigmoid(x) 39 | 40 | def Global_Average_Pooling(x): 41 | return global_avg_pool(x, name='Global_avg_pooling') 42 | 43 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') : 44 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding) 45 | 46 | def Batch_Normalization(x, training, scope): 47 | with arg_scope([batch_norm], 48 | scope=scope, 49 | updates_collections=None, 50 | decay=0.9, 51 | center=True, 52 | scale=True, 53 | zero_debias_moving_mean=True) : 54 | return tf.cond(training, 55 | lambda : batch_norm(inputs=x, is_training=training, reuse=None), 56 | lambda : batch_norm(inputs=x, is_training=training, reuse=True)) 57 | 58 | def Concatenation(layers) : 59 | return tf.concat(layers, axis=3) 60 | 61 | def Dropout(x, rate, training) : 62 | return tf.layers.dropout(inputs=x, rate=rate, training=training) 63 | 64 | def Evaluate(sess): 65 | test_acc = 0.0 66 | test_loss = 0.0 67 | test_pre_index = 0 68 | add = 1000 69 | 70 | for it in range(test_iteration): 71 | test_batch_x = test_x[test_pre_index: test_pre_index + add] 72 | test_batch_y = test_y[test_pre_index: test_pre_index + add] 73 | test_pre_index = test_pre_index + add 74 | 75 | test_feed_dict = { 76 | x: test_batch_x, 77 | label: test_batch_y, 78 | learning_rate: epoch_learning_rate, 79 | training_flag: False 80 | } 81 | 82 | loss_, acc_ = sess.run([cost, accuracy], feed_dict=test_feed_dict) 83 | 84 | test_loss += loss_ 85 | test_acc += acc_ 86 | 87 | test_loss /= test_iteration # average loss 88 | test_acc /= test_iteration # average accuracy 89 | 90 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 91 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 92 | 93 | return test_acc, test_loss, summary 94 | 95 | class SE_Inception_resnet_v2(): 96 | def __init__(self, x, training): 97 | self.training = training 98 | self.model = self.Build_SEnet(x) 99 | 100 | def Stem(self, x, scope): 101 | with tf.name_scope(scope) : 102 | x = conv_layer(x, filter=32, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_conv1') 103 | x = conv_layer(x, filter=32, kernel=[3,3], padding='VALID', layer_name=scope+'_conv2') 104 | block_1 = conv_layer(x, filter=64, kernel=[3,3], layer_name=scope+'_conv3') 105 | 106 | split_max_x = Max_pooling(block_1) 107 | split_conv_x = conv_layer(block_1, filter=96, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1') 108 | x = Concatenation([split_max_x,split_conv_x]) 109 | 110 | split_conv_x1 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv2') 111 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv3') 112 | 113 | split_conv_x2 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv4') 114 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[7,1], layer_name=scope+'_split_conv5') 115 | split_conv_x2 = 
conv_layer(split_conv_x2, filter=64, kernel=[1,7], layer_name=scope+'_split_conv6')
116 | split_conv_x2 = conv_layer(split_conv_x2, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv7')
117 | 
118 | x = Concatenation([split_conv_x1,split_conv_x2])
119 | 
120 | split_conv_x = conv_layer(x, filter=192, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv8')
121 | split_max_x = Max_pooling(x)
122 | 
123 | x = Concatenation([split_conv_x, split_max_x])
124 | 
125 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
126 | x = Relu(x)
127 | 
128 | return x
129 | 
130 | def Inception_resnet_A(self, x, scope):
131 | with tf.name_scope(scope) :
132 | init = x
133 | 
134 | split_conv_x1 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv1')
135 | 
136 | split_conv_x2 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv2')
137 | split_conv_x2 = conv_layer(split_conv_x2, filter=32, kernel=[3,3], layer_name=scope+'_split_conv3')
138 | 
139 | split_conv_x3 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv4')
140 | split_conv_x3 = conv_layer(split_conv_x3, filter=48, kernel=[3,3], layer_name=scope+'_split_conv5')
141 | split_conv_x3 = conv_layer(split_conv_x3, filter=64, kernel=[3,3], layer_name=scope+'_split_conv6')
142 | 
143 | x = Concatenation([split_conv_x1,split_conv_x2,split_conv_x3])
144 | x = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_final_conv1', activation=False)
145 | 
146 | x = x*0.1 # scale the residual branch before the addition (Inception-ResNet)
147 | x = init + x
148 | 
149 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
150 | x = Relu(x)
151 | 
152 | return x
153 | 
154 | def Inception_resnet_B(self, x, scope):
155 | with tf.name_scope(scope) :
156 | init = x
157 | 
158 | split_conv_x1 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv1')
159 | 
160 | split_conv_x2 = conv_layer(x, filter=128, kernel=[1,1], layer_name=scope+'_split_conv2')
161 | split_conv_x2 = conv_layer(split_conv_x2, filter=160, kernel=[1,7], layer_name=scope+'_split_conv3')
162 | split_conv_x2 = conv_layer(split_conv_x2, filter=192, kernel=[7,1], layer_name=scope+'_split_conv4')
163 | 
164 | x = Concatenation([split_conv_x1, split_conv_x2])
165 | x = conv_layer(x, filter=1152, kernel=[1,1], layer_name=scope+'_final_conv1', activation=False)
166 | # the Inception-ResNet-v2 paper uses 1154 filters here
167 | x = x * 0.1
168 | x = init + x
169 | 
170 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
171 | x = Relu(x)
172 | 
173 | return x
174 | 
175 | def Inception_resnet_C(self, x, scope):
176 | with tf.name_scope(scope) :
177 | init = x
178 | 
179 | split_conv_x1 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv1')
180 | 
181 | split_conv_x2 = conv_layer(x, filter=192, kernel=[1, 1], layer_name=scope + '_split_conv2')
182 | split_conv_x2 = conv_layer(split_conv_x2, filter=224, kernel=[1, 3], layer_name=scope + '_split_conv3')
183 | split_conv_x2 = conv_layer(split_conv_x2, filter=256, kernel=[3, 1], layer_name=scope + '_split_conv4')
184 | 
185 | x = Concatenation([split_conv_x1,split_conv_x2])
186 | x = conv_layer(x, filter=2144, kernel=[1,1], layer_name=scope+'_final_conv2', activation=False)
187 | # the Inception-ResNet-v2 paper uses 2048 filters here
188 | x = x * 0.1
189 | x = init + x
190 | 
191 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
192 | x = Relu(x)
193 | 
194 | return x
195 | 
196 | def Reduction_A(self, x, scope):
197 | with tf.name_scope(scope) :
198 | k = 256
199 | l = 256
200 | m = 384
201 | n = 384
202 | 
203 | split_max_x = Max_pooling(x)
204 | 
205 | split_conv_x1 = conv_layer(x, filter=n, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1')
206 | 
207 | split_conv_x2 = conv_layer(x, filter=k, kernel=[1,1], layer_name=scope+'_split_conv2')
208 | split_conv_x2 = conv_layer(split_conv_x2, filter=l, kernel=[3,3], layer_name=scope+'_split_conv3')
209 | split_conv_x2 = conv_layer(split_conv_x2, filter=m, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
210 | 
211 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2])
212 | 
213 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
214 | x = Relu(x)
215 | 
216 | return x
217 | 
218 | def Reduction_B(self, x, scope):
219 | with tf.name_scope(scope) :
220 | split_max_x = Max_pooling(x)
221 | 
222 | split_conv_x1 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
223 | split_conv_x1 = conv_layer(split_conv_x1, filter=384, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv2')
224 | 
225 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv3')
226 | split_conv_x2 = conv_layer(split_conv_x2, filter=288, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
227 | 
228 | split_conv_x3 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv5')
229 | split_conv_x3 = conv_layer(split_conv_x3, filter=288, kernel=[3,3], layer_name=scope+'_split_conv6')
230 | split_conv_x3 = conv_layer(split_conv_x3, filter=320, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv7')
231 | 
232 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2, split_conv_x3])
233 | 
234 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
235 | x = Relu(x)
236 | 
237 | return x
238 | 
239 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
240 | with tf.name_scope(layer_name) :
241 | 
242 | 
243 | squeeze = Global_Average_Pooling(input_x)
244 | 
245 | excitation = Fully_connected(squeeze, units=out_dim // ratio, layer_name=layer_name+'_fully_connected1') # // so that units is an integer
246 | excitation = Relu(excitation)
247 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2')
248 | excitation = Sigmoid(excitation)
249 | 
250 | excitation = tf.reshape(excitation, [-1,1,1,out_dim])
251 | scale = input_x * excitation
252 | 
253 | return scale
254 | 
255 | def Build_SEnet(self, input_x):
256 | input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]])
257 | # size 32 -> 96
258 | print(np.shape(input_x))
259 | # only cifar10 architecture
260 | 
261 | x = self.Stem(input_x, scope='stem')
262 | 
263 | for i in range(5) :
264 | x = self.Inception_resnet_A(x, scope='Inception_A'+str(i))
265 | channel = int(np.shape(x)[-1])
266 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_A'+str(i))
267 | 
268 | x = self.Reduction_A(x, scope='Reduction_A')
269 | 
270 | for i in range(10) :
271 | x = self.Inception_resnet_B(x, scope='Inception_B'+str(i))
272 | channel = int(np.shape(x)[-1])
273 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_B'+str(i))
274 | 
275 | x = self.Reduction_B(x, scope='Reduction_B')
276 | 
277 | for i in range(5) :
278 | x = self.Inception_resnet_C(x, scope='Inception_C'+str(i))
279 | channel = int(np.shape(x)[-1])
280 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_C'+str(i))
281 | 
282 | x =
Global_Average_Pooling(x) 283 | x = Dropout(x, rate=0.2, training=self.training) 284 | x = flatten(x) 285 | 286 | x = Fully_connected(x, layer_name='final_fully_connected') 287 | return x 288 | 289 | 290 | train_x, train_y, test_x, test_y = prepare_data() 291 | train_x, test_x = color_preprocessing(train_x, test_x) 292 | 293 | 294 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10 295 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels]) 296 | label = tf.placeholder(tf.float32, shape=[None, class_num]) 297 | 298 | training_flag = tf.placeholder(tf.bool) 299 | 300 | 301 | learning_rate = tf.placeholder(tf.float32, name='learning_rate') 302 | 303 | logits = SE_Inception_resnet_v2(x, training=training_flag).model 304 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)) 305 | 306 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()]) 307 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True) 308 | train = optimizer.minimize(cost + l2_loss * weight_decay) 309 | 310 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1)) 311 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 312 | 313 | saver = tf.train.Saver(tf.global_variables()) 314 | 315 | with tf.Session() as sess: 316 | ckpt = tf.train.get_checkpoint_state('./model') 317 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): 318 | saver.restore(sess, ckpt.model_checkpoint_path) 319 | else: 320 | sess.run(tf.global_variables_initializer()) 321 | 322 | summary_writer = tf.summary.FileWriter('./logs', sess.graph) 323 | 324 | epoch_learning_rate = init_learning_rate 325 | for epoch in range(1, total_epochs + 1): 326 | if epoch % 30 == 0 : 327 | epoch_learning_rate = epoch_learning_rate / 10 328 | 329 | pre_index = 0 330 | train_acc = 0.0 331 | train_loss = 0.0 332 | 333 | for step in range(1, iteration + 1): 334 | if pre_index + batch_size < 50000: 335 | batch_x = train_x[pre_index: pre_index + batch_size] 336 | batch_y = train_y[pre_index: pre_index + batch_size] 337 | else: 338 | batch_x = train_x[pre_index:] 339 | batch_y = train_y[pre_index:] 340 | 341 | batch_x = data_augmentation(batch_x) 342 | 343 | train_feed_dict = { 344 | x: batch_x, 345 | label: batch_y, 346 | learning_rate: epoch_learning_rate, 347 | training_flag: True 348 | } 349 | 350 | _, batch_loss = sess.run([train, cost], feed_dict=train_feed_dict) 351 | batch_acc = accuracy.eval(feed_dict=train_feed_dict) 352 | 353 | train_loss += batch_loss 354 | train_acc += batch_acc 355 | pre_index += batch_size 356 | 357 | 358 | train_loss /= iteration # average loss 359 | train_acc /= iteration # average accuracy 360 | 361 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss), 362 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)]) 363 | 364 | test_acc, test_loss, test_summary = Evaluate(sess) 365 | 366 | summary_writer.add_summary(summary=train_summary, global_step=epoch) 367 | summary_writer.add_summary(summary=test_summary, global_step=epoch) 368 | summary_writer.flush() 369 | 370 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % ( 371 | epoch, total_epochs, train_loss, train_acc, test_loss, test_acc) 372 | print(line) 373 | 374 | with open('logs.txt', 'a') as f: 375 | f.write(line) 376 | 377 | saver.save(sess=sess, save_path='./model/Inception_resnet_v2.ckpt') 
-------------------------------------------------------------------------------- /resnet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains definitions for the preactivation form of Residual Networks. 16 | 17 | Residual networks (ResNets) were originally proposed in: 18 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 20 | 21 | The full preactivation 'v2' ResNet variant implemented in this module was 22 | introduced by: 23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 24 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 25 | 26 | The key difference of the full preactivation 'v2' variant compared to the 27 | 'v1' variant in [1] is the use of batch normalization before every weight layer 28 | rather than after. 29 | """ 30 | 31 | from __future__ import absolute_import 32 | from __future__ import division 33 | from __future__ import print_function 34 | 35 | import tensorflow as tf 36 | import numpy as np 37 | from IPython import embed 38 | 39 | _BATCH_NORM_DECAY = 0.997 40 | _BATCH_NORM_EPSILON = 1e-5 41 | 42 | 43 | def batch_norm_relu(inputs, is_training, data_format): 44 | """Performs a batch normalization followed by a ReLU.""" 45 | # We set fused=True for a significant performance boost. See 46 | # https://www.tensorflow.org/performance/performance_guide#common_fused_ops 47 | inputs = tf.layers.batch_normalization( 48 | inputs=inputs, axis=1 if data_format == 'channels_first' else 3, 49 | momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True, 50 | scale=True, training=is_training, fused=True) 51 | inputs = tf.nn.relu(inputs) 52 | return inputs 53 | 54 | 55 | def fixed_padding(inputs, kernel_size, data_format): 56 | """Pads the input along the spatial dimensions independently of input size. 57 | 58 | Args: 59 | inputs: A tensor of size [batch, channels, height_in, width_in] or 60 | [batch, height_in, width_in, channels] depending on data_format. 61 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation. 62 | Should be a positive integer. 63 | data_format: The input format ('channels_last' or 'channels_first'). 64 | 65 | Returns: 66 | A tensor with the same format as the input with the data either intact 67 | (if kernel_size == 1) or padded (if kernel_size > 1). 
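For example, with kernel_size=3 the total padding is 2: one row/column
of zeros before and one after each spatial dimension.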
68 | """ 69 | pad_total = kernel_size - 1 70 | pad_beg = pad_total // 2 71 | pad_end = pad_total - pad_beg 72 | 73 | if data_format == 'channels_first': 74 | padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], 75 | [pad_beg, pad_end], [pad_beg, pad_end]]) 76 | else: 77 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], 78 | [pad_beg, pad_end], [0, 0]]) 79 | return padded_inputs 80 | 81 | 82 | def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format): 83 | """Strided 2-D convolution with explicit padding.""" 84 | # The padding is consistent and is based only on `kernel_size`, not on the 85 | # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). 86 | if strides > 1: 87 | inputs = fixed_padding(inputs, kernel_size, data_format) 88 | 89 | return tf.layers.conv2d( 90 | inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, 91 | padding=('SAME' if strides == 1 else 'VALID'), use_bias=False, 92 | kernel_initializer=tf.variance_scaling_initializer(), 93 | data_format=data_format) 94 | 95 | 96 | def GlobalAvgPooling(x, data_format): 97 | """ 98 | Global average pooling as in the paper `Network In Network 99 | `_. 100 | Args: 101 | x (tf.Tensor): a NHWC tensor. 102 | Returns: 103 | tf.Tensor: a NC tensor named ``output``. 104 | """ 105 | assert x.shape.ndims == 4 106 | assert data_format in ['channels_last', 'channels_first'] 107 | axis = [1, 2] if data_format == 'channels_last' else [2, 3] 108 | return tf.reduce_mean(x, axis, name='GlobalAvgPooling') 109 | 110 | 111 | def flatten(x): 112 | """ 113 | Flatten the tensor. 114 | """ 115 | return tf.reshape(x, [-1]) 116 | 117 | 118 | def batch_flatten(x): 119 | """ 120 | Flatten the tensor except the first dimension. 121 | """ 122 | shape = x.get_shape().as_list()[1:] 123 | if None not in shape: 124 | return tf.reshape(x, [-1, int(np.prod(shape))]) 125 | return tf.reshape(x, tf.stack([tf.shape(x)[0], -1])) 126 | 127 | 128 | def FullyConnected(x, out_dim, 129 | W_init=None, b_init=None, 130 | nl=tf.identity, use_bias=True, name='fc'): 131 | """ 132 | Fully-Connected layer, takes a N>1D tensor and returns a 2D tensor. 133 | It is an equivalent of `tf.layers.dense` except for naming conventions. 134 | Args: 135 | x (tf.Tensor): a tensor to be flattened except for the first dimension. 136 | out_dim (int): output dimension 137 | W_init: initializer for W. Defaults to `variance_scaling_initializer`. 138 | b_init: initializer for b. Defaults to zero. 139 | nl: a nonlinearity function 140 | use_bias (bool): whether to use bias. 141 | Returns: 142 | tf.Tensor: a NC tensor named ``output`` with attribute `variables`. 143 | Variable Names: 144 | * ``W``: weights of shape [in_dim, out_dim] 145 | * ``b``: bias 146 | """ 147 | x = batch_flatten(x) 148 | 149 | if W_init is None: 150 | W_init = tf.contrib.layers.variance_scaling_initializer() 151 | if b_init is None: 152 | b_init = tf.constant_initializer() 153 | 154 | x = tf.layers.dense( 155 | inputs=x, units=out_dim, activation=lambda x: nl(x, name='output'), use_bias=use_bias, 156 | kernel_initializer=W_init, bias_initializer=b_init, 157 | trainable=True) 158 | 159 | x = tf.identity(x, name) 160 | 161 | return x 162 | 163 | 164 | def building_block(inputs, filters, is_training, projection_shortcut, strides, 165 | data_format): 166 | """Standard building block for residual networks with BN before convolutions. 
167 | 
168 | Args:
169 | inputs: A tensor of size [batch, channels, height_in, width_in] or
170 | [batch, height_in, width_in, channels] depending on data_format.
171 | filters: The number of filters for the convolutions.
172 | is_training: A Boolean for whether the model is in training or inference
173 | mode. Needed for batch normalization.
174 | projection_shortcut: The function to use for projection shortcuts (typically
175 | a 1x1 convolution when downsampling the input).
176 | strides: The block's stride. If greater than 1, this block will ultimately
177 | downsample the input.
178 | data_format: The input format ('channels_last' or 'channels_first').
179 | 
180 | Returns:
181 | The output tensor of the block.
182 | """
183 | shortcut = inputs
184 | inputs = batch_norm_relu(inputs, is_training, data_format)
185 | 
186 | # The projection shortcut should come after the first batch norm and ReLU
187 | # since it performs a 1x1 convolution.
188 | if projection_shortcut is not None:
189 | shortcut = projection_shortcut(inputs)
190 | 
191 | inputs = conv2d_fixed_padding(
192 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
193 | data_format=data_format)
194 | 
195 | inputs = batch_norm_relu(inputs, is_training, data_format)
196 | inputs = conv2d_fixed_padding(
197 | inputs=inputs, filters=filters, kernel_size=3, strides=1,
198 | data_format=data_format)
199 | 
200 | return inputs + shortcut
201 | 
202 | 
203 | def se_building_block(inputs, filters, is_training, projection_shortcut, strides,
204 | data_format):
205 | """Standard building block with BN before convolutions plus a squeeze-and-excitation gate.
206 | 
207 | Args:
208 | inputs: A tensor of size [batch, channels, height_in, width_in] or
209 | [batch, height_in, width_in, channels] depending on data_format.
210 | filters: The number of filters for the convolutions.
211 | is_training: A Boolean for whether the model is in training or inference
212 | mode. Needed for batch normalization.
213 | projection_shortcut: The function to use for projection shortcuts (typically
214 | a 1x1 convolution when downsampling the input).
215 | strides: The block's stride. If greater than 1, this block will ultimately
216 | downsample the input.
217 | data_format: The input format ('channels_last' or 'channels_first').
218 | 
219 | Returns:
220 | The output tensor of the block.
221 | """
222 | shortcut = inputs
223 | inputs = batch_norm_relu(inputs, is_training, data_format)
224 | 
225 | # The projection shortcut should come after the first batch norm and ReLU
226 | # since it performs a 1x1 convolution.
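# The body below repeats building_block; the difference is the
# squeeze-and-excitation gate that rescales the channels of the residual
# branch just before the shortcut addition.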
227 | if projection_shortcut is not None:
228 | shortcut = projection_shortcut(inputs)
229 | 
230 | inputs = conv2d_fixed_padding(
231 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
232 | data_format=data_format)
233 | 
234 | inputs = batch_norm_relu(inputs, is_training, data_format)
235 | inputs = conv2d_fixed_padding(
236 | inputs=inputs, filters=filters, kernel_size=3, strides=1,
237 | data_format=data_format)
238 | 
239 | squeeze = GlobalAvgPooling(inputs, data_format) # squeeze: one value per channel
240 | squeeze = FullyConnected(squeeze, filters // 4, nl=tf.nn.relu, name='fc1') # excitation bottleneck
241 | squeeze = FullyConnected(squeeze, filters, nl=tf.nn.sigmoid, name='fc2') # per-channel gates in (0, 1)
242 | 
243 | if data_format == 'channels_first':
244 | inputs = inputs * tf.reshape(squeeze, [-1, filters, 1, 1])
245 | else:
246 | inputs = inputs * tf.reshape(squeeze, [-1, 1, 1, filters])
247 | return inputs + shortcut
248 | 
249 | 
250 | def bottleneck_block(inputs, filters, is_training, projection_shortcut,
251 | strides, data_format):
252 | """Bottleneck block variant for residual networks with BN before convolutions.
253 | 
254 | Args:
255 | inputs: A tensor of size [batch, channels, height_in, width_in] or
256 | [batch, height_in, width_in, channels] depending on data_format.
257 | filters: The number of filters for the first two convolutions. Note that the
258 | third and final convolution will use 4 times as many filters.
259 | is_training: A Boolean for whether the model is in training or inference
260 | mode. Needed for batch normalization.
261 | projection_shortcut: The function to use for projection shortcuts (typically
262 | a 1x1 convolution when downsampling the input).
263 | strides: The block's stride. If greater than 1, this block will ultimately
264 | downsample the input.
265 | data_format: The input format ('channels_last' or 'channels_first').
266 | 
267 | Returns:
268 | The output tensor of the block.
269 | """
270 | shortcut = inputs
271 | inputs = batch_norm_relu(inputs, is_training, data_format)
272 | 
273 | # The projection shortcut should come after the first batch norm and ReLU
274 | # since it performs a 1x1 convolution.
275 | if projection_shortcut is not None:
276 | shortcut = projection_shortcut(inputs)
277 | 
278 | inputs = conv2d_fixed_padding(
279 | inputs=inputs, filters=filters, kernel_size=1, strides=1,
280 | data_format=data_format)
281 | 
282 | inputs = batch_norm_relu(inputs, is_training, data_format)
283 | inputs = conv2d_fixed_padding(
284 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
285 | data_format=data_format)
286 | 
287 | inputs = batch_norm_relu(inputs, is_training, data_format)
288 | inputs = conv2d_fixed_padding(
289 | inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
290 | data_format=data_format)
291 | 
292 | return inputs + shortcut
293 | 
294 | 
295 | def se_bottleneck_block(inputs, filters, is_training, projection_shortcut,
296 | strides, data_format):
297 | """Bottleneck block variant with BN before convolutions plus a squeeze-and-excitation gate.
298 | 
299 | Args:
300 | inputs: A tensor of size [batch, channels, height_in, width_in] or
301 | [batch, height_in, width_in, channels] depending on data_format.
302 | filters: The number of filters for the first two convolutions. Note that the
303 | third and final convolution will use 4 times as many filters.
304 | is_training: A Boolean for whether the model is in training or inference
305 | mode. Needed for batch normalization.
306 | projection_shortcut: The function to use for projection shortcuts (typically 307 | a 1x1 convolution when downsampling the input). 308 | strides: The block's stride. If greater than 1, this block will ultimately 309 | downsample the input. 310 | data_format: The input format ('channels_last' or 'channels_first'). 311 | 312 | Returns: 313 | The output tensor of the block. 314 | """ 315 | shortcut = inputs 316 | inputs = batch_norm_relu(inputs, is_training, data_format) 317 | 318 | # The projection shortcut should come after the first batch norm and ReLU 319 | # since it performs a 1x1 convolution. 320 | if projection_shortcut is not None: 321 | shortcut = projection_shortcut(inputs) 322 | 323 | inputs = conv2d_fixed_padding( 324 | inputs=inputs, filters=filters, kernel_size=1, strides=1, 325 | data_format=data_format) 326 | 327 | inputs = batch_norm_relu(inputs, is_training, data_format) 328 | inputs = conv2d_fixed_padding( 329 | inputs=inputs, filters=filters, kernel_size=3, strides=strides, 330 | data_format=data_format) 331 | 332 | inputs = batch_norm_relu(inputs, is_training, data_format) 333 | inputs = conv2d_fixed_padding( 334 | inputs=inputs, filters=4 * filters, kernel_size=1, strides=1, 335 | data_format=data_format) 336 | 337 | squeeze = GlobalAvgPooling(inputs, data_format) 338 | squeeze = FullyConnected(squeeze, filters // 4, nl=tf.nn.relu, name='fc1') 339 | squeeze = FullyConnected(squeeze, filters * 4, nl=tf.nn.sigmoid, name='fc2') 340 | if data_format == 'channels_first': 341 | inputs = inputs * tf.reshape(squeeze, [-1, filters * 4, 1, 1]) 342 | else: 343 | inputs = inputs * tf.reshape(squeeze, [-1, 1, 1, filters * 4]) 344 | 345 | return inputs + shortcut 346 | 347 | 348 | def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name, 349 | data_format): 350 | """Creates one layer of blocks for the ResNet model. 351 | 352 | Args: 353 | inputs: A tensor of size [batch, channels, height_in, width_in] or 354 | [batch, height_in, width_in, channels] depending on data_format. 355 | filters: The number of filters for the first convolution of the layer. 356 | block_fn: The block to use within the model, either `building_block` or 357 | `bottleneck_block`. 358 | blocks: The number of blocks contained in the layer. 359 | strides: The stride to use for the first convolution of the layer. If 360 | greater than 1, this layer will ultimately downsample the input. 361 | is_training: Either True or False, whether we are currently training the 362 | model. Needed for batch norm. 363 | name: A string name for the tensor output of the block layer. 364 | data_format: The input format ('channels_last' or 'channels_first'). 365 | 366 | Returns: 367 | The output tensor of the block layer. 
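Only the first block of the layer applies `strides` and the projection
shortcut; the remaining blocks use stride 1 and identity shortcuts.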
368 | """ 369 | # Bottleneck blocks end with 4x the number of filters as they start with 370 | filters_out = 4 * filters if block_fn in [bottleneck_block, se_bottleneck_block] else filters 371 | 372 | def projection_shortcut(inputs): 373 | return conv2d_fixed_padding( 374 | inputs=inputs, filters=filters_out, kernel_size=1, strides=strides, 375 | data_format=data_format) 376 | 377 | # Only the first block per block_layer uses projection_shortcut and strides 378 | inputs = block_fn(inputs, filters, is_training, projection_shortcut, strides, 379 | data_format) 380 | 381 | for _ in range(1, blocks): 382 | inputs = block_fn(inputs, filters, is_training, None, 1, data_format) 383 | 384 | return tf.identity(inputs, name) 385 | 386 | 387 | def cifar10_resnet_v2_generator(resnet_size, num_classes, data_format=None): 388 | """Generator for CIFAR-10 ResNet v2 models. 389 | 390 | Args: 391 | resnet_size: A single integer for the size of the ResNet model. 392 | num_classes: The number of possible classes for image classification. 393 | data_format: The input format ('channels_last', 'channels_first', or None). 394 | If set to None, the format is dependent on whether a GPU is available. 395 | 396 | Returns: 397 | The model function that takes in `inputs` and `is_training` and 398 | returns the output tensor of the ResNet model. 399 | 400 | Raises: 401 | ValueError: If `resnet_size` is invalid. 402 | """ 403 | if resnet_size % 6 != 2: 404 | raise ValueError('resnet_size must be 6n + 2:', resnet_size) 405 | 406 | num_blocks = (resnet_size - 2) // 6 407 | 408 | if data_format is None: 409 | data_format = ( 410 | 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last') 411 | 412 | def model(inputs, is_training): 413 | """Constructs the ResNet model given the inputs.""" 414 | if data_format == 'channels_first': 415 | # Convert from channels_last (NHWC) to channels_first (NCHW). This 416 | # provides a large performance boost on GPU. See 417 | # https://www.tensorflow.org/performance/performance_guide#data_formats 418 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 419 | 420 | inputs = conv2d_fixed_padding( 421 | inputs=inputs, filters=16, kernel_size=3, strides=1, 422 | data_format=data_format) 423 | inputs = tf.identity(inputs, 'initial_conv') 424 | 425 | inputs = block_layer( 426 | inputs=inputs, filters=16, block_fn=building_block, blocks=num_blocks, 427 | strides=1, is_training=is_training, name='block_layer1', 428 | data_format=data_format) 429 | inputs = block_layer( 430 | inputs=inputs, filters=32, block_fn=building_block, blocks=num_blocks, 431 | strides=2, is_training=is_training, name='block_layer2', 432 | data_format=data_format) 433 | inputs = block_layer( 434 | inputs=inputs, filters=64, block_fn=building_block, blocks=num_blocks, 435 | strides=2, is_training=is_training, name='block_layer3', 436 | data_format=data_format) 437 | 438 | inputs = batch_norm_relu(inputs, is_training, data_format) 439 | inputs = tf.layers.average_pooling2d( 440 | inputs=inputs, pool_size=8, strides=1, padding='VALID', 441 | data_format=data_format) 442 | inputs = tf.identity(inputs, 'final_avg_pool') 443 | inputs = tf.reshape(inputs, [-1, 64]) 444 | inputs = tf.layers.dense(inputs=inputs, units=num_classes) 445 | inputs = tf.identity(inputs, 'final_dense') 446 | return inputs 447 | 448 | return model 449 | 450 | 451 | def imagenet_resnet_v2_generator(block_fn, layers, num_classes, 452 | data_format=None): 453 | """Generator for ImageNet ResNet v2 models. 
454 | 455 | Args: 456 | block_fn: The block to use within the model, either `building_block` or 457 | `bottleneck_block`. 458 | layers: A length-4 array denoting the number of blocks to include in each 459 | layer. Each layer consists of blocks that take inputs of the same size. 460 | num_classes: The number of possible classes for image classification. 461 | data_format: The input format ('channels_last', 'channels_first', or None). 462 | If set to None, the format is dependent on whether a GPU is available. 463 | 464 | Returns: 465 | The model function that takes in `inputs` and `is_training` and 466 | returns the output tensor of the ResNet model. 467 | """ 468 | if data_format is None: 469 | data_format = ( 470 | 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last') 471 | 472 | def model(inputs, is_training): 473 | """Constructs the ResNet model given the inputs.""" 474 | if data_format == 'channels_first': 475 | # Convert from channels_last (NHWC) to channels_first (NCHW). This 476 | # provides a large performance boost on GPU. 477 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 478 | 479 | inputs = conv2d_fixed_padding( 480 | inputs=inputs, filters=64, kernel_size=7, strides=2, 481 | data_format=data_format) 482 | inputs = tf.identity(inputs, 'initial_conv') 483 | inputs = tf.layers.max_pooling2d( 484 | inputs=inputs, pool_size=3, strides=2, padding='SAME', 485 | data_format=data_format) 486 | inputs = tf.identity(inputs, 'initial_max_pool') 487 | 488 | inputs = block_layer( 489 | inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0], 490 | strides=1, is_training=is_training, name='block_layer1', 491 | data_format=data_format) 492 | inputs = block_layer( 493 | inputs=inputs, filters=128, block_fn=block_fn, blocks=layers[1], 494 | strides=2, is_training=is_training, name='block_layer2', 495 | data_format=data_format) 496 | inputs = block_layer( 497 | inputs=inputs, filters=256, block_fn=block_fn, blocks=layers[2], 498 | strides=2, is_training=is_training, name='block_layer3', 499 | data_format=data_format) 500 | inputs = block_layer( 501 | inputs=inputs, filters=512, block_fn=block_fn, blocks=layers[3], 502 | strides=2, is_training=is_training, name='block_layer4', 503 | data_format=data_format) 504 | inputs = batch_norm_relu(inputs, is_training, data_format) 505 | inputs = tf.layers.average_pooling2d( 506 | inputs=inputs, pool_size=7, strides=1, padding='VALID', 507 | data_format=data_format) 508 | inputs = tf.identity(inputs, 'final_avg_pool') 509 | inputs = tf.reshape(inputs, 510 | [-1, 512 if block_fn in [building_block, se_building_block] else 2048]) 511 | # inputs = tf.layers.dense(inputs=inputs, units=num_classes) 512 | # inputs = tf.identity(inputs, 'final_dense') 513 | return inputs 514 | 515 | return model 516 | 517 | 518 | def imagenet_resnet_v2(resnet_size, num_classes, mode='v2', data_format=None): 519 | """Returns the ResNet model for a given size and number of output classes.""" 520 | building_block_mode = { 521 | 'v2': building_block, 522 | 'se': se_building_block}[mode] 523 | bottleneck_block_mode = { 524 | 'v2': bottleneck_block, 525 | 'se': se_bottleneck_block}[mode] 526 | 527 | model_params = { 528 | 18: {'block': building_block_mode, 'layers': [2, 2, 2, 2]}, 529 | 34: {'block': building_block_mode, 'layers': [3, 4, 6, 3]}, 530 | 50: {'block': bottleneck_block_mode, 'layers': [3, 4, 6, 3]}, 531 | 101: {'block': bottleneck_block_mode, 'layers': [3, 4, 23, 3]}, 532 | 152: {'block': bottleneck_block_mode, 'layers': [3, 8, 36, 3]}, 533 | 200: 
{'block': bottleneck_block_mode, 'layers': [3, 24, 36, 3]} 534 | } 535 | 536 | if resnet_size not in model_params: 537 | raise ValueError('Not a valid resnet_size:', resnet_size) 538 | 539 | params = model_params[resnet_size] 540 | return imagenet_resnet_v2_generator( 541 | params['block'], params['layers'], num_classes, data_format) 542 | --------------------------------------------------------------------------------