├── .vscode
│   └── settings.json
├── assests
│   ├── ratio.JPG
│   ├── resnet.jpg
│   ├── benefit.JPG
│   ├── inception.jpg
│   ├── senet_block.JPG
│   ├── incorporation.JPG
│   └── state_of_art.JPG
├── data_dump.py
├── LICENSE
├── .gitignore
├── README.md
├── scene_eval.py
├── dataflow_input.py
├── cifar10.py
├── eval.py
├── SE_ResNeXt.py
├── pre_train.py
├── train.py
├── SE_Inception_v4.py
├── SE_Inception_resnet_v2.py
└── resnet_model.py
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 | "python.linting.pylintEnabled": false
3 | }
--------------------------------------------------------------------------------
/assests/ratio.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/ratio.JPG
--------------------------------------------------------------------------------
/assests/resnet.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/resnet.jpg
--------------------------------------------------------------------------------
/assests/benefit.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/benefit.JPG
--------------------------------------------------------------------------------
/assests/inception.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/inception.jpg
--------------------------------------------------------------------------------
/assests/senet_block.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/senet_block.JPG
--------------------------------------------------------------------------------
/assests/incorporation.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/incorporation.JPG
--------------------------------------------------------------------------------
/assests/state_of_art.JPG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/state_of_art.JPG
--------------------------------------------------------------------------------
/data_dump.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import json
3 | import os
4 | from IPython import embed
5 |
6 | label_path = '/data0/AIChallenger/places_devkit/categories_places365.txt'
7 | data_path = '/data0/AIChallenger/data_256'
8 |
9 | result = []
10 | with open(label_path, 'r') as f:
11 | lines = (line.strip() for line in f)
12 | for line in lines:
13 | path, label_id = line.split()
14 | path = path[1:]
15 | for filename in os.listdir(os.path.join(data_path, path)):
16 | image = {}
17 | image['image_id'] = os.path.join(path, filename)
18 | image['label_id'] = label_id
19 | result.append(image)
20 |
21 | with open('/data0/AIChallenger/data_256.json', 'w') as f:
22 | json.dump(result, f)
23 | print('write result json, num is %d' % len(result))
24 |
--------------------------------------------------------------------------------
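For reference, a minimal sketch of reading the dump this script writes; the record layout (`image_id`, `label_id`) comes from the loop above, and the path is the script's own output path.

```python
# Count images per label in the JSON written by data_dump.py.
import json
from collections import Counter

with open('/data0/AIChallenger/data_256.json', 'r') as f:
    records = json.load(f)

counts = Counter(r['label_id'] for r in records)
print('total images: %d, distinct labels: %d' % (len(records), len(counts)))
```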
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Junho Kim (1993.01.12)
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *model*/
2 | *log*/
3 |
4 | # Byte-compiled / optimized / DLL files
5 | __pycache__/
6 | *.py[cod]
7 | *$py.class
8 |
9 | # C extensions
10 | *.so
11 |
12 | # Distribution / packaging
13 | .Python
14 | env/
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | *.egg-info/
28 | .installed.cfg
29 | *.egg
30 |
31 | # PyInstaller
32 | # Usually these files are written by a python script from a template
33 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
34 | *.manifest
35 | *.spec
36 |
37 | # Installer logs
38 | pip-log.txt
39 | pip-delete-this-directory.txt
40 |
41 | # Unit test / coverage reports
42 | htmlcov/
43 | .tox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | .hypothesis/
51 |
52 | # Translations
53 | *.mo
54 | *.pot
55 |
56 | # Django stuff:
57 | *.log
58 | local_settings.py
59 |
60 | # Flask stuff:
61 | instance/
62 | .webassets-cache
63 |
64 | # Scrapy stuff:
65 | .scrapy
66 |
67 | # Sphinx documentation
68 | docs/_build/
69 |
70 | # PyBuilder
71 | target/
72 |
73 | # Jupyter Notebook
74 | .ipynb_checkpoints
75 |
76 | # pyenv
77 | .python-version
78 |
79 | # celery beat schedule file
80 | celerybeat-schedule
81 |
82 | # SageMath parsed files
83 | *.sage.py
84 |
85 | # dotenv
86 | .env
87 |
88 | # virtualenv
89 | .venv
90 | venv/
91 | ENV/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # SENet-Tensorflow
2 | Simple Tensorflow implementation of [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507) using **Cifar10**
3 |
4 | I implemented the following SENet variants:
5 | * [ResNeXt paper](https://arxiv.org/abs/1611.05431)
6 | * [Inception-v4, Inception-ResNet-v2 paper](https://arxiv.org/abs/1602.07261)
7 |
8 | If you want to see the ***original author's code***, please refer to this [link](https://github.com/hujie-frank/SENet)
9 |
10 |
11 |
12 | ## Requirements
13 | * Tensorflow 1.x
14 | * Python 3.x
15 | * tflearn (install ***tflearn*** if you want an easy way to use ***global average pooling***)
16 |
17 | ## Issue
18 | ### Image size
19 | * The paper's experiments were on *ImageNet*
20 | * However, because of **image size** constraints in the ***Inception network***, I used ***zero padding*** for Cifar10
21 | ```python
22 | input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]]) # size 32x32 -> 96x96
23 | ```
24 | ### Not enough GPU memory
25 | * If you do not have enough GPU memory, please edit the code as follows
26 | ```python
27 | with tf.Session() as sess:  # NO
28 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:  # OK
29 | ```
30 |
31 | ## Idea
32 | ### What is the "SE block" ?
33 | 
34 | ```python
35 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
36 | with tf.name_scope(layer_name) :
37 | squeeze = Global_Average_Pooling(input_x)
38 |
39 | excitation = Fully_connected(squeeze, units=out_dim // ratio, layer_name=layer_name+'_fully_connected1')
40 | excitation = Relu(excitation)
41 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2')
42 | excitation = Sigmoid(excitation)
43 |
44 | excitation = tf.reshape(excitation, [-1,1,1,out_dim])
45 |
46 | scale = input_x * excitation
47 |
48 | return scale
49 | ```
50 |
51 | ### How is it applied? (Inception, Residual)
52 |
53 | 
54 | 
55 |
56 |
57 | ### How should I set the *"reduction ratio"*?
58 | 
59 | * **original** refers to ***ResNet-50***
60 |
61 | ## ImageNet Results
62 | ### Benefits against Network Depth
63 | 
64 |
65 | ### Incorporation with Modern Architecture
66 | 
67 |
68 | ### Comparison with State-of-the-art
69 | 
70 |
71 | ## Cifar10 Results
72 | Coming soon
73 |
74 | ## Related works
75 | * [Densenet-Tensorflow](https://github.com/taki0112/Densenet-Tensorflow)
76 | * [ResNeXt-Tensorflow](https://github.com/taki0112/ResNeXt-Tensorflow)
77 |
78 | ## Reference
79 | * [Inception_korean](https://norman3.github.io/papers/docs/google_inception.html)
80 |
81 | ## Author
82 | Junho Kim
83 |
--------------------------------------------------------------------------------
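To try the README's SE block in isolation, here is a self-contained sketch in plain TF1 ops; it mirrors the snippet above but swaps the repo's helpers (`Global_Average_Pooling`, `Fully_connected`) for `tf.reduce_mean` and `tf.layers.dense`.

```python
import tensorflow as tf

def se_layer(input_x, out_dim, ratio, name='se'):
    """Squeeze-and-Excitation, mirroring the README snippet."""
    with tf.name_scope(name):
        # squeeze: global average pool over H, W -> [N, C]
        squeeze = tf.reduce_mean(input_x, axis=[1, 2])
        # excitation: bottleneck FC -> ReLU -> FC -> sigmoid
        w = tf.layers.dense(squeeze, units=out_dim // ratio, activation=tf.nn.relu, use_bias=False)
        w = tf.layers.dense(w, units=out_dim, activation=tf.nn.sigmoid, use_bias=False)
        # scale: reweight each input channel
        return input_x * tf.reshape(w, [-1, 1, 1, out_dim])

x = tf.placeholder(tf.float32, [None, 32, 32, 64])
y = se_layer(x, out_dim=64, ratio=4)  # same shape as x
```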
/scene_eval.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | # Copyright 2017 challenger.ai
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | """
18 | Scene classification is a task of AI Challenger 全球AI挑战赛
19 |
20 | This python script is used for calculating the accuracy of the test result,
21 |
22 | based on your submitted file and the reference file containing the ground truth.
23 |
24 | Usage:
25 |
26 | python scene_eval.py --submit SUBMIT_FILEPATH --ref REF_FILEPATH
27 |
28 | A test case is provided: the submitted file is submit.json and the reference file is ref.json. Test it by:
29 |
30 | python scene_eval.py --submit ./submit.json --ref ./ref.json
31 |
32 | The accuracy of the submitted result, error messages and warning messages will be printed.
33 | """
34 |
35 | import json
36 | import argparse
37 | import time
38 |
39 |
40 | def __load_data(submit_file, reference_file):
41 | # load submit result and reference result
42 |
43 | with open(submit_file, 'r') as file1:
44 | submit_data = json.load(file1)
45 | with open(reference_file, 'r') as file1:
46 | ref_data = json.load(file1)
47 | if len(submit_data) != len(ref_data):
48 | result['warning'].append('Inconsistent number of images between submission and reference data \n')
49 | submit_dict = {}
50 | ref_dict = {}
51 | for item in submit_data:
52 | submit_dict[item['image_id']] = item['label_id']
53 | for item in ref_data:
54 | ref_dict[item['image_id']] = int(item['label_id'])
55 | return submit_dict, ref_dict
56 |
57 |
58 | def __eval_result(submit_dict, ref_dict):
59 | # eval accuracy
60 |
61 | right_count = 0
62 | for (key, value) in ref_dict.items():
63 |
64 | if key not in submit_dict:
65 | result['warning'].append('lacking image %s in your submission file \n' % key)
66 | print('warning: lacking image %s in your submission file' % key)
67 | continue
68 |
69 | if value in submit_dict[key][:3]:
70 | right_count += 1
71 |
72 | result['score'] = str(float(right_count)/max(len(ref_dict), 1e-5))
73 | return result
74 |
75 |
76 | if __name__ == '__main__':
77 |
78 | PARSER = argparse.ArgumentParser()
79 |
80 | PARSER.add_argument(
81 | '--submit',
82 | type=str,
83 | default='./submit.json',
84 | help="""\
85 | Path to submission file\
86 | """
87 | )
88 |
89 | PARSER.add_argument(
90 | '--ref',
91 | type=str,
92 | default='./ref.json',
93 | help="""\
94 | Path to reference file\
95 | """
96 | )
97 |
98 | FLAGS = PARSER.parse_args()
99 |
100 | result = {'error': [], 'warning': [], 'score': None}
101 |
102 | START_TIME = time.time()
103 | SUBMIT = {}
104 | REF = {}
105 |
106 | try:
107 | SUBMIT, REF = __load_data(FLAGS.submit, FLAGS.ref)
108 | except Exception as error:
109 | result['error'].append(str(error))
110 | try:
111 | result = __eval_result(SUBMIT, REF)
112 | except Exception as error:
113 | result['error'].append(str(error))
114 | print('Evaluation time of your result: %f s' % (time.time() - START_TIME))
115 |
116 | print(result)
117 |
--------------------------------------------------------------------------------
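A minimal sketch of the two file formats `scene_eval.py` expects, with made-up values: the reference stores one label per image, the submission stores a top-3 list.

```python
import json

ref = [{'image_id': 'a.jpg', 'label_id': '5'}]            # ground truth: one label
submit = [{'image_id': 'a.jpg', 'label_id': [5, 12, 3]}]  # prediction: top-3 list

with open('ref.json', 'w') as f:
    json.dump(ref, f)
with open('submit.json', 'w') as f:
    json.dump(submit, f)
# then: python scene_eval.py --submit ./submit.json --ref ./ref.json
```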
/dataflow_input.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | import json
4 | import os
5 | import random
6 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc
7 | from tensorpack.dataflow import (
8 | AugmentImageComponent, PrefetchDataZMQ,
9 | BatchData, MultiThreadMapData, DataFlow)
10 | from IPython import embed
11 |
12 | class GoogleNetResize(imgaug.ImageAugmentor):
13 | """
14 | crop 8%~100% of the original image
15 | See `Going Deeper with Convolutions` by Google.
16 | """
17 | def __init__(self, crop_area_fraction=0.08,
18 | aspect_ratio_low=0.75, aspect_ratio_high=1.333,
19 | target_shape=224):
20 | self._init(locals())
21 |
22 | def _augment(self, img, _):
23 | h, w = img.shape[:2]
24 | area = h * w
25 | for _ in range(10):
26 | targetArea = self.rng.uniform(self.crop_area_fraction, 1.0) * area
27 | aspectR = self.rng.uniform(self.aspect_ratio_low, self.aspect_ratio_high)
28 | ww = int(np.sqrt(targetArea * aspectR) + 0.5)
29 | hh = int(np.sqrt(targetArea / aspectR) + 0.5)
30 | if self.rng.uniform() < 0.5:
31 | ww, hh = hh, ww
32 | if hh <= h and ww <= w:
33 | x1 = 0 if w == ww else self.rng.randint(0, w - ww)
34 | y1 = 0 if h == hh else self.rng.randint(0, h - hh)
35 | out = img[y1:y1 + hh, x1:x1 + ww]
36 | out = cv2.resize(out, (self.target_shape, self.target_shape), interpolation=cv2.INTER_CUBIC)
37 | return out
38 | out = imgaug.ResizeShortestEdge(self.target_shape, interp=cv2.INTER_CUBIC).augment(img)
39 | out = imgaug.CenterCrop(self.target_shape).augment(out)
40 | return out
41 |
42 | def fbresnet_augmentor(isTrain, target_shape=224):
43 | """
44 | Augmentor used in fb.resnet.torch, for BGR images in range [0,255].
45 | """
46 | if isTrain:
47 | augmentors = [
48 | GoogleNetResize(crop_area_fraction=0.32, target_shape=target_shape),
49 | # GoogleNetResize(target_shape=target_shape),
50 | imgaug.RandomOrderAug(
51 | [# imgaug.BrightnessScale((0.6, 1.4), clip=False),
52 | # imgaug.Contrast((0.6, 1.4), clip=False),
53 | # imgaug.Saturation(0.4, rgb=False),
54 | # rgb-bgr conversion for the constants copied from fb.resnet.torch
55 | imgaug.Lighting(0.1,
56 | eigval=np.asarray(
57 | [0.2175, 0.0188, 0.0045][::-1]) * 255.0,
58 | eigvec=np.array(
59 | [[-0.5675, 0.7192, 0.4009],
60 | [-0.5808, -0.0045, -0.8140],
61 | [-0.5836, -0.6948, 0.4203]],
62 | dtype='float32')[::-1, ::-1]
63 | )]),
64 | imgaug.Flip(horiz=True),
65 | ]
66 | else:
67 | augmentors = [
68 | imgaug.ResizeShortestEdge(int(256 / 224 * target_shape), cv2.INTER_CUBIC),
69 | imgaug.CenterCrop((target_shape, target_shape)),
70 | ]
71 | return augmentors
72 |
73 | def data_augmentation(im, augmentors):
74 | """
75 | See explanations in the tutorial:
76 | http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html
77 | """
78 | assert isinstance(augmentors, list)
79 | aug = imgaug.AugmentorList(augmentors)
80 | im = aug.augment(im)
81 | return im
82 |
83 | class MyDataFlow(DataFlow):
84 | def __init__(self, image_path, label_path, is_training=True, batch_size=64, img_size=224):
85 | # get all the image name and its label
86 | self.data_dict = {}
87 | with open(label_path, 'r') as f:
88 | label_list = json.load(f)
89 | for image in label_list:
90 | self.data_dict[image['image_id']] = int(image['label_id'])
91 | self.img_name = list(self.data_dict.keys())
92 | self.image_path = image_path
93 | self.is_training = is_training
94 | self.batch_size = batch_size
95 | self.img_size = img_size
96 | self.augmentors = fbresnet_augmentor(isTrain=is_training, target_shape=img_size)
97 |
98 | def get_data(self):
99 | np.random.seed()  # re-seed from OS entropy so parallel worker processes draw different batches
100 | img_batch = np.random.choice(self.img_name, self.batch_size)
101 | img_data = []
102 | img_label = []
103 | for item in img_batch:
104 | im = cv2.imread(os.path.join(self.image_path, item), cv2.IMREAD_COLOR)
105 | im = data_augmentation(im, self.augmentors)
106 | img_data.append(im)
107 | img_label.append(self.data_dict[item])
108 | yield {'data': np.array(img_data), 'label': np.array(img_label)}
109 |
110 |
111 | class MyDataFlowEval(DataFlow):
112 | def __init__(self, image_path, label_path, img_size=224):
113 | # get all the image name and its label
114 | self.data_dict = {}
115 | with open(label_path, 'r') as f:
116 | label_list = json.load(f)
117 | for image in label_list:
118 | self.data_dict[image['image_id']] = int(image['label_id'])
119 | self.img_name = list(self.data_dict.keys())
120 | self.image_path = image_path
121 | self.img_size = img_size
122 | self.Length = len(self.data_dict)
123 | self.augmentors = fbresnet_augmentor(isTrain=False, target_shape=img_size)
124 |
125 | def get_data(self):
126 | for index, item in enumerate(self.img_name):
127 | im = cv2.imread(os.path.join(self.image_path, item), cv2.IMREAD_COLOR)
128 | im = data_augmentation(im, self.augmentors)
129 | label = self.data_dict[item]
130 | yield {
131 | 'name': item,
132 | 'data': np.expand_dims(np.array(im), axis=0),
133 | 'label': np.array(label),
134 | 'epoch': (index+1) == self.Length
135 | }
--------------------------------------------------------------------------------
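A minimal sketch of driving `MyDataFlow` directly (no `PrefetchDataZMQ`), assuming placeholder paths and annotations in the `image_id`/`label_id` format produced by `data_dump.py`.

```python
# Pull one batch straight from MyDataFlow; paths below are placeholders.
from dataflow_input import MyDataFlow

df = MyDataFlow('/path/to/images', '/path/to/annotations.json',
                is_training=True, batch_size=4, img_size=224)
batch = next(df.get_data())
print(batch['data'].shape, batch['label'].shape)  # (4, 224, 224, 3) (4,)
```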
/cifar10.py:
--------------------------------------------------------------------------------
1 | # -*- coding:utf-8 -*-
2 |
3 | import os
4 | import sys
5 | import time
6 | import pickle
7 | import random
8 | import numpy as np
9 |
10 | class_num = 10
11 | image_size = 32
12 | img_channels = 3
13 |
14 |
15 | # ========================================================== #
16 | # ├─ prepare_data()
17 | # ├─ download training data if not exist by download_data()
18 | # ├─ load data by load_data()
19 | # └─ shuffle and return data
20 | # ========================================================== #
21 |
22 |
23 |
24 | def download_data():
25 | dirname = 'cifar-10-batches-py'
26 | origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
27 | fname = 'cifar-10-python.tar.gz'
28 | fpath = './' + dirname
29 |
30 | download = False
31 | if os.path.exists(fpath) or os.path.isfile(fname):
32 | download = False
33 | print("DataSet aready exist!")
34 | else:
35 | download = True
36 | if download:
37 | print('Downloading data from', origin)
38 | import urllib.request
39 | import tarfile
40 |
41 | def reporthook(count, block_size, total_size):
42 | global start_time
43 | if count == 0:
44 | start_time = time.time()
45 | return
46 | duration = time.time() - start_time
47 | progress_size = int(count * block_size)
48 | speed = int(progress_size / (1024 * duration))
49 | percent = min(int(count * block_size * 100 / total_size), 100)
50 | sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" %
51 | (percent, progress_size / (1024 * 1024), speed, duration))
52 | sys.stdout.flush()
53 |
54 | urllib.request.urlretrieve(origin, fname, reporthook)
55 | print('Download finished. Extracting:', origin)
56 | if (fname.endswith("tar.gz")):
57 | tar = tarfile.open(fname, "r:gz")
58 | tar.extractall()
59 | tar.close()
60 | elif (fname.endswith("tar")):
61 | tar = tarfile.open(fname, "r:")
62 | tar.extractall()
63 | tar.close()
64 |
65 |
66 | def unpickle(file):
67 | with open(file, 'rb') as fo:
68 | dict = pickle.load(fo, encoding='bytes')
69 | return dict
70 |
71 |
72 | def load_data_one(file):
73 | batch = unpickle(file)
74 | data = batch[b'data']
75 | labels = batch[b'labels']
76 | print("Loading %s : %d." % (file, len(data)))
77 | return data, labels
78 |
79 |
80 | def load_data(files, data_dir, label_count):
81 | global image_size, img_channels
82 | data, labels = load_data_one(data_dir + '/' + files[0])
83 | for f in files[1:]:
84 | data_n, labels_n = load_data_one(data_dir + '/' + f)
85 | data = np.append(data, data_n, axis=0)
86 | labels = np.append(labels, labels_n, axis=0)
87 | labels = np.array([[float(i == label) for i in range(label_count)] for label in labels])
88 | data = data.reshape([-1, img_channels, image_size, image_size])
89 | data = data.transpose([0, 2, 3, 1])
90 | return data, labels
91 |
92 |
93 | def prepare_data():
94 | print("======Loading data======")
95 | download_data()
96 | data_dir = './cifar-10-batches-py'
97 | image_dim = image_size * image_size * img_channels
98 | meta = unpickle(data_dir + '/batches.meta')
99 |
100 | label_names = meta[b'label_names']
101 | label_count = len(label_names)
102 | train_files = ['data_batch_%d' % d for d in range(1, 6)]
103 | train_data, train_labels = load_data(train_files, data_dir, label_count)
104 | test_data, test_labels = load_data(['test_batch'], data_dir, label_count)
105 |
106 | print("Train data:", np.shape(train_data), np.shape(train_labels))
107 | print("Test data :", np.shape(test_data), np.shape(test_labels))
108 | print("======Load finished======")
109 |
110 | print("======Shuffling data======")
111 | indices = np.random.permutation(len(train_data))
112 | train_data = train_data[indices]
113 | train_labels = train_labels[indices]
114 | print("======Prepare Finished======")
115 |
116 | return train_data, train_labels, test_data, test_labels
117 |
118 |
119 | # ========================================================== #
120 | # ├─ _random_crop()
121 | # ├─ _random_flip_leftright()
122 | # ├─ data_augmentation()
123 | # └─ color_preprocessing()
124 | # ========================================================== #
125 |
126 | def _random_crop(batch, crop_shape, padding=None):
127 | oshape = np.shape(batch[0])
128 |
129 | if padding:
130 | oshape = (oshape[0] + 2 * padding, oshape[1] + 2 * padding)
131 | new_batch = []
132 | npad = ((padding, padding), (padding, padding), (0, 0))
133 | for i in range(len(batch)):
134 | new_batch.append(batch[i])
135 | if padding:
136 | new_batch[i] = np.lib.pad(batch[i], pad_width=npad,
137 | mode='constant', constant_values=0)
138 | nh = random.randint(0, oshape[0] - crop_shape[0])
139 | nw = random.randint(0, oshape[1] - crop_shape[1])
140 | new_batch[i] = new_batch[i][nh:nh + crop_shape[0],
141 | nw:nw + crop_shape[1]]
142 | return new_batch
143 |
144 |
145 | def _random_flip_leftright(batch):
146 | for i in range(len(batch)):
147 | if bool(random.getrandbits(1)):
148 | batch[i] = np.fliplr(batch[i])
149 | return batch
150 |
151 |
152 | def color_preprocessing(x_train, x_test):
153 | x_train = x_train.astype('float32')
154 | x_test = x_test.astype('float32')
155 | x_train[:, :, :, 0] = (x_train[:, :, :, 0] - np.mean(x_train[:, :, :, 0])) / np.std(x_train[:, :, :, 0])
156 | x_train[:, :, :, 1] = (x_train[:, :, :, 1] - np.mean(x_train[:, :, :, 1])) / np.std(x_train[:, :, :, 1])
157 | x_train[:, :, :, 2] = (x_train[:, :, :, 2] - np.mean(x_train[:, :, :, 2])) / np.std(x_train[:, :, :, 2])
158 |
159 | x_test[:, :, :, 0] = (x_test[:, :, :, 0] - np.mean(x_test[:, :, :, 0])) / np.std(x_test[:, :, :, 0])
160 | x_test[:, :, :, 1] = (x_test[:, :, :, 1] - np.mean(x_test[:, :, :, 1])) / np.std(x_test[:, :, :, 1])
161 | x_test[:, :, :, 2] = (x_test[:, :, :, 2] - np.mean(x_test[:, :, :, 2])) / np.std(x_test[:, :, :, 2])
162 |
163 | return x_train, x_test
164 |
165 |
166 | def data_augmentation(batch):
167 | batch = _random_flip_leftright(batch)
168 | batch = _random_crop(batch, [32, 32], 4)
169 | return batch
--------------------------------------------------------------------------------
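A minimal sketch of the intended CIFAR-10 pipeline using the helpers above; note that `prepare_data()` downloads the dataset on first run.

```python
from cifar10 import prepare_data, color_preprocessing, data_augmentation

train_x, train_y, test_x, test_y = prepare_data()        # downloads on first run
train_x, test_x = color_preprocessing(train_x, test_x)   # per-channel normalize
batch = data_augmentation(train_x[:64].copy())           # flip + pad-and-crop
print(len(batch), batch[0].shape)                        # 64 (32, 32, 3)
```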
/eval.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | import json
5 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc
6 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData)
7 | from dataflow_input import MyDataFlowEval
8 | import resnet_model
9 | from IPython import embed
10 |
11 | os.environ['CUDA_VISIBLE_DEVICES']= '2'
12 |
13 | init_learning_rate = 0.1
14 | batch_size = 64
15 | image_size = 224
16 | img_channels = 3
17 | class_num = 80
18 |
19 | weight_decay = 1e-4
20 | momentum = 0.9
21 |
22 | total_epochs = 100
23 | iteration = 421
24 | # 128 * 421 ~ 53,879
25 | test_iteration = 10
26 |
27 | def dist_top_k(feat, centers):
28 | feat = feat[0, :]
29 | diff = centers - feat
30 | diff = - tf.reduce_sum(diff*diff, axis=1)  # negative squared distance: larger means closer
31 | _, predictions = tf.nn.top_k(diff, 3)
32 | return predictions
33 |
34 | def get_tensor_by_name(save_file, var_name):
35 | reader = tf.train.NewCheckpointReader(save_file)
36 | return reader.get_tensor(var_name)
37 |
38 | def center_loss(features, label, alfa, nrof_classes):
39 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
40 | (http://ydwen.github.io/papers/WenECCV16.pdf)
41 | """
42 | nrof_features = features.get_shape()[1]
43 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
44 | initializer=tf.constant_initializer(0), trainable=False)
45 | label = tf.reshape(label, [-1])
46 | centers_batch = tf.gather(centers, label)
47 | diff = (1 - alfa) * (centers_batch - features)
48 | centers = tf.scatter_sub(centers, label, diff)
49 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm')
50 | loss = tf.reduce_mean(tf.square(features - centers_batch))
51 | return loss, centers
52 |
53 | def focal_loss(onehot_labels, cls_preds,
54 | alpha=0.25, gamma=2.0, name=None, scope=None):
55 | """Compute softmax focal loss between logits and onehot labels
56 | logits and onehot_labels must have same shape [batchsize, num_classes] and
57 | the same data type (float16, 32, 64)
58 | Args:
59 | onehot_labels: Each row labels[i] must be a valid probability distribution
60 | cls_preds: Unscaled log probabilities
61 | alpha: The hyperparameter for adjusting biased samples, default is 0.25
62 | gamma: The hyperparameter for penalizing the easy labeled samples
63 | name: A name for the operation (optional)
64 | Returns:
65 | A 1-D tensor of length batch_size of same type as logits with sigmoid focal loss
66 | """
67 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc:
68 | logits = tf.convert_to_tensor(cls_preds)
69 | onehot_labels = tf.convert_to_tensor(onehot_labels)
70 |
71 | precise_logits = tf.cast(logits, tf.float32) if (
72 | logits.dtype == tf.float16) else logits
73 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype)
74 | predictions = tf.nn.sigmoid(logits)
75 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions)
76 | # add small value to avoid 0
77 | epsilon = 1e-8
78 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32))
79 | alpha_t = tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t)
80 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon),
81 | name=name, axis=1)
82 | return losses
83 |
84 | def Evaluate(sess):
85 | test_acc = 0.0
86 | test_loss = 0.0
87 |
88 | for it in range(test_iteration):
89 | batch_data = next(scene_data_val)
90 | test_batch_x = batch_data['data']
91 | test_batch_y = batch_data['label']
92 |
93 | test_feed_dict = {
94 | x: test_batch_x,
95 | label: test_batch_y,
96 | learning_rate: epoch_learning_rate,
97 | training_flag: False
98 | }
99 |
100 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict)
101 |
102 | test_loss += loss_
103 | test_acc += acc_
104 |
105 | test_loss /= test_iteration # average loss
106 | test_acc /= test_iteration # average accuracy
107 |
108 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss),
109 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)])
110 |
111 | return test_acc, test_loss, summary
112 |
113 | def resnet_model_fn(inputs, training):
114 | """Our model_fn for ResNet to be used with our Estimator."""
115 |
116 | network = resnet_model.imagenet_resnet_v2(
117 | resnet_size=18, num_classes=class_num, mode='se', data_format=None)
118 | inputs= network(inputs=inputs, is_training=training)
119 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat')
120 | inputs = tf.layers.dense(inputs=inputs, units=class_num)
121 | # inputs = tf.layers.dense(inputs=feat, units=class_num)
122 | inputs = tf.identity(inputs, 'final_dense')
123 |
124 | return inputs, feat
125 |
126 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10
127 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels])
128 | label = tf.placeholder(tf.float32, shape=[None,])
129 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num)
130 |
131 | training_flag = tf.placeholder(tf.bool)
132 | learning_rate = tf.placeholder(tf.float32, name='learning_rate')
133 |
134 | logits, feat = resnet_model_fn(x, training=training_flag)
135 |
136 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits))
137 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5))
138 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
139 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num)
140 | Total_loss = Focal_loss + l2_loss + Center_loss
141 |
142 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True)
143 | # Batch norm requires update_ops to be added as a train_op dependency.
144 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
145 | with tf.control_dependencies(update_ops):
146 | train_op = optimizer.minimize(Total_loss)
147 |
148 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1))
149 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
150 |
151 | values, indices = tf.nn.top_k(logits, 3)
152 |
153 | val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/'
154 | annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json'
155 | # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources:
156 | df = MyDataFlowEval(val_dir, annotations, img_size=image_size)
157 | # run the dataflow in a separate process
158 | df = PrefetchDataZMQ(df, nr_proc=1)
159 | df.reset_state()
160 | scene_data_val = df.get_data()
161 |
162 | centers_class = np.load("centers.npy")
163 | centers_class = tf.convert_to_tensor(centers_class)
164 | indices_Center = dist_top_k(feat, centers_class)
165 |
166 | saver = tf.train.Saver(tf.global_variables())
167 |
168 | with tf.Session() as sess:
169 | ckpt = tf.train.get_checkpoint_state('./model_release')
170 | print("loading checkpoint...")
171 | saver.restore(sess, ckpt.model_checkpoint_path)
172 |
173 | result = []
174 | for it in scene_data_val:
175 | temp_dict = {}
176 | feed_dict = {x: it['data'], training_flag: False}
177 | predictions, predictions_Center = sess.run([indices, indices_Center], feed_dict=feed_dict)
178 | predictions = np.squeeze(predictions, axis=0)
179 |
180 | predictions = predictions_Center  # use the center-distance top-3 instead of the logits top-3
181 |
182 | temp_dict['image_id'] = it['name']
183 | temp_dict['label_id'] = predictions.tolist()
184 | result.append(temp_dict)
185 | print('image %s is %d,%d,%d, label: %d' % (it['name'], predictions[0], predictions[1], predictions[2], it['label']))
186 | if it['epoch']:
187 | break
188 |
189 | with open('submit.json', 'w') as f:
190 | json.dump(result, f)
191 | print('write result json, num is %d' % len(result))
192 |
193 |
--------------------------------------------------------------------------------
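A NumPy sketch of what `dist_top_k` above computes, assuming `centers.npy` holds one row per class: rank classes by negative squared distance to the feature and keep the three closest. Shapes here are hypothetical.

```python
import numpy as np

def dist_top_k_np(feat, centers, k=3):
    diff = centers - feat                       # [num_classes, feat_dim]
    neg_sq_dist = -np.sum(diff * diff, axis=1)  # larger = closer
    return np.argsort(neg_sq_dist)[::-1][:k]

centers = np.random.randn(80, 512).astype('float32')  # hypothetical 80 class centers
feat = centers[7] + 0.01 * np.random.randn(512).astype('float32')
print(dist_top_k_np(feat, centers))  # class 7 should rank first
```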
/SE_ResNeXt.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tflearn.layers.conv import global_avg_pool
3 | from tensorflow.contrib.layers import batch_norm, flatten
4 | from tensorflow.contrib.framework import arg_scope
5 | import numpy as np
6 | import os
7 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc
8 | from tensorpack.dataflow import (
9 | AugmentImageComponent, PrefetchDataZMQ,
10 | BatchData, MultiThreadMapData, DataFlow)
11 | from dataflow_input import (MyDataFlow, data_augmentation)
12 | from IPython import embed
13 |
14 | os.environ['CUDA_VISIBLE_DEVICES']= '3'
15 |
16 | weight_decay = 0.0005
17 | momentum = 0.9
18 |
19 | init_learning_rate = 0.1 * 5
20 | cardinality = 2 # how many splits?
21 | blocks = 3 # res_block ! (split + transition)
22 | depth = 64 # out channel
23 |
24 | """
25 | So, the total number of layers is (3*blokcs)*residual_layer_num + 2
26 | because, blocks = split(conv 2) + transition(conv 1) = 3 layer
27 | and, first conv layer 1, last dense layer 1
28 | thus, total number of layers = (3*blocks)*residual_layer_num + 2
29 | """
30 |
31 | reduction_ratio = 4
32 |
33 | total_epochs = 100
34 |
35 | batch_size = 64
36 | image_size = 224
37 | img_channels = 3
38 | class_num = 80
39 |
40 | iteration = 421
41 | # 128 * 421 ~ 53,879
42 |
43 | test_iteration = 10
44 |
45 | def conv_layer(input, filter, kernel, stride, padding='SAME', layer_name="conv"):
46 | with tf.name_scope(layer_name):
47 | network = tf.layers.conv2d(inputs=input, use_bias=False, filters=filter, kernel_size=kernel, strides=stride, padding=padding)
48 | return network
49 |
50 | def deconv_layer(input, filter, kernel, stride, padding='SAME', layer_name="deconv"):
51 | with tf.name_scope(layer_name):
52 | network = tf.layers.conv2d_transpose(inputs=input, use_bias=False, filters=filter, kernel_size=kernel, strides=stride, padding=padding)
53 | return network
54 |
55 | def Global_Average_Pooling(x):
56 | return global_avg_pool(x, name='Global_avg_pooling')
57 |
58 | def Average_pooling(x, pool_size=[2,2], stride=2, padding='SAME'):
59 | return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
60 |
61 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') :
62 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
63 |
64 | def Batch_Normalization(x, training, scope):
65 | with arg_scope([batch_norm],
66 | scope=scope,
67 | updates_collections=None,
68 | decay=0.9,
69 | center=True,
70 | scale=True,
71 | zero_debias_moving_mean=True) :
72 | return tf.cond(training,
73 | lambda : batch_norm(inputs=x, is_training=training, reuse=None),
74 | lambda : batch_norm(inputs=x, is_training=training, reuse=True))
75 |
76 | def Relu(x):
77 | return tf.nn.relu(x)
78 |
79 | def Sigmoid(x) :
80 | return tf.nn.sigmoid(x)
81 |
82 | def tanh(x):
83 | return tf.tanh(x)
84 |
85 | def Concatenation(layers) :
86 | return tf.concat(layers, axis=3)
87 |
88 | def Fully_connected(x, units=class_num, layer_name='fully_connected') :
89 | with tf.name_scope(layer_name) :
90 | return tf.layers.dense(inputs=x, use_bias=False, units=units)
91 |
92 | def center_loss(features, label, alfa, nrof_classes):
93 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
94 | (http://ydwen.github.io/papers/WenECCV16.pdf)
95 | """
96 | nrof_features = features.get_shape()[1]
97 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
98 | initializer=tf.constant_initializer(0), trainable=False)
99 | label = tf.reshape(label, [-1])
100 | centers_batch = tf.gather(centers, label)
101 | diff = (1 - alfa) * (centers_batch - features)
102 | centers = tf.scatter_sub(centers, label, diff)
103 | loss = tf.reduce_mean(tf.square(features - centers_batch))
104 | return loss, centers
105 |
106 | def Evaluate(sess):
107 | test_acc = 0.0
108 | test_loss = 0.0
109 |
110 | for it in range(test_iteration):
111 | batch_data = next(scene_data_val)
112 | test_batch_x = batch_data['data']
113 | test_batch_y = batch_data['label']
114 |
115 | test_feed_dict = {
116 | x: test_batch_x,
117 | label: test_batch_y,
118 | learning_rate: epoch_learning_rate,
119 | training_flag: False
120 | }
121 |
122 | loss_, acc_ = sess.run([cost, accuracy], feed_dict=test_feed_dict)
123 |
124 | test_loss += loss_
125 | test_acc += acc_
126 |
127 | test_loss /= test_iteration # average loss
128 | test_acc /= test_iteration # average accuracy
129 |
130 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss),
131 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)])
132 |
133 | return test_acc, test_loss, summary
134 |
135 | class SE_ResNeXt():
136 | def __init__(self, x, training):
137 | self.training = training
138 | self.model = self.Build_SEnet(x)
139 |
140 | def first_layer(self, x, scope):
141 | with tf.name_scope(scope) :
142 | x = conv_layer(x, filter=64, kernel=[7, 7], stride=2, layer_name=scope+'_conv1')
143 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
144 | x = Relu(x)
145 | x = Max_pooling(x)
146 |
147 | return x
148 |
149 | def transform_layer(self, x, stride, scope):
150 | with tf.name_scope(scope) :
151 | x = conv_layer(x, filter=depth, kernel=[1,1], stride=1, layer_name=scope+'_conv1')
152 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
153 | x = Relu(x)
154 |
155 | x = conv_layer(x, filter=depth, kernel=[3,3], stride=stride, layer_name=scope+'_conv2')
156 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch2')
157 | x = Relu(x)
158 | return x
159 |
160 | def transition_layer(self, x, out_dim, scope):
161 | with tf.name_scope(scope):
162 | x = conv_layer(x, filter=out_dim, kernel=[1,1], stride=1, layer_name=scope+'_conv1')
163 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
164 | # x = Relu(x)
165 |
166 | return x
167 |
168 | def split_layer(self, input_x, stride, layer_name):
169 | with tf.name_scope(layer_name) :
170 | layers_split = list()
171 | for i in range(cardinality) :
172 | splits = self.transform_layer(input_x, stride=stride, scope=layer_name + '_splitN_' + str(i))
173 | layers_split.append(splits)
174 |
175 | return Concatenation(layers_split)
176 |
177 | def squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
178 | with tf.name_scope(layer_name) :
179 | squeeze = Global_Average_Pooling(input_x)
180 |
181 | excitation = Fully_connected(squeeze, units=out_dim // ratio, layer_name=layer_name+'_fully_connected1')
182 | excitation = Relu(excitation)
183 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2')
184 | excitation = Sigmoid(excitation)
185 |
186 | excitation = tf.reshape(excitation, [-1,1,1,out_dim])
187 | scale = input_x * excitation
188 |
189 | return scale
190 |
191 | def residual_layer(self, input_x, out_dim, layer_num, res_block=blocks):
192 | # split + transform(bottleneck) + transition + merge
193 | # input_dim = input_x.get_shape().as_list()[-1]
194 |
195 | for i in range(res_block):
196 | input_dim = int(np.shape(input_x)[-1])
197 |
198 | if input_dim * 2 == out_dim:
199 | flag = True
200 | stride = 2
201 | channel = input_dim // 2
202 | else:
203 | flag = False
204 | stride = 1
205 | x = self.split_layer(input_x, stride=stride, layer_name='split_layer_'+layer_num+'_'+str(i))
206 | x = self.transition_layer(x, out_dim=out_dim, scope='trans_layer_'+layer_num+'_'+str(i))
207 | x = self.squeeze_excitation_layer(x, out_dim=out_dim, ratio=reduction_ratio, layer_name='squeeze_layer_'+layer_num+'_'+str(i))
208 |
209 | if flag is True :
210 | pad_input_x = Average_pooling(input_x)
211 | pad_input_x = tf.pad(pad_input_x, [[0, 0], [0, 0], [0, 0], [channel, channel]]) # [?, height, width, channel]
212 | else :
213 | pad_input_x = input_x
214 |
215 | input_x = Relu(x + pad_input_x)
216 |
217 | return input_x
218 |
219 | def generator(self, x, scope="generator"):
220 | with tf.variable_scope(scope):
221 | n_downsampling = 5
222 | for i in range(n_downsampling):
223 | mult = pow(2, (n_downsampling - i))
224 | x = deconv_layer(x, filter=int((32 * mult) / 2), kernel=[3, 3], stride=2, layer_name='deconv' + str(i))
225 | x = Relu(x)
226 |
227 | x = conv_layer(x, filter=3, kernel=[7,7], stride=1, layer_name='conv1')
228 | x = 128 * Batch_Normalization(x, training=self.training, scope=scope+'_batch1') + 128
229 |
230 | return x
231 |
232 | def Build_SEnet(self, input_x):
233 | # only cifar10 architecture
234 |
235 | input_x = self.first_layer(input_x, scope='first_layer')
236 |
237 | x = self.residual_layer(input_x, out_dim=64, layer_num='1')
238 | x = self.residual_layer(x, out_dim=128, layer_num='2')
239 | x = self.residual_layer(x, out_dim=256, layer_num='3')
240 | x = self.residual_layer(x, out_dim=512, layer_num='4')
241 |
242 | recon_x = self.generator(x)
243 | # recon_x = tf.cast(recon_x, dtype=tf.uint8)
244 |
245 | x = Global_Average_Pooling(x)
246 | x = flatten(x)
247 |
248 | feat = tf.nn.l2_normalize(x, 1, 1e-10, name='feat')
249 |
250 | x = Fully_connected(x, layer_name='final_fully_connected')
251 | return x, recon_x, feat
--------------------------------------------------------------------------------
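A minimal sketch of instantiating the graph above; `Build_SEnet` returns the logits, the generator's reconstruction, and the L2-normalized feature. Assumes `tflearn` and `tensorpack` are installed.

```python
import tensorflow as tf
from SE_ResNeXt import SE_ResNeXt, image_size, img_channels

x = tf.placeholder(tf.float32, [None, image_size, image_size, img_channels])
training_flag = tf.placeholder(tf.bool)
logits, recon_x, feat = SE_ResNeXt(x, training=training_flag).model
```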
/pre_train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc
5 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData)
6 | from dataflow_input import MyDataFlow
7 | import resnet_model
8 | from IPython import embed
9 |
10 | os.environ['CUDA_VISIBLE_DEVICES']= '0'
11 |
12 | init_learning_rate = 0.1
13 | batch_size = 128
14 | image_size = 224
15 | img_channels = 3
16 | class_num = 365
17 |
18 | weight_decay = 1e-4
19 | momentum = 0.9
20 |
21 | total_epochs = 30
22 | iteration = 14089 // 1
23 | # 128 * 14089 ~ 1,803,460
24 | test_iteration = 10
25 |
26 | def center_loss(features, label, alfa, nrof_classes):
27 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
28 | (http://ydwen.github.io/papers/WenECCV16.pdf)
29 | """
30 | nrof_features = features.get_shape()[1]
31 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
32 | initializer=tf.constant_initializer(0), trainable=False)
33 | label = tf.reshape(label, [-1])
34 | centers_batch = tf.gather(centers, label)
35 | diff = (1 - alfa) * (centers_batch - features)
36 | centers = tf.scatter_sub(centers, label, diff)
37 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm')
38 | loss = tf.reduce_mean(tf.square(features - centers_batch))
39 | return loss, centers
40 |
41 | def focal_loss(onehot_labels, cls_preds,
42 | alpha=0.25, gamma=2.0, name=None, scope=None):
43 | """Compute softmax focal loss between logits and onehot labels
44 | logits and onehot_labels must have same shape [batchsize, num_classes] and
45 | the same data type (float16, 32, 64)
46 | Args:
47 | onehot_labels: Each row labels[i] must be a valid probability distribution
48 | cls_preds: Unscaled log probabilities
49 | alpha: The hyperparameter for adjusting biased samples, default is 0.25
50 | gamma: The hyperparameter for penalizing the easy labeled samples
51 | name: A name for the operation (optional)
52 | Returns:
53 | A 1-D tensor of length batch_size of same type as logits with sigmoid focal loss
54 | """
55 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc:
56 | logits = tf.convert_to_tensor(cls_preds)
57 | onehot_labels = tf.convert_to_tensor(onehot_labels)
58 |
59 | precise_logits = tf.cast(logits, tf.float32) if (
60 | logits.dtype == tf.float16) else logits
61 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype)
62 | predictions = tf.nn.sigmoid(logits)
63 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions)
64 | # add small value to avoid 0
65 | epsilon = 1e-8
66 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32))
67 | alpha_t = tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t)
68 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon),
69 | name=name, axis=1)
70 | return losses
71 |
72 | def Evaluate(sess):
73 | test_acc = 0.0
74 | test_loss = 0.0
75 |
76 | for it in range(test_iteration):
77 | batch_data = next(scene_data_val)
78 | test_batch_x = batch_data['data']
79 | test_batch_y = batch_data['label']
80 |
81 | test_feed_dict = {
82 | x: test_batch_x,
83 | label: test_batch_y,
84 | learning_rate: epoch_learning_rate,
85 | training_flag: False
86 | }
87 |
88 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict)
89 |
90 | test_loss += loss_
91 | test_acc += acc_
92 |
93 | test_loss /= test_iteration # average loss
94 | test_acc /= test_iteration # average accuracy
95 |
96 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss),
97 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)])
98 |
99 | return test_acc, test_loss, summary
100 |
101 | def resnet_model_fn(inputs, training):
102 | """Our model_fn for ResNet to be used with our Estimator."""
103 |
104 | network = resnet_model.imagenet_resnet_v2(
105 | resnet_size=18, num_classes=class_num, mode='se', data_format=None)
106 | inputs= network(inputs=inputs, is_training=training)
107 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat')
108 | inputs = tf.layers.dense(inputs=inputs, units=class_num)
109 | # inputs = tf.layers.dense(inputs=feat, units=class_num)
110 | inputs = tf.identity(inputs, 'final_dense')
111 |
112 | return inputs, feat
113 |
114 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10
115 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels])
116 | label = tf.placeholder(tf.float32, shape=[None,])
117 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num)
118 |
119 | training_flag = tf.placeholder(tf.bool)
120 | learning_rate = tf.placeholder(tf.float32, name='learning_rate')
121 |
122 | logits, feat = resnet_model_fn(x, training=training_flag)
123 |
124 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits))
125 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5))
126 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
127 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num)
128 | Total_loss = cost + l2_loss
129 |
130 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True)
131 | # Batch norm requires update_ops to be added as a train_op dependency.
132 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
133 | with tf.control_dependencies(update_ops):
134 | train_op = optimizer.minimize(Total_loss)
135 |
136 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1))
137 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
138 |
139 | # val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/'
140 | # annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json'
141 | # # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources:
142 | # df = MyDataFlow(val_dir, annotations, is_training=False, batch_size=batch_size, img_size=image_size)
143 | # # start 3 processes to run the dataflow in parallel
144 | # df = PrefetchDataZMQ(df, nr_proc=10)
145 | # df.reset_state()
146 | # scene_data_val = df.get_data()
147 |
148 | train_dir = '/data0/AIChallenger/data_256'
149 | annotations = '/data0/AIChallenger/data_256.json'
150 | # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources:
151 | df = MyDataFlow(train_dir, annotations, is_training=True, batch_size=batch_size, img_size=image_size)
152 | # start 10 processes to run the dataflow in parallel
153 | df = PrefetchDataZMQ(df, nr_proc=10)
154 | df.reset_state()
155 | scene_data = df.get_data()
156 |
157 | saver = tf.train.Saver(tf.global_variables())
158 |
159 | with tf.Session() as sess:
160 | ckpt = tf.train.get_checkpoint_state('./model_pretrain')
161 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
162 | print("loading checkpoint...")
163 | saver.restore(sess, ckpt.model_checkpoint_path)
164 | else:
165 | sess.run(tf.global_variables_initializer())
166 |
167 | summary_writer = tf.summary.FileWriter('./logs_pretrain', sess.graph)
168 |
169 | _x = x[:, :, :, ::-1]
170 | tf.summary.image('x', _x, 4)
171 |
172 | summary_op = tf.summary.merge_all()
173 |
174 | epoch_learning_rate = init_learning_rate
175 | for epoch in range(1, total_epochs + 1):
176 | if epoch % 10 == 0 :
177 | epoch_learning_rate = epoch_learning_rate / 10
178 |
179 | train_acc = 0.0
180 | train_loss = 0.0
181 |
182 | for step in range(1, iteration + 1):
183 | batch_data = next(scene_data)
184 | batch_x = batch_data['data']
185 | batch_y = batch_data['label']
186 |
187 | train_feed_dict = {
188 | x: batch_x,
189 | label: batch_y,
190 | learning_rate: epoch_learning_rate,
191 | training_flag: True
192 | }
193 |
194 | _, batch_loss = sess.run([train_op, Total_loss], feed_dict=train_feed_dict)
195 | batch_acc = accuracy.eval(feed_dict=train_feed_dict)
196 |
197 | print("epoch: %d/%d, iter: %d/%d, batch_loss: %.4f, batch_acc: %.4f \n" % (
198 | epoch, total_epochs, step, iteration, batch_loss, batch_acc))
199 |
200 | train_loss += batch_loss
201 | train_acc += batch_acc
202 |
203 | if step % 30 == 0 :
204 | summary_str = sess.run(summary_op, feed_dict=train_feed_dict)
205 | summary_writer.add_summary(summary=summary_str, global_step=epoch)
206 | summary_writer.flush()
207 |
208 |
209 | train_loss /= iteration # average loss
210 | train_acc /= iteration # average accuracy
211 |
212 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss),
213 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)])
214 |
215 | # test_acc, test_loss, test_summary = Evaluate(sess)
216 |
217 | summary_writer.add_summary(summary=train_summary, global_step=epoch)
218 | # summary_writer.add_summary(summary=test_summary, global_step=epoch)
219 | summary_writer.flush()
220 |
221 | # line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % (
222 | # epoch, total_epochs, train_loss, train_acc, test_loss, test_acc)
223 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f \n" % (
224 | epoch, total_epochs, train_loss, train_acc)
225 | print(line)
226 |
227 | with open('./logs_pretrain/logs.txt', 'a') as f:
228 | f.write(line)
229 |
230 | saver.save(sess=sess, save_path='./model_pretrain/model.ckpt')
231 |
--------------------------------------------------------------------------------
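The loop above divides the learning rate by 10 every 10 epochs; a tiny standalone sketch of that step-decay schedule.

```python
def lr_at_epoch(epoch, init_lr=0.1, drop_every=10, factor=10.0):
    """Step decay matching the `epoch % 10 == 0` rule in the training loop."""
    lr = init_lr
    for e in range(1, epoch + 1):
        if e % drop_every == 0:
            lr /= factor
    return lr

print([lr_at_epoch(e) for e in (1, 10, 20, 30)])  # [0.1, 0.01, 0.001, 0.0001]
```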
/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc
5 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData)
6 | from dataflow_input import MyDataFlow
7 | import resnet_model
8 | from IPython import embed
9 |
10 | os.environ['CUDA_VISIBLE_DEVICES'] = '1'
11 |
12 | init_learning_rate = 0.01
13 | batch_size = 128
14 | image_size = 224
15 | img_channels = 3
16 | class_num = 80
17 |
18 | weight_decay = 1e-4
19 | momentum = 0.9
20 |
21 | total_epochs = 30
22 | iteration = 1*421
23 | # 128 * 421 ~ 53,879
24 | test_iteration = 10
25 |
26 | def optimistic_restore(session, save_file):
27 | reader = tf.train.NewCheckpointReader(save_file)
28 | saved_shapes = reader.get_variable_to_shape_map()
29 | var_names = sorted([(var.name, var.name.split(':')[0]) for var in tf.global_variables() if var.name.split(':')[0] in saved_shapes])
30 | restore_vars = []
31 | name2var = dict(zip(map(lambda x:x.name.split(':')[0], tf.global_variables()), tf.global_variables()))
32 | with tf.variable_scope('', reuse=True):
33 | for var_name, saved_var_name in var_names:
34 | curr_var = name2var[saved_var_name]
35 | var_shape = curr_var.get_shape().as_list()
36 | if var_shape == saved_shapes[saved_var_name]:
37 | restore_vars.append(curr_var)
38 | saver = tf.train.Saver(restore_vars)
39 | saver.restore(session, save_file)
40 |
41 | def center_loss(features, label, alfa, nrof_classes):
42 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition"
43 | (http://ydwen.github.io/papers/WenECCV16.pdf)
44 | """
45 | nrof_features = features.get_shape()[1]
46 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32,
47 | initializer=tf.constant_initializer(0), trainable=False)
48 | label = tf.reshape(label, [-1])
49 | centers_batch = tf.gather(centers, label)
50 | diff = (1 - alfa) * (centers_batch - features)
51 | centers = tf.scatter_sub(centers, label, diff)
52 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm')
53 | loss = tf.reduce_mean(tf.square(features - centers_batch))
54 | return loss, centers
55 |
56 | def focal_loss(onehot_labels, cls_preds,
57 | alpha=0.25, gamma=2.0, name=None, scope=None):
58 | """Compute softmax focal loss between logits and onehot labels
59 | logits and onehot_labels must have same shape [batchsize, num_classes] and
60 | the same data type (float16, 32, 64)
61 | Args:
62 | onehot_labels: Each row labels[i] must be a valid probability distribution
63 | cls_preds: Unscaled log probabilities
64 | alpha: The hyperparameter for adjusting biased samples, default is 0.25
65 | gamma: The hyperparameter for penalizing the easy labeled samples
66 | name: A name for the operation (optional)
67 | Returns:
68 | A 1-D tensor of length batch_size of same type as logits with sigmoid focal loss
69 | """
70 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc:
71 | logits = tf.convert_to_tensor(cls_preds)
72 | onehot_labels = tf.convert_to_tensor(onehot_labels)
73 |
74 | precise_logits = tf.cast(logits, tf.float32) if (
75 | logits.dtype == tf.float16) else logits
76 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype)
77 | predictions = tf.nn.sigmoid(logits)
78 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions)
79 | # add small value to avoid 0
80 | epsilon = 1e-8
81 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32))
82 | alpha_t = tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t)
83 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon),
84 | name=name, axis=1)
85 | return losses
86 |
87 | def Evaluate(sess):
88 | test_acc = 0.0
89 | test_loss = 0.0
90 |
91 | for it in range(test_iteration):
92 | batch_data = next(scene_data_val)
93 | test_batch_x = batch_data['data']
94 | test_batch_y = batch_data['label']
95 |
96 | test_feed_dict = {
97 | x: test_batch_x,
98 | label: test_batch_y,
99 | learning_rate: epoch_learning_rate,
100 | training_flag: False
101 | }
102 |
103 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict)
104 |
105 | test_loss += loss_
106 | test_acc += acc_
107 |
108 | test_loss /= test_iteration # average loss
109 | test_acc /= test_iteration # average accuracy
110 |
111 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss),
112 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)])
113 |
114 | return test_acc, test_loss, summary
115 |
116 | def resnet_model_fn(inputs, training):
117 | """Our model_fn for ResNet to be used with our Estimator."""
118 |
119 | network = resnet_model.imagenet_resnet_v2(
120 | resnet_size=18, num_classes=class_num, mode='se', data_format=None)
121 | inputs= network(inputs=inputs, is_training=training)
122 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat')
123 | inputs = tf.layers.dense(inputs=inputs, units=class_num)
124 | # inputs = tf.layers.dense(inputs=feat, units=class_num)
125 | inputs = tf.identity(inputs, 'final_dense')
126 |
127 | return inputs, feat
128 |
129 | # placeholders for the scene dataset; image_size, img_channels and class_num are defined above
130 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels])
131 | label = tf.placeholder(tf.float32, shape=[None,])
132 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num)
133 |
134 | training_flag = tf.placeholder(tf.bool)
135 | learning_rate = tf.placeholder(tf.float32, name='learning_rate')
136 |
137 | logits, feat = resnet_model_fn(x, training=training_flag)
138 |
139 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits))
140 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5))  # defined but not added to Total_loss below
141 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()])
142 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num)
143 | Total_loss = cost + l2_loss + Center_loss
144 |
145 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True)
146 | # Batch norm requires update_ops to be added as a train_op dependency.
147 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
148 | with tf.control_dependencies(update_ops):
149 | train_op = optimizer.minimize(Total_loss)
150 |
151 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1))
152 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
153 |
154 | val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/'
155 | annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json'
156 | # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources:
157 | df = MyDataFlow(val_dir, annotations, is_training=False, batch_size=batch_size, img_size=image_size)
158 | # run the dataflow in a single background process
159 | df = PrefetchDataZMQ(df, nr_proc=1)
160 | df.reset_state()
161 | scene_data_val = df.get_data()
162 |
163 | train_dir = '/data0/AIChallenger/ai_challenger_scene_train_20170904/scene_train_images_20170904/'
164 | annotations = '/data0/AIChallenger/ai_challenger_scene_train_20170904/scene_train_annotations_20170904.json'
165 | # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources:
166 | df = MyDataFlow(train_dir, annotations, is_training=True, batch_size=batch_size, img_size=image_size)
167 | # start 10 processes to run the dataflow in parallel
168 | df = PrefetchDataZMQ(df, nr_proc=10)
169 | df.reset_state()
170 | scene_data = df.get_data()
171 |
172 | saver = tf.train.Saver(tf.global_variables())
173 |
174 | with tf.Session() as sess:
175 | ckpt = tf.train.get_checkpoint_state('./model')
176 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
177 | print("loading checkpoint...")
178 | sess.run(tf.global_variables_initializer())
179 | optimistic_restore(sess, ckpt.model_checkpoint_path)
180 | # saver.restore(sess, ckpt.model_checkpoint_path)
181 | else:
182 | sess.run(tf.global_variables_initializer())
183 |
184 | summary_writer = tf.summary.FileWriter('./logs', sess.graph)
185 |
186 | _x = x[:, :, :, ::-1]  # reverse the channel order (e.g. BGR -> RGB) for the image summary
187 | tf.summary.image('x', _x, 4)
188 |
189 | summary_op = tf.summary.merge_all()
190 |
191 | epoch_learning_rate = init_learning_rate
192 | for epoch in range(1, total_epochs + 1):
193 | if epoch % 20 == 0 :
194 | epoch_learning_rate = epoch_learning_rate / 10
195 |
196 | train_acc = 0.0
197 | train_loss = 0.0
198 |
199 | for step in range(1, iteration + 1):
200 | batch_data = next(scene_data)
201 | batch_x = batch_data['data']
202 | batch_y = batch_data['label']
203 |
204 | train_feed_dict = {
205 | x: batch_x,
206 | label: batch_y,
207 | learning_rate: epoch_learning_rate,
208 | training_flag: True
209 | }
210 |
211 | _, batch_loss, centers_class = sess.run([train_op, Total_loss, Centers], feed_dict=train_feed_dict)
212 | batch_acc = accuracy.eval(feed_dict=train_feed_dict)
213 |
214 | print("epoch: %d/%d, iter: %d/%d, batch_loss: %.4f, batch_acc: %.4f \n" % (
215 | epoch, total_epochs, step, iteration, batch_loss, batch_acc))
216 |
217 | train_loss += batch_loss
218 | train_acc += batch_acc
219 |
220 | if step % 30 == 0 :
221 | summary_str = sess.run(summary_op, feed_dict=train_feed_dict)
222 | summary_writer.add_summary(summary=summary_str, global_step=epoch)
223 | summary_writer.flush()
224 |
225 |
226 | train_loss /= iteration # average loss
227 | train_acc /= iteration # average accuracy
228 |
229 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss),
230 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)])
231 |
232 | test_acc, test_loss, test_summary = Evaluate(sess)
233 |
234 | summary_writer.add_summary(summary=train_summary, global_step=epoch)
235 | summary_writer.add_summary(summary=test_summary, global_step=epoch)
236 | summary_writer.flush()
237 |
238 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % (
239 | epoch, total_epochs, train_loss, train_acc, test_loss, test_acc)
240 | print(line)
241 |
242 | with open('./logs/logs.txt', 'a') as f:
243 | f.write(line)
244 |
245 | saver.save(sess=sess, save_path='./model/model.ckpt')
246 | np.save("centers.npy", centers_class)
247 |
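248 | # --- Illustrative sketch (added for clarity; not part of the original script) ---
249 | # A minimal NumPy re-computation of the sigmoid focal loss defined in
250 | # focal_loss() above, useful as a sanity check. The helper name below is
251 | # local to this sketch.
252 | def _focal_loss_np(onehot, logits, alpha=0.25, gamma=2.0, eps=1e-8):
253 |     p = 1.0 / (1.0 + np.exp(-logits))            # sigmoid probabilities
254 |     p_t = np.where(onehot == 1, p, 1.0 - p)      # probability assigned to the true class
255 |     a_t = np.where(onehot == 1, alpha, 1.0 - alpha)
256 |     return np.sum(-a_t * (1.0 - p_t) ** gamma * np.log(p_t + eps), axis=1)
257 |
258 | # A confident correct prediction is down-weighted by the (1 - p_t)^gamma term:
259 | # _focal_loss_np(np.array([[1., 0.]]), np.array([[4., -4.]]))   -> ~6e-6 (easy sample)
260 | # _focal_loss_np(np.array([[1., 0.]]), np.array([[-4., 4.]]))   -> ~3.9  (hard sample)
261 | # The center-loss update above is likewise c <- c - (1 - alfa) * (c - x),
262 | # i.e. each class center drifts towards the mean feature of its samples.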
--------------------------------------------------------------------------------
/SE_Inception_v4.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tflearn.layers.conv import global_avg_pool
3 | from tensorflow.contrib.layers import batch_norm, flatten
4 | from tensorflow.contrib.framework import arg_scope
5 | import numpy as np
6 | import scene_input
7 | import os
8 |
9 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
10 |
11 | weight_decay = 0.0005
12 | momentum = 0.9
13 |
14 | init_learning_rate = 0.1
15 | reduction_ratio = 4
16 |
17 | batch_size = 32
18 | image_size = 96
19 | img_channels = 3
20 | class_num = 80
21 |
22 | iteration = 391
23 | # batch_size * iteration images are seen per epoch (32 * 391 ~ 12,500)
24 |
25 | test_iteration = 10
26 |
27 | total_epochs = 100
28 |
29 | def conv_layer(input, filter, kernel, stride=1, padding='SAME', layer_name="conv"):
30 | with tf.name_scope(layer_name):
31 | network = tf.layers.conv2d(inputs=input, use_bias=True, filters=filter, kernel_size=kernel, strides=stride, padding=padding)
32 | network = Relu(network)
33 | return network
34 |
35 | def Fully_connected(x, units=class_num, layer_name='fully_connected') :
36 | with tf.name_scope(layer_name) :
37 | return tf.layers.dense(inputs=x, use_bias=True, units=units)
38 |
39 | def Relu(x):
40 | return tf.nn.relu(x)
41 |
42 | def Sigmoid(x):
43 | return tf.nn.sigmoid(x)
44 |
45 | def Global_Average_Pooling(x):
46 | return global_avg_pool(x, name='Global_avg_pooling')
47 |
48 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') :
49 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
50 |
51 | def Avg_pooling(x, pool_size=[3,3], stride=1, padding='SAME') :
52 | return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
53 |
54 | def Batch_Normalization(x, training, scope):
55 | with arg_scope([batch_norm],
56 | scope=scope,
57 | updates_collections=None,
58 | decay=0.9,
59 | center=True,
60 | scale=True,
61 | zero_debias_moving_mean=True) :
62 | return tf.cond(training,
63 | lambda : batch_norm(inputs=x, is_training=training, reuse=None),
64 | lambda : batch_norm(inputs=x, is_training=training, reuse=True))
65 |
66 | def Concatenation(layers) :
67 | return tf.concat(layers, axis=3)
68 |
69 | def Dropout(x, rate, training) :
70 | return tf.layers.dropout(inputs=x, rate=rate, training=training)
71 |
72 | def Evaluate(sess):
73 | test_acc = 0.0
74 | test_loss = 0.0
75 |
76 | for it in range(test_iteration):
77 | test_batch_x, test_batch_y = scene_data_val.next_batch(batch_size, image_size)
78 |
79 | test_feed_dict = {
80 | x: test_batch_x,
81 | label: test_batch_y,
82 | learning_rate: epoch_learning_rate,
83 | training_flag: False
84 | }
85 |
86 | loss_, acc_ = sess.run([cost, accuracy], feed_dict=test_feed_dict)
87 |
88 | test_loss += loss_
89 | test_acc += acc_
90 |
91 | test_loss /= test_iteration # average loss
92 | test_acc /= test_iteration # average accuracy
93 |
94 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss),
95 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)])
96 |
97 | return test_acc, test_loss, summary
98 |
99 | class SE_Inception_v4():
100 | def __init__(self, x, training):
101 | self.training = training
102 | self.model = self.Build_SEnet(x)
103 |
104 | def Stem(self, x, scope):
105 | with tf.name_scope(scope) :
106 | x = conv_layer(x, filter=32, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_conv1')
107 | x = conv_layer(x, filter=32, kernel=[3,3], padding='VALID', layer_name=scope+'_conv2')
108 | block_1 = conv_layer(x, filter=64, kernel=[3,3], layer_name=scope+'_conv3')
109 |
110 | split_max_x = Max_pooling(block_1)
111 | split_conv_x = conv_layer(block_1, filter=96, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1')
112 | x = Concatenation([split_max_x,split_conv_x])
113 |
114 | split_conv_x1 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv2')
115 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv3')
116 |
117 | split_conv_x2 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv4')
118 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[7,1], layer_name=scope+'_split_conv5')
119 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[1,7], layer_name=scope+'_split_conv6')
120 | split_conv_x2 = conv_layer(split_conv_x2, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv7')
121 |
122 | x = Concatenation([split_conv_x1,split_conv_x2])
123 |
124 | split_conv_x = conv_layer(x, filter=192, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv8')
125 | split_max_x = Max_pooling(x)
126 |
127 | x = Concatenation([split_conv_x, split_max_x])
128 |
129 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
130 | x = Relu(x)
131 |
132 | return x
133 |
134 | def Inception_A(self, x, scope):
135 | with tf.name_scope(scope) :
136 | split_conv_x1 = Avg_pooling(x)
137 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[1,1], layer_name=scope+'_split_conv1')
138 |
139 | split_conv_x2 = conv_layer(x, filter=96, kernel=[1,1], layer_name=scope+'_split_conv2')
140 |
141 | split_conv_x3 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv3')
142 | split_conv_x3 = conv_layer(split_conv_x3, filter=96, kernel=[3,3], layer_name=scope+'_split_conv4')
143 |
144 | split_conv_x4 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv5')
145 | split_conv_x4 = conv_layer(split_conv_x4, filter=96, kernel=[3,3], layer_name=scope+'_split_conv6')
146 | split_conv_x4 = conv_layer(split_conv_x4, filter=96, kernel=[3,3], layer_name=scope+'_split_conv7')
147 |
148 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3, split_conv_x4])
149 |
150 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
151 | x = Relu(x)
152 |
153 | return x
154 |
155 | def Inception_B(self, x, scope):
156 | with tf.name_scope(scope) :
157 | init = x
158 |
159 | split_conv_x1 = Avg_pooling(x)
160 | split_conv_x1 = conv_layer(split_conv_x1, filter=128, kernel=[1,1], layer_name=scope+'_split_conv1')
161 |
162 | split_conv_x2 = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_split_conv2')
163 |
164 | split_conv_x3 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv3')
165 | split_conv_x3 = conv_layer(split_conv_x3, filter=224, kernel=[1,7], layer_name=scope+'_split_conv4')
166 | split_conv_x3 = conv_layer(split_conv_x3, filter=256, kernel=[1,7], layer_name=scope+'_split_conv5')
167 |
168 | split_conv_x4 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv6')
169 | split_conv_x4 = conv_layer(split_conv_x4, filter=192, kernel=[1,7], layer_name=scope+'_split_conv7')
170 | split_conv_x4 = conv_layer(split_conv_x4, filter=224, kernel=[7,1], layer_name=scope+'_split_conv8')
171 | split_conv_x4 = conv_layer(split_conv_x4, filter=224, kernel=[1,7], layer_name=scope+'_split_conv9')
172 | split_conv_x4 = conv_layer(split_conv_x4, filter=256, kernel=[7,1], layer_name=scope+'_split_conv10')
173 |
174 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3, split_conv_x4])
175 |
176 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
177 | x = Relu(x)
178 |
179 | return x
180 |
181 | def Inception_C(self, x, scope):
182 | with tf.name_scope(scope) :
183 | split_conv_x1 = Avg_pooling(x)
184 | split_conv_x1 = conv_layer(split_conv_x1, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
185 |
186 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv2')
187 |
188 | split_conv_x3 = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_split_conv3')
189 | split_conv_x3_1 = conv_layer(split_conv_x3, filter=256, kernel=[1,3], layer_name=scope+'_split_conv4')
190 | split_conv_x3_2 = conv_layer(split_conv_x3, filter=256, kernel=[3,1], layer_name=scope+'_split_conv5')
191 |
192 | split_conv_x4 = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_split_conv6')
193 | split_conv_x4 = conv_layer(split_conv_x4, filter=448, kernel=[1,3], layer_name=scope+'_split_conv7')
194 | split_conv_x4 = conv_layer(split_conv_x4, filter=512, kernel=[3,1], layer_name=scope+'_split_conv8')
195 | split_conv_x4_1 = conv_layer(split_conv_x4, filter=256, kernel=[3,1], layer_name=scope+'_split_conv9')
196 | split_conv_x4_2 = conv_layer(split_conv_x4, filter=256, kernel=[1,3], layer_name=scope+'_split_conv10')
197 |
198 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3_1, split_conv_x3_2, split_conv_x4_1, split_conv_x4_2])
199 |
200 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
201 | x = Relu(x)
202 |
203 | return x
204 |
205 | def Reduction_A(self, x, scope):
206 | with tf.name_scope(scope) :
207 | k = 256
208 | l = 256
209 | m = 384
210 | n = 384
211 |
212 | split_max_x = Max_pooling(x)
213 |
214 | split_conv_x1 = conv_layer(x, filter=n, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1')
215 |
216 | split_conv_x2 = conv_layer(x, filter=k, kernel=[1,1], layer_name=scope+'_split_conv2')
217 | split_conv_x2 = conv_layer(split_conv_x2, filter=l, kernel=[3,3], layer_name=scope+'_split_conv3')
218 | split_conv_x2 = conv_layer(split_conv_x2, filter=m, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
219 |
220 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2])
221 |
222 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
223 | x = Relu(x)
224 |
225 | return x
226 |
227 | def Reduction_B(self, x, scope):
228 | with tf.name_scope(scope) :
229 | split_max_x = Max_pooling(x)
230 |
231 | split_conv_x1 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
232 | split_conv_x1 = conv_layer(split_conv_x1, filter=384, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv2')
233 |
234 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv3')
235 | split_conv_x2 = conv_layer(split_conv_x2, filter=288, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
236 |
237 | split_conv_x3 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv5')
238 | split_conv_x3 = conv_layer(split_conv_x3, filter=288, kernel=[3,3], layer_name=scope+'_split_conv6')
239 | split_conv_x3 = conv_layer(split_conv_x3, filter=320, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv7')
240 |
241 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2, split_conv_x3])
242 |
243 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
244 | x = Relu(x)
245 |
246 | return x
247 |
248 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
249 | with tf.name_scope(layer_name) :
250 | squeeze = Global_Average_Pooling(input_x)
251 |
252 | excitation = Fully_connected(squeeze, units=out_dim // ratio, layer_name=layer_name+'_fully_connected1')  # integer division so units is an int
253 | excitation = Relu(excitation)
254 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2')
255 | excitation = Sigmoid(excitation)
256 |
257 | excitation = tf.reshape(excitation, [-1,1,1,out_dim])
258 |
259 | scale = input_x * excitation
260 |
261 | return scale
262 |
263 | def Build_SEnet(self, input_x):
264 | # input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]])
265 | # the pad above (size 32 -> 96) is only needed for 32x32 CIFAR-10 inputs;
266 | # scene images already arrive at image_size x image_size
267 |
268 | x = self.Stem(input_x, scope='stem')
269 |
270 | for i in range(4) :
271 | x = self.Inception_A(x, scope='Inception_A'+str(i))
272 | channel = int(np.shape(x)[-1])
273 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_A'+str(i))
274 |
275 | x = self.Reduction_A(x, scope='Reduction_A')
276 |
277 | for i in range(7) :
278 | x = self.Inception_B(x, scope='Inception_B'+str(i))
279 | channel = int(np.shape(x)[-1])
280 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_B'+str(i))
281 |
282 | x = self.Reduction_B(x, scope='Reduction_B')
283 |
284 | for i in range(3) :
285 | x = self.Inception_C(x, scope='Inception_C'+str(i))
286 | channel = int(np.shape(x)[-1])
287 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_C'+str(i))
288 |
289 | x = Global_Average_Pooling(x)
290 | x = Dropout(x, rate=0.2, training=self.training)
291 | x = flatten(x)
292 |
293 | x = Fully_connected(x, layer_name='final_fully_connected')
294 | return x
295 |
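296 | # --- Illustrative usage sketch (added for clarity; not part of the original file) ---
297 | # A minimal sketch, assuming TF 1.x: builds the SE-Inception-v4 graph once,
298 | # using the constants defined at the top of this file. Graph construction
299 | # only; no session is run here, and the placeholder names are hypothetical.
300 | if __name__ == '__main__':
301 |     x_in = tf.placeholder(tf.float32, [None, image_size, image_size, img_channels])
302 |     is_training = tf.placeholder(tf.bool)
303 |     logits = SE_Inception_v4(x_in, training=is_training).model
304 |     print(logits)  # a Tensor of shape [None, class_num]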
--------------------------------------------------------------------------------
/SE_Inception_resnet_v2.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tflearn.layers.conv import global_avg_pool
3 | from tensorflow.contrib.layers import batch_norm, flatten
4 | from tensorflow.contrib.framework import arg_scope
5 | from cifar10 import *
6 | import numpy as np
7 |
8 | weight_decay = 0.0005
9 | momentum = 0.9
10 |
11 | init_learning_rate = 0.1
12 |
13 | reduction_ratio = 4
14 |
15 | batch_size = 128
16 | iteration = 391
17 | # 128 * 391 ~ 50,000
18 |
19 | test_iteration = 10
20 |
21 | total_epochs = 100
22 |
23 | def conv_layer(input, filter, kernel, stride=1, padding='SAME', layer_name="conv", activation=True):
24 | with tf.name_scope(layer_name):
25 | network = tf.layers.conv2d(inputs=input, use_bias=True, filters=filter, kernel_size=kernel, strides=stride, padding=padding)
26 | if activation :
27 | network = Relu(network)
28 | return network
29 |
30 | def Fully_connected(x, units=class_num, layer_name='fully_connected') :
31 | with tf.name_scope(layer_name) :
32 | return tf.layers.dense(inputs=x, use_bias=True, units=units)
33 |
34 | def Relu(x):
35 | return tf.nn.relu(x)
36 |
37 | def Sigmoid(x):
38 | return tf.nn.sigmoid(x)
39 |
40 | def Global_Average_Pooling(x):
41 | return global_avg_pool(x, name='Global_avg_pooling')
42 |
43 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') :
44 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
45 |
46 | def Batch_Normalization(x, training, scope):
47 | with arg_scope([batch_norm],
48 | scope=scope,
49 | updates_collections=None,
50 | decay=0.9,
51 | center=True,
52 | scale=True,
53 | zero_debias_moving_mean=True) :
54 | return tf.cond(training,
55 | lambda : batch_norm(inputs=x, is_training=training, reuse=None),
56 | lambda : batch_norm(inputs=x, is_training=training, reuse=True))
57 |
58 | def Concatenation(layers) :
59 | return tf.concat(layers, axis=3)
60 |
61 | def Dropout(x, rate, training) :
62 | return tf.layers.dropout(inputs=x, rate=rate, training=training)
63 |
64 | def Evaluate(sess):
65 | test_acc = 0.0
66 | test_loss = 0.0
67 | test_pre_index = 0
68 | add = 1000
69 |
70 | for it in range(test_iteration):
71 | test_batch_x = test_x[test_pre_index: test_pre_index + add]
72 | test_batch_y = test_y[test_pre_index: test_pre_index + add]
73 | test_pre_index = test_pre_index + add
74 |
75 | test_feed_dict = {
76 | x: test_batch_x,
77 | label: test_batch_y,
78 | learning_rate: epoch_learning_rate,
79 | training_flag: False
80 | }
81 |
82 | loss_, acc_ = sess.run([cost, accuracy], feed_dict=test_feed_dict)
83 |
84 | test_loss += loss_
85 | test_acc += acc_
86 |
87 | test_loss /= test_iteration # average loss
88 | test_acc /= test_iteration # average accuracy
89 |
90 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss),
91 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)])
92 |
93 | return test_acc, test_loss, summary
94 |
95 | class SE_Inception_resnet_v2():
96 | def __init__(self, x, training):
97 | self.training = training
98 | self.model = self.Build_SEnet(x)
99 |
100 | def Stem(self, x, scope):
101 | with tf.name_scope(scope) :
102 | x = conv_layer(x, filter=32, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_conv1')
103 | x = conv_layer(x, filter=32, kernel=[3,3], padding='VALID', layer_name=scope+'_conv2')
104 | block_1 = conv_layer(x, filter=64, kernel=[3,3], layer_name=scope+'_conv3')
105 |
106 | split_max_x = Max_pooling(block_1)
107 | split_conv_x = conv_layer(block_1, filter=96, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1')
108 | x = Concatenation([split_max_x,split_conv_x])
109 |
110 | split_conv_x1 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv2')
111 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv3')
112 |
113 | split_conv_x2 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv4')
114 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[7,1], layer_name=scope+'_split_conv5')
115 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[1,7], layer_name=scope+'_split_conv6')
116 | split_conv_x2 = conv_layer(split_conv_x2, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv7')
117 |
118 | x = Concatenation([split_conv_x1,split_conv_x2])
119 |
120 | split_conv_x = conv_layer(x, filter=192, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv8')
121 | split_max_x = Max_pooling(x)
122 |
123 | x = Concatenation([split_conv_x, split_max_x])
124 |
125 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
126 | x = Relu(x)
127 |
128 | return x
129 |
130 | def Inception_resnet_A(self, x, scope):
131 | with tf.name_scope(scope) :
132 | init = x
133 |
134 | split_conv_x1 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv1')
135 |
136 | split_conv_x2 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv2')
137 | split_conv_x2 = conv_layer(split_conv_x2, filter=32, kernel=[3,3], layer_name=scope+'_split_conv3')
138 |
139 | split_conv_x3 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv4')
140 | split_conv_x3 = conv_layer(split_conv_x3, filter=48, kernel=[3,3], layer_name=scope+'_split_conv5')
141 | split_conv_x3 = conv_layer(split_conv_x3, filter=64, kernel=[3,3], layer_name=scope+'_split_conv6')
142 |
143 | x = Concatenation([split_conv_x1,split_conv_x2,split_conv_x3])
144 | x = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_final_conv1', activation=False)
145 |
146 | x = x*0.1  # scale the residual before adding it back, which stabilizes training
147 | x = init + x
148 |
149 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
150 | x = Relu(x)
151 |
152 | return x
153 |
154 | def Inception_resnet_B(self, x, scope):
155 | with tf.name_scope(scope) :
156 | init = x
157 |
158 | split_conv_x1 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv1')
159 |
160 | split_conv_x2 = conv_layer(x, filter=128, kernel=[1,1], layer_name=scope+'_split_conv2')
161 | split_conv_x2 = conv_layer(split_conv_x2, filter=160, kernel=[1,7], layer_name=scope+'_split_conv3')
162 | split_conv_x2 = conv_layer(split_conv_x2, filter=192, kernel=[7,1], layer_name=scope+'_split_conv4')
163 |
164 | x = Concatenation([split_conv_x1, split_conv_x2])
165 | x = conv_layer(x, filter=1152, kernel=[1,1], layer_name=scope+'_final_conv1', activation=False)
166 | # the paper uses 1154 filters here
167 | x = x * 0.1
168 | x = init + x
169 |
170 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
171 | x = Relu(x)
172 |
173 | return x
174 |
175 | def Inception_resnet_C(self, x, scope):
176 | with tf.name_scope(scope) :
177 | init = x
178 |
179 | split_conv_x1 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv1')
180 |
181 | split_conv_x2 = conv_layer(x, filter=192, kernel=[1, 1], layer_name=scope + '_split_conv2')
182 | split_conv_x2 = conv_layer(split_conv_x2, filter=224, kernel=[1, 3], layer_name=scope + '_split_conv3')
183 | split_conv_x2 = conv_layer(split_conv_x2, filter=256, kernel=[3, 1], layer_name=scope + '_split_conv4')
184 |
185 | x = Concatenation([split_conv_x1,split_conv_x2])
186 | x = conv_layer(x, filter=2144, kernel=[1,1], layer_name=scope+'_final_conv2', activation=False)
187 | # the paper uses 2048 filters here
188 | x = x * 0.1
189 | x = init + x
190 |
191 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
192 | x = Relu(x)
193 |
194 | return x
195 |
196 | def Reduction_A(self, x, scope):
197 | with tf.name_scope(scope) :
198 | k = 256
199 | l = 256
200 | m = 384
201 | n = 384
202 |
203 | split_max_x = Max_pooling(x)
204 |
205 | split_conv_x1 = conv_layer(x, filter=n, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1')
206 |
207 | split_conv_x2 = conv_layer(x, filter=k, kernel=[1,1], layer_name=scope+'_split_conv2')
208 | split_conv_x2 = conv_layer(split_conv_x2, filter=l, kernel=[3,3], layer_name=scope+'_split_conv3')
209 | split_conv_x2 = conv_layer(split_conv_x2, filter=m, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
210 |
211 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2])
212 |
213 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
214 | x = Relu(x)
215 |
216 | return x
217 |
218 | def Reduction_B(self, x, scope):
219 | with tf.name_scope(scope) :
220 | split_max_x = Max_pooling(x)
221 |
222 | split_conv_x1 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
223 | split_conv_x1 = conv_layer(split_conv_x1, filter=384, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv2')
224 |
225 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv3')
226 | split_conv_x2 = conv_layer(split_conv_x2, filter=288, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
227 |
228 | split_conv_x3 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv5')
229 | split_conv_x3 = conv_layer(split_conv_x3, filter=288, kernel=[3,3], layer_name=scope+'_split_conv6')
230 | split_conv_x3 = conv_layer(split_conv_x3, filter=320, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv7')
231 |
232 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2, split_conv_x3])
233 |
234 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
235 | x = Relu(x)
236 |
237 | return x
238 |
239 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
240 | with tf.name_scope(layer_name) :
241 |
242 |
243 | squeeze = Global_Average_Pooling(input_x)
244 |
245 | excitation = Fully_connected(squeeze, units=out_dim // ratio, layer_name=layer_name+'_fully_connected1')  # integer division so units is an int
246 | excitation = Relu(excitation)
247 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2')
248 | excitation = Sigmoid(excitation)
249 |
250 | excitation = tf.reshape(excitation, [-1,1,1,out_dim])
251 | scale = input_x * excitation
252 |
253 | return scale
254 |
255 | def Build_SEnet(self, input_x):
256 | input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]])
257 | # size 32 -> 96
258 | print(np.shape(input_x))  # debug: confirm the padded input shape
259 | # only cifar10 architecture
260 |
261 | x = self.Stem(input_x, scope='stem')
262 |
263 | for i in range(5) :
264 | x = self.Inception_resnet_A(x, scope='Inception_A'+str(i))
265 | channel = int(np.shape(x)[-1])
266 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_A'+str(i))
267 |
268 | x = self.Reduction_A(x, scope='Reduction_A')
269 |
270 | for i in range(10) :
271 | x = self.Inception_resnet_B(x, scope='Inception_B'+str(i))
272 | channel = int(np.shape(x)[-1])
273 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_B'+str(i))
274 |
275 | x = self.Reduction_B(x, scope='Reduction_B')
276 |
277 | for i in range(5) :
278 | x = self.Inception_resnet_C(x, scope='Inception_C'+str(i))
279 | channel = int(np.shape(x)[-1])
280 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_C'+str(i))
281 |
282 | x = Global_Average_Pooling(x)
283 | x = Dropout(x, rate=0.2, training=self.training)
284 | x = flatten(x)
285 |
286 | x = Fully_connected(x, layer_name='final_fully_connected')
287 | return x
288 |
289 |
290 | train_x, train_y, test_x, test_y = prepare_data()
291 | train_x, test_x = color_preprocessing(train_x, test_x)
292 |
293 |
294 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10
295 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels])
296 | label = tf.placeholder(tf.float32, shape=[None, class_num])
297 |
298 | training_flag = tf.placeholder(tf.bool)
299 |
300 |
301 | learning_rate = tf.placeholder(tf.float32, name='learning_rate')
302 |
303 | logits = SE_Inception_resnet_v2(x, training=training_flag).model
304 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits))
305 |
306 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()])
307 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True)
308 | train = optimizer.minimize(cost + l2_loss * weight_decay)
309 |
310 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1))
311 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
312 |
313 | saver = tf.train.Saver(tf.global_variables())
314 |
315 | with tf.Session() as sess:
316 | ckpt = tf.train.get_checkpoint_state('./model')
317 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
318 | saver.restore(sess, ckpt.model_checkpoint_path)
319 | else:
320 | sess.run(tf.global_variables_initializer())
321 |
322 | summary_writer = tf.summary.FileWriter('./logs', sess.graph)
323 |
324 | epoch_learning_rate = init_learning_rate
325 | for epoch in range(1, total_epochs + 1):
326 | if epoch % 30 == 0 :
327 | epoch_learning_rate = epoch_learning_rate / 10
328 |
329 | pre_index = 0
330 | train_acc = 0.0
331 | train_loss = 0.0
332 |
333 | for step in range(1, iteration + 1):
334 | if pre_index + batch_size < 50000:
335 | batch_x = train_x[pre_index: pre_index + batch_size]
336 | batch_y = train_y[pre_index: pre_index + batch_size]
337 | else:
338 | batch_x = train_x[pre_index:]
339 | batch_y = train_y[pre_index:]
340 |
341 | batch_x = data_augmentation(batch_x)
342 |
343 | train_feed_dict = {
344 | x: batch_x,
345 | label: batch_y,
346 | learning_rate: epoch_learning_rate,
347 | training_flag: True
348 | }
349 |
350 | _, batch_loss = sess.run([train, cost], feed_dict=train_feed_dict)
351 | batch_acc = accuracy.eval(feed_dict=train_feed_dict)
352 |
353 | train_loss += batch_loss
354 | train_acc += batch_acc
355 | pre_index += batch_size
356 |
357 |
358 | train_loss /= iteration # average loss
359 | train_acc /= iteration # average accuracy
360 |
361 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss),
362 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)])
363 |
364 | test_acc, test_loss, test_summary = Evaluate(sess)
365 |
366 | summary_writer.add_summary(summary=train_summary, global_step=epoch)
367 | summary_writer.add_summary(summary=test_summary, global_step=epoch)
368 | summary_writer.flush()
369 |
370 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % (
371 | epoch, total_epochs, train_loss, train_acc, test_loss, test_acc)
372 | print(line)
373 |
374 | with open('logs.txt', 'a') as f:
375 | f.write(line)
376 |
377 | saver.save(sess=sess, save_path='./model/Inception_resnet_v2.ckpt')
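378 |
379 | # --- Illustrative sketch (added for clarity; not part of the original script) ---
380 | # Squeeze_excitation_layer above pools each feature map to one value, passes
381 | # it through a two-layer bottleneck of width out_dim // reduction_ratio, and
382 | # rescales the maps channel-wise. The same computation in NumPy (biases
383 | # omitted); w1 has shape [C, C // ratio] and w2 has shape [C // ratio, C]:
384 | def _se_np(x, w1, w2):
385 |     s = x.mean(axis=(1, 2))                 # squeeze: [N, H, W, C] -> [N, C]
386 |     e = np.maximum(s.dot(w1), 0.0)          # excitation FC1 + ReLU
387 |     e = 1.0 / (1.0 + np.exp(-e.dot(w2)))    # excitation FC2 + sigmoid: gates in (0, 1)
388 |     return x * e[:, None, None, :]          # scale: recalibrate each channel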
--------------------------------------------------------------------------------
/resnet_model.py:
--------------------------------------------------------------------------------
1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2 | #
3 | # Licensed under the Apache License, Version 2.0 (the "License");
4 | # you may not use this file except in compliance with the License.
5 | # You may obtain a copy of the License at
6 | #
7 | # http://www.apache.org/licenses/LICENSE-2.0
8 | #
9 | # Unless required by applicable law or agreed to in writing, software
10 | # distributed under the License is distributed on an "AS IS" BASIS,
11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | # See the License for the specific language governing permissions and
13 | # limitations under the License.
14 | # ==============================================================================
15 | """Contains definitions for the preactivation form of Residual Networks.
16 |
17 | Residual networks (ResNets) were originally proposed in:
18 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385
20 |
21 | The full preactivation 'v2' ResNet variant implemented in this module was
22 | introduced by:
23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
24 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027
25 |
26 | The key difference of the full preactivation 'v2' variant compared to the
27 | 'v1' variant in [1] is the use of batch normalization before every weight layer
28 | rather than after.
29 | """
30 |
31 | from __future__ import absolute_import
32 | from __future__ import division
33 | from __future__ import print_function
34 |
35 | import tensorflow as tf
36 | import numpy as np
37 | from IPython import embed
38 |
39 | _BATCH_NORM_DECAY = 0.997
40 | _BATCH_NORM_EPSILON = 1e-5
41 |
42 |
43 | def batch_norm_relu(inputs, is_training, data_format):
44 | """Performs a batch normalization followed by a ReLU."""
45 | # We set fused=True for a significant performance boost. See
46 | # https://www.tensorflow.org/performance/performance_guide#common_fused_ops
47 | inputs = tf.layers.batch_normalization(
48 | inputs=inputs, axis=1 if data_format == 'channels_first' else 3,
49 | momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True,
50 | scale=True, training=is_training, fused=True)
51 | inputs = tf.nn.relu(inputs)
52 | return inputs
53 |
54 |
55 | def fixed_padding(inputs, kernel_size, data_format):
56 | """Pads the input along the spatial dimensions independently of input size.
57 |
58 | Args:
59 | inputs: A tensor of size [batch, channels, height_in, width_in] or
60 | [batch, height_in, width_in, channels] depending on data_format.
61 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation.
62 | Should be a positive integer.
63 | data_format: The input format ('channels_last' or 'channels_first').
64 |
65 | Returns:
66 | A tensor with the same format as the input with the data either intact
67 | (if kernel_size == 1) or padded (if kernel_size > 1).
68 | """
69 | pad_total = kernel_size - 1
70 | pad_beg = pad_total // 2
71 | pad_end = pad_total - pad_beg
72 |
73 | if data_format == 'channels_first':
74 | padded_inputs = tf.pad(inputs, [[0, 0], [0, 0],
75 | [pad_beg, pad_end], [pad_beg, pad_end]])
76 | else:
77 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end],
78 | [pad_beg, pad_end], [0, 0]])
79 | return padded_inputs
80 |
81 |
82 | def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format):
83 | """Strided 2-D convolution with explicit padding."""
84 | # The padding is consistent and is based only on `kernel_size`, not on the
85 | # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone).
86 | if strides > 1:
87 | inputs = fixed_padding(inputs, kernel_size, data_format)
88 |
89 | return tf.layers.conv2d(
90 | inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides,
91 | padding=('SAME' if strides == 1 else 'VALID'), use_bias=False,
92 | kernel_initializer=tf.variance_scaling_initializer(),
93 | data_format=data_format)
94 |
95 |
96 | def GlobalAvgPooling(x, data_format):
97 | """
98 | Global average pooling as in the paper `Network In Network
99 | `_.
100 | Args:
101 | x (tf.Tensor): a NHWC tensor.
102 | Returns:
103 | tf.Tensor: a NC tensor named ``output``.
104 | """
105 | assert x.shape.ndims == 4
106 | assert data_format in ['channels_last', 'channels_first']
107 | axis = [1, 2] if data_format == 'channels_last' else [2, 3]
108 | return tf.reduce_mean(x, axis, name='GlobalAvgPooling')
109 |
110 |
111 | def flatten(x):
112 | """
113 | Flatten the tensor.
114 | """
115 | return tf.reshape(x, [-1])
116 |
117 |
118 | def batch_flatten(x):
119 | """
120 | Flatten the tensor except the first dimension.
121 | """
122 | shape = x.get_shape().as_list()[1:]
123 | if None not in shape:
124 | return tf.reshape(x, [-1, int(np.prod(shape))])
125 | return tf.reshape(x, tf.stack([tf.shape(x)[0], -1]))
126 |
127 |
128 | def FullyConnected(x, out_dim,
129 | W_init=None, b_init=None,
130 | nl=tf.identity, use_bias=True, name='fc'):
131 | """
132 | Fully-Connected layer, takes a N>1D tensor and returns a 2D tensor.
133 | It is an equivalent of `tf.layers.dense` except for naming conventions.
134 | Args:
135 | x (tf.Tensor): a tensor to be flattened except for the first dimension.
136 | out_dim (int): output dimension
137 | W_init: initializer for W. Defaults to `variance_scaling_initializer`.
138 | b_init: initializer for b. Defaults to zero.
139 | nl: a nonlinearity function
140 | use_bias (bool): whether to use bias.
141 | Returns:
142 | tf.Tensor: a NC tensor named ``output`` with attribute `variables`.
143 | Variable Names:
144 | * ``W``: weights of shape [in_dim, out_dim]
145 | * ``b``: bias
146 | """
147 | x = batch_flatten(x)
148 |
149 | if W_init is None:
150 | W_init = tf.contrib.layers.variance_scaling_initializer()
151 | if b_init is None:
152 | b_init = tf.constant_initializer()
153 |
154 | x = tf.layers.dense(
155 | inputs=x, units=out_dim, activation=lambda x: nl(x, name='output'), use_bias=use_bias,
156 | kernel_initializer=W_init, bias_initializer=b_init,
157 | trainable=True)
158 |
159 | x = tf.identity(x, name)
160 |
161 | return x
162 |
163 |
164 | def building_block(inputs, filters, is_training, projection_shortcut, strides,
165 | data_format):
166 | """Standard building block for residual networks with BN before convolutions.
167 |
168 | Args:
169 | inputs: A tensor of size [batch, channels, height_in, width_in] or
170 | [batch, height_in, width_in, channels] depending on data_format.
171 | filters: The number of filters for the convolutions.
172 | is_training: A Boolean for whether the model is in training or inference
173 | mode. Needed for batch normalization.
174 | projection_shortcut: The function to use for projection shortcuts (typically
175 | a 1x1 convolution when downsampling the input).
176 | strides: The block's stride. If greater than 1, this block will ultimately
177 | downsample the input.
178 | data_format: The input format ('channels_last' or 'channels_first').
179 |
180 | Returns:
181 | The output tensor of the block.
182 | """
183 | shortcut = inputs
184 | inputs = batch_norm_relu(inputs, is_training, data_format)
185 |
186 | # The projection shortcut should come after the first batch norm and ReLU
187 | # since it performs a 1x1 convolution.
188 | if projection_shortcut is not None:
189 | shortcut = projection_shortcut(inputs)
190 |
191 | inputs = conv2d_fixed_padding(
192 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
193 | data_format=data_format)
194 |
195 | inputs = batch_norm_relu(inputs, is_training, data_format)
196 | inputs = conv2d_fixed_padding(
197 | inputs=inputs, filters=filters, kernel_size=3, strides=1,
198 | data_format=data_format)
199 |
200 | return inputs + shortcut
201 |
202 |
203 | def se_building_block(inputs, filters, is_training, projection_shortcut, strides,
204 | data_format):
205 | """Standard building block for residual networks with BN before convolutions.
206 |
207 | Args:
208 | inputs: A tensor of size [batch, channels, height_in, width_in] or
209 | [batch, height_in, width_in, channels] depending on data_format.
210 | filters: The number of filters for the convolutions.
211 | is_training: A Boolean for whether the model is in training or inference
212 | mode. Needed for batch normalization.
213 | projection_shortcut: The function to use for projection shortcuts (typically
214 | a 1x1 convolution when downsampling the input).
215 | strides: The block's stride. If greater than 1, this block will ultimately
216 | downsample the input.
217 | data_format: The input format ('channels_last' or 'channels_first').
218 |
219 | Returns:
220 | The output tensor of the block.
221 | """
222 | shortcut = inputs
223 | inputs = batch_norm_relu(inputs, is_training, data_format)
224 |
225 | # The projection shortcut should come after the first batch norm and ReLU
226 | # since it performs a 1x1 convolution.
227 | if projection_shortcut is not None:
228 | shortcut = projection_shortcut(inputs)
229 |
230 | inputs = conv2d_fixed_padding(
231 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
232 | data_format=data_format)
233 |
234 | inputs = batch_norm_relu(inputs, is_training, data_format)
235 | inputs = conv2d_fixed_padding(
236 | inputs=inputs, filters=filters, kernel_size=3, strides=1,
237 | data_format=data_format)
238 |
239 | squeeze = GlobalAvgPooling(inputs, data_format)  # squeeze: one value per channel
240 | squeeze = FullyConnected(squeeze, filters // 4, nl=tf.nn.relu, name='fc1')  # excitation bottleneck (ratio 4)
241 | squeeze = FullyConnected(squeeze, filters, nl=tf.nn.sigmoid, name='fc2')  # per-channel gates in (0, 1)
242 |
243 | if data_format == 'channels_first':
244 | inputs = inputs * tf.reshape(squeeze, [-1, filters, 1, 1])
245 | else:
246 | inputs = inputs * tf.reshape(squeeze, [-1, 1, 1, filters])
247 | return inputs + shortcut
248 |
249 |
250 | def bottleneck_block(inputs, filters, is_training, projection_shortcut,
251 | strides, data_format):
252 | """Bottleneck block variant for residual networks with BN before convolutions.
253 |
254 | Args:
255 | inputs: A tensor of size [batch, channels, height_in, width_in] or
256 | [batch, height_in, width_in, channels] depending on data_format.
257 | filters: The number of filters for the first two convolutions. Note that the
258 | third and final convolution will use 4 times as many filters.
259 | is_training: A Boolean for whether the model is in training or inference
260 | mode. Needed for batch normalization.
261 | projection_shortcut: The function to use for projection shortcuts (typically
262 | a 1x1 convolution when downsampling the input).
263 | strides: The block's stride. If greater than 1, this block will ultimately
264 | downsample the input.
265 | data_format: The input format ('channels_last' or 'channels_first').
266 |
267 | Returns:
268 | The output tensor of the block.
269 | """
270 | shortcut = inputs
271 | inputs = batch_norm_relu(inputs, is_training, data_format)
272 |
273 | # The projection shortcut should come after the first batch norm and ReLU
274 | # since it performs a 1x1 convolution.
275 | if projection_shortcut is not None:
276 | shortcut = projection_shortcut(inputs)
277 |
278 | inputs = conv2d_fixed_padding(
279 | inputs=inputs, filters=filters, kernel_size=1, strides=1,
280 | data_format=data_format)
281 |
282 | inputs = batch_norm_relu(inputs, is_training, data_format)
283 | inputs = conv2d_fixed_padding(
284 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
285 | data_format=data_format)
286 |
287 | inputs = batch_norm_relu(inputs, is_training, data_format)
288 | inputs = conv2d_fixed_padding(
289 | inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
290 | data_format=data_format)
291 |
292 | return inputs + shortcut
293 |
294 |
295 | def se_bottleneck_block(inputs, filters, is_training, projection_shortcut,
296 | strides, data_format):
297 | """Bottleneck block variant for residual networks with BN before convolutions.
298 |
299 | Args:
300 | inputs: A tensor of size [batch, channels, height_in, width_in] or
301 | [batch, height_in, width_in, channels] depending on data_format.
302 | filters: The number of filters for the first two convolutions. Note that the
303 | third and final convolution will use 4 times as many filters.
304 | is_training: A Boolean for whether the model is in training or inference
305 | mode. Needed for batch normalization.
306 | projection_shortcut: The function to use for projection shortcuts (typically
307 | a 1x1 convolution when downsampling the input).
308 | strides: The block's stride. If greater than 1, this block will ultimately
309 | downsample the input.
310 | data_format: The input format ('channels_last' or 'channels_first').
311 |
312 | Returns:
313 | The output tensor of the block.
314 | """
315 | shortcut = inputs
316 | inputs = batch_norm_relu(inputs, is_training, data_format)
317 |
318 | # The projection shortcut should come after the first batch norm and ReLU
319 | # since it performs a 1x1 convolution.
320 | if projection_shortcut is not None:
321 | shortcut = projection_shortcut(inputs)
322 |
323 | inputs = conv2d_fixed_padding(
324 | inputs=inputs, filters=filters, kernel_size=1, strides=1,
325 | data_format=data_format)
326 |
327 | inputs = batch_norm_relu(inputs, is_training, data_format)
328 | inputs = conv2d_fixed_padding(
329 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
330 | data_format=data_format)
331 |
332 | inputs = batch_norm_relu(inputs, is_training, data_format)
333 | inputs = conv2d_fixed_padding(
334 | inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
335 | data_format=data_format)
336 |
337 | squeeze = GlobalAvgPooling(inputs, data_format)  # squeeze: one value per channel
338 | squeeze = FullyConnected(squeeze, filters // 4, nl=tf.nn.relu, name='fc1')  # excitation bottleneck
339 | squeeze = FullyConnected(squeeze, filters * 4, nl=tf.nn.sigmoid, name='fc2')  # per-channel gates in (0, 1)
340 | if data_format == 'channels_first':
341 | inputs = inputs * tf.reshape(squeeze, [-1, filters * 4, 1, 1])
342 | else:
343 | inputs = inputs * tf.reshape(squeeze, [-1, 1, 1, filters * 4])
344 |
345 | return inputs + shortcut
346 |
347 |
348 | def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name,
349 | data_format):
350 | """Creates one layer of blocks for the ResNet model.
351 |
352 | Args:
353 | inputs: A tensor of size [batch, channels, height_in, width_in] or
354 | [batch, height_in, width_in, channels] depending on data_format.
355 | filters: The number of filters for the first convolution of the layer.
356 | block_fn: The block to use within the model, either `building_block` or
357 | `bottleneck_block`.
358 | blocks: The number of blocks contained in the layer.
359 | strides: The stride to use for the first convolution of the layer. If
360 | greater than 1, this layer will ultimately downsample the input.
361 | is_training: Either True or False, whether we are currently training the
362 | model. Needed for batch norm.
363 | name: A string name for the tensor output of the block layer.
364 | data_format: The input format ('channels_last' or 'channels_first').
365 |
366 | Returns:
367 | The output tensor of the block layer.
368 | """
369 | # Bottleneck blocks end with 4x the number of filters as they start with
370 | filters_out = 4 * filters if block_fn in [bottleneck_block, se_bottleneck_block] else filters
371 |
372 | def projection_shortcut(inputs):
373 | return conv2d_fixed_padding(
374 | inputs=inputs, filters=filters_out, kernel_size=1, strides=strides,
375 | data_format=data_format)
376 |
377 | # Only the first block per block_layer uses projection_shortcut and strides
378 | inputs = block_fn(inputs, filters, is_training, projection_shortcut, strides,
379 | data_format)
380 |
381 | for _ in range(1, blocks):
382 | inputs = block_fn(inputs, filters, is_training, None, 1, data_format)
383 |
384 | return tf.identity(inputs, name)
385 |
386 |
387 | def cifar10_resnet_v2_generator(resnet_size, num_classes, data_format=None):
388 | """Generator for CIFAR-10 ResNet v2 models.
389 |
390 | Args:
391 | resnet_size: A single integer for the size of the ResNet model.
392 | num_classes: The number of possible classes for image classification.
393 | data_format: The input format ('channels_last', 'channels_first', or None).
394 | If set to None, the format is dependent on whether a GPU is available.
395 |
396 | Returns:
397 | The model function that takes in `inputs` and `is_training` and
398 | returns the output tensor of the ResNet model.
399 |
400 | Raises:
401 | ValueError: If `resnet_size` is invalid.
402 | """
403 | if resnet_size % 6 != 2:
404 | raise ValueError('resnet_size must be 6n + 2:', resnet_size)
405 |
406 | num_blocks = (resnet_size - 2) // 6
407 |
408 | if data_format is None:
409 | data_format = (
410 | 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
411 |
412 | def model(inputs, is_training):
413 | """Constructs the ResNet model given the inputs."""
414 | if data_format == 'channels_first':
415 | # Convert from channels_last (NHWC) to channels_first (NCHW). This
416 | # provides a large performance boost on GPU. See
417 | # https://www.tensorflow.org/performance/performance_guide#data_formats
418 | inputs = tf.transpose(inputs, [0, 3, 1, 2])
419 |
420 | inputs = conv2d_fixed_padding(
421 | inputs=inputs, filters=16, kernel_size=3, strides=1,
422 | data_format=data_format)
423 | inputs = tf.identity(inputs, 'initial_conv')
424 |
425 | inputs = block_layer(
426 | inputs=inputs, filters=16, block_fn=building_block, blocks=num_blocks,
427 | strides=1, is_training=is_training, name='block_layer1',
428 | data_format=data_format)
429 | inputs = block_layer(
430 | inputs=inputs, filters=32, block_fn=building_block, blocks=num_blocks,
431 | strides=2, is_training=is_training, name='block_layer2',
432 | data_format=data_format)
433 | inputs = block_layer(
434 | inputs=inputs, filters=64, block_fn=building_block, blocks=num_blocks,
435 | strides=2, is_training=is_training, name='block_layer3',
436 | data_format=data_format)
437 |
438 | inputs = batch_norm_relu(inputs, is_training, data_format)
439 | inputs = tf.layers.average_pooling2d(
440 | inputs=inputs, pool_size=8, strides=1, padding='VALID',
441 | data_format=data_format)
442 | inputs = tf.identity(inputs, 'final_avg_pool')
443 | inputs = tf.reshape(inputs, [-1, 64])
444 | inputs = tf.layers.dense(inputs=inputs, units=num_classes)
445 | inputs = tf.identity(inputs, 'final_dense')
446 | return inputs
447 |
448 | return model
449 |
450 |
451 | def imagenet_resnet_v2_generator(block_fn, layers, num_classes,
452 | data_format=None):
453 | """Generator for ImageNet ResNet v2 models.
454 |
455 | Args:
456 | block_fn: The block to use within the model, either `building_block` or
457 | `bottleneck_block`.
458 | layers: A length-4 array denoting the number of blocks to include in each
459 | layer. Each layer consists of blocks that take inputs of the same size.
460 | num_classes: The number of possible classes for image classification.
461 | data_format: The input format ('channels_last', 'channels_first', or None).
462 | If set to None, the format is dependent on whether a GPU is available.
463 |
464 | Returns:
465 | The model function that takes in `inputs` and `is_training` and
466 | returns the output tensor of the ResNet model.
467 | """
468 | if data_format is None:
469 | data_format = (
470 | 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last')
471 |
472 | def model(inputs, is_training):
473 | """Constructs the ResNet model given the inputs."""
474 | if data_format == 'channels_first':
475 | # Convert from channels_last (NHWC) to channels_first (NCHW). This
476 | # provides a large performance boost on GPU.
477 | inputs = tf.transpose(inputs, [0, 3, 1, 2])
478 |
479 | inputs = conv2d_fixed_padding(
480 | inputs=inputs, filters=64, kernel_size=7, strides=2,
481 | data_format=data_format)
482 | inputs = tf.identity(inputs, 'initial_conv')
483 | inputs = tf.layers.max_pooling2d(
484 | inputs=inputs, pool_size=3, strides=2, padding='SAME',
485 | data_format=data_format)
486 | inputs = tf.identity(inputs, 'initial_max_pool')
487 |
488 | inputs = block_layer(
489 | inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0],
490 | strides=1, is_training=is_training, name='block_layer1',
491 | data_format=data_format)
492 | inputs = block_layer(
493 | inputs=inputs, filters=128, block_fn=block_fn, blocks=layers[1],
494 | strides=2, is_training=is_training, name='block_layer2',
495 | data_format=data_format)
496 | inputs = block_layer(
497 | inputs=inputs, filters=256, block_fn=block_fn, blocks=layers[2],
498 | strides=2, is_training=is_training, name='block_layer3',
499 | data_format=data_format)
500 | inputs = block_layer(
501 | inputs=inputs, filters=512, block_fn=block_fn, blocks=layers[3],
502 | strides=2, is_training=is_training, name='block_layer4',
503 | data_format=data_format)
504 | inputs = batch_norm_relu(inputs, is_training, data_format)
505 | inputs = tf.layers.average_pooling2d(
506 | inputs=inputs, pool_size=7, strides=1, padding='VALID',
507 | data_format=data_format)
508 | inputs = tf.identity(inputs, 'final_avg_pool')
509 | inputs = tf.reshape(inputs,
510 | [-1, 512 if block_fn in [building_block, se_building_block] else 2048])
511 | # inputs = tf.layers.dense(inputs=inputs, units=num_classes)  # the classifier is attached by the caller instead
512 | # inputs = tf.identity(inputs, 'final_dense')
513 | return inputs
514 |
515 | return model
516 |
517 |
518 | def imagenet_resnet_v2(resnet_size, num_classes, mode='v2', data_format=None):
519 | """Returns the ResNet model for a given size and number of output classes."""
520 | building_block_mode = {
521 | 'v2': building_block,
522 | 'se': se_building_block}[mode]
523 | bottleneck_block_mode = {
524 | 'v2': bottleneck_block,
525 | 'se': se_bottleneck_block}[mode]
526 |
527 | model_params = {
528 | 18: {'block': building_block_mode, 'layers': [2, 2, 2, 2]},
529 | 34: {'block': building_block_mode, 'layers': [3, 4, 6, 3]},
530 | 50: {'block': bottleneck_block_mode, 'layers': [3, 4, 6, 3]},
531 | 101: {'block': bottleneck_block_mode, 'layers': [3, 4, 23, 3]},
532 | 152: {'block': bottleneck_block_mode, 'layers': [3, 8, 36, 3]},
533 | 200: {'block': bottleneck_block_mode, 'layers': [3, 24, 36, 3]}
534 | }
535 |
536 | if resnet_size not in model_params:
537 | raise ValueError('Not a valid resnet_size:', resnet_size)
538 |
539 | params = model_params[resnet_size]
540 | return imagenet_resnet_v2_generator(
541 | params['block'], params['layers'], num_classes, data_format)
542 |
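543 | # --- Illustrative usage sketch (added for clarity; not part of the original file) ---
544 | # The generators return pooled features (the final dense layer is commented
545 | # out above), so a caller such as resnet_model_fn attaches its own classifier.
546 | # A minimal sketch, assuming TF 1.x and 224x224 RGB inputs:
547 | if __name__ == '__main__':
548 |     net = imagenet_resnet_v2(resnet_size=50, num_classes=1000, mode='se',
549 |                              data_format='channels_last')
550 |     images = tf.placeholder(tf.float32, [None, 224, 224, 3])
551 |     feats = net(images, is_training=False)        # [None, 2048] for bottleneck blocks
552 |     logits = tf.layers.dense(inputs=feats, units=1000)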
--------------------------------------------------------------------------------