├── .vscode └── settings.json ├── assests ├── ratio.JPG ├── resnet.jpg ├── benefit.JPG ├── inception.jpg ├── senet_block.JPG ├── incorporation.JPG └── state_of_art.JPG ├── data_dump.py ├── LICENSE ├── .gitignore ├── README.md ├── scene_eval.py ├── dataflow_input.py ├── cifar10.py ├── eval.py ├── SE_ResNeXt.py ├── pre_train.py ├── train.py ├── SE_Inception_v4.py ├── SE_Inception_resnet_v2.py └── resnet_model.py /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": false 3 | } -------------------------------------------------------------------------------- /assests/ratio.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/ratio.JPG -------------------------------------------------------------------------------- /assests/resnet.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/resnet.jpg -------------------------------------------------------------------------------- /assests/benefit.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/benefit.JPG -------------------------------------------------------------------------------- /assests/inception.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/inception.jpg -------------------------------------------------------------------------------- /assests/senet_block.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/senet_block.JPG -------------------------------------------------------------------------------- /assests/incorporation.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/incorporation.JPG -------------------------------------------------------------------------------- /assests/state_of_art.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenynCV/SENet-Tensorflow/HEAD/assests/state_of_art.JPG -------------------------------------------------------------------------------- /data_dump.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | import os 4 | from IPython import embed 5 | 6 | label_path = '/data0/AIChallenger/places_devkit/categories_places365.txt' 7 | data_path = '/data0/AIChallenger/data_256' 8 | 9 | result = [] 10 | with open(label_path, 'r') as f: 11 | lines = (line.strip() for line in f) 12 | for line in lines: 13 | path, label_id = line.split() 14 | path = path[1:] 15 | for filename in os.listdir(os.path.join(data_path, path)): 16 | image = {} 17 | image['image_id'] = os.path.join(path, filename) 18 | image['label_id'] = label_id 19 | result.append(image) 20 | 21 | with open('/data0/AIChallenger/data_256.json', 'w') as f: 22 | json.dump(result, f) 23 | print('write result json, num is %d' % len(result)) 24 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Junho Kim (1993.01.12) 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *model*/ 2 | *log*/ 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | env/ 15 | build/ 16 | develop-eggs/ 17 | dist/ 18 | downloads/ 19 | eggs/ 20 | .eggs/ 21 | lib/ 22 | lib64/ 23 | parts/ 24 | sdist/ 25 | var/ 26 | wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
34 | *.manifest 35 | *.spec 36 | 37 | # Installer logs 38 | pip-log.txt 39 | pip-delete-this-directory.txt 40 | 41 | # Unit test / coverage reports 42 | htmlcov/ 43 | .tox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | .hypothesis/ 51 | 52 | # Translations 53 | *.mo 54 | *.pot 55 | 56 | # Django stuff: 57 | *.log 58 | local_settings.py 59 | 60 | # Flask stuff: 61 | instance/ 62 | .webassets-cache 63 | 64 | # Scrapy stuff: 65 | .scrapy 66 | 67 | # Sphinx documentation 68 | docs/_build/ 69 | 70 | # PyBuilder 71 | target/ 72 | 73 | # Jupyter Notebook 74 | .ipynb_checkpoints 75 | 76 | # pyenv 77 | .python-version 78 | 79 | # celery beat schedule file 80 | celerybeat-schedule 81 | 82 | # SageMath parsed files 83 | *.sage.py 84 | 85 | # dotenv 86 | .env 87 | 88 | # virtualenv 89 | .venv 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SENet-Tensorflow 2 | Simple Tensorflow implementation of [Squeeze-and-Excitation Networks](https://arxiv.org/abs/1709.01507) using **Cifar10** 3 | 4 | I implemented SENet on top of the following architectures: 5 | * [ResNeXt paper](https://arxiv.org/abs/1611.05431) 6 | * [Inception-v4, Inception-resnet-v2 paper](https://arxiv.org/abs/1602.07261) 7 | 8 | If you want to see the ***original author's code***, please refer to this [link](https://github.com/hujie-frank/SENet) 9 | 10 | 11 | 12 | ## Requirements 13 | * Tensorflow 1.x 14 | * Python 3.x 15 | * tflearn (install ***tflearn*** if you want a convenient ***global average pooling***) 16 | 17 | ## Issue 18 | ### Image_size 19 | * The paper's experiments use *ImageNet* 20 | * However, because the ***Inception network*** needs a larger **image size**, I used ***zero padding*** for Cifar10 21 | ```python 22 | input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]]) # size 32x32 -> 96x96 23 | ``` 24 | ### NOT ENOUGH GPU Memory 25 | * If you run out of GPU memory, please edit the code 26 | ```python 27 | with tf.Session() as sess : NO 28 | with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess : OK 29 | ``` 30 | 31 | ## Idea 32 | ### What is the "SE block" ? 33 | ![senet](./assests/senet_block.JPG) 34 | ```python 35 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name): 36 | with tf.name_scope(layer_name) : 37 | squeeze = Global_Average_Pooling(input_x) 38 | 39 | excitation = Fully_connected(squeeze, units=out_dim / ratio, layer_name=layer_name+'_fully_connected1') 40 | excitation = Relu(excitation) 41 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2') 42 | excitation = Sigmoid(excitation) 43 | 44 | excitation = tf.reshape(excitation, [-1,1,1,out_dim]) 45 | 46 | scale = input_x * excitation 47 | 48 | return scale 49 | ``` 50 | 51 | ### How is it applied? (Inception, Residual) 52 | <div align="center">
53 |   <img src="./assests/inception.jpg" width="420"> 54 |   <img src="./assests/resnet.jpg" width="420"> 55 | </div>
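In the residual case, the SE block recalibrates the channels of the residual branch right before it merges with the identity shortcut. A minimal sketch using the `Squeeze_excitation_layer` above (`residual_branch` is a placeholder for the usual conv-BN-ReLU stack, not a function from this repo):
```python
def se_residual_block(self, input_x, out_dim, ratio, layer_name):
    # residual branch, e.g. conv -> BN -> ReLU -> conv -> BN (placeholder)
    residual = residual_branch(input_x, out_dim)
    # channel-wise recalibration with the SE block defined above
    residual = self.Squeeze_excitation_layer(residual, out_dim, ratio, layer_name + '_se')
    # merge with the identity shortcut, as in the SE-ResNet module
    return Relu(input_x + residual)
```
The Inception case is the same idea, with the whole Inception module playing the role of the branch.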
56 | 57 | ### How should the *"Reduction ratio"* be set? 58 | ![reduction](./assests/ratio.JPG) 59 | * **original** refers to ***ResNet-50*** 60 | 61 | ## ImageNet Results 62 | ### Benefits against Network Depth 63 | ![depth](./assests/benefit.JPG) 64 | 65 | ### Incorporation with Modern Architecture 66 | ![incorporation](./assests/incorporation.JPG) 67 | 68 | ### Comparison with State-of-the-art 69 | ![compare](./assests/state_of_art.JPG) 70 | 71 | ## Cifar10 Results 72 | Coming soon 73 | 74 | ## Related works 75 | * [Densenet-Tensorflow](https://github.com/taki0112/Densenet-Tensorflow) 76 | * [ResNeXt-Tensorflow](https://github.com/taki0112/ResNeXt-Tensorflow) 77 | 78 | ## Reference 79 | * [Inception_korean](https://norman3.github.io/papers/docs/google_inception.html) 80 | 81 | ## Author 82 | Junho Kim 83 | -------------------------------------------------------------------------------- /scene_eval.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | # Copyright 2017 challenger.ai 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | """ 18 | Scene classification is a task of AI Challenger (全球AI挑战赛, Global AI Challenge) 19 | 20 | This python script calculates the accuracy of the test result, 21 | 22 | based on your submitted file and the reference file containing the ground truth. 23 | 24 | Usage: 25 | 26 | python scene_eval.py --submit SUBMIT_FILEPATH --ref REF_FILEPATH 27 | 28 | A test case is provided: the submitted file is submit.json, the reference file is ref.json; test it by: 29 | 30 | python scene_eval.py --submit ./submit.json --ref ./ref.json 31 | 32 | The accuracy of the submitted result, error messages and warning messages will be printed. 33 | """
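# Format note (inferred from __load_data/__eval_result below; the file name and
# label values here are hypothetical examples, not shipped test data): both
# files are JSON lists of {"image_id", "label_id"} records. The reference
# "label_id" is a single class id, while the submitted "label_id" is a
# prediction list of which only the first three entries are scored, e.g.
#   [{"image_id": "00001.jpg", "label_id": [3, 1, 7]}, ...]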
33 | """ 34 | 35 | import json 36 | import argparse 37 | import time 38 | 39 | 40 | def __load_data(submit_file, reference_file): 41 | # load submit result and reference result 42 | 43 | with open(submit_file, 'r') as file1: 44 | submit_data = json.load(file1) 45 | with open(reference_file, 'r') as file1: 46 | ref_data = json.load(file1) 47 | if len(submit_data) != len(ref_data): 48 | result['warning'].append('Inconsistent number of images between submission and reference data \n') 49 | submit_dict = {} 50 | ref_dict = {} 51 | for item in submit_data: 52 | submit_dict[item['image_id']] = item['label_id'] 53 | for item in ref_data: 54 | ref_dict[item['image_id']] = int(item['label_id']) 55 | return submit_dict, ref_dict 56 | 57 | 58 | def __eval_result(submit_dict, ref_dict): 59 | # eval accuracy 60 | 61 | right_count = 0 62 | for (key, value) in ref_dict.items(): 63 | 64 | if key not in set(submit_dict.keys()): 65 | result['warning'].append('lacking image %s in your submission file \n' % key) 66 | print('warnning: lacking image %s in your submission file' % key) 67 | continue 68 | 69 | if value in submit_dict[key][:3]: 70 | right_count += 1 71 | 72 | result['score'] = str(float(right_count)/max(len(ref_dict), 1e-5)) 73 | return result 74 | 75 | 76 | if __name__ == '__main__': 77 | 78 | PARSER = argparse.ArgumentParser() 79 | 80 | PARSER.add_argument( 81 | '--submit', 82 | type=str, 83 | default='./submit.json', 84 | help="""\ 85 | Path to submission file\ 86 | """ 87 | ) 88 | 89 | PARSER.add_argument( 90 | '--ref', 91 | type=str, 92 | default='./ref.json', 93 | help="""\ 94 | Path to reference file\ 95 | """ 96 | ) 97 | 98 | FLAGS = PARSER.parse_args() 99 | 100 | result = {'error': [], 'warning': [], 'score': None} 101 | 102 | START_TIME = time.time() 103 | SUBMIT = {} 104 | REF = {} 105 | 106 | try: 107 | SUBMIT, REF = __load_data(FLAGS.submit, FLAGS.ref) 108 | except Exception as error: 109 | result['error'].append(str(error)) 110 | try: 111 | result = __eval_result(SUBMIT, REF) 112 | except Exception as error: 113 | result['error'].append(str(error)) 114 | print('Evaluation time of your result: %f s' % (time.time() - START_TIME)) 115 | 116 | print(result) 117 | -------------------------------------------------------------------------------- /dataflow_input.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import json 4 | import os 5 | import random 6 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 7 | from tensorpack.dataflow import ( 8 | AugmentImageComponent, PrefetchDataZMQ, 9 | BatchData, MultiThreadMapData, DataFlow) 10 | from IPython import embed 11 | 12 | class GoogleNetResize(imgaug.ImageAugmentor): 13 | """ 14 | crop 8%~100% of the original image 15 | See `Going Deeper with Convolutions` by Google. 
16 | """ 17 | def __init__(self, crop_area_fraction=0.08, 18 | aspect_ratio_low=0.75, aspect_ratio_high=1.333, 19 | target_shape=224): 20 | self._init(locals()) 21 | 22 | def _augment(self, img, _): 23 | h, w = img.shape[:2] 24 | area = h * w 25 | for _ in range(10): 26 | targetArea = self.rng.uniform(self.crop_area_fraction, 1.0) * area 27 | aspectR = self.rng.uniform(self.aspect_ratio_low, self.aspect_ratio_high) 28 | ww = int(np.sqrt(targetArea * aspectR) + 0.5) 29 | hh = int(np.sqrt(targetArea / aspectR) + 0.5) 30 | if self.rng.uniform() < 0.5: 31 | ww, hh = hh, ww 32 | if hh <= h and ww <= w: 33 | x1 = 0 if w == ww else self.rng.randint(0, w - ww) 34 | y1 = 0 if h == hh else self.rng.randint(0, h - hh) 35 | out = img[y1:y1 + hh, x1:x1 + ww] 36 | out = cv2.resize(out, (self.target_shape, self.target_shape), interpolation=cv2.INTER_CUBIC) 37 | return out 38 | out = imgaug.ResizeShortestEdge(self.target_shape, interp=cv2.INTER_CUBIC).augment(img) 39 | out = imgaug.CenterCrop(self.target_shape).augment(out) 40 | return out 41 | 42 | def fbresnet_augmentor(isTrain, target_shape=224): 43 | """ 44 | Augmentor used in fb.resnet.torch, for BGR images in range [0,255]. 45 | """ 46 | if isTrain: 47 | augmentors = [ 48 | GoogleNetResize(crop_area_fraction=0.32, target_shape=target_shape), 49 | # GoogleNetResize(target_shape=target_shape), 50 | imgaug.RandomOrderAug( 51 | [# imgaug.BrightnessScale((0.6, 1.4), clip=False), 52 | # imgaug.Contrast((0.6, 1.4), clip=False), 53 | # imgaug.Saturation(0.4, rgb=False), 54 | # rgb-bgr conversion for the constants copied from fb.resnet.torch 55 | imgaug.Lighting(0.1, 56 | eigval=np.asarray( 57 | [0.2175, 0.0188, 0.0045][::-1]) * 255.0, 58 | eigvec=np.array( 59 | [[-0.5675, 0.7192, 0.4009], 60 | [-0.5808, -0.0045, -0.8140], 61 | [-0.5836, -0.6948, 0.4203]], 62 | dtype='float32')[::-1, ::-1] 63 | )]), 64 | imgaug.Flip(horiz=True), 65 | ] 66 | else: 67 | augmentors = [ 68 | imgaug.ResizeShortestEdge(int(256 / 224 * target_shape), cv2.INTER_CUBIC), 69 | imgaug.CenterCrop((target_shape, target_shape)), 70 | ] 71 | return augmentors 72 | 73 | def data_augmentation(im, augmentors): 74 | """ 75 | See explanations in the tutorial: 76 | http://tensorpack.readthedocs.io/en/latest/tutorial/efficient-dataflow.html 77 | """ 78 | assert isinstance(augmentors, list) 79 | aug = imgaug.AugmentorList(augmentors) 80 | im = aug.augment(im) 81 | return im 82 | 83 | class MyDataFlow(DataFlow): 84 | def __init__(self, image_path, label_path, is_training=True, batch_size=64, img_size=224): 85 | # get all the image name and its label 86 | self.data_dict = {} 87 | with open(label_path, 'r') as f: 88 | label_list = json.load(f) 89 | for image in label_list: 90 | self.data_dict[image['image_id']] = int(image['label_id']) 91 | self.img_name = list(self.data_dict.keys()) 92 | self.image_path = image_path 93 | self.is_training = is_training 94 | self.batch_size = batch_size 95 | self.img_size = img_size 96 | self.augmentors = fbresnet_augmentor(isTrain=is_training, target_shape=img_size) 97 | 98 | def get_data(self): 99 | np.random.seed() 100 | img_batch = np.random.choice(self.img_name, self.batch_size) 101 | img_data = [] 102 | img_label = [] 103 | for item in img_batch: 104 | im = cv2.imread(os.path.join(self.image_path, item), cv2.IMREAD_COLOR) 105 | im = data_augmentation(im, self.augmentors) 106 | img_data.append(im) 107 | img_label.append(self.data_dict[item]) 108 | yield {'data': np.array(img_data), 'label': np.array(img_label)} 109 | 110 | 111 | class MyDataFlowEval(DataFlow): 112 
| def __init__(self, image_path, label_path, img_size=224): 113 | # map every image name to its integer label 114 | self.data_dict = {} 115 | with open(label_path, 'r') as f: 116 | label_list = json.load(f) 117 | for image in label_list: 118 | self.data_dict[image['image_id']] = int(image['label_id']) 119 | self.img_name = list(self.data_dict.keys()) 120 | self.image_path = image_path 121 | self.img_size = img_size 122 | self.Length = len(self.data_dict) 123 | self.augmentors = fbresnet_augmentor(isTrain=False, target_shape=img_size) 124 | 125 | def get_data(self): 126 | for index, item in enumerate(self.img_name): 127 | im = cv2.imread(os.path.join(self.image_path, item), cv2.IMREAD_COLOR) 128 | im = data_augmentation(im, self.augmentors) 129 | label = self.data_dict[item] 130 | yield { 131 | 'name': item, 132 | 'data': np.expand_dims(np.array(im), axis=0), 133 | 'label': np.array(label), 134 | 'epoch': (index+1) == self.Length  # True for the last image 135 | } -------------------------------------------------------------------------------- /cifar10.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | 3 | import os 4 | import sys 5 | import time 6 | import pickle 7 | import random 8 | import numpy as np 9 | 10 | class_num = 10 11 | image_size = 32 12 | img_channels = 3 13 | 14 | 15 | # ========================================================== # 16 | # ├─ prepare_data() 17 | # ├─ download training data via download_data() if absent 18 | # ├─ load data via load_data() 19 | # └─ shuffle and return data 20 | # ========================================================== # 21 | 22 | 23 | 24 | def download_data(): 25 | dirname = 'cifar-10-batches-py' 26 | origin = 'http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' 27 | fname = 'cifar-10-python.tar.gz' 28 | fpath = './' + dirname 29 | 30 | download = False 31 | if os.path.exists(fpath) or os.path.isfile(fname): 32 | download = False 33 | print("DataSet already exists!") 34 | else: 35 | download = True 36 | if download: 37 | print('Downloading data from', origin) 38 | import urllib.request 39 | import tarfile 40 | 41 | def reporthook(count, block_size, total_size): 42 | global start_time 43 | if count == 0: 44 | start_time = time.time() 45 | return 46 | duration = time.time() - start_time 47 | progress_size = int(count * block_size) 48 | speed = int(progress_size / (1024 * duration)) 49 | percent = min(int(count * block_size * 100 / total_size), 100) 50 | sys.stdout.write("\r...%d%%, %d MB, %d KB/s, %d seconds passed" % 51 | (percent, progress_size / (1024 * 1024), speed, duration)) 52 | sys.stdout.flush() 53 | 54 | urllib.request.urlretrieve(origin, fname, reporthook) 55 | print('Download finished. Starting extraction of', origin) 56 | if (fname.endswith("tar.gz")): 57 | tar = tarfile.open(fname, "r:gz") 58 | tar.extractall() 59 | tar.close() 60 | elif (fname.endswith("tar")): 61 | tar = tarfile.open(fname, "r:") 62 | tar.extractall() 63 | tar.close() 64 | 65 | 66 | def unpickle(file): 67 | with open(file, 'rb') as fo: 68 | dict = pickle.load(fo, encoding='bytes') 69 | return dict 70 | 71 | 72 | def load_data_one(file): 73 | batch = unpickle(file) 74 | data = batch[b'data'] 75 | labels = batch[b'labels'] 76 | print("Loading %s : %d."
% (file, len(data))) 77 | return data, labels 78 | 79 | 80 | def load_data(files, data_dir, label_count): 81 | global image_size, img_channels 82 | data, labels = load_data_one(data_dir + '/' + files[0]) 83 | for f in files[1:]: 84 | data_n, labels_n = load_data_one(data_dir + '/' + f) 85 | data = np.append(data, data_n, axis=0) 86 | labels = np.append(labels, labels_n, axis=0) 87 | labels = np.array([[float(i == label) for i in range(label_count)] for label in labels]) 88 | data = data.reshape([-1, img_channels, image_size, image_size]) 89 | data = data.transpose([0, 2, 3, 1]) 90 | return data, labels 91 | 92 | 93 | def prepare_data(): 94 | print("======Loading data======") 95 | download_data() 96 | data_dir = './cifar-10-batches-py' 97 | image_dim = image_size * image_size * img_channels 98 | meta = unpickle(data_dir + '/batches.meta') 99 | 100 | label_names = meta[b'label_names'] 101 | label_count = len(label_names) 102 | train_files = ['data_batch_%d' % d for d in range(1, 6)] 103 | train_data, train_labels = load_data(train_files, data_dir, label_count) 104 | test_data, test_labels = load_data(['test_batch'], data_dir, label_count) 105 | 106 | print("Train data:", np.shape(train_data), np.shape(train_labels)) 107 | print("Test data :", np.shape(test_data), np.shape(test_labels)) 108 | print("======Load finished======") 109 | 110 | print("======Shuffling data======") 111 | indices = np.random.permutation(len(train_data)) 112 | train_data = train_data[indices] 113 | train_labels = train_labels[indices] 114 | print("======Prepare Finished======") 115 | 116 | return train_data, train_labels, test_data, test_labels 117 | 118 | 119 | # ========================================================== # 120 | # ├─ _random_crop() 121 | # ├─ _random_flip_leftright() 122 | # ├─ data_augmentation() 123 | # └─ color_preprocessing() 124 | # ========================================================== # 125 | 126 | def _random_crop(batch, crop_shape, padding=None): 127 | oshape = np.shape(batch[0]) 128 | 129 | if padding: 130 | oshape = (oshape[0] + 2 * padding, oshape[1] + 2 * padding) 131 | new_batch = [] 132 | npad = ((padding, padding), (padding, padding), (0, 0)) 133 | for i in range(len(batch)): 134 | new_batch.append(batch[i]) 135 | if padding: 136 | new_batch[i] = np.lib.pad(batch[i], pad_width=npad, 137 | mode='constant', constant_values=0) 138 | nh = random.randint(0, oshape[0] - crop_shape[0]) 139 | nw = random.randint(0, oshape[1] - crop_shape[1]) 140 | new_batch[i] = new_batch[i][nh:nh + crop_shape[0], 141 | nw:nw + crop_shape[1]] 142 | return new_batch 143 | 144 | 145 | def _random_flip_leftright(batch): 146 | for i in range(len(batch)): 147 | if bool(random.getrandbits(1)): 148 | batch[i] = np.fliplr(batch[i]) 149 | return batch 150 | 151 | 152 | def color_preprocessing(x_train, x_test): 153 | x_train = x_train.astype('float32') 154 | x_test = x_test.astype('float32') 155 | x_train[:, :, :, 0] = (x_train[:, :, :, 0] - np.mean(x_train[:, :, :, 0])) / np.std(x_train[:, :, :, 0]) 156 | x_train[:, :, :, 1] = (x_train[:, :, :, 1] - np.mean(x_train[:, :, :, 1])) / np.std(x_train[:, :, :, 1]) 157 | x_train[:, :, :, 2] = (x_train[:, :, :, 2] - np.mean(x_train[:, :, :, 2])) / np.std(x_train[:, :, :, 2]) 158 | 159 | x_test[:, :, :, 0] = (x_test[:, :, :, 0] - np.mean(x_test[:, :, :, 0])) / np.std(x_test[:, :, :, 0]) 160 | x_test[:, :, :, 1] = (x_test[:, :, :, 1] - np.mean(x_test[:, :, :, 1])) / np.std(x_test[:, :, :, 1]) 161 | x_test[:, :, :, 2] = (x_test[:, :, :, 2] - np.mean(x_test[:, :, :, 2])) / 
np.std(x_test[:, :, :, 2]) 162 | 163 | return x_train, x_test 164 | 165 | 166 | def data_augmentation(batch): 167 | batch = _random_flip_leftright(batch) 168 | batch = _random_crop(batch, [32, 32], 4) 169 | return batch -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | import json 5 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 6 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData) 7 | from dataflow_input import MyDataFlowEval 8 | import resnet_model 9 | from IPython import embed 10 | 11 | os.environ['CUDA_VISIBLE_DEVICES']= '2' 12 | 13 | init_learning_rate = 0.1 14 | batch_size = 64 15 | image_size = 224 16 | img_channels = 3 17 | class_num = 80 18 | 19 | weight_decay = 1e-4 20 | momentum = 0.9 21 | 22 | total_epochs = 100 23 | iteration = 421 24 | # 128 * 421 ~ 53,879 25 | test_iteration = 10 26 | 27 | def dist_top_k(feat, centers): 28 | feat = feat[0, ]  # single feature vector 29 | diff = centers - feat 30 | diff = - tf.reduce_sum(diff*diff, axis=1)  # negative squared distance, so top_k picks the nearest centers 31 | _, predictions = tf.nn.top_k(diff, 3) 32 | return predictions 33 | 34 | def get_tensor_by_name(save_file, var_name): 35 | reader = tf.train.NewCheckpointReader(save_file) 36 | return reader.get_tensor(var_name) 37 | 38 | def center_loss(features, label, alfa, nrof_classes): 39 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 40 | (http://ydwen.github.io/papers/WenECCV16.pdf) 41 | """ 42 | nrof_features = features.get_shape()[1] 43 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 44 | initializer=tf.constant_initializer(0), trainable=False) 45 | label = tf.reshape(label, [-1]) 46 | centers_batch = tf.gather(centers, label) 47 | diff = (1 - alfa) * (centers_batch - features) 48 | centers = tf.scatter_sub(centers, label, diff) 49 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm') 50 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 51 | return loss, centers 52 | 53 | def focal_loss(onehot_labels, cls_preds, 54 | alpha=0.25, gamma=2.0, name=None, scope=None): 55 | """Compute sigmoid focal loss between logits and onehot labels 56 | logits and onehot_labels must have same shape [batchsize, num_classes] and 57 | the same data type (float16, 32, 64) 58 | Args: 59 | onehot_labels: Each row labels[i] must be a valid probability distribution 60 | cls_preds: Unscaled log probabilities 61 | alpha: The hyperparameter for adjusting biased samples, default is 0.25 62 | gamma: The hyperparameter for penalizing the easy labeled samples 63 | name: A name for the operation (optional) 64 | Returns: 65 | A 1-D tensor of length batch_size of same type as logits with sigmoid focal loss 66 | """ 67 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc: 68 | logits = tf.convert_to_tensor(cls_preds) 69 | onehot_labels = tf.convert_to_tensor(onehot_labels) 70 | 71 | precise_logits = tf.cast(logits, tf.float32) if ( 72 | logits.dtype == tf.float16) else logits 73 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype) 74 | predictions = tf.nn.sigmoid(logits) 75 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions)  # p_t: probability assigned to the true class 76 | # add small value to avoid log(0); per class, FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t) 77 | epsilon = 1e-8 78 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32)) 79 | alpha_t =
tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t) 80 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon), 81 | name=name, axis=1) 82 | return losses 83 | 84 | def Evaluate(sess): 85 | test_acc = 0.0 86 | test_loss = 0.0 87 | 88 | for it in range(test_iteration): 89 | batch_data = next(scene_data_val) 90 | test_batch_x = batch_data['data'] 91 | test_batch_y = batch_data['label'] 92 | 93 | test_feed_dict = { 94 | x: test_batch_x, 95 | label: test_batch_y, 96 | learning_rate: epoch_learning_rate, 97 | training_flag: False 98 | } 99 | 100 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict) 101 | 102 | test_loss += loss_ 103 | test_acc += acc_ 104 | 105 | test_loss /= test_iteration # average loss 106 | test_acc /= test_iteration # average accuracy 107 | 108 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 109 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 110 | 111 | return test_acc, test_loss, summary 112 | 113 | def resnet_model_fn(inputs, training): 114 | """Our model_fn for ResNet to be used with our Estimator.""" 115 | 116 | network = resnet_model.imagenet_resnet_v2( 117 | resnet_size=18, num_classes=class_num, mode='se', data_format=None) 118 | inputs= network(inputs=inputs, is_training=training) 119 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat') 120 | inputs = tf.layers.dense(inputs=inputs, units=class_num) 121 | # inputs = tf.layers.dense(inputs=feat, units=class_num) 122 | inputs = tf.identity(inputs, 'final_dense') 123 | 124 | return inputs, feat 125 | 126 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10 127 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels]) 128 | label = tf.placeholder(tf.float32, shape=[None,]) 129 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num) 130 | 131 | training_flag = tf.placeholder(tf.bool) 132 | learning_rate = tf.placeholder(tf.float32, name='learning_rate') 133 | 134 | logits, feat = resnet_model_fn(x, training=training_flag) 135 | 136 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)) 137 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5)) 138 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 139 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num) 140 | Total_loss = Focal_loss + l2_loss + Center_loss 141 | 142 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True) 143 | # Batch norm requires update_ops to be added as a train_op dependency. 144 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 145 | with tf.control_dependencies(update_ops): 146 | train_op = optimizer.minimize(Total_loss) 147 | 148 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1)) 149 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 150 | 151 | values, indices = tf.nn.top_k(logits, 3) 152 | 153 | val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/' 154 | annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json' 155 | # a DataFlow you implement to produce [tensor1, tensor2, ..] 
lists from whatever sources: 156 | df = MyDataFlowEval(val_dir, annotations, img_size=image_size) 157 | # start 3 processes to run the dataflow in parallel 158 | df = PrefetchDataZMQ(df, nr_proc=1) 159 | df.reset_state() 160 | scene_data_val = df.get_data() 161 | 162 | centers_class = np.load("centers.npy") 163 | centers_class = tf.convert_to_tensor(centers_class) 164 | indices_Center = dist_top_k(feat, centers_class) 165 | 166 | saver = tf.train.Saver(tf.global_variables()) 167 | 168 | with tf.Session() as sess: 169 | ckpt = tf.train.get_checkpoint_state('./model_release') 170 | print("loading checkpoint...") 171 | saver.restore(sess, ckpt.model_checkpoint_path) 172 | 173 | result = [] 174 | for it in scene_data_val: 175 | temp_dict = {} 176 | feed_dict = {x: it['data'], training_flag: False} 177 | predictions, predictions_Center = sess.run([indices, indices_Center], feed_dict=feed_dict) 178 | predictions = np.squeeze(predictions, axis=0) 179 | 180 | predictions = predictions_Center 181 | 182 | temp_dict['image_id'] = it['name'] 183 | temp_dict['label_id'] = predictions.tolist() 184 | result.append(temp_dict) 185 | print('image %s is %d,%d,%d, label: %d' % (it['name'], predictions[0], predictions[1], predictions[2], it['label'])) 186 | if it['epoch']: 187 | break 188 | 189 | with open('submit.json', 'w') as f: 190 | json.dump(result, f) 191 | print('write result json, num is %d' % len(result)) 192 | 193 | -------------------------------------------------------------------------------- /SE_ResNeXt.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tflearn.layers.conv import global_avg_pool 3 | from tensorflow.contrib.layers import batch_norm, flatten 4 | from tensorflow.contrib.framework import arg_scope 5 | import numpy as np 6 | import os 7 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 8 | from tensorpack.dataflow import ( 9 | AugmentImageComponent, PrefetchDataZMQ, 10 | BatchData, MultiThreadMapData, DataFlow) 11 | from dataflow_input import (MyDataFlow, data_augmentation) 12 | from IPython import embed 13 | 14 | os.environ['CUDA_VISIBLE_DEVICES']= '3' 15 | 16 | weight_decay = 0.0005 17 | momentum = 0.9 18 | 19 | init_learning_rate = 0.1 * 5 20 | cardinality = 2 # how many split ? 21 | blocks = 3 # res_block ! 
(split + transition) 22 | depth = 64 # out channel 23 | 24 | """ 25 | So the total number of layers is (3*blocks)*residual_layer_num + 2, 26 | because each block = split (2 conv layers) + transition (1 conv layer) = 3 layers, 27 | plus the first conv layer and the last dense layer. 28 | Thus, total number of layers = (3*blocks)*residual_layer_num + 2 29 | """ 30 | 31 | reduction_ratio = 4 32 | 33 | total_epochs = 100 34 | 35 | batch_size = 64 36 | image_size = 224 37 | img_channels = 3 38 | class_num = 80 39 | 40 | iteration = 421 41 | # 128 * 421 ~ 53,879 42 | 43 | test_iteration = 10 44 | 45 | def conv_layer(input, filter, kernel, stride, padding='SAME', layer_name="conv"): 46 | with tf.name_scope(layer_name): 47 | network = tf.layers.conv2d(inputs=input, use_bias=False, filters=filter, kernel_size=kernel, strides=stride, padding=padding) 48 | return network 49 | 50 | def deconv_layer(input, filter, kernel, stride, padding='SAME', layer_name="deconv"): 51 | with tf.name_scope(layer_name): 52 | network = tf.layers.conv2d_transpose(inputs=input, use_bias=False, filters=filter, kernel_size=kernel, strides=stride, padding=padding) 53 | return network 54 | 55 | def Global_Average_Pooling(x): 56 | return global_avg_pool(x, name='Global_avg_pooling') 57 | 58 | def Average_pooling(x, pool_size=[2,2], stride=2, padding='SAME'): 59 | return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding) 60 | 61 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') : 62 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding) 63 | 64 | def Batch_Normalization(x, training, scope): 65 | with arg_scope([batch_norm], 66 | scope=scope, 67 | updates_collections=None, 68 | decay=0.9, 69 | center=True, 70 | scale=True, 71 | zero_debias_moving_mean=True) : 72 | return tf.cond(training, 73 | lambda : batch_norm(inputs=x, is_training=training, reuse=None), 74 | lambda : batch_norm(inputs=x, is_training=training, reuse=True)) 75 | 76 | def Relu(x): 77 | return tf.nn.relu(x) 78 | 79 | def Sigmoid(x) : 80 | return tf.nn.sigmoid(x) 81 | 82 | def tanh(x): 83 | return tf.tanh(x) 84 | 85 | def Concatenation(layers) : 86 | return tf.concat(layers, axis=3) 87 | 88 | def Fully_connected(x, units=class_num, layer_name='fully_connected') : 89 | with tf.name_scope(layer_name) : 90 | return tf.layers.dense(inputs=x, use_bias=False, units=units) 91 | 92 | def center_loss(features, label, alfa, nrof_classes): 93 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 94 | (http://ydwen.github.io/papers/WenECCV16.pdf) 95 | """ 96 | nrof_features = features.get_shape()[1] 97 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 98 | initializer=tf.constant_initializer(0), trainable=False) 99 | label = tf.reshape(label, [-1]) 100 | centers_batch = tf.gather(centers, label) 101 | diff = (1 - alfa) * (centers_batch - features) 102 | centers = tf.scatter_sub(centers, label, diff) 103 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 104 | return loss, centers 105 | 106 | def Evaluate(sess): 107 | test_acc = 0.0 108 | test_loss = 0.0 109 | 110 | for it in range(test_iteration): 111 | batch_data = next(scene_data_val) 112 | test_batch_x = batch_data['data'] 113 | test_batch_y = batch_data['label'] 114 | 115 | test_feed_dict = { 116 | x: test_batch_x, 117 | label: test_batch_y, 118 | learning_rate: epoch_learning_rate, 119 | training_flag: False 120 | } 121 | 122 | loss_, acc_ =
sess.run([cost, accuracy], feed_dict=test_feed_dict) 123 | 124 | test_loss += loss_ 125 | test_acc += acc_ 126 | 127 | test_loss /= test_iteration # average loss 128 | test_acc /= test_iteration # average accuracy 129 | 130 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 131 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 132 | 133 | return test_acc, test_loss, summary 134 | 135 | class SE_ResNeXt(): 136 | def __init__(self, x, training): 137 | self.training = training 138 | self.model = self.Build_SEnet(x) 139 | 140 | def first_layer(self, x, scope): 141 | with tf.name_scope(scope) : 142 | x = conv_layer(x, filter=64, kernel=[7, 7], stride=2, layer_name=scope+'_conv1') 143 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 144 | x = Relu(x) 145 | x = Max_pooling(x) 146 | 147 | return x 148 | 149 | def transform_layer(self, x, stride, scope): 150 | with tf.name_scope(scope) : 151 | x = conv_layer(x, filter=depth, kernel=[1,1], stride=1, layer_name=scope+'_conv1') 152 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 153 | x = Relu(x) 154 | 155 | x = conv_layer(x, filter=depth, kernel=[3,3], stride=stride, layer_name=scope+'_conv2') 156 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch2') 157 | x = Relu(x) 158 | return x 159 | 160 | def transition_layer(self, x, out_dim, scope): 161 | with tf.name_scope(scope): 162 | x = conv_layer(x, filter=out_dim, kernel=[1,1], stride=1, layer_name=scope+'_conv1') 163 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 164 | # x = Relu(x) 165 | 166 | return x 167 | 168 | def split_layer(self, input_x, stride, layer_name): 169 | with tf.name_scope(layer_name) : 170 | layers_split = list() 171 | for i in range(cardinality) : 172 | splits = self.transform_layer(input_x, stride=stride, scope=layer_name + '_splitN_' + str(i)) 173 | layers_split.append(splits) 174 | 175 | return Concatenation(layers_split) 176 | 177 | def squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name): 178 | with tf.name_scope(layer_name) : 179 | squeeze = Global_Average_Pooling(input_x) 180 | 181 | excitation = Fully_connected(squeeze, units=out_dim / ratio, layer_name=layer_name+'_fully_connected1') 182 | excitation = Relu(excitation) 183 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2') 184 | excitation = Sigmoid(excitation) 185 | 186 | excitation = tf.reshape(excitation, [-1,1,1,out_dim]) 187 | scale = input_x * excitation 188 | 189 | return scale 190 | 191 | def residual_layer(self, input_x, out_dim, layer_num, res_block=blocks): 192 | # split + transform(bottleneck) + transition + merge 193 | # input_dim = input_x.get_shape().as_list()[-1] 194 | 195 | for i in range(res_block): 196 | input_dim = int(np.shape(input_x)[-1]) 197 | 198 | if input_dim * 2 == out_dim: 199 | flag = True 200 | stride = 2 201 | channel = input_dim // 2 202 | else: 203 | flag = False 204 | stride = 1 205 | x = self.split_layer(input_x, stride=stride, layer_name='split_layer_'+layer_num+'_'+str(i)) 206 | x = self.transition_layer(x, out_dim=out_dim, scope='trans_layer_'+layer_num+'_'+str(i)) 207 | x = self.squeeze_excitation_layer(x, out_dim=out_dim, ratio=reduction_ratio, layer_name='squeeze_layer_'+layer_num+'_'+str(i)) 208 | 209 | if flag is True : 210 | pad_input_x = Average_pooling(input_x) 211 | pad_input_x = tf.pad(pad_input_x, [[0, 0], [0, 0], [0, 0], [channel, channel]]) # [?, 
height, width, channel] 212 | else : 213 | pad_input_x = input_x 214 | 215 | input_x = Relu(x + pad_input_x) 216 | 217 | return input_x  # output of the last block's residual merge 218 | 219 | def generator(self, x, scope="generator"): 220 | with tf.variable_scope(scope): 221 | n_downsampling = 5 222 | for i in range(n_downsampling): 223 | mult = pow(2, (n_downsampling - i)) 224 | x = deconv_layer(x, filter=int((32 * mult) / 2), kernel=[3, 3], stride=2, layer_name='deconv' + str(i)) 225 | x = Relu(x) 226 | 227 | x = conv_layer(x, filter=3, kernel=[7,7], stride=1, layer_name='conv1') 228 | x = 128 * Batch_Normalization(x, training=self.training, scope=scope+'_batch1') + 128 229 | 230 | return x 231 | 232 | def Build_SEnet(self, input_x): 233 | # stem + 4 SE-ResNeXt stages (adapted from the cifar10 architecture) 234 | 235 | input_x = self.first_layer(input_x, scope='first_layer') 236 | 237 | x = self.residual_layer(input_x, out_dim=64, layer_num='1') 238 | x = self.residual_layer(x, out_dim=128, layer_num='2') 239 | x = self.residual_layer(x, out_dim=256, layer_num='3') 240 | x = self.residual_layer(x, out_dim=512, layer_num='4') 241 | 242 | recon_x = self.generator(x) 243 | # recon_x = tf.cast(recon_x, dtype=tf.uint8) 244 | 245 | x = Global_Average_Pooling(x) 246 | x = flatten(x) 247 | 248 | feat = tf.nn.l2_normalize(x, 1, 1e-10, name='feat') 249 | 250 | x = Fully_connected(x, layer_name='final_fully_connected') 251 | return x, recon_x, feat -------------------------------------------------------------------------------- /pre_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 5 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData) 6 | from dataflow_input import MyDataFlow 7 | import resnet_model 8 | from IPython import embed 9 | 10 | os.environ['CUDA_VISIBLE_DEVICES']= '0' 11 | 12 | init_learning_rate = 0.1 13 | batch_size = 128 14 | image_size = 224 15 | img_channels = 3 16 | class_num = 365 17 | 18 | weight_decay = 1e-4 19 | momentum = 0.9 20 | 21 | total_epochs = 30 22 | iteration = 14089 // 1 23 | # 128 * 14089 ~ 1,803,460 24 | test_iteration = 10 25 | 26 | def center_loss(features, label, alfa, nrof_classes): 27 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 28 | (http://ydwen.github.io/papers/WenECCV16.pdf) 29 | """ 30 | nrof_features = features.get_shape()[1] 31 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 32 | initializer=tf.constant_initializer(0), trainable=False) 33 | label = tf.reshape(label, [-1]) 34 | centers_batch = tf.gather(centers, label) 35 | diff = (1 - alfa) * (centers_batch - features) 36 | centers = tf.scatter_sub(centers, label, diff) 37 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm') 38 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 39 | return loss, centers 40 | 41 | def focal_loss(onehot_labels, cls_preds, 42 | alpha=0.25, gamma=2.0, name=None, scope=None): 43 | """Compute softmax focal loss between logits and onehot labels 44 | logits and onehot_labels must have same shape [batchsize, num_classes] and 45 | the same data type (float16, 32, 64) 46 | Args: 47 | onehot_labels: Each row labels[i] must be a valid probability distribution 48 | cls_preds: Unscaled log probabilities 49 | alpha: The hyperparameter for adjusting biased samples, default is 0.25 50 | gamma: The hyperparameter for penalizing the easy labeled samples 51 | name: A name
for the operation (optional) 52 | Returns: 53 | A 1-D tensor of length batch_size of same type as logits with softmax focal loss 54 | """ 55 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc: 56 | logits = tf.convert_to_tensor(cls_preds) 57 | onehot_labels = tf.convert_to_tensor(onehot_labels) 58 | 59 | precise_logits = tf.cast(logits, tf.float32) if ( 60 | logits.dtype == tf.float16) else logits 61 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype) 62 | predictions = tf.nn.sigmoid(logits) 63 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions) 64 | # add small value to avoid 0 65 | epsilon = 1e-8 66 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32)) 67 | alpha_t = tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t) 68 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon), 69 | name=name, axis=1) 70 | return losses 71 | 72 | def Evaluate(sess): 73 | test_acc = 0.0 74 | test_loss = 0.0 75 | 76 | for it in range(test_iteration): 77 | batch_data = next(scene_data_val) 78 | test_batch_x = batch_data['data'] 79 | test_batch_y = batch_data['label'] 80 | 81 | test_feed_dict = { 82 | x: test_batch_x, 83 | label: test_batch_y, 84 | learning_rate: epoch_learning_rate, 85 | training_flag: False 86 | } 87 | 88 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict) 89 | 90 | test_loss += loss_ 91 | test_acc += acc_ 92 | 93 | test_loss /= test_iteration # average loss 94 | test_acc /= test_iteration # average accuracy 95 | 96 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 97 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 98 | 99 | return test_acc, test_loss, summary 100 | 101 | def resnet_model_fn(inputs, training): 102 | """Our model_fn for ResNet to be used with our Estimator.""" 103 | 104 | network = resnet_model.imagenet_resnet_v2( 105 | resnet_size=18, num_classes=class_num, mode='se', data_format=None) 106 | inputs= network(inputs=inputs, is_training=training) 107 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat') 108 | inputs = tf.layers.dense(inputs=inputs, units=class_num) 109 | # inputs = tf.layers.dense(inputs=feat, units=class_num) 110 | inputs = tf.identity(inputs, 'final_dense') 111 | 112 | return inputs, feat 113 | 114 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10 115 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels]) 116 | label = tf.placeholder(tf.float32, shape=[None,]) 117 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num) 118 | 119 | training_flag = tf.placeholder(tf.bool) 120 | learning_rate = tf.placeholder(tf.float32, name='learning_rate') 121 | 122 | logits, feat = resnet_model_fn(x, training=training_flag) 123 | 124 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)) 125 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5)) 126 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 127 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num) 128 | Total_loss = cost + l2_loss 129 | 130 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True) 131 | # Batch norm requires update_ops to be added as a train_op dependency. 
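# tf.layers batch normalization registers its moving-mean/variance update ops
# in tf.GraphKeys.UPDATE_OPS; wrapping optimizer.minimize in
# tf.control_dependencies below makes every training step run those updates
# first, so the statistics used at inference time actually get updated.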
132 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) 133 | with tf.control_dependencies(update_ops): 134 | train_op = optimizer.minimize(Total_loss) 135 | 136 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1)) 137 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 138 | 139 | # val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/' 140 | # annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json' 141 | # # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources: 142 | # df = MyDataFlow(val_dir, annotations, is_training=False, batch_size=batch_size, img_size=image_size) 143 | # # start 3 processes to run the dataflow in parallel 144 | # df = PrefetchDataZMQ(df, nr_proc=10) 145 | # df.reset_state() 146 | # scene_data_val = df.get_data() 147 | 148 | train_dir = '/data0/AIChallenger/data_256' 149 | annotations = '/data0/AIChallenger/data_256.json' 150 | # a DataFlow you implement to produce [tensor1, tensor2, ..] lists from whatever sources: 151 | df = MyDataFlow(train_dir, annotations, is_training=True, batch_size=batch_size, img_size=image_size) 152 | # start 3 processes to run the dataflow in parallel 153 | df = PrefetchDataZMQ(df, nr_proc=10) 154 | df.reset_state() 155 | scene_data = df.get_data() 156 | 157 | saver = tf.train.Saver(tf.global_variables()) 158 | 159 | with tf.Session() as sess: 160 | ckpt = tf.train.get_checkpoint_state('./model_pretrain') 161 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): 162 | print("loading checkpoint...") 163 | saver.restore(sess, ckpt.model_checkpoint_path) 164 | else: 165 | sess.run(tf.global_variables_initializer()) 166 | 167 | summary_writer = tf.summary.FileWriter('./logs_pretrain', sess.graph) 168 | 169 | _x = x[:, :, :, ::-1] 170 | tf.summary.image('x', _x, 4) 171 | 172 | summary_op = tf.summary.merge_all() 173 | 174 | epoch_learning_rate = init_learning_rate 175 | for epoch in range(1, total_epochs + 1): 176 | if epoch % 10 == 0 : 177 | epoch_learning_rate = epoch_learning_rate / 10 178 | 179 | train_acc = 0.0 180 | train_loss = 0.0 181 | 182 | for step in range(1, iteration + 1): 183 | batch_data = next(scene_data) 184 | batch_x = batch_data['data'] 185 | batch_y = batch_data['label'] 186 | 187 | train_feed_dict = { 188 | x: batch_x, 189 | label: batch_y, 190 | learning_rate: epoch_learning_rate, 191 | training_flag: True 192 | } 193 | 194 | _, batch_loss = sess.run([train_op, Total_loss], feed_dict=train_feed_dict) 195 | batch_acc = accuracy.eval(feed_dict=train_feed_dict) 196 | 197 | print("epoch: %d/%d, iter: %d/%d, batch_loss: %.4f, batch_acc: %.4f \n" % ( 198 | epoch, total_epochs, step, iteration, batch_loss, batch_acc)) 199 | 200 | train_loss += batch_loss 201 | train_acc += batch_acc 202 | 203 | if step % 30 == 0 : 204 | summary_str = sess.run(summary_op, feed_dict=train_feed_dict) 205 | summary_writer.add_summary(summary=summary_str, global_step=epoch) 206 | summary_writer.flush() 207 | 208 | 209 | train_loss /= iteration # average loss 210 | train_acc /= iteration # average accuracy 211 | 212 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss), 213 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)]) 214 | 215 | # test_acc, test_loss, test_summary = Evaluate(sess) 216 | 217 | summary_writer.add_summary(summary=train_summary, global_step=epoch) 218 | # 
summary_writer.add_summary(summary=test_summary, global_step=epoch) 219 | summary_writer.flush() 220 | 221 | # line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % ( 222 | # epoch, total_epochs, train_loss, train_acc, test_loss, test_acc) 223 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f \n" % ( 224 | epoch, total_epochs, train_loss, train_acc) 225 | print(line) 226 | 227 | with open('./logs_pretrain/logs.txt', 'a') as f: 228 | f.write(line) 229 | 230 | saver.save(sess=sess, save_path='./model_pretrain/model.ckpt') 231 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import os 4 | from tensorpack import imgaug, dataset, ModelDesc, InputDesc 5 | from tensorpack.dataflow import (PrefetchDataZMQ, BatchData) 6 | from dataflow_input import MyDataFlow 7 | import resnet_model 8 | from IPython import embed 9 | 10 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 11 | 12 | init_learning_rate = 0.01 13 | batch_size = 128 14 | image_size = 224 15 | img_channels = 3 16 | class_num = 80 17 | 18 | weight_decay = 1e-4 19 | momentum = 0.9 20 | 21 | total_epochs = 30 22 | iteration = 1*421 23 | # 128 * 421 ~ 53,879 24 | test_iteration = 10 25 | 26 | def optimistic_restore(session, save_file): 27 | reader = tf.train.NewCheckpointReader(save_file) 28 | saved_shapes = reader.get_variable_to_shape_map() 29 | var_names = sorted([(var.name, var.name.split(':')[0]) for var in tf.global_variables() if var.name.split(':')[0] in saved_shapes]) 30 | restore_vars = [] 31 | name2var = dict(zip(map(lambda x:x.name.split(':')[0], tf.global_variables()), tf.global_variables())) 32 | with tf.variable_scope('', reuse=True): 33 | for var_name, saved_var_name in var_names: 34 | curr_var = name2var[saved_var_name] 35 | var_shape = curr_var.get_shape().as_list() 36 | if var_shape == saved_shapes[saved_var_name]: 37 | restore_vars.append(curr_var) 38 | saver = tf.train.Saver(restore_vars) 39 | saver.restore(session, save_file) 40 | 41 | def center_loss(features, label, alfa, nrof_classes): 42 | """Center loss based on the paper "A Discriminative Feature Learning Approach for Deep Face Recognition" 43 | (http://ydwen.github.io/papers/WenECCV16.pdf) 44 | """ 45 | nrof_features = features.get_shape()[1] 46 | centers = tf.get_variable('centers', [nrof_classes, nrof_features], dtype=tf.float32, 47 | initializer=tf.constant_initializer(0), trainable=False) 48 | label = tf.reshape(label, [-1]) 49 | centers_batch = tf.gather(centers, label) 50 | diff = (1 - alfa) * (centers_batch - features) 51 | centers = tf.scatter_sub(centers, label, diff) 52 | # centers = tf.nn.l2_normalize(centers, 1, 1e-10, name='centers_norm') 53 | loss = tf.reduce_mean(tf.square(features - centers_batch)) 54 | return loss, centers 55 | 56 | def focal_loss(onehot_labels, cls_preds, 57 | alpha=0.25, gamma=2.0, name=None, scope=None): 58 | """Compute softmax focal loss between logits and onehot labels 59 | logits and onehot_labels must have same shape [batchsize, num_classes] and 60 | the same data type (float16, 32, 64) 61 | Args: 62 | onehot_labels: Each row labels[i] must be a valid probability distribution 63 | cls_preds: Unscaled log probabilities 64 | alpha: The hyperparameter for adjusting biased samples, default is 0.25 65 | gamma: The hyperparameter for penalizing the easy labeled samples 66 | name: A name for the operation 
(optional) 67 | Returns: 68 | A 1-D tensor of length batch_size of same type as logits with softmax focal loss 69 | """ 70 | with tf.name_scope(scope, 'focal_loss', [cls_preds, onehot_labels]) as sc: 71 | logits = tf.convert_to_tensor(cls_preds) 72 | onehot_labels = tf.convert_to_tensor(onehot_labels) 73 | 74 | precise_logits = tf.cast(logits, tf.float32) if ( 75 | logits.dtype == tf.float16) else logits 76 | onehot_labels = tf.cast(onehot_labels, precise_logits.dtype) 77 | predictions = tf.nn.sigmoid(logits) 78 | predictions_pt = tf.where(tf.equal(onehot_labels, 1), predictions, 1.-predictions) 79 | # add small value to avoid 0 80 | epsilon = 1e-8 81 | alpha_t = tf.scalar_mul(alpha, tf.ones_like(onehot_labels, dtype=tf.float32)) 82 | alpha_t = tf.where(tf.equal(onehot_labels, 1.0), alpha_t, 1-alpha_t) 83 | losses = tf.reduce_sum(-alpha_t * tf.pow(1. - predictions_pt, gamma) * tf.log(predictions_pt+epsilon), 84 | name=name, axis=1) 85 | return losses 86 | 87 | def Evaluate(sess): 88 | test_acc = 0.0 89 | test_loss = 0.0 90 | 91 | for it in range(test_iteration): 92 | batch_data = next(scene_data_val) 93 | test_batch_x = batch_data['data'] 94 | test_batch_y = batch_data['label'] 95 | 96 | test_feed_dict = { 97 | x: test_batch_x, 98 | label: test_batch_y, 99 | learning_rate: epoch_learning_rate, 100 | training_flag: False 101 | } 102 | 103 | loss_, acc_ = sess.run([Total_loss, accuracy], feed_dict=test_feed_dict) 104 | 105 | test_loss += loss_ 106 | test_acc += acc_ 107 | 108 | test_loss /= test_iteration # average loss 109 | test_acc /= test_iteration # average accuracy 110 | 111 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 112 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 113 | 114 | return test_acc, test_loss, summary 115 | 116 | def resnet_model_fn(inputs, training): 117 | """Our model_fn for ResNet to be used with our Estimator.""" 118 | 119 | network = resnet_model.imagenet_resnet_v2( 120 | resnet_size=18, num_classes=class_num, mode='se', data_format=None) 121 | inputs= network(inputs=inputs, is_training=training) 122 | feat = tf.nn.l2_normalize(inputs, 1, 1e-10, name='feat') 123 | inputs = tf.layers.dense(inputs=inputs, units=class_num) 124 | # inputs = tf.layers.dense(inputs=feat, units=class_num) 125 | inputs = tf.identity(inputs, 'final_dense') 126 | 127 | return inputs, feat 128 | 129 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10 130 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels]) 131 | label = tf.placeholder(tf.float32, shape=[None,]) 132 | one_hot_labels = tf.one_hot(indices=tf.cast(label, tf.int32), depth=class_num) 133 | 134 | training_flag = tf.placeholder(tf.bool) 135 | learning_rate = tf.placeholder(tf.float32, name='learning_rate') 136 | 137 | logits, feat = resnet_model_fn(x, training=training_flag) 138 | 139 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_labels, logits=logits)) 140 | Focal_loss = tf.reduce_mean(focal_loss(one_hot_labels, logits, alpha=0.5)) 141 | l2_loss = weight_decay * tf.add_n([tf.nn.l2_loss(v) for v in tf.trainable_variables()]) 142 | Center_loss, Centers = center_loss(feat, tf.cast(label, dtype=tf.int32), 0.95, class_num) 143 | Total_loss = cost + l2_loss + Center_loss 144 | 145 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True) 146 | # Batch norm requires update_ops to be added as a train_op dependency. 
147 | update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
148 | with tf.control_dependencies(update_ops):
149 | train_op = optimizer.minimize(Total_loss)
150 | 
151 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_labels, 1))
152 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
153 | 
154 | val_dir = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_images_20170908/'
155 | annotations = '/data0/AIChallenger/ai_challenger_scene_validation_20170908/scene_validation_annotations_20170908.json'
156 | # a custom DataFlow that yields batches as {'data': images, 'label': labels} dicts:
157 | df = MyDataFlow(val_dir, annotations, is_training=False, batch_size=batch_size, img_size=image_size)
158 | # run the validation dataflow in a single separate process
159 | df = PrefetchDataZMQ(df, nr_proc=1)
160 | df.reset_state()
161 | scene_data_val = df.get_data()
162 | 
163 | train_dir = '/data0/AIChallenger/ai_challenger_scene_train_20170904/scene_train_images_20170904/'
164 | annotations = '/data0/AIChallenger/ai_challenger_scene_train_20170904/scene_train_annotations_20170904.json'
165 | # the same DataFlow over the training set:
166 | df = MyDataFlow(train_dir, annotations, is_training=True, batch_size=batch_size, img_size=image_size)
167 | # start 10 processes to run the training dataflow in parallel
168 | df = PrefetchDataZMQ(df, nr_proc=10)
169 | df.reset_state()
170 | scene_data = df.get_data()
171 | 
172 | saver = tf.train.Saver(tf.global_variables())
173 | 
174 | with tf.Session() as sess:
175 | ckpt = tf.train.get_checkpoint_state('./model')
176 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
177 | print("loading checkpoint...")
178 | sess.run(tf.global_variables_initializer())
179 | optimistic_restore(sess, ckpt.model_checkpoint_path)
180 | # saver.restore(sess, ckpt.model_checkpoint_path)
181 | else:
182 | sess.run(tf.global_variables_initializer())
183 | 
184 | summary_writer = tf.summary.FileWriter('./logs', sess.graph)
185 | 
186 | _x = x[:, :, :, ::-1] # reverse the channel order for the image summary
187 | tf.summary.image('x', _x, 4)
188 | 
189 | summary_op = tf.summary.merge_all()
190 | 
191 | epoch_learning_rate = init_learning_rate
192 | for epoch in range(1, total_epochs + 1):
193 | if epoch % 20 == 0 :
194 | epoch_learning_rate = epoch_learning_rate / 10
195 | 
196 | train_acc = 0.0
197 | train_loss = 0.0
198 | 
199 | for step in range(1, iteration + 1):
200 | batch_data = next(scene_data)
201 | batch_x = batch_data['data']
202 | batch_y = batch_data['label']
203 | 
204 | train_feed_dict = {
205 | x: batch_x,
206 | label: batch_y,
207 | learning_rate: epoch_learning_rate,
208 | training_flag: True
209 | }
210 | 
211 | _, batch_loss, centers_class = sess.run([train_op, Total_loss, Centers], feed_dict=train_feed_dict)
212 | batch_acc = accuracy.eval(feed_dict=train_feed_dict)
213 | 
214 | print("epoch: %d/%d, iter: %d/%d, batch_loss: %.4f, batch_acc: %.4f \n" % (
215 | epoch, total_epochs, step, iteration, batch_loss, batch_acc))
216 | 
217 | train_loss += batch_loss
218 | train_acc += batch_acc
219 | 
220 | if step % 30 == 0 :
221 | summary_str = sess.run(summary_op, feed_dict=train_feed_dict)
222 | summary_writer.add_summary(summary=summary_str, global_step=epoch)
223 | summary_writer.flush()
224 | 
225 | 
226 | train_loss /= iteration # average loss
227 | train_acc /= iteration # average accuracy
228 | 
229 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss),
230 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)])
231 | 
232 | test_acc, test_loss, test_summary = Evaluate(sess)
233 | 
234 | summary_writer.add_summary(summary=train_summary, global_step=epoch)
235 | summary_writer.add_summary(summary=test_summary, global_step=epoch)
236 | summary_writer.flush()
237 | 
238 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % (
239 | epoch, total_epochs, train_loss, train_acc, test_loss, test_acc)
240 | print(line)
241 | 
242 | with open('./logs/logs.txt', 'a') as f:
243 | f.write(line)
244 | 
245 | saver.save(sess=sess, save_path='./model/model.ckpt')
246 | np.save("centers.npy", centers_class) # persist the class centers learned by center loss
247 | 
--------------------------------------------------------------------------------
/SE_Inception_v4.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tflearn.layers.conv import global_avg_pool
3 | from tensorflow.contrib.layers import batch_norm, flatten
4 | from tensorflow.contrib.framework import arg_scope
5 | import numpy as np
6 | import scene_input
7 | import os
8 | 
9 | os.environ['CUDA_VISIBLE_DEVICES'] = '2'
10 | 
11 | weight_decay = 0.0005
12 | momentum = 0.9
13 | 
14 | init_learning_rate = 0.1
15 | reduction_ratio = 4
16 | 
17 | batch_size = 32
18 | image_size = 96
19 | img_channels = 3
20 | class_num = 80
21 | 
22 | iteration = 391
23 | # 32 * 391 ~ 12,500 images per epoch
24 | 
25 | test_iteration = 10
26 | 
27 | total_epochs = 100
28 | 
29 | def conv_layer(input, filter, kernel, stride=1, padding='SAME', layer_name="conv"):
30 | with tf.name_scope(layer_name):
31 | network = tf.layers.conv2d(inputs=input, use_bias=True, filters=filter, kernel_size=kernel, strides=stride, padding=padding)
32 | network = Relu(network)
33 | return network
34 | 
35 | def Fully_connected(x, units=class_num, layer_name='fully_connected') :
36 | with tf.name_scope(layer_name) :
37 | return tf.layers.dense(inputs=x, use_bias=True, units=units)
38 | 
39 | def Relu(x):
40 | return tf.nn.relu(x)
41 | 
42 | def Sigmoid(x):
43 | return tf.nn.sigmoid(x)
44 | 
45 | def Global_Average_Pooling(x):
46 | return global_avg_pool(x, name='Global_avg_pooling')
47 | 
48 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') :
49 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
50 | 
51 | def Avg_pooling(x, pool_size=[3,3], stride=1, padding='SAME') :
52 | return tf.layers.average_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding)
53 | 
54 | def Batch_Normalization(x, training, scope):
55 | with arg_scope([batch_norm],
56 | scope=scope,
57 | updates_collections=None,
58 | decay=0.9,
59 | center=True,
60 | scale=True,
61 | zero_debias_moving_mean=True) :
62 | return tf.cond(training,
63 | lambda : batch_norm(inputs=x, is_training=training, reuse=None),
64 | lambda : batch_norm(inputs=x, is_training=training, reuse=True))
65 | 
66 | def Concatenation(layers) :
67 | return tf.concat(layers, axis=3)
68 | 
69 | def Dropout(x, rate, training) :
70 | return tf.layers.dropout(inputs=x, rate=rate, training=training)
71 | 
72 | def Evaluate(sess): # expects x, label, cost, accuracy, learning_rate, training_flag and scene_data_val as module-level globals
73 | test_acc = 0.0
74 | test_loss = 0.0
75 | 
76 | for it in range(test_iteration):
77 | test_batch_x, test_batch_y = scene_data_val.next_batch(batch_size, image_size)
78 | 
79 | test_feed_dict = {
80 | x: test_batch_x,
81 | label: test_batch_y,
82 | learning_rate: epoch_learning_rate,
83 | training_flag: False
84 | }
85 | 
86 | loss_, acc_ = sess.run([cost, accuracy],
feed_dict=test_feed_dict) 87 | 88 | test_loss += loss_ 89 | test_acc += acc_ 90 | 91 | test_loss /= test_iteration # average loss 92 | test_acc /= test_iteration # average accuracy 93 | 94 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 95 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 96 | 97 | return test_acc, test_loss, summary 98 | 99 | class SE_Inception_v4(): 100 | def __init__(self, x, training): 101 | self.training = training 102 | self.model = self.Build_SEnet(x) 103 | 104 | def Stem(self, x, scope): 105 | with tf.name_scope(scope) : 106 | x = conv_layer(x, filter=32, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_conv1') 107 | x = conv_layer(x, filter=32, kernel=[3,3], padding='VALID', layer_name=scope+'_conv2') 108 | block_1 = conv_layer(x, filter=64, kernel=[3,3], layer_name=scope+'_conv3') 109 | 110 | split_max_x = Max_pooling(block_1) 111 | split_conv_x = conv_layer(block_1, filter=96, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1') 112 | x = Concatenation([split_max_x,split_conv_x]) 113 | 114 | split_conv_x1 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv2') 115 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv3') 116 | 117 | split_conv_x2 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv4') 118 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[7,1], layer_name=scope+'_split_conv5') 119 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[1,7], layer_name=scope+'_split_conv6') 120 | split_conv_x2 = conv_layer(split_conv_x2, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv7') 121 | 122 | x = Concatenation([split_conv_x1,split_conv_x2]) 123 | 124 | split_conv_x = conv_layer(x, filter=192, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv8') 125 | split_max_x = Max_pooling(x) 126 | 127 | x = Concatenation([split_conv_x, split_max_x]) 128 | 129 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 130 | x = Relu(x) 131 | 132 | return x 133 | 134 | def Inception_A(self, x, scope): 135 | with tf.name_scope(scope) : 136 | split_conv_x1 = Avg_pooling(x) 137 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[1,1], layer_name=scope+'_split_conv1') 138 | 139 | split_conv_x2 = conv_layer(x, filter=96, kernel=[1,1], layer_name=scope+'_split_conv2') 140 | 141 | split_conv_x3 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv3') 142 | split_conv_x3 = conv_layer(split_conv_x3, filter=96, kernel=[3,3], layer_name=scope+'_split_conv4') 143 | 144 | split_conv_x4 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv5') 145 | split_conv_x4 = conv_layer(split_conv_x4, filter=96, kernel=[3,3], layer_name=scope+'_split_conv6') 146 | split_conv_x4 = conv_layer(split_conv_x4, filter=96, kernel=[3,3], layer_name=scope+'_split_conv7') 147 | 148 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3, split_conv_x4]) 149 | 150 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1') 151 | x = Relu(x) 152 | 153 | return x 154 | 155 | def Inception_B(self, x, scope): 156 | with tf.name_scope(scope) : 157 | init = x 158 | 159 | split_conv_x1 = Avg_pooling(x) 160 | split_conv_x1 = conv_layer(split_conv_x1, filter=128, kernel=[1,1], layer_name=scope+'_split_conv1') 161 | 162 | split_conv_x2 = conv_layer(x, filter=384, kernel=[1,1], 
layer_name=scope+'_split_conv2')
163 | 
164 | split_conv_x3 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv3')
165 | split_conv_x3 = conv_layer(split_conv_x3, filter=224, kernel=[1,7], layer_name=scope+'_split_conv4')
166 | split_conv_x3 = conv_layer(split_conv_x3, filter=256, kernel=[1,7], layer_name=scope+'_split_conv5') # note: the Inception-v4 paper uses a [7,1] conv here
167 | 
168 | split_conv_x4 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv6')
169 | split_conv_x4 = conv_layer(split_conv_x4, filter=192, kernel=[1,7], layer_name=scope+'_split_conv7')
170 | split_conv_x4 = conv_layer(split_conv_x4, filter=224, kernel=[7,1], layer_name=scope+'_split_conv8')
171 | split_conv_x4 = conv_layer(split_conv_x4, filter=224, kernel=[1,7], layer_name=scope+'_split_conv9')
172 | split_conv_x4 = conv_layer(split_conv_x4, filter=256, kernel=[7,1], layer_name=scope+'_split_conv10')
173 | 
174 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3, split_conv_x4])
175 | 
176 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
177 | x = Relu(x)
178 | 
179 | return x
180 | 
181 | def Inception_C(self, x, scope):
182 | with tf.name_scope(scope) :
183 | split_conv_x1 = Avg_pooling(x)
184 | split_conv_x1 = conv_layer(split_conv_x1, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
185 | 
186 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv2')
187 | 
188 | split_conv_x3 = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_split_conv3')
189 | split_conv_x3_1 = conv_layer(split_conv_x3, filter=256, kernel=[1,3], layer_name=scope+'_split_conv4')
190 | split_conv_x3_2 = conv_layer(split_conv_x3, filter=256, kernel=[3,1], layer_name=scope+'_split_conv5')
191 | 
192 | split_conv_x4 = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_split_conv6')
193 | split_conv_x4 = conv_layer(split_conv_x4, filter=448, kernel=[1,3], layer_name=scope+'_split_conv7')
194 | split_conv_x4 = conv_layer(split_conv_x4, filter=512, kernel=[3,1], layer_name=scope+'_split_conv8')
195 | split_conv_x4_1 = conv_layer(split_conv_x4, filter=256, kernel=[3,1], layer_name=scope+'_split_conv9')
196 | split_conv_x4_2 = conv_layer(split_conv_x4, filter=256, kernel=[1,3], layer_name=scope+'_split_conv10')
197 | 
198 | x = Concatenation([split_conv_x1, split_conv_x2, split_conv_x3_1, split_conv_x3_2, split_conv_x4_1, split_conv_x4_2])
199 | 
200 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
201 | x = Relu(x)
202 | 
203 | return x
204 | 
205 | def Reduction_A(self, x, scope):
206 | with tf.name_scope(scope) :
207 | k = 256
208 | l = 256
209 | m = 384
210 | n = 384
211 | 
212 | split_max_x = Max_pooling(x)
213 | 
214 | split_conv_x1 = conv_layer(x, filter=n, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1')
215 | 
216 | split_conv_x2 = conv_layer(x, filter=k, kernel=[1,1], layer_name=scope+'_split_conv2')
217 | split_conv_x2 = conv_layer(split_conv_x2, filter=l, kernel=[3,3], layer_name=scope+'_split_conv3')
218 | split_conv_x2 = conv_layer(split_conv_x2, filter=m, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
219 | 
220 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2])
221 | 
222 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
223 | x = Relu(x)
224 | 
225 | return x
226 | 
227 | def Reduction_B(self, x, scope):
228 | with tf.name_scope(scope) :
229 | split_max_x = Max_pooling(x)
230 | 
231 | split_conv_x1 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
232 | split_conv_x1 = conv_layer(split_conv_x1, filter=384, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv2')
233 | 
234 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv3')
235 | split_conv_x2 = conv_layer(split_conv_x2, filter=288, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
236 | 
237 | split_conv_x3 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv5')
238 | split_conv_x3 = conv_layer(split_conv_x3, filter=288, kernel=[3,3], layer_name=scope+'_split_conv6')
239 | split_conv_x3 = conv_layer(split_conv_x3, filter=320, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv7')
240 | 
241 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2, split_conv_x3])
242 | 
243 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
244 | x = Relu(x)
245 | 
246 | return x
247 | 
248 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
249 | with tf.name_scope(layer_name) :
250 | squeeze = Global_Average_Pooling(input_x)
251 | 
252 | excitation = Fully_connected(squeeze, units=out_dim // ratio, layer_name=layer_name+'_fully_connected1') # // so that units is an integer
253 | excitation = Relu(excitation)
254 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2')
255 | excitation = Sigmoid(excitation)
256 | 
257 | excitation = tf.reshape(excitation, [-1,1,1,out_dim])
258 | 
259 | scale = input_x * excitation
260 | 
261 | return scale
262 | 
263 | def Build_SEnet(self, input_x):
264 | # input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]])
265 | # the cifar10 variant pads 32x32 inputs up to 96x96;
266 | # scene images are fed in at 96x96 already, so the pad stays disabled
267 | 
268 | x = self.Stem(input_x, scope='stem')
269 | 
270 | for i in range(4) :
271 | x = self.Inception_A(x, scope='Inception_A'+str(i))
272 | channel = int(np.shape(x)[-1])
273 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_A'+str(i))
274 | 
275 | x = self.Reduction_A(x, scope='Reduction_A')
276 | 
277 | for i in range(7) :
278 | x = self.Inception_B(x, scope='Inception_B'+str(i))
279 | channel = int(np.shape(x)[-1])
280 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_B'+str(i))
281 | 
282 | x = self.Reduction_B(x, scope='Reduction_B')
283 | 
284 | for i in range(3) :
285 | x = self.Inception_C(x, scope='Inception_C'+str(i))
286 | channel = int(np.shape(x)[-1])
287 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_C'+str(i))
288 | 
289 | x = Global_Average_Pooling(x)
290 | x = Dropout(x, rate=0.2, training=self.training)
291 | x = flatten(x)
292 | 
293 | x = Fully_connected(x, layer_name='final_fully_connected')
294 | return x
295 | 
--------------------------------------------------------------------------------
/SE_Inception_resnet_v2.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tflearn.layers.conv import global_avg_pool
3 | from tensorflow.contrib.layers import batch_norm, flatten
4 | from tensorflow.contrib.framework import arg_scope
5 | from cifar10 import *
6 | import numpy as np
7 | 
8 | weight_decay = 0.0005
9 | momentum = 0.9
10 | 
11 | init_learning_rate = 0.1
12 | 
13 | reduction_ratio = 4
14 | 
15 | batch_size = 128
16 | iteration = 391
17 | # 128 * 391 ~ 50,000
18 | 
19 | test_iteration = 10
20 | 
21 | total_epochs = 100
22 | 
23 | def conv_layer(input, filter, kernel, stride=1,
padding='SAME', layer_name="conv", activation=True): 24 | with tf.name_scope(layer_name): 25 | network = tf.layers.conv2d(inputs=input, use_bias=True, filters=filter, kernel_size=kernel, strides=stride, padding=padding) 26 | if activation : 27 | network = Relu(network) 28 | return network 29 | 30 | def Fully_connected(x, units=class_num, layer_name='fully_connected') : 31 | with tf.name_scope(layer_name) : 32 | return tf.layers.dense(inputs=x, use_bias=True, units=units) 33 | 34 | def Relu(x): 35 | return tf.nn.relu(x) 36 | 37 | def Sigmoid(x): 38 | return tf.nn.sigmoid(x) 39 | 40 | def Global_Average_Pooling(x): 41 | return global_avg_pool(x, name='Global_avg_pooling') 42 | 43 | def Max_pooling(x, pool_size=[3,3], stride=2, padding='VALID') : 44 | return tf.layers.max_pooling2d(inputs=x, pool_size=pool_size, strides=stride, padding=padding) 45 | 46 | def Batch_Normalization(x, training, scope): 47 | with arg_scope([batch_norm], 48 | scope=scope, 49 | updates_collections=None, 50 | decay=0.9, 51 | center=True, 52 | scale=True, 53 | zero_debias_moving_mean=True) : 54 | return tf.cond(training, 55 | lambda : batch_norm(inputs=x, is_training=training, reuse=None), 56 | lambda : batch_norm(inputs=x, is_training=training, reuse=True)) 57 | 58 | def Concatenation(layers) : 59 | return tf.concat(layers, axis=3) 60 | 61 | def Dropout(x, rate, training) : 62 | return tf.layers.dropout(inputs=x, rate=rate, training=training) 63 | 64 | def Evaluate(sess): 65 | test_acc = 0.0 66 | test_loss = 0.0 67 | test_pre_index = 0 68 | add = 1000 69 | 70 | for it in range(test_iteration): 71 | test_batch_x = test_x[test_pre_index: test_pre_index + add] 72 | test_batch_y = test_y[test_pre_index: test_pre_index + add] 73 | test_pre_index = test_pre_index + add 74 | 75 | test_feed_dict = { 76 | x: test_batch_x, 77 | label: test_batch_y, 78 | learning_rate: epoch_learning_rate, 79 | training_flag: False 80 | } 81 | 82 | loss_, acc_ = sess.run([cost, accuracy], feed_dict=test_feed_dict) 83 | 84 | test_loss += loss_ 85 | test_acc += acc_ 86 | 87 | test_loss /= test_iteration # average loss 88 | test_acc /= test_iteration # average accuracy 89 | 90 | summary = tf.Summary(value=[tf.Summary.Value(tag='test_loss', simple_value=test_loss), 91 | tf.Summary.Value(tag='test_accuracy', simple_value=test_acc)]) 92 | 93 | return test_acc, test_loss, summary 94 | 95 | class SE_Inception_resnet_v2(): 96 | def __init__(self, x, training): 97 | self.training = training 98 | self.model = self.Build_SEnet(x) 99 | 100 | def Stem(self, x, scope): 101 | with tf.name_scope(scope) : 102 | x = conv_layer(x, filter=32, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_conv1') 103 | x = conv_layer(x, filter=32, kernel=[3,3], padding='VALID', layer_name=scope+'_conv2') 104 | block_1 = conv_layer(x, filter=64, kernel=[3,3], layer_name=scope+'_conv3') 105 | 106 | split_max_x = Max_pooling(block_1) 107 | split_conv_x = conv_layer(block_1, filter=96, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1') 108 | x = Concatenation([split_max_x,split_conv_x]) 109 | 110 | split_conv_x1 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv2') 111 | split_conv_x1 = conv_layer(split_conv_x1, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv3') 112 | 113 | split_conv_x2 = conv_layer(x, filter=64, kernel=[1,1], layer_name=scope+'_split_conv4') 114 | split_conv_x2 = conv_layer(split_conv_x2, filter=64, kernel=[7,1], layer_name=scope+'_split_conv5') 115 | split_conv_x2 = 
conv_layer(split_conv_x2, filter=64, kernel=[1,7], layer_name=scope+'_split_conv6')
116 | split_conv_x2 = conv_layer(split_conv_x2, filter=96, kernel=[3,3], padding='VALID', layer_name=scope+'_split_conv7')
117 | 
118 | x = Concatenation([split_conv_x1,split_conv_x2])
119 | 
120 | split_conv_x = conv_layer(x, filter=192, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv8')
121 | split_max_x = Max_pooling(x)
122 | 
123 | x = Concatenation([split_conv_x, split_max_x])
124 | 
125 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
126 | x = Relu(x)
127 | 
128 | return x
129 | 
130 | def Inception_resnet_A(self, x, scope):
131 | with tf.name_scope(scope) :
132 | init = x
133 | 
134 | split_conv_x1 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv1')
135 | 
136 | split_conv_x2 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv2')
137 | split_conv_x2 = conv_layer(split_conv_x2, filter=32, kernel=[3,3], layer_name=scope+'_split_conv3')
138 | 
139 | split_conv_x3 = conv_layer(x, filter=32, kernel=[1,1], layer_name=scope+'_split_conv4')
140 | split_conv_x3 = conv_layer(split_conv_x3, filter=48, kernel=[3,3], layer_name=scope+'_split_conv5')
141 | split_conv_x3 = conv_layer(split_conv_x3, filter=64, kernel=[3,3], layer_name=scope+'_split_conv6')
142 | 
143 | x = Concatenation([split_conv_x1,split_conv_x2,split_conv_x3])
144 | x = conv_layer(x, filter=384, kernel=[1,1], layer_name=scope+'_final_conv1', activation=False)
145 | 
146 | x = x*0.1 # scale the residual branch before the addition (Inception-ResNet)
147 | x = init + x
148 | 
149 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
150 | x = Relu(x)
151 | 
152 | return x
153 | 
154 | def Inception_resnet_B(self, x, scope):
155 | with tf.name_scope(scope) :
156 | init = x
157 | 
158 | split_conv_x1 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv1')
159 | 
160 | split_conv_x2 = conv_layer(x, filter=128, kernel=[1,1], layer_name=scope+'_split_conv2')
161 | split_conv_x2 = conv_layer(split_conv_x2, filter=160, kernel=[1,7], layer_name=scope+'_split_conv3')
162 | split_conv_x2 = conv_layer(split_conv_x2, filter=192, kernel=[7,1], layer_name=scope+'_split_conv4')
163 | 
164 | x = Concatenation([split_conv_x1, split_conv_x2])
165 | x = conv_layer(x, filter=1152, kernel=[1,1], layer_name=scope+'_final_conv1', activation=False)
166 | # the Inception-ResNet-v2 paper uses 1154 filters here
167 | x = x * 0.1
168 | x = init + x
169 | 
170 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
171 | x = Relu(x)
172 | 
173 | return x
174 | 
175 | def Inception_resnet_C(self, x, scope):
176 | with tf.name_scope(scope) :
177 | init = x
178 | 
179 | split_conv_x1 = conv_layer(x, filter=192, kernel=[1,1], layer_name=scope+'_split_conv1')
180 | 
181 | split_conv_x2 = conv_layer(x, filter=192, kernel=[1, 1], layer_name=scope + '_split_conv2')
182 | split_conv_x2 = conv_layer(split_conv_x2, filter=224, kernel=[1, 3], layer_name=scope + '_split_conv3')
183 | split_conv_x2 = conv_layer(split_conv_x2, filter=256, kernel=[3, 1], layer_name=scope + '_split_conv4')
184 | 
185 | x = Concatenation([split_conv_x1,split_conv_x2])
186 | x = conv_layer(x, filter=2144, kernel=[1,1], layer_name=scope+'_final_conv2', activation=False)
187 | # the Inception-ResNet-v2 paper uses 2048 filters here
188 | x = x * 0.1
189 | x = init + x
190 | 
191 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
192 | x = Relu(x)
193 | 
194 | return x
195 | 
196 | def Reduction_A(self, x, scope):
197 | with tf.name_scope(scope) :
198 | k = 256
199 | l = 256
200 | m = 384
201 | n = 384
202 | 
203 | split_max_x = Max_pooling(x)
204 | 
205 | split_conv_x1 = conv_layer(x, filter=n, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv1')
206 | 
207 | split_conv_x2 = conv_layer(x, filter=k, kernel=[1,1], layer_name=scope+'_split_conv2')
208 | split_conv_x2 = conv_layer(split_conv_x2, filter=l, kernel=[3,3], layer_name=scope+'_split_conv3')
209 | split_conv_x2 = conv_layer(split_conv_x2, filter=m, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
210 | 
211 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2])
212 | 
213 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
214 | x = Relu(x)
215 | 
216 | return x
217 | 
218 | def Reduction_B(self, x, scope):
219 | with tf.name_scope(scope) :
220 | split_max_x = Max_pooling(x)
221 | 
222 | split_conv_x1 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv1')
223 | split_conv_x1 = conv_layer(split_conv_x1, filter=384, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv2')
224 | 
225 | split_conv_x2 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv3')
226 | split_conv_x2 = conv_layer(split_conv_x2, filter=288, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv4')
227 | 
228 | split_conv_x3 = conv_layer(x, filter=256, kernel=[1,1], layer_name=scope+'_split_conv5')
229 | split_conv_x3 = conv_layer(split_conv_x3, filter=288, kernel=[3,3], layer_name=scope+'_split_conv6')
230 | split_conv_x3 = conv_layer(split_conv_x3, filter=320, kernel=[3,3], stride=2, padding='VALID', layer_name=scope+'_split_conv7')
231 | 
232 | x = Concatenation([split_max_x, split_conv_x1, split_conv_x2, split_conv_x3])
233 | 
234 | x = Batch_Normalization(x, training=self.training, scope=scope+'_batch1')
235 | x = Relu(x)
236 | 
237 | return x
238 | 
239 | def Squeeze_excitation_layer(self, input_x, out_dim, ratio, layer_name):
240 | with tf.name_scope(layer_name) :
241 | 
242 | 
243 | squeeze = Global_Average_Pooling(input_x)
244 | 
245 | excitation = Fully_connected(squeeze, units=out_dim // ratio, layer_name=layer_name+'_fully_connected1') # // so that units is an integer
246 | excitation = Relu(excitation)
247 | excitation = Fully_connected(excitation, units=out_dim, layer_name=layer_name+'_fully_connected2')
248 | excitation = Sigmoid(excitation)
249 | 
250 | excitation = tf.reshape(excitation, [-1,1,1,out_dim])
251 | scale = input_x * excitation
252 | 
253 | return scale
254 | 
255 | def Build_SEnet(self, input_x):
256 | input_x = tf.pad(input_x, [[0, 0], [32, 32], [32, 32], [0, 0]])
257 | # size 32 -> 96
258 | print(np.shape(input_x))
259 | # only cifar10 architecture
260 | 
261 | x = self.Stem(input_x, scope='stem')
262 | 
263 | for i in range(5) :
264 | x = self.Inception_resnet_A(x, scope='Inception_A'+str(i))
265 | channel = int(np.shape(x)[-1])
266 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_A'+str(i))
267 | 
268 | x = self.Reduction_A(x, scope='Reduction_A')
269 | 
270 | for i in range(10) :
271 | x = self.Inception_resnet_B(x, scope='Inception_B'+str(i))
272 | channel = int(np.shape(x)[-1])
273 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_B'+str(i))
274 | 
275 | x = self.Reduction_B(x, scope='Reduction_B')
276 | 
277 | for i in range(5) :
278 | x = self.Inception_resnet_C(x, scope='Inception_C'+str(i))
279 | channel = int(np.shape(x)[-1])
280 | x = self.Squeeze_excitation_layer(x, out_dim=channel, ratio=reduction_ratio, layer_name='SE_C'+str(i))
281 | 
282 | x =
Global_Average_Pooling(x) 283 | x = Dropout(x, rate=0.2, training=self.training) 284 | x = flatten(x) 285 | 286 | x = Fully_connected(x, layer_name='final_fully_connected') 287 | return x 288 | 289 | 290 | train_x, train_y, test_x, test_y = prepare_data() 291 | train_x, test_x = color_preprocessing(train_x, test_x) 292 | 293 | 294 | # image_size = 32, img_channels = 3, class_num = 10 in cifar10 295 | x = tf.placeholder(tf.float32, shape=[None, image_size, image_size, img_channels]) 296 | label = tf.placeholder(tf.float32, shape=[None, class_num]) 297 | 298 | training_flag = tf.placeholder(tf.bool) 299 | 300 | 301 | learning_rate = tf.placeholder(tf.float32, name='learning_rate') 302 | 303 | logits = SE_Inception_resnet_v2(x, training=training_flag).model 304 | cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits)) 305 | 306 | l2_loss = tf.add_n([tf.nn.l2_loss(var) for var in tf.trainable_variables()]) 307 | optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=momentum, use_nesterov=True) 308 | train = optimizer.minimize(cost + l2_loss * weight_decay) 309 | 310 | correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1)) 311 | accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) 312 | 313 | saver = tf.train.Saver(tf.global_variables()) 314 | 315 | with tf.Session() as sess: 316 | ckpt = tf.train.get_checkpoint_state('./model') 317 | if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path): 318 | saver.restore(sess, ckpt.model_checkpoint_path) 319 | else: 320 | sess.run(tf.global_variables_initializer()) 321 | 322 | summary_writer = tf.summary.FileWriter('./logs', sess.graph) 323 | 324 | epoch_learning_rate = init_learning_rate 325 | for epoch in range(1, total_epochs + 1): 326 | if epoch % 30 == 0 : 327 | epoch_learning_rate = epoch_learning_rate / 10 328 | 329 | pre_index = 0 330 | train_acc = 0.0 331 | train_loss = 0.0 332 | 333 | for step in range(1, iteration + 1): 334 | if pre_index + batch_size < 50000: 335 | batch_x = train_x[pre_index: pre_index + batch_size] 336 | batch_y = train_y[pre_index: pre_index + batch_size] 337 | else: 338 | batch_x = train_x[pre_index:] 339 | batch_y = train_y[pre_index:] 340 | 341 | batch_x = data_augmentation(batch_x) 342 | 343 | train_feed_dict = { 344 | x: batch_x, 345 | label: batch_y, 346 | learning_rate: epoch_learning_rate, 347 | training_flag: True 348 | } 349 | 350 | _, batch_loss = sess.run([train, cost], feed_dict=train_feed_dict) 351 | batch_acc = accuracy.eval(feed_dict=train_feed_dict) 352 | 353 | train_loss += batch_loss 354 | train_acc += batch_acc 355 | pre_index += batch_size 356 | 357 | 358 | train_loss /= iteration # average loss 359 | train_acc /= iteration # average accuracy 360 | 361 | train_summary = tf.Summary(value=[tf.Summary.Value(tag='train_loss', simple_value=train_loss), 362 | tf.Summary.Value(tag='train_accuracy', simple_value=train_acc)]) 363 | 364 | test_acc, test_loss, test_summary = Evaluate(sess) 365 | 366 | summary_writer.add_summary(summary=train_summary, global_step=epoch) 367 | summary_writer.add_summary(summary=test_summary, global_step=epoch) 368 | summary_writer.flush() 369 | 370 | line = "epoch: %d/%d, train_loss: %.4f, train_acc: %.4f, test_loss: %.4f, test_acc: %.4f \n" % ( 371 | epoch, total_epochs, train_loss, train_acc, test_loss, test_acc) 372 | print(line) 373 | 374 | with open('logs.txt', 'a') as f: 375 | f.write(line) 376 | 377 | saver.save(sess=sess, save_path='./model/Inception_resnet_v2.ckpt') 
-------------------------------------------------------------------------------- /resnet_model.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | """Contains definitions for the preactivation form of Residual Networks. 16 | 17 | Residual networks (ResNets) were originally proposed in: 18 | [1] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 19 | Deep Residual Learning for Image Recognition. arXiv:1512.03385 20 | 21 | The full preactivation 'v2' ResNet variant implemented in this module was 22 | introduced by: 23 | [2] Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun 24 | Identity Mappings in Deep Residual Networks. arXiv: 1603.05027 25 | 26 | The key difference of the full preactivation 'v2' variant compared to the 27 | 'v1' variant in [1] is the use of batch normalization before every weight layer 28 | rather than after. 29 | """ 30 | 31 | from __future__ import absolute_import 32 | from __future__ import division 33 | from __future__ import print_function 34 | 35 | import tensorflow as tf 36 | import numpy as np 37 | from IPython import embed 38 | 39 | _BATCH_NORM_DECAY = 0.997 40 | _BATCH_NORM_EPSILON = 1e-5 41 | 42 | 43 | def batch_norm_relu(inputs, is_training, data_format): 44 | """Performs a batch normalization followed by a ReLU.""" 45 | # We set fused=True for a significant performance boost. See 46 | # https://www.tensorflow.org/performance/performance_guide#common_fused_ops 47 | inputs = tf.layers.batch_normalization( 48 | inputs=inputs, axis=1 if data_format == 'channels_first' else 3, 49 | momentum=_BATCH_NORM_DECAY, epsilon=_BATCH_NORM_EPSILON, center=True, 50 | scale=True, training=is_training, fused=True) 51 | inputs = tf.nn.relu(inputs) 52 | return inputs 53 | 54 | 55 | def fixed_padding(inputs, kernel_size, data_format): 56 | """Pads the input along the spatial dimensions independently of input size. 57 | 58 | Args: 59 | inputs: A tensor of size [batch, channels, height_in, width_in] or 60 | [batch, height_in, width_in, channels] depending on data_format. 61 | kernel_size: The kernel to be used in the conv2d or max_pool2d operation. 62 | Should be a positive integer. 63 | data_format: The input format ('channels_last' or 'channels_first'). 64 | 65 | Returns: 66 | A tensor with the same format as the input with the data either intact 67 | (if kernel_size == 1) or padded (if kernel_size > 1). 
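For example, with kernel_size=3 the total padding is 2: one row/column
of zeros before and one after each spatial dimension.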
68 | """ 69 | pad_total = kernel_size - 1 70 | pad_beg = pad_total // 2 71 | pad_end = pad_total - pad_beg 72 | 73 | if data_format == 'channels_first': 74 | padded_inputs = tf.pad(inputs, [[0, 0], [0, 0], 75 | [pad_beg, pad_end], [pad_beg, pad_end]]) 76 | else: 77 | padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg, pad_end], 78 | [pad_beg, pad_end], [0, 0]]) 79 | return padded_inputs 80 | 81 | 82 | def conv2d_fixed_padding(inputs, filters, kernel_size, strides, data_format): 83 | """Strided 2-D convolution with explicit padding.""" 84 | # The padding is consistent and is based only on `kernel_size`, not on the 85 | # dimensions of `inputs` (as opposed to using `tf.layers.conv2d` alone). 86 | if strides > 1: 87 | inputs = fixed_padding(inputs, kernel_size, data_format) 88 | 89 | return tf.layers.conv2d( 90 | inputs=inputs, filters=filters, kernel_size=kernel_size, strides=strides, 91 | padding=('SAME' if strides == 1 else 'VALID'), use_bias=False, 92 | kernel_initializer=tf.variance_scaling_initializer(), 93 | data_format=data_format) 94 | 95 | 96 | def GlobalAvgPooling(x, data_format): 97 | """ 98 | Global average pooling as in the paper `Network In Network 99 | `_. 100 | Args: 101 | x (tf.Tensor): a NHWC tensor. 102 | Returns: 103 | tf.Tensor: a NC tensor named ``output``. 104 | """ 105 | assert x.shape.ndims == 4 106 | assert data_format in ['channels_last', 'channels_first'] 107 | axis = [1, 2] if data_format == 'channels_last' else [2, 3] 108 | return tf.reduce_mean(x, axis, name='GlobalAvgPooling') 109 | 110 | 111 | def flatten(x): 112 | """ 113 | Flatten the tensor. 114 | """ 115 | return tf.reshape(x, [-1]) 116 | 117 | 118 | def batch_flatten(x): 119 | """ 120 | Flatten the tensor except the first dimension. 121 | """ 122 | shape = x.get_shape().as_list()[1:] 123 | if None not in shape: 124 | return tf.reshape(x, [-1, int(np.prod(shape))]) 125 | return tf.reshape(x, tf.stack([tf.shape(x)[0], -1])) 126 | 127 | 128 | def FullyConnected(x, out_dim, 129 | W_init=None, b_init=None, 130 | nl=tf.identity, use_bias=True, name='fc'): 131 | """ 132 | Fully-Connected layer, takes a N>1D tensor and returns a 2D tensor. 133 | It is an equivalent of `tf.layers.dense` except for naming conventions. 134 | Args: 135 | x (tf.Tensor): a tensor to be flattened except for the first dimension. 136 | out_dim (int): output dimension 137 | W_init: initializer for W. Defaults to `variance_scaling_initializer`. 138 | b_init: initializer for b. Defaults to zero. 139 | nl: a nonlinearity function 140 | use_bias (bool): whether to use bias. 141 | Returns: 142 | tf.Tensor: a NC tensor named ``output`` with attribute `variables`. 143 | Variable Names: 144 | * ``W``: weights of shape [in_dim, out_dim] 145 | * ``b``: bias 146 | """ 147 | x = batch_flatten(x) 148 | 149 | if W_init is None: 150 | W_init = tf.contrib.layers.variance_scaling_initializer() 151 | if b_init is None: 152 | b_init = tf.constant_initializer() 153 | 154 | x = tf.layers.dense( 155 | inputs=x, units=out_dim, activation=lambda x: nl(x, name='output'), use_bias=use_bias, 156 | kernel_initializer=W_init, bias_initializer=b_init, 157 | trainable=True) 158 | 159 | x = tf.identity(x, name) 160 | 161 | return x 162 | 163 | 164 | def building_block(inputs, filters, is_training, projection_shortcut, strides, 165 | data_format): 166 | """Standard building block for residual networks with BN before convolutions. 
167 | 
168 | Args:
169 | inputs: A tensor of size [batch, channels, height_in, width_in] or
170 | [batch, height_in, width_in, channels] depending on data_format.
171 | filters: The number of filters for the convolutions.
172 | is_training: A Boolean for whether the model is in training or inference
173 | mode. Needed for batch normalization.
174 | projection_shortcut: The function to use for projection shortcuts (typically
175 | a 1x1 convolution when downsampling the input).
176 | strides: The block's stride. If greater than 1, this block will ultimately
177 | downsample the input.
178 | data_format: The input format ('channels_last' or 'channels_first').
179 | 
180 | Returns:
181 | The output tensor of the block.
182 | """
183 | shortcut = inputs
184 | inputs = batch_norm_relu(inputs, is_training, data_format)
185 | 
186 | # The projection shortcut should come after the first batch norm and ReLU
187 | # since it performs a 1x1 convolution.
188 | if projection_shortcut is not None:
189 | shortcut = projection_shortcut(inputs)
190 | 
191 | inputs = conv2d_fixed_padding(
192 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
193 | data_format=data_format)
194 | 
195 | inputs = batch_norm_relu(inputs, is_training, data_format)
196 | inputs = conv2d_fixed_padding(
197 | inputs=inputs, filters=filters, kernel_size=3, strides=1,
198 | data_format=data_format)
199 | 
200 | return inputs + shortcut
201 | 
202 | 
203 | def se_building_block(inputs, filters, is_training, projection_shortcut, strides,
204 | data_format):
205 | """Standard building block with BN before convolutions plus a squeeze-and-excitation gate.
206 | 
207 | Args:
208 | inputs: A tensor of size [batch, channels, height_in, width_in] or
209 | [batch, height_in, width_in, channels] depending on data_format.
210 | filters: The number of filters for the convolutions.
211 | is_training: A Boolean for whether the model is in training or inference
212 | mode. Needed for batch normalization.
213 | projection_shortcut: The function to use for projection shortcuts (typically
214 | a 1x1 convolution when downsampling the input).
215 | strides: The block's stride. If greater than 1, this block will ultimately
216 | downsample the input.
217 | data_format: The input format ('channels_last' or 'channels_first').
218 | 
219 | Returns:
220 | The output tensor of the block.
221 | """
222 | shortcut = inputs
223 | inputs = batch_norm_relu(inputs, is_training, data_format)
224 | 
225 | # The projection shortcut should come after the first batch norm and ReLU
226 | # since it performs a 1x1 convolution.
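# The body below repeats building_block; the difference is the
# squeeze-and-excitation gate that rescales the channels of the residual
# branch just before the shortcut addition.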
227 | if projection_shortcut is not None:
228 | shortcut = projection_shortcut(inputs)
229 | 
230 | inputs = conv2d_fixed_padding(
231 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
232 | data_format=data_format)
233 | 
234 | inputs = batch_norm_relu(inputs, is_training, data_format)
235 | inputs = conv2d_fixed_padding(
236 | inputs=inputs, filters=filters, kernel_size=3, strides=1,
237 | data_format=data_format)
238 | 
239 | squeeze = GlobalAvgPooling(inputs, data_format) # squeeze: one value per channel
240 | squeeze = FullyConnected(squeeze, filters // 4, nl=tf.nn.relu, name='fc1') # excitation bottleneck
241 | squeeze = FullyConnected(squeeze, filters, nl=tf.nn.sigmoid, name='fc2') # per-channel gates in (0, 1)
242 | 
243 | if data_format == 'channels_first':
244 | inputs = inputs * tf.reshape(squeeze, [-1, filters, 1, 1])
245 | else:
246 | inputs = inputs * tf.reshape(squeeze, [-1, 1, 1, filters])
247 | return inputs + shortcut
248 | 
249 | 
250 | def bottleneck_block(inputs, filters, is_training, projection_shortcut,
251 | strides, data_format):
252 | """Bottleneck block variant for residual networks with BN before convolutions.
253 | 
254 | Args:
255 | inputs: A tensor of size [batch, channels, height_in, width_in] or
256 | [batch, height_in, width_in, channels] depending on data_format.
257 | filters: The number of filters for the first two convolutions. Note that the
258 | third and final convolution will use 4 times as many filters.
259 | is_training: A Boolean for whether the model is in training or inference
260 | mode. Needed for batch normalization.
261 | projection_shortcut: The function to use for projection shortcuts (typically
262 | a 1x1 convolution when downsampling the input).
263 | strides: The block's stride. If greater than 1, this block will ultimately
264 | downsample the input.
265 | data_format: The input format ('channels_last' or 'channels_first').
266 | 
267 | Returns:
268 | The output tensor of the block.
269 | """
270 | shortcut = inputs
271 | inputs = batch_norm_relu(inputs, is_training, data_format)
272 | 
273 | # The projection shortcut should come after the first batch norm and ReLU
274 | # since it performs a 1x1 convolution.
275 | if projection_shortcut is not None:
276 | shortcut = projection_shortcut(inputs)
277 | 
278 | inputs = conv2d_fixed_padding(
279 | inputs=inputs, filters=filters, kernel_size=1, strides=1,
280 | data_format=data_format)
281 | 
282 | inputs = batch_norm_relu(inputs, is_training, data_format)
283 | inputs = conv2d_fixed_padding(
284 | inputs=inputs, filters=filters, kernel_size=3, strides=strides,
285 | data_format=data_format)
286 | 
287 | inputs = batch_norm_relu(inputs, is_training, data_format)
288 | inputs = conv2d_fixed_padding(
289 | inputs=inputs, filters=4 * filters, kernel_size=1, strides=1,
290 | data_format=data_format)
291 | 
292 | return inputs + shortcut
293 | 
294 | 
295 | def se_bottleneck_block(inputs, filters, is_training, projection_shortcut,
296 | strides, data_format):
297 | """Bottleneck block variant with BN before convolutions plus a squeeze-and-excitation gate.
298 | 
299 | Args:
300 | inputs: A tensor of size [batch, channels, height_in, width_in] or
301 | [batch, height_in, width_in, channels] depending on data_format.
302 | filters: The number of filters for the first two convolutions. Note that the
303 | third and final convolution will use 4 times as many filters.
304 | is_training: A Boolean for whether the model is in training or inference
305 | mode. Needed for batch normalization.
306 | projection_shortcut: The function to use for projection shortcuts (typically 307 | a 1x1 convolution when downsampling the input). 308 | strides: The block's stride. If greater than 1, this block will ultimately 309 | downsample the input. 310 | data_format: The input format ('channels_last' or 'channels_first'). 311 | 312 | Returns: 313 | The output tensor of the block. 314 | """ 315 | shortcut = inputs 316 | inputs = batch_norm_relu(inputs, is_training, data_format) 317 | 318 | # The projection shortcut should come after the first batch norm and ReLU 319 | # since it performs a 1x1 convolution. 320 | if projection_shortcut is not None: 321 | shortcut = projection_shortcut(inputs) 322 | 323 | inputs = conv2d_fixed_padding( 324 | inputs=inputs, filters=filters, kernel_size=1, strides=1, 325 | data_format=data_format) 326 | 327 | inputs = batch_norm_relu(inputs, is_training, data_format) 328 | inputs = conv2d_fixed_padding( 329 | inputs=inputs, filters=filters, kernel_size=3, strides=strides, 330 | data_format=data_format) 331 | 332 | inputs = batch_norm_relu(inputs, is_training, data_format) 333 | inputs = conv2d_fixed_padding( 334 | inputs=inputs, filters=4 * filters, kernel_size=1, strides=1, 335 | data_format=data_format) 336 | 337 | squeeze = GlobalAvgPooling(inputs, data_format) 338 | squeeze = FullyConnected(squeeze, filters // 4, nl=tf.nn.relu, name='fc1') 339 | squeeze = FullyConnected(squeeze, filters * 4, nl=tf.nn.sigmoid, name='fc2') 340 | if data_format == 'channels_first': 341 | inputs = inputs * tf.reshape(squeeze, [-1, filters * 4, 1, 1]) 342 | else: 343 | inputs = inputs * tf.reshape(squeeze, [-1, 1, 1, filters * 4]) 344 | 345 | return inputs + shortcut 346 | 347 | 348 | def block_layer(inputs, filters, block_fn, blocks, strides, is_training, name, 349 | data_format): 350 | """Creates one layer of blocks for the ResNet model. 351 | 352 | Args: 353 | inputs: A tensor of size [batch, channels, height_in, width_in] or 354 | [batch, height_in, width_in, channels] depending on data_format. 355 | filters: The number of filters for the first convolution of the layer. 356 | block_fn: The block to use within the model, either `building_block` or 357 | `bottleneck_block`. 358 | blocks: The number of blocks contained in the layer. 359 | strides: The stride to use for the first convolution of the layer. If 360 | greater than 1, this layer will ultimately downsample the input. 361 | is_training: Either True or False, whether we are currently training the 362 | model. Needed for batch norm. 363 | name: A string name for the tensor output of the block layer. 364 | data_format: The input format ('channels_last' or 'channels_first'). 365 | 366 | Returns: 367 | The output tensor of the block layer. 
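Only the first block of the layer applies `strides` and the projection
shortcut; the remaining blocks use stride 1 and identity shortcuts.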
368 | """ 369 | # Bottleneck blocks end with 4x the number of filters as they start with 370 | filters_out = 4 * filters if block_fn in [bottleneck_block, se_bottleneck_block] else filters 371 | 372 | def projection_shortcut(inputs): 373 | return conv2d_fixed_padding( 374 | inputs=inputs, filters=filters_out, kernel_size=1, strides=strides, 375 | data_format=data_format) 376 | 377 | # Only the first block per block_layer uses projection_shortcut and strides 378 | inputs = block_fn(inputs, filters, is_training, projection_shortcut, strides, 379 | data_format) 380 | 381 | for _ in range(1, blocks): 382 | inputs = block_fn(inputs, filters, is_training, None, 1, data_format) 383 | 384 | return tf.identity(inputs, name) 385 | 386 | 387 | def cifar10_resnet_v2_generator(resnet_size, num_classes, data_format=None): 388 | """Generator for CIFAR-10 ResNet v2 models. 389 | 390 | Args: 391 | resnet_size: A single integer for the size of the ResNet model. 392 | num_classes: The number of possible classes for image classification. 393 | data_format: The input format ('channels_last', 'channels_first', or None). 394 | If set to None, the format is dependent on whether a GPU is available. 395 | 396 | Returns: 397 | The model function that takes in `inputs` and `is_training` and 398 | returns the output tensor of the ResNet model. 399 | 400 | Raises: 401 | ValueError: If `resnet_size` is invalid. 402 | """ 403 | if resnet_size % 6 != 2: 404 | raise ValueError('resnet_size must be 6n + 2:', resnet_size) 405 | 406 | num_blocks = (resnet_size - 2) // 6 407 | 408 | if data_format is None: 409 | data_format = ( 410 | 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last') 411 | 412 | def model(inputs, is_training): 413 | """Constructs the ResNet model given the inputs.""" 414 | if data_format == 'channels_first': 415 | # Convert from channels_last (NHWC) to channels_first (NCHW). This 416 | # provides a large performance boost on GPU. See 417 | # https://www.tensorflow.org/performance/performance_guide#data_formats 418 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 419 | 420 | inputs = conv2d_fixed_padding( 421 | inputs=inputs, filters=16, kernel_size=3, strides=1, 422 | data_format=data_format) 423 | inputs = tf.identity(inputs, 'initial_conv') 424 | 425 | inputs = block_layer( 426 | inputs=inputs, filters=16, block_fn=building_block, blocks=num_blocks, 427 | strides=1, is_training=is_training, name='block_layer1', 428 | data_format=data_format) 429 | inputs = block_layer( 430 | inputs=inputs, filters=32, block_fn=building_block, blocks=num_blocks, 431 | strides=2, is_training=is_training, name='block_layer2', 432 | data_format=data_format) 433 | inputs = block_layer( 434 | inputs=inputs, filters=64, block_fn=building_block, blocks=num_blocks, 435 | strides=2, is_training=is_training, name='block_layer3', 436 | data_format=data_format) 437 | 438 | inputs = batch_norm_relu(inputs, is_training, data_format) 439 | inputs = tf.layers.average_pooling2d( 440 | inputs=inputs, pool_size=8, strides=1, padding='VALID', 441 | data_format=data_format) 442 | inputs = tf.identity(inputs, 'final_avg_pool') 443 | inputs = tf.reshape(inputs, [-1, 64]) 444 | inputs = tf.layers.dense(inputs=inputs, units=num_classes) 445 | inputs = tf.identity(inputs, 'final_dense') 446 | return inputs 447 | 448 | return model 449 | 450 | 451 | def imagenet_resnet_v2_generator(block_fn, layers, num_classes, 452 | data_format=None): 453 | """Generator for ImageNet ResNet v2 models. 
454 | 455 | Args: 456 | block_fn: The block to use within the model, either `building_block` or 457 | `bottleneck_block`. 458 | layers: A length-4 array denoting the number of blocks to include in each 459 | layer. Each layer consists of blocks that take inputs of the same size. 460 | num_classes: The number of possible classes for image classification. 461 | data_format: The input format ('channels_last', 'channels_first', or None). 462 | If set to None, the format is dependent on whether a GPU is available. 463 | 464 | Returns: 465 | The model function that takes in `inputs` and `is_training` and 466 | returns the output tensor of the ResNet model. 467 | """ 468 | if data_format is None: 469 | data_format = ( 470 | 'channels_first' if tf.test.is_built_with_cuda() else 'channels_last') 471 | 472 | def model(inputs, is_training): 473 | """Constructs the ResNet model given the inputs.""" 474 | if data_format == 'channels_first': 475 | # Convert from channels_last (NHWC) to channels_first (NCHW). This 476 | # provides a large performance boost on GPU. 477 | inputs = tf.transpose(inputs, [0, 3, 1, 2]) 478 | 479 | inputs = conv2d_fixed_padding( 480 | inputs=inputs, filters=64, kernel_size=7, strides=2, 481 | data_format=data_format) 482 | inputs = tf.identity(inputs, 'initial_conv') 483 | inputs = tf.layers.max_pooling2d( 484 | inputs=inputs, pool_size=3, strides=2, padding='SAME', 485 | data_format=data_format) 486 | inputs = tf.identity(inputs, 'initial_max_pool') 487 | 488 | inputs = block_layer( 489 | inputs=inputs, filters=64, block_fn=block_fn, blocks=layers[0], 490 | strides=1, is_training=is_training, name='block_layer1', 491 | data_format=data_format) 492 | inputs = block_layer( 493 | inputs=inputs, filters=128, block_fn=block_fn, blocks=layers[1], 494 | strides=2, is_training=is_training, name='block_layer2', 495 | data_format=data_format) 496 | inputs = block_layer( 497 | inputs=inputs, filters=256, block_fn=block_fn, blocks=layers[2], 498 | strides=2, is_training=is_training, name='block_layer3', 499 | data_format=data_format) 500 | inputs = block_layer( 501 | inputs=inputs, filters=512, block_fn=block_fn, blocks=layers[3], 502 | strides=2, is_training=is_training, name='block_layer4', 503 | data_format=data_format) 504 | inputs = batch_norm_relu(inputs, is_training, data_format) 505 | inputs = tf.layers.average_pooling2d( 506 | inputs=inputs, pool_size=7, strides=1, padding='VALID', 507 | data_format=data_format) 508 | inputs = tf.identity(inputs, 'final_avg_pool') 509 | inputs = tf.reshape(inputs, 510 | [-1, 512 if block_fn in [building_block, se_building_block] else 2048]) 511 | # inputs = tf.layers.dense(inputs=inputs, units=num_classes) 512 | # inputs = tf.identity(inputs, 'final_dense') 513 | return inputs 514 | 515 | return model 516 | 517 | 518 | def imagenet_resnet_v2(resnet_size, num_classes, mode='v2', data_format=None): 519 | """Returns the ResNet model for a given size and number of output classes.""" 520 | building_block_mode = { 521 | 'v2': building_block, 522 | 'se': se_building_block}[mode] 523 | bottleneck_block_mode = { 524 | 'v2': bottleneck_block, 525 | 'se': se_bottleneck_block}[mode] 526 | 527 | model_params = { 528 | 18: {'block': building_block_mode, 'layers': [2, 2, 2, 2]}, 529 | 34: {'block': building_block_mode, 'layers': [3, 4, 6, 3]}, 530 | 50: {'block': bottleneck_block_mode, 'layers': [3, 4, 6, 3]}, 531 | 101: {'block': bottleneck_block_mode, 'layers': [3, 4, 23, 3]}, 532 | 152: {'block': bottleneck_block_mode, 'layers': [3, 8, 36, 3]}, 533 | 200: 
{'block': bottleneck_block_mode, 'layers': [3, 24, 36, 3]} 534 | } 535 | 536 | if resnet_size not in model_params: 537 | raise ValueError('Not a valid resnet_size:', resnet_size) 538 | 539 | params = model_params[resnet_size] 540 | return imagenet_resnet_v2_generator( 541 | params['block'], params['layers'], num_classes, data_format) 542 | --------------------------------------------------------------------------------