├── .gitignore
├── LICENSE
├── README.md
├── gc.py
├── gc_densenet.py
├── gc_inception_resnet_v2.py
├── gc_inception_v3.py
├── gc_mobilenets.py
├── gc_resnet.py
├── group_norm.py
└── images
    └── gc.PNG


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | *.egg-info/
 24 | .installed.cfg
 25 | *.egg
 26 | MANIFEST
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | .pytest_cache/
 49 | 
 50 | # Translations
 51 | *.mo
 52 | *.pot
 53 | 
 54 | # Django stuff:
 55 | *.log
 56 | local_settings.py
 57 | db.sqlite3
 58 | 
 59 | # Flask stuff:
 60 | instance/
 61 | .webassets-cache
 62 | 
 63 | # Scrapy stuff:
 64 | .scrapy
 65 | 
 66 | # Sphinx documentation
 67 | docs/_build/
 68 | 
 69 | # PyBuilder
 70 | target/
 71 | 
 72 | # Jupyter Notebook
 73 | .ipynb_checkpoints
 74 | 
 75 | # pyenv
 76 | .python-version
 77 | 
 78 | # celery beat schedule file
 79 | celerybeat-schedule
 80 | 
 81 | # SageMath parsed files
 82 | *.sage.py
 83 | 
 84 | # Environments
 85 | .env
 86 | .venv
 87 | env/
 88 | venv/
 89 | ENV/
 90 | env.bak/
 91 | venv.bak/
 92 | 
 93 | # Spyder project settings
 94 | .spyderproject
 95 | .spyproject
 96 | 
 97 | # Rope project settings
 98 | .ropeproject
 99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
106 | # Pycharm
107 | .idea
108 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Somshubra Majumdar
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Keras Global Context Attention Blocks
 2 | 
 3 | Keras implementation of the Global Context block from the paper [GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492).
 4 | 
 5 | Supports Conv1D, Conv2D and Conv3D directly with no modifications.
 6 | 
 7 | <img src="https://github.com/titu1994/keras-global-context-networks/blob/master/images/gc.PNG?raw=true" height=100% width=100%>
 8 | 
 9 | # Usage
10 | 
11 | Import `global_context_block` from `gc.py` and provide it a tensor as input.
12 | 
13 | ```python
14 | from gc import global_context_block
15 | 
16 | ip = Input(...)
17 | x = ConvND(...)(ip)
18 | 
19 | # apply Global Context
20 | x = global_context_block(x, reduction_ratio=16, transform_activation='linear')
21 | ...
22 | ```
23 | 
24 | # Parameters
25 | 
26 | There are just two parameters to manage : 
27 | ```
28 |  - reduction_ratio: The ratio to scale the transform block.
29 |  - transform_activation: The activation function prior to addition of the input with the context.
30 |                          The paper uses no activation, but `sigmoid` may do better.
31 | ```
32 | 
33 | # Requirements
34 |   - Keras 2.2.4+
35 |   - Tensorflow (1.13+) or CNTK
36 | 


--------------------------------------------------------------------------------
/gc.py:
--------------------------------------------------------------------------------
  1 | from keras.layers import Conv1D, Conv2D, Conv3D
  2 | from keras.layers import Reshape
  3 | from keras.layers import Activation
  4 | from keras.layers import Softmax
  5 | from keras.layers import Permute
  6 | from keras.layers import add, dot
  7 | 
  8 | from keras import backend as K
  9 | 
 10 | from group_norm import GroupNormalization
 11 | 
 12 | 
 13 | def global_context_block(ip, reduction_ratio=16, transform_activation='linear'):
 14 |     """
 15 |     Adds a Global Context attention block for self attention to the input tensor.
 16 |     Input tensor can be or rank 3 (temporal), 4 (spatial) or 5 (spatio-temporal).
 17 | 
 18 |     # Arguments:
 19 |         ip: input tensor
 20 |         intermediate_dim: The dimension of the intermediate representation. Can be
 21 |             `None` or a positive integer greater than 0. If `None`, computes the
 22 |             intermediate dimension as half of the input channel dimension.
 23 |         reduction_ratio: Reduces the input filters by this factor for the
 24 |             bottleneck block of the transform submodule. Node: the reduction
 25 |             ratio must be set such that it divides the input number of channels,
 26 |         transform_activation: activation function to apply to the output
 27 |             of the transform block. Can be any string activation function availahle
 28 |             to Keras.
 29 | 
 30 |     # Returns:
 31 |         a tensor of same shape as input
 32 |     """
 33 |     channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
 34 |     ip_shape = K.int_shape(ip)
 35 | 
 36 |     # check rank and calculate the input shape
 37 |     if len(ip_shape) == 3:  # temporal / time series data
 38 |         rank = 3
 39 |         batchsize, dim1, channels = ip_shape
 40 | 
 41 |     elif len(ip_shape) == 4:  # spatial / image data
 42 |         rank = 4
 43 | 
 44 |         if channel_dim == 1:
 45 |             batchsize, channels, dim1, dim2 = ip_shape
 46 |         else:
 47 |             batchsize, dim1, dim2, channels = ip_shape
 48 | 
 49 |     elif len(ip_shape) == 5:  # spatio-temporal / Video or Voxel data
 50 |         rank = 5
 51 | 
 52 |         if channel_dim == 1:
 53 |             batchsize, channels, dim1, dim2, dim3 = ip_shape
 54 |         else:
 55 |             batchsize, dim1, dim2, dim3, channels = ip_shape
 56 | 
 57 |     else:
 58 |         raise ValueError('Input dimension has to be either 3 (temporal), 4 (spatial) or 5 (spatio-temporal)')
 59 | 
 60 |     if rank > 3:
 61 |         flat_spatial_dim = -1 if K.image_data_format() == 'channels_first' else 1
 62 |     else:
 63 |         flat_spatial_dim = 1
 64 | 
 65 |     """ Context Modelling Block """
 66 |     # [B, ***, C] or [B, C, ***]
 67 |     input_flat = _spatial_flattenND(ip, rank)
 68 |     # [B, ..., C] or [B, C, ...]
 69 |     context = _convND(ip, rank, channels=1, kernel=1)
 70 |     # [B, ..., 1] or [B, 1, ...]
 71 |     context = _spatial_flattenND(context, rank)
 72 |     # [B, ***, 1] or [B, 1, ***]
 73 |     context = Softmax(axis=flat_spatial_dim)(context)
 74 | 
 75 |     # Compute context block outputs
 76 |     context = dot([input_flat, context], axes=flat_spatial_dim)
 77 |     # [B, C, 1]
 78 |     context = _spatial_expandND(context, rank)
 79 |     # [B, C, 1...] or [B, 1..., C]
 80 | 
 81 |     """ Transform block """
 82 |     # Transform bottleneck
 83 |     # [B, C // R, 1...] or [B, 1..., C // R]
 84 |     transform = _convND(context, rank, channels // reduction_ratio, kernel=1)
 85 |     # Group normalization acts as Layer Normalization when groups = 1
 86 |     transform = GroupNormalization(groups=1, axis=channel_dim)(transform)
 87 |     transform = Activation('relu')(transform)
 88 | 
 89 |     # Transform output block
 90 |     # [B, C, 1...] or [B, 1..., C]
 91 |     transform = _convND(transform, rank, channels, kernel=1)
 92 |     transform = Activation(transform_activation)(transform)
 93 | 
 94 |     # apply context transform
 95 |     out = add([ip, transform])
 96 | 
 97 |     return out
 98 | 
 99 | 
100 | def _convND(ip, rank, channels, kernel=1):
101 |     assert rank in [3, 4, 5], "Rank of input must be 3, 4 or 5"
102 | 
103 |     if rank == 3:
104 |         x = Conv1D(channels, kernel, padding='same', use_bias=False, kernel_initializer='he_normal')(ip)
105 |     elif rank == 4:
106 |         x = Conv2D(channels, (kernel, kernel), padding='same', use_bias=False, kernel_initializer='he_normal')(ip)
107 |     else:
108 |         x = Conv3D(channels, (kernel, kernel, kernel), padding='same', use_bias=False, kernel_initializer='he_normal')(ip)
109 | 
110 |     return x
111 | 
112 | 
113 | def _spatial_flattenND(ip, rank):
114 |     assert rank in [3, 4, 5], "Rank of input must be 3, 4 or 5"
115 | 
116 |     ip_shape = K.int_shape(ip)
117 |     channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
118 | 
119 |     if rank == 3:
120 |         x = ip  # identity op for rank 3
121 | 
122 |     elif rank == 4:
123 |         if channel_dim == 1:
124 |             # [C, D1, D2] -> [C, D1 * D2]
125 |             shape = [ip_shape[1], ip_shape[2] * ip_shape[3]]
126 |         else:
127 |             # [D1, D2, C] -> [D1 * D2, C]
128 |             shape = [ip_shape[1] * ip_shape[2], ip_shape[3]]
129 | 
130 |         x = Reshape(shape)(ip)
131 | 
132 |     else:
133 |         if channel_dim == 1:
134 |             # [C, D1, D2, D3] -> [C, D1 * D2 * D3]
135 |             shape = [ip_shape[1], ip_shape[2] * ip_shape[3] * ip_shape[4]]
136 |         else:
137 |             # [D1, D2, D3, C] -> [D1 * D2 * D3, C]
138 |             shape = [ip_shape[1] * ip_shape[2] * ip_shape[3], ip_shape[4]]
139 | 
140 |         x = Reshape(shape)(ip)
141 | 
142 |     return x
143 | 
144 | 
145 | def _spatial_expandND(ip, rank):
146 |     assert rank in [3, 4, 5], "Rank of input must be 3, 4 or 5"
147 | 
148 |     channel_dim = 1 if K.image_data_format() == 'channels_first' else -1
149 | 
150 |     if rank == 3:
151 |         x = Permute((2, 1))(ip)  # identity op for rank 3
152 | 
153 |     elif rank == 4:
154 |         if channel_dim == 1:
155 |             # [C, D1, D2] -> [C, D1 * D2]
156 |             shape = [-1, 1, 1]
157 |         else:
158 |             # [D1, D2, C] -> [D1 * D2, C]
159 |             shape = [1, 1, -1]
160 | 
161 |         x = Reshape(shape)(ip)
162 | 
163 |     else:
164 |         if channel_dim == 1:
165 |             # [C, D1, D2, D3] -> [C, D1 * D2 * D3]
166 |             shape = [-1, 1, 1, 1]
167 |         else:
168 |             # [D1, D2, D3, C] -> [D1 * D2 * D3, C]
169 |             shape = [1, 1, 1, -1]
170 | 
171 |         x = Reshape(shape)(ip)
172 | 
173 |     return x
174 | 
175 | 
176 | if __name__ == '__main__':
177 |     from keras.layers import Input
178 |     from keras.models import Model
179 | 
180 |     ip = Input(shape=(64, 64, 32))
181 |     x = global_context_block(ip, reduction_ratio=16)
182 | 
183 |     model = Model(ip, x)
184 | 
185 |     model.summary()
186 | 


--------------------------------------------------------------------------------
/gc_densenet.py:
--------------------------------------------------------------------------------
  1 | '''DenseNet models for Keras.
  2 | # Reference
  3 | - [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf)
  4 | - [The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation](https://arxiv.org/pdf/1611.09326.pdf)
  5 | '''
  6 | from __future__ import print_function
  7 | from __future__ import absolute_import
  8 | from __future__ import division
  9 | 
 10 | import warnings
 11 | 
 12 | from keras.models import Model
 13 | from keras.layers.core import Dense, Dropout, Activation, Reshape
 14 | from keras.layers.convolutional import Conv2D, Conv2DTranspose, UpSampling2D
 15 | from keras.layers.pooling import AveragePooling2D, MaxPooling2D
 16 | from keras.layers.pooling import GlobalAveragePooling2D
 17 | from keras.layers import Input
 18 | from keras.layers.merge import concatenate
 19 | from keras.layers.normalization import BatchNormalization
 20 | from keras.regularizers import l2
 21 | from keras.utils.layer_utils import convert_all_kernels_in_model, convert_dense_weights_data_format
 22 | from keras.utils.data_utils import get_file
 23 | from keras.engine.topology import get_source_inputs
 24 | from keras_applications.imagenet_utils import _obtain_input_shape
 25 | from keras_applications.imagenet_utils import decode_predictions
 26 | import keras.backend as K
 27 | 
 28 | from gc import global_context_block
 29 | 
 30 | 
 31 | def preprocess_input(x, data_format=None):
 32 |     """Preprocesses a tensor encoding a batch of images.
 33 | 
 34 |     # Arguments
 35 |         x: input Numpy tensor, 4D.
 36 |         data_format: data format of the image tensor.
 37 | 
 38 |     # Returns
 39 |         Preprocessed tensor.
 40 |     """
 41 |     if data_format is None:
 42 |         data_format = K.image_data_format()
 43 |     assert data_format in {'channels_last', 'channels_first'}
 44 | 
 45 |     if data_format == 'channels_first':
 46 |         if x.ndim == 3:
 47 |             # 'RGB'->'BGR'
 48 |             x = x[::-1, ...]
 49 |             # Zero-center by mean pixel
 50 |             x[0, :, :] -= 103.939
 51 |             x[1, :, :] -= 116.779
 52 |             x[2, :, :] -= 123.68
 53 |         else:
 54 |             x = x[:, ::-1, ...]
 55 |             x[:, 0, :, :] -= 103.939
 56 |             x[:, 1, :, :] -= 116.779
 57 |             x[:, 2, :, :] -= 123.68
 58 |     else:
 59 |         # 'RGB'->'BGR'
 60 |         x = x[..., ::-1]
 61 |         # Zero-center by mean pixel
 62 |         x[..., 0] -= 103.939
 63 |         x[..., 1] -= 116.779
 64 |         x[..., 2] -= 123.68
 65 | 
 66 |     x *= 0.017  # scale values
 67 | 
 68 |     return x
 69 | 
 70 | 
 71 | def GCDenseNet(input_shape=None,
 72 |                depth=40,
 73 |                nb_dense_block=3,
 74 |                growth_rate=12,
 75 |                nb_filter=-1,
 76 |                nb_layers_per_block=-1,
 77 |                bottleneck=False,
 78 |                reduction=0.0,
 79 |                dropout_rate=0.0,
 80 |                weight_decay=1e-4,
 81 |                subsample_initial_block=False,
 82 |                include_top=True,
 83 |                weights=None,
 84 |                input_tensor=None,
 85 |                classes=10,
 86 |                activation='softmax'):
 87 |     '''Instantiate the GC DenseNet architecture
 88 |         # Arguments
 89 |             input_shape: optional shape tuple, only to be specified
 90 |                 if `include_top` is False (otherwise the input shape
 91 |                 has to be `(32, 32, 3)` (with `channels_last` dim ordering)
 92 |                 or `(3, 32, 32)` (with `channels_first` dim ordering).
 93 |                 It should have exactly 3 inputs channels,
 94 |                 and width and height should be no smaller than 8.
 95 |                 E.g. `(200, 200, 3)` would be one valid value.
 96 |             depth: number or layers in the DenseNet
 97 |             nb_dense_block: number of dense blocks to add to end (generally = 3)
 98 |             growth_rate: number of filters to add per dense block
 99 |             nb_filter: initial number of filters. -1 indicates initial
100 |                 number of filters is 2 * growth_rate
101 |             nb_layers_per_block: number of layers in each dense block.
102 |                 Can be a -1, positive integer or a list.
103 |                 If -1, calculates nb_layer_per_block from the network depth.
104 |                 If positive integer, a set number of layers per dense block.
105 |                 If list, nb_layer is used as provided. Note that list size must
106 |                 be (nb_dense_block + 1)
107 |             bottleneck: flag to add bottleneck blocks in between dense blocks
108 |             reduction: reduction factor of transition blocks.
109 |                 Note : reduction value is inverted to compute compression.
110 |             dropout_rate: dropout rate
111 |             weight_decay: weight decay rate
112 |             subsample_initial_block: Set to True to subsample the initial convolution and
113 |                 add a MaxPool2D before the dense blocks are added.
114 |             include_top: whether to include the fully-connected
115 |                 layer at the top of the network.
116 |             weights: one of `None` (random initialization) or
117 |                 'imagenet' (pre-training on ImageNet)..
118 |             input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
119 |                 to use as image input for the model.
120 |             classes: optional number of classes to classify images
121 |                 into, only to be specified if `include_top` is True, and
122 |                 if no `weights` argument is specified.
123 |             activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
124 |                 Note that if sigmoid is used, classes must be 1.
125 |         # Returns
126 |             A Keras model instance.
127 |         '''
128 | 
129 |     if weights not in {'imagenet', None}:
130 |         raise ValueError('The `weights` argument should be either '
131 |                          '`None` (random initialization) or `cifar10` '
132 |                          '(pre-training on CIFAR-10).')
133 | 
134 |     if weights == 'imagenet' and include_top and classes != 1000:
135 |         raise ValueError('If using `weights` as ImageNet with `include_top`'
136 |                          ' as true, `classes` should be 1000')
137 | 
138 |     if activation not in ['softmax', 'sigmoid']:
139 |         raise ValueError('activation must be one of "softmax" or "sigmoid"')
140 | 
141 |     if activation == 'sigmoid' and classes != 1:
142 |         raise ValueError('sigmoid activation can only be used when classes = 1')
143 | 
144 |     # Determine proper input shape
145 |     input_shape = _obtain_input_shape(input_shape,
146 |                                       default_size=32,
147 |                                       min_size=8,
148 |                                       data_format=K.image_data_format(),
149 |                                       require_flatten=include_top)
150 | 
151 |     if input_tensor is None:
152 |         img_input = Input(shape=input_shape)
153 |     else:
154 |         if not K.is_keras_tensor(input_tensor):
155 |             img_input = Input(tensor=input_tensor, shape=input_shape)
156 |         else:
157 |             img_input = input_tensor
158 | 
159 |     x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block,
160 |                            growth_rate, nb_filter, nb_layers_per_block, bottleneck, reduction,
161 |                            dropout_rate, weight_decay, subsample_initial_block, activation)
162 | 
163 |     # Ensure that the model takes into account
164 |     # any potential predecessors of `input_tensor`.
165 |     if input_tensor is not None:
166 |         inputs = get_source_inputs(input_tensor)
167 |     else:
168 |         inputs = img_input
169 |     # Create model.
170 |     model = Model(inputs, x, name='se-densenet')
171 | 
172 |     return model
173 | 
174 | 
175 | def GCDenseNetImageNet121(input_shape=None,
176 |                           bottleneck=True,
177 |                           reduction=0.5,
178 |                           dropout_rate=0.0,
179 |                           weight_decay=1e-4,
180 |                           include_top=True,
181 |                           weights=None,
182 |                           input_tensor=None,
183 |                           classes=1000,
184 |                           activation='softmax'):
185 |     return GCDenseNet(input_shape, depth=121, nb_dense_block=4, growth_rate=32, nb_filter=64,
186 |                       nb_layers_per_block=[6, 12, 24, 16], bottleneck=bottleneck, reduction=reduction,
187 |                       dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
188 |                       include_top=include_top, weights=weights, input_tensor=input_tensor,
189 |                       classes=classes, activation=activation)
190 | 
191 | 
192 | def GCDenseNetImageNet169(input_shape=None,
193 |                           bottleneck=True,
194 |                           reduction=0.5,
195 |                           dropout_rate=0.0,
196 |                           weight_decay=1e-4,
197 |                           include_top=True,
198 |                           weights=None,
199 |                           input_tensor=None,
200 |                           classes=1000,
201 |                           activation='softmax'):
202 |     return GCDenseNet(input_shape, depth=169, nb_dense_block=4, growth_rate=32, nb_filter=64,
203 |                       nb_layers_per_block=[6, 12, 32, 32], bottleneck=bottleneck, reduction=reduction,
204 |                       dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
205 |                       include_top=include_top, weights=weights, input_tensor=input_tensor,
206 |                       classes=classes, activation=activation)
207 | 
208 | 
209 | def GCDenseNetImageNet201(input_shape=None,
210 |                           bottleneck=True,
211 |                           reduction=0.5,
212 |                           dropout_rate=0.0,
213 |                           weight_decay=1e-4,
214 |                           include_top=True,
215 |                           weights=None,
216 |                           input_tensor=None,
217 |                           classes=1000,
218 |                           activation='softmax'):
219 |     return GCDenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, nb_filter=64,
220 |                       nb_layers_per_block=[6, 12, 48, 32], bottleneck=bottleneck, reduction=reduction,
221 |                       dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
222 |                       include_top=include_top, weights=weights, input_tensor=input_tensor,
223 |                       classes=classes, activation=activation)
224 | 
225 | 
226 | def GCDenseNetImageNet264(input_shape=None,
227 |                           bottleneck=True,
228 |                           reduction=0.5,
229 |                           dropout_rate=0.0,
230 |                           weight_decay=1e-4,
231 |                           include_top=True,
232 |                           weights=None,
233 |                           input_tensor=None,
234 |                           classes=1000,
235 |                           activation='softmax'):
236 |     return GCDenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, nb_filter=64,
237 |                       nb_layers_per_block=[6, 12, 64, 48], bottleneck=bottleneck, reduction=reduction,
238 |                       dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
239 |                       include_top=include_top, weights=weights, input_tensor=input_tensor,
240 |                       classes=classes, activation=activation)
241 | 
242 | 
243 | def GCDenseNetImageNet161(input_shape=None,
244 |                           bottleneck=True,
245 |                           reduction=0.5,
246 |                           dropout_rate=0.0,
247 |                           weight_decay=1e-4,
248 |                           include_top=True,
249 |                           weights=None,
250 |                           input_tensor=None,
251 |                           classes=1000,
252 |                           activation='softmax'):
253 |     return GCDenseNet(input_shape, depth=161, nb_dense_block=4, growth_rate=48, nb_filter=96,
254 |                       nb_layers_per_block=[6, 12, 36, 24], bottleneck=bottleneck, reduction=reduction,
255 |                       dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
256 |                       include_top=include_top, weights=weights, input_tensor=input_tensor,
257 |                       classes=classes, activation=activation)
258 | 
259 | 
260 | def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4):
261 |     ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout
262 |     Args:
263 |         ip: Input keras tensor
264 |         nb_filter: number of filters
265 |         bottleneck: add bottleneck block
266 |         dropout_rate: dropout rate
267 |         weight_decay: weight decay factor
268 |     Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck)
269 |     '''
270 |     concat_axis = 1 if K.image_data_format() == 'channels_first' else -1
271 | 
272 |     x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip)
273 |     x = Activation('relu')(x)
274 | 
275 |     if bottleneck:
276 |         inter_channel = nb_filter * 4  # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua
277 | 
278 |         x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False,
279 |                    kernel_regularizer=l2(weight_decay))(x)
280 |         x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
281 |         x = Activation('relu')(x)
282 | 
283 |     x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False)(x)
284 |     if dropout_rate:
285 |         x = Dropout(dropout_rate)(x)
286 | 
287 |     return x
288 | 
289 | 
290 | def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1e-4,
291 |                   grow_nb_filters=True, return_concat_list=False):
292 |     ''' Build a dense_block where the output of each conv_block is fed to subsequent ones
293 |     Args:
294 |         x: keras tensor
295 |         nb_layers: the number of layers of conv_block to append to the model.
296 |         nb_filter: number of filters
297 |         growth_rate: growth rate
298 |         bottleneck: bottleneck block
299 |         dropout_rate: dropout rate
300 |         weight_decay: weight decay factor
301 |         grow_nb_filters: flag to decide to allow number of filters to grow
302 |         return_concat_list: return the list of feature maps along with the actual output
303 |     Returns: keras tensor with nb_layers of conv_block appended
304 |     '''
305 |     concat_axis = 1 if K.image_data_format() == 'channels_first' else -1
306 | 
307 |     x_list = [x]
308 | 
309 |     for i in range(nb_layers):
310 |         cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay)
311 |         x_list.append(cb)
312 | 
313 |         x = concatenate([x, cb], axis=concat_axis)
314 | 
315 |         if grow_nb_filters:
316 |             nb_filter += growth_rate
317 | 
318 |     # global context block
319 |     x = global_context_block(x)
320 | 
321 |     if return_concat_list:
322 |         return x, nb_filter, x_list
323 |     else:
324 |         return x, nb_filter
325 | 
326 | 
327 | def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4):
328 |     ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D
329 |     Args:
330 |         ip: keras tensor
331 |         nb_filter: number of filters
332 |         compression: calculated as 1 - reduction. Reduces the number of feature maps
333 |                     in the transition block.
334 |         dropout_rate: dropout rate
335 |         weight_decay: weight decay factor
336 |     Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool
337 |     '''
338 |     concat_axis = 1 if K.image_data_format() == 'channels_first' else -1
339 | 
340 |     x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip)
341 |     x = Activation('relu')(x)
342 |     x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False,
343 |                kernel_regularizer=l2(weight_decay))(x)
344 |     x = AveragePooling2D((2, 2), strides=(2, 2))(x)
345 | 
346 |     # global context block
347 |     x = global_context_block(x)
348 | 
349 |     return x
350 | 
351 | 
352 | def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1,
353 |                        nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1e-4,
354 |                        subsample_initial_block=False, activation='softmax'):
355 |     ''' Build the DenseNet model
356 |     Args:
357 |         nb_classes: number of classes
358 |         img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels)
359 |         include_top: flag to include the final Dense layer
360 |         depth: number or layers
361 |         nb_dense_block: number of dense blocks to add to end (generally = 3)
362 |         growth_rate: number of filters to add per dense block
363 |         nb_filter: initial number of filters. Default -1 indicates initial number of filters is 2 * growth_rate
364 |         nb_layers_per_block: number of layers in each dense block.
365 |                 Can be a -1, positive integer or a list.
366 |                 If -1, calculates nb_layer_per_block from the depth of the network.
367 |                 If positive integer, a set number of layers per dense block.
368 |                 If list, nb_layer is used as provided. Note that list size must
369 |                 be (nb_dense_block + 1)
370 |         bottleneck: add bottleneck blocks
371 |         reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression
372 |         dropout_rate: dropout rate
373 |         weight_decay: weight decay rate
374 |         subsample_initial_block: Set to True to subsample the initial convolution and
375 |                 add a MaxPool2D before the dense blocks are added.
376 |         subsample_initial:
377 |         activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
378 |                 Note that if sigmoid is used, classes must be 1.
379 |     Returns: keras tensor with nb_layers of conv_block appended
380 |     '''
381 | 
382 |     concat_axis = 1 if K.image_data_format() == 'channels_first' else -1
383 | 
384 |     if reduction != 0.0:
385 |         assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0'
386 | 
387 |     # layers in each dense block
388 |     if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
389 |         nb_layers = list(nb_layers_per_block)  # Convert tuple to list
390 | 
391 |         assert len(nb_layers) == (nb_dense_block), 'If list, nb_layer is used as provided. ' \
392 |                                                    'Note that list size must be (nb_dense_block)'
393 |         final_nb_layer = nb_layers[-1]
394 |         nb_layers = nb_layers[:-1]
395 |     else:
396 |         if nb_layers_per_block == -1:
397 |             assert (depth - 4) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1'
398 |             count = int((depth - 4) / 3)
399 |             nb_layers = [count for _ in range(nb_dense_block)]
400 |             final_nb_layer = count
401 |         else:
402 |             final_nb_layer = nb_layers_per_block
403 |             nb_layers = [nb_layers_per_block] * nb_dense_block
404 | 
405 |     # compute initial nb_filter if -1, else accept users initial nb_filter
406 |     if nb_filter <= 0:
407 |         nb_filter = 2 * growth_rate
408 | 
409 |     # compute compression factor
410 |     compression = 1.0 - reduction
411 | 
412 |     # Initial convolution
413 |     if subsample_initial_block:
414 |         initial_kernel = (7, 7)
415 |         initial_strides = (2, 2)
416 |     else:
417 |         initial_kernel = (3, 3)
418 |         initial_strides = (1, 1)
419 | 
420 |     x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', padding='same',
421 |                strides=initial_strides, use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)
422 | 
423 |     if subsample_initial_block:
424 |         x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
425 |         x = Activation('relu')(x)
426 |         x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
427 | 
428 |     # Add dense blocks
429 |     for block_idx in range(nb_dense_block - 1):
430 |         x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck,
431 |                                      dropout_rate=dropout_rate, weight_decay=weight_decay)
432 |         # add transition_block
433 |         x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay)
434 |         nb_filter = int(nb_filter * compression)
435 | 
436 |     # The last dense_block does not have a transition_block
437 |     x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck,
438 |                                  dropout_rate=dropout_rate, weight_decay=weight_decay)
439 | 
440 |     x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
441 |     x = Activation('relu')(x)
442 |     x = GlobalAveragePooling2D()(x)
443 | 
444 |     if include_top:
445 |         x = Dense(nb_classes, activation=activation)(x)
446 | 
447 |     return x
448 | 


--------------------------------------------------------------------------------
/gc_inception_resnet_v2.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | """Se Inception-ResNet V2 model for Keras.
  3 | 
  4 | Model naming and structure follows TF-slim implementation (which has some additional
  5 | layers and different number of filters from the original arXiv paper):
  6 | https://github.com/tensorflow/models/blob/master/slim/nets/inception_resnet_v2.py
  7 | Pre-trained ImageNet weights are also converted from TF-slim, which can be found in:
  8 | https://github.com/tensorflow/models/tree/master/slim#pre-trained-models
  9 | 
 10 | Original code from Keras applications
 11 | 
 12 | # Reference
 13 | - [Inception-v4, Inception-ResNet and the Impact of
 14 |    Residual Connections on Learning](https://arxiv.org/abs/1602.07261)
 15 | """
 16 | from __future__ import print_function
 17 | from __future__ import absolute_import
 18 | 
 19 | import warnings
 20 | 
 21 | from keras.models import Model
 22 | from keras.layers import Activation
 23 | from keras.layers import AveragePooling2D
 24 | from keras.layers import BatchNormalization
 25 | from keras.layers import Concatenate
 26 | from keras.layers import Conv2D
 27 | from keras.layers import Dense
 28 | from keras.layers import GlobalAveragePooling2D
 29 | from keras.layers import GlobalMaxPooling2D
 30 | from keras.layers import Input
 31 | from keras.layers import Lambda
 32 | from keras.layers import MaxPooling2D
 33 | from keras.utils.data_utils import get_file
 34 | from keras.engine.topology import get_source_inputs
 35 | from keras_applications import imagenet_utils
 36 | from keras_applications.imagenet_utils import _obtain_input_shape
 37 | from keras.applications.imagenet_utils import decode_predictions
 38 | from keras import backend as K
 39 | 
 40 | from gc import global_context_block
 41 | 
 42 | 
 43 | def preprocess_input(x):
 44 |     """Preprocesses a numpy array encoding a batch of images.
 45 |     # Arguments
 46 |         x: a 4D numpy array consists of RGB values within [0, 255].
 47 |     # Returns
 48 |         Preprocessed array.
 49 |     """
 50 |     return imagenet_utils.preprocess_input(x, mode='tf')
 51 | 
 52 | 
 53 | def conv2d_bn(x,
 54 |               filters,
 55 |               kernel_size,
 56 |               strides=1,
 57 |               padding='same',
 58 |               activation='relu',
 59 |               use_bias=False,
 60 |               name=None):
 61 |     """Utility function to apply conv + BN.
 62 |     # Arguments
 63 |         x: input tensor.
 64 |         filters: filters in `Conv2D`.
 65 |         kernel_size: kernel size as in `Conv2D`.
 66 |         padding: padding mode in `Conv2D`.
 67 |         activation: activation in `Conv2D`.
 68 |         strides: strides in `Conv2D`.
 69 |         name: name of the ops; will become `name + '_ac'` for the activation
 70 |             and `name + '_bn'` for the batch norm layer.
 71 |     # Returns
 72 |         Output tensor after applying `Conv2D` and `BatchNormalization`.
 73 |     """
 74 |     x = Conv2D(filters,
 75 |                kernel_size,
 76 |                strides=strides,
 77 |                padding=padding,
 78 |                use_bias=use_bias,
 79 |                name=name)(x)
 80 |     if not use_bias:
 81 |         bn_axis = 1 if K.image_data_format() == 'channels_first' else 3
 82 |         bn_name = None if name is None else name + '_bn'
 83 |         x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
 84 |     if activation is not None:
 85 |         ac_name = None if name is None else name + '_ac'
 86 |         x = Activation(activation, name=ac_name)(x)
 87 |     return x
 88 | 
 89 | 
 90 | def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'):
 91 |     """Adds a Inception-ResNet block with Global Context block at the end.
 92 |     This function builds 3 types of Inception-ResNet blocks mentioned
 93 |     in the paper, controlled by the `block_type` argument (which is the
 94 |     block name used in the official TF-slim implementation):
 95 |         - Inception-ResNet-A: `block_type='block35'`
 96 |         - Inception-ResNet-B: `block_type='block17'`
 97 |         - Inception-ResNet-C: `block_type='block8'`
 98 |     # Arguments
 99 |         x: input tensor.
100 |         scale: scaling factor to scale the residuals (i.e., the output of
101 |             passing `x` through an inception module) before adding them
102 |             to the shortcut branch. Let `r` be the output from the residual branch,
103 |             the output of this block will be `x + scale * r`.
104 |         block_type: `'block35'`, `'block17'` or `'block8'`, determines
105 |             the network structure in the residual branch.
106 |         block_idx: an `int` used for generating layer names. The Inception-ResNet blocks
107 |             are repeated many times in this network. We use `block_idx` to identify
108 |             each of the repetitions. For example, the first Inception-ResNet-A block
109 |             will have `block_type='block35', block_idx=0`, ane the layer names will have
110 |             a common prefix `'block35_0'`.
111 |         activation: activation function to use at the end of the block
112 |             (see [activations](../activations.md)).
113 |             When `activation=None`, no activation is applied
114 |             (i.e., "linear" activation: `a(x) = x`).
115 |     # Returns
116 |         Output tensor for the block.
117 |     # Raises
118 |         ValueError: if `block_type` is not one of `'block35'`,
119 |             `'block17'` or `'block8'`.
120 |     """
121 |     if block_type == 'block35':
122 |         branch_0 = conv2d_bn(x, 32, 1)
123 |         branch_1 = conv2d_bn(x, 32, 1)
124 |         branch_1 = conv2d_bn(branch_1, 32, 3)
125 |         branch_2 = conv2d_bn(x, 32, 1)
126 |         branch_2 = conv2d_bn(branch_2, 48, 3)
127 |         branch_2 = conv2d_bn(branch_2, 64, 3)
128 |         branches = [branch_0, branch_1, branch_2]
129 |     elif block_type == 'block17':
130 |         branch_0 = conv2d_bn(x, 192, 1)
131 |         branch_1 = conv2d_bn(x, 128, 1)
132 |         branch_1 = conv2d_bn(branch_1, 160, [1, 7])
133 |         branch_1 = conv2d_bn(branch_1, 192, [7, 1])
134 |         branches = [branch_0, branch_1]
135 |     elif block_type == 'block8':
136 |         branch_0 = conv2d_bn(x, 192, 1)
137 |         branch_1 = conv2d_bn(x, 192, 1)
138 |         branch_1 = conv2d_bn(branch_1, 224, [1, 3])
139 |         branch_1 = conv2d_bn(branch_1, 256, [3, 1])
140 |         branches = [branch_0, branch_1]
141 |     else:
142 |         raise ValueError('Unknown Inception-ResNet block type. '
143 |                          'Expects "block35", "block17" or "block8", '
144 |                          'but got: ' + str(block_type))
145 | 
146 |     block_name = block_type + '_' + str(block_idx)
147 |     channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
148 |     mixed = Concatenate(axis=channel_axis, name=block_name + '_mixed')(branches)
149 |     up = conv2d_bn(mixed,
150 |                    K.int_shape(x)[channel_axis],
151 |                    1,
152 |                    activation=None,
153 |                    use_bias=True,
154 |                    name=block_name + '_conv')
155 | 
156 |     x = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale,
157 |                output_shape=K.int_shape(x)[1:],
158 |                arguments={'scale': scale},
159 |                name=block_name)([x, up])
160 |     if activation is not None:
161 |         x = Activation(activation, name=block_name + '_ac')(x)
162 | 
163 |     # global context block
164 |     x = global_context_block(x)
165 |     return x
166 | 
167 | 
168 | def SEInceptionResNetV2(include_top=True,
169 |                         weights=None,
170 |                         input_tensor=None,
171 |                         input_shape=None,
172 |                         pooling=None,
173 |                         classes=1000):
174 |     """Instantiates the SE-Inception-ResNet v2 architecture.
175 |     Optionally loads weights pre-trained on ImageNet.
176 |     Note that when using TensorFlow, for best performance you should
177 |     set `"image_data_format": "channels_last"` in your Keras config
178 |     at `~/.keras/keras.json`.
179 |     The model and the weights are compatible with both TensorFlow and Theano
180 |     backends (but not CNTK). The data format convention used by the model is
181 |     the one specified in your Keras config file.
182 |     Note that the default input image size for this model is 299x299, instead
183 |     of 224x224 as in the VGG16 and ResNet models. Also, the input preprocessing
184 |     function is different (i.e., do not use `imagenet_utils.preprocess_input()`
185 |     with this model. Use `preprocess_input()` defined in this module instead).
186 |     # Arguments
187 |         include_top: whether to include the fully-connected
188 |             layer at the top of the network.
189 |         weights: one of `None` (random initialization)
190 |             or `'imagenet'` (pre-training on ImageNet).
191 |         input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
192 |             to use as image input for the model.
193 |         input_shape: optional shape tuple, only to be specified
194 |             if `include_top` is `False` (otherwise the input shape
195 |             has to be `(299, 299, 3)` (with `'channels_last'` data format)
196 |             or `(3, 299, 299)` (with `'channels_first'` data format).
197 |             It should have exactly 3 inputs channels,
198 |             and width and height should be no smaller than 139.
199 |             E.g. `(150, 150, 3)` would be one valid value.
200 |         pooling: Optional pooling mode for feature extraction
201 |             when `include_top` is `False`.
202 |             - `None` means that the output of the model will be
203 |                 the 4D tensor output of the last convolutional layer.
204 |             - `'avg'` means that global average pooling
205 |                 will be applied to the output of the
206 |                 last convolutional layer, and thus
207 |                 the output of the model will be a 2D tensor.
208 |             - `'max'` means that global max pooling will be applied.
209 |         classes: optional number of classes to classify images
210 |             into, only to be specified if `include_top` is `True`, and
211 |             if no `weights` argument is specified.
212 |     # Returns
213 |         A Keras `Model` instance.
214 |     # Raises
215 |         ValueError: in case of invalid argument for `weights`,
216 |             or invalid input shape.
217 |         RuntimeError: If attempting to run this model with an unsupported backend.
218 |     """
219 |     if K.backend() in {'cntk'}:
220 |         raise RuntimeError(K.backend() + ' backend is currently unsupported for this model.')
221 | 
222 |     if weights not in {'imagenet', None}:
223 |         raise ValueError('The `weights` argument should be either '
224 |                          '`None` (random initialization) or `imagenet` '
225 |                          '(pre-training on ImageNet).')
226 | 
227 |     if weights == 'imagenet' and include_top and classes != 1000:
228 |         raise ValueError('If using `weights` as imagenet with `include_top`'
229 |                          ' as true, `classes` should be 1000')
230 | 
231 |     # Determine proper input shape
232 |     input_shape = _obtain_input_shape(
233 |         input_shape,
234 |         default_size=299,
235 |         min_size=139,
236 |         data_format=K.image_data_format(),
237 |         require_flatten=False,
238 |         weights=weights)
239 | 
240 |     if input_tensor is None:
241 |         img_input = Input(shape=input_shape)
242 |     else:
243 |         if not K.is_keras_tensor(input_tensor):
244 |             img_input = Input(tensor=input_tensor, shape=input_shape)
245 |         else:
246 |             img_input = input_tensor
247 | 
248 |     # Stem block: 35 x 35 x 192
249 |     x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid')
250 |     x = conv2d_bn(x, 32, 3, padding='valid')
251 |     x = conv2d_bn(x, 64, 3)
252 |     x = MaxPooling2D(3, strides=2)(x)
253 |     x = conv2d_bn(x, 80, 1, padding='valid')
254 |     x = conv2d_bn(x, 192, 3, padding='valid')
255 |     x = MaxPooling2D(3, strides=2)(x)
256 | 
257 |     # Mixed 5b (Inception-A block): 35 x 35 x 320
258 |     branch_0 = conv2d_bn(x, 96, 1)
259 |     branch_1 = conv2d_bn(x, 48, 1)
260 |     branch_1 = conv2d_bn(branch_1, 64, 5)
261 |     branch_2 = conv2d_bn(x, 64, 1)
262 |     branch_2 = conv2d_bn(branch_2, 96, 3)
263 |     branch_2 = conv2d_bn(branch_2, 96, 3)
264 |     branch_pool = AveragePooling2D(3, strides=1, padding='same')(x)
265 |     branch_pool = conv2d_bn(branch_pool, 64, 1)
266 |     branches = [branch_0, branch_1, branch_2, branch_pool]
267 |     channel_axis = 1 if K.image_data_format() == 'channels_first' else 3
268 |     x = Concatenate(axis=channel_axis, name='mixed_5b')(branches)
269 | 
270 |     # global context block
271 |     x = global_context_block(x)
272 | 
273 |     # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320
274 |     for block_idx in range(1, 11):
275 |         x = inception_resnet_block(x,
276 |                                    scale=0.17,
277 |                                    block_type='block35',
278 |                                    block_idx=block_idx)
279 | 
280 |     # Mixed 6a (Reduction-A block): 17 x 17 x 1088
281 |     branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid')
282 |     branch_1 = conv2d_bn(x, 256, 1)
283 |     branch_1 = conv2d_bn(branch_1, 256, 3)
284 |     branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid')
285 |     branch_pool = MaxPooling2D(3, strides=2, padding='valid')(x)
286 |     branches = [branch_0, branch_1, branch_pool]
287 |     x = Concatenate(axis=channel_axis, name='mixed_6a')(branches)
288 | 
289 |     # global context block
290 |     x = global_context_block(x)
291 | 
292 |     # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088
293 |     for block_idx in range(1, 21):
294 |         x = inception_resnet_block(x,
295 |                                    scale=0.1,
296 |                                    block_type='block17',
297 |                                    block_idx=block_idx)
298 | 
299 |     # Mixed 7a (Reduction-B block): 8 x 8 x 2080
300 |     branch_0 = conv2d_bn(x, 256, 1)
301 |     branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid')
302 |     branch_1 = conv2d_bn(x, 256, 1)
303 |     branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid')
304 |     branch_2 = conv2d_bn(x, 256, 1)
305 |     branch_2 = conv2d_bn(branch_2, 288, 3)
306 |     branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid')
307 |     branch_pool = MaxPooling2D(3, strides=2, padding='valid')(x)
308 |     branches = [branch_0, branch_1, branch_2, branch_pool]
309 |     x = Concatenate(axis=channel_axis, name='mixed_7a')(branches)
310 | 
311 |     # global_context block
312 |     x = global_context_block(x)
313 | 
314 |     # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080
315 |     for block_idx in range(1, 10):
316 |         x = inception_resnet_block(x,
317 |                                    scale=0.2,
318 |                                    block_type='block8',
319 |                                    block_idx=block_idx)
320 |     x = inception_resnet_block(x,
321 |                                scale=1.,
322 |                                activation=None,
323 |                                block_type='block8',
324 |                                block_idx=10)
325 | 
326 |     # global context block
327 |     x = global_context_block(x)
328 | 
329 |     # Final convolution block: 8 x 8 x 1536
330 |     x = conv2d_bn(x, 1536, 1, name='conv_7b')
331 | 
332 |     if include_top:
333 |         # Classification block
334 |         x = GlobalAveragePooling2D(name='avg_pool')(x)
335 |         x = Dense(classes, activation='softmax', name='predictions')(x)
336 |     else:
337 |         if pooling == 'avg':
338 |             x = GlobalAveragePooling2D()(x)
339 |         elif pooling == 'max':
340 |             x = GlobalMaxPooling2D()(x)
341 | 
342 |     # Ensure that the model takes into account
343 |     # any potential predecessors of `input_tensor`
344 |     if input_tensor is not None:
345 |         inputs = get_source_inputs(input_tensor)
346 |     else:
347 |         inputs = img_input
348 | 
349 |     # Create model
350 |     model = Model(inputs, x, name='se_inception_resnet_v2')
351 | 
352 |     return model
353 | 


--------------------------------------------------------------------------------
/gc_inception_v3.py:
--------------------------------------------------------------------------------
  1 | """Global Context Inception V3 model
  2 | 
  3 | Major portions of this code is adapted from the applications folder of Keras.
  4 | 
  5 | Note that the input image format for this model is different than for
  6 | the VGG16 and ResNet models (299x299 instead of 224x224),
  7 | and that the input preprocessing function is also different (same as Xception).
  8 | 
  9 | # Reference
 10 |     - [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567)
 11 |     - []() # added when paper is published on Arxiv
 12 | 
 13 | """
 14 | from __future__ import print_function
 15 | from __future__ import absolute_import
 16 | 
 17 | import warnings
 18 | 
 19 | from keras.models import Model
 20 | from keras import layers
 21 | from keras.layers import Activation
 22 | from keras.layers import Dense
 23 | from keras.layers import Reshape
 24 | from keras.layers import Input
 25 | from keras.layers import BatchNormalization
 26 | from keras.layers import Conv2D
 27 | from keras.layers import MaxPooling2D
 28 | from keras.layers import AveragePooling2D
 29 | from keras.layers import GlobalAveragePooling2D
 30 | from keras.layers import GlobalMaxPooling2D
 31 | from keras.engine.topology import get_source_inputs
 32 | from keras.utils.data_utils import get_file
 33 | from keras import backend as K
 34 | from keras_applications.imagenet_utils import decode_predictions
 35 | from keras_applications.imagenet_utils import _obtain_input_shape
 36 | 
 37 | from gc import global_context_block
 38 | 
 39 | WEIGHTS_PATH = ''
 40 | WEIGHTS_PATH_NO_TOP = ''
 41 | 
 42 | 
 43 | def _conv2d_bn(x,
 44 |                filters,
 45 |                num_row,
 46 |                num_col,
 47 |                padding='same',
 48 |                strides=(1, 1),
 49 |                name=None):
 50 |     """Utility function to apply conv + BN.
 51 | 
 52 |     # Arguments
 53 |         x: input tensor.
 54 |         filters: filters in `Conv2D`.
 55 |         num_row: height of the convolution kernel.
 56 |         num_col: width of the convolution kernel.
 57 |         padding: padding mode in `Conv2D`.
 58 |         strides: strides in `Conv2D`.
 59 |         name: name of the ops; will become `name + '_conv'`
 60 |             for the convolution and `name + '_bn'` for the
 61 |             batch norm layer.
 62 | 
 63 |     # Returns
 64 |         Output tensor after applying `Conv2D` and `BatchNormalization`.
 65 |     """
 66 |     if name is not None:
 67 |         bn_name = name + '_bn'
 68 |         conv_name = name + '_conv'
 69 |     else:
 70 |         bn_name = None
 71 |         conv_name = None
 72 |     if K.image_data_format() == 'channels_first':
 73 |         bn_axis = 1
 74 |     else:
 75 |         bn_axis = 3
 76 |     x = Conv2D(
 77 |         filters, (num_row, num_col),
 78 |         strides=strides,
 79 |         padding=padding,
 80 |         use_bias=False,
 81 |         name=conv_name)(x)
 82 |     x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
 83 |     x = Activation('relu', name=name)(x)
 84 |     return x
 85 | 
 86 | 
 87 | def SEInceptionV3(include_top=True,
 88 |                   weights=None,
 89 |                   input_tensor=None,
 90 |                   input_shape=None,
 91 |                   pooling=None,
 92 |                   classes=1000):
 93 |     """Instantiates the Global Context Inception v3 architecture.
 94 | 
 95 |     # Arguments
 96 |         include_top: whether to include the fully-connected
 97 |             layer at the top of the network.
 98 |         weights: one of `None` (random initialization)
 99 |             or "imagenet" (pre-training on ImageNet).
100 |         input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
101 |             to use as image input for the model.
102 |         input_shape: optional shape tuple, only to be specified
103 |             if `include_top` is False (otherwise the input shape
104 |             has to be `(299, 299, 3)` (with `channels_last` data format)
105 |             or `(3, 299, 299)` (with `channels_first` data format).
106 |             It should have exactly 3 inputs channels,
107 |             and width and height should be no smaller than 139.
108 |             E.g. `(150, 150, 3)` would be one valid value.
109 |         pooling: Optional pooling mode for feature extraction
110 |             when `include_top` is `False`.
111 |             - `None` means that the output of the model will be
112 |                 the 4D tensor output of the
113 |                 last convolutional layer.
114 |             - `avg` means that global average pooling
115 |                 will be applied to the output of the
116 |                 last convolutional layer, and thus
117 |                 the output of the model will be a 2D tensor.
118 |             - `max` means that global max pooling will
119 |                 be applied.
120 |         classes: optional number of classes to classify images
121 |             into, only to be specified if `include_top` is True, and
122 |             if no `weights` argument is specified.
123 | 
124 |     # Returns
125 |         A Keras model instance.
126 | 
127 |     # Raises
128 |         ValueError: in case of invalid argument for `weights`,
129 |             or invalid input shape.
130 |     """
131 |     if weights not in {'imagenet', None}:
132 |         raise ValueError('The `weights` argument should be either '
133 |                          '`None` (random initialization) or `imagenet` '
134 |                          '(pre-training on ImageNet).')
135 | 
136 |     if weights == 'imagenet' and include_top and classes != 1000:
137 |         raise ValueError('If using `weights` as imagenet with `include_top`'
138 |                          ' as true, `classes` should be 1000')
139 | 
140 |     # Determine proper input shape
141 |     input_shape = _obtain_input_shape(
142 |         input_shape,
143 |         default_size=299,
144 |         min_size=139,
145 |         data_format=K.image_data_format(),
146 |         require_flatten=include_top)
147 | 
148 |     if input_tensor is None:
149 |         img_input = Input(shape=input_shape)
150 |     else:
151 |         if not K.is_keras_tensor(input_tensor):
152 |             img_input = Input(tensor=input_tensor, shape=input_shape)
153 |         else:
154 |             img_input = input_tensor
155 | 
156 |     if K.image_data_format() == 'channels_first':
157 |         channel_axis = 1
158 |     else:
159 |         channel_axis = 3
160 | 
161 |     x = _conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid')
162 |     x = _conv2d_bn(x, 32, 3, 3, padding='valid')
163 |     x = _conv2d_bn(x, 64, 3, 3)
164 |     x = MaxPooling2D((3, 3), strides=(2, 2))(x)
165 | 
166 |     x = _conv2d_bn(x, 80, 1, 1, padding='valid')
167 |     x = _conv2d_bn(x, 192, 3, 3, padding='valid')
168 |     x = MaxPooling2D((3, 3), strides=(2, 2))(x)
169 | 
170 |     # mixed 0, 1, 2: 35 x 35 x 256
171 |     branch1x1 = _conv2d_bn(x, 64, 1, 1)
172 | 
173 |     branch5x5 = _conv2d_bn(x, 48, 1, 1)
174 |     branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5)
175 | 
176 |     branch3x3dbl = _conv2d_bn(x, 64, 1, 1)
177 |     branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3)
178 |     branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3)
179 | 
180 |     branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
181 |     branch_pool = _conv2d_bn(branch_pool, 32, 1, 1)
182 |     x = layers.concatenate(
183 |         [branch1x1, branch5x5, branch3x3dbl, branch_pool],
184 |         axis=channel_axis,
185 |         name='mixed0')
186 | 
187 |     # global context block
188 |     x = global_context_block(x)
189 | 
190 |     # mixed 1: 35 x 35 x 256
191 |     branch1x1 = _conv2d_bn(x, 64, 1, 1)
192 | 
193 |     branch5x5 = _conv2d_bn(x, 48, 1, 1)
194 |     branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5)
195 | 
196 |     branch3x3dbl = _conv2d_bn(x, 64, 1, 1)
197 |     branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3)
198 |     branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3)
199 | 
200 |     branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
201 |     branch_pool = _conv2d_bn(branch_pool, 64, 1, 1)
202 |     x = layers.concatenate(
203 |         [branch1x1, branch5x5, branch3x3dbl, branch_pool],
204 |         axis=channel_axis,
205 |         name='mixed1')
206 | 
207 |     # global context block
208 |     x = global_context_block(x)
209 | 
210 |     # mixed 2: 35 x 35 x 256
211 |     branch1x1 = _conv2d_bn(x, 64, 1, 1)
212 | 
213 |     branch5x5 = _conv2d_bn(x, 48, 1, 1)
214 |     branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5)
215 | 
216 |     branch3x3dbl = _conv2d_bn(x, 64, 1, 1)
217 |     branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3)
218 |     branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3)
219 | 
220 |     branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
221 |     branch_pool = _conv2d_bn(branch_pool, 64, 1, 1)
222 |     x = layers.concatenate(
223 |         [branch1x1, branch5x5, branch3x3dbl, branch_pool],
224 |         axis=channel_axis,
225 |         name='mixed2')
226 | 
227 |     # global context block
228 |     x = global_context_block(x)
229 | 
230 |     # mixed 3: 17 x 17 x 768
231 |     branch3x3 = _conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')
232 | 
233 |     branch3x3dbl = _conv2d_bn(x, 64, 1, 1)
234 |     branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3)
235 |     branch3x3dbl = _conv2d_bn(
236 |         branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')
237 | 
238 |     branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
239 |     x = layers.concatenate(
240 |         [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3')
241 | 
242 |     # global context block
243 |     x = global_context_block(x)
244 | 
245 |     # mixed 4: 17 x 17 x 768
246 |     branch1x1 = _conv2d_bn(x, 192, 1, 1)
247 | 
248 |     branch7x7 = _conv2d_bn(x, 128, 1, 1)
249 |     branch7x7 = _conv2d_bn(branch7x7, 128, 1, 7)
250 |     branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1)
251 | 
252 |     branch7x7dbl = _conv2d_bn(x, 128, 1, 1)
253 |     branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 7, 1)
254 |     branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 1, 7)
255 |     branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 7, 1)
256 |     branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7)
257 | 
258 |     branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
259 |     branch_pool = _conv2d_bn(branch_pool, 192, 1, 1)
260 |     x = layers.concatenate(
261 |         [branch1x1, branch7x7, branch7x7dbl, branch_pool],
262 |         axis=channel_axis,
263 |         name='mixed4')
264 | 
265 |     # global context block
266 |     x = global_context_block(x)
267 | 
268 |     # mixed 5, 6: 17 x 17 x 768
269 |     for i in range(2):
270 |         branch1x1 = _conv2d_bn(x, 192, 1, 1)
271 | 
272 |         branch7x7 = _conv2d_bn(x, 160, 1, 1)
273 |         branch7x7 = _conv2d_bn(branch7x7, 160, 1, 7)
274 |         branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1)
275 | 
276 |         branch7x7dbl = _conv2d_bn(x, 160, 1, 1)
277 |         branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 7, 1)
278 |         branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 1, 7)
279 |         branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 7, 1)
280 |         branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7)
281 | 
282 |         branch_pool = AveragePooling2D(
283 |             (3, 3), strides=(1, 1), padding='same')(x)
284 |         branch_pool = _conv2d_bn(branch_pool, 192, 1, 1)
285 |         x = layers.concatenate(
286 |             [branch1x1, branch7x7, branch7x7dbl, branch_pool],
287 |             axis=channel_axis,
288 |             name='mixed' + str(5 + i))
289 | 
290 |         # global context block
291 |         x = global_context_block(x)
292 | 
293 |     # mixed 7: 17 x 17 x 768
294 |     branch1x1 = _conv2d_bn(x, 192, 1, 1)
295 | 
296 |     branch7x7 = _conv2d_bn(x, 192, 1, 1)
297 |     branch7x7 = _conv2d_bn(branch7x7, 192, 1, 7)
298 |     branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1)
299 | 
300 |     branch7x7dbl = _conv2d_bn(x, 192, 1, 1)
301 |     branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 7, 1)
302 |     branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7)
303 |     branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 7, 1)
304 |     branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7)
305 | 
306 |     branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
307 |     branch_pool = _conv2d_bn(branch_pool, 192, 1, 1)
308 |     x = layers.concatenate(
309 |         [branch1x1, branch7x7, branch7x7dbl, branch_pool],
310 |         axis=channel_axis,
311 |         name='mixed7')
312 | 
313 |     # global context block
314 |     x = global_context_block(x)
315 | 
316 |     # mixed 8: 8 x 8 x 1280
317 |     branch3x3 = _conv2d_bn(x, 192, 1, 1)
318 |     branch3x3 = _conv2d_bn(branch3x3, 320, 3, 3,
319 |                            strides=(2, 2), padding='valid')
320 | 
321 |     branch7x7x3 = _conv2d_bn(x, 192, 1, 1)
322 |     branch7x7x3 = _conv2d_bn(branch7x7x3, 192, 1, 7)
323 |     branch7x7x3 = _conv2d_bn(branch7x7x3, 192, 7, 1)
324 |     branch7x7x3 = _conv2d_bn(
325 |         branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid')
326 | 
327 |     branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
328 |     x = layers.concatenate(
329 |         [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8')
330 | 
331 |     # global context block
332 |     x = global_context_block(x)
333 | 
334 |     # mixed 9: 8 x 8 x 2048
335 |     for i in range(2):
336 |         branch1x1 = _conv2d_bn(x, 320, 1, 1)
337 | 
338 |         branch3x3 = _conv2d_bn(x, 384, 1, 1)
339 |         branch3x3_1 = _conv2d_bn(branch3x3, 384, 1, 3)
340 |         branch3x3_2 = _conv2d_bn(branch3x3, 384, 3, 1)
341 |         branch3x3 = layers.concatenate(
342 |             [branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i))
343 | 
344 |         branch3x3dbl = _conv2d_bn(x, 448, 1, 1)
345 |         branch3x3dbl = _conv2d_bn(branch3x3dbl, 384, 3, 3)
346 |         branch3x3dbl_1 = _conv2d_bn(branch3x3dbl, 384, 1, 3)
347 |         branch3x3dbl_2 = _conv2d_bn(branch3x3dbl, 384, 3, 1)
348 |         branch3x3dbl = layers.concatenate(
349 |             [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis)
350 | 
351 |         branch_pool = AveragePooling2D(
352 |             (3, 3), strides=(1, 1), padding='same')(x)
353 |         branch_pool = _conv2d_bn(branch_pool, 192, 1, 1)
354 |         x = layers.concatenate(
355 |             [branch1x1, branch3x3, branch3x3dbl, branch_pool],
356 |             axis=channel_axis,
357 |             name='mixed' + str(9 + i))
358 | 
359 |         # global context block
360 |         x = global_context_block(x)
361 | 
362 |     if include_top:
363 |         # Classification block
364 |         x = GlobalAveragePooling2D(name='avg_pool')(x)
365 |         x = Dense(classes, activation='softmax', name='predictions')(x)
366 |     else:
367 |         if pooling == 'avg':
368 |             x = GlobalAveragePooling2D()(x)
369 |         elif pooling == 'max':
370 |             x = GlobalMaxPooling2D()(x)
371 | 
372 |     # Ensure that the model takes into account
373 |     # any potential predecessors of `input_tensor`.
374 |     if input_tensor is not None:
375 |         inputs = get_source_inputs(input_tensor)
376 |     else:
377 |         inputs = img_input
378 |     # Create model.
379 |     model = Model(inputs, x, name='inception_v3')
380 | 
381 |     return model
382 | 
383 | 
384 | def preprocess_input(x):
385 |     x /= 255.
386 |     x -= 0.5
387 |     x *= 2.
388 |     return x
389 | 


--------------------------------------------------------------------------------
/gc_mobilenets.py:
--------------------------------------------------------------------------------
  1 | """SE MobileNet v1 models for Keras.
  2 | 
  3 | # Reference
  4 | - [MobileNets: Efficient Convolutional Neural Networks for
  5 |    Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf))
  6 | """
  7 | from __future__ import print_function
  8 | from __future__ import absolute_import
  9 | from __future__ import division
 10 | 
 11 | import warnings
 12 | 
 13 | from keras.models import Model
 14 | from keras.layers import Input
 15 | from keras.layers import Activation
 16 | from keras.layers import Dropout
 17 | from keras.layers import Reshape
 18 | from keras.layers import BatchNormalization
 19 | from keras.layers import GlobalAveragePooling2D
 20 | from keras.layers import GlobalMaxPooling2D
 21 | from keras.layers import Conv2D
 22 | from keras import initializers
 23 | from keras import regularizers
 24 | from keras import constraints
 25 | from keras.utils import conv_utils
 26 | from keras.utils.data_utils import get_file
 27 | from keras.engine.topology import get_source_inputs
 28 | from keras.engine import InputSpec
 29 | from keras_applications import imagenet_utils
 30 | from keras_applications.imagenet_utils import _obtain_input_shape
 31 | from keras.applications.imagenet_utils import decode_predictions
 32 | from keras import backend as K
 33 | 
 34 | from gc import global_context_block
 35 | 
 36 | 
 37 | def relu6(x):
 38 |     return K.relu(x, max_value=6)
 39 | 
 40 | 
 41 | def preprocess_input(x):
 42 |     """Preprocesses a numpy array encoding a batch of images.
 43 |     # Arguments
 44 |         x: a 4D numpy array consists of RGB values within [0, 255].
 45 |     # Returns
 46 |         Preprocessed array.
 47 |     """
 48 |     return imagenet_utils.preprocess_input(x, mode='tf')
 49 | 
 50 | 
 51 | class DepthwiseConv2D(Conv2D):
 52 |     """Depthwise separable 2D convolution.
 53 |     Depthwise Separable convolutions consists in performing
 54 |     just the first step in a depthwise spatial convolution
 55 |     (which acts on each input channel separately).
 56 |     The `depth_multiplier` argument controls how many
 57 |     output channels are generated per input channel in the depthwise step.
 58 |     # Arguments
 59 |         kernel_size: An integer or tuple/list of 2 integers, specifying the
 60 |             width and height of the 2D convolution window.
 61 |             Can be a single integer to specify the same value for
 62 |             all spatial dimensions.
 63 |         strides: An integer or tuple/list of 2 integers,
 64 |             specifying the strides of the convolution along the width and height.
 65 |             Can be a single integer to specify the same value for
 66 |             all spatial dimensions.
 67 |             Specifying any stride value != 1 is incompatible with specifying
 68 |             any `dilation_rate` value != 1.
 69 |         padding: one of `'valid'` or `'same'` (case-insensitive).
 70 |         depth_multiplier: The number of depthwise convolution output channels
 71 |             for each input channel.
 72 |             The total number of depthwise convolution output
 73 |             channels will be equal to `filters_in * depth_multiplier`.
 74 |         data_format: A string,
 75 |             one of `channels_last` (default) or `channels_first`.
 76 |             The ordering of the dimensions in the inputs.
 77 |             `channels_last` corresponds to inputs with shape
 78 |             `(batch, height, width, channels)` while `channels_first`
 79 |             corresponds to inputs with shape
 80 |             `(batch, channels, height, width)`.
 81 |             It defaults to the `image_data_format` value found in your
 82 |             Keras config file at `~/.keras/keras.json`.
 83 |             If you never set it, then it will be 'channels_last'.
 84 |         activation: Activation function to use
 85 |             (see [activations](../activations.md)).
 86 |             If you don't specify anything, no activation is applied
 87 |             (ie. 'linear' activation: `a(x) = x`).
 88 |         use_bias: Boolean, whether the layer uses a bias vector.
 89 |         depthwise_initializer: Initializer for the depthwise kernel matrix
 90 |             (see [initializers](../initializers.md)).
 91 |         bias_initializer: Initializer for the bias vector
 92 |             (see [initializers](../initializers.md)).
 93 |         depthwise_regularizer: Regularizer function applied to
 94 |             the depthwise kernel matrix
 95 |             (see [regularizer](../regularizers.md)).
 96 |         bias_regularizer: Regularizer function applied to the bias vector
 97 |             (see [regularizer](../regularizers.md)).
 98 |         activity_regularizer: Regularizer function applied to
 99 |             the output of the layer (its 'activation').
100 |             (see [regularizer](../regularizers.md)).
101 |         depthwise_constraint: Constraint function applied to
102 |             the depthwise kernel matrix
103 |             (see [constraints](../constraints.md)).
104 |         bias_constraint: Constraint function applied to the bias vector
105 |             (see [constraints](../constraints.md)).
106 |     # Input shape
107 |         4D tensor with shape:
108 |         `[batch, channels, rows, cols]` if data_format='channels_first'
109 |         or 4D tensor with shape:
110 |         `[batch, rows, cols, channels]` if data_format='channels_last'.
111 |     # Output shape
112 |         4D tensor with shape:
113 |         `[batch, filters, new_rows, new_cols]` if data_format='channels_first'
114 |         or 4D tensor with shape:
115 |         `[batch, new_rows, new_cols, filters]` if data_format='channels_last'.
116 |         `rows` and `cols` values might have changed due to padding.
117 |     """
118 | 
119 |     def __init__(self,
120 |                  kernel_size,
121 |                  strides=(1, 1),
122 |                  padding='valid',
123 |                  depth_multiplier=1,
124 |                  data_format=None,
125 |                  activation=None,
126 |                  use_bias=True,
127 |                  depthwise_initializer='glorot_uniform',
128 |                  bias_initializer='zeros',
129 |                  depthwise_regularizer=None,
130 |                  bias_regularizer=None,
131 |                  activity_regularizer=None,
132 |                  depthwise_constraint=None,
133 |                  bias_constraint=None,
134 |                  **kwargs):
135 |         super(DepthwiseConv2D, self).__init__(
136 |             filters=None,
137 |             kernel_size=kernel_size,
138 |             strides=strides,
139 |             padding=padding,
140 |             data_format=data_format,
141 |             activation=activation,
142 |             use_bias=use_bias,
143 |             bias_regularizer=bias_regularizer,
144 |             activity_regularizer=activity_regularizer,
145 |             bias_constraint=bias_constraint,
146 |             **kwargs)
147 |         self.depth_multiplier = depth_multiplier
148 |         self.depthwise_initializer = initializers.get(depthwise_initializer)
149 |         self.depthwise_regularizer = regularizers.get(depthwise_regularizer)
150 |         self.depthwise_constraint = constraints.get(depthwise_constraint)
151 |         self.bias_initializer = initializers.get(bias_initializer)
152 | 
153 |     def build(self, input_shape):
154 |         if len(input_shape) < 4:
155 |             raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. '
156 |                              'Received input shape:', str(input_shape))
157 |         if self.data_format == 'channels_first':
158 |             channel_axis = 1
159 |         else:
160 |             channel_axis = 3
161 |         if input_shape[channel_axis] is None:
162 |             raise ValueError('The channel dimension of the inputs to '
163 |                              '`DepthwiseConv2D` '
164 |                              'should be defined. Found `None`.')
165 |         input_dim = int(input_shape[channel_axis])
166 |         depthwise_kernel_shape = (self.kernel_size[0],
167 |                                   self.kernel_size[1],
168 |                                   input_dim,
169 |                                   self.depth_multiplier)
170 | 
171 |         self.depthwise_kernel = self.add_weight(
172 |             shape=depthwise_kernel_shape,
173 |             initializer=self.depthwise_initializer,
174 |             name='depthwise_kernel',
175 |             regularizer=self.depthwise_regularizer,
176 |             constraint=self.depthwise_constraint)
177 | 
178 |         if self.use_bias:
179 |             self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,),
180 |                                         initializer=self.bias_initializer,
181 |                                         name='bias',
182 |                                         regularizer=self.bias_regularizer,
183 |                                         constraint=self.bias_constraint)
184 |         else:
185 |             self.bias = None
186 |         # Set input spec.
187 |         self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim})
188 |         self.built = True
189 | 
190 |     def call(self, inputs, training=None):
191 |         outputs = K.depthwise_conv2d(
192 |             inputs,
193 |             self.depthwise_kernel,
194 |             strides=self.strides,
195 |             padding=self.padding,
196 |             dilation_rate=self.dilation_rate,
197 |             data_format=self.data_format)
198 | 
199 |         if self.bias:
200 |             outputs = K.bias_add(
201 |                 outputs,
202 |                 self.bias,
203 |                 data_format=self.data_format)
204 | 
205 |         if self.activation is not None:
206 |             return self.activation(outputs)
207 | 
208 |         return outputs
209 | 
210 |     def compute_output_shape(self, input_shape):
211 |         if self.data_format == 'channels_first':
212 |             rows = input_shape[2]
213 |             cols = input_shape[3]
214 |             out_filters = input_shape[1] * self.depth_multiplier
215 |         elif self.data_format == 'channels_last':
216 |             rows = input_shape[1]
217 |             cols = input_shape[2]
218 |             out_filters = input_shape[3] * self.depth_multiplier
219 | 
220 |         rows = conv_utils.conv_output_length(rows, self.kernel_size[0],
221 |                                              self.padding,
222 |                                              self.strides[0])
223 |         cols = conv_utils.conv_output_length(cols, self.kernel_size[1],
224 |                                              self.padding,
225 |                                              self.strides[1])
226 | 
227 |         if self.data_format == 'channels_first':
228 |             return (input_shape[0], out_filters, rows, cols)
229 |         elif self.data_format == 'channels_last':
230 |             return (input_shape[0], rows, cols, out_filters)
231 | 
232 |     def get_config(self):
233 |         config = super(DepthwiseConv2D, self).get_config()
234 |         config.pop('filters')
235 |         config.pop('kernel_initializer')
236 |         config.pop('kernel_regularizer')
237 |         config.pop('kernel_constraint')
238 |         config['depth_multiplier'] = self.depth_multiplier
239 |         config['depthwise_initializer'] = initializers.serialize(self.depthwise_initializer)
240 |         config['depthwise_regularizer'] = regularizers.serialize(self.depthwise_regularizer)
241 |         config['depthwise_constraint'] = constraints.serialize(self.depthwise_constraint)
242 |         return config
243 | 
244 | 
245 | def SEMobileNet(input_shape=None,
246 |                 alpha=1.0,
247 |                 depth_multiplier=1,
248 |                 dropout=1e-3,
249 |                 include_top=True,
250 |                 weights=None,
251 |                 input_tensor=None,
252 |                 pooling=None,
253 |                 classes=1000):
254 |     """Instantiates the SE-MobileNet architecture.
255 |     Note that only TensorFlow is supported for now,
256 |     therefore it only works with the data format
257 |     `image_data_format='channels_last'` in your Keras config
258 |     at `~/.keras/keras.json`.
259 |     To load a MobileNet model via `load_model`, import the custom
260 |     objects `relu6` and `DepthwiseConv2D` and pass them to the
261 |     `custom_objects` parameter.
262 |     E.g.
263 |     model = load_model('mobilenet.h5', custom_objects={
264 |                        'relu6': mobilenet.relu6,
265 |                        'DepthwiseConv2D': mobilenet.DepthwiseConv2D})
266 |     # Arguments
267 |         input_shape: optional shape tuple, only to be specified
268 |             if `include_top` is False (otherwise the input shape
269 |             has to be `(224, 224, 3)` (with `channels_last` data format)
270 |             or (3, 224, 224) (with `channels_first` data format).
271 |             It should have exactly 3 inputs channels,
272 |             and width and height should be no smaller than 32.
273 |             E.g. `(200, 200, 3)` would be one valid value.
274 |         alpha: controls the width of the network.
275 |             - If `alpha` < 1.0, proportionally decreases the number
276 |                 of filters in each layer.
277 |             - If `alpha` > 1.0, proportionally increases the number
278 |                 of filters in each layer.
279 |             - If `alpha` = 1, default number of filters from the paper
280 |                  are used at each layer.
281 |         depth_multiplier: depth multiplier for depthwise convolution
282 |             (also called the resolution multiplier)
283 |         dropout: dropout rate
284 |         include_top: whether to include the fully-connected
285 |             layer at the top of the network.
286 |         weights: `None` (random initialization) or
287 |             `imagenet` (ImageNet weights)
288 |         input_tensor: optional Keras tensor (i.e. output of
289 |             `layers.Input()`)
290 |             to use as image input for the model.
291 |         pooling: Optional pooling mode for feature extraction
292 |             when `include_top` is `False`.
293 |             - `None` means that the output of the model
294 |                 will be the 4D tensor output of the
295 |                 last convolutional layer.
296 |             - `avg` means that global average pooling
297 |                 will be applied to the output of the
298 |                 last convolutional layer, and thus
299 |                 the output of the model will be a
300 |                 2D tensor.
301 |             - `max` means that global max pooling will
302 |                 be applied.
303 |         classes: optional number of classes to classify images
304 |             into, only to be specified if `include_top` is True, and
305 |             if no `weights` argument is specified.
306 |     # Returns
307 |         A Keras model instance.
308 |     # Raises
309 |         ValueError: in case of invalid argument for `weights`,
310 |             or invalid input shape.
311 |         RuntimeError: If attempting to run this model with a
312 |             backend that does not support separable convolutions.
313 |     """
314 | 
315 |     if K.backend() != 'tensorflow':
316 |         raise RuntimeError('Only TensorFlow backend is currently supported, '
317 |                            'as other backends do not support '
318 |                            'depthwise convolution.')
319 | 
320 |     if weights not in {'imagenet', None}:
321 |         raise ValueError('The `weights` argument should be either '
322 |                          '`None` (random initialization) or `imagenet` '
323 |                          '(pre-training on ImageNet).')
324 | 
325 |     if weights == 'imagenet' and include_top and classes != 1000:
326 |         raise ValueError('If using `weights` as ImageNet with `include_top` '
327 |                          'as true, `classes` should be 1000')
328 | 
329 |     # Determine proper input shape and default size.
330 |     if input_shape is None:
331 |         default_size = 224
332 |     else:
333 |         if K.image_data_format() == 'channels_first':
334 |             rows = input_shape[1]
335 |             cols = input_shape[2]
336 |         else:
337 |             rows = input_shape[0]
338 |             cols = input_shape[1]
339 | 
340 |         if rows == cols and rows in [128, 160, 192, 224]:
341 |             default_size = rows
342 |         else:
343 |             default_size = 224
344 | 
345 |     input_shape = _obtain_input_shape(input_shape,
346 |                                       default_size=default_size,
347 |                                       min_size=32,
348 |                                       data_format=K.image_data_format(),
349 |                                       require_flatten=include_top,
350 |                                       weights=weights)
351 | 
352 |     if K.image_data_format() == 'channels_last':
353 |         row_axis, col_axis = (0, 1)
354 |     else:
355 |         row_axis, col_axis = (1, 2)
356 |     rows = input_shape[row_axis]
357 |     cols = input_shape[col_axis]
358 | 
359 |     if input_tensor is None:
360 |         img_input = Input(shape=input_shape)
361 |     else:
362 |         if not K.is_keras_tensor(input_tensor):
363 |             img_input = Input(tensor=input_tensor, shape=input_shape)
364 |         else:
365 |             img_input = input_tensor
366 | 
367 |     x = _conv_block(img_input, 32, alpha, strides=(2, 2))
368 |     x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1)
369 | 
370 |     x = _depthwise_conv_block(x, 128, alpha, depth_multiplier,
371 |                               strides=(2, 2), block_id=2)
372 |     x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3)
373 | 
374 |     x = _depthwise_conv_block(x, 256, alpha, depth_multiplier,
375 |                               strides=(2, 2), block_id=4)
376 |     x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5)
377 | 
378 |     x = _depthwise_conv_block(x, 512, alpha, depth_multiplier,
379 |                               strides=(2, 2), block_id=6)
380 |     x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7)
381 |     x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8)
382 |     x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9)
383 |     x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10)
384 |     x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11)
385 | 
386 |     x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier,
387 |                               strides=(2, 2), block_id=12)
388 |     x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13)
389 | 
390 |     if include_top:
391 |         if K.image_data_format() == 'channels_first':
392 |             shape = (int(1024 * alpha), 1, 1)
393 |         else:
394 |             shape = (1, 1, int(1024 * alpha))
395 | 
396 |         x = GlobalAveragePooling2D()(x)
397 |         x = Reshape(shape, name='reshape_n_1')(x)
398 |         x = Dropout(dropout, name='dropout')(x)
399 |         x = Conv2D(classes, (1, 1),
400 |                    padding='same', name='conv_preds')(x)
401 |         x = Activation('softmax', name='act_softmax')(x)
402 |         x = Reshape((classes,), name='reshape_final')(x)
403 |     else:
404 |         if pooling == 'avg':
405 |             x = GlobalAveragePooling2D()(x)
406 |         elif pooling == 'max':
407 |             x = GlobalMaxPooling2D()(x)
408 | 
409 |     # Ensure that the model takes into account
410 |     # any potential predecessors of `input_tensor`.
411 |     if input_tensor is not None:
412 |         inputs = get_source_inputs(input_tensor)
413 |     else:
414 |         inputs = img_input
415 | 
416 |     # Create model.
417 |     model = Model(inputs, x, name='se_mobilenet_%0.2f_%s' % (alpha, rows))
418 | 
419 |     return model
420 | 
421 | 
422 | def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
423 |     """Adds an initial convolution layer (with batch normalization and relu6).
424 |     # Arguments
425 |         inputs: Input tensor of shape `(rows, cols, 3)`
426 |             (with `channels_last` data format) or
427 |             (3, rows, cols) (with `channels_first` data format).
428 |             It should have exactly 3 inputs channels,
429 |             and width and height should be no smaller than 32.
430 |             E.g. `(224, 224, 3)` would be one valid value.
431 |         filters: Integer, the dimensionality of the output space
432 |             (i.e. the number output of filters in the convolution).
433 |         alpha: controls the width of the network.
434 |             - If `alpha` < 1.0, proportionally decreases the number
435 |                 of filters in each layer.
436 |             - If `alpha` > 1.0, proportionally increases the number
437 |                 of filters in each layer.
438 |             - If `alpha` = 1, default number of filters from the paper
439 |                  are used at each layer.
440 |         kernel: An integer or tuple/list of 2 integers, specifying the
441 |             width and height of the 2D convolution window.
442 |             Can be a single integer to specify the same value for
443 |             all spatial dimensions.
444 |         strides: An integer or tuple/list of 2 integers,
445 |             specifying the strides of the convolution along the width and height.
446 |             Can be a single integer to specify the same value for
447 |             all spatial dimensions.
448 |             Specifying any stride value != 1 is incompatible with specifying
449 |             any `dilation_rate` value != 1.
450 |     # Input shape
451 |         4D tensor with shape:
452 |         `(samples, channels, rows, cols)` if data_format='channels_first'
453 |         or 4D tensor with shape:
454 |         `(samples, rows, cols, channels)` if data_format='channels_last'.
455 |     # Output shape
456 |         4D tensor with shape:
457 |         `(samples, filters, new_rows, new_cols)` if data_format='channels_first'
458 |         or 4D tensor with shape:
459 |         `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
460 |         `rows` and `cols` values might have changed due to stride.
461 |     # Returns
462 |         Output tensor of block.
463 |     """
464 |     channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
465 |     filters = int(filters * alpha)
466 |     x = Conv2D(filters, kernel,
467 |                padding='same',
468 |                use_bias=False,
469 |                strides=strides,
470 |                name='conv1')(inputs)
471 |     x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
472 |     return Activation(relu6, name='conv1_relu')(x)
473 | 
474 | 
475 | def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
476 |                           depth_multiplier=1, strides=(1, 1), block_id=1):
477 |     """Adds a depthwise convolution block.
478 |     A depthwise convolution block consists of a depthwise conv,
479 |     batch normalization, relu6, pointwise convolution,
480 |     batch normalization and relu6 activation.
481 |     # Arguments
482 |         inputs: Input tensor of shape `(rows, cols, channels)`
483 |             (with `channels_last` data format) or
484 |             (channels, rows, cols) (with `channels_first` data format).
485 |         pointwise_conv_filters: Integer, the dimensionality of the output space
486 |             (i.e. the number output of filters in the pointwise convolution).
487 |         alpha: controls the width of the network.
488 |             - If `alpha` < 1.0, proportionally decreases the number
489 |                 of filters in each layer.
490 |             - If `alpha` > 1.0, proportionally increases the number
491 |                 of filters in each layer.
492 |             - If `alpha` = 1, default number of filters from the paper
493 |                  are used at each layer.
494 |         depth_multiplier: The number of depthwise convolution output channels
495 |             for each input channel.
496 |             The total number of depthwise convolution output
497 |             channels will be equal to `filters_in * depth_multiplier`.
498 |         strides: An integer or tuple/list of 2 integers,
499 |             specifying the strides of the convolution along the width and height.
500 |             Can be a single integer to specify the same value for
501 |             all spatial dimensions.
502 |             Specifying any stride value != 1 is incompatible with specifying
503 |             any `dilation_rate` value != 1.
504 |         block_id: Integer, a unique identification designating the block number.
505 |     # Input shape
506 |         4D tensor with shape:
507 |         `(batch, channels, rows, cols)` if data_format='channels_first'
508 |         or 4D tensor with shape:
509 |         `(batch, rows, cols, channels)` if data_format='channels_last'.
510 |     # Output shape
511 |         4D tensor with shape:
512 |         `(batch, filters, new_rows, new_cols)` if data_format='channels_first'
513 |         or 4D tensor with shape:
514 |         `(batch, new_rows, new_cols, filters)` if data_format='channels_last'.
515 |         `rows` and `cols` values might have changed due to stride.
516 |     # Returns
517 |         Output tensor of block.
518 |     """
519 |     channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
520 |     pointwise_conv_filters = int(pointwise_conv_filters * alpha)
521 | 
522 |     x = DepthwiseConv2D((3, 3),
523 |                         padding='same',
524 |                         depth_multiplier=depth_multiplier,
525 |                         strides=strides,
526 |                         use_bias=False,
527 |                         name='conv_dw_%d' % block_id)(inputs)
528 |     x = BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
529 |     x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x)
530 | 
531 |     x = Conv2D(pointwise_conv_filters, (1, 1),
532 |                padding='same',
533 |                use_bias=False,
534 |                strides=(1, 1),
535 |                name='conv_pw_%d' % block_id)(x)
536 |     x = BatchNormalization(axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x)
537 |     x = Activation(relu6, name='conv_pw_%d_relu' % block_id)(x)
538 | 
539 |     # global context block
540 |     x = global_context_block(x)
541 |     return x
542 | 


--------------------------------------------------------------------------------
/gc_resnet.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Global Context ResNets
  3 | 
  4 | References:
  5 |     - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385)
  6 |     - []() # added when paper is published on Arxiv
  7 | '''
  8 | from __future__ import print_function
  9 | from __future__ import absolute_import
 10 | from __future__ import division
 11 | 
 12 | from keras.models import Model
 13 | from keras.layers import Input
 14 | from keras.layers import Dense
 15 | from keras.layers import Reshape
 16 | from keras.layers import Activation
 17 | from keras.layers import BatchNormalization
 18 | from keras.layers import MaxPooling2D
 19 | from keras.layers import GlobalAveragePooling2D
 20 | from keras.layers import GlobalMaxPooling2D
 21 | from keras.layers import Conv2D
 22 | from keras.layers import add
 23 | from keras.layers import multiply
 24 | from keras.regularizers import l2
 25 | from keras.utils import conv_utils
 26 | from keras.utils.data_utils import get_file
 27 | from keras.engine.topology import get_source_inputs
 28 | from keras_applications.imagenet_utils import _obtain_input_shape
 29 | from keras_applications.resnet50 import preprocess_input
 30 | from keras_applications.imagenet_utils import decode_predictions
 31 | from keras import backend as K
 32 | 
 33 | from gc import global_context_block
 34 | 
 35 | __all__ = ['GCResNet', 'GCResNet50', 'GCResNet101', 'GCResNet154', 'preprocess_input', 'decode_predictions']
 36 | 
 37 | 
 38 | WEIGHTS_PATH = ""
 39 | WEIGHTS_PATH_NO_TOP = ""
 40 | 
 41 | 
 42 | def GCResNet(input_shape=None,
 43 |              initial_conv_filters=64,
 44 |              depth=[3, 4, 6, 3],
 45 |              filters=[64, 128, 256, 512],
 46 |              width=1,
 47 |              bottleneck=False,
 48 |              weight_decay=1e-4,
 49 |              include_top=True,
 50 |              weights=None,
 51 |              input_tensor=None,
 52 |              pooling=None,
 53 |              classes=1000):
 54 |     """ Instantiate the Global Context ResNet architecture. Note that ,
 55 |         when using TensorFlow for best performance you should set
 56 |         `image_data_format="channels_last"` in your Keras config
 57 |         at ~/.keras/keras.json.
 58 |         The model are compatible with both
 59 |         TensorFlow and Theano. The dimension ordering
 60 |         convention used by the model is the one
 61 |         specified in your Keras config file.
 62 |         # Arguments
 63 |             initial_conv_filters: number of features for the initial convolution
 64 |             depth: number or layers in the each block, defined as a list.
 65 |                 ResNet-50  = [3, 4, 6, 3]
 66 |                 ResNet-101 = [3, 6, 23, 3]
 67 |                 ResNet-152 = [3, 8, 36, 3]
 68 |             filter: number of filters per block, defined as a list.
 69 |                 filters = [64, 128, 256, 512
 70 |             width: width multiplier for the network (for Wide ResNets)
 71 |             bottleneck: adds a bottleneck conv to reduce computation
 72 |             weight_decay: weight decay (l2 norm)
 73 |             include_top: whether to include the fully-connected
 74 |                 layer at the top of the network.
 75 |             weights: `None` (random initialization) or `imagenet` (trained
 76 |                 on ImageNet)
 77 |             input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
 78 |                 to use as image input for the model.
 79 |             input_shape: optional shape tuple, only to be specified
 80 |                 if `include_top` is False (otherwise the input shape
 81 |                 has to be `(224, 224, 3)` (with `tf` dim ordering)
 82 |                 or `(3, 224, 224)` (with `th` dim ordering).
 83 |                 It should have exactly 3 inputs channels,
 84 |                 and width and height should be no smaller than 8.
 85 |                 E.g. `(200, 200, 3)` would be one valid value.
 86 |             pooling: Optional pooling mode for feature extraction
 87 |                 when `include_top` is `False`.
 88 |                 - `None` means that the output of the model will be
 89 |                     the 4D tensor output of the
 90 |                     last convolutional layer.
 91 |                 - `avg` means that global average pooling
 92 |                     will be applied to the output of the
 93 |                     last convolutional layer, and thus
 94 |                     the output of the model will be a 2D tensor.
 95 |                 - `max` means that global max pooling will
 96 |                     be applied.
 97 |             classes: optional number of classes to classify images
 98 |                 into, only to be specified if `include_top` is True, and
 99 |                 if no `weights` argument is specified.
100 |         # Returns
101 |             A Keras model instance.
102 |         """
103 | 
104 |     if weights not in {'imagenet', None}:
105 |         raise ValueError('The `weights` argument should be either '
106 |                          '`None` (random initialization) or `imagenet` '
107 |                          '(pre-training on ImageNet).')
108 | 
109 |     if weights == 'imagenet' and include_top and classes != 1000:
110 |         raise ValueError('If using `weights` as imagenet with `include_top`'
111 |                          ' as true, `classes` should be 1000')
112 | 
113 |     assert len(depth) == len(filters), "The length of filter increment list must match the length " \
114 |                                        "of the depth list."
115 | 
116 |     # Determine proper input shape
117 |     input_shape = _obtain_input_shape(input_shape,
118 |                                       default_size=224,
119 |                                       min_size=32,
120 |                                       data_format=K.image_data_format(),
121 |                                       require_flatten=False)
122 | 
123 |     if input_tensor is None:
124 |         img_input = Input(shape=input_shape)
125 |     else:
126 |         if not K.is_keras_tensor(input_tensor):
127 |             img_input = Input(tensor=input_tensor, shape=input_shape)
128 |         else:
129 |             img_input = input_tensor
130 | 
131 |     x = _create_se_resnet(classes, img_input, include_top, initial_conv_filters,
132 |                           filters, depth, width, bottleneck, weight_decay, pooling)
133 | 
134 |     # Ensure that the model takes into account
135 |     # any potential predecessors of `input_tensor`.
136 |     if input_tensor is not None:
137 |         inputs = get_source_inputs(input_tensor)
138 |     else:
139 |         inputs = img_input
140 |     # Create model.
141 |     model = Model(inputs, x, name='resnext')
142 | 
143 |     # load weights
144 | 
145 |     return model
146 | 
147 | 
148 | def GCResNet18(input_shape=None,
149 |                width=1,
150 |                bottleneck=False,
151 |                weight_decay=1e-4,
152 |                include_top=True,
153 |                weights=None,
154 |                input_tensor=None,
155 |                pooling=None,
156 |                classes=1000):
157 |     return GCResNet(input_shape,
158 |                     depth=[2, 2, 2, 2],
159 |                     width=width,
160 |                     bottleneck=bottleneck,
161 |                     weight_decay=weight_decay,
162 |                     include_top=include_top,
163 |                     weights=weights,
164 |                     input_tensor=input_tensor,
165 |                     pooling=pooling,
166 |                     classes=classes)
167 | 
168 | 
169 | def GCResNet34(input_shape=None,
170 |                width=1,
171 |                bottleneck=False,
172 |                weight_decay=1e-4,
173 |                include_top=True,
174 |                weights=None,
175 |                input_tensor=None,
176 |                pooling=None,
177 |                classes=1000):
178 |     return GCResNet(input_shape,
179 |                     depth=[3, 4, 6, 3],
180 |                     width=width,
181 |                     bottleneck=bottleneck,
182 |                     weight_decay=weight_decay,
183 |                     include_top=include_top,
184 |                     weights=weights,
185 |                     input_tensor=input_tensor,
186 |                     pooling=pooling,
187 |                     classes=classes)
188 | 
189 | 
190 | def GCResNet50(input_shape=None,
191 |                width=1,
192 |                bottleneck=True,
193 |                weight_decay=1e-4,
194 |                include_top=True,
195 |                weights=None,
196 |                input_tensor=None,
197 |                pooling=None,
198 |                classes=1000):
199 |     return GCResNet(input_shape,
200 |                     width=width,
201 |                     bottleneck=bottleneck,
202 |                     weight_decay=weight_decay,
203 |                     include_top=include_top,
204 |                     weights=weights,
205 |                     input_tensor=input_tensor,
206 |                     pooling=pooling,
207 |                     classes=classes)
208 | 
209 | 
210 | def GCResNet101(input_shape=None,
211 |                 width=1,
212 |                 bottleneck=True,
213 |                 weight_decay=1e-4,
214 |                 include_top=True,
215 |                 weights=None,
216 |                 input_tensor=None,
217 |                 pooling=None,
218 |                 classes=1000):
219 |     return GCResNet(input_shape,
220 |                     depth=[3, 6, 23, 3],
221 |                     width=width,
222 |                     bottleneck=bottleneck,
223 |                     weight_decay=weight_decay,
224 |                     include_top=include_top,
225 |                     weights=weights,
226 |                     input_tensor=input_tensor,
227 |                     pooling=pooling,
228 |                     classes=classes)
229 | 
230 | 
231 | def GCResNet154(input_shape=None,
232 |                 width=1,
233 |                 bottleneck=True,
234 |                 weight_decay=1e-4,
235 |                 include_top=True,
236 |                 weights=None,
237 |                 input_tensor=None,
238 |                 pooling=None,
239 |                 classes=1000):
240 |     return GCResNet(input_shape,
241 |                     depth=[3, 8, 36, 3],
242 |                     width=width,
243 |                     bottleneck=bottleneck,
244 |                     weight_decay=weight_decay,
245 |                     include_top=include_top,
246 |                     weights=weights,
247 |                     input_tensor=input_tensor,
248 |                     pooling=pooling,
249 |                     classes=classes)
250 | 
251 | 
252 | def _resnet_block(input, filters, k=1, strides=(1, 1)):
253 |     ''' Adds a pre-activation resnet block without bottleneck layers
254 | 
255 |     Args:
256 |         input: input tensor
257 |         filters: number of output filters
258 |         k: width factor
259 |         strides: strides of the convolution layer
260 | 
261 |     Returns: a keras tensor
262 |     '''
263 |     init = input
264 |     channel_axis = 1 if K.image_data_format() == "channels_first" else -1
265 | 
266 |     x = BatchNormalization(axis=channel_axis)(input)
267 |     x = Activation('relu')(x)
268 | 
269 |     if strides != (1, 1) or init._keras_shape[channel_axis] != filters * k:
270 |         init = Conv2D(filters * k, (1, 1), padding='same', kernel_initializer='he_normal',
271 |                       use_bias=False, strides=strides)(x)
272 | 
273 |     x = Conv2D(filters * k, (3, 3), padding='same', kernel_initializer='he_normal',
274 |                use_bias=False, strides=strides)(x)
275 |     x = BatchNormalization(axis=channel_axis)(x)
276 |     x = Activation('relu')(x)
277 | 
278 |     x = Conv2D(filters * k, (3, 3), padding='same', kernel_initializer='he_normal',
279 |                use_bias=False)(x)
280 | 
281 |     # global context block
282 |     x = global_context_block(x)
283 | 
284 |     m = add([x, init])
285 |     return m
286 | 
287 | 
288 | def _resnet_bottleneck_block(input, filters, k=1, strides=(1, 1)):
289 |     ''' Adds a pre-activation resnet block with bottleneck layers
290 | 
291 |     Args:
292 |         input: input tensor
293 |         filters: number of output filters
294 |         k: width factor
295 |         strides: strides of the convolution layer
296 | 
297 |     Returns: a keras tensor
298 |     '''
299 |     init = input
300 |     channel_axis = 1 if K.image_data_format() == "channels_first" else -1
301 |     bottleneck_expand = 4
302 | 
303 |     x = BatchNormalization(axis=channel_axis)(input)
304 |     x = Activation('relu')(x)
305 | 
306 |     if strides != (1, 1) or init._keras_shape[channel_axis] != bottleneck_expand * filters * k:
307 |         init = Conv2D(bottleneck_expand * filters * k, (1, 1), padding='same', kernel_initializer='he_normal',
308 |                       use_bias=False, strides=strides)(x)
309 | 
310 |     x = Conv2D(filters * k, (1, 1), padding='same', kernel_initializer='he_normal',
311 |                use_bias=False)(x)
312 |     x = BatchNormalization(axis=channel_axis)(x)
313 |     x = Activation('relu')(x)
314 | 
315 |     x = Conv2D(filters * k, (3, 3), padding='same', kernel_initializer='he_normal',
316 |                use_bias=False, strides=strides)(x)
317 |     x = BatchNormalization(axis=channel_axis)(x)
318 |     x = Activation('relu')(x)
319 | 
320 |     x = Conv2D(bottleneck_expand * filters * k, (1, 1), padding='same', kernel_initializer='he_normal',
321 |                use_bias=False)(x)
322 | 
323 |     # global context block
324 |     x = global_context_block(x)
325 | 
326 |     m = add([x, init])
327 |     return m
328 | 
329 | 
330 | def _create_se_resnet(classes, img_input, include_top, initial_conv_filters, filters,
331 |                       depth, width, bottleneck, weight_decay, pooling):
332 |     '''Creates a SE ResNet model with specified parameters
333 |     Args:
334 |         initial_conv_filters: number of features for the initial convolution
335 |         include_top: Flag to include the last dense layer
336 |         filters: number of filters per block, defined as a list.
337 |             filters = [64, 128, 256, 512
338 |         depth: number or layers in the each block, defined as a list.
339 |             ResNet-50  = [3, 4, 6, 3]
340 |             ResNet-101 = [3, 6, 23, 3]
341 |             ResNet-152 = [3, 8, 36, 3]
342 |         width: width multiplier for network (for Wide ResNet)
343 |         bottleneck: adds a bottleneck conv to reduce computation
344 |         weight_decay: weight_decay (l2 norm)
345 |         pooling: Optional pooling mode for feature extraction
346 |             when `include_top` is `False`.
347 |             - `None` means that the output of the model will be
348 |                 the 4D tensor output of the
349 |                 last convolutional layer.
350 |             - `avg` means that global average pooling
351 |                 will be applied to the output of the
352 |                 last convolutional layer, and thus
353 |                 the output of the model will be a 2D tensor.
354 |             - `max` means that global max pooling will
355 |                 be applied.
356 |     Returns: a Keras Model
357 |     '''
358 |     channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
359 |     N = list(depth)
360 | 
361 |     # block 1 (initial conv block)
362 |     x = Conv2D(initial_conv_filters, (7, 7), padding='same', use_bias=False, strides=(2, 2),
363 |                kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(img_input)
364 | 
365 |     x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
366 | 
367 |     # block 2 (projection block)
368 |     for i in range(N[0]):
369 |         if bottleneck:
370 |             x = _resnet_bottleneck_block(x, filters[0], width)
371 |         else:
372 |             x = _resnet_block(x, filters[0], width)
373 | 
374 |     # block 3 - N
375 |     for k in range(1, len(N)):
376 |         if bottleneck:
377 |             x = _resnet_bottleneck_block(x, filters[k], width, strides=(2, 2))
378 |         else:
379 |             x = _resnet_block(x, filters[k], width, strides=(2, 2))
380 | 
381 |         for i in range(N[k] - 1):
382 |             if bottleneck:
383 |                 x = _resnet_bottleneck_block(x, filters[k], width)
384 |             else:
385 |                 x = _resnet_block(x, filters[k], width)
386 | 
387 |     x = BatchNormalization(axis=channel_axis)(x)
388 |     x = Activation('relu')(x)
389 | 
390 |     if include_top:
391 |         x = GlobalAveragePooling2D()(x)
392 |         x = Dense(classes, use_bias=False, kernel_regularizer=l2(weight_decay),
393 |                   activation='softmax')(x)
394 |     else:
395 |         if pooling == 'avg':
396 |             x = GlobalAveragePooling2D()(x)
397 |         elif pooling == 'max':
398 |             x = GlobalMaxPooling2D()(x)
399 | 
400 |     return x
401 | 


--------------------------------------------------------------------------------
/group_norm.py:
--------------------------------------------------------------------------------
  1 | from keras.engine import Layer, InputSpec
  2 | from keras import initializers
  3 | from keras import regularizers
  4 | from keras import constraints
  5 | from keras import backend as K
  6 | 
  7 | from keras.utils.generic_utils import get_custom_objects
  8 | 
  9 | 
 10 | class GroupNormalization(Layer):
 11 |     """Group normalization layer
 12 |     Group Normalization divides the channels into groups and computes within each group
 13 |     the mean and variance for normalization. GN's computation is independent of batch sizes,
 14 |     and its accuracy is stable in a wide range of batch sizes
 15 |     # Arguments
 16 |         groups: Integer, the number of groups for Group Normalization.
 17 |         axis: Integer, the axis that should be normalized
 18 |             (typically the features axis).
 19 |             For instance, after a `Conv2D` layer with
 20 |             `data_format="channels_first"`,
 21 |             set `axis=1` in `BatchNormalization`.
 22 |         epsilon: Small float added to variance to avoid dividing by zero.
 23 |         center: If True, add offset of `beta` to normalized tensor.
 24 |             If False, `beta` is ignored.
 25 |         scale: If True, multiply by `gamma`.
 26 |             If False, `gamma` is not used.
 27 |             When the next layer is linear (also e.g. `nn.relu`),
 28 |             this can be disabled since the scaling
 29 |             will be done by the next layer.
 30 |         beta_initializer: Initializer for the beta weight.
 31 |         gamma_initializer: Initializer for the gamma weight.
 32 |         beta_regularizer: Optional regularizer for the beta weight.
 33 |         gamma_regularizer: Optional regularizer for the gamma weight.
 34 |         beta_constraint: Optional constraint for the beta weight.
 35 |         gamma_constraint: Optional constraint for the gamma weight.
 36 |     # Input shape
 37 |         Arbitrary. Use the keyword argument `input_shape`
 38 |         (tuple of integers, does not include the samples axis)
 39 |         when using this layer as the first layer in a model.
 40 |     # Output shape
 41 |         Same shape as input.
 42 |     # References
 43 |         - [Group Normalization](https://arxiv.org/abs/1803.08494)
 44 |     """
 45 | 
 46 |     def __init__(self,
 47 |                  groups=32,
 48 |                  axis=-1,
 49 |                  epsilon=1e-5,
 50 |                  center=True,
 51 |                  scale=True,
 52 |                  beta_initializer='zeros',
 53 |                  gamma_initializer='ones',
 54 |                  beta_regularizer=None,
 55 |                  gamma_regularizer=None,
 56 |                  beta_constraint=None,
 57 |                  gamma_constraint=None,
 58 |                  **kwargs):
 59 |         super(GroupNormalization, self).__init__(**kwargs)
 60 |         self.supports_masking = True
 61 |         self.groups = groups
 62 |         self.axis = axis
 63 |         self.epsilon = epsilon
 64 |         self.center = center
 65 |         self.scale = scale
 66 |         self.beta_initializer = initializers.get(beta_initializer)
 67 |         self.gamma_initializer = initializers.get(gamma_initializer)
 68 |         self.beta_regularizer = regularizers.get(beta_regularizer)
 69 |         self.gamma_regularizer = regularizers.get(gamma_regularizer)
 70 |         self.beta_constraint = constraints.get(beta_constraint)
 71 |         self.gamma_constraint = constraints.get(gamma_constraint)
 72 | 
 73 |     def build(self, input_shape):
 74 |         dim = input_shape[self.axis]
 75 | 
 76 |         if dim is None:
 77 |             raise ValueError('Axis ' + str(self.axis) + ' of '
 78 |                              'input tensor should have a defined dimension '
 79 |                              'but the layer received an input with shape ' +
 80 |                              str(input_shape) + '.')
 81 | 
 82 |         if dim < self.groups:
 83 |             raise ValueError('Number of groups (' + str(self.groups) + ') cannot be '
 84 |                              'more than the number of channels (' +
 85 |                              str(dim) + ').')
 86 | 
 87 |         if dim % self.groups != 0:
 88 |             raise ValueError('Number of groups (' + str(self.groups) + ') must be a '
 89 |                              'multiple of the number of channels (' +
 90 |                              str(dim) + ').')
 91 | 
 92 |         self.input_spec = InputSpec(ndim=len(input_shape),
 93 |                                     axes={self.axis: dim})
 94 |         shape = (dim,)
 95 | 
 96 |         if self.scale:
 97 |             self.gamma = self.add_weight(shape=shape,
 98 |                                          name='gamma',
 99 |                                          initializer=self.gamma_initializer,
100 |                                          regularizer=self.gamma_regularizer,
101 |                                          constraint=self.gamma_constraint)
102 |         else:
103 |             self.gamma = None
104 |         if self.center:
105 |             self.beta = self.add_weight(shape=shape,
106 |                                         name='beta',
107 |                                         initializer=self.beta_initializer,
108 |                                         regularizer=self.beta_regularizer,
109 |                                         constraint=self.beta_constraint)
110 |         else:
111 |             self.beta = None
112 |         self.built = True
113 | 
114 |     def call(self, inputs, **kwargs):
115 |         input_shape = K.int_shape(inputs)
116 |         tensor_input_shape = K.shape(inputs)
117 | 
118 |         # Prepare broadcasting shape.
119 |         reduction_axes = list(range(len(input_shape)))
120 |         del reduction_axes[self.axis]
121 |         broadcast_shape = [1] * len(input_shape)
122 |         broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
123 |         broadcast_shape.insert(1, self.groups)
124 | 
125 |         reshape_group_shape = K.shape(inputs)
126 |         group_axes = [reshape_group_shape[i] for i in range(len(input_shape))]
127 |         group_axes[self.axis] = input_shape[self.axis] // self.groups
128 |         group_axes.insert(1, self.groups)
129 | 
130 |         # reshape inputs to new group shape
131 |         group_shape = [group_axes[0], self.groups] + group_axes[2:]
132 |         group_shape = K.stack(group_shape)
133 |         inputs = K.reshape(inputs, group_shape)
134 | 
135 |         group_reduction_axes = list(range(len(group_axes)))
136 |         group_reduction_axes = group_reduction_axes[2:]
137 | 
138 |         mean = K.mean(inputs, axis=group_reduction_axes, keepdims=True)
139 |         variance = K.var(inputs, axis=group_reduction_axes, keepdims=True)
140 | 
141 |         inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))
142 | 
143 |         # prepare broadcast shape
144 |         inputs = K.reshape(inputs, group_shape)
145 |         outputs = inputs
146 | 
147 |         # In this case we must explicitly broadcast all parameters.
148 |         if self.scale:
149 |             broadcast_gamma = K.reshape(self.gamma, broadcast_shape)
150 |             outputs = outputs * broadcast_gamma
151 | 
152 |         if self.center:
153 |             broadcast_beta = K.reshape(self.beta, broadcast_shape)
154 |             outputs = outputs + broadcast_beta
155 | 
156 |         outputs = K.reshape(outputs, tensor_input_shape)
157 | 
158 |         return outputs
159 | 
160 |     def get_config(self):
161 |         config = {
162 |             'groups': self.groups,
163 |             'axis': self.axis,
164 |             'epsilon': self.epsilon,
165 |             'center': self.center,
166 |             'scale': self.scale,
167 |             'beta_initializer': initializers.serialize(self.beta_initializer),
168 |             'gamma_initializer': initializers.serialize(self.gamma_initializer),
169 |             'beta_regularizer': regularizers.serialize(self.beta_regularizer),
170 |             'gamma_regularizer': regularizers.serialize(self.gamma_regularizer),
171 |             'beta_constraint': constraints.serialize(self.beta_constraint),
172 |             'gamma_constraint': constraints.serialize(self.gamma_constraint)
173 |         }
174 |         base_config = super(GroupNormalization, self).get_config()
175 |         return dict(list(base_config.items()) + list(config.items()))
176 | 
177 |     def compute_output_shape(self, input_shape):
178 |         return input_shape
179 | 
180 | 
181 | get_custom_objects().update({'GroupNormalization': GroupNormalization})
182 | 
183 | 
184 | if __name__ == '__main__':
185 |     from keras.layers import Input
186 |     from keras.models import Model
187 |     ip = Input(shape=(None, None, 4))
188 |     x = GroupNormalization(groups=2, axis=-1, epsilon=0.1)(ip)
189 |     model = Model(ip, x)
190 |     model.summary()
191 | 


--------------------------------------------------------------------------------
/images/gc.PNG:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/titu1994/keras-global-context-networks/73f0a08f7c9e3a37e41054c0828e7ac3b87a22af/images/gc.PNG


--------------------------------------------------------------------------------