├── .gitignore ├── LICENSE ├── README.md ├── gc.py ├── gc_densenet.py ├── gc_inception_resnet_v2.py ├── gc_inception_v3.py ├── gc_mobilenets.py ├── gc_resnet.py ├── group_norm.py └── images └── gc.PNG /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # Pycharm 107 | .idea 108 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Somshubra Majumdar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Keras Global Context Attention Blocks 2 | 3 | Keras implementation of the Global Context block from the paper [GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492). 4 | 5 | Supports Conv1D, Conv2D and Conv3D directly with no modifications. 6 | 7 | 8 | 9 | # Usage 10 | 11 | Import `global_context_block` from `gc.py` and provide it a tensor as input. 12 | 13 | ```python 14 | from gc import global_context_block 15 | 16 | ip = Input(...) 17 | x = ConvND(...)(ip) 18 | 19 | # apply Global Context 20 | x = global_context_block(x, reduction_ratio=16, transform_activation='linear') 21 | ... 22 | ``` 23 | 24 | # Parameters 25 | 26 | There are just two parameters to manage: 27 | ``` 28 | - reduction_ratio: The ratio by which the transform bottleneck scales down the number of input channels. 29 | - transform_activation: The activation function prior to addition of the input with the context. 30 | The paper uses no activation, but `sigmoid` may do better. 31 | ``` 32 | 33 | # Requirements 34 | - Keras 2.2.4+ 35 | - Tensorflow (1.13+) or CNTK
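# Example

A more complete, minimal classifier (illustrative only; the layer sizes and the 10-class head below are arbitrary choices, not from this repository):

```python
from keras.layers import Input, Conv2D, GlobalAveragePooling2D, Dense
from keras.models import Model

from gc import global_context_block

ip = Input(shape=(32, 32, 3))
x = Conv2D(64, (3, 3), padding='same', activation='relu')(ip)

# self attention over the 64-channel feature map; 64 / 16 = 4 bottleneck channels
x = global_context_block(x, reduction_ratio=16, transform_activation='linear')

x = GlobalAveragePooling2D()(x)
x = Dense(10, activation='softmax')(x)

model = Model(ip, x)
model.summary()
```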
-------------------------------------------------------------------------------- /gc.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Conv1D, Conv2D, Conv3D 2 | from keras.layers import Reshape 3 | from keras.layers import Activation 4 | from keras.layers import Softmax 5 | from keras.layers import Permute 6 | from keras.layers import add, dot 7 | 8 | from keras import backend as K 9 | 10 | from group_norm import GroupNormalization 11 | 12 | 13 | def global_context_block(ip, reduction_ratio=16, transform_activation='linear'): 14 | """ 15 | Adds a Global Context attention block for self attention to the input tensor. 16 | Input tensor can be of rank 3 (temporal), 4 (spatial) or 5 (spatio-temporal). 17 | 18 | # Arguments: 19 | ip: input tensor. The channel dimension and all spatial / temporal 20 | dimensions must be defined (not `None`), since the block builds 21 | `Reshape` operations from the static input shape. Rank must be 22 | 3, 4 or 5. 23 | reduction_ratio: Reduces the input filters by this factor for the 24 | bottleneck block of the transform submodule. Note: the reduction 25 | ratio must be set such that it divides the input number of channels. 26 | transform_activation: activation function to apply to the output 27 | of the transform block. Can be any string activation function available 28 | to Keras. 29 | 30 | # Returns: 31 | a tensor of same shape as input 32 | """ 33 | channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 34 | ip_shape = K.int_shape(ip) 35 | 36 | # check rank and calculate the input shape 37 | if len(ip_shape) == 3: # temporal / time series data 38 | rank = 3 39 | batchsize, dim1, channels = ip_shape if channel_dim == -1 else (ip_shape[0], ip_shape[2], ip_shape[1]) 40 | 41 | elif len(ip_shape) == 4: # spatial / image data 42 | rank = 4 43 | 44 | if channel_dim == 1: 45 | batchsize, channels, dim1, dim2 = ip_shape 46 | else: 47 | batchsize, dim1, dim2, channels = ip_shape 48 | 49 | elif len(ip_shape) == 5: # spatio-temporal / Video or Voxel data 50 | rank = 5 51 | 52 | if channel_dim == 1: 53 | batchsize, channels, dim1, dim2, dim3 = ip_shape 54 | else: 55 | batchsize, dim1, dim2, dim3, channels = ip_shape 56 | 57 | else: 58 | raise ValueError('Input dimension has to be either 3 (temporal), 4 (spatial) or 5 (spatio-temporal)') 59 | 60 | if K.image_data_format() == 'channels_first': 61 | flat_spatial_dim = -1 62 | else: 63 | flat_spatial_dim = 1 64 | 65 | """ Context Modelling Block """ 66 | # `***` denotes the flattened spatial dimensions 67 | input_flat = _spatial_flattenND(ip, rank) 68 | # [B, ***, C] or [B, C, ***] 69 | context = _convND(ip, rank, channels=1, kernel=1) 70 | # [B, ..., 1] or [B, 1, ...] 71 | context = _spatial_flattenND(context, rank) 72 | # [B, ***, 1] or [B, 1, ***] 73 | context = Softmax(axis=flat_spatial_dim)(context) 74 | 75 | # Compute context block outputs 76 | context = dot([input_flat, context], axes=flat_spatial_dim) 77 | # [B, C, 1] 78 | context = _spatial_expandND(context, rank) 79 | # [B, C, 1...] or [B, 1..., C] 80 | 81 | """ Transform block """ 82 | # Transform bottleneck 83 | # [B, C // R, 1...] or [B, 1..., C // R] 84 | transform = _convND(context, rank, channels // reduction_ratio, kernel=1) 85 | # Group normalization acts as Layer Normalization when groups = 1 86 | transform = GroupNormalization(groups=1, axis=channel_dim)(transform) 87 | transform = Activation('relu')(transform) 88 | 89 | # Transform output block 90 | # [B, C, 1...]
or [B, 1..., C] 91 | transform = _convND(transform, rank, channels, kernel=1) 92 | transform = Activation(transform_activation)(transform) 93 | 94 | # apply context transform 95 | out = add([ip, transform]) 96 | 97 | return out 98 | 99 | 100 | def _convND(ip, rank, channels, kernel=1): 101 | assert rank in [3, 4, 5], "Rank of input must be 3, 4 or 5" 102 | 103 | if rank == 3: 104 | x = Conv1D(channels, kernel, padding='same', use_bias=False, kernel_initializer='he_normal')(ip) 105 | elif rank == 4: 106 | x = Conv2D(channels, (kernel, kernel), padding='same', use_bias=False, kernel_initializer='he_normal')(ip) 107 | else: 108 | x = Conv3D(channels, (kernel, kernel, kernel), padding='same', use_bias=False, kernel_initializer='he_normal')(ip) 109 | 110 | return x 111 | 112 | 113 | def _spatial_flattenND(ip, rank): 114 | assert rank in [3, 4, 5], "Rank of input must be 3, 4 or 5" 115 | 116 | ip_shape = K.int_shape(ip) 117 | channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 118 | 119 | if rank == 3: 120 | x = ip # identity op for rank 3 121 | 122 | elif rank == 4: 123 | if channel_dim == 1: 124 | # [C, D1, D2] -> [C, D1 * D2] 125 | shape = [ip_shape[1], ip_shape[2] * ip_shape[3]] 126 | else: 127 | # [D1, D2, C] -> [D1 * D2, C] 128 | shape = [ip_shape[1] * ip_shape[2], ip_shape[3]] 129 | 130 | x = Reshape(shape)(ip) 131 | 132 | else: 133 | if channel_dim == 1: 134 | # [C, D1, D2, D3] -> [C, D1 * D2 * D3] 135 | shape = [ip_shape[1], ip_shape[2] * ip_shape[3] * ip_shape[4]] 136 | else: 137 | # [D1, D2, D3, C] -> [D1 * D2 * D3, C] 138 | shape = [ip_shape[1] * ip_shape[2] * ip_shape[3], ip_shape[4]] 139 | 140 | x = Reshape(shape)(ip) 141 | 142 | return x 143 | 144 | 145 | def _spatial_expandND(ip, rank): 146 | assert rank in [3, 4, 5], "Rank of input must be 3, 4 or 5" 147 | 148 | channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 149 | 150 | if rank == 3: 151 | x = ip if channel_dim == 1 else Permute((2, 1))(ip) # [B, C, 1] -> [B, 1, C] for channels_last 152 | 153 | elif rank == 4: 154 | if channel_dim == 1: 155 | # [C, 1] -> [C, 1, 1] 156 | shape = [-1, 1, 1] 157 | else: 158 | # [C, 1] -> [1, 1, C] 159 | shape = [1, 1, -1] 160 | 161 | x = Reshape(shape)(ip) 162 | 163 | else: 164 | if channel_dim == 1: 165 | # [C, 1] -> [C, 1, 1, 1] 166 | shape = [-1, 1, 1, 1] 167 | else: 168 | # [C, 1] -> [1, 1, 1, C] 169 | shape = [1, 1, 1, -1] 170 | 171 | x = Reshape(shape)(ip) 172 | 173 | return x 174 | 175 | 176 | if __name__ == '__main__': 177 | from keras.layers import Input 178 | from keras.models import Model 179 | 180 | ip = Input(shape=(64, 64, 32)) 181 | x = global_context_block(ip, reduction_ratio=16) 182 | 183 | model = Model(ip, x) 184 | 185 | model.summary()
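    # Editor's addition (illustrative): the same block applied to a rank 3
    # temporal input, exercising the Conv1D path. The 128 timesteps and 32
    # channels below are arbitrary values.
    ip_temporal = Input(shape=(128, 32))
    y = global_context_block(ip_temporal, reduction_ratio=16)
    temporal_model = Model(ip_temporal, y)
    temporal_model.summary()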
186 | -------------------------------------------------------------------------------- /gc_densenet.py: -------------------------------------------------------------------------------- 1 | '''DenseNet models for Keras. 2 | # Reference 3 | - [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf) 4 | - [The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation](https://arxiv.org/pdf/1611.09326.pdf) 5 | ''' 6 | from __future__ import print_function 7 | from __future__ import absolute_import 8 | from __future__ import division 9 | 10 | import warnings 11 | 12 | from keras.models import Model 13 | from keras.layers.core import Dense, Dropout, Activation, Reshape 14 | from keras.layers.convolutional import Conv2D, Conv2DTranspose, UpSampling2D 15 | from keras.layers.pooling import AveragePooling2D, MaxPooling2D 16 | from keras.layers.pooling import GlobalAveragePooling2D 17 | from keras.layers import Input 18 | from keras.layers.merge import concatenate 19 | from keras.layers.normalization import BatchNormalization 20 | from keras.regularizers import l2 21 | from keras.utils.layer_utils import convert_all_kernels_in_model, convert_dense_weights_data_format 22 | from keras.utils.data_utils import get_file 23 | from keras.engine.topology import get_source_inputs 24 | from keras_applications.imagenet_utils import _obtain_input_shape 25 | from keras_applications.imagenet_utils import decode_predictions 26 | import keras.backend as K 27 | 28 | from gc import global_context_block 29 | 30 | 31 | def preprocess_input(x, data_format=None): 32 | """Preprocesses a tensor encoding a batch of images. 33 | 34 | # Arguments 35 | x: input Numpy tensor, 4D. 36 | data_format: data format of the image tensor. 37 | 38 | # Returns 39 | Preprocessed tensor. 40 | """ 41 | if data_format is None: 42 | data_format = K.image_data_format() 43 | assert data_format in {'channels_last', 'channels_first'} 44 | 45 | if data_format == 'channels_first': 46 | if x.ndim == 3: 47 | # 'RGB'->'BGR' 48 | x = x[::-1, ...] 49 | # Zero-center by mean pixel 50 | x[0, :, :] -= 103.939 51 | x[1, :, :] -= 116.779 52 | x[2, :, :] -= 123.68 53 | else: 54 | x = x[:, ::-1, ...] 55 | x[:, 0, :, :] -= 103.939 56 | x[:, 1, :, :] -= 116.779 57 | x[:, 2, :, :] -= 123.68 58 | else: 59 | # 'RGB'->'BGR' 60 | x = x[..., ::-1] 61 | # Zero-center by mean pixel 62 | x[..., 0] -= 103.939 63 | x[..., 1] -= 116.779 64 | x[..., 2] -= 123.68 65 | 66 | x *= 0.017 # scale values 67 | 68 | return x 69 | 70 | 71 | def GCDenseNet(input_shape=None, 72 | depth=40, 73 | nb_dense_block=3, 74 | growth_rate=12, 75 | nb_filter=-1, 76 | nb_layers_per_block=-1, 77 | bottleneck=False, 78 | reduction=0.0, 79 | dropout_rate=0.0, 80 | weight_decay=1e-4, 81 | subsample_initial_block=False, 82 | include_top=True, 83 | weights=None, 84 | input_tensor=None, 85 | classes=10, 86 | activation='softmax'): 87 | '''Instantiate the GC DenseNet architecture 88 | # Arguments 89 | input_shape: optional shape tuple, only to be specified 90 | if `include_top` is False (otherwise the input shape 91 | has to be `(32, 32, 3)` (with `channels_last` dim ordering) 92 | or `(3, 32, 32)` (with `channels_first` dim ordering)). 93 | It should have exactly 3 input channels, 94 | and width and height should be no smaller than 8. 95 | E.g. `(200, 200, 3)` would be one valid value. 96 | depth: number of layers in the DenseNet 97 | nb_dense_block: number of dense blocks to add to end (generally = 3) 98 | growth_rate: number of filters to add per dense block 99 | nb_filter: initial number of filters. -1 indicates initial 100 | number of filters is 2 * growth_rate 101 | nb_layers_per_block: number of layers in each dense block.
102 | Can be a -1, positive integer or a list. 103 | If -1, calculates nb_layer_per_block from the network depth. 104 | If positive integer, a set number of layers per dense block. 105 | If list, nb_layer is used as provided. Note that list size must 106 | be (nb_dense_block) 107 | bottleneck: flag to add bottleneck blocks in between dense blocks 108 | reduction: reduction factor of transition blocks. 109 | Note: reduction value is inverted to compute compression. 110 | dropout_rate: dropout rate 111 | weight_decay: weight decay rate 112 | subsample_initial_block: Set to True to subsample the initial convolution and 113 | add a MaxPool2D before the dense blocks are added. 114 | include_top: whether to include the fully-connected 115 | layer at the top of the network. 116 | weights: one of `None` (random initialization) or 117 | 'imagenet' (pre-training on ImageNet). 118 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) 119 | to use as image input for the model. 120 | classes: optional number of classes to classify images 121 | into, only to be specified if `include_top` is True, and 122 | if no `weights` argument is specified. 123 | activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. 124 | Note that if sigmoid is used, classes must be 1. 125 | # Returns 126 | A Keras model instance. 127 | ''' 128 | 129 | if weights not in {'imagenet', None}: 130 | raise ValueError('The `weights` argument should be either ' 131 | '`None` (random initialization) or `imagenet` ' 132 | '(pre-training on ImageNet).') 133 | 134 | if weights == 'imagenet' and include_top and classes != 1000: 135 | raise ValueError('If using `weights` as ImageNet with `include_top`' 136 | ' as true, `classes` should be 1000') 137 | 138 | if activation not in ['softmax', 'sigmoid']: 139 | raise ValueError('activation must be one of "softmax" or "sigmoid"') 140 | 141 | if activation == 'sigmoid' and classes != 1: 142 | raise ValueError('sigmoid activation can only be used when classes = 1') 143 | 144 | # Determine proper input shape 145 | input_shape = _obtain_input_shape(input_shape, 146 | default_size=32, 147 | min_size=8, 148 | data_format=K.image_data_format(), 149 | require_flatten=include_top) 150 | 151 | if input_tensor is None: 152 | img_input = Input(shape=input_shape) 153 | else: 154 | if not K.is_keras_tensor(input_tensor): 155 | img_input = Input(tensor=input_tensor, shape=input_shape) 156 | else: 157 | img_input = input_tensor 158 | 159 | x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block, 160 | growth_rate, nb_filter, nb_layers_per_block, bottleneck, reduction, 161 | dropout_rate, weight_decay, subsample_initial_block, activation) 162 | 163 | # Ensure that the model takes into account 164 | # any potential predecessors of `input_tensor`. 165 | if input_tensor is not None: 166 | inputs = get_source_inputs(input_tensor) 167 | else: 168 | inputs = img_input 169 | # Create model. 170 | model = Model(inputs, x, name='gc-densenet') 171 | 172 | return model 173 | 174 |
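# Editor's addition (illustrative, not part of the original file): build a small
# CIFAR-10 sized GC-DenseNet. The __main__ guard keeps importing this module
# side-effect free.
if __name__ == '__main__':
    gc_densenet = GCDenseNet(input_shape=(32, 32, 3), depth=40, growth_rate=12, classes=10)
    gc_densenet.summary()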
175 | def GCDenseNetImageNet121(input_shape=None, 176 | bottleneck=True, 177 | reduction=0.5, 178 | dropout_rate=0.0, 179 | weight_decay=1e-4, 180 | include_top=True, 181 | weights=None, 182 | input_tensor=None, 183 | classes=1000, 184 | activation='softmax'): 185 | return GCDenseNet(input_shape, depth=121, nb_dense_block=4, growth_rate=32, nb_filter=64, 186 | nb_layers_per_block=[6, 12, 24, 16], bottleneck=bottleneck, reduction=reduction, 187 | dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, 188 | include_top=include_top, weights=weights, input_tensor=input_tensor, 189 | classes=classes, activation=activation) 190 | 191 | 192 | def GCDenseNetImageNet169(input_shape=None, 193 | bottleneck=True, 194 | reduction=0.5, 195 | dropout_rate=0.0, 196 | weight_decay=1e-4, 197 | include_top=True, 198 | weights=None, 199 | input_tensor=None, 200 | classes=1000, 201 | activation='softmax'): 202 | return GCDenseNet(input_shape, depth=169, nb_dense_block=4, growth_rate=32, nb_filter=64, 203 | nb_layers_per_block=[6, 12, 32, 32], bottleneck=bottleneck, reduction=reduction, 204 | dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, 205 | include_top=include_top, weights=weights, input_tensor=input_tensor, 206 | classes=classes, activation=activation) 207 | 208 | 209 | def GCDenseNetImageNet201(input_shape=None, 210 | bottleneck=True, 211 | reduction=0.5, 212 | dropout_rate=0.0, 213 | weight_decay=1e-4, 214 | include_top=True, 215 | weights=None, 216 | input_tensor=None, 217 | classes=1000, 218 | activation='softmax'): 219 | return GCDenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, nb_filter=64, 220 | nb_layers_per_block=[6, 12, 48, 32], bottleneck=bottleneck, reduction=reduction, 221 | dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, 222 | include_top=include_top, weights=weights, input_tensor=input_tensor, 223 | classes=classes, activation=activation) 224 | 225 | 226 | def GCDenseNetImageNet264(input_shape=None, 227 | bottleneck=True, 228 | reduction=0.5, 229 | dropout_rate=0.0, 230 | weight_decay=1e-4, 231 | include_top=True, 232 | weights=None, 233 | input_tensor=None, 234 | classes=1000, 235 | activation='softmax'): 236 | return GCDenseNet(input_shape, depth=264, nb_dense_block=4, growth_rate=32, nb_filter=64, 237 | nb_layers_per_block=[6, 12, 64, 48], bottleneck=bottleneck, reduction=reduction, 238 | dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, 239 | include_top=include_top, weights=weights, input_tensor=input_tensor, 240 | classes=classes, activation=activation) 241 | 242 | 243 | def GCDenseNetImageNet161(input_shape=None, 244 | bottleneck=True, 245 | reduction=0.5, 246 | dropout_rate=0.0, 247 | weight_decay=1e-4, 248 | include_top=True, 249 | weights=None, 250 | input_tensor=None, 251 | classes=1000, 252 | activation='softmax'): 253 | return GCDenseNet(input_shape, depth=161, nb_dense_block=4, growth_rate=48, nb_filter=96, 254 | nb_layers_per_block=[6, 12, 36, 24], bottleneck=bottleneck, reduction=reduction, 255 | dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True, 256 | include_top=include_top, weights=weights, input_tensor=input_tensor, 257 | classes=classes, activation=activation) 258 | 259 | 260 | def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4): 261 | ''' Apply BatchNorm, Relu, 3x3
Conv2D, optional bottleneck block and dropout 262 | Args: 263 | ip: Input keras tensor 264 | nb_filter: number of filters 265 | bottleneck: add bottleneck block 266 | dropout_rate: dropout rate 267 | weight_decay: weight decay factor 268 | Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck) 269 | ''' 270 | concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 271 | 272 | x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) 273 | x = Activation('relu')(x) 274 | 275 | if bottleneck: 276 | inter_channel = nb_filter * 4 # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua 277 | 278 | x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, 279 | kernel_regularizer=l2(weight_decay))(x) 280 | x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) 281 | x = Activation('relu')(x) 282 | 283 | x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', use_bias=False)(x) 284 | if dropout_rate: 285 | x = Dropout(dropout_rate)(x) 286 | 287 | return x 288 | 289 | 290 | def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1e-4, 291 | grow_nb_filters=True, return_concat_list=False): 292 | ''' Build a dense_block where the output of each conv_block is fed to subsequent ones 293 | Args: 294 | x: keras tensor 295 | nb_layers: the number of layers of conv_block to append to the model. 296 | nb_filter: number of filters 297 | growth_rate: growth rate 298 | bottleneck: bottleneck block 299 | dropout_rate: dropout rate 300 | weight_decay: weight decay factor 301 | grow_nb_filters: flag to decide to allow number of filters to grow 302 | return_concat_list: return the list of feature maps along with the actual output 303 | Returns: keras tensor with nb_layers of conv_block appended 304 | ''' 305 | concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 306 | 307 | x_list = [x] 308 | 309 | for i in range(nb_layers): 310 | cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay) 311 | x_list.append(cb) 312 | 313 | x = concatenate([x, cb], axis=concat_axis) 314 | 315 | if grow_nb_filters: 316 | nb_filter += growth_rate 317 | 318 | # global context block 319 | x = global_context_block(x) 320 | 321 | if return_concat_list: 322 | return x, nb_filter, x_list 323 | else: 324 | return x, nb_filter 325 | 326 | 327 | def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4): 328 | ''' Apply BatchNorm, Relu, 1x1 Conv2D, optional compression and AveragePooling2D 329 | Args: 330 | ip: keras tensor 331 | nb_filter: number of filters 332 | compression: calculated as 1 - reduction. Reduces the number of feature maps 333 | in the transition block. 334 | weight_decay: weight decay factor 335 | Returns: keras tensor, after applying batch_norm, relu-conv, average pooling 336 | and a global context block 337 | ''' 338 | concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 339 | 340 | x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) 341 | x = Activation('relu')(x) 342 | x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, 343 | kernel_regularizer=l2(weight_decay))(x) 344 | x = AveragePooling2D((2, 2), strides=(2, 2))(x) 345 | 346 | # global context block 347 | x = global_context_block(x) 348 | 349 | return x 350 | 351 | 352 | def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1, 353 | nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1e-4, 354 | subsample_initial_block=False, activation='softmax'): 355 | ''' Build the DenseNet model 356 | Args: 357 | nb_classes: number of classes 358 | img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) 359 | include_top: flag to include the final Dense layer 360 | depth: number of layers 361 | nb_dense_block: number of dense blocks to add to end (generally = 3) 362 | growth_rate: number of filters to add per dense block 363 | nb_filter: initial number of filters. Default -1 indicates initial number of filters is 2 * growth_rate 364 | nb_layers_per_block: number of layers in each dense block. 365 | Can be a -1, positive integer or a list. 366 | If -1, calculates nb_layer_per_block from the depth of the network. 367 | If positive integer, a set number of layers per dense block. 368 | If list, nb_layer is used as provided. Note that list size must 369 | be (nb_dense_block) 370 | bottleneck: add bottleneck blocks 371 | reduction: reduction factor of transition blocks. Note: reduction value is inverted to compute compression 372 | dropout_rate: dropout rate 373 | weight_decay: weight decay rate 374 | subsample_initial_block: Set to True to subsample the initial 375 | convolution and add a MaxPool2D before the 376 | dense blocks are added. 377 | activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. 378 | Note that if sigmoid is used, classes must be 1. 379 | Returns: keras tensor with nb_layers of conv_block appended 380 | ''' 381 | 382 | concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 383 | 384 | if reduction != 0.0: 385 | assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0' 386 |
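    # Editor's note (illustrative): when nb_layers_per_block == -1 the per-block
    # layer count is derived from `depth` below; e.g. depth=40 with nb_dense_block=3
    # gives (40 - 4) / 3 = 12 conv_blocks per dense block (i.e. depth = 3N + 4).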
387 | # layers in each dense block 388 | if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: 389 | nb_layers = list(nb_layers_per_block) # Convert tuple to list 390 | 391 | assert len(nb_layers) == (nb_dense_block), 'If list, nb_layer is used as provided. ' \ 392 | 'Note that list size must be (nb_dense_block)' 393 | final_nb_layer = nb_layers[-1] 394 | nb_layers = nb_layers[:-1] 395 | else: 396 | if nb_layers_per_block == -1: 397 | assert (depth - 4) % 3 == 0, 'Depth must be 3N + 4 if nb_layers_per_block == -1' 398 | count = int((depth - 4) / 3) 399 | nb_layers = [count for _ in range(nb_dense_block)] 400 | final_nb_layer = count 401 | else: 402 | final_nb_layer = nb_layers_per_block 403 | nb_layers = [nb_layers_per_block] * nb_dense_block 404 | 405 | # compute initial nb_filter if -1, else accept user's initial nb_filter 406 | if nb_filter <= 0: 407 | nb_filter = 2 * growth_rate 408 | 409 | # compute compression factor 410 | compression = 1.0 - reduction 411 | 412 | # Initial convolution 413 | if subsample_initial_block: 414 | initial_kernel = (7, 7) 415 | initial_strides = (2, 2) 416 | else: 417 | initial_kernel = (3, 3) 418 | initial_strides = (1, 1) 419 | 420 | x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', padding='same', 421 | strides=initial_strides, use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) 422 | 423 | if subsample_initial_block: 424 | x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) 425 | x = Activation('relu')(x) 426 | x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) 427 | 428 | # Add dense blocks 429 | for block_idx in range(nb_dense_block - 1): 430 | x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck, 431 | dropout_rate=dropout_rate, weight_decay=weight_decay) 432 | # add transition_block 433 | x = __transition_block(x, nb_filter, compression=compression, weight_decay=weight_decay) 434 | nb_filter = int(nb_filter * compression) 435 | 436 | # The last dense_block does not have a transition_block 437 | x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck, 438 | dropout_rate=dropout_rate, weight_decay=weight_decay) 439 | 440 | x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) 441 | x = Activation('relu')(x) 442 | x = GlobalAveragePooling2D()(x) 443 | 444 | if include_top: 445 | x = Dense(nb_classes, activation=activation)(x) 446 | 447 | return x 448 | -------------------------------------------------------------------------------- /gc_inception_resnet_v2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """GC Inception-ResNet V2 model for Keras. 3 | Model naming and structure follows TF-slim implementation (which has some additional 4 | layers and different number of filters from the original arXiv paper): 5 | https://github.com/tensorflow/models/blob/master/slim/nets/inception_resnet_v2.py 6 | Pre-trained ImageNet weights are also converted from TF-slim, which can be found in: 7 | https://github.com/tensorflow/models/tree/master/slim#pre-trained-models 8 | 9 | Original code from Keras applications 10 | 11 | # Reference 12 | - [Inception-v4, Inception-ResNet and the Impact of 13 | Residual Connections on Learning](https://arxiv.org/abs/1602.07261) 14 | """ 15 | from __future__ import print_function 16 | from __future__ import absolute_import 17 | 18 | import warnings 19 | 20 | from keras.models import Model 21 | from keras.layers import Activation 22 | from keras.layers import AveragePooling2D 23 | from keras.layers import BatchNormalization 24 | from keras.layers import Concatenate 25 | from keras.layers import Conv2D 26 | from keras.layers import Dense 27 | from keras.layers import GlobalAveragePooling2D 28 | from keras.layers import GlobalMaxPooling2D 29 | from keras.layers import Input 30 | from keras.layers import Lambda 31 | from keras.layers import MaxPooling2D 32 | from keras.utils.data_utils import get_file 33 | from keras.engine.topology import get_source_inputs 34 | from keras_applications import imagenet_utils 35 | from keras_applications.imagenet_utils import _obtain_input_shape 36 | from keras.applications.imagenet_utils import decode_predictions 37 | from keras import backend as K 38 | 39 | from gc import global_context_block 40 | 41 | 42 | def preprocess_input(x): 43 | """Preprocesses a numpy array encoding a batch of images. 44 | # Arguments 45 | x: a 4D numpy array consists of RGB values within [0, 255]. 46 | # Returns 47 | Preprocessed array. 48 | """ 49 | return imagenet_utils.preprocess_input(x, mode='tf') 50 | 51 | 52 | def conv2d_bn(x, 53 | filters, 54 | kernel_size, 55 | strides=1, 56 | padding='same', 57 | activation='relu', 58 | use_bias=False, 59 | name=None): 60 | """Utility function to apply conv + BN. 61 | # Arguments 62 | x: input tensor. 63 | filters: filters in `Conv2D`. 64 | kernel_size: kernel size as in `Conv2D`. 65 | padding: padding mode in `Conv2D`. 66 | activation: activation in `Conv2D`. 67 | strides: strides in `Conv2D`. 68 | name: name of the ops; will become `name + '_ac'` for the activation 69 | and `name + '_bn'` for the batch norm layer. 70 | # Returns 71 | Output tensor after applying `Conv2D` and `BatchNormalization`. 72 | """ 73 | x = Conv2D(filters, 74 | kernel_size, 75 | strides=strides, 76 | padding=padding, 77 | use_bias=use_bias, 78 | name=name)(x) 79 | if not use_bias: 80 | bn_axis = 1 if K.image_data_format() == 'channels_first' else 3 81 | bn_name = None if name is None else name + '_bn' 82 | x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) 83 | if activation is not None: 84 | ac_name = None if name is None else name + '_ac' 85 | x = Activation(activation, name=ac_name)(x) 86 | return x 87 | 88 | 89 | 90 | def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): 91 | """Adds an Inception-ResNet block with Global Context block at the end.
92 | This function builds 3 types of Inception-ResNet blocks mentioned 93 | in the paper, controlled by the `block_type` argument (which is the 94 | block name used in the official TF-slim implementation): 95 | - Inception-ResNet-A: `block_type='block35'` 96 | - Inception-ResNet-B: `block_type='block17'` 97 | - Inception-ResNet-C: `block_type='block8'` 98 | # Arguments 99 | x: input tensor. 100 | scale: scaling factor to scale the residuals (i.e., the output of 101 | passing `x` through an inception module) before adding them 102 | to the shortcut branch. Let `r` be the output from the residual branch, 103 | the output of this block will be `x + scale * r`. 104 | block_type: `'block35'`, `'block17'` or `'block8'`, determines 105 | the network structure in the residual branch. 106 | block_idx: an `int` used for generating layer names. The Inception-ResNet blocks 107 | are repeated many times in this network. We use `block_idx` to identify 108 | each of the repetitions. For example, the first Inception-ResNet-A block 109 | will have `block_type='block35', block_idx=0`, and the layer names will have 110 | a common prefix `'block35_0'`. 111 | activation: activation function to use at the end of the block 112 | (see [activations](../activations.md)). 113 | When `activation=None`, no activation is applied 114 | (i.e., "linear" activation: `a(x) = x`). 115 | # Returns 116 | Output tensor for the block. 117 | # Raises 118 | ValueError: if `block_type` is not one of `'block35'`, 119 | `'block17'` or `'block8'`. 120 | """ 121 | if block_type == 'block35': 122 | branch_0 = conv2d_bn(x, 32, 1) 123 | branch_1 = conv2d_bn(x, 32, 1) 124 | branch_1 = conv2d_bn(branch_1, 32, 3) 125 | branch_2 = conv2d_bn(x, 32, 1) 126 | branch_2 = conv2d_bn(branch_2, 48, 3) 127 | branch_2 = conv2d_bn(branch_2, 64, 3) 128 | branches = [branch_0, branch_1, branch_2] 129 | elif block_type == 'block17': 130 | branch_0 = conv2d_bn(x, 192, 1) 131 | branch_1 = conv2d_bn(x, 128, 1) 132 | branch_1 = conv2d_bn(branch_1, 160, [1, 7]) 133 | branch_1 = conv2d_bn(branch_1, 192, [7, 1]) 134 | branches = [branch_0, branch_1] 135 | elif block_type == 'block8': 136 | branch_0 = conv2d_bn(x, 192, 1) 137 | branch_1 = conv2d_bn(x, 192, 1) 138 | branch_1 = conv2d_bn(branch_1, 224, [1, 3]) 139 | branch_1 = conv2d_bn(branch_1, 256, [3, 1]) 140 | branches = [branch_0, branch_1] 141 | else: 142 | raise ValueError('Unknown Inception-ResNet block type. ' 143 | 'Expects "block35", "block17" or "block8", ' 144 | 'but got: ' + str(block_type)) 145 | 146 | block_name = block_type + '_' + str(block_idx) 147 | channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 148 | mixed = Concatenate(axis=channel_axis, name=block_name + '_mixed')(branches) 149 | up = conv2d_bn(mixed, 150 | K.int_shape(x)[channel_axis], 151 | 1, 152 | activation=None, 153 | use_bias=True, 154 | name=block_name + '_conv') 155 | 156 | x = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale, 157 | output_shape=K.int_shape(x)[1:], 158 | arguments={'scale': scale}, 159 | name=block_name)([x, up]) 160 | if activation is not None: 161 | x = Activation(activation, name=block_name + '_ac')(x) 162 | 163 | # global context block 164 | x = global_context_block(x) 165 | return x 166 | 167 | 168 | def GCInceptionResNetV2(include_top=True, 169 | weights=None, 170 | input_tensor=None, 171 | input_shape=None, 172 | pooling=None, 173 | classes=1000): 174 | """Instantiates the GC-Inception-ResNet v2 architecture. 175 | Optionally loads weights pre-trained on ImageNet.
176 | Note that when using TensorFlow, for best performance you should 177 | set `"image_data_format": "channels_last"` in your Keras config 178 | at `~/.keras/keras.json`. 179 | The model and the weights are compatible with both TensorFlow and Theano 180 | backends (but not CNTK). The data format convention used by the model is 181 | the one specified in your Keras config file. 182 | Note that the default input image size for this model is 299x299, instead 183 | of 224x224 as in the VGG16 and ResNet models. Also, the input preprocessing 184 | function is different (i.e., do not use `imagenet_utils.preprocess_input()` 185 | with this model. Use `preprocess_input()` defined in this module instead). 186 | # Arguments 187 | include_top: whether to include the fully-connected 188 | layer at the top of the network. 189 | weights: one of `None` (random initialization) 190 | or `'imagenet'` (pre-training on ImageNet). 191 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) 192 | to use as image input for the model. 193 | input_shape: optional shape tuple, only to be specified 194 | if `include_top` is `False` (otherwise the input shape 195 | has to be `(299, 299, 3)` (with `'channels_last'` data format) 196 | or `(3, 299, 299)` (with `'channels_first'` data format). 197 | It should have exactly 3 inputs channels, 198 | and width and height should be no smaller than 139. 199 | E.g. `(150, 150, 3)` would be one valid value. 200 | pooling: Optional pooling mode for feature extraction 201 | when `include_top` is `False`. 202 | - `None` means that the output of the model will be 203 | the 4D tensor output of the last convolutional layer. 204 | - `'avg'` means that global average pooling 205 | will be applied to the output of the 206 | last convolutional layer, and thus 207 | the output of the model will be a 2D tensor. 208 | - `'max'` means that global max pooling will be applied. 209 | classes: optional number of classes to classify images 210 | into, only to be specified if `include_top` is `True`, and 211 | if no `weights` argument is specified. 212 | # Returns 213 | A Keras `Model` instance. 214 | # Raises 215 | ValueError: in case of invalid argument for `weights`, 216 | or invalid input shape. 217 | RuntimeError: If attempting to run this model with an unsupported backend. 
218 | """ 219 | if K.backend() in {'cntk'}: 220 | raise RuntimeError(K.backend() + ' backend is currently unsupported for this model.') 221 | 222 | if weights not in {'imagenet', None}: 223 | raise ValueError('The `weights` argument should be either ' 224 | '`None` (random initialization) or `imagenet` ' 225 | '(pre-training on ImageNet).') 226 | 227 | if weights == 'imagenet' and include_top and classes != 1000: 228 | raise ValueError('If using `weights` as imagenet with `include_top`' 229 | ' as true, `classes` should be 1000') 230 | 231 | # Determine proper input shape 232 | input_shape = _obtain_input_shape( 233 | input_shape, 234 | default_size=299, 235 | min_size=139, 236 | data_format=K.image_data_format(), 237 | require_flatten=False, 238 | weights=weights) 239 | 240 | if input_tensor is None: 241 | img_input = Input(shape=input_shape) 242 | else: 243 | if not K.is_keras_tensor(input_tensor): 244 | img_input = Input(tensor=input_tensor, shape=input_shape) 245 | else: 246 | img_input = input_tensor 247 | 248 | # Stem block: 35 x 35 x 192 249 | x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') 250 | x = conv2d_bn(x, 32, 3, padding='valid') 251 | x = conv2d_bn(x, 64, 3) 252 | x = MaxPooling2D(3, strides=2)(x) 253 | x = conv2d_bn(x, 80, 1, padding='valid') 254 | x = conv2d_bn(x, 192, 3, padding='valid') 255 | x = MaxPooling2D(3, strides=2)(x) 256 | 257 | # Mixed 5b (Inception-A block): 35 x 35 x 320 258 | branch_0 = conv2d_bn(x, 96, 1) 259 | branch_1 = conv2d_bn(x, 48, 1) 260 | branch_1 = conv2d_bn(branch_1, 64, 5) 261 | branch_2 = conv2d_bn(x, 64, 1) 262 | branch_2 = conv2d_bn(branch_2, 96, 3) 263 | branch_2 = conv2d_bn(branch_2, 96, 3) 264 | branch_pool = AveragePooling2D(3, strides=1, padding='same')(x) 265 | branch_pool = conv2d_bn(branch_pool, 64, 1) 266 | branches = [branch_0, branch_1, branch_2, branch_pool] 267 | channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 268 | x = Concatenate(axis=channel_axis, name='mixed_5b')(branches) 269 | 270 | # global context block 271 | x = global_context_block(x) 272 | 273 | # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 274 | for block_idx in range(1, 11): 275 | x = inception_resnet_block(x, 276 | scale=0.17, 277 | block_type='block35', 278 | block_idx=block_idx) 279 | 280 | # Mixed 6a (Reduction-A block): 17 x 17 x 1088 281 | branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') 282 | branch_1 = conv2d_bn(x, 256, 1) 283 | branch_1 = conv2d_bn(branch_1, 256, 3) 284 | branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') 285 | branch_pool = MaxPooling2D(3, strides=2, padding='valid')(x) 286 | branches = [branch_0, branch_1, branch_pool] 287 | x = Concatenate(axis=channel_axis, name='mixed_6a')(branches) 288 | 289 | # global context block 290 | x = global_context_block(x) 291 | 292 | # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 293 | for block_idx in range(1, 21): 294 | x = inception_resnet_block(x, 295 | scale=0.1, 296 | block_type='block17', 297 | block_idx=block_idx) 298 | 299 | # Mixed 7a (Reduction-B block): 8 x 8 x 2080 300 | branch_0 = conv2d_bn(x, 256, 1) 301 | branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid') 302 | branch_1 = conv2d_bn(x, 256, 1) 303 | branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') 304 | branch_2 = conv2d_bn(x, 256, 1) 305 | branch_2 = conv2d_bn(branch_2, 288, 3) 306 | branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid') 307 | branch_pool = MaxPooling2D(3, strides=2, padding='valid')(x) 308 | branches 
= [branch_0, branch_1, branch_2, branch_pool] 309 | x = Concatenate(axis=channel_axis, name='mixed_7a')(branches) 310 | 311 | # global context block 312 | x = global_context_block(x) 313 | 314 | # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 315 | for block_idx in range(1, 10): 316 | x = inception_resnet_block(x, 317 | scale=0.2, 318 | block_type='block8', 319 | block_idx=block_idx) 320 | x = inception_resnet_block(x, 321 | scale=1., 322 | activation=None, 323 | block_type='block8', 324 | block_idx=10) 325 | 326 | # global context block 327 | x = global_context_block(x) 328 | 329 | # Final convolution block: 8 x 8 x 1536 330 | x = conv2d_bn(x, 1536, 1, name='conv_7b') 331 | 332 | if include_top: 333 | # Classification block 334 | x = GlobalAveragePooling2D(name='avg_pool')(x) 335 | x = Dense(classes, activation='softmax', name='predictions')(x) 336 | else: 337 | if pooling == 'avg': 338 | x = GlobalAveragePooling2D()(x) 339 | elif pooling == 'max': 340 | x = GlobalMaxPooling2D()(x) 341 | 342 | # Ensure that the model takes into account 343 | # any potential predecessors of `input_tensor` 344 | if input_tensor is not None: 345 | inputs = get_source_inputs(input_tensor) 346 | else: 347 | inputs = img_input 348 | 349 | # Create model 350 | model = Model(inputs, x, name='gc_inception_resnet_v2') 351 | 352 | return model
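# Editor's addition (illustrative, not part of the original file): a quick
# sanity check of the builder; input_shape is given explicitly because the GC
# block requires defined spatial dimensions.
if __name__ == '__main__':
    gc_irv2 = GCInceptionResNetV2(input_shape=(299, 299, 3), weights=None)
    gc_irv2.summary()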
353 | -------------------------------------------------------------------------------- /gc_inception_v3.py: -------------------------------------------------------------------------------- 1 | """Global Context Inception V3 model 2 | 3 | Major portions of this code are adapted from the applications folder of Keras. 4 | 5 | Note that the input image format for this model is different than for 6 | the VGG16 and ResNet models (299x299 instead of 224x224), 7 | and that the input preprocessing function is also different (same as Xception). 8 | 9 | # Reference 10 | - [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) 11 | - [GCNet: Non-local Networks Meet Squeeze-Excitation Networks and Beyond](https://arxiv.org/abs/1904.11492) 12 | 13 | """ 14 | from __future__ import print_function 15 | from __future__ import absolute_import 16 | 17 | import warnings 18 | 19 | from keras.models import Model 20 | from keras import layers 21 | from keras.layers import Activation 22 | from keras.layers import Dense 23 | from keras.layers import Reshape 24 | from keras.layers import Input 25 | from keras.layers import BatchNormalization 26 | from keras.layers import Conv2D 27 | from keras.layers import MaxPooling2D 28 | from keras.layers import AveragePooling2D 29 | from keras.layers import GlobalAveragePooling2D 30 | from keras.layers import GlobalMaxPooling2D 31 | from keras.engine.topology import get_source_inputs 32 | from keras.utils.data_utils import get_file 33 | from keras import backend as K 34 | from keras_applications.imagenet_utils import decode_predictions 35 | from keras_applications.imagenet_utils import _obtain_input_shape 36 | 37 | from gc import global_context_block 38 | 39 | WEIGHTS_PATH = '' 40 | WEIGHTS_PATH_NO_TOP = '' 41 | 42 | 43 | def _conv2d_bn(x, 44 | filters, 45 | num_row, 46 | num_col, 47 | padding='same', 48 | strides=(1, 1), 49 | name=None): 50 | """Utility function to apply conv + BN. 51 | 52 | # Arguments 53 | x: input tensor. 54 | filters: filters in `Conv2D`. 55 | num_row: height of the convolution kernel. 56 | num_col: width of the convolution kernel. 57 | padding: padding mode in `Conv2D`. 58 | strides: strides in `Conv2D`. 59 | name: name of the ops; will become `name + '_conv'` 60 | for the convolution and `name + '_bn'` for the 61 | batch norm layer. 62 | 63 | # Returns 64 | Output tensor after applying `Conv2D` and `BatchNormalization`. 65 | """ 66 | if name is not None: 67 | bn_name = name + '_bn' 68 | conv_name = name + '_conv' 69 | else: 70 | bn_name = None 71 | conv_name = None 72 | if K.image_data_format() == 'channels_first': 73 | bn_axis = 1 74 | else: 75 | bn_axis = 3 76 | x = Conv2D( 77 | filters, (num_row, num_col), 78 | strides=strides, 79 | padding=padding, 80 | use_bias=False, 81 | name=conv_name)(x) 82 | x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) 83 | x = Activation('relu', name=name)(x) 84 | return x 85 | 86 | 87 | def GCInceptionV3(include_top=True, 88 | weights=None, 89 | input_tensor=None, 90 | input_shape=None, 91 | pooling=None, 92 | classes=1000): 93 | """Instantiates the Global Context Inception v3 architecture. 94 | 95 | # Arguments 96 | include_top: whether to include the fully-connected 97 | layer at the top of the network. 98 | weights: one of `None` (random initialization) 99 | or "imagenet" (pre-training on ImageNet). 100 | input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) 101 | to use as image input for the model. 102 | input_shape: optional shape tuple, only to be specified 103 | if `include_top` is False (otherwise the input shape 104 | has to be `(299, 299, 3)` (with `channels_last` data format) 105 | or `(3, 299, 299)` (with `channels_first` data format)). 106 | It should have exactly 3 input channels, 107 | and width and height should be no smaller than 139. 108 | E.g. `(150, 150, 3)` would be one valid value. 109 | pooling: Optional pooling mode for feature extraction 110 | when `include_top` is `False`. 111 | - `None` means that the output of the model will be 112 | the 4D tensor output of the 113 | last convolutional layer. 114 | - `avg` means that global average pooling 115 | will be applied to the output of the 116 | last convolutional layer, and thus 117 | the output of the model will be a 2D tensor. 118 | - `max` means that global max pooling will 119 | be applied. 120 | classes: optional number of classes to classify images 121 | into, only to be specified if `include_top` is True, and 122 | if no `weights` argument is specified. 123 | 124 | # Returns 125 | A Keras model instance. 126 | 127 | # Raises 128 | ValueError: in case of invalid argument for `weights`, 129 | or invalid input shape.
130 | """ 131 | if weights not in {'imagenet', None}: 132 | raise ValueError('The `weights` argument should be either ' 133 | '`None` (random initialization) or `imagenet` ' 134 | '(pre-training on ImageNet).') 135 | 136 | if weights == 'imagenet' and include_top and classes != 1000: 137 | raise ValueError('If using `weights` as imagenet with `include_top`' 138 | ' as true, `classes` should be 1000') 139 | 140 | # Determine proper input shape 141 | input_shape = _obtain_input_shape( 142 | input_shape, 143 | default_size=299, 144 | min_size=139, 145 | data_format=K.image_data_format(), 146 | require_flatten=include_top) 147 | 148 | if input_tensor is None: 149 | img_input = Input(shape=input_shape) 150 | else: 151 | if not K.is_keras_tensor(input_tensor): 152 | img_input = Input(tensor=input_tensor, shape=input_shape) 153 | else: 154 | img_input = input_tensor 155 | 156 | if K.image_data_format() == 'channels_first': 157 | channel_axis = 1 158 | else: 159 | channel_axis = 3 160 | 161 | x = _conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') 162 | x = _conv2d_bn(x, 32, 3, 3, padding='valid') 163 | x = _conv2d_bn(x, 64, 3, 3) 164 | x = MaxPooling2D((3, 3), strides=(2, 2))(x) 165 | 166 | x = _conv2d_bn(x, 80, 1, 1, padding='valid') 167 | x = _conv2d_bn(x, 192, 3, 3, padding='valid') 168 | x = MaxPooling2D((3, 3), strides=(2, 2))(x) 169 | 170 | # mixed 0, 1, 2: 35 x 35 x 256 171 | branch1x1 = _conv2d_bn(x, 64, 1, 1) 172 | 173 | branch5x5 = _conv2d_bn(x, 48, 1, 1) 174 | branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5) 175 | 176 | branch3x3dbl = _conv2d_bn(x, 64, 1, 1) 177 | branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) 178 | branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) 179 | 180 | branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) 181 | branch_pool = _conv2d_bn(branch_pool, 32, 1, 1) 182 | x = layers.concatenate( 183 | [branch1x1, branch5x5, branch3x3dbl, branch_pool], 184 | axis=channel_axis, 185 | name='mixed0') 186 | 187 | # global context block 188 | x = global_context_block(x) 189 | 190 | # mixed 1: 35 x 35 x 256 191 | branch1x1 = _conv2d_bn(x, 64, 1, 1) 192 | 193 | branch5x5 = _conv2d_bn(x, 48, 1, 1) 194 | branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5) 195 | 196 | branch3x3dbl = _conv2d_bn(x, 64, 1, 1) 197 | branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) 198 | branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) 199 | 200 | branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) 201 | branch_pool = _conv2d_bn(branch_pool, 64, 1, 1) 202 | x = layers.concatenate( 203 | [branch1x1, branch5x5, branch3x3dbl, branch_pool], 204 | axis=channel_axis, 205 | name='mixed1') 206 | 207 | # global context block 208 | x = global_context_block(x) 209 | 210 | # mixed 2: 35 x 35 x 256 211 | branch1x1 = _conv2d_bn(x, 64, 1, 1) 212 | 213 | branch5x5 = _conv2d_bn(x, 48, 1, 1) 214 | branch5x5 = _conv2d_bn(branch5x5, 64, 5, 5) 215 | 216 | branch3x3dbl = _conv2d_bn(x, 64, 1, 1) 217 | branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) 218 | branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) 219 | 220 | branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) 221 | branch_pool = _conv2d_bn(branch_pool, 64, 1, 1) 222 | x = layers.concatenate( 223 | [branch1x1, branch5x5, branch3x3dbl, branch_pool], 224 | axis=channel_axis, 225 | name='mixed2') 226 | 227 | # global context block 228 | x = global_context_block(x) 229 | 230 | # mixed 3: 17 x 17 x 768 231 | branch3x3 = _conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') 232 | 233 | branch3x3dbl = 
_conv2d_bn(x, 64, 1, 1) 234 | branch3x3dbl = _conv2d_bn(branch3x3dbl, 96, 3, 3) 235 | branch3x3dbl = _conv2d_bn( 236 | branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') 237 | 238 | branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) 239 | x = layers.concatenate( 240 | [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3') 241 | 242 | # global context block 243 | x = global_context_block(x) 244 | 245 | # mixed 4: 17 x 17 x 768 246 | branch1x1 = _conv2d_bn(x, 192, 1, 1) 247 | 248 | branch7x7 = _conv2d_bn(x, 128, 1, 1) 249 | branch7x7 = _conv2d_bn(branch7x7, 128, 1, 7) 250 | branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1) 251 | 252 | branch7x7dbl = _conv2d_bn(x, 128, 1, 1) 253 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 7, 1) 254 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 1, 7) 255 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 128, 7, 1) 256 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7) 257 | 258 | branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) 259 | branch_pool = _conv2d_bn(branch_pool, 192, 1, 1) 260 | x = layers.concatenate( 261 | [branch1x1, branch7x7, branch7x7dbl, branch_pool], 262 | axis=channel_axis, 263 | name='mixed4') 264 | 265 | # global context block 266 | x = global_context_block(x) 267 | 268 | # mixed 5, 6: 17 x 17 x 768 269 | for i in range(2): 270 | branch1x1 = _conv2d_bn(x, 192, 1, 1) 271 | 272 | branch7x7 = _conv2d_bn(x, 160, 1, 1) 273 | branch7x7 = _conv2d_bn(branch7x7, 160, 1, 7) 274 | branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1) 275 | 276 | branch7x7dbl = _conv2d_bn(x, 160, 1, 1) 277 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 7, 1) 278 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 1, 7) 279 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 160, 7, 1) 280 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7) 281 | 282 | branch_pool = AveragePooling2D( 283 | (3, 3), strides=(1, 1), padding='same')(x) 284 | branch_pool = _conv2d_bn(branch_pool, 192, 1, 1) 285 | x = layers.concatenate( 286 | [branch1x1, branch7x7, branch7x7dbl, branch_pool], 287 | axis=channel_axis, 288 | name='mixed' + str(5 + i)) 289 | 290 | # global context block 291 | x = global_context_block(x) 292 | 293 | # mixed 7: 17 x 17 x 768 294 | branch1x1 = _conv2d_bn(x, 192, 1, 1) 295 | 296 | branch7x7 = _conv2d_bn(x, 192, 1, 1) 297 | branch7x7 = _conv2d_bn(branch7x7, 192, 1, 7) 298 | branch7x7 = _conv2d_bn(branch7x7, 192, 7, 1) 299 | 300 | branch7x7dbl = _conv2d_bn(x, 192, 1, 1) 301 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 7, 1) 302 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7) 303 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 7, 1) 304 | branch7x7dbl = _conv2d_bn(branch7x7dbl, 192, 1, 7) 305 | 306 | branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) 307 | branch_pool = _conv2d_bn(branch_pool, 192, 1, 1) 308 | x = layers.concatenate( 309 | [branch1x1, branch7x7, branch7x7dbl, branch_pool], 310 | axis=channel_axis, 311 | name='mixed7') 312 | 313 | # global context block 314 | x = global_context_block(x) 315 | 316 | # mixed 8: 8 x 8 x 1280 317 | branch3x3 = _conv2d_bn(x, 192, 1, 1) 318 | branch3x3 = _conv2d_bn(branch3x3, 320, 3, 3, 319 | strides=(2, 2), padding='valid') 320 | 321 | branch7x7x3 = _conv2d_bn(x, 192, 1, 1) 322 | branch7x7x3 = _conv2d_bn(branch7x7x3, 192, 1, 7) 323 | branch7x7x3 = _conv2d_bn(branch7x7x3, 192, 7, 1) 324 | branch7x7x3 = _conv2d_bn( 325 | branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') 326 | 327 | branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) 328 | x = layers.concatenate( 329 
| [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8') 330 | 331 | # global context block 332 | x = global_context_block(x) 333 | 334 | # mixed 9: 8 x 8 x 2048 335 | for i in range(2): 336 | branch1x1 = _conv2d_bn(x, 320, 1, 1) 337 | 338 | branch3x3 = _conv2d_bn(x, 384, 1, 1) 339 | branch3x3_1 = _conv2d_bn(branch3x3, 384, 1, 3) 340 | branch3x3_2 = _conv2d_bn(branch3x3, 384, 3, 1) 341 | branch3x3 = layers.concatenate( 342 | [branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i)) 343 | 344 | branch3x3dbl = _conv2d_bn(x, 448, 1, 1) 345 | branch3x3dbl = _conv2d_bn(branch3x3dbl, 384, 3, 3) 346 | branch3x3dbl_1 = _conv2d_bn(branch3x3dbl, 384, 1, 3) 347 | branch3x3dbl_2 = _conv2d_bn(branch3x3dbl, 384, 3, 1) 348 | branch3x3dbl = layers.concatenate( 349 | [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) 350 | 351 | branch_pool = AveragePooling2D( 352 | (3, 3), strides=(1, 1), padding='same')(x) 353 | branch_pool = _conv2d_bn(branch_pool, 192, 1, 1) 354 | x = layers.concatenate( 355 | [branch1x1, branch3x3, branch3x3dbl, branch_pool], 356 | axis=channel_axis, 357 | name='mixed' + str(9 + i)) 358 | 359 | # global context block 360 | x = global_context_block(x) 361 | 362 | if include_top: 363 | # Classification block 364 | x = GlobalAveragePooling2D(name='avg_pool')(x) 365 | x = Dense(classes, activation='softmax', name='predictions')(x) 366 | else: 367 | if pooling == 'avg': 368 | x = GlobalAveragePooling2D()(x) 369 | elif pooling == 'max': 370 | x = GlobalMaxPooling2D()(x) 371 | 372 | # Ensure that the model takes into account 373 | # any potential predecessors of `input_tensor`. 374 | if input_tensor is not None: 375 | inputs = get_source_inputs(input_tensor) 376 | else: 377 | inputs = img_input 378 | # Create model. 379 | model = Model(inputs, x, name='gc_inception_v3') 380 | 381 | return model 382 | 383 | 384 | def preprocess_input(x): 385 | x /= 255. 386 | x -= 0.5 387 | x *= 2. 388 | return x
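# Editor's addition (illustrative, not part of the original file): a quick
# sanity check of the builder; input_shape is given explicitly because the GC
# block requires defined spatial dimensions.
if __name__ == '__main__':
    gc_inception = GCInceptionV3(input_shape=(299, 299, 3), weights=None)
    gc_inception.summary()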
2 | 
3 | # Reference
4 | - [MobileNets: Efficient Convolutional Neural Networks for
5 |   Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)
6 | """
7 | from __future__ import print_function
8 | from __future__ import absolute_import
9 | from __future__ import division
10 | 
11 | import warnings
12 | 
13 | from keras.models import Model
14 | from keras.layers import Input
15 | from keras.layers import Activation
16 | from keras.layers import Dropout
17 | from keras.layers import Reshape
18 | from keras.layers import BatchNormalization
19 | from keras.layers import GlobalAveragePooling2D
20 | from keras.layers import GlobalMaxPooling2D
21 | from keras.layers import Conv2D
22 | from keras import initializers
23 | from keras import regularizers
24 | from keras import constraints
25 | from keras.utils import conv_utils
26 | from keras.utils.data_utils import get_file
27 | from keras.engine.topology import get_source_inputs
28 | from keras.engine import InputSpec
29 | from keras_applications import imagenet_utils
30 | from keras_applications.imagenet_utils import _obtain_input_shape
31 | from keras.applications.imagenet_utils import decode_predictions
32 | from keras import backend as K
33 | 
34 | from gc import global_context_block
35 | 
36 | 
37 | def relu6(x):
38 |     return K.relu(x, max_value=6)
39 | 
40 | 
41 | def preprocess_input(x):
42 |     """Preprocesses a numpy array encoding a batch of images.
43 |     # Arguments
44 |         x: a 4D numpy array consisting of RGB values within [0, 255].
45 |     # Returns
46 |         Preprocessed array.
47 |     """
48 |     return imagenet_utils.preprocess_input(x, mode='tf')
49 | 
50 | 
51 | class DepthwiseConv2D(Conv2D):
52 |     """Depthwise separable 2D convolution.
53 |     Depthwise separable convolutions consist of performing
54 |     just the first step of a depthwise spatial convolution
55 |     (which acts on each input channel separately).
56 |     The `depth_multiplier` argument controls how many
57 |     output channels are generated per input channel in the depthwise step.
58 |     # Arguments
59 |         kernel_size: An integer or tuple/list of 2 integers, specifying the
60 |             width and height of the 2D convolution window.
61 |             Can be a single integer to specify the same value for
62 |             all spatial dimensions.
63 |         strides: An integer or tuple/list of 2 integers,
64 |             specifying the strides of the convolution along the width and height.
65 |             Can be a single integer to specify the same value for
66 |             all spatial dimensions.
67 |             Specifying any stride value != 1 is incompatible with specifying
68 |             any `dilation_rate` value != 1.
69 |         padding: one of `'valid'` or `'same'` (case-insensitive).
70 |         depth_multiplier: The number of depthwise convolution output channels
71 |             for each input channel.
72 |             The total number of depthwise convolution output
73 |             channels will be equal to `filters_in * depth_multiplier`.
74 |         data_format: A string,
75 |             one of `channels_last` (default) or `channels_first`.
76 |             The ordering of the dimensions in the inputs.
77 |             `channels_last` corresponds to inputs with shape
78 |             `(batch, height, width, channels)` while `channels_first`
79 |             corresponds to inputs with shape
80 |             `(batch, channels, height, width)`.
81 |             It defaults to the `image_data_format` value found in your
82 |             Keras config file at `~/.keras/keras.json`.
83 |             If you never set it, then it will be 'channels_last'.
84 |         activation: Activation function to use
85 |             (see [activations](../activations.md)).
86 |             If you don't specify anything, no activation is applied
87 |             (i.e. 'linear' activation: `a(x) = x`).
88 | use_bias: Boolean, whether the layer uses a bias vector. 89 | depthwise_initializer: Initializer for the depthwise kernel matrix 90 | (see [initializers](../initializers.md)). 91 | bias_initializer: Initializer for the bias vector 92 | (see [initializers](../initializers.md)). 93 | depthwise_regularizer: Regularizer function applied to 94 | the depthwise kernel matrix 95 | (see [regularizer](../regularizers.md)). 96 | bias_regularizer: Regularizer function applied to the bias vector 97 | (see [regularizer](../regularizers.md)). 98 | activity_regularizer: Regularizer function applied to 99 | the output of the layer (its 'activation'). 100 | (see [regularizer](../regularizers.md)). 101 | depthwise_constraint: Constraint function applied to 102 | the depthwise kernel matrix 103 | (see [constraints](../constraints.md)). 104 | bias_constraint: Constraint function applied to the bias vector 105 | (see [constraints](../constraints.md)). 106 | # Input shape 107 | 4D tensor with shape: 108 | `[batch, channels, rows, cols]` if data_format='channels_first' 109 | or 4D tensor with shape: 110 | `[batch, rows, cols, channels]` if data_format='channels_last'. 111 | # Output shape 112 | 4D tensor with shape: 113 | `[batch, filters, new_rows, new_cols]` if data_format='channels_first' 114 | or 4D tensor with shape: 115 | `[batch, new_rows, new_cols, filters]` if data_format='channels_last'. 116 | `rows` and `cols` values might have changed due to padding. 117 | """ 118 | 119 | def __init__(self, 120 | kernel_size, 121 | strides=(1, 1), 122 | padding='valid', 123 | depth_multiplier=1, 124 | data_format=None, 125 | activation=None, 126 | use_bias=True, 127 | depthwise_initializer='glorot_uniform', 128 | bias_initializer='zeros', 129 | depthwise_regularizer=None, 130 | bias_regularizer=None, 131 | activity_regularizer=None, 132 | depthwise_constraint=None, 133 | bias_constraint=None, 134 | **kwargs): 135 | super(DepthwiseConv2D, self).__init__( 136 | filters=None, 137 | kernel_size=kernel_size, 138 | strides=strides, 139 | padding=padding, 140 | data_format=data_format, 141 | activation=activation, 142 | use_bias=use_bias, 143 | bias_regularizer=bias_regularizer, 144 | activity_regularizer=activity_regularizer, 145 | bias_constraint=bias_constraint, 146 | **kwargs) 147 | self.depth_multiplier = depth_multiplier 148 | self.depthwise_initializer = initializers.get(depthwise_initializer) 149 | self.depthwise_regularizer = regularizers.get(depthwise_regularizer) 150 | self.depthwise_constraint = constraints.get(depthwise_constraint) 151 | self.bias_initializer = initializers.get(bias_initializer) 152 | 153 | def build(self, input_shape): 154 | if len(input_shape) < 4: 155 | raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. ' 156 | 'Received input shape:', str(input_shape)) 157 | if self.data_format == 'channels_first': 158 | channel_axis = 1 159 | else: 160 | channel_axis = 3 161 | if input_shape[channel_axis] is None: 162 | raise ValueError('The channel dimension of the inputs to ' 163 | '`DepthwiseConv2D` ' 164 | 'should be defined. 
Found `None`.') 165 | input_dim = int(input_shape[channel_axis]) 166 | depthwise_kernel_shape = (self.kernel_size[0], 167 | self.kernel_size[1], 168 | input_dim, 169 | self.depth_multiplier) 170 | 171 | self.depthwise_kernel = self.add_weight( 172 | shape=depthwise_kernel_shape, 173 | initializer=self.depthwise_initializer, 174 | name='depthwise_kernel', 175 | regularizer=self.depthwise_regularizer, 176 | constraint=self.depthwise_constraint) 177 | 178 | if self.use_bias: 179 | self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,), 180 | initializer=self.bias_initializer, 181 | name='bias', 182 | regularizer=self.bias_regularizer, 183 | constraint=self.bias_constraint) 184 | else: 185 | self.bias = None 186 | # Set input spec. 187 | self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) 188 | self.built = True 189 | 190 | def call(self, inputs, training=None): 191 | outputs = K.depthwise_conv2d( 192 | inputs, 193 | self.depthwise_kernel, 194 | strides=self.strides, 195 | padding=self.padding, 196 | dilation_rate=self.dilation_rate, 197 | data_format=self.data_format) 198 | 199 | if self.bias: 200 | outputs = K.bias_add( 201 | outputs, 202 | self.bias, 203 | data_format=self.data_format) 204 | 205 | if self.activation is not None: 206 | return self.activation(outputs) 207 | 208 | return outputs 209 | 210 | def compute_output_shape(self, input_shape): 211 | if self.data_format == 'channels_first': 212 | rows = input_shape[2] 213 | cols = input_shape[3] 214 | out_filters = input_shape[1] * self.depth_multiplier 215 | elif self.data_format == 'channels_last': 216 | rows = input_shape[1] 217 | cols = input_shape[2] 218 | out_filters = input_shape[3] * self.depth_multiplier 219 | 220 | rows = conv_utils.conv_output_length(rows, self.kernel_size[0], 221 | self.padding, 222 | self.strides[0]) 223 | cols = conv_utils.conv_output_length(cols, self.kernel_size[1], 224 | self.padding, 225 | self.strides[1]) 226 | 227 | if self.data_format == 'channels_first': 228 | return (input_shape[0], out_filters, rows, cols) 229 | elif self.data_format == 'channels_last': 230 | return (input_shape[0], rows, cols, out_filters) 231 | 232 | def get_config(self): 233 | config = super(DepthwiseConv2D, self).get_config() 234 | config.pop('filters') 235 | config.pop('kernel_initializer') 236 | config.pop('kernel_regularizer') 237 | config.pop('kernel_constraint') 238 | config['depth_multiplier'] = self.depth_multiplier 239 | config['depthwise_initializer'] = initializers.serialize(self.depthwise_initializer) 240 | config['depthwise_regularizer'] = regularizers.serialize(self.depthwise_regularizer) 241 | config['depthwise_constraint'] = constraints.serialize(self.depthwise_constraint) 242 | return config 243 | 244 | 245 | def SEMobileNet(input_shape=None, 246 | alpha=1.0, 247 | depth_multiplier=1, 248 | dropout=1e-3, 249 | include_top=True, 250 | weights=None, 251 | input_tensor=None, 252 | pooling=None, 253 | classes=1000): 254 | """Instantiates the SE-MobileNet architecture. 255 | Note that only TensorFlow is supported for now, 256 | therefore it only works with the data format 257 | `image_data_format='channels_last'` in your Keras config 258 | at `~/.keras/keras.json`. 259 | To load a MobileNet model via `load_model`, import the custom 260 | objects `relu6` and `DepthwiseConv2D` and pass them to the 261 | `custom_objects` parameter. 262 | E.g. 
263 | model = load_model('mobilenet.h5', custom_objects={ 264 | 'relu6': mobilenet.relu6, 265 | 'DepthwiseConv2D': mobilenet.DepthwiseConv2D}) 266 | # Arguments 267 | input_shape: optional shape tuple, only to be specified 268 | if `include_top` is False (otherwise the input shape 269 | has to be `(224, 224, 3)` (with `channels_last` data format) 270 | or (3, 224, 224) (with `channels_first` data format). 271 | It should have exactly 3 inputs channels, 272 | and width and height should be no smaller than 32. 273 | E.g. `(200, 200, 3)` would be one valid value. 274 | alpha: controls the width of the network. 275 | - If `alpha` < 1.0, proportionally decreases the number 276 | of filters in each layer. 277 | - If `alpha` > 1.0, proportionally increases the number 278 | of filters in each layer. 279 | - If `alpha` = 1, default number of filters from the paper 280 | are used at each layer. 281 | depth_multiplier: depth multiplier for depthwise convolution 282 | (also called the resolution multiplier) 283 | dropout: dropout rate 284 | include_top: whether to include the fully-connected 285 | layer at the top of the network. 286 | weights: `None` (random initialization) or 287 | `imagenet` (ImageNet weights) 288 | input_tensor: optional Keras tensor (i.e. output of 289 | `layers.Input()`) 290 | to use as image input for the model. 291 | pooling: Optional pooling mode for feature extraction 292 | when `include_top` is `False`. 293 | - `None` means that the output of the model 294 | will be the 4D tensor output of the 295 | last convolutional layer. 296 | - `avg` means that global average pooling 297 | will be applied to the output of the 298 | last convolutional layer, and thus 299 | the output of the model will be a 300 | 2D tensor. 301 | - `max` means that global max pooling will 302 | be applied. 303 | classes: optional number of classes to classify images 304 | into, only to be specified if `include_top` is True, and 305 | if no `weights` argument is specified. 306 | # Returns 307 | A Keras model instance. 308 | # Raises 309 | ValueError: in case of invalid argument for `weights`, 310 | or invalid input shape. 311 | RuntimeError: If attempting to run this model with a 312 | backend that does not support separable convolutions. 313 | """ 314 | 315 | if K.backend() != 'tensorflow': 316 | raise RuntimeError('Only TensorFlow backend is currently supported, ' 317 | 'as other backends do not support ' 318 | 'depthwise convolution.') 319 | 320 | if weights not in {'imagenet', None}: 321 | raise ValueError('The `weights` argument should be either ' 322 | '`None` (random initialization) or `imagenet` ' 323 | '(pre-training on ImageNet).') 324 | 325 | if weights == 'imagenet' and include_top and classes != 1000: 326 | raise ValueError('If using `weights` as ImageNet with `include_top` ' 327 | 'as true, `classes` should be 1000') 328 | 329 | # Determine proper input shape and default size. 
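# When `input_shape` is None, the canonical 224 x 224 MobileNet
# resolution is assumed. Otherwise the code below only adopts the given
# spatial size as `default_size` when the input is square and matches one
# of the resolutions MobileNet was originally released at (128, 160, 192
# or 224); anything else falls back to 224. For example, a hypothetical
# input_shape=(160, 160, 3) yields default_size=160, while a non-square
# (200, 150, 3) yields default_size=224.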
330 | if input_shape is None: 331 | default_size = 224 332 | else: 333 | if K.image_data_format() == 'channels_first': 334 | rows = input_shape[1] 335 | cols = input_shape[2] 336 | else: 337 | rows = input_shape[0] 338 | cols = input_shape[1] 339 | 340 | if rows == cols and rows in [128, 160, 192, 224]: 341 | default_size = rows 342 | else: 343 | default_size = 224 344 | 345 | input_shape = _obtain_input_shape(input_shape, 346 | default_size=default_size, 347 | min_size=32, 348 | data_format=K.image_data_format(), 349 | require_flatten=include_top, 350 | weights=weights) 351 | 352 | if K.image_data_format() == 'channels_last': 353 | row_axis, col_axis = (0, 1) 354 | else: 355 | row_axis, col_axis = (1, 2) 356 | rows = input_shape[row_axis] 357 | cols = input_shape[col_axis] 358 | 359 | if input_tensor is None: 360 | img_input = Input(shape=input_shape) 361 | else: 362 | if not K.is_keras_tensor(input_tensor): 363 | img_input = Input(tensor=input_tensor, shape=input_shape) 364 | else: 365 | img_input = input_tensor 366 | 367 | x = _conv_block(img_input, 32, alpha, strides=(2, 2)) 368 | x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) 369 | 370 | x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, 371 | strides=(2, 2), block_id=2) 372 | x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) 373 | 374 | x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, 375 | strides=(2, 2), block_id=4) 376 | x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) 377 | 378 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, 379 | strides=(2, 2), block_id=6) 380 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) 381 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) 382 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) 383 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) 384 | x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) 385 | 386 | x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, 387 | strides=(2, 2), block_id=12) 388 | x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) 389 | 390 | if include_top: 391 | if K.image_data_format() == 'channels_first': 392 | shape = (int(1024 * alpha), 1, 1) 393 | else: 394 | shape = (1, 1, int(1024 * alpha)) 395 | 396 | x = GlobalAveragePooling2D()(x) 397 | x = Reshape(shape, name='reshape_n_1')(x) 398 | x = Dropout(dropout, name='dropout')(x) 399 | x = Conv2D(classes, (1, 1), 400 | padding='same', name='conv_preds')(x) 401 | x = Activation('softmax', name='act_softmax')(x) 402 | x = Reshape((classes,), name='reshape_final')(x) 403 | else: 404 | if pooling == 'avg': 405 | x = GlobalAveragePooling2D()(x) 406 | elif pooling == 'max': 407 | x = GlobalMaxPooling2D()(x) 408 | 409 | # Ensure that the model takes into account 410 | # any potential predecessors of `input_tensor`. 411 | if input_tensor is not None: 412 | inputs = get_source_inputs(input_tensor) 413 | else: 414 | inputs = img_input 415 | 416 | # Create model. 417 | model = Model(inputs, x, name='se_mobilenet_%0.2f_%s' % (alpha, rows)) 418 | 419 | return model 420 | 421 | 422 | def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): 423 | """Adds an initial convolution layer (with batch normalization and relu6). 
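    The block relies on the `relu6` activation defined above; clipping
    activations at 6 keeps their dynamic range small, which is commonly
    held to make the network friendlier to low-precision inference.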
424 | # Arguments 425 | inputs: Input tensor of shape `(rows, cols, 3)` 426 | (with `channels_last` data format) or 427 | (3, rows, cols) (with `channels_first` data format). 428 | It should have exactly 3 inputs channels, 429 | and width and height should be no smaller than 32. 430 | E.g. `(224, 224, 3)` would be one valid value. 431 | filters: Integer, the dimensionality of the output space 432 | (i.e. the number output of filters in the convolution). 433 | alpha: controls the width of the network. 434 | - If `alpha` < 1.0, proportionally decreases the number 435 | of filters in each layer. 436 | - If `alpha` > 1.0, proportionally increases the number 437 | of filters in each layer. 438 | - If `alpha` = 1, default number of filters from the paper 439 | are used at each layer. 440 | kernel: An integer or tuple/list of 2 integers, specifying the 441 | width and height of the 2D convolution window. 442 | Can be a single integer to specify the same value for 443 | all spatial dimensions. 444 | strides: An integer or tuple/list of 2 integers, 445 | specifying the strides of the convolution along the width and height. 446 | Can be a single integer to specify the same value for 447 | all spatial dimensions. 448 | Specifying any stride value != 1 is incompatible with specifying 449 | any `dilation_rate` value != 1. 450 | # Input shape 451 | 4D tensor with shape: 452 | `(samples, channels, rows, cols)` if data_format='channels_first' 453 | or 4D tensor with shape: 454 | `(samples, rows, cols, channels)` if data_format='channels_last'. 455 | # Output shape 456 | 4D tensor with shape: 457 | `(samples, filters, new_rows, new_cols)` if data_format='channels_first' 458 | or 4D tensor with shape: 459 | `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. 460 | `rows` and `cols` values might have changed due to stride. 461 | # Returns 462 | Output tensor of block. 463 | """ 464 | channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 465 | filters = int(filters * alpha) 466 | x = Conv2D(filters, kernel, 467 | padding='same', 468 | use_bias=False, 469 | strides=strides, 470 | name='conv1')(inputs) 471 | x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x) 472 | return Activation(relu6, name='conv1_relu')(x) 473 | 474 | 475 | def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, 476 | depth_multiplier=1, strides=(1, 1), block_id=1): 477 | """Adds a depthwise convolution block. 478 | A depthwise convolution block consists of a depthwise conv, 479 | batch normalization, relu6, pointwise convolution, 480 | batch normalization and relu6 activation. 481 | # Arguments 482 | inputs: Input tensor of shape `(rows, cols, channels)` 483 | (with `channels_last` data format) or 484 | (channels, rows, cols) (with `channels_first` data format). 485 | pointwise_conv_filters: Integer, the dimensionality of the output space 486 | (i.e. the number output of filters in the pointwise convolution). 487 | alpha: controls the width of the network. 488 | - If `alpha` < 1.0, proportionally decreases the number 489 | of filters in each layer. 490 | - If `alpha` > 1.0, proportionally increases the number 491 | of filters in each layer. 492 | - If `alpha` = 1, default number of filters from the paper 493 | are used at each layer. 494 | depth_multiplier: The number of depthwise convolution output channels 495 | for each input channel. 496 | The total number of depthwise convolution output 497 | channels will be equal to `filters_in * depth_multiplier`. 
498 | strides: An integer or tuple/list of 2 integers, 499 | specifying the strides of the convolution along the width and height. 500 | Can be a single integer to specify the same value for 501 | all spatial dimensions. 502 | Specifying any stride value != 1 is incompatible with specifying 503 | any `dilation_rate` value != 1. 504 | block_id: Integer, a unique identification designating the block number. 505 | # Input shape 506 | 4D tensor with shape: 507 | `(batch, channels, rows, cols)` if data_format='channels_first' 508 | or 4D tensor with shape: 509 | `(batch, rows, cols, channels)` if data_format='channels_last'. 510 | # Output shape 511 | 4D tensor with shape: 512 | `(batch, filters, new_rows, new_cols)` if data_format='channels_first' 513 | or 4D tensor with shape: 514 | `(batch, new_rows, new_cols, filters)` if data_format='channels_last'. 515 | `rows` and `cols` values might have changed due to stride. 516 | # Returns 517 | Output tensor of block. 518 | """ 519 | channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 520 | pointwise_conv_filters = int(pointwise_conv_filters * alpha) 521 | 522 | x = DepthwiseConv2D((3, 3), 523 | padding='same', 524 | depth_multiplier=depth_multiplier, 525 | strides=strides, 526 | use_bias=False, 527 | name='conv_dw_%d' % block_id)(inputs) 528 | x = BatchNormalization(axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) 529 | x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x) 530 | 531 | x = Conv2D(pointwise_conv_filters, (1, 1), 532 | padding='same', 533 | use_bias=False, 534 | strides=(1, 1), 535 | name='conv_pw_%d' % block_id)(x) 536 | x = BatchNormalization(axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x) 537 | x = Activation(relu6, name='conv_pw_%d_relu' % block_id)(x) 538 | 539 | # global context block 540 | x = global_context_block(x) 541 | return x 542 | -------------------------------------------------------------------------------- /gc_resnet.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Global Context ResNets 3 | 4 | References: 5 | - [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) 6 | - []() # added when paper is published on Arxiv 7 | ''' 8 | from __future__ import print_function 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | 12 | from keras.models import Model 13 | from keras.layers import Input 14 | from keras.layers import Dense 15 | from keras.layers import Reshape 16 | from keras.layers import Activation 17 | from keras.layers import BatchNormalization 18 | from keras.layers import MaxPooling2D 19 | from keras.layers import GlobalAveragePooling2D 20 | from keras.layers import GlobalMaxPooling2D 21 | from keras.layers import Conv2D 22 | from keras.layers import add 23 | from keras.layers import multiply 24 | from keras.regularizers import l2 25 | from keras.utils import conv_utils 26 | from keras.utils.data_utils import get_file 27 | from keras.engine.topology import get_source_inputs 28 | from keras_applications.imagenet_utils import _obtain_input_shape 29 | from keras_applications.resnet50 import preprocess_input 30 | from keras_applications.imagenet_utils import decode_predictions 31 | from keras import backend as K 32 | 33 | from gc import global_context_block 34 | 35 | __all__ = ['GCResNet', 'GCResNet50', 'GCResNet101', 'GCResNet154', 'preprocess_input', 'decode_predictions'] 36 | 37 | 38 | WEIGHTS_PATH = "" 39 | WEIGHTS_PATH_NO_TOP = "" 40 | 41 | 42 | def 
GCResNet(input_shape=None,
43 |              initial_conv_filters=64,
44 |              depth=[3, 4, 6, 3],
45 |              filters=[64, 128, 256, 512],
46 |              width=1,
47 |              bottleneck=False,
48 |              weight_decay=1e-4,
49 |              include_top=True,
50 |              weights=None,
51 |              input_tensor=None,
52 |              pooling=None,
53 |              classes=1000):
54 |     """ Instantiates the Global Context ResNet architecture. Note that,
55 |     when using TensorFlow, for best performance you should set
56 |     `image_data_format="channels_last"` in your Keras config
57 |     at ~/.keras/keras.json.
58 |     The model is compatible with both
59 |     TensorFlow and Theano. The dimension ordering
60 |     convention used by the model is the one
61 |     specified in your Keras config file.
62 |     # Arguments
63 |         initial_conv_filters: number of features for the initial convolution
64 |         depth: number of layers in each block, defined as a list.
65 |             ResNet-50 = [3, 4, 6, 3]
66 |             ResNet-101 = [3, 6, 23, 3]
67 |             ResNet-152 = [3, 8, 36, 3]
68 |         filters: number of filters per block, defined as a list.
69 |             filters = [64, 128, 256, 512]
70 |         width: width multiplier for the network (for Wide ResNets)
71 |         bottleneck: adds a bottleneck conv to reduce computation
72 |         weight_decay: weight decay (l2 norm)
73 |         include_top: whether to include the fully-connected
74 |             layer at the top of the network.
75 |         weights: `None` (random initialization) or `imagenet` (trained
76 |             on ImageNet)
77 |         input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
78 |             to use as image input for the model.
79 |         input_shape: optional shape tuple, only to be specified
80 |             if `include_top` is False (otherwise the input shape
81 |             has to be `(224, 224, 3)` (with `tf` dim ordering)
82 |             or `(3, 224, 224)` (with `th` dim ordering).
83 |             It should have exactly 3 input channels,
84 |             and width and height should be no smaller than 8.
85 |             E.g. `(200, 200, 3)` would be one valid value.
86 |         pooling: Optional pooling mode for feature extraction
87 |             when `include_top` is `False`.
88 |             - `None` means that the output of the model will be
89 |                 the 4D tensor output of the
90 |                 last convolutional layer.
91 |             - `avg` means that global average pooling
92 |                 will be applied to the output of the
93 |                 last convolutional layer, and thus
94 |                 the output of the model will be a 2D tensor.
95 |             - `max` means that global max pooling will
96 |                 be applied.
97 |         classes: optional number of classes to classify images
98 |             into, only to be specified if `include_top` is True, and
99 |             if no `weights` argument is specified.
100 |     # Returns
101 |         A Keras model instance.
102 |     """
103 | 
104 |     if weights not in {'imagenet', None}:
105 |         raise ValueError('The `weights` argument should be either '
106 |                          '`None` (random initialization) or `imagenet` '
107 |                          '(pre-training on ImageNet).')
108 | 
109 |     if weights == 'imagenet' and include_top and classes != 1000:
110 |         raise ValueError('If using `weights` as imagenet with `include_top`'
111 |                          ' as true, `classes` should be 1000')
112 | 
113 |     assert len(depth) == len(filters), "The length of filter increment list must match the length " \
114 |                                        "of the depth list."
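    # For example (hypothetical usage), a GC-ResNet-50 style network for
    # 10 output classes could be built as:
    #
    #   model = GCResNet(input_shape=(224, 224, 3),
    #                    depth=[3, 4, 6, 3],
    #                    filters=[64, 128, 256, 512],
    #                    bottleneck=True, classes=10)
    #
    # The assertion above guarantees each entry of `depth` pairs with a
    # filter count in `filters`, one per residual stage.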
115 | 116 | # Determine proper input shape 117 | input_shape = _obtain_input_shape(input_shape, 118 | default_size=224, 119 | min_size=32, 120 | data_format=K.image_data_format(), 121 | require_flatten=False) 122 | 123 | if input_tensor is None: 124 | img_input = Input(shape=input_shape) 125 | else: 126 | if not K.is_keras_tensor(input_tensor): 127 | img_input = Input(tensor=input_tensor, shape=input_shape) 128 | else: 129 | img_input = input_tensor 130 | 131 | x = _create_se_resnet(classes, img_input, include_top, initial_conv_filters, 132 | filters, depth, width, bottleneck, weight_decay, pooling) 133 | 134 | # Ensure that the model takes into account 135 | # any potential predecessors of `input_tensor`. 136 | if input_tensor is not None: 137 | inputs = get_source_inputs(input_tensor) 138 | else: 139 | inputs = img_input 140 | # Create model. 141 | model = Model(inputs, x, name='resnext') 142 | 143 | # load weights 144 | 145 | return model 146 | 147 | 148 | def GCResNet18(input_shape=None, 149 | width=1, 150 | bottleneck=False, 151 | weight_decay=1e-4, 152 | include_top=True, 153 | weights=None, 154 | input_tensor=None, 155 | pooling=None, 156 | classes=1000): 157 | return GCResNet(input_shape, 158 | depth=[2, 2, 2, 2], 159 | width=width, 160 | bottleneck=bottleneck, 161 | weight_decay=weight_decay, 162 | include_top=include_top, 163 | weights=weights, 164 | input_tensor=input_tensor, 165 | pooling=pooling, 166 | classes=classes) 167 | 168 | 169 | def GCResNet34(input_shape=None, 170 | width=1, 171 | bottleneck=False, 172 | weight_decay=1e-4, 173 | include_top=True, 174 | weights=None, 175 | input_tensor=None, 176 | pooling=None, 177 | classes=1000): 178 | return GCResNet(input_shape, 179 | depth=[3, 4, 6, 3], 180 | width=width, 181 | bottleneck=bottleneck, 182 | weight_decay=weight_decay, 183 | include_top=include_top, 184 | weights=weights, 185 | input_tensor=input_tensor, 186 | pooling=pooling, 187 | classes=classes) 188 | 189 | 190 | def GCResNet50(input_shape=None, 191 | width=1, 192 | bottleneck=True, 193 | weight_decay=1e-4, 194 | include_top=True, 195 | weights=None, 196 | input_tensor=None, 197 | pooling=None, 198 | classes=1000): 199 | return GCResNet(input_shape, 200 | width=width, 201 | bottleneck=bottleneck, 202 | weight_decay=weight_decay, 203 | include_top=include_top, 204 | weights=weights, 205 | input_tensor=input_tensor, 206 | pooling=pooling, 207 | classes=classes) 208 | 209 | 210 | def GCResNet101(input_shape=None, 211 | width=1, 212 | bottleneck=True, 213 | weight_decay=1e-4, 214 | include_top=True, 215 | weights=None, 216 | input_tensor=None, 217 | pooling=None, 218 | classes=1000): 219 | return GCResNet(input_shape, 220 | depth=[3, 6, 23, 3], 221 | width=width, 222 | bottleneck=bottleneck, 223 | weight_decay=weight_decay, 224 | include_top=include_top, 225 | weights=weights, 226 | input_tensor=input_tensor, 227 | pooling=pooling, 228 | classes=classes) 229 | 230 | 231 | def GCResNet154(input_shape=None, 232 | width=1, 233 | bottleneck=True, 234 | weight_decay=1e-4, 235 | include_top=True, 236 | weights=None, 237 | input_tensor=None, 238 | pooling=None, 239 | classes=1000): 240 | return GCResNet(input_shape, 241 | depth=[3, 8, 36, 3], 242 | width=width, 243 | bottleneck=bottleneck, 244 | weight_decay=weight_decay, 245 | include_top=include_top, 246 | weights=weights, 247 | input_tensor=input_tensor, 248 | pooling=pooling, 249 | classes=classes) 250 | 251 | 252 | def _resnet_block(input, filters, k=1, strides=(1, 1)): 253 | ''' Adds a pre-activation resnet block 
without bottleneck layers
254 | 
255 |     Args:
256 |         input: input tensor
257 |         filters: number of output filters
258 |         k: width factor
259 |         strides: strides of the convolution layer
260 | 
261 |     Returns: a keras tensor
262 |     '''
263 |     init = input
264 |     channel_axis = 1 if K.image_data_format() == "channels_first" else -1
265 | 
266 |     x = BatchNormalization(axis=channel_axis)(input)
267 |     x = Activation('relu')(x)
268 | 
269 |     if strides != (1, 1) or init._keras_shape[channel_axis] != filters * k:
270 |         init = Conv2D(filters * k, (1, 1), padding='same', kernel_initializer='he_normal',
271 |                       use_bias=False, strides=strides)(x)
272 | 
273 |     x = Conv2D(filters * k, (3, 3), padding='same', kernel_initializer='he_normal',
274 |                use_bias=False, strides=strides)(x)
275 |     x = BatchNormalization(axis=channel_axis)(x)
276 |     x = Activation('relu')(x)
277 | 
278 |     x = Conv2D(filters * k, (3, 3), padding='same', kernel_initializer='he_normal',
279 |                use_bias=False)(x)
280 | 
281 |     # global context block
282 |     x = global_context_block(x)
283 | 
284 |     m = add([x, init])
285 |     return m
286 | 
287 | 
288 | def _resnet_bottleneck_block(input, filters, k=1, strides=(1, 1)):
289 |     ''' Adds a pre-activation resnet block with bottleneck layers
290 | 
291 |     Args:
292 |         input: input tensor
293 |         filters: number of output filters
294 |         k: width factor
295 |         strides: strides of the convolution layer
296 | 
297 |     Returns: a keras tensor
298 |     '''
299 |     init = input
300 |     channel_axis = 1 if K.image_data_format() == "channels_first" else -1
301 |     bottleneck_expand = 4
302 | 
303 |     x = BatchNormalization(axis=channel_axis)(input)
304 |     x = Activation('relu')(x)
305 | 
306 |     if strides != (1, 1) or init._keras_shape[channel_axis] != bottleneck_expand * filters * k:
307 |         init = Conv2D(bottleneck_expand * filters * k, (1, 1), padding='same', kernel_initializer='he_normal',
308 |                       use_bias=False, strides=strides)(x)
309 | 
310 |     x = Conv2D(filters * k, (1, 1), padding='same', kernel_initializer='he_normal',
311 |                use_bias=False)(x)
312 |     x = BatchNormalization(axis=channel_axis)(x)
313 |     x = Activation('relu')(x)
314 | 
315 |     x = Conv2D(filters * k, (3, 3), padding='same', kernel_initializer='he_normal',
316 |                use_bias=False, strides=strides)(x)
317 |     x = BatchNormalization(axis=channel_axis)(x)
318 |     x = Activation('relu')(x)
319 | 
320 |     x = Conv2D(bottleneck_expand * filters * k, (1, 1), padding='same', kernel_initializer='he_normal',
321 |                use_bias=False)(x)
322 | 
323 |     # global context block
324 |     x = global_context_block(x)
325 | 
326 |     m = add([x, init])
327 |     return m
328 | 
329 | 
330 | def _create_se_resnet(classes, img_input, include_top, initial_conv_filters, filters,
331 |                       depth, width, bottleneck, weight_decay, pooling):
332 |     '''Creates a GC ResNet model with specified parameters
333 |     Args:
334 |         initial_conv_filters: number of features for the initial convolution
335 |         include_top: Flag to include the last dense layer
336 |         filters: number of filters per block, defined as a list.
337 |             filters = [64, 128, 256, 512]
338 |         depth: number of layers in each block, defined as a list.
339 |             ResNet-50 = [3, 4, 6, 3]
340 |             ResNet-101 = [3, 6, 23, 3]
341 |             ResNet-152 = [3, 8, 36, 3]
342 |         width: width multiplier for network (for Wide ResNet)
343 |         bottleneck: adds a bottleneck conv to reduce computation
344 |         weight_decay: weight_decay (l2 norm)
345 |         pooling: Optional pooling mode for feature extraction
346 |             when `include_top` is `False`.
347 | - `None` means that the output of the model will be 348 | the 4D tensor output of the 349 | last convolutional layer. 350 | - `avg` means that global average pooling 351 | will be applied to the output of the 352 | last convolutional layer, and thus 353 | the output of the model will be a 2D tensor. 354 | - `max` means that global max pooling will 355 | be applied. 356 | Returns: a Keras Model 357 | ''' 358 | channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 359 | N = list(depth) 360 | 361 | # block 1 (initial conv block) 362 | x = Conv2D(initial_conv_filters, (7, 7), padding='same', use_bias=False, strides=(2, 2), 363 | kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(img_input) 364 | 365 | x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) 366 | 367 | # block 2 (projection block) 368 | for i in range(N[0]): 369 | if bottleneck: 370 | x = _resnet_bottleneck_block(x, filters[0], width) 371 | else: 372 | x = _resnet_block(x, filters[0], width) 373 | 374 | # block 3 - N 375 | for k in range(1, len(N)): 376 | if bottleneck: 377 | x = _resnet_bottleneck_block(x, filters[k], width, strides=(2, 2)) 378 | else: 379 | x = _resnet_block(x, filters[k], width, strides=(2, 2)) 380 | 381 | for i in range(N[k] - 1): 382 | if bottleneck: 383 | x = _resnet_bottleneck_block(x, filters[k], width) 384 | else: 385 | x = _resnet_block(x, filters[k], width) 386 | 387 | x = BatchNormalization(axis=channel_axis)(x) 388 | x = Activation('relu')(x) 389 | 390 | if include_top: 391 | x = GlobalAveragePooling2D()(x) 392 | x = Dense(classes, use_bias=False, kernel_regularizer=l2(weight_decay), 393 | activation='softmax')(x) 394 | else: 395 | if pooling == 'avg': 396 | x = GlobalAveragePooling2D()(x) 397 | elif pooling == 'max': 398 | x = GlobalMaxPooling2D()(x) 399 | 400 | return x 401 | -------------------------------------------------------------------------------- /group_norm.py: -------------------------------------------------------------------------------- 1 | from keras.engine import Layer, InputSpec 2 | from keras import initializers 3 | from keras import regularizers 4 | from keras import constraints 5 | from keras import backend as K 6 | 7 | from keras.utils.generic_utils import get_custom_objects 8 | 9 | 10 | class GroupNormalization(Layer): 11 | """Group normalization layer 12 | Group Normalization divides the channels into groups and computes within each group 13 | the mean and variance for normalization. GN's computation is independent of batch sizes, 14 | and its accuracy is stable in a wide range of batch sizes 15 | # Arguments 16 | groups: Integer, the number of groups for Group Normalization. 17 | axis: Integer, the axis that should be normalized 18 | (typically the features axis). 19 | For instance, after a `Conv2D` layer with 20 | `data_format="channels_first"`, 21 | set `axis=1` in `BatchNormalization`. 22 | epsilon: Small float added to variance to avoid dividing by zero. 23 | center: If True, add offset of `beta` to normalized tensor. 24 | If False, `beta` is ignored. 25 | scale: If True, multiply by `gamma`. 26 | If False, `gamma` is not used. 27 | When the next layer is linear (also e.g. `nn.relu`), 28 | this can be disabled since the scaling 29 | will be done by the next layer. 30 | beta_initializer: Initializer for the beta weight. 31 | gamma_initializer: Initializer for the gamma weight. 32 | beta_regularizer: Optional regularizer for the beta weight. 33 | gamma_regularizer: Optional regularizer for the gamma weight. 
34 |         beta_constraint: Optional constraint for the beta weight.
35 |         gamma_constraint: Optional constraint for the gamma weight.
36 |     # Input shape
37 |         Arbitrary. Use the keyword argument `input_shape`
38 |         (tuple of integers, does not include the samples axis)
39 |         when using this layer as the first layer in a model.
40 |     # Output shape
41 |         Same shape as input.
42 |     # References
43 |         - [Group Normalization](https://arxiv.org/abs/1803.08494)
44 |     """
45 | 
46 |     def __init__(self,
47 |                  groups=32,
48 |                  axis=-1,
49 |                  epsilon=1e-5,
50 |                  center=True,
51 |                  scale=True,
52 |                  beta_initializer='zeros',
53 |                  gamma_initializer='ones',
54 |                  beta_regularizer=None,
55 |                  gamma_regularizer=None,
56 |                  beta_constraint=None,
57 |                  gamma_constraint=None,
58 |                  **kwargs):
59 |         super(GroupNormalization, self).__init__(**kwargs)
60 |         self.supports_masking = True
61 |         self.groups = groups
62 |         self.axis = axis
63 |         self.epsilon = epsilon
64 |         self.center = center
65 |         self.scale = scale
66 |         self.beta_initializer = initializers.get(beta_initializer)
67 |         self.gamma_initializer = initializers.get(gamma_initializer)
68 |         self.beta_regularizer = regularizers.get(beta_regularizer)
69 |         self.gamma_regularizer = regularizers.get(gamma_regularizer)
70 |         self.beta_constraint = constraints.get(beta_constraint)
71 |         self.gamma_constraint = constraints.get(gamma_constraint)
72 | 
73 |     def build(self, input_shape):
74 |         dim = input_shape[self.axis]
75 | 
76 |         if dim is None:
77 |             raise ValueError('Axis ' + str(self.axis) + ' of '
78 |                              'input tensor should have a defined dimension '
79 |                              'but the layer received an input with shape ' +
80 |                              str(input_shape) + '.')
81 | 
82 |         if dim < self.groups:
83 |             raise ValueError('Number of groups (' + str(self.groups) + ') cannot be '
84 |                              'more than the number of channels (' +
85 |                              str(dim) + ').')
86 | 
87 |         if dim % self.groups != 0:
88 |             raise ValueError('Number of channels (' + str(dim) + ') must be a '
89 |                              'multiple of the number of groups (' +
90 |                              str(self.groups) + ').')
91 | 
92 |         self.input_spec = InputSpec(ndim=len(input_shape),
93 |                                     axes={self.axis: dim})
94 |         shape = (dim,)
95 | 
96 |         if self.scale:
97 |             self.gamma = self.add_weight(shape=shape,
98 |                                          name='gamma',
99 |                                          initializer=self.gamma_initializer,
100 |                                          regularizer=self.gamma_regularizer,
101 |                                          constraint=self.gamma_constraint)
102 |         else:
103 |             self.gamma = None
104 |         if self.center:
105 |             self.beta = self.add_weight(shape=shape,
106 |                                         name='beta',
107 |                                         initializer=self.beta_initializer,
108 |                                         regularizer=self.beta_regularizer,
109 |                                         constraint=self.beta_constraint)
110 |         else:
111 |             self.beta = None
112 |         self.built = True
113 | 
114 |     def call(self, inputs, **kwargs):
115 |         input_shape = K.int_shape(inputs)
116 |         tensor_input_shape = K.shape(inputs)
117 | 
118 |         # Prepare broadcasting shape.
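        # Illustrative walk-through (assuming a `channels_last` 4D input of
        # shape (N, H, W, C) split into G groups): `broadcast_shape` becomes
        # (1, G, 1, 1, C // G) and the inputs are reshaped to
        # (N, G, H, W, C // G), so the mean and variance below are reduced
        # over every axis except the batch and group axes. E.g. C=64 with
        # the default G=32 normalizes 32 groups of 2 channels each.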
119 | reduction_axes = list(range(len(input_shape))) 120 | del reduction_axes[self.axis] 121 | broadcast_shape = [1] * len(input_shape) 122 | broadcast_shape[self.axis] = input_shape[self.axis] // self.groups 123 | broadcast_shape.insert(1, self.groups) 124 | 125 | reshape_group_shape = K.shape(inputs) 126 | group_axes = [reshape_group_shape[i] for i in range(len(input_shape))] 127 | group_axes[self.axis] = input_shape[self.axis] // self.groups 128 | group_axes.insert(1, self.groups) 129 | 130 | # reshape inputs to new group shape 131 | group_shape = [group_axes[0], self.groups] + group_axes[2:] 132 | group_shape = K.stack(group_shape) 133 | inputs = K.reshape(inputs, group_shape) 134 | 135 | group_reduction_axes = list(range(len(group_axes))) 136 | group_reduction_axes = group_reduction_axes[2:] 137 | 138 | mean = K.mean(inputs, axis=group_reduction_axes, keepdims=True) 139 | variance = K.var(inputs, axis=group_reduction_axes, keepdims=True) 140 | 141 | inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon)) 142 | 143 | # prepare broadcast shape 144 | inputs = K.reshape(inputs, group_shape) 145 | outputs = inputs 146 | 147 | # In this case we must explicitly broadcast all parameters. 148 | if self.scale: 149 | broadcast_gamma = K.reshape(self.gamma, broadcast_shape) 150 | outputs = outputs * broadcast_gamma 151 | 152 | if self.center: 153 | broadcast_beta = K.reshape(self.beta, broadcast_shape) 154 | outputs = outputs + broadcast_beta 155 | 156 | outputs = K.reshape(outputs, tensor_input_shape) 157 | 158 | return outputs 159 | 160 | def get_config(self): 161 | config = { 162 | 'groups': self.groups, 163 | 'axis': self.axis, 164 | 'epsilon': self.epsilon, 165 | 'center': self.center, 166 | 'scale': self.scale, 167 | 'beta_initializer': initializers.serialize(self.beta_initializer), 168 | 'gamma_initializer': initializers.serialize(self.gamma_initializer), 169 | 'beta_regularizer': regularizers.serialize(self.beta_regularizer), 170 | 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), 171 | 'beta_constraint': constraints.serialize(self.beta_constraint), 172 | 'gamma_constraint': constraints.serialize(self.gamma_constraint) 173 | } 174 | base_config = super(GroupNormalization, self).get_config() 175 | return dict(list(base_config.items()) + list(config.items())) 176 | 177 | def compute_output_shape(self, input_shape): 178 | return input_shape 179 | 180 | 181 | get_custom_objects().update({'GroupNormalization': GroupNormalization}) 182 | 183 | 184 | if __name__ == '__main__': 185 | from keras.layers import Input 186 | from keras.models import Model 187 | ip = Input(shape=(None, None, 4)) 188 | x = GroupNormalization(groups=2, axis=-1, epsilon=0.1)(ip) 189 | model = Model(ip, x) 190 | model.summary() 191 | -------------------------------------------------------------------------------- /images/gc.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/titu1994/keras-global-context-networks/73f0a08f7c9e3a37e41054c0828e7ac3b87a22af/images/gc.PNG --------------------------------------------------------------------------------