├── LICENSE
├── README.md
├── efficientnetV2.py
└── ghost_efficientnetV2.py

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2021 Zhantao Yang

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EfficientNetV2_TensorFlow2
A TensorFlow 2 (Keras) reimplementation of EfficientNetV2.\
This is NOT an official implementation.\
The implementation has now been modified to match the number of parameters and the architecture of the official repo.\
~~As the official implementation had not yet been released by the time this code was written, the model had not been verified and therefore could not be guaranteed to match the official code exactly.~~

## efficientnetV2.py
An implementation in TensorFlow 2 Keras.\
~~Currently, only EfficientNetV2-S is included.~~\
EfficientNetV2-S, M, L, and XL are now all implemented.\
The implementation is based on the description in the paper\
https://arxiv.org/abs/2104.00298 \
EfficientNetV2: Smaller Models and Faster Training\
by Mingxing Tan, Quoc V. Le\
and the official repo\
https://github.com/google/automl/tree/master/efficientnetv2 \
Code is partially inspired by and adapted from the official repo.\
~~Code is partially inspired by and adapted from the tensorflow.keras.applications MobileNet code.~~

## ghost_efficientnetV2.py
A custom version of EfficientNetV2 that\
replaces most convolutional layers with the Ghost Modules introduced in the paper\
https://arxiv.org/abs/1911.11907 \
GhostNet: More Features from Cheap Operations\
by Han et al.\
Ghost Modules significantly reduce the number of parameters in the model.

Also, instead of ResNet-C downsampling, this version uses ResNet-D downsampling;\
see the paper https://arxiv.org/abs/1812.01187v2 \
Bag of Tricks for Image Classification with Convolutional Neural Networks\
by He et al.

The reduction ratio of the SE module is also slightly changed.

**With the above changes, this custom version has only ~65% of the original number of parameters.** \
**However, note that this custom version has approximately the same or an even longer training time compared to the original version on GPU,** \
because of hardware limitations on depthwise convolution computation.\
According to the GhostNet paper, mobile devices and other resource-limited devices can benefit from this,\
and there are some special cases in which Ghost Modules can even outperform normal convolutions.
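
## Usage
A minimal sketch of building and compiling a model (any of `s`, `m`, `l`, `xl` works the same way; the 0.2 rates and the optimizer/loss choices here are illustrative, not prescribed by this repo):

```python
import efficientnetV2

# EfficientNetV2-S with a 1000-way softmax head
model = efficientnetV2.s(in_shape=(224, 224, 3), num_classes=1000,
                         dropout_rate=0.2, drop_connect=0.2)
model.compile(optimizer="adam",
              loss="categorical_crossentropy",  # the model outputs softmax probabilities
              metrics=["accuracy"])
model.summary()
```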
--------------------------------------------------------------------------------
/efficientnetV2.py:
--------------------------------------------------------------------------------
from tensorflow.keras import Model, layers, activations
import tensorflow_addons as tfa
import math

"""
round_filters and round_repeats are borrowed from official repo
https://github.com/google/automl/tree/master/efficientnetv2
"""


def round_filters(filters, multiplier=1.):
    divisor = 8
    min_depth = 8
    filters *= multiplier
    min_depth = min_depth or divisor
    new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
    return int(new_filters)


def round_repeats(repeats, multiplier=1.):
    return int(math.ceil(multiplier * repeats))
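
# Worked examples for the two helpers above (sanity checks, not executed):
#   round_filters(24)       -> 24   (24 is already a multiple of the divisor 8)
#   round_filters(100, 1.1) -> 112  (110 rounded to the nearest multiple of 8)
#   round_repeats(4, 1.5)   -> 6    (ceil(4 * 1.5))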


def squeeze_and_excite(x, in_channels, out_channels, activation, reduction_ratio=4):
    x = layers.GlobalAvgPool2D()(x)
    x = layers.Dense(in_channels // reduction_ratio)(x)
    x = layers.Activation(activation)(x)
    x = layers.Dense(out_channels)(x)
    x = layers.Activation(activations.sigmoid)(x)
    return x


def fused_mbconv(x, in_channels, out_channels, kernel_size, activation, stride=1, reduction_ratio=4,
                 expansion=6, dropout=None, drop_connect=.2):
    shortcut = x
    expanded = round_filters(in_channels * expansion)

    if expansion != 1:
        x = layers.Conv2D(expanded, kernel_size, stride, padding="same", use_bias=False)(x)
        x = layers.BatchNormalization(epsilon=1e-5)(x)
        x = layers.Activation(activation)(x)

    if (dropout is not None) and (dropout != 0.):
        x = layers.Dropout(dropout)(x)

    if reduction_ratio is not None:
        se = squeeze_and_excite(x, in_channels, expanded, activation, reduction_ratio)
        x = layers.Multiply()([x, se])

    # when expansion == 1 this is the only conv in the block, so it must carry the stride
    x = layers.Conv2D(out_channels, (1, 1) if expansion != 1 else kernel_size,
                      1 if expansion != 1 else stride, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    if expansion == 1:
        x = layers.Activation(activation)(x)
    if (stride == 1) and (in_channels == out_channels):
        x = tfa.layers.StochasticDepth(1 - drop_connect)([shortcut, x])
    return x


def mbconv(x, in_channels, out_channels, kernel_size, activation, stride=1,
           reduction_ratio=4, expansion=6, dropout=None, drop_connect=.2):
    shortcut = x
    expanded = round_filters(in_channels * expansion)

    if expansion != 1:
        x = layers.Conv2D(expanded, (1, 1), 1, padding="same", use_bias=False)(x)
        x = layers.BatchNormalization(epsilon=1e-5)(x)
        x = layers.Activation(activation)(x)

    x = layers.DepthwiseConv2D(kernel_size=kernel_size, strides=stride, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    x = layers.Activation(activation)(x)

    if (expansion != 1) and (dropout is not None) and (dropout != 0.):
        x = layers.Dropout(dropout)(x)

    if reduction_ratio is not None:
        se = squeeze_and_excite(x, in_channels, expanded, activation, reduction_ratio)
        x = layers.Multiply()([x, se])

    x = layers.Conv2D(out_channels, (1, 1), 1, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    if (stride == 1) and (in_channels == out_channels):
        x = tfa.layers.StochasticDepth(1 - drop_connect)([shortcut, x])
    return x
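
# Note on the residual merge in the two blocks above:
# tfa.layers.StochasticDepth(survival_probability)([shortcut, residual]) implements
# drop-path -- during training the residual branch is kept with the given probability
# (here 1 - drop_connect) and dropped otherwise; at inference the branch is instead
# scaled by that probability.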


def repeat(x, count, in_channels, out_channels, kernel_size, activation,
           stride=1, reduction_ratio=None, expansion=6, fused=False, dropout=None, drop_connect=.2):
    for _ in range(count):
        if fused:
            x = fused_mbconv(x, in_channels, out_channels, kernel_size,
                             activation, stride, reduction_ratio, expansion, dropout, drop_connect)
        else:
            x = mbconv(x, in_channels, out_channels, kernel_size, activation, stride,
                       reduction_ratio, expansion, dropout, drop_connect)
    return x


def stage(x, count, in_channels, out_channels, kernel_size, activation,
          stride=1, reduction_ratio=None, expansion=6, fused=False, dropout=None, drop_connect=.2):
    # the first block of a stage carries the stride and the channel change ...
    x = repeat(x, count=1, in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
               activation=activation, stride=stride, reduction_ratio=reduction_ratio,
               expansion=expansion, fused=fused, dropout=dropout, drop_connect=drop_connect)
    # ... the remaining blocks keep the shape unchanged
    x = repeat(x, count=count - 1, in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size,
               activation=activation, stride=1, reduction_ratio=reduction_ratio,
               expansion=expansion, fused=fused, dropout=dropout, drop_connect=drop_connect)
    return x


def base(cfg, num_classes=1000, input_tensor=None, activation=activations.swish,
         width_mult=1., depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2 base model builder, re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param cfg: configuration of stages
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as the model input
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """
    inp = input_tensor
    # stage 0 (stem)
    x = layers.Conv2D(cfg[0][4], kernel_size=(3, 3), strides=2, padding="same", use_bias=False)(inp)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    x = layers.Activation(activation)(x)

    for stage_cfg in cfg:
        x = stage(x, count=round_repeats(stage_cfg[0], depth_mult),
                  in_channels=round_filters(stage_cfg[4], width_mult),
                  out_channels=round_filters(stage_cfg[5], width_mult),
                  kernel_size=stage_cfg[1], activation=activation, stride=stage_cfg[2],
                  reduction_ratio=stage_cfg[7], expansion=stage_cfg[3], fused=stage_cfg[6] == 1,
                  dropout=conv_dropout_rate, drop_connect=drop_connect)

    # final stage (head)
    x = layers.Conv2D(round_filters(1280, width_mult), (1, 1), strides=1, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    x = layers.Activation(activation)(x)

    x = layers.GlobalAvgPool2D()(x)
    if (dropout_rate is not None) and (dropout_rate != 0):
        x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(num_classes)(x)
    x = layers.Activation(activations.softmax)(x)

    return Model(inp, x)
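
# How to read a cfg row (using the first row of EfficientNetV2-S below):
#   [2, 3, 1, 1, 24, 24, 1, None]
#   -> 2 blocks, 3x3 kernel, stride 1, expansion ratio 1,
#      24 -> 24 channels, fused MBConv, no squeeze-and-excite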


def s(in_shape=(224, 224, 3), num_classes=1000, input_tensor=None, activation=activations.swish,
      width_mult=1., depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2-S, re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param in_shape: input shape of the model, in the form (H, W, C)
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as input; if provided, in_shape will be ignored
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """

    # each row is a stage:
    # count, kernel size, stride, expansion ratio, in channels, out channels, is fused (1 if true), reduction ratio (None if no SE)
    cfg = [
        [2, 3, 1, 1, 24, 24, 1, None],
        [4, 3, 2, 4, 24, 48, 1, None],
        [4, 3, 2, 4, 48, 64, 1, None],
        [6, 3, 2, 4, 64, 128, 0, 4],
        [9, 3, 1, 6, 128, 160, 0, 4],
        [15, 3, 2, 6, 160, 256, 0, 4],
    ]
    input_tensor = layers.Input(in_shape) if input_tensor is None else input_tensor
    return base(cfg=cfg, num_classes=num_classes, input_tensor=input_tensor, activation=activation,
                width_mult=width_mult, depth_mult=depth_mult, conv_dropout_rate=conv_dropout_rate,
                dropout_rate=dropout_rate, drop_connect=drop_connect)
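
# Example (sketch): the multipliers scale the whole network; e.g. a slimmer,
# shallower variant of V2-S (illustrative values, not an official configuration):
#   model = s(in_shape=(224, 224, 3), num_classes=1000, width_mult=0.5, depth_mult=0.5)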


def m(in_shape=(224, 224, 3), num_classes=1000, input_tensor=None, activation=activations.swish,
      width_mult=1.0, depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2-M, re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param in_shape: input shape of the model, in the form (H, W, C)
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as input; if provided, in_shape will be ignored
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """

    # each row is a stage:
    # count, kernel size, stride, expansion ratio, in channels, out channels, is fused (1 if true), reduction ratio (None if no SE)
    cfg = [
        [3, 3, 1, 1, 24, 24, 1, None],
        [5, 3, 2, 4, 24, 48, 1, None],
        [5, 3, 2, 4, 48, 80, 1, None],
        [7, 3, 2, 4, 80, 160, 0, 4],
        [14, 3, 1, 6, 160, 176, 0, 4],
        [18, 3, 2, 6, 176, 304, 0, 4],
        [5, 3, 1, 6, 304, 512, 0, 4],
    ]
    input_tensor = layers.Input(in_shape) if input_tensor is None else input_tensor
    return base(cfg=cfg, num_classes=num_classes, input_tensor=input_tensor, activation=activation,
                width_mult=width_mult, depth_mult=depth_mult, conv_dropout_rate=conv_dropout_rate,
                dropout_rate=dropout_rate, drop_connect=drop_connect)


def l(in_shape=(224, 224, 3), num_classes=1000, input_tensor=None, activation=activations.swish,
      width_mult=1.0, depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2-L, re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param in_shape: input shape of the model, in the form (H, W, C)
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as input; if provided, in_shape will be ignored
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """

    # each row is a stage:
    # count, kernel size, stride, expansion ratio, in channels, out channels, is fused (1 if true), reduction ratio (None if no SE)
    cfg = [
        [4, 3, 1, 1, 32, 32, 1, None],
        [7, 3, 2, 4, 32, 64, 1, None],
        [7, 3, 2, 4, 64, 96, 1, None],
        [10, 3, 2, 4, 96, 192, 0, 4],
        [19, 3, 1, 6, 192, 224, 0, 4],
        [25, 3, 2, 6, 224, 384, 0, 4],
        [7, 3, 1, 6, 384, 640, 0, 4],
    ]
    input_tensor = layers.Input(in_shape) if input_tensor is None else input_tensor
    return base(cfg=cfg, num_classes=num_classes, input_tensor=input_tensor, activation=activation,
                width_mult=width_mult, depth_mult=depth_mult, conv_dropout_rate=conv_dropout_rate,
                dropout_rate=dropout_rate, drop_connect=drop_connect)


def xl(in_shape=(224, 224, 3), num_classes=1000, input_tensor=None, activation=activations.swish,
       width_mult=1.0, depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2-XL, re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param in_shape: input shape of the model, in the form (H, W, C)
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as input; if provided, in_shape will be ignored
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """

    # each row is a stage:
    # count, kernel size, stride, expansion ratio, in channels, out channels, is fused (1 if true), reduction ratio (None if no SE)
    cfg = [
        [4, 3, 1, 1, 32, 32, 1, None],
        [8, 3, 2, 4, 32, 64, 1, None],
        [8, 3, 2, 4, 64, 96, 1, None],
        [16, 3, 2, 4, 96, 192, 0, 4],
        [24, 3, 1, 6, 192, 256, 0, 4],
        [32, 3, 2, 6, 256, 512, 0, 4],
        [8, 3, 1, 6, 512, 640, 0, 4],
    ]
    input_tensor = layers.Input(in_shape) if input_tensor is None else input_tensor
    return base(cfg=cfg, num_classes=num_classes, input_tensor=input_tensor, activation=activation,
                width_mult=width_mult, depth_mult=depth_mult, conv_dropout_rate=conv_dropout_rate,
                dropout_rate=dropout_rate, drop_connect=drop_connect)


def main():
    model = xl((224, 224, 3), 1000)
    model.summary()


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------
/ghost_efficientnetV2.py:
--------------------------------------------------------------------------------
from tensorflow.keras import Model, layers, activations
import tensorflow_addons as tfa
import math

"""
round_filters and round_repeats are borrowed from official repo
https://github.com/google/automl/tree/master/efficientnetv2
"""


def round_filters(filters, multiplier=1.):
    divisor = 8
    min_depth = 8
    filters *= multiplier
    min_depth = min_depth or divisor
    new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)
    return int(new_filters)


def round_repeats(repeats, multiplier=1.):
    return int(math.ceil(multiplier * repeats))


def squeeze_and_excite(x, in_channels, out_channels, activation, reduction_ratio=4):
    x = layers.GlobalAvgPool2D()(x)
    x = layers.Dense(in_channels // reduction_ratio)(x)
    x = layers.Activation(activation)(x)
    x = layers.Dense(out_channels)(x)
    x = layers.Activation(activations.sigmoid)(x)
    return x


def ghost_conv(x, out_channels, kernel_size, stride, kernel_regularizer=None):
    # Ghost Module (https://arxiv.org/abs/1911.11907): a primary convolution produces
    # half of the output channels; the other half is generated cheaply from it with a
    # depthwise convolution. Assumes out_channels is even, which holds for all cfgs here.
    x1 = layers.Conv2D(out_channels // 2, kernel_size=kernel_size, strides=stride, padding="same",
                       use_bias=False, kernel_regularizer=kernel_regularizer)(x)
    x2 = layers.BatchNormalization(epsilon=1e-5)(x1)
    x2 = layers.Activation(activations.elu)(x2)
    x2 = layers.DepthwiseConv2D(kernel_size=(3, 3), strides=1, padding="same",
                                use_bias=False, kernel_regularizer=kernel_regularizer)(x2)
    return layers.Concatenate()([x1, x2])


def fused_mbconv(x, in_channels, out_channels, kernel_size, activation, stride=1, reduction_ratio=4,
                 expansion=6, dropout=None, drop_connect=.2):
    shortcut = x
    expanded = round_filters(in_channels * expansion)

    # ResNet-D shortcut: average pool for the stride, 1x1 (ghost) conv for the channel change
    if stride == 2:
        shortcut = layers.AveragePooling2D()(shortcut)
    if in_channels != out_channels:
        shortcut = ghost_conv(shortcut, out_channels, (1, 1), 1)

    if expansion != 1:
        x = ghost_conv(x, expanded, kernel_size, stride)
        x = layers.BatchNormalization(epsilon=1e-5)(x)
        x = layers.Activation(activation)(x)

    if (dropout is not None) and (dropout != 0.):
        x = layers.Dropout(dropout)(x)

    if reduction_ratio is not None:
        se = squeeze_and_excite(x, in_channels, expanded, activation, reduction_ratio)
        x = layers.Multiply()([x, se])

    # when expansion == 1 this is the only conv in the block, so it must carry the stride
    x = ghost_conv(x, out_channels, (1, 1) if expansion != 1 else kernel_size,
                   1 if expansion != 1 else stride)
    x = layers.BatchNormalization(epsilon=1e-5)(x)

    x = tfa.layers.StochasticDepth(1 - drop_connect)([shortcut, x])
    return x
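
# Note: unlike the plain EfficientNetV2 blocks, stochastic depth is applied
# unconditionally here -- the ResNet-D shortcut above always matches the output
# shape (average pooling for stride 2, 1x1 ghost conv for channel changes), so
# every block keeps a usable skip connection.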


def mbconv(x, in_channels, out_channels, kernel_size, activation, stride=1,
           reduction_ratio=4, expansion=6, dropout=None, drop_connect=.2):
    shortcut = x
    expanded = round_filters(in_channels * expansion)

    # ResNet-D shortcut, as in fused_mbconv above
    if stride == 2:
        shortcut = layers.AveragePooling2D()(shortcut)
    if in_channels != out_channels:
        shortcut = ghost_conv(shortcut, out_channels, (1, 1), 1)

    if expansion != 1:
        x = ghost_conv(x, expanded, (1, 1), 1)
        x = layers.BatchNormalization(epsilon=1e-5)(x)
        x = layers.Activation(activation)(x)

    x = layers.DepthwiseConv2D(kernel_size=kernel_size, strides=stride, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    x = layers.Activation(activation)(x)

    if (expansion != 1) and (dropout is not None) and (dropout != 0.):
        x = layers.Dropout(dropout)(x)

    if reduction_ratio is not None:
        se = squeeze_and_excite(x, in_channels, expanded, activation, reduction_ratio)
        x = layers.Multiply()([x, se])

    x = ghost_conv(x, out_channels, (1, 1), 1)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    x = tfa.layers.StochasticDepth(1 - drop_connect)([shortcut, x])
    return x


def repeat(x, count, in_channels, out_channels, kernel_size, activation,
           stride=1, reduction_ratio=None, expansion=6, fused=False, dropout=None, drop_connect=.2):
    for _ in range(count):
        if fused:
            x = fused_mbconv(x, in_channels, out_channels, kernel_size,
                             activation, stride, reduction_ratio, expansion, dropout, drop_connect)
        else:
            x = mbconv(x, in_channels, out_channels, kernel_size, activation, stride,
                       reduction_ratio, expansion, dropout, drop_connect)
    return x


def stage(x, count, in_channels, out_channels, kernel_size, activation,
          stride=1, reduction_ratio=None, expansion=6, fused=False, dropout=None, drop_connect=.2):
    # the first block of a stage carries the stride and the channel change ...
    x = repeat(x, count=1, in_channels=in_channels, out_channels=out_channels, kernel_size=kernel_size,
               activation=activation, stride=stride, reduction_ratio=reduction_ratio,
               expansion=expansion, fused=fused, dropout=dropout, drop_connect=drop_connect)
    # ... the remaining blocks keep the shape unchanged
    x = repeat(x, count=count - 1, in_channels=out_channels, out_channels=out_channels, kernel_size=kernel_size,
               activation=activation, stride=1, reduction_ratio=reduction_ratio,
               expansion=expansion, fused=fused, dropout=dropout, drop_connect=drop_connect)
    return x


def base(cfg, num_classes=1000, input_tensor=None, activation=activations.swish,
         width_mult=1., depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2 base model builder (Ghost Module version), re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param cfg: configuration of stages
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as the model input
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """
    inp = input_tensor
    # stage 0 (stem)
    x = layers.Conv2D(cfg[0][4], kernel_size=(3, 3), strides=2, padding="same", use_bias=False)(inp)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    x = layers.Activation(activation)(x)

    for stage_cfg in cfg:
        x = stage(x, count=round_repeats(stage_cfg[0], depth_mult),
                  in_channels=round_filters(stage_cfg[4], width_mult),
                  out_channels=round_filters(stage_cfg[5], width_mult),
                  kernel_size=stage_cfg[1], activation=activation, stride=stage_cfg[2],
                  reduction_ratio=stage_cfg[7], expansion=stage_cfg[3], fused=stage_cfg[6] == 1,
                  dropout=conv_dropout_rate, drop_connect=drop_connect)

    # final stage (head)
    x = layers.Conv2D(round_filters(1280, width_mult), (1, 1), strides=1, padding="same", use_bias=False)(x)
    x = layers.BatchNormalization(epsilon=1e-5)(x)
    x = layers.Activation(activation)(x)

    x = layers.GlobalAvgPool2D()(x)
    if (dropout_rate is not None) and (dropout_rate != 0):
        x = layers.Dropout(dropout_rate)(x)
    x = layers.Dense(num_classes)(x)
    x = layers.Activation(activations.softmax)(x)

    return Model(inp, x)


def s(in_shape=(224, 224, 3), num_classes=1000, input_tensor=None, activation=activations.swish,
      width_mult=1., depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2-S (Ghost Module version), re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param in_shape: input shape of the model, in the form (H, W, C)
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as input; if provided, in_shape will be ignored
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """

    # each row is a stage:
    # count, kernel size, stride, expansion ratio, in channels, out channels, is fused (1 if true), reduction ratio (None if no SE)
    cfg = [
        [2, 3, 1, 1, 24, 24, 1, None],
        [4, 3, 2, 4, 24, 48, 1, None],
        [4, 3, 2, 4, 48, 64, 1, None],
        [6, 3, 2, 4, 64, 128, 0, 4],
        [9, 3, 1, 6, 128, 160, 0, 4],
        [15, 3, 2, 6, 160, 256, 0, 4],
    ]
    input_tensor = layers.Input(in_shape) if input_tensor is None else input_tensor
    return base(cfg=cfg, num_classes=num_classes, input_tensor=input_tensor, activation=activation,
                width_mult=width_mult, depth_mult=depth_mult, conv_dropout_rate=conv_dropout_rate,
                dropout_rate=dropout_rate, drop_connect=drop_connect)


def m(in_shape=(224, 224, 3), num_classes=1000, input_tensor=None, activation=activations.swish,
      width_mult=1.0, depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2-M (Ghost Module version), re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param in_shape: input shape of the model, in the form (H, W, C)
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as input; if provided, in_shape will be ignored
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """

    # each row is a stage:
    # count, kernel size, stride, expansion ratio, in channels, out channels, is fused (1 if true), reduction ratio (None if no SE)
    cfg = [
        [3, 3, 1, 1, 24, 24, 1, None],
        [5, 3, 2, 4, 24, 48, 1, None],
        [5, 3, 2, 4, 48, 80, 1, None],
        [7, 3, 2, 4, 80, 160, 0, 4],
        [14, 3, 1, 6, 160, 176, 0, 4],
        [18, 3, 2, 6, 176, 304, 0, 4],
        [5, 3, 1, 6, 304, 512, 0, 4],
    ]
    input_tensor = layers.Input(in_shape) if input_tensor is None else input_tensor
    return base(cfg=cfg, num_classes=num_classes, input_tensor=input_tensor, activation=activation,
                width_mult=width_mult, depth_mult=depth_mult, conv_dropout_rate=conv_dropout_rate,
                dropout_rate=dropout_rate, drop_connect=drop_connect)


def l(in_shape=(224, 224, 3), num_classes=1000, input_tensor=None, activation=activations.swish,
      width_mult=1.0, depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2-L (Ghost Module version), re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param in_shape: input shape of the model, in the form (H, W, C)
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as input; if provided, in_shape will be ignored
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """

    # each row is a stage:
    # count, kernel size, stride, expansion ratio, in channels, out channels, is fused (1 if true), reduction ratio (None if no SE)
    cfg = [
        [4, 3, 1, 1, 32, 32, 1, None],
        [7, 3, 2, 4, 32, 64, 1, None],
        [7, 3, 2, 4, 64, 96, 1, None],
        [10, 3, 2, 4, 96, 192, 0, 4],
        [19, 3, 1, 6, 192, 224, 0, 4],
        [25, 3, 2, 6, 224, 384, 0, 4],
        [7, 3, 1, 6, 384, 640, 0, 4],
    ]
    input_tensor = layers.Input(in_shape) if input_tensor is None else input_tensor
    return base(cfg=cfg, num_classes=num_classes, input_tensor=input_tensor, activation=activation,
                width_mult=width_mult, depth_mult=depth_mult, conv_dropout_rate=conv_dropout_rate,
                dropout_rate=dropout_rate, drop_connect=drop_connect)


def xl(in_shape=(224, 224, 3), num_classes=1000, input_tensor=None, activation=activations.swish,
       width_mult=1.0, depth_mult=1., conv_dropout_rate=None, dropout_rate=None, drop_connect=.2):
    """
    EfficientNetV2-XL (Ghost Module version), re-implemented according to
    https://arxiv.org/abs/2104.00298
    and the official code
    https://github.com/google/automl/tree/master/efficientnetv2
    EfficientNetV2: Smaller Models and Faster Training
    by Mingxing Tan, Quoc V. Le

    :param in_shape: input shape of the model, in the form (H, W, C)
    :param num_classes: number of classes to output
    :param input_tensor: tensor to use as input; if provided, in_shape will be ignored
    :param activation: activation to use across hidden layers
    :param width_mult: width multiplier, defaults to 1.0
    :param depth_mult: depth multiplier, defaults to 1.0
    :param conv_dropout_rate: probability to drop after each MBConv/stage; 0 or None means no dropout will be applied
    :param dropout_rate: probability to drop after GlobalAveragePooling; 0 or None means no dropout will be applied
    :param drop_connect: probability to drop spatially in skip connections; 0 or None means no dropout will be applied
    :return: a tf.keras Model
    """

    # each row is a stage:
    # count, kernel size, stride, expansion ratio, in channels, out channels, is fused (1 if true), reduction ratio (None if no SE)
    cfg = [
        [4, 3, 1, 1, 32, 32, 1, None],
        [8, 3, 2, 4, 32, 64, 1, None],
        [8, 3, 2, 4, 64, 96, 1, None],
        [16, 3, 2, 4, 96, 192, 0, 4],
        [24, 3, 1, 6, 192, 256, 0, 4],
        [32, 3, 2, 6, 256, 512, 0, 4],
        [8, 3, 1, 6, 512, 640, 0, 4],
    ]
    input_tensor = layers.Input(in_shape) if input_tensor is None else input_tensor
    return base(cfg=cfg, num_classes=num_classes, input_tensor=input_tensor, activation=activation,
                width_mult=width_mult, depth_mult=depth_mult, conv_dropout_rate=conv_dropout_rate,
                dropout_rate=dropout_rate, drop_connect=drop_connect)


def main():
    model = s((224, 224, 3), 1000)
    model.summary()


if __name__ == '__main__':
    main()
--------------------------------------------------------------------------------