├── .gitignore
├── LICENSE
├── README.md
└── efficientnet.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea
__pycache__
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 calmisential

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EfficientNet_TensorFlow2
A TensorFlow 2 implementation of EfficientNet.

See https://github.com/calmisential/Basic_CNNs_TensorFlow2 for training details.
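## Usage
A minimal sketch of building a model (assuming this repository is on your `PYTHONPATH`; note that `NUM_CLASSES` is hardcoded to 10 in `efficientnet.py`):

```python
from efficientnet import efficient_net_b0

model = efficient_net_b0()  # builds for 224x224 RGB inputs and prints a summary
```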
## References:
1. The original paper: [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
2. The official code: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet
--------------------------------------------------------------------------------
/efficientnet.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import math

# Number of output classes; adjust for your dataset.
NUM_CLASSES = 10


def swish(x):
    # Swish activation: x * sigmoid(x).
    return x * tf.nn.sigmoid(x)


def round_filters(filters, multiplier):
    # Scale the channel count by the width multiplier, then round to the
    # nearest multiple of 8 without dropping more than 10% below the target.
    depth_divisor = 8
    min_depth = depth_divisor
    filters = filters * multiplier
    new_filters = max(min_depth, int(filters + depth_divisor / 2) // depth_divisor * depth_divisor)
    if new_filters < 0.9 * filters:
        new_filters += depth_divisor
    return int(new_filters)


def round_repeats(repeats, multiplier):
    # Scale the number of block repeats by the depth multiplier.
    if not multiplier:
        return repeats
    return int(math.ceil(multiplier * repeats))
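# Worked example (illustrative values computed from the helpers above): for B2
# (width 1.1, depth 1.2), round_filters(40, 1.1) = 48, since 40 * 1.1 = 44 is
# rounded up to the nearest multiple of 8, and round_repeats(3, 1.2) =
# ceil(3.6) = 4.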
class SEBlock(tf.keras.layers.Layer):
    # Squeeze-and-excitation block: global pooling followed by a two-layer
    # 1x1-conv bottleneck that produces per-channel sigmoid gates.
    def __init__(self, input_channels, ratio=0.25):
        super(SEBlock, self).__init__()
        self.num_reduced_filters = max(1, int(input_channels * ratio))
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        self.reduce_conv = tf.keras.layers.Conv2D(filters=self.num_reduced_filters,
                                                  kernel_size=(1, 1),
                                                  strides=1,
                                                  padding="same")
        self.expand_conv = tf.keras.layers.Conv2D(filters=input_channels,
                                                  kernel_size=(1, 1),
                                                  strides=1,
                                                  padding="same")

    def call(self, inputs, **kwargs):
        branch = self.pool(inputs)
        # Restore the two spatial dimensions so the 1x1 convolutions apply.
        branch = tf.expand_dims(input=branch, axis=1)
        branch = tf.expand_dims(input=branch, axis=1)
        branch = self.reduce_conv(branch)
        branch = swish(branch)
        branch = self.expand_conv(branch)
        branch = tf.nn.sigmoid(branch)
        output = inputs * branch
        return output


class MBConv(tf.keras.layers.Layer):
    # Mobile inverted bottleneck block: 1x1 expansion, depthwise convolution,
    # squeeze-and-excitation, and a 1x1 projection, with a residual connection
    # (plus drop connect) when the input shape is preserved.
    def __init__(self, in_channels, out_channels, expansion_factor, stride, k, drop_connect_rate):
        super(MBConv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        self.drop_connect_rate = drop_connect_rate
        self.conv1 = tf.keras.layers.Conv2D(filters=in_channels * expansion_factor,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.dwconv = tf.keras.layers.DepthwiseConv2D(kernel_size=(k, k),
                                                      strides=stride,
                                                      padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.se = SEBlock(input_channels=in_channels * expansion_factor)
        self.conv2 = tf.keras.layers.Conv2D(filters=out_channels,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn3 = tf.keras.layers.BatchNormalization()
        # noise_shape=(None, 1, 1, 1) drops the whole residual branch per
        # sample ("drop connect", as in the official implementation) rather
        # than dropping individual activations.
        self.dropout = tf.keras.layers.Dropout(rate=drop_connect_rate,
                                               noise_shape=(None, 1, 1, 1))

    def call(self, inputs, training=None, **kwargs):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = swish(x)
        x = self.dwconv(x)
        x = self.bn2(x, training=training)
        # Activate before squeeze-and-excitation, matching the official order.
        x = swish(x)
        x = self.se(x)
        x = self.conv2(x)
        x = self.bn3(x, training=training)
        # No activation after the projection; add the residual only when the
        # block preserves both resolution and channel count.
        if self.stride == 1 and self.in_channels == self.out_channels:
            if self.drop_connect_rate:
                x = self.dropout(x, training=training)
            x = tf.keras.layers.add([x, inputs])
        return x


def build_mbconv_block(in_channels, out_channels, layers, stride, expansion_factor, k, drop_connect_rate):
    # The first block in a stage handles the stride and the channel change;
    # the remaining blocks keep stride 1 with equal in/out channels.
    block = tf.keras.Sequential()
    for i in range(layers):
        if i == 0:
            block.add(MBConv(in_channels=in_channels,
                             out_channels=out_channels,
                             expansion_factor=expansion_factor,
                             stride=stride,
                             k=k,
                             drop_connect_rate=drop_connect_rate))
        else:
            block.add(MBConv(in_channels=out_channels,
                             out_channels=out_channels,
                             expansion_factor=expansion_factor,
                             stride=1,
                             k=k,
                             drop_connect_rate=drop_connect_rate))
    return block
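# Baseline (B0) stage layout built below, following Table 1 of the paper;
# channels and repeats are scaled per variant via round_filters/round_repeats:
#   stage  operator  k  stride  channels  repeats
#   1      MBConv1   3  1       16        1
#   2      MBConv6   3  2       24        2
#   3      MBConv6   5  2       40        2
#   4      MBConv6   3  2       80        3
#   5      MBConv6   5  1       112       3
#   6      MBConv6   5  2       192       4
#   7      MBConv6   3  1       320       1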
class EfficientNet(tf.keras.Model):
    def __init__(self, width_coefficient, depth_coefficient, dropout_rate, drop_connect_rate=0.2):
        super(EfficientNet, self).__init__()

        # Stem: 3x3 convolution with stride 2.
        self.conv1 = tf.keras.layers.Conv2D(filters=round_filters(32, width_coefficient),
                                            kernel_size=(3, 3),
                                            strides=2,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.block1 = build_mbconv_block(in_channels=round_filters(32, width_coefficient),
                                         out_channels=round_filters(16, width_coefficient),
                                         layers=round_repeats(1, depth_coefficient),
                                         stride=1,
                                         expansion_factor=1, k=3, drop_connect_rate=drop_connect_rate)
        self.block2 = build_mbconv_block(in_channels=round_filters(16, width_coefficient),
                                         out_channels=round_filters(24, width_coefficient),
                                         layers=round_repeats(2, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)
        self.block3 = build_mbconv_block(in_channels=round_filters(24, width_coefficient),
                                         out_channels=round_filters(40, width_coefficient),
                                         layers=round_repeats(2, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block4 = build_mbconv_block(in_channels=round_filters(40, width_coefficient),
                                         out_channels=round_filters(80, width_coefficient),
                                         layers=round_repeats(3, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)
        self.block5 = build_mbconv_block(in_channels=round_filters(80, width_coefficient),
                                         out_channels=round_filters(112, width_coefficient),
                                         layers=round_repeats(3, depth_coefficient),
                                         stride=1,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block6 = build_mbconv_block(in_channels=round_filters(112, width_coefficient),
                                         out_channels=round_filters(192, width_coefficient),
                                         layers=round_repeats(4, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block7 = build_mbconv_block(in_channels=round_filters(192, width_coefficient),
                                         out_channels=round_filters(320, width_coefficient),
                                         layers=round_repeats(1, depth_coefficient),
                                         stride=1,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)

        # Head: 1x1 convolution, global pooling, dropout, and classifier.
        self.conv2 = tf.keras.layers.Conv2D(filters=round_filters(1280, width_coefficient),
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.fc = tf.keras.layers.Dense(units=NUM_CLASSES,
                                        activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = swish(x)

        # Pass `training` explicitly so the BatchNormalization and Dropout
        # layers inside each stage behave correctly in both modes.
        x = self.block1(x, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        x = self.block4(x, training=training)
        x = self.block5(x, training=training)
        x = self.block6(x, training=training)
        x = self.block7(x, training=training)

        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = swish(x)
        x = self.pool(x)
        x = self.dropout(x, training=training)
        x = self.fc(x)

        return x


def get_efficient_net(width_coefficient, depth_coefficient, resolution, dropout_rate):
    # `resolution` is only used to build the network and print the summary;
    # thanks to global average pooling, the model itself is input-size-agnostic.
    net = EfficientNet(width_coefficient=width_coefficient,
                       depth_coefficient=depth_coefficient,
                       dropout_rate=dropout_rate)
    net.build(input_shape=(None, resolution, resolution, 3))
    net.summary()

    return net


# Variant factories: (width_coefficient, depth_coefficient, resolution, dropout_rate).
def efficient_net_b0():
    return get_efficient_net(1.0, 1.0, 224, 0.2)


def efficient_net_b1():
    return get_efficient_net(1.0, 1.1, 240, 0.2)


def efficient_net_b2():
    return get_efficient_net(1.1, 1.2, 260, 0.3)


def efficient_net_b3():
    return get_efficient_net(1.2, 1.4, 300, 0.3)


def efficient_net_b4():
    return get_efficient_net(1.4, 1.8, 380, 0.4)


def efficient_net_b5():
    return get_efficient_net(1.6, 2.2, 456, 0.4)


def efficient_net_b6():
    return get_efficient_net(1.8, 2.6, 528, 0.5)


def efficient_net_b7():
    return get_efficient_net(2.0, 3.1, 600, 0.5)
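# Minimal smoke test (not part of the original module): build the B0 variant
# and run one random batch through it to verify the output shape.
if __name__ == "__main__":
    model = efficient_net_b0()
    dummy = tf.random.normal((1, 224, 224, 3))
    preds = model(dummy, training=False)
    print(preds.shape)  # expected: (1, NUM_CLASSES)
--------------------------------------------------------------------------------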