├── .gitignore
├── LICENSE
├── README.md
└── efficientnet.py
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
.idea
__pycache__
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 calmisential

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# EfficientNet_TensorFlow2
A TensorFlow 2 implementation of EfficientNet.

See https://github.com/calmisential/Basic_CNNs_TensorFlow2 for training details.
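## Usage
A minimal sketch of building a model (assuming this repository is on your `PYTHONPATH`; note that `NUM_CLASSES` is hardcoded to 10 in `efficientnet.py`):

```python
from efficientnet import efficient_net_b0

model = efficient_net_b0()  # builds for 224x224 RGB inputs and prints a summary
```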
## References:
1. The original paper: [EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks](https://arxiv.org/abs/1905.11946)
2. The official code: https://github.com/tensorflow/tpu/tree/master/models/official/efficientnet
--------------------------------------------------------------------------------
/efficientnet.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import math

# Number of output classes; adjust for your dataset.
NUM_CLASSES = 10


def swish(x):
    # Swish activation: x * sigmoid(x).
    return x * tf.nn.sigmoid(x)


def round_filters(filters, multiplier):
    # Scale the channel count by the width multiplier, then round to the
    # nearest multiple of 8 without dropping more than 10% below the target.
    depth_divisor = 8
    min_depth = depth_divisor
    filters = filters * multiplier
    new_filters = max(min_depth, int(filters + depth_divisor / 2) // depth_divisor * depth_divisor)
    if new_filters < 0.9 * filters:
        new_filters += depth_divisor
    return int(new_filters)


def round_repeats(repeats, multiplier):
    # Scale the number of block repeats by the depth multiplier.
    if not multiplier:
        return repeats
    return int(math.ceil(multiplier * repeats))
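# Worked example (illustrative values computed from the helpers above): for B2
# (width 1.1, depth 1.2), round_filters(40, 1.1) = 48, since 40 * 1.1 = 44 is
# rounded up to the nearest multiple of 8, and round_repeats(3, 1.2) =
# ceil(3.6) = 4.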
class SEBlock(tf.keras.layers.Layer):
    # Squeeze-and-excitation block: global pooling followed by a two-layer
    # 1x1-conv bottleneck that produces per-channel sigmoid gates.
    def __init__(self, input_channels, ratio=0.25):
        super(SEBlock, self).__init__()
        self.num_reduced_filters = max(1, int(input_channels * ratio))
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        self.reduce_conv = tf.keras.layers.Conv2D(filters=self.num_reduced_filters,
                                                  kernel_size=(1, 1),
                                                  strides=1,
                                                  padding="same")
        self.expand_conv = tf.keras.layers.Conv2D(filters=input_channels,
                                                  kernel_size=(1, 1),
                                                  strides=1,
                                                  padding="same")

    def call(self, inputs, **kwargs):
        branch = self.pool(inputs)
        # Restore the two spatial dimensions so the 1x1 convolutions apply.
        branch = tf.expand_dims(input=branch, axis=1)
        branch = tf.expand_dims(input=branch, axis=1)
        branch = self.reduce_conv(branch)
        branch = swish(branch)
        branch = self.expand_conv(branch)
        branch = tf.nn.sigmoid(branch)
        output = inputs * branch
        return output


class MBConv(tf.keras.layers.Layer):
    # Mobile inverted bottleneck block: 1x1 expansion, depthwise convolution,
    # squeeze-and-excitation, and a 1x1 projection, with a residual connection
    # (plus drop connect) when the input shape is preserved.
    def __init__(self, in_channels, out_channels, expansion_factor, stride, k, drop_connect_rate):
        super(MBConv, self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.stride = stride
        self.drop_connect_rate = drop_connect_rate
        self.conv1 = tf.keras.layers.Conv2D(filters=in_channels * expansion_factor,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.dwconv = tf.keras.layers.DepthwiseConv2D(kernel_size=(k, k),
                                                      strides=stride,
                                                      padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.se = SEBlock(input_channels=in_channels * expansion_factor)
        self.conv2 = tf.keras.layers.Conv2D(filters=out_channels,
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn3 = tf.keras.layers.BatchNormalization()
        # noise_shape=(None, 1, 1, 1) drops the whole residual branch per
        # sample ("drop connect", as in the official implementation) rather
        # than dropping individual activations.
        self.dropout = tf.keras.layers.Dropout(rate=drop_connect_rate,
                                               noise_shape=(None, 1, 1, 1))

    def call(self, inputs, training=None, **kwargs):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = swish(x)
        x = self.dwconv(x)
        x = self.bn2(x, training=training)
        # Activate before squeeze-and-excitation, matching the official order.
        x = swish(x)
        x = self.se(x)
        x = self.conv2(x)
        x = self.bn3(x, training=training)
        # No activation after the projection; add the residual only when the
        # block preserves both resolution and channel count.
        if self.stride == 1 and self.in_channels == self.out_channels:
            if self.drop_connect_rate:
                x = self.dropout(x, training=training)
            x = tf.keras.layers.add([x, inputs])
        return x


def build_mbconv_block(in_channels, out_channels, layers, stride, expansion_factor, k, drop_connect_rate):
    # The first block in a stage handles the stride and the channel change;
    # the remaining blocks keep stride 1 with equal in/out channels.
    block = tf.keras.Sequential()
    for i in range(layers):
        if i == 0:
            block.add(MBConv(in_channels=in_channels,
                             out_channels=out_channels,
                             expansion_factor=expansion_factor,
                             stride=stride,
                             k=k,
                             drop_connect_rate=drop_connect_rate))
        else:
            block.add(MBConv(in_channels=out_channels,
                             out_channels=out_channels,
                             expansion_factor=expansion_factor,
                             stride=1,
                             k=k,
                             drop_connect_rate=drop_connect_rate))
    return block
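# Baseline (B0) stage layout built below, following Table 1 of the paper;
# channels and repeats are scaled per variant via round_filters/round_repeats:
#   stage  operator  k  stride  channels  repeats
#   1      MBConv1   3  1       16        1
#   2      MBConv6   3  2       24        2
#   3      MBConv6   5  2       40        2
#   4      MBConv6   3  2       80        3
#   5      MBConv6   5  1       112       3
#   6      MBConv6   5  2       192       4
#   7      MBConv6   3  1       320       1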
class EfficientNet(tf.keras.Model):
    def __init__(self, width_coefficient, depth_coefficient, dropout_rate, drop_connect_rate=0.2):
        super(EfficientNet, self).__init__()

        # Stem: 3x3 convolution with stride 2.
        self.conv1 = tf.keras.layers.Conv2D(filters=round_filters(32, width_coefficient),
                                            kernel_size=(3, 3),
                                            strides=2,
                                            padding="same")
        self.bn1 = tf.keras.layers.BatchNormalization()
        self.block1 = build_mbconv_block(in_channels=round_filters(32, width_coefficient),
                                         out_channels=round_filters(16, width_coefficient),
                                         layers=round_repeats(1, depth_coefficient),
                                         stride=1,
                                         expansion_factor=1, k=3, drop_connect_rate=drop_connect_rate)
        self.block2 = build_mbconv_block(in_channels=round_filters(16, width_coefficient),
                                         out_channels=round_filters(24, width_coefficient),
                                         layers=round_repeats(2, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)
        self.block3 = build_mbconv_block(in_channels=round_filters(24, width_coefficient),
                                         out_channels=round_filters(40, width_coefficient),
                                         layers=round_repeats(2, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block4 = build_mbconv_block(in_channels=round_filters(40, width_coefficient),
                                         out_channels=round_filters(80, width_coefficient),
                                         layers=round_repeats(3, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)
        self.block5 = build_mbconv_block(in_channels=round_filters(80, width_coefficient),
                                         out_channels=round_filters(112, width_coefficient),
                                         layers=round_repeats(3, depth_coefficient),
                                         stride=1,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block6 = build_mbconv_block(in_channels=round_filters(112, width_coefficient),
                                         out_channels=round_filters(192, width_coefficient),
                                         layers=round_repeats(4, depth_coefficient),
                                         stride=2,
                                         expansion_factor=6, k=5, drop_connect_rate=drop_connect_rate)
        self.block7 = build_mbconv_block(in_channels=round_filters(192, width_coefficient),
                                         out_channels=round_filters(320, width_coefficient),
                                         layers=round_repeats(1, depth_coefficient),
                                         stride=1,
                                         expansion_factor=6, k=3, drop_connect_rate=drop_connect_rate)

        # Head: 1x1 convolution, global pooling, dropout, and classifier.
        self.conv2 = tf.keras.layers.Conv2D(filters=round_filters(1280, width_coefficient),
                                            kernel_size=(1, 1),
                                            strides=1,
                                            padding="same")
        self.bn2 = tf.keras.layers.BatchNormalization()
        self.pool = tf.keras.layers.GlobalAveragePooling2D()
        self.dropout = tf.keras.layers.Dropout(rate=dropout_rate)
        self.fc = tf.keras.layers.Dense(units=NUM_CLASSES,
                                        activation=tf.keras.activations.softmax)

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs)
        x = self.bn1(x, training=training)
        x = swish(x)

        # Pass `training` explicitly so the BatchNormalization and Dropout
        # layers inside each stage behave correctly in both modes.
        x = self.block1(x, training=training)
        x = self.block2(x, training=training)
        x = self.block3(x, training=training)
        x = self.block4(x, training=training)
        x = self.block5(x, training=training)
        x = self.block6(x, training=training)
        x = self.block7(x, training=training)

        x = self.conv2(x)
        x = self.bn2(x, training=training)
        x = swish(x)
        x = self.pool(x)
        x = self.dropout(x, training=training)
        x = self.fc(x)

        return x


def get_efficient_net(width_coefficient, depth_coefficient, resolution, dropout_rate):
    # `resolution` is only used to build the network and print the summary;
    # thanks to global average pooling, the model itself is input-size-agnostic.
    net = EfficientNet(width_coefficient=width_coefficient,
                       depth_coefficient=depth_coefficient,
                       dropout_rate=dropout_rate)
    net.build(input_shape=(None, resolution, resolution, 3))
    net.summary()

    return net


# Variant factories: (width_coefficient, depth_coefficient, resolution, dropout_rate).
def efficient_net_b0():
    return get_efficient_net(1.0, 1.0, 224, 0.2)


def efficient_net_b1():
    return get_efficient_net(1.0, 1.1, 240, 0.2)


def efficient_net_b2():
    return get_efficient_net(1.1, 1.2, 260, 0.3)


def efficient_net_b3():
    return get_efficient_net(1.2, 1.4, 300, 0.3)


def efficient_net_b4():
    return get_efficient_net(1.4, 1.8, 380, 0.4)


def efficient_net_b5():
    return get_efficient_net(1.6, 2.2, 456, 0.4)


def efficient_net_b6():
    return get_efficient_net(1.8, 2.6, 528, 0.5)


def efficient_net_b7():
    return get_efficient_net(2.0, 3.1, 600, 0.5)
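# Minimal smoke test (not part of the original module): build the B0 variant
# and run one random batch through it to verify the output shape.
if __name__ == "__main__":
    model = efficient_net_b0()
    dummy = tf.random.normal((1, 224, 224, 3))
    preds = model(dummy, training=False)
    print(preds.shape)  # expected: (1, NUM_CLASSES)
--------------------------------------------------------------------------------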