├── .gitignore
├── README.md
├── convert_tflite.py
├── MnasNet_models.py
└── MnasNet.py

/.gitignore:
--------------------------------------------------------------------------------
.DS_Store
.ipynb_checkpoints
__pycache__
mnasnet-a1*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# MnasNet TensorFlow 2 Implementation
Mingxing Tan, Bo Chen, Ruoming Pang, Vijay Vasudevan, Mark Sandler, Andrew Howard, Quoc V. Le. **MnasNet: Platform-Aware Neural Architecture Search for Mobile**. CVPR 2019.
arXiv link: https://arxiv.org/abs/1807.11626

## Usage
Available implementations: **a1, b1, small, d1, d1_320**
```python
from MnasNet_models import Build_MnasNet

# Standard model
model = Build_MnasNet('a1')

# Change default parameters:
model = Build_MnasNet('a1', dict(input_shape=(128, 128, 3), dropout_rate=0.5))
```
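
The repository also includes `convert_tflite.py` for TFLite export (it writes both a float and a size-optimized `_quantized` file). A minimal sketch of the workflow, assuming the model is first exported in TF2 SavedModel format; the directory name below is only an example:
```python
from MnasNet_models import Build_MnasNet

# Build the model and export it as a SavedModel directory
# (the path name is illustrative).
model = Build_MnasNet('a1')
model.save('mnasnet-a1-savedmodel')

# The saved directory can then be passed to convert_tflite.py
# with `-t savedmodel`.
```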

## Pretrained models
| Model | Dataset | Input Size | Depth Multiplier | Top-1 Accuracy | Top-5 Accuracy | Pixel 1 latency (ms) | Download Link |
| :---- | ------- | ---------- | ---------------- | -------------- | -------------- | -------------------- | ------------- |
| MnasNet-A1 | ImageNet | 224×224 | 1.0 | 75.2 | 92.5 | 78 | [Google Drive](https://drive.google.com/file/d/1tGHQC8vwrCKsMTKVTJMK-7uElYgLeR20/view?usp=sharing) |

## Reference
[MnasNet - Official implementation for Cloud TPU](https://github.com/tensorflow/tpu/tree/master/models/official/mnasnet)
--------------------------------------------------------------------------------
/convert_tflite.py:
--------------------------------------------------------------------------------
from MnasNet_models import Build_MnasNet
import numpy as np
import tensorflow as tf
import argparse


def model_compare(tf_model, tflite_model):
    """Checks that the TFLite model matches the TensorFlow model on random input."""
    # Load the TFLite model and allocate tensors.
    interpreter = tf.lite.Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    # Get input and output tensor details.
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    # Test the TensorFlow Lite model on random input data.
    input_shape = input_details[0]['shape']
    input_data = np.array(np.random.random_sample(input_shape), dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)

    interpreter.invoke()

    # `get_tensor()` returns a copy of the tensor data.
    # Use `tensor()` in order to get a pointer to the tensor.
    tflite_results = interpreter.get_tensor(output_details[0]['index'])

    # Test the TensorFlow model on the same input data.
    tf_results = tf_model(tf.constant(input_data))

    # Compare the results.
    for tf_result, tflite_result in zip(tf_results, tflite_results):
        np.testing.assert_almost_equal(tf_result, tflite_result, decimal=5)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Convert a TF2 model to TFLite.')
    parser.add_argument('-t', '--type', choices=['savedmodel', 'tf_keras'],
                        help='type of input model', required=True)
    parser.add_argument('-i', '--model-path',
                        help='path to the model (depending on the specified type)', required=True)
    parser.add_argument('-o', '--output-path',
                        help='path to the output tflite file', required=True)
    args = parser.parse_args()

    # Load the model.
    if args.type == 'savedmodel':
        converter = tf.lite.TFLiteConverter.from_saved_model(args.model_path)
    else:
        model = tf.keras.models.load_model(args.model_path)
        converter = tf.lite.TFLiteConverter.from_keras_model(model)

    output = args.output_path
    if output.endswith('.tflite'):
        output = output[:-7]

    # Convert the float model.
    tflite_model = converter.convert()
    with open(output + ".tflite", "wb") as f:
        f.write(tflite_model)

    # Convert a size-optimized (quantized) variant.
    converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    tflite_model = converter.convert()
    with open(output + "_quantized.tflite", "wb") as f:
        f.write(tflite_model)
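
# Example invocation (paths are illustrative; the script appends the suffixes itself):
#   python convert_tflite.py -t savedmodel -i mnasnet-a1-savedmodel -o mnasnet-a1
# writes mnasnet-a1.tflite and mnasnet-a1_quantized.tflite. When converting from a
# Keras model, model_compare(model, tflite_model) can be used to sanity-check that
# the float TFLite output matches the TensorFlow model.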
--------------------------------------------------------------------------------
/MnasNet_models.py:
--------------------------------------------------------------------------------
"""Predefined MnasNet models."""


import tensorflow as tf
import re
from collections import namedtuple
from MnasNet import MnasNetModel


BlockArgs = namedtuple(
    "BlockArgs",
    [
        "kernel_size",
        "num_repeat",
        "input_filters",
        "output_filters",
        "expand_ratio",
        "id_skip",
        "strides",
        "se_ratio",
    ],
    defaults=(None,) * 8,
)


GlobalParams = namedtuple(
    "GlobalParams",
    [
        "batch_norm_momentum",
        "batch_norm_epsilon",
        "dropout_rate",
        "data_format",
        "input_shape",
        "num_classes",
        "depth_multiplier",
        "depth_divisor",
        "min_depth",
        "stem_size",
        "normalize_input",
    ],
    defaults=(None,) * 11,
)


def decode_block_string(block_string):
    """Builds the BlockArgs for a MnasNet block from a string notation.

    E.g. "r2_k3_s22_e1_i32_o16_se0.25_noskip": r - number of repeated blocks,
    k - kernel size, s - strides (two digits, one per spatial dimension),
    e - expansion ratio, i - input filters, o - output filters,
    se - squeeze/excitation ratio, noskip - disable the identity skip connection.

    Args:
        block_string: a string representation of the block arguments.

    Returns:
        A BlockArgs instance.

    Raises:
        ValueError: if the strides option is not correctly specified.
    """

    assert isinstance(block_string, str)
    ops = block_string.split("_")
    options = {}
    for op in ops:
        splits = re.split(r"(\d.*)", op)
        if len(splits) >= 2:
            (key, value) = splits[:2]
            options[key] = value

    if "s" not in options or len(options["s"]) != 2:
        raise ValueError("Strides options should be a pair of integers.")

    return BlockArgs(
        kernel_size=int(options["k"]),
        num_repeat=int(options["r"]),
        input_filters=int(options["i"]),
        output_filters=int(options["o"]),
        expand_ratio=int(options["e"]),
        id_skip="noskip" not in block_string,
        se_ratio=(float(options["se"]) if "se" in options else None),
        strides=[int(options["s"][0]), int(options["s"][1])],
    )
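
# Illustrative example (not part of the original module): decoding the a1 block
# string "r3_k5_s22_e3_i24_o40_se0.25" yields
# BlockArgs(kernel_size=5, num_repeat=3, input_filters=24, output_filters=40,
#           expand_ratio=3, id_skip=True, strides=[2, 2], se_ratio=0.25).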


def Build_MnasNet(model_name, override_params=None):
    """Builds a predefined MnasNet model; override_params replaces GlobalParams fields."""
    global_params = GlobalParams(
        batch_norm_momentum=0.99,
        batch_norm_epsilon=1e-3,
        dropout_rate=0.2,
        data_format="channels_last",
        num_classes=1000,
        depth_multiplier=None,
        input_shape=(224, 224, 3),
        depth_divisor=8,
        min_depth=None,
        stem_size=32,
        normalize_input=True,
    )

    if model_name == "b1":
        blocks_args = [
            "r1_k3_s11_e1_i32_o16_noskip",
            "r3_k3_s22_e3_i16_o24",
            "r3_k5_s22_e3_i24_o40",
            "r3_k5_s22_e6_i40_o80",
            "r2_k3_s11_e6_i80_o96",
            "r4_k5_s22_e6_i96_o192",
            "r1_k3_s11_e6_i192_o320_noskip",
        ]

    elif model_name == "a1":
        blocks_args = [
            "r1_k3_s11_e1_i32_o16_noskip",
            "r2_k3_s22_e6_i16_o24",
            "r3_k5_s22_e3_i24_o40_se0.25",
            "r4_k3_s22_e6_i40_o80",
            "r2_k3_s11_e6_i80_o112_se0.25",
            "r3_k5_s22_e6_i112_o160_se0.25",
            "r1_k3_s11_e6_i160_o320",
        ]

    elif model_name == "small":
        blocks_args = [
            "r1_k3_s11_e1_i16_o8",
            "r1_k3_s22_e3_i8_o16",
            "r2_k3_s22_e6_i16_o16",
            "r4_k5_s22_e6_i16_o32_se0.25",
            "r3_k3_s11_e6_i32_o32_se0.25",
            "r3_k5_s22_e6_i32_o88_se0.25",
            "r1_k3_s11_e6_i88_o144",
        ]
        global_params = global_params._replace(dropout_rate=0.0, stem_size=8)

    elif model_name == "d1":
        blocks_args = [
            "r1_k3_s11_e9_i32_o24",
            "r3_k3_s22_e9_i24_o36",
            "r5_k3_s22_e9_i36_o48",
            "r4_k5_s22_e9_i48_o96",
            "r5_k7_s11_e3_i96_o96",
            "r3_k3_s22_e9_i96_o80",
            "r1_k7_s11_e6_i80_o320_noskip",
        ]

    elif model_name == "d1_320":
        blocks_args = [
            "r3_k5_s11_e6_i32_o24",
            "r4_k7_s22_e9_i24_o36",
            "r5_k5_s22_e9_i36_o48",
            "r5_k7_s22_e6_i48_o96",
            "r5_k3_s11_e9_i96_o144",
            "r5_k5_s22_e6_i144_o160",
            "r1_k7_s11_e9_i160_o320",
        ]

    else:
        raise NotImplementedError("model name is not pre-defined: %s" % model_name)

    if override_params:
        global_params = global_params._replace(**override_params)

    decoded_strings = [decode_block_string(s) for s in blocks_args]
    model = MnasNetModel(decoded_strings, global_params)
    return model
--------------------------------------------------------------------------------
/MnasNet.py:
--------------------------------------------------------------------------------
#!/usr/bin/python
# -*- coding: utf-8 -*-

import numpy as np
import tensorflow as tf


# The input tensor is expected to be in the range [0, 255]; it is normalized to
# zero mean and unit variance using the ImageNet channel statistics below.
MEAN_RGB = [0.485 * 255, 0.456 * 255, 0.406 * 255]
STDDEV_RGB = [0.229 * 255, 0.224 * 255, 0.225 * 255]


def conv_kernel_initializer(shape, dtype=None):
    """Initialization for convolutional kernels.

    The main difference with tf.variance_scaling_initializer is that
    tf.variance_scaling_initializer uses a truncated normal with an uncorrected
    standard deviation, whereas here we use a normal distribution. Similarly,
    tf.contrib.layers.variance_scaling_initializer uses a truncated normal with
    a corrected standard deviation.

    Args:
        shape: shape of the variable
        dtype: dtype of the variable

    Returns:
        an initialization for the variable
    """

    kernel_height, kernel_width, _, out_filters = shape
    fan_out = int(kernel_height * kernel_width * out_filters)
    return tf.random.normal(shape, mean=0.0, stddev=np.sqrt(2.0 / fan_out), dtype=dtype)


def dense_kernel_initializer(shape, dtype=None):
    """Initialization for dense kernels.

    This initialization is equal to
    tf.variance_scaling_initializer(scale=1.0/3.0, mode='fan_out',
    distribution='uniform'). It is written out explicitly here for clarity.

    Args:
        shape: shape of the variable
        dtype: dtype of the variable

    Returns:
        an initialization for the variable
    """

    init_range = 1.0 / np.sqrt(shape[1])
    return tf.random.uniform(shape, -init_range, init_range, dtype=dtype)


def round_filters(filters, global_params):
    """Rounds the number of filters based on the depth multiplier."""

    multiplier = global_params.depth_multiplier
    divisor = global_params.depth_divisor
    min_depth = global_params.min_depth
    if not multiplier:
        return filters

    filters *= multiplier
    min_depth = min_depth or divisor
    new_filters = max(min_depth, int(filters + divisor / 2) // divisor * divisor)

    # Make sure that rounding down does not reduce the width by more than 10%.
    if new_filters < 0.9 * filters:
        new_filters += divisor
    return new_filters
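
# Worked example (illustrative; the predefined models use depth_multiplier=None,
# in which case round_filters returns filters unchanged): with depth_multiplier=0.75
# and depth_divisor=8, 40 filters become 40 * 0.75 = 30, which rounds to the nearest
# multiple of 8, i.e. 32; since 32 >= 0.9 * 30, no extra divisor is added.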


def MnasBlock(input_tensor, block_args, global_params, name):

    batch_norm_momentum = global_params.batch_norm_momentum
    batch_norm_epsilon = global_params.batch_norm_epsilon
    data_format = global_params.data_format

    if data_format == "channels_first":
        channel_axis = 1
        spatial_dims = [2, 3]
    else:
        channel_axis = -1
        spatial_dims = [1, 2]
    has_se = block_args.se_ratio is not None and (
        block_args.se_ratio > 0 and block_args.se_ratio <= 1
    )

    x = input_tensor
    filters = block_args.input_filters * block_args.expand_ratio
    if block_args.expand_ratio != 1:

        # Expansion phase:
        x = tf.keras.layers.Conv2D(
            filters=filters,
            kernel_size=[1, 1],
            strides=[1, 1],
            kernel_initializer=conv_kernel_initializer,
            padding="same",
            use_bias=False,
            data_format=data_format,
            name=name + "_expand_conv",
        )(input_tensor)
        x = tf.keras.layers.BatchNormalization(
            axis=channel_axis,
            momentum=batch_norm_momentum,
            epsilon=batch_norm_epsilon,
            fused=True,
            name=name + "_expand_conv_BN",
        )(x)
        x = tf.keras.layers.ReLU()(x)

    kernel_size = block_args.kernel_size

    # Depth-wise convolution phase:
    x = tf.keras.layers.DepthwiseConv2D(
        kernel_size=[kernel_size, kernel_size],
        strides=block_args.strides,
        depthwise_initializer=conv_kernel_initializer,
        padding="same",
        use_bias=False,
        data_format=data_format,
        name=name + "_depthwise_conv",
    )(x)
    x = tf.keras.layers.BatchNormalization(
        axis=channel_axis,
        momentum=batch_norm_momentum,
        epsilon=batch_norm_epsilon,
        fused=True,
        name=name + "_depthwise_conv_BN",
    )(x)
    x = tf.keras.layers.ReLU()(x)

    if has_se:
        num_reduced_filters = max(
            1, int(block_args.input_filters * block_args.se_ratio)
        )

        # Squeeze and Excitation layer.
        se_tensor = tf.reduce_mean(x, spatial_dims, keepdims=True)
        se_tensor = tf.keras.layers.Conv2D(
            filters=num_reduced_filters,
            kernel_size=[1, 1],
            strides=[1, 1],
            kernel_initializer=conv_kernel_initializer,
            padding="same",
            use_bias=True,
            data_format=data_format,
            name=name + "_se_reduce_conv",
        )(se_tensor)
        se_tensor = tf.keras.layers.ReLU()(se_tensor)
        se_tensor = tf.keras.layers.Conv2D(
            filters=filters,
            kernel_size=[1, 1],
            strides=[1, 1],
            kernel_initializer=conv_kernel_initializer,
            padding="same",
            use_bias=True,
            data_format=data_format,
            name=name + "_se_expand_conv",
        )(se_tensor)
        x = tf.sigmoid(se_tensor) * x

    # Output phase:
    filters = block_args.output_filters
    x = tf.keras.layers.Conv2D(
        filters=filters,
        kernel_size=[1, 1],
        strides=[1, 1],
        kernel_initializer=conv_kernel_initializer,
        padding="same",
        use_bias=False,
        data_format=data_format,
        name=name + "_project_conv",
    )(x)
    x = tf.keras.layers.BatchNormalization(
        axis=channel_axis,
        momentum=batch_norm_momentum,
        epsilon=batch_norm_epsilon,
        fused=True,
        name=name + "_project_conv_BN",
    )(x)

    if block_args.id_skip:
        if (
            all(s == 1 for s in block_args.strides)
            and block_args.input_filters == block_args.output_filters
        ):
            x = tf.keras.layers.add([x, input_tensor], name=name + "_add")
    return x
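
# Illustrative walk-through of MnasBlock (not part of the original module): for the
# a1 stage "r3_k5_s22_e3_i24_o40_se0.25", the first block expands 24 -> 24 * 3 = 72
# channels with a 1x1 conv, applies a 5x5 depthwise conv with stride 2, squeezes to
# max(1, int(24 * 0.25)) = 6 channels in the SE branch, and projects to 40 channels;
# the identity skip is not added because the stride is 2 and 24 != 40.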


def MnasNetModel(blocks_args, global_params):

    batch_norm_momentum = global_params.batch_norm_momentum
    batch_norm_epsilon = global_params.batch_norm_epsilon
    channel_axis = 1 if global_params.data_format == "channels_first" else -1
    stem_size = global_params.stem_size
    data_format = global_params.data_format

    if data_format == "channels_first":
        stats_shape = [3, 1, 1]
    else:
        stats_shape = [1, 1, 3]

    # Process input.
    input_tensor = tf.keras.layers.Input(
        shape=global_params.input_shape, name="float_image_input"
    )
    # Normalize the image to zero mean and unit variance.
    x = input_tensor
    if global_params.normalize_input:
        x -= tf.constant(MEAN_RGB, shape=stats_shape)
        x /= tf.constant(STDDEV_RGB, shape=stats_shape)

    # Stem part.
    x = tf.keras.layers.Conv2D(
        filters=round_filters(stem_size, global_params),
        kernel_size=[3, 3],
        strides=[2, 2],
        kernel_initializer=conv_kernel_initializer,
        padding="same",
        use_bias=False,
        data_format=data_format,
        name="stem_conv",
    )(x)
    x = tf.keras.layers.BatchNormalization(
        axis=channel_axis,
        momentum=batch_norm_momentum,
        epsilon=batch_norm_epsilon,
        fused=True,
        name="stem_conv_BN",
    )(x)
    x = tf.keras.layers.ReLU()(x)

    # Builds blocks.
    for (i, block_args) in enumerate(blocks_args):
        assert block_args.num_repeat > 0

        # Update block input and output filters based on depth multiplier.
        block_args = block_args._replace(
            input_filters=round_filters(block_args.input_filters, global_params),
            output_filters=round_filters(block_args.output_filters, global_params),
        )

        # The first block needs to take care of stride and filter size increase.
        name = "block_{}__num{}_".format(i, 0)
        x = MnasBlock(x, block_args, global_params, name)

        if block_args.num_repeat > 1:
            block_args = block_args._replace(
                input_filters=block_args.output_filters, strides=[1, 1]
            )
        for j in range(1, block_args.num_repeat):
            name = "block_{}__num{}_".format(i, j)
            x = MnasBlock(x, block_args, global_params, name)
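
    # Illustrative note on the loop above: for the a1 stage "r3_k5_s22_e3_i24_o40_se0.25",
    # the first repeat runs with stride 2 and 24 input filters, while the remaining two
    # repeats run with stride 1 and 40 input filters (the stage's output width).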

    # Head part.
    x = tf.keras.layers.Conv2D(
        filters=1280,
        kernel_size=[1, 1],
        strides=[1, 1],
        kernel_initializer=conv_kernel_initializer,
        padding="same",
        use_bias=False,
        data_format=data_format,
        name="head_conv",
    )(x)
    x = tf.keras.layers.BatchNormalization(
        axis=channel_axis,
        momentum=batch_norm_momentum,
        epsilon=batch_norm_epsilon,
        fused=True,
        name="head_conv_BN",
    )(x)
    x = tf.keras.layers.ReLU()(x)

    x = tf.keras.layers.GlobalAveragePooling2D(
        data_format=data_format, name="avg_pooling"
    )(x)

    if global_params.dropout_rate > 0:
        x = tf.keras.layers.Dropout(global_params.dropout_rate)(x)

    output_fc = tf.keras.layers.Dense(
        global_params.num_classes,
        kernel_initializer=dense_kernel_initializer,
        name="FC",
    )(x)

    output_softmax = tf.keras.layers.Softmax(name="softmax")(output_fc)

    model = tf.keras.models.Model(inputs=input_tensor, outputs=output_softmax)
    return model
--------------------------------------------------------------------------------