├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── convert.py
├── convert_model.sh
├── download.py
└── model.py

/.gitignore:
--------------------------------------------------------------------------------
models/
weights/
__pycache__
test.ipynb
*.pyc
*.mlmodel
.DS_Store
--------------------------------------------------------------------------------

/.travis.yml:
--------------------------------------------------------------------------------
os: osx
osx_image: xcode10

script:
  - ./convert_model.sh
--------------------------------------------------------------------------------

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Emil Zakirov
              2019 Giovanni Terlingen

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------

/README.md:
--------------------------------------------------------------------------------
# BlindAssist model scripts
[![Build Status](https://travis-ci.com/BlindAssist/blindassist-scripts.svg?branch=develop)](https://travis-ci.com/BlindAssist/blindassist-scripts)
[![License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)

These scripts download the pretrained DeepLabv3+ model based on MobileNetV2 from the TensorFlow model
zoo. Since the model is built on the lightweight MobileNetV2 backbone, I decided to use it as the base
model for the BlindAssist app.

# Download and convert the model to CoreML (macOS)
- Clone this repo
- Run `./convert_model.sh`

After the script finishes, the generated `cityscapes.mlmodel` can be added to the BlindAssist application.
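As a quick, optional sanity check (a minimal sketch, assuming `coremltools` is installed and the
conversion succeeded), the generated model's interface can be inspected from Python:

```python
import coremltools

# Load the converted Core ML model and print its input/output description.
spec = coremltools.utils.load_spec('cityscapes.mlmodel')
print(spec.description)
```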
These scripts are based on the work of @bonlime, who created DeepLabv3+ for Keras, and @seantempesta,
who got DeepLabv3+ working on CoreML.

Original source: https://github.com/bonlime/keras-deeplab-v3-plus

* DeepLabv3+:

```
@inproceedings{deeplabv3plus2018,
  title={Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation},
  author={Liang-Chieh Chen and Yukun Zhu and George Papandreou and Florian Schroff and Hartwig Adam},
  booktitle={ECCV},
  year={2018}
}
```

* MobileNetV2:

```
@inproceedings{mobilenetv22018,
  title={MobileNetV2: Inverted Residuals and Linear Bottlenecks},
  author={Mark Sandler and Andrew Howard and Menglong Zhu and Andrey Zhmoginov and Liang-Chieh Chen},
  booktitle={CVPR},
  year={2018}
}
```
--------------------------------------------------------------------------------

/convert.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import os
import numpy as np
from tqdm import tqdm
from model import Deeplabv3
import coremltools

MLMODEL_NAME = 'cityscapes.mlmodel'

print('Instantiating an empty Deeplabv3+ model...')
keras_model = Deeplabv3(input_shape=(384, 384, 3), classes=19)

WEIGHTS_DIR = 'weights/mobilenetv2'
print('Loading weights from', WEIGHTS_DIR)
for layer in tqdm(keras_model.layers):
    if layer.weights:
        weights = []
        for w in layer.weights:
            # Strip the ':0' tensor suffix, e.g. 'Conv/kernel:0' -> 'kernel'.
            weight_name = os.path.basename(w.name).replace(':0', '')
            weight_file = layer.name + '_' + weight_name + '.npy'
            weight_arr = np.load(os.path.join(WEIGHTS_DIR, weight_file))
            weights.append(weight_arr)
        layer.set_weights(weights)

# The Core ML model needs to normalize the input from the [0, 255] pixel range
# to (-1, 1); that's why we define image_scale and the red, green and blue
# biases: out = (2 / 255) * pixel - 1.
print('Converting...')
coreml_model = coremltools.converters.keras.convert(keras_model,
                                                    input_names=['input_1'],
                                                    image_input_names='input_1',
                                                    output_names='up_sampling2d_2',
                                                    image_scale=2/255.0,
                                                    red_bias=-1,
                                                    green_bias=-1,
                                                    blue_bias=-1)

coreml_model.author = 'Giovanni Terlingen'
coreml_model.license = 'GPLv3 License'
coreml_model.short_description = 'Produces segmentation info for urban scene images.'

coreml_model.save(MLMODEL_NAME)
print('Model converted, optimizing...')

# Load the model, lower its precision, and then save the smaller model.
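# Storing the weights as float16 instead of float32 roughly halves the .mlmodel
# file size; the accuracy impact should be negligible for this model, though that
# is an assumption worth checking against the full-precision model.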
model_spec = coremltools.utils.load_spec(MLMODEL_NAME)
model_fp16_spec = coremltools.utils.convert_neural_network_spec_weights_to_fp16(model_spec)
coremltools.utils.save_spec(model_fp16_spec, MLMODEL_NAME)

print('Done.')
--------------------------------------------------------------------------------

/convert_model.sh:
--------------------------------------------------------------------------------
#!/bin/bash
set -e  # abort (and fail the CI build) if any step fails

brew install python@2

sudo pip install coremltools==2.0
sudo pip install tqdm==4.28.1
sudo pip install tensorflow==1.5.0
sudo pip install keras==2.1.6

python ./download.py
python ./convert.py
--------------------------------------------------------------------------------

/download.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import os
import numpy as np
import tensorflow as tf
from keras.utils.data_utils import get_file

def get_mobilenetv2_filename(key):
    # Map a TensorFlow checkpoint variable name to the .npy filename expected
    # by convert.py, e.g. 'MobilenetV2/Conv/weights' -> 'Conv_kernel.npy'.
    # Optimizer (Momentum) slots are skipped by returning None.
    filename = str(key)
    filename = filename.replace('/', '_')
    filename = filename.replace('MobilenetV2_', '')
    filename = filename.replace('BatchNorm', 'BN')
    if 'Momentum' in filename:
        return None

    filename = filename.replace('_weights', '_kernel')
    filename = filename.replace('_biases', '_bias')

    return filename + '.npy'


def extract_tensors_from_checkpoint_file(filename, output_folder='weights'):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)

    reader = tf.train.NewCheckpointReader(filename)

    for key in reader.get_variable_to_shape_map():
        filename = get_mobilenetv2_filename(key)

        if filename:
            path = os.path.join(output_folder, filename)
            arr = reader.get_tensor(key)
            np.save(path, arr)
            print("tensor_name: ", key)

CKPT_URL = 'http://download.tensorflow.org/models/deeplabv3_mnv2_cityscapes_train_2018_02_05.tar.gz'
MODEL_DIR = 'models'
MODEL_SUBDIR = 'deeplabv3_mnv2_cityscapes_train'

if not os.path.exists(MODEL_DIR):
    os.makedirs(MODEL_DIR)

checkpoint_tar_mobile = get_file(
    'deeplabv3_mnv2_cityscapes_train_2018_02_05.tar.gz',
    CKPT_URL,
    extract=True,
    cache_subdir='',
    cache_dir=MODEL_DIR)

checkpoint_file = os.path.join(MODEL_DIR, MODEL_SUBDIR, 'model.ckpt')

extract_tensors_from_checkpoint_file(checkpoint_file,
                                     output_folder='weights/mobilenetv2')
--------------------------------------------------------------------------------

/model.py:
--------------------------------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np

from keras.models import Model
from keras.layers import Input
from keras.layers import Activation
from keras.layers import Concatenate
from keras.layers import Add
from keras.layers import Dropout
from keras.layers import BatchNormalization
from keras.layers import Conv2D
from keras.layers import DepthwiseConv2D
from keras.layers import AveragePooling2D
from keras.layers import UpSampling2D
from keras.engine import Layer
from keras.engine import InputSpec
from keras.engine.topology import get_source_inputs
from keras import backend as K
from keras.applications import imagenet_utils
from keras.utils import conv_utils

class BilinearUpsampling(Layer):
    """Upsampling layer that uses TensorFlow's bilinear resize.

    Either `upsampling` (integer scale factors) or `output_size`
    (an explicit output height and width) can be given.
    """

    def __init__(self, upsampling=(2, 2), output_size=None, data_format=None, **kwargs):
        super(BilinearUpsampling, self).__init__(**kwargs)

        self.data_format = conv_utils.normalize_data_format(data_format)
        self.input_spec = InputSpec(ndim=4)
        if output_size:
            self.output_size = conv_utils.normalize_tuple(
                output_size, 2, 'output_size')
            self.upsampling = None
        else:
            self.output_size = None
            self.upsampling = conv_utils.normalize_tuple(
                upsampling, 2, 'upsampling')

    def compute_output_shape(self, input_shape):
        if self.upsampling:
            height = self.upsampling[0] * \
                input_shape[1] if input_shape[1] is not None else None
            width = self.upsampling[1] * \
                input_shape[2] if input_shape[2] is not None else None
        else:
            height = self.output_size[0]
            width = self.output_size[1]
        return (input_shape[0],
                height,
                width,
                input_shape[3])

    def call(self, inputs):
        if self.upsampling:
            return K.tf.image.resize_bilinear(inputs, (inputs.shape[1] * self.upsampling[0],
                                                       inputs.shape[2] * self.upsampling[1]),
                                              align_corners=True)
        else:
            return K.tf.image.resize_bilinear(inputs, (self.output_size[0],
                                                       self.output_size[1]),
                                              align_corners=True)

    def get_config(self):
        config = {'upsampling': self.upsampling,
                  'output_size': self.output_size,
                  'data_format': self.data_format}
        base_config = super(BilinearUpsampling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

def relu6(x):
    return K.relu(x, max_value=6)

def _make_divisible(v, divisor, min_value=None):
    # Round v to the nearest multiple of divisor, never going below min_value
    # and never dropping more than 10% below the original value.
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v

def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id, skip_connection, rate=1):
    # MobileNetV2 inverted residual block: 1x1 expand -> 3x3 depthwise -> 1x1 project.
    in_channels = inputs._keras_shape[-1]
    pointwise_conv_filters = int(filters * alpha)
    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
    x = inputs
    prefix = 'expanded_conv_{}_'.format(block_id)

    if block_id:
        x = Conv2D(expansion * in_channels, kernel_size=1, padding='same',
                   use_bias=False, activation=None,
                   name=prefix + 'expand')(x)
        x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                               name=prefix + 'expand_BN')(x)
        x = Activation(relu6, name=prefix + 'expand_relu')(x)
    else:
        # The very first block has no expansion convolution.
        prefix = 'expanded_conv_'

    x = DepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
                        use_bias=False, padding='same', dilation_rate=(rate, rate),
                        name=prefix + 'depthwise')(x)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                           name=prefix + 'depthwise_BN')(x)

    x = Activation(relu6, name=prefix + 'depthwise_relu')(x)

    x = Conv2D(pointwise_filters,
               kernel_size=1, padding='same', use_bias=False, activation=None,
               name=prefix + 'project')(x)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999,
                           name=prefix + 'project_BN')(x)

    if skip_connection:
        return Add(name=prefix + 'add')([inputs, x])

    return x


def Deeplabv3(input_tensor=None, input_shape=(512, 512, 3), classes=19, alpha=1.):
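    """Instantiate a DeepLabv3+ model with a MobileNetV2 backbone.

    # Arguments
        input_tensor: optional Keras tensor to use as the image input.
        input_shape: shape of the input image as `(height, width, 3)`.
        classes: number of segmentation classes (19 for Cityscapes).
        alpha: width multiplier for the MobileNetV2 backbone.

    # Returns
        A Keras `Model` whose output is a `(height, width, classes)`
        map of per-pixel class logits.
    """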
    if K.backend() != 'tensorflow':
        raise RuntimeError('The Deeplabv3+ model is only available with '
                           'the TensorFlow backend.')

    if input_tensor is None:
        img_input = Input(shape=input_shape)
    else:
        if not K.is_keras_tensor(input_tensor):
            img_input = Input(tensor=input_tensor, shape=input_shape)
        else:
            img_input = input_tensor

    # Output stride: the input is downsampled by a factor of 8 before the ASPP stage.
    OS = 8
    first_block_filters = _make_divisible(32 * alpha, 8)
    x = Conv2D(first_block_filters, kernel_size=3, strides=(2, 2), padding='same',
               use_bias=False, name='Conv')(img_input)
    x = BatchNormalization(epsilon=1e-3, momentum=0.999, name='Conv_BN')(x)
    x = Activation(relu6, name='Conv_Relu6')(x)

    x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1,
                            expansion=1, block_id=0, skip_connection=False)

    x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2,
                            expansion=6, block_id=1, skip_connection=False)
    x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1,
                            expansion=6, block_id=2, skip_connection=True)

    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2,
                            expansion=6, block_id=3, skip_connection=False)
    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                            expansion=6, block_id=4, skip_connection=True)
    x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1,
                            expansion=6, block_id=5, skip_connection=True)

    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1,
                            expansion=6, block_id=6, skip_connection=False)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=7, skip_connection=True)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=8, skip_connection=True)
    x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=9, skip_connection=True)

    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=10, skip_connection=False)
    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=11, skip_connection=True)
    x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=12, skip_connection=True)

    # Stride 1 instead of 2 here (dilation rates are used instead) to keep the output stride at 8.
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=2,
                            expansion=6, block_id=13, skip_connection=False)
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                            expansion=6, block_id=14, skip_connection=True)
    x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, rate=4,
                            expansion=6, block_id=15, skip_connection=True)

    x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, rate=4,
                            expansion=6, block_id=16, skip_connection=False)

    b4 = AveragePooling2D(pool_size=(int(np.ceil(input_shape[0] / OS)), int(np.ceil(input_shape[1] / OS))))(x)
    b4 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='image_pooling')(b4)
    b4 = BatchNormalization(name='image_pooling_BN', epsilon=1e-5)(b4)
    b4 = Activation('relu')(b4)
    b4 = UpSampling2D(size=(int(np.ceil(input_shape[0] / OS)), int(np.ceil(input_shape[1] / OS))), data_format="channels_last")(b4)

    b0 = Conv2D(256, (1, 1), padding='same', use_bias=False, name='aspp0')(x)
    b0 = BatchNormalization(name='aspp0_BN', epsilon=1e-5)(b0)
    b0 = Activation('relu', name='aspp0_activation')(b0)

    x = Concatenate()([b4, b0])
    x = Conv2D(256, (1, 1), padding='same', use_bias=False, name='concat_projection')(x)
    x = BatchNormalization(name='concat_projection_BN', epsilon=1e-5)(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    x = Conv2D(classes, (1, 1), padding='same', name='logits_semantic')(x)
    prev_shape = (int(np.ceil(input_shape[0] / OS)), int(np.ceil(input_shape[1] / OS)))
    upscale_size = (int(input_shape[0] / prev_shape[0]), int(input_shape[1] / prev_shape[1]))
    x = UpSampling2D(size=upscale_size, data_format="channels_last")(x)

    if input_tensor is not None:
        inputs = get_source_inputs(input_tensor)
    else:
        inputs = img_input

    model = Model(inputs, x, name='deeplabv3+')

    return model

def preprocess_input(x):
    return imagenet_utils.preprocess_input(x, mode='tf')
--------------------------------------------------------------------------------
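For reference, a minimal sketch (with illustrative values, not part of the repo) of how `model.py`
is used, mirroring the resolution `convert.py` builds at:

```python
import numpy as np
from model import Deeplabv3, preprocess_input

# Build the network at the 384x384 resolution used by convert.py
# (weights are random here; convert.py loads the extracted .npy weights).
model = Deeplabv3(input_shape=(384, 384, 3), classes=19)

# preprocess_input maps pixel values from [0, 255] to [-1, 1],
# matching the scale/bias baked into the Core ML model.
img = np.random.randint(0, 256, (1, 384, 384, 3)).astype('float32')
logits = model.predict(preprocess_input(img))
print(logits.shape)  # expected: (1, 384, 384, 19)
```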