├── .gitignore ├── Code ├── Binary Image Segmentation with Tiramisu-Keras.ipynb ├── foo.txt ├── imagenet_utils.py ├── layers_builder.py ├── mask_conversion.py ├── pspnet.py ├── python_utils │ └── utils.py └── utils2.py ├── Data └── label_colors.txt ├── JupyterNotebooks └── Binary Image Segmentation with Tiramisu-Keras.ipynb ├── LICENSE ├── README.md └── Strata-NYC-Sept-2018 ├── Strata NYC - Tiramisu final_novideos.pptx └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # Jupyter Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # SageMath parsed files 80 | *.sage.py 81 | 82 | # dotenv 83 | .env 84 | 85 | # virtualenv 86 | .venv 87 | venv/ 88 | ENV/ 89 | 90 | # Spyder project settings 91 | .spyderproject 92 | .spyproject 93 | 94 | # Rope project settings 95 | .ropeproject 96 | 97 | # mkdocs documentation 98 | /site 99 | 100 | # mypy 101 | .mypy_cache/ 102 | -------------------------------------------------------------------------------- /Code/foo.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /Code/imagenet_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import json 3 | 4 | from keras.utils.data_utils import get_file 5 | from keras import backend as K 6 | 7 | CLASS_INDEX = None 8 | CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json' 9 | 10 | 11 | def preprocess_input(x, dim_ordering='default'): 12 | if dim_ordering == 'default': 13 | dim_ordering = K.image_dim_ordering() 14 | assert dim_ordering in {'tf', 'th'} 15 | 16 | if dim_ordering == 'th': 17 | x[:, 0, :, :] -= 103.939 18 | x[:, 1, :, :] -= 116.779 19 | x[:, 
def decode_predictions(preds, top=5):
    """Turn a batch of 1000-way class scores into labelled top-k results.

    Arguments:
        preds: 2D array of shape (samples, 1000) holding one score per class.
        top: number of highest-scoring classes to keep per sample.

    Returns:
        A list with one entry per sample. Each entry is a list of tuples,
        ordered from highest to lowest score, made of the class-index record
        for that class followed by its score.

    Raises:
        ValueError: if `preds` is not a 2D array with 1000 columns.
    """
    global CLASS_INDEX
    if len(preds.shape) != 2 or preds.shape[1] != 1000:
        raise ValueError('`decode_predictions` expects '
                         'a batch of predictions '
                         '(i.e. a 2D array of shape (samples, 1000)). '
                         'Found array with shape: ' + str(preds.shape))
    if CLASS_INDEX is None:
        # The index is downloaded (or read from the cache) once and then kept
        # in the module-level CLASS_INDEX for subsequent calls.
        fpath = get_file('imagenet_class_index.json',
                         CLASS_INDEX_PATH,
                         cache_subdir='models')
        with open(fpath) as index_file:
            CLASS_INDEX = json.load(index_file)
    results = []
    for pred in preds:
        # Reverse first, then truncate: `[-top:]` would return every class
        # for top=0 because `[-0:]` is the whole array.
        top_indices = pred.argsort()[::-1][:top]
        results.append([tuple(CLASS_INDEX[str(i)]) + (pred[i],)
                        for i in top_indices])
    return results
def residual_conv(prev, level, pad=1, lvl=1, sub_lvl=1, modify_stride=False):
    """Build the convolutional branch of a residual block.

    The branch is a 1x1 "reduce" convolution, a zero-padded 3x3 convolution
    and a 1x1 "increase" convolution, each followed by batch normalisation;
    the first two are also followed by a ReLU.

    Arguments:
        prev: input tensor.
        level: channel multiplier; the branch uses 64 * level filters
            internally and outputs 256 * level filters.
        pad: zero padding applied before the 3x3 convolution, also used as
            its dilation rate.
        lvl: stage number, used only to build the layer names.
        sub_lvl: block number within the stage, used only for layer names.
        modify_stride: when true, the first 1x1 convolution uses stride 2
            instead of 1.

    Returns:
        The output tensor of the last batch-normalisation layer.
    """
    prefix = "conv" + str(lvl) + "_" + str(sub_lvl)

    # Decide on truthiness rather than identity with True/False, so that
    # values such as 1 or None cannot skip the reduce convolution entirely.
    reduce_strides = (2, 2) if modify_stride else (1, 1)

    prev = Conv2D(64 * level, (1, 1), strides=reduce_strides,
                  name=prefix + "_1x1_reduce", use_bias=False)(prev)
    prev = BN(name=prefix + "_1x1_reduce_bn")(prev)
    prev = Activation('relu')(prev)

    prev = ZeroPadding2D(padding=(pad, pad))(prev)
    prev = Conv2D(64 * level, (3, 3), strides=(1, 1), dilation_rate=pad,
                  name=prefix + "_3x3", use_bias=False)(prev)
    prev = BN(name=prefix + "_3x3_bn")(prev)
    prev = Activation('relu')(prev)

    prev = Conv2D(256 * level, (1, 1), strides=(1, 1),
                  name=prefix + "_1x1_increase", use_bias=False)(prev)
    prev = BN(name=prefix + "_1x1_increase_bn")(prev)
    return prev
def ResNet(inp, layers):
    """Build the ResNet feature extractor on top of `inp`.

    Arguments:
        inp: input tensor.
        layers: network depth; 50 and 101 are supported.

    Returns:
        The output tensor of the final ReLU.

    Raises:
        ValueError: if `layers` is neither 50 nor 101.
    """
    # Compare by value: `is` on int literals only works by accident of
    # small-integer caching and fails for e.g. NumPy integers.
    if layers == 50:
        stage4_blocks = 6    # 4_1 - 4_6
    elif layers == 101:
        stage4_blocks = 23   # 4_1 - 4_23
    else:
        # Continuing here would silently build a network with stage 4 missing.
        raise ValueError("This ResNet is not implemented: layers=%r "
                         "(supported depths are 50 and 101)" % (layers,))

    # Short branch (only start of network): three 3x3 conv / BN / ReLU groups.
    stem = [("conv1_1_3x3_s2", 64, (2, 2)),
            ("conv1_2_3x3", 64, (1, 1)),
            ("conv1_3_3x3", 128, (1, 1))]
    res = inp
    for name, filters, strides in stem:
        res = Conv2D(filters, (3, 3), strides=strides, padding='same',
                     name=name, use_bias=False)(res)
        res = BN(name=name + "_bn")(res)
        res = Activation('relu')(res)

    res = MaxPooling2D(pool_size=(3, 3), padding='same',
                       strides=(2, 2))(res)  # "pool1_3x3_s2"

    # ---Residual layers (body of network)
    # modify_stride is used only once, in the 3_1 block, where it changes
    # the stride of the first convolution from 1 to 2.

    # 2_1 - 2_3
    res = residual_short(res, 1, pad=1, lvl=2, sub_lvl=1)
    for i in range(2):
        res = residual_empty(res, 1, pad=1, lvl=2, sub_lvl=i + 2)

    # 3_1 - 3_4
    res = residual_short(res, 2, pad=1, lvl=3, sub_lvl=1, modify_stride=True)
    for i in range(3):
        res = residual_empty(res, 2, pad=1, lvl=3, sub_lvl=i + 2)

    # 4_1 - 4_<stage4_blocks>
    res = residual_short(res, 4, pad=2, lvl=4, sub_lvl=1)
    for i in range(stage4_blocks - 1):
        res = residual_empty(res, 4, pad=2, lvl=4, sub_lvl=i + 2)

    # 5_1 - 5_3
    res = residual_short(res, 8, pad=4, lvl=5, sub_lvl=1)
    for i in range(2):
        res = residual_empty(res, 8, pad=4, lvl=5, sub_lvl=i + 2)

    res = Activation('relu')(res)
    return res
kernel_strides_map = {1: 90, 202 | 2: 45, 203 | 3: 30, 204 | 6: 15} 205 | else: 206 | print("Pooling parameters for input shape ", 207 | input_shape, " are not defined.") 208 | exit(1) 209 | 210 | names = [ 211 | "conv5_3_pool" + str(level) + "_conv", 212 | "conv5_3_pool" + str(level) + "_conv_bn" 213 | ] 214 | kernel = (kernel_strides_map[level], kernel_strides_map[level]) 215 | strides = (kernel_strides_map[level], kernel_strides_map[level]) 216 | prev_layer = AveragePooling2D(kernel, strides=strides)(prev_layer) 217 | prev_layer = Conv2D(512, (1, 1), strides=(1, 1), name=names[0], 218 | use_bias=False)(prev_layer) 219 | prev_layer = BN(name=names[1])(prev_layer) 220 | prev_layer = Activation('relu')(prev_layer) 221 | # prev_layer = Lambda(Interp, arguments={ 222 | # 'shape': feature_map_shape})(prev_layer) 223 | prev_layer = Interp(feature_map_shape)(prev_layer) 224 | return prev_layer 225 | 226 | 227 | def build_pyramid_pooling_module(res, input_shape): 228 | """Build the Pyramid Pooling Module.""" 229 | # ---PSPNet concat layers with Interpolation 230 | feature_map_size = tuple(int(ceil(input_dim / 8.0)) 231 | for input_dim in input_shape) 232 | print("PSP module will interpolate to a final feature map size of %s" % 233 | (feature_map_size, )) 234 | 235 | interp_block1 = interp_block(res, 1, feature_map_size, input_shape) 236 | interp_block2 = interp_block(res, 2, feature_map_size, input_shape) 237 | interp_block3 = interp_block(res, 3, feature_map_size, input_shape) 238 | interp_block6 = interp_block(res, 6, feature_map_size, input_shape) 239 | 240 | # concat all these layers. 
def build_pspnet(nb_classes, resnet_layers, input_shape, activation='softmax'):
    """Build and compile a PSPNet model.

    Arguments:
        nb_classes: number of output channels of the final 1x1 convolution.
        resnet_layers: depth passed on to `ResNet`.
        input_shape: (height, width) of the input images; the model input
            has three channels.
        activation: name of the activation applied to the resized output.

    Returns:
        A Keras `Model` compiled with Nesterov SGD, categorical
        cross-entropy loss and the accuracy metric.
    """
    print("Building a PSPNet based on ResNet %i expecting inputs of shape %s predicting %i classes" % (
        resnet_layers, input_shape, nb_classes))

    inp = Input((input_shape[0], input_shape[1], 3))
    res = ResNet(inp, layers=resnet_layers)
    psp = build_pyramid_pooling_module(res, input_shape)

    x = Conv2D(512, (3, 3), strides=(1, 1), padding="same", name="conv5_4",
               use_bias=False)(psp)
    x = BN(name="conv5_4_bn")(x)
    x = Activation('relu')(x)
    x = Dropout(0.1)(x)

    x = Conv2D(nb_classes, (1, 1), strides=(1, 1), name="conv6")(x)
    # Resize the class scores back to the input resolution.
    x = Interp([input_shape[0], input_shape[1]])(x)
    # Apply the activation the caller asked for instead of always softmax.
    x = Activation(activation)(x)

    model = Model(inputs=inp, outputs=x)

    # Solver
    sgd = SGD(lr=learning_rate, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
def npy_to_gif(npy_array_path, gif_path):
    """Convert a saved NumPy array into an image file.

    Loads the array stored at `npy_array_path`, multiplies it by 255 and
    writes the result to `gif_path` with `imageio.imsave`.

    Arguments:
        npy_array_path: path of the `.npy` file to read.
        gif_path: path of the image file to write.
    """
    # presumably the array is a 0/1 mask, so scaling gives 0/255 -- TODO confirm
    scaled = np.load(npy_array_path) * 255
    imageio.imsave(gif_path, scaled)
class PSPNet(object):
    """Pyramid Scene Parsing Network by Hengshuang Zhao et al 2017"""

    def __init__(self, nb_classes, resnet_layers, input_shape, weights):
        """Load or build the underlying Keras model.

        Arguments:
            nb_classes: number of classes, used when the model is built.
            resnet_layers: ResNet depth, used when the model is built.
            input_shape: (height, width) the model expects.
            weights: weights identifier. If it contains 'pspnet', the files
                `weights + ".json"` and `weights + ".h5"` are loaded when both
                exist, otherwise the model is built and filled from npy
                weights. Any other value is passed to `load_model` as is.
        """
        self.input_shape = input_shape
        # NOTE(review): set_npy_weights writes its json/h5 files under
        # weights/keras/, not next to `weights`, so the files it produces are
        # not found here on the next run -- confirm which location is intended.
        json_path = weights + ".json"
        h5_path = weights + ".h5"
        if 'pspnet' in weights:
            if os.path.isfile(json_path) and os.path.isfile(h5_path):
                print("Keras model & weights found, loading...")
                with CustomObjectScope({'Interp': layers.Interp}):
                    with open(json_path, 'r') as file_handle:
                        self.model = model_from_json(file_handle.read())
                self.model.load_weights(h5_path)
            else:
                print("No Keras model & weights found, import from npy weights.")
                self.model = layers.build_pspnet(nb_classes=nb_classes,
                                                 resnet_layers=resnet_layers,
                                                 input_shape=self.input_shape)
                self.set_npy_weights(weights)
        else:
            print('Load pre-trained weights')
            self.model = load_model(weights)

    def predict(self, img, flip_evaluation=False):
        """
        Predict segmentation for an image.

        Arguments:
            img: must be rowsxcolsx3
            flip_evaluation: passed on to `feed_forward`.

        Returns:
            The class scores, linearly rescaled to the original rows x cols
            of `img`.
        """
        h_ori, w_ori = img.shape[:2]

        # Preprocess
        # NOTE(review): scipy.misc.imresize is not available in recent SciPy
        # releases -- confirm the SciPy version this is run against.
        img = misc.imresize(img, self.input_shape)

        img = img - DATA_MEAN
        img = img[:, :, ::-1]  # RGB => BGR
        img = img.astype('float32')
        print("Predicting...")

        probs = self.feed_forward(img, flip_evaluation)
        h, w = probs.shape[:2]
        # Scale the prediction back to the size of the image passed in.
        probs = ndimage.zoom(probs, (1. * h_ori / h, 1. * w_ori / w, 1.),
                             order=1, prefilter=False)
        print("Finished prediction...")

        return probs

    def feed_forward(self, input_data, flip_evaluation=False):
        """Run the model on one preprocessed image.

        Arguments:
            input_data: array of shape (input_shape[0], input_shape[1], 3).
            flip_evaluation: when true, the image and its left-right mirror
                are predicted together and the two results are averaged
                after mirroring the second one back.

        Returns:
            The model output for the image, without the batch axis. No
            rescaling happens here; `predict` does that.
        """
        assert input_data.shape == (self.input_shape[0], self.input_shape[1], 3)

        if flip_evaluation:
            print("Predict flipped")
            # Axis 1 of a single (rows, cols, channels) image is the column
            # axis, which is the axis np.fliplr undoes below.
            input_with_flipped = np.stack(
                [input_data, np.flip(input_data, axis=1)])
            prediction_with_flipped = self.model.predict(input_with_flipped)
            prediction = (prediction_with_flipped[0] +
                          np.fliplr(prediction_with_flipped[1])) / 2.0
        else:
            # Add the batch axis exactly once, then strip it from the result.
            prediction = self.model.predict(
                input_data[np.newaxis, :, :, :])[0]
        return prediction

    def set_npy_weights(self, weights_path):
        """Copy weights from an npy file into the model and save the model.

        Reads `weights/npy/<weights_path>.npy`, assigns its entries to the
        model layers whose names start with 'conv', then writes the model
        architecture and weights to `weights/keras/<weights_path>.json` and
        `.h5`.
        """
        npy_weights_path = join("weights", "npy", weights_path + ".npy")
        json_path = join("weights", "keras", weights_path + ".json")
        h5_path = join("weights", "keras", weights_path + ".h5")

        print("Importing weights from %s" % npy_weights_path)
        weights = np.load(npy_weights_path, encoding='bytes').item()
        for layer in self.model.layers:
            print(layer.name)
            if layer.name[:4] == 'conv' and layer.name[-2:] == 'bn':
                # The file is keyed by byte strings, hence the encode() calls.
                layer_weights = weights[layer.name.encode()]
                mean = layer_weights['mean'.encode()].reshape(-1)
                variance = layer_weights['variance'.encode()].reshape(-1)
                scale = layer_weights['scale'.encode()].reshape(-1)
                offset = layer_weights['offset'.encode()].reshape(-1)

                self.model.get_layer(layer.name).set_weights(
                    [scale, offset, mean, variance])

            elif layer.name[:4] == 'conv' and not layer.name[-4:] == 'relu':
                try:
                    weight = weights[layer.name.encode()]['weights'.encode()]
                    self.model.get_layer(layer.name).set_weights([weight])
                except Exception:
                    # presumably set_weights rejects a kernel-only list for
                    # layers that also have a bias; retry with both -- verify.
                    biases = weights[layer.name.encode()]['biases'.encode()]
                    self.model.get_layer(layer.name).set_weights([weight,
                                                                  biases])
        print('Finished importing weights.')

        print("Writing keras model & weights")
        json_string = self.model.to_json()
        with open(json_path, 'w') as file_handle:
            file_handle.write(json_string)
        self.model.save_weights(h5_path)
        print("Finished writing Keras model & weights")
type=bool, default=False, 165 | help="Whether the network should predict on both image and flipped image.") 166 | 167 | args = parser.parse_args() 168 | 169 | os.environ["CUDA_VISIBLE_DEVICES"] = args.id 170 | 171 | sess = tf.Session() 172 | K.set_session(sess) 173 | 174 | with sess.as_default(): 175 | img = misc.imread(args.input_path, mode='RGB') 176 | cimg = misc.imresize(img, (args.input_size, args.input_size)) 177 | print(args) 178 | if not args.weights: 179 | if "pspnet50" in args.model: 180 | pspnet = PSPNet50(nb_classes=150, input_shape=(473, 473), 181 | weights=args.model) 182 | elif "pspnet101" in args.model: 183 | if "cityscapes" in args.model: 184 | pspnet = PSPNet101(nb_classes=19, input_shape=(713, 713), 185 | weights=args.model) 186 | if "voc2012" in args.model: 187 | pspnet = PSPNet101(nb_classes=21, input_shape=(473, 473), 188 | weights=args.model) 189 | 190 | else: 191 | print("Network architecture not implemented.") 192 | else: 193 | pspnet = PSPNet50(nb_classes=2, input_shape=( 194 | 768, 480), weights=args.weights) 195 | 196 | probs = pspnet.predict(cimg, args.flip) 197 | print("Writing results...") 198 | # import ipdb; ipdb.set_trace() 199 | cm = np.argmax(probs, axis=2) 200 | pm = np.max(probs, axis=2) 201 | 202 | color_cm = utils.add_color(cm) 203 | # color cm is [0.0-1.0] img is [0-255] 204 | alpha_blended = 0.5 * color_cm * 255 + 0.5 * img 205 | filename, ext = splitext(args.output_path) 206 | misc.imsave(filename + "_seg_read" + ext, cm) 207 | misc.imsave(filename + "_seg" + ext, color_cm) 208 | misc.imsave(filename + "_probs" + ext, pm) 209 | misc.imsave(filename + "_seg_blended" + ext, alpha_blended) 210 | -------------------------------------------------------------------------------- /Code/python_utils/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import colorsys 3 | import numpy as np 4 | from keras.models import Model 5 | from 
def class_image_to_image(class_id_image, class_id_to_rgb_map):
    """Map the class image to a rgb-color image.

    Arguments:
        class_id_image: 2D array of class ids (rows x cols).
        class_id_to_rgb_map: mapping from integer class id to an object
            whose `color` attribute is the RGB triple for that class.

    Returns:
        A uint8 array of shape (rows, cols, 3). Pixels whose class id is not
        in the mapping stay black, and one warning is printed per unresolved
        class id.
    """
    height, width = class_id_image.shape[0], class_id_image.shape[1]
    colored_image = np.zeros((height, width, 3), np.uint8)
    class_ids = np.asarray(class_id_image).reshape(height, width)

    # Resolve each distinct class id once and paint all of its pixels with a
    # mask, instead of doing a dictionary lookup for every pixel.
    for class_id in np.unique(class_ids):
        try:
            color = class_id_to_rgb_map[int(class_id)].color
        except KeyError as key_error:
            print("Warning: could not resolve classid %s" % key_error)
            continue
        colored_image[class_ids == class_id] = color
    return colored_image
def array_to_str(a):
    """Summarise an array as "dtype shape min max mean" on a single line."""
    summary = (a.dtype, a.shape, np.min(a), np.max(a), np.mean(a))
    # format(x) with no spec is what "{}".format(x) applies to each field.
    return " ".join(format(part) for part in summary)
def dump(obj, fname):
    """Pickle `obj` into the file `fname`, replacing any existing content.

    The file is closed before returning, so the data is on disk even if the
    interpreter does not collect the file object promptly.
    """
    with open(fname, 'wb') as pickle_file:
        pickle.dump(obj, pickle_file)
def fit_gen(gen, fn, eval_fn, nb_iter):
    """Run `fn` on `nb_iter` items from `gen`, evaluating periodically.

    Arguments:
        gen: iterator yielding argument tuples for `fn`.
        fn: callable invoked as `fn(*next(gen))` on every iteration.
        eval_fn: zero-argument callable invoked every `nb_iter // 10`
            iterations, starting with the first one.
        nb_iter: number of iterations to run.
    """
    # Clamp the interval to at least 1: for nb_iter < 10 the integer division
    # gives 0 and the modulo below would raise ZeroDivisionError.
    eval_every = max(1, nb_iter // 10)
    for i in range(nb_iter):
        fn(*next(gen))
        if i % eval_every == 0:
            eval_fn()
255 Garment 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Microsoft Partner Catalyst Team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # Image Segmentation 3 | 4 | ## Overview 5 | We recently developed a solution based on deep learning, i.e. Tiramisu, to segment the foreground and remove the background in images. Our goal was to enable image retrieval from a target catalogue image of a fashion retailer given a query mobile snapshot.
6 | 7 | ## Contents 8 | In this repository, you'll find: 9 | 10 | - the code to reproduce the image segmentation described in the accompanying [code story](https://www.microsoft.com/developerblog/2018/04/18/deep-learning-image-segmentation-for-ecommerce-catalogue-visual-search/) 11 | 12 | - how to train your own specific image segmentation with your own data 13 | 14 | A trained model can be found [here](https://dwrds.blob.core.windows.net/tiramisu/TiramisuSmallModel_20180225.h5). 15 | -------------------------------------------------------------------------------- /Strata-NYC-Sept-2018/Strata NYC - Tiramisu final_novideos.pptx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CatalystCode/image-segmentation-using-tiramisu/4c54b533b6595836a8a616a7dcb778f3eccc49a7/Strata-NYC-Sept-2018/Strata NYC - Tiramisu final_novideos.pptx -------------------------------------------------------------------------------- /Strata-NYC-Sept-2018/readme.md: -------------------------------------------------------------------------------- 1 | 2 | This folder contains the presentation which was given at Strata Data NYC in September 2018 3 | --------------------------------------------------------------------------------