├── README.md ├── architecture.py ├── dataProcessing.py └── main.py /README.md: -------------------------------------------------------------------------------- 1 | # DeepMask 2 | A Keras implementation of DeepMask based on NIPS 2015 paper [Learning to Segment Object Candidates](https://arxiv.org/abs/1506.06204). 3 | 4 | ## Requirements 5 | ANACONDA、Keras、OpenCV3、mscoco 6 | 7 | Here is the instructions to install them all: 8 | * Download [ANACONDA](https://www.continuum.io/downloads) and then install it, I suggest you to install the Python 3.6 version. 9 | * Install [Keras](https://keras.io) by the following steps: 10 | 11 | `sudo pip install -U --pre pip setuptools wheel` 12 | 13 | `sudo pip install -U --pre numpy scipy matplotlib scikit-learn scikit-image` 14 | 15 | `sudo pip install -U --pre tensorflow` 16 | 17 | >If your computer supports CUDA, you could install tensorflow-gpu by 18 | 19 | >`sudo pip install -U --pre tensorflow-gpu` 20 | 21 | > Make sure you have installed [CUDA](https://developer.nvidia.com/cuda-downloads) and [cuDNN](https://developer.nvidia.com/cudnn) first. 22 | 23 | `sudo pip install -U --pre keras` 24 | 25 | * Install [OpenCV3](http://opencv.org) by the following steps: 26 | 27 | `brew tap homebrew/science` 28 | 29 | `brew install opencv3 --with-python3 --without-python --without-numpy` 30 | 31 | `cd ~/anaconda/lib/python3.6/site-packages/` 32 | 33 | `ln -s /usr/local/Cellar/opencv3/3.2.0/lib/python3.6/site-packages/cv2.cpython-36m-darwin.so cv2.so` 34 | 35 | If your computer system aren't macOS Sierra, you should download [OpenCV3.2.0](https://github.com/opencv/opencv/archive/3.2.0.zip) 36 | and then install it from source. 37 | 38 | Make sure the compile setting 'with-python3' is on, you could do that by using [cmake-gui](https://cmake.org). 39 | 40 | When you have installed OpenCV3, make sure the cv2.so is in '~/anaconda/lib/python3.6/site-packages/'. 41 | 42 | * Install [MS COCO API](https://github.com/pdollar/coco) by the following steps: 43 | 44 | Download [coco](https://codeload.github.com/pdollar/coco/zip/master) and unzip it. 45 | 46 | `cd coco-master/PythonAPI/` 47 | 48 | `python setup.py build_ext install` 49 | 50 | ## Usage 51 | Download the [mscoco datasets](http://mscoco.org/dataset/#download) first, you should only download '2014 Training images' and '2014 Train/Val object instances'. 52 | 53 | Make a dir named 'coco', go inside and make two dir named 'images' and 'annotations'. 54 | 55 | Unzip '2014 Training images' to dir 'images', '2014 Train/Val object instances' to dir 'annotations'. 56 | 57 | `cd DeepMask\` 58 | 59 | `python main.py` 60 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /architecture.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Dense, Input, Convolution2D, Flatten, Reshape, Dropout, MaxPooling2D 2 | from keras.applications import vgg16 3 | from keras.models import Model, model_from_json 4 | import os.path 5 | 6 | 7 | def initVgg16(): 8 | vgg = vgg16.VGG16(weights="imagenet") 9 | inp = Input(shape=(224, 224, 3), name='in') 10 | shared_layers = vgg.layers[1](inp) 11 | for i in range(len(vgg.layers)): 12 | if 1 < i < len(vgg.layers) - 5: 13 | shared_layers = vgg.layers[i](shared_layers) 14 | return inp, shared_layers 15 | 16 | 17 | def getModel(filename): 18 | if os.path.isfile(filename + '.json'): 19 | return loadModel(filename) 20 | 21 | inp, shared_layers = initVgg16() 22 | score_predictions = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(shared_layers) 23 | score_predictions = Flatten()(score_predictions) 24 | score_predictions = Dense(512, activation='relu')(score_predictions) 25 | score_predictions = Dropout(0.5)(score_predictions) 26 | # to change in order to the number of classes 27 | score_predictions = Dense(10, activation='relu')(score_predictions) 28 | score_predictions = Dropout(0.5)(score_predictions) 29 | score_predictions = Dense(1, name='score_out')(score_predictions) 30 | 31 | seg_predictions = Convolution2D(512, (1, 1), activation='relu')(shared_layers) 32 | seg_predictions = Flatten()(seg_predictions) 33 | seg_predictions = Dense(512)(seg_predictions) 34 | seg_predictions = Dense(56 * 56)(seg_predictions) 35 | seg_predictions = Reshape(target_shape=(56, 56), name='seg_out')(seg_predictions) 36 | 37 | model = Model(inputs=inp, outputs=[seg_predictions, score_predictions]) 38 | return model 39 | 40 | 41 | def loadModel(filename): 42 | json_file = open(filename + '.json', 'r') 43 | loaded_model_json = json_file.read() 44 | json_file.close() 45 | loaded_model = model_from_json(loaded_model_json) 46 | loaded_model.load_weights(filename + '.h5') 47 | return loaded_model 48 | 49 | 50 | def saveModel(model, filename): 51 | model_json = model.to_json() 52 | with open(filename + '.json', 'w') as json_file: 53 | json_file.write(model_json) 54 | 55 | model.save_weights(filename + '.h5') 56 | -------------------------------------------------------------------------------- /dataProcessing.py: -------------------------------------------------------------------------------- 1 | from PIL import Image, ImageDraw 2 | from pycocotools.coco import COCO 3 | import numpy as np 4 | import skimage.io as io 5 | import cv2 6 | 7 | dataDir = '../coco' 8 | dataType = 'train2014' 9 | annFile = '%s/annotations/instances_%s.json' % (dataDir, dataType) 10 | 11 | 12 | def getScore(mask): 13 | isCentered = -1 14 | centerFrame = 16 15 | offset = int((224 / 2) - centerFrame) 16 | for x in range(centerFrame * 2): 17 | for y in range(centerFrame * 2): 18 | if mask[offset + x][offset + y] == 1: 19 | isCentered = 1 20 | if isCentered == 1: 21 | break 22 | if isCentered == 1: 23 | break 24 | 25 | isNotTooLarge = 1 26 | if isCentered == -1: 27 | return -1 28 | 29 | offset = int((224 - 128) / 2) 30 | for x in range(128): 31 | if mask[offset][offset + x] == 1: 32 | isNotTooLarge = -1 33 | if mask[offset + x][offset] == 1: 34 | isNotTooLarge = -1 35 | if mask[224 - offset][offset + x] == 1: 36 | isNotTooLarge = -1 37 | if mask[offset + x][224 - offset] == 1: 38 | isNotTooLarge = -1 39 | if isNotTooLarge == -1: 40 | break 41 | return isNotTooLarge 42 | 43 | 44 | def setupMask(mask, length): 45 | for x in range(length): 46 | for y in range(length): 47 | if mask[x][y] != -1.0: 48 | mask[x][y] = 1.0 49 | return mask 50 | 51 | 52 | def getDatas(coco, cat, nbMax, offset): 53 | catIds = coco.getCatIds(catNms=[cat]) 54 | imgIds = coco.getImgIds(catIds=catIds) 55 | nbPos = nbMax / 2 56 | nbNeg = nbMax / 2 57 | 58 | retIn = [] 59 | retMask = [] 60 | retScore = [] 61 | 62 | for i in range(len(imgIds)): 63 | img = coco.loadImgs(imgIds[i + offset])[0] 64 | I = io.imread('%s/images/%s/%s' % (dataDir, dataType, img['file_name'])) 65 | I = cv2.resize(I, (224, 224)).astype(np.float32) 66 | 67 | if I.shape == (224, 224, 3): 68 | 69 | I = np.vectorize(lambda x: 256 - x)(I) 70 | I[:, :, 0] -= 103.939 71 | I[:, :, 1] -= 116.779 72 | I[:, :, 2] -= 123.68 73 | 74 | annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=0) 75 | anns = coco.loadAnns(annIds) 76 | for ann in anns: 77 | nI = Image.new('F', (img['width'], img['height']), color=-1) 78 | ImageDraw.Draw(nI).rectangle(ann['bbox'], outline=1, fill=1) 79 | nI = np.asarray(nI) 80 | nI = cv2.resize(nI, (224, 224)) 81 | sI = getScore(nI) 82 | nI = cv2.resize(nI, (56, 56)) 83 | nI = setupMask(nI, 56).astype(np.float32) 84 | if (sI == -1 and nbNeg > 0) or (sI == 1 and nbPos > 0): 85 | retIn.append(I) 86 | retMask.append(nI) 87 | retScore.append(sI) 88 | nbMax -= 1 89 | if nbMax <= 0: 90 | return retIn, retMask, retScore 91 | if sI == 1: 92 | nbPos -= 1 93 | elif sI == -1: 94 | nbNeg -= 1 95 | 96 | 97 | def prepareAllData(nbElem, cats, offset): 98 | 99 | coco = COCO(annFile) 100 | allInputs = [] 101 | allMasks = [] 102 | allScores = [] 103 | for catStr in cats: 104 | inputs, masks, scores = getDatas(coco, catStr, nbElem, offset) 105 | allInputs.extend(inputs) 106 | allMasks.extend(masks) 107 | allScores.extend(scores) 108 | return np.asarray(allInputs), np.asarray(allMasks), np.asarray(allScores).astype(np.float32) 109 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | from architecture import getModel, saveModel 2 | from dataProcessing import prepareAllData 3 | from keras.optimizers import SGD 4 | from keras import backend as K 5 | 6 | 7 | def binary_regression_error(y_true, y_pred): 8 | return 1./32 * K.log(1 + K.exp(-y_true*y_pred)) 9 | 10 | 11 | def mask_binary_regression_error(y_true, y_pred): 12 | return 0.5 * (1 - y_true[0][0][0]) * K.mean(K.log(1 + K.exp(-y_true*y_pred))) 13 | 14 | 15 | model = getModel('none') 16 | sgd = SGD(lr=0.001, decay=0.00005, momentum=0.9, nesterov=True, clipvalue=500) 17 | model.compile(optimizer=sgd, loss={'score_out': binary_regression_error, 'seg_out': mask_binary_regression_error},metrics=['acc']) 18 | 19 | inputs, masks, scores = prepareAllData(1000, ['outdoor', 'food', 'indoor', 'appliance', 'sports', 'person', 'animal', 20 | 'vehicle', 'furniture', 'accessory'], offset=0) 21 | 22 | model.fit({'in': inputs}, {'score_out': scores, 'seg_out': masks}, epochs=1, batch_size=32, verbose=1, shuffle=True) 23 | saveModel(model, "deepmask10000") 24 | --------------------------------------------------------------------------------