├── INSTALL.md ├── README.md ├── capsal ├── __init__.py ├── bew_bs.py ├── caption_embed2.py ├── combine.py ├── compare.py ├── config.py ├── eval_cap.py ├── model_new10_upcap.py ├── model_new10_upcap11.py ├── nn.py ├── parallel_model.py ├── tfnpytokeras.py ├── train.py ├── train_list.txt ├── utils.py ├── val_list.txt ├── visualize.py └── vocabulary.py ├── data ├── ReadMe.md └── preprocessing.py ├── requirements.txt ├── setup.cfg ├── setup.py └── test_capsal.py /INSTALL.md: -------------------------------------------------------------------------------- 1 | # Mask R-CNN for Object Detection and Segmentation 2 | 3 | This document describes the installation steps for the Mask R-CNN framework used in the CapSal model. Please refer to the original [repository](https://github.com/matterport/Mask_RCNN.git) if you have any questions. 4 | 5 | The repository includes: 6 | * Source code of CapSal based on ResNet101 7 | * Training code on COCO-CapSal 8 | * Pre-trained weights for CapSal 9 | 10 | 11 | 12 | # Getting Started 13 | The code required by the CapSal model is stored in the `capsal` folder. To begin with, first install the requirements for the Mask R-CNN benchmark. 14 | 15 | ## Requirements 16 | Python 2.7, TensorFlow 1.4.1, Keras 2.1.4, and other common packages listed in `requirements.txt`. 17 | 18 | ### MS COCO Requirements: 19 | To train or test on COCO-CapSal, you'll also need: 20 | * pycocotools (installation instructions below) 21 | * [COCO-CapSal Dataset]() 22 | 23 | 24 | If you use Docker, the code has been verified to work on 25 | [this Docker container](https://hub.docker.com/r/waleedka/modern-deep-learning/). 26 | 27 | 28 | ## Installation 29 | 1. Install dependencies 30 | ```bash 31 | pip install -r requirements.txt 32 | ``` 33 | 2. Clone this repository 34 | 3. Run setup from the repository root directory 35 | ```bash 36 | python setup.py install 37 | ``` 38 | 4. Download pre-trained COCO weights (mask_rcnn_coco.h5) from the [releases page](https://github.com/matterport/Mask_RCNN/releases). 39 | 5. (Optional) To train or test on MS COCO, install `pycocotools` from one of these repos. They are forks of the original pycocotools with fixes for Python 3 and Windows (the official repo doesn't seem to be active anymore). 40 | 41 | * Linux: https://github.com/waleedka/coco 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Code-and-Dataset-for-CapSal 2 | This project provides the code and datasets for 'CapSal: Leveraging Captioning to Boost Semantics for Salient Object Detection', CVPR 2019. [Paper link](https://drive.google.com/open?id=1JcZMHBXEX-7AR1P010OXg_wCCC5HukeZ) 3 | 4 | 5 | Our code is implemented based on Mask R-CNN in TensorFlow and Keras. You can first install Mask R-CNN according to the [instructions](https://github.com/matterport/Mask_RCNN.git) or `INSTALL.md`. 6 | # COCO-CapSal Dataset 7 | The COCO-CapSal dataset provides the saliency ground truth as well as the image captions for each image. It contains 5265 images for training and 1459 for validation. The annotations can be downloaded at [BaiduYun](https://pan.baidu.com/s/1iU8A-RII7rvOG9KHz5Dysg) or [GoogleDrive](https://drive.google.com/open?id=1d04vkomA2sT2cUAst9CJYYHwTwNkSg2p). The folder 'capsal' contains the images, ground truth maps as well as the captions (json file) of both training and validation sets.
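As a quick sanity check after downloading, you can read one ground-truth map and the caption JSON with a few lines of Python. This is only a minimal sketch: the file names and JSON layout below are assumptions for illustration, so adjust them to the actual contents of the downloaded `capsal` folder.
```python
import json
import cv2

# Hypothetical paths -- adapt them to the layout of the downloaded annotations.
caption_file = 'capsal/train/captions_train.json'   # assumed name of the caption JSON
gt_file = 'capsal/train/gt/some_image.png'          # assumed name of one ground-truth map

with open(caption_file) as f:
    captions = json.load(f)   # assumed to map image ids to caption strings
print('number of caption entries: %d' % len(captions))   # expect ~5265 for the training split

gt = cv2.imread(gt_file, cv2.IMREAD_GRAYSCALE)   # saliency ground truth as a grayscale map
print('ground-truth map shape: %s' % str(gt.shape))
```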
8 | # Evaluation 9 | For testing the CapSal model, first download the trained model at [BaiduYun](https://pan.baidu.com/s/1dQwQ5AdJqBfSSgZPUNR_gg) or [Google](https://drive.google.com/drive/folders/1d04vkomA2sT2cUAst9CJYYHwTwNkSg2p?usp=sharing) 10 | and put it under `./model`. Run `test_capsal.py` to obtain the saliency maps of different datasets. 11 | The saliency maps are available at [Google](https://drive.google.com/open?id=1d04vkomA2sT2cUAst9CJYYHwTwNkSg2p) or [BaiduYun](https://pan.baidu.com/s/1LtlK3ZH8adZCEi8n0ys9BA). 12 | # Train 13 | Run `train.py`. 14 | # Citation 15 | @InProceedings{Zhang_2019_CVPR, 16 | author = {Zhang, Lu and Zhang, Jianming and Lin, Zhe and Lu, Huchuan and He, You}, 17 | title = {CapSal: Leveraging Captioning to Boost Semantics for Salient Object Detection}, 18 | booktitle = {CVPR}, 19 | year = {2019}} 20 | -------------------------------------------------------------------------------- /capsal/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /capsal/caption_embed2.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | import tensorflow.contrib.layers as layers 4 | import keras.backend as K 5 | import keras.layers as KL 6 | import keras.engine as KE 7 | import keras.models as KM 8 | import warnings 9 | warnings.filterwarnings("ignore") 10 | from tqdm import tqdm 11 | from keras.initializers import Constant 12 | 13 | def fc_kernel_initializer(): 14 | return tf.random_uniform_initializer(minval = -0.08,maxval = 0.08) 15 | def fc_kernel_regularizer(): 16 | return layers.l2_regularizer(scale = 1e-4) 17 | def load_weight_caption(): 18 | model_dir = './model/keras_caption.npy' 19 | data_dict = np.load(model_dir,encoding="bytes").item() 20 | count = 0 21 | sess = tf.Session() 22 | K.set_session(sess) 23 | sess.run(tf.global_variables_initializer()) 24 | for v in tqdm(tf.global_variables()): 25 | if v.name in data_dict.keys(): 26 | sess.run(v.assign(data_dict[v.name])) 27 | count += 1 28 | print("%d tensors loaded."
% count) 29 | print("image caption load") 30 | def load_weight(): 31 | model_dir = '/home/zhanglu/Mask_RCNN_new/mrcnn/keras_caption.npy' 32 | data_dict = np.load(model_dir,encoding='latin1').item() 33 | return data_dict 34 | 35 | 36 | def build_rnn(input,config): 37 | ctx = 64 38 | down = KL.Conv2D(512, (3, 3), padding="same", activation="relu", name='gcap_down_imagefeature')(input) 39 | 40 | reshaped_conv5_3_feats = KL.Lambda(lambda x: tf.reshape(x, [config.BATCH_SIZE, ctx, 512]))(down) 41 | conv_feats = reshaped_conv5_3_feats 42 | 43 | print("Building the RNN...") 44 | 45 | contexts = conv_feats 46 | reshaped_contexts = KL.Lambda(lambda x: tf.reshape(x, [-1, 512]))(contexts) 47 | temp1 = attend_1(reshaped_contexts) 48 | w_embedding = KL.Embedding(input_dim=5000, output_dim=512, name='gcap_embedding') 49 | 50 | # Setup the LSTM 51 | 52 | # Initialize the LSTM using the mean context 53 | # with tf.variable_scope("initialize"): 54 | context_mean = KL.Lambda(lambda x: tf.reduce_mean(x, axis=1))(conv_feats) 55 | initial_memory, initial_output = initialize(context_mean) 56 | initial_state = initial_memory, initial_output 57 | 58 | # Prepare to run 59 | predictions = [] 60 | outputs = [] 61 | current_inputs = [] 62 | num_steps = 15 63 | last_output = initial_output 64 | last_memory = initial_memory 65 | last_word = KL.Lambda(lambda x: K.zeros([config.BATCH_SIZE], 'int32'))(input) 66 | last_state = last_output, last_memory 67 | alphas = [] 68 | att_masks = [] 69 | cross_entropies = [] 70 | predictions_correct = [] 71 | lstm = KL.LSTM(512, return_state=True, recurrent_activation='hard_sigmoid', name='gcap_lstm', 72 | unit_forget_bias=False) # (last_output,initial_state = initial_state) 73 | 74 | # Generate the words one by one 75 | for idx in range(num_steps): 76 | # Attention mechanism 77 | # with tf.variable_scope("attend"): 78 | # alpha = attend(reshaped_contexts, last_output) 79 | 80 | # use 2 fc layers to attend 81 | 82 | temp2 = attend_2(last_output) 83 | 84 | temp2 = KL.Lambda(lambda x: tf.reshape(tf.tile(tf.expand_dims(x, 1), [1, ctx, 1]), [-1, 512]))(temp2) 85 | temp = KL.Add()([temp1,temp2]) 86 | att_logits = attend_3(temp) 87 | att_logits = KL.Lambda(lambda x: tf.reshape(x, [-1, ctx]))(att_logits) 88 | alpha = KL.Softmax()(att_logits) 89 | alpha1 = KL.RepeatVector(512)(alpha) 90 | alpha1 = KL.Permute((2, 1))(alpha1) 91 | context = KL.Multiply()([contexts, alpha1]) 92 | context = KL.Lambda(lambda x: tf.reduce_sum(x, 93 | axis=1))(context) 94 | alphas.append(alpha) 95 | word_embed = w_embedding(last_word) 96 | # Apply the LSTM 97 | # with tf.variable_scope("lstm"): 98 | 99 | current_input = KL.Concatenate(axis=-1)([context, word_embed]) 100 | current_input = KL.Lambda(lambda x: tf.expand_dims(x, 1))(current_input) 101 | 102 | output, memory, cell_out = lstm(current_input, initial_state=list(last_state)) # 103 | state = memory, cell_out 104 | current_inputs.append(current_input) 105 | outputs.append(output) 106 | # Decode the expanded output of LSTM into a word 107 | # with tf.variable_scope("decode"): 108 | 109 | expanded_output = KL.Concatenate(axis=-1)([output, 110 | context, 111 | word_embed]) 112 | logits = decode(expanded_output) 113 | # probs = KL.Lambda(lambda x: tf.nn.softmax(logits))(logits) 114 | prediction = KL.Lambda(lambda x: tf.argmax(x, 1))(logits) 115 | predictions.append(prediction) 116 | 117 | 118 | 119 | 120 | last_output = output 121 | last_memory = memory 122 | last_state = state 123 | if idx == 0: 124 | att_mask = KL.Lambda(lambda x: K.switch(tf.equal(x[0], 0), 
tf.constant(0.0), tf.constant(1.0)))(last_word) 125 | else: 126 | att_mask = KL.Lambda(lambda x: K.switch(tf.equal(x[0], 2), tf.constant(0.0), tf.constant(1.0)))(last_word) 127 | att_masks.append(att_mask) 128 | last_word = KL.Lambda(lambda x: tf.cast(x, tf.int32))(prediction) # 129 | 130 | 131 | # tf.get_variable_scope().reuse_variables() 132 | 133 | # Compute the final loss, if necessary 134 | 135 | outputs = KL.Lambda(lambda x: tf.reshape(x, [config.BATCH_SIZE, num_steps, 512]))(outputs) 136 | predictions = KL.Lambda(lambda x: tf.reshape(tf.cast(x, tf.float32), [config.BATCH_SIZE, num_steps, 1]))( 137 | predictions) 138 | att_masks = KL.Lambda(lambda x: tf.reshape(tf.cast(x, tf.float32), [num_steps, 1,1,1]))( 139 | att_masks) 140 | alphas = KL.Lambda(lambda x: tf.reshape(x,[config.BATCH_SIZE, num_steps, ctx]))(alphas) 141 | 142 | print("RNN built.") 143 | return outputs, predictions,alphas,att_masks 144 | def build_rnn2(input,caption_gt,masks,config): 145 | 146 | down = KL.Conv2D(512, (3, 3), padding="same", activation="relu", name='gcap_down_imagefeature')(input) 147 | 148 | reshaped_conv5_3_feats = KL.Lambda(lambda x:tf.reshape(x, [config.BATCH_SIZE, 64, 512]))(down) 149 | conv_feats = reshaped_conv5_3_feats 150 | 151 | print("Building the RNN...") 152 | 153 | contexts = conv_feats 154 | reshaped_contexts = KL.Lambda(lambda x: tf.reshape(x, [-1, 512]))(contexts) 155 | temp1 = attend_1(reshaped_contexts) 156 | w_embedding = KL.Embedding(input_dim=5000,output_dim=512,name='gcap_embedding') 157 | 158 | 159 | # Setup the LSTM 160 | 161 | # Initialize the LSTM using the mean context 162 | # with tf.variable_scope("initialize"): 163 | context_mean = KL.Lambda(lambda x: tf.reduce_mean(x, axis=1))(conv_feats) 164 | initial_memory, initial_output = initialize(context_mean) 165 | initial_state = initial_memory, initial_output 166 | 167 | # Prepare to run 168 | predictions = [] 169 | outputs = [] 170 | current_inputs = [] 171 | num_steps = 15 172 | last_output = initial_output 173 | last_memory = initial_memory 174 | last_word = KL.Lambda(lambda x: K.zeros([config.BATCH_SIZE],'int32'))(input) 175 | last_state = last_output,last_memory 176 | alphas = [] 177 | cross_entropies = [] 178 | predictions_correct = [] 179 | lstm = KL.LSTM(512,return_state=True,recurrent_activation='hard_sigmoid',name='gcap_lstm', 180 | unit_forget_bias=False)#(last_output,initial_state = initial_state) 181 | 182 | # Generate the words one by one 183 | for idx in range(num_steps): 184 | # Attention mechanism 185 | # with tf.variable_scope("attend"): 186 | # alpha = attend(contexts, last_output) 187 | 188 | # use 2 fc layers to attend 189 | 190 | temp2 = attend_2(last_output) 191 | 192 | temp2 = KL.Lambda(lambda x: tf.reshape(tf.tile(tf.expand_dims(x, 1), [1, 64, 1]), [-1, 512]))(temp2) 193 | temp = KL.Add()([temp1,temp2]) 194 | att_logits = attend_3(temp) 195 | att_logits = KL.Lambda(lambda x: tf.reshape(x, [-1, 64]))(att_logits) 196 | alpha = KL.Softmax()(att_logits) 197 | alpha1 = KL.RepeatVector(512)(alpha) 198 | alpha1 = KL.Permute((2,1))(alpha1) 199 | context = KL.Multiply()([contexts,alpha1]) 200 | context = KL.Lambda(lambda x: tf.reduce_sum(x, 201 | axis=1))(context) 202 | tiled_masks = KL.Lambda(lambda x: tf.tile(tf.expand_dims(x[:, idx], 1),[1, 64]))(masks) 203 | masked_alpha = KL.Lambda(lambda x: tf.reshape(x * tiled_masks,[-1]))(alpha) 204 | alphas.append(masked_alpha) 205 | 206 | word_embed = w_embedding(last_word) 207 | # Apply the LSTM 208 | # with tf.variable_scope("lstm"): 209 | 210 | current_input = 
KL.Concatenate(axis=-1)([context, word_embed]) 211 | current_input = KL.Lambda(lambda x: tf.expand_dims(x,1))(current_input) 212 | 213 | 214 | output, memory, cell_out = lstm(current_input, initial_state = list(last_state))# 215 | state = memory, cell_out 216 | current_inputs.append(current_input) 217 | outputs.append(output) 218 | # Decode the expanded output of LSTM into a word 219 | # with tf.variable_scope("decode"): 220 | 221 | expanded_output = KL.Concatenate(axis = -1)([output, 222 | context, 223 | word_embed]) 224 | logits = decode(expanded_output) 225 | # probs = KL.Lambda(lambda x: tf.nn.softmax(logits))(logits) 226 | prediction = KL.Lambda(lambda x: tf.argmax(x, 1))(logits) 227 | predictions.append(prediction) 228 | 229 | # Compute the loss for this step, if necessary 230 | masked_cross_entropy = KL.Lambda(lambda x: caption_loss(*x))([caption_gt[:,idx],logits,masks[:,idx]]) 231 | cross_entropies.append(masked_cross_entropy) 232 | 233 | # ground_truth = KL.Lambda(lambda x: tf.cast(caption_gt[:, idx], tf.int64))(caption_gt) 234 | # prediction_correct = tf.where( 235 | # tf.equal(prediction, ground_truth), 236 | # tf.cast(masks[:, idx], tf.float32), 237 | # tf.cast(tf.zeros_like(prediction), tf.float32)) 238 | # predictions_correct.append(prediction_correct) 239 | 240 | last_output = output 241 | last_memory = memory 242 | last_state = state 243 | last_word = KL.Lambda(lambda x: tf.reshape(tf.cast(x[:,idx], tf.int32),[config.BATCH_SIZE]))(caption_gt) # 244 | 245 | # tf.get_variable_scope().reuse_variables() 246 | 247 | # Compute the final loss, if necessary 248 | cross_entropies = KL.Lambda(lambda x : tf.stack(x, axis=1))(cross_entropies) 249 | cross_entropy_loss = KL.Lambda(lambda x: tf.reduce_sum(x) / tf.reduce_sum(masks))(cross_entropies) 250 | 251 | alphas = KL.Lambda(lambda x: tf.reshape(tf.stack(x, axis=1),[1,64,-1]))(alphas) 252 | attentions = KL.Lambda(lambda x: tf.reduce_sum(x, axis=2))(alphas) 253 | diffs = KL.Lambda(lambda x: tf.ones_like(x) - x)(attentions) 254 | attention_loss = KL.Lambda(lambda x: 0.01 * tf.nn.l2_loss(x) / (64))(diffs) 255 | 256 | 257 | total_loss = KL.Lambda(lambda x:cross_entropy_loss + x,name="caption_loss")(attention_loss) 258 | 259 | outputs = KL.Lambda(lambda x: tf.reshape(x,[config.BATCH_SIZE,num_steps,512]))(outputs) 260 | predictions = KL.Lambda(lambda x: tf.reshape(tf.cast(x,tf.float32),[config.BATCH_SIZE,num_steps,1]))(predictions) 261 | # outputs2 = KL.Lambda(lambda x: tf.concat([outputs,predictions],axis=0))(outputs) 262 | 263 | print("RNN built.") 264 | return outputs, predictions, total_loss 265 | def caption_loss(label,prediction,mask): 266 | 267 | cross_entropy =K.sparse_categorical_crossentropy(target=label,output=prediction,from_logits=True) 268 | masked_cross_entropy = mask * cross_entropy 269 | return masked_cross_entropy 270 | 271 | def initialize( context_mean): 272 | # use 2 fc layers to initialize 273 | temp1 = KL.Dense(512,activation='tanh',name='gcap_initialize_fc_a1')(context_mean)# 274 | memory = KL.Dense(512, name='gcap_initialize_fc_a2')(temp1)# 275 | temp2 = KL.Dense(512, activation='tanh', name='gcap_initialize_fc_b1')(context_mean)# 276 | 277 | output = KL.Dense(512, name='gcap_initialize_fc_b2')(temp2) 278 | return memory, output 279 | attend_1 = KL.Dense(512,activation='tanh',name='gcap_attend_fc_1a')# 280 | attend_2 = KL.Dense(512,activation='tanh',name='gcap_attend_fc_1b')# 281 | attend_3 = KL.Dense(1,use_bias=False,name='gcap_attend_fc_2')# 282 | def attend(inpu, output): 283 | 284 | # """ Attention Mechanism. 
""" 285 | 286 | # reshaped_contexts = KL.Lambda(lambda x: tf.reshape(x, [-1, 512]))(contexts) 287 | # use 2 fc layers to attend 288 | temp1 = attend_1(inpu) 289 | temp2 = attend_2(output) 290 | 291 | temp2 = KL.Lambda(lambda x: tf.tile(tf.expand_dims(temp2, 1), [1, 64, 1]))(temp2) 292 | temp2 = KL.Lambda(lambda x: tf.reshape(temp2, [-1, 512]))(temp2) 293 | temp = KL.Lambda(lambda x: temp1 + x)(temp2) 294 | logits = attend_3(temp) 295 | logits = KL.Lambda(lambda x: tf.reshape(logits, [-1, 64]))(logits) 296 | alpha = KL.Lambda(lambda x: tf.nn.softmax(logits))(logits) 297 | return alpha 298 | decode_1 = KL.Dense(1024,activation='tanh',name='gcap_decode_fc_1')# 299 | decode_2 = KL.Dense(5000,activation=None,name='gcap_decode_fc_2')# 300 | def decode(expanded_output): 301 | # """ Decode the expanded output of the LSTM into a word. """ 302 | # use 2 fc layers to decode 303 | temp = decode_1(expanded_output) 304 | 305 | logits = decode_2(temp) 306 | 307 | return logits 308 | # def initialize( context_mean): 309 | # # use 2 fc layers to initialize 310 | # temp1 = KL.Dense(512,activation='tanh',kernel_initializer=fc_kernel_initializer(),name='fc_a1')(context_mean) 311 | # memory = KL.Dense(512, kernel_initializer=fc_kernel_initializer(), name='fc_a2')(temp1) 312 | # temp2 = KL.Dense(512, activation='tanh', kernel_initializer=fc_kernel_initializer(), name='fc_b1')(context_mean) 313 | # output = KL.Dense(512, kernel_initializer=fc_kernel_initializer(), name='fc_b2')(temp2) 314 | # return memory, output 315 | # 316 | # def attend(contexts, output): 317 | # 318 | # # """ Attention Mechanism. """ 319 | # 320 | # reshaped_contexts = tf.reshape(contexts, [-1, 512]) 321 | # # use 2 fc layers to attend 322 | # temp1 = KL.Dense(512, activation='tanh', kernel_initializer=fc_kernel_initializer(),name='fc_1a')(reshaped_contexts) 323 | # temp2 = KL.Dense(512, activation='tanh', kernel_initializer=fc_kernel_initializer(), name='fc_1b')( 324 | # output) 325 | # temp2 = tf.tile(tf.expand_dims(temp2, 1), [1, 64, 1]) 326 | # temp2 = tf.reshape(temp2, [-1, 512]) 327 | # temp = temp1 + temp2 328 | # logits = KL.Dense(1, use_bias=False,kernel_initializer=fc_kernel_initializer(), name='fc_2')( 329 | # temp) 330 | # logits = tf.reshape(logits, [-1, 64]) 331 | # alpha = tf.nn.softmax(logits) 332 | # return alpha 333 | # 334 | # 335 | # def decode(expanded_output): 336 | # # """ Decode the expanded output of the LSTM into a word. 
""" 337 | # # use 2 fc layers to decode 338 | # temp = KL.Dense(1024, activation='tanh', kernel_initializer=fc_kernel_initializer(), name='fc_1')( 339 | # expanded_output) 340 | # logits = KL.Dense(5000, kernel_initializer=fc_kernel_initializer(), name='fc_2')( 341 | # temp) 342 | # return logits 343 | def conv2d(input_,shape,activation = tf.nn.relu,padding = 'SAME',name = None): 344 | with tf.variable_scope(name) as scope: 345 | W = tf.get_variable('kernel', 346 | shape=shape, 347 | initializer=tf.truncated_normal_initializer(stddev=0.01)) 348 | 349 | conv = tf.nn.conv2d(input_, W, [1, 1, 1, 1], padding=padding) 350 | 351 | # b = tf.Variable(tf.constant(0.0, shape=[shape[3]]), name='b') 352 | b = tf.get_variable('bias', shape=[shape[3]], initializer=tf.constant_initializer(0.0)) 353 | conv = tf.nn.bias_add(conv, b) 354 | conv = activation(conv) 355 | return conv 356 | def dense(input,shape,use_bias = True,name = None): 357 | 358 | with tf.variable_scope(name) as scope: 359 | weight = tf.get_variable('kernel',shape=shape,initializer=fc_kernel_initializer()) 360 | if use_bias: 361 | bias = tf.get_variable('bias',shape = [shape[1]],initializer=tf.constant_initializer(0.0)) 362 | out = tf.nn.bias_add(tf.matmul(input, weight), bias) 363 | else: 364 | out = tf.matmul(input, weight) 365 | 366 | return out 367 | -------------------------------------------------------------------------------- /capsal/combine.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | import tensorflow as tf 4 | import os 5 | restore_path = '/home/zhanglu/GBS/tensorflow/NEW_Model/Model_bs_dilated3/model.ckpt-1' 6 | regex_list = ['gru_cell/'] 7 | multiple = 10. 8 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 9 | def load_training_list(): 10 | 11 | 12 | with open('train_list.txt') as f: 13 | lines = f.read().splitlines() 14 | 15 | files1 = [] 16 | files2 = [] 17 | labels = [] 18 | sals = [] 19 | for line in lines: 20 | # labels.append('/home/zhanglu/Documents/dataset/DUTS-TR/DUTS-TR-Mask01-extend/%s' % line.replace('.jpg', '.png')) 21 | # files.append('/home/zhanglu/Documents/dataset/DUTS-TR/DUTS-TR-Image-extend/%s' % line) 22 | labels.append('/home/zhanglu/Mask_RCNN/train/gt01/%s' % line.replace('.jpg', '.png')) 23 | files1.append('/home/zhanglu/Mask_RCNN_new/logs/saliency20180610T2239/result1/%s' % line) 24 | files2.append('/home/zhanglu/Mask_RCNN_new/logs/saliency20180610T2239/result1_pixel/%s' % line) 25 | # sals.append('/home/zhanglu/Documents/dataset/DUTS-TR/contour-extend/%s' % line.replace('.jpg','.png')) 26 | return files1, files2, labels, lines 27 | 28 | 29 | def load_train_val_list(): 30 | 31 | files = [] 32 | labels = [] 33 | 34 | with open('train_label_list3.txt') as f: 35 | lines = f.read().splitlines() 36 | 37 | for line in lines: 38 | labels.append('dataset/MSRA-B/annotation/%s' % line) 39 | files.append('dataset/MSRA-B/image/%s' % line.replace('.png', '.jpg')) 40 | 41 | with open('dataset/MSRA-B/valid_cvpr2013.txt') as f: 42 | lines = f.read().splitlines() 43 | 44 | for line in lines: 45 | labels.append('dataset/MSRA-B/annotation/%s' % line) 46 | files.append('dataset/MSRA-B/image/%s' % line.replace('.png', '.jpg')) 47 | 48 | return files, labels 49 | 50 | def Conv_2d(input_, shape, stddev, name, padding='SAME'): 51 | with tf.variable_scope(name) as scope: 52 | W = tf.get_variable('W', 53 | shape=shape, 54 | initializer=tf.truncated_normal_initializer(stddev=stddev)) 55 | 56 | conv = tf.nn.conv2d(input_, W, [1, 1, 1, 1], padding=padding) 
57 | 58 | # b = tf.Variable(tf.constant(0.0, shape=[shape[3]]), name='b') 59 | b = tf.get_variable('b', shape=[shape[3]], initializer=tf.constant_initializer(0.0)) 60 | conv = tf.nn.bias_add(conv, b) 61 | 62 | return conv 63 | 64 | if __name__ == "__main__": 65 | 66 | 67 | # model.build_model() 68 | opt = tf.train.AdamOptimizer(learning_rate=1e-4) 69 | with tf.variable_scope(tf.get_variable_scope()): 70 | 71 | input1 = tf.placeholder(np.float32,[1,512,512,1],'sal1') 72 | input2 = tf.placeholder(np.float32, [1, 512, 512, 1], 'sal2') 73 | label_holder = tf.placeholder(np.float32,[1,512,512,1],'label') 74 | # input1 = tf.log(input1 / (1.0 - input1)) 75 | # input2 = tf.log(input2 / (1.0 - input2)) 76 | x = tf.concat([input1,input2],3) 77 | x = Conv_2d(x,[1,1,2,1],0.01,name='combination') 78 | output = tf.reshape(x,[-1,1]) 79 | 80 | label = tf.reshape(label_holder,[-1,1]) 81 | _epsilon = tf.convert_to_tensor(1e-6, output.dtype.base_dtype) 82 | 83 | output = tf.clip_by_value(output, _epsilon, 1. - _epsilon) 84 | Loss_Mean = tf.reduce_mean(- tf.reduce_sum(label * tf.log(output), 85 | len(output.get_shape()) - 1)) 86 | # Loss_Mean = tf.reduce_mean(tf.losses.absolute_difference(labels=label,predictions=x)) 87 | 88 | # Loss_Mean = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=x, labels=label)) 89 | # output = tf.nn.sigmoid(x) >0.5 90 | # Loss_Mean= tf.reduce_mean(-label* tf.log(x)-(1-label)*tf.log(1-x)) 91 | # # Loss_Mean = tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true=label,y_pred=x)) 92 | # output = x 93 | # model = NM.Model() 94 | # model.bs_and_dilation() 95 | max_grad_norm = 1 96 | tvars = tf.trainable_variables() 97 | grads = tf.gradients(Loss_Mean, tvars) 98 | # grads_and_vars = variables_helper.multiply_gradients_matching_regex(zip(grads, tvars), regex_list, multiple) 99 | # mul_grad = [pair[0] for pair in grads_and_vars] 100 | # mul_vars = [pair[1] for pair in grads_and_vars] 101 | # clip_grads, _ = tf.clip_by_global_norm(grads, max_grad_norm) 102 | 103 | train_op = opt.apply_gradients(zip(grads, tvars)) 104 | sess = tf.Session() 105 | sess.run(tf.global_variables_initializer()) 106 | saver = tf.train.Saver(max_to_keep=100) 107 | # 108 | # variables_to_restore = variables_helper.get_variables_available_in_checkpoint(tvars, restore_path) 109 | # restorer = tf.train.Saver(variables_to_restore) 110 | # restorer.restore(sess, os.path.join(restore_path)) 111 | # ckpt = tf.train.get_checkpoint_state('val/') 112 | # saver.restore(sess, ckpt.model_checkpoint_path) 113 | # # # train_list, label_list = load_train_val_list() 114 | train1_list, train2_list, label_list, lines= load_training_list() 115 | n_epochs = 20 116 | img_size = 512 117 | label_size = 512 118 | if not os.path.isdir('val'): 119 | os.mkdir('val') 120 | 121 | for i in range(1,n_epochs): 122 | whole_loss = 0.0 123 | whole_acc = 0.0 124 | count = 0 125 | 126 | for f_img1, f_img2, f_label, line in zip(train1_list, train2_list, label_list, lines): 127 | 128 | img1 = cv2.imread(f_img1)[:, :, 0].astype(np.float32) 129 | img_shape = img1.shape 130 | img1 = cv2.resize(img1, (img_size, img_size)) 131 | img1 = img1.reshape((1, img_size, img_size, 1)) 132 | img1 = img1 / 255. 133 | 134 | 135 | img2 = cv2.imread(f_img2)[:, :, 0].astype(np.float32) 136 | img2 = cv2.resize(img2, (img_size, img_size)) 137 | img2 = img2.reshape((1, img_size, img_size, 1)) 138 | img2 = img2 /255. 
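# Load the saliency ground-truth map for this sample and resize it to label_size x label_size.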
139 | 140 | label = cv2.imread(f_label)[:, :, 0].astype(np.float32) 141 | label = cv2.resize(label, (label_size, label_size)) 142 | label = label.reshape((1,512,512,1)) 143 | 144 | # label_c = cv2.imread(sals)[:, :, 0].astype(np.float32) 145 | # label_c = cv2.resize(label_c, (label_size, label_size)) 146 | # label_c = np.reshape(label_c, [-1, 1]) 147 | 148 | _, loss, out = sess.run([train_op,Loss_Mean,output], 149 | feed_dict={input1: img1, 150 | input2: img2, 151 | label_holder: label 152 | }) 153 | 154 | whole_loss += loss 155 | 156 | count = count + 1 157 | 158 | 159 | 160 | if count % 200 == 0: 161 | out = out.astype(np.float32) 162 | out = np.reshape(out, [img_size, img_size]) 163 | out = cv2.resize(out, (img_shape[1], img_shape[0])) 164 | cv2.imwrite('combine/' + line, out * 255) 165 | 166 | print "Loss of %d images: %f" % (count, (whole_loss/count)) 167 | 168 | 169 | 170 | 171 | print "Epoch %d: %f" % (i, (whole_loss/len(train1_list))) 172 | 173 | # os.mkdir('Model2') 174 | saver.save(sess, 'val/model.ckpt', global_step=i) -------------------------------------------------------------------------------- /capsal/compare.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import numpy as np 3 | 4 | from base_model import BaseModel 5 | 6 | class CaptionGenerator(BaseModel): 7 | def build_rnn(self): 8 | """ Build the RNN. """ 9 | print("Building the RNN...") 10 | config = self.config 11 | 12 | # Setup the placeholders 13 | if self.is_train: 14 | contexts = self.conv_feats 15 | sentences = tf.placeholder( 16 | dtype = tf.int32, 17 | shape = [config.batch_size, config.max_caption_length]) 18 | masks = tf.placeholder( 19 | dtype = tf.float32, 20 | shape = [config.batch_size, config.max_caption_length]) 21 | else: 22 | contexts = tf.placeholder( 23 | dtype = tf.float32, 24 | shape = [config.batch_size, self.num_ctx, self.dim_ctx]) 25 | last_memory = tf.placeholder( 26 | dtype = tf.float32, 27 | shape = [config.batch_size, config.num_lstm_units]) 28 | last_output = tf.placeholder( 29 | dtype = tf.float32, 30 | shape = [config.batch_size, config.num_lstm_units]) 31 | last_word = tf.placeholder( 32 | dtype = tf.int32, 33 | shape = [config.batch_size]) 34 | 35 | # Setup the word embedding 36 | with tf.variable_scope("word_embedding"): 37 | embedding_matrix = tf.get_variable( 38 | name = 'weights', 39 | shape = [config.vocabulary_size, config.dim_embedding], 40 | initializer = self.nn.fc_kernel_initializer, 41 | regularizer = self.nn.fc_kernel_regularizer, 42 | trainable = self.is_train) 43 | 44 | # Setup the LSTM 45 | lstm = tf.nn.rnn_cell.LSTMCell( 46 | config.num_lstm_units, 47 | initializer = self.nn.fc_kernel_initializer) 48 | if self.is_train: 49 | lstm = tf.nn.rnn_cell.DropoutWrapper( 50 | lstm, 51 | input_keep_prob = 1.0-config.lstm_drop_rate, 52 | output_keep_prob = 1.0-config.lstm_drop_rate, 53 | state_keep_prob = 1.0-config.lstm_drop_rate) 54 | 55 | # Initialize the LSTM using the mean context 56 | with tf.variable_scope("initialize"): 57 | context_mean = tf.reduce_mean(self.conv_feats, axis = 1) 58 | initial_memory, initial_output = self.initialize(context_mean) 59 | initial_state = initial_memory, initial_output 60 | 61 | # Prepare to run 62 | predictions = [] 63 | if self.is_train: 64 | alphas = [] 65 | cross_entropies = [] 66 | predictions_correct = [] 67 | num_steps = config.max_caption_length 68 | last_output = initial_output 69 | last_memory = initial_memory 70 | last_word = tf.zeros([config.batch_size], tf.int32) 
71 | else: 72 | num_steps = 1 73 | last_state = last_memory, last_output 74 | 75 | # Generate the words one by one 76 | for idx in range(num_steps): 77 | # Attention mechanism 78 | with tf.variable_scope("attend"): 79 | alpha = self.attend(contexts, last_output) 80 | context = tf.reduce_sum(contexts*tf.expand_dims(alpha, 2), 81 | axis = 1) 82 | if self.is_train: 83 | tiled_masks = tf.tile(tf.expand_dims(masks[:, idx], 1), 84 | [1, self.num_ctx]) 85 | masked_alpha = alpha * tiled_masks 86 | alphas.append(tf.reshape(masked_alpha, [-1])) 87 | 88 | # Embed the last word 89 | with tf.variable_scope("word_embedding"): 90 | word_embed = tf.nn.embedding_lookup(embedding_matrix, 91 | last_word) 92 | # Apply the LSTM 93 | with tf.variable_scope("lstm"): 94 | current_input = tf.concat([context, word_embed], 1) 95 | output, state = lstm(current_input, last_state) 96 | memory, _ = state 97 | 98 | # Decode the expanded output of LSTM into a word 99 | with tf.variable_scope("decode"): 100 | expanded_output = tf.concat([output, 101 | context, 102 | word_embed], 103 | axis = 1) 104 | logits = self.decode(expanded_output) 105 | probs = tf.nn.softmax(logits) 106 | prediction = tf.argmax(logits, 1) 107 | predictions.append(prediction) 108 | 109 | # Compute the loss for this step, if necessary 110 | if self.is_train: 111 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 112 | labels = sentences[:, idx], 113 | logits = logits) 114 | masked_cross_entropy = cross_entropy * masks[:, idx] 115 | cross_entropies.append(masked_cross_entropy) 116 | 117 | ground_truth = tf.cast(sentences[:, idx], tf.int64) 118 | prediction_correct = tf.where( 119 | tf.equal(prediction, ground_truth), 120 | tf.cast(masks[:, idx], tf.float32), 121 | tf.cast(tf.zeros_like(prediction), tf.float32)) 122 | predictions_correct.append(prediction_correct) 123 | 124 | last_output = output 125 | last_memory = memory 126 | last_state = state 127 | last_word = sentences[:, idx] 128 | 129 | tf.get_variable_scope().reuse_variables() 130 | 131 | # Compute the final loss, if necessary 132 | if self.is_train: 133 | cross_entropies = tf.stack(cross_entropies, axis = 1) 134 | cross_entropy_loss = tf.reduce_sum(cross_entropies) \ 135 | / tf.reduce_sum(masks) 136 | 137 | alphas = tf.stack(alphas, axis = 1) 138 | alphas = tf.reshape(alphas, [config.batch_size, self.num_ctx, -1]) 139 | attentions = tf.reduce_sum(alphas, axis = 2) 140 | diffs = tf.ones_like(attentions) - attentions 141 | attention_loss = config.attention_loss_factor \ 142 | * tf.nn.l2_loss(diffs) \ 143 | / (config.batch_size * self.num_ctx) 144 | 145 | reg_loss = tf.losses.get_regularization_loss() 146 | 147 | total_loss = cross_entropy_loss + attention_loss + reg_loss 148 | 149 | predictions_correct = tf.stack(predictions_correct, axis = 1) 150 | accuracy = tf.reduce_sum(predictions_correct) \ 151 | / tf.reduce_sum(masks) 152 | 153 | self.contexts = contexts 154 | if self.is_train: 155 | self.sentences = sentences 156 | self.masks = masks 157 | self.total_loss = total_loss 158 | self.cross_entropy_loss = cross_entropy_loss 159 | self.attention_loss = attention_loss 160 | self.reg_loss = reg_loss 161 | self.accuracy = accuracy 162 | self.attentions = attentions 163 | else: 164 | self.initial_memory = initial_memory 165 | self.initial_output = initial_output 166 | self.last_memory = last_memory 167 | self.last_output = last_output 168 | self.last_word = last_word 169 | self.memory = memory 170 | self.output = output 171 | self.probs = probs 172 | 173 | print("RNN built.") 174 | 
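# Note: the helper methods below implement one decoding step of the soft-attention
# captioner used in build_rnn() above. initialize() maps the mean context vector to the
# LSTM's initial memory and output; attend() (in the two-fc-layer setting) scores every
# context vector against the previous LSTM output, roughly
#     alpha = softmax(fc_2(tanh(fc_1a(contexts) + fc_1b(output)))),
# and build_rnn() forms the attended context as the alpha-weighted sum of the context
# vectors; decode() maps the concatenated [output, context, word_embed] vector to
# vocabulary logits.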
175 | def initialize(self, context_mean): 176 | """ Initialize the LSTM using the mean context. """ 177 | config = self.config 178 | context_mean = self.nn.dropout(context_mean) 179 | if config.num_initalize_layers == 1: 180 | # use 1 fc layer to initialize 181 | memory = self.nn.dense(context_mean, 182 | units = config.num_lstm_units, 183 | activation = None, 184 | name = 'fc_a') 185 | output = self.nn.dense(context_mean, 186 | units = config.num_lstm_units, 187 | activation = None, 188 | name = 'fc_b') 189 | else: 190 | # use 2 fc layers to initialize 191 | temp1 = self.nn.dense(context_mean, 192 | units = config.dim_initalize_layer, 193 | activation = tf.tanh, 194 | name = 'fc_a1') 195 | temp1 = self.nn.dropout(temp1) 196 | memory = self.nn.dense(temp1, 197 | units = config.num_lstm_units, 198 | activation = None, 199 | name = 'fc_a2') 200 | 201 | temp2 = self.nn.dense(context_mean, 202 | units = config.dim_initalize_layer, 203 | activation = tf.tanh, 204 | name = 'fc_b1') 205 | temp2 = self.nn.dropout(temp2) 206 | output = self.nn.dense(temp2, 207 | units = config.num_lstm_units, 208 | activation = None, 209 | name = 'fc_b2') 210 | return memory, output 211 | 212 | def attend(self, contexts, output): 213 | """ Attention Mechanism. """ 214 | config = self.config 215 | reshaped_contexts = tf.reshape(contexts, [-1, self.dim_ctx]) 216 | reshaped_contexts = self.nn.dropout(reshaped_contexts) 217 | output = self.nn.dropout(output) 218 | if config.num_attend_layers == 1: 219 | # use 1 fc layer to attend 220 | logits1 = self.nn.dense(reshaped_contexts, 221 | units = 1, 222 | activation = None, 223 | use_bias = False, 224 | name = 'fc_a') 225 | logits1 = tf.reshape(logits1, [-1, self.num_ctx]) 226 | logits2 = self.nn.dense(output, 227 | units = self.num_ctx, 228 | activation = None, 229 | use_bias = False, 230 | name = 'fc_b') 231 | logits = logits1 + logits2 232 | else: 233 | # use 2 fc layers to attend 234 | temp1 = self.nn.dense(reshaped_contexts, 235 | units = config.dim_attend_layer, 236 | activation = tf.tanh, 237 | name = 'fc_1a') 238 | temp2 = self.nn.dense(output, 239 | units = config.dim_attend_layer, 240 | activation = tf.tanh, 241 | name = 'fc_1b') 242 | temp2 = tf.tile(tf.expand_dims(temp2, 1), [1, self.num_ctx, 1]) 243 | temp2 = tf.reshape(temp2, [-1, config.dim_attend_layer]) 244 | temp = temp1 + temp2 245 | temp = self.nn.dropout(temp) 246 | logits = self.nn.dense(temp, 247 | units = 1, 248 | activation = None, 249 | use_bias = False, 250 | name = 'fc_2') 251 | logits = tf.reshape(logits, [-1, self.num_ctx]) 252 | alpha = tf.nn.softmax(logits) 253 | return alpha 254 | 255 | def decode(self, expanded_output): 256 | """ Decode the expanded output of the LSTM into a word. """ 257 | config = self.config 258 | expanded_output = self.nn.dropout(expanded_output) 259 | if config.num_decode_layers == 1: 260 | # use 1 fc layer to decode 261 | logits = self.nn.dense(expanded_output, 262 | units = config.vocabulary_size, 263 | activation = None, 264 | name = 'fc') 265 | else: 266 | # use 2 fc layers to decode 267 | temp = self.nn.dense(expanded_output, 268 | units = config.dim_decode_layer, 269 | activation = tf.tanh, 270 | name = 'fc_1') 271 | temp = self.nn.dropout(temp) 272 | logits = self.nn.dense(temp, 273 | units = config.vocabulary_size, 274 | activation = None, 275 | name = 'fc_2') 276 | return logits 277 | 278 | def build_optimizer(self): 279 | """ Setup the optimizer and training operation. 
""" 280 | config = self.config 281 | 282 | learning_rate = tf.constant(config.initial_learning_rate) 283 | if config.learning_rate_decay_factor < 1.0: 284 | def _learning_rate_decay_fn(learning_rate, global_step): 285 | return tf.train.exponential_decay( 286 | learning_rate, 287 | global_step, 288 | decay_steps = config.num_steps_per_decay, 289 | decay_rate = config.learning_rate_decay_factor, 290 | staircase = True) 291 | learning_rate_decay_fn = _learning_rate_decay_fn 292 | else: 293 | learning_rate_decay_fn = None 294 | 295 | with tf.variable_scope('optimizer', reuse = tf.AUTO_REUSE): 296 | if config.optimizer == 'Adam': 297 | optimizer = tf.train.AdamOptimizer( 298 | learning_rate = config.initial_learning_rate, 299 | beta1 = config.beta1, 300 | beta2 = config.beta2, 301 | epsilon = config.epsilon 302 | ) 303 | elif config.optimizer == 'RMSProp': 304 | optimizer = tf.train.RMSPropOptimizer( 305 | learning_rate = config.initial_learning_rate, 306 | decay = config.decay, 307 | momentum = config.momentum, 308 | centered = config.centered, 309 | epsilon = config.epsilon 310 | ) 311 | elif config.optimizer == 'Momentum': 312 | optimizer = tf.train.MomentumOptimizer( 313 | learning_rate = config.initial_learning_rate, 314 | momentum = config.momentum, 315 | use_nesterov = config.use_nesterov 316 | ) 317 | else: 318 | optimizer = tf.train.GradientDescentOptimizer( 319 | learning_rate = config.initial_learning_rate 320 | ) 321 | 322 | opt_op = tf.contrib.layers.optimize_loss( 323 | loss = self.total_loss, 324 | global_step = self.global_step, 325 | learning_rate = learning_rate, 326 | optimizer = optimizer, 327 | clip_gradients = config.clip_gradients, 328 | learning_rate_decay_fn = learning_rate_decay_fn) 329 | 330 | self.opt_op = opt_op 331 | 332 | def build_summary(self): 333 | """ Build the summary (for TensorBoard visualization). """ 334 | with tf.name_scope("variables"): 335 | for var in tf.trainable_variables(): 336 | with tf.name_scope(var.name[:var.name.find(":")]): 337 | self.variable_summary(var) 338 | 339 | with tf.name_scope("metrics"): 340 | tf.summary.scalar("cross_entropy_loss", self.cross_entropy_loss) 341 | tf.summary.scalar("attention_loss", self.attention_loss) 342 | tf.summary.scalar("reg_loss", self.reg_loss) 343 | tf.summary.scalar("total_loss", self.total_loss) 344 | tf.summary.scalar("accuracy", self.accuracy) 345 | 346 | with tf.name_scope("attentions"): 347 | self.variable_summary(self.attentions) 348 | 349 | self.summary = tf.summary.merge_all() 350 | 351 | def variable_summary(self, var): 352 | """ Build the summary for a variable. """ 353 | mean = tf.reduce_mean(var) 354 | tf.summary.scalar('mean', mean) 355 | stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean))) 356 | tf.summary.scalar('stddev', stddev) 357 | tf.summary.scalar('max', tf.reduce_max(var)) 358 | tf.summary.scalar('min', tf.reduce_min(var)) 359 | tf.summary.histogram('histogram', var) 360 | -------------------------------------------------------------------------------- /capsal/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import math 11 | import numpy as np 12 | 13 | 14 | # Base Configuration Class 15 | # Don't use this class directly. Instead, sub-class it and override 16 | # the configurations you need to change. 
17 | 18 | class Config(object): 19 | """Base configuration class. For custom configurations, create a 20 | sub-class that inherits from this one and override properties 21 | that need to be changed. 22 | """ 23 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 24 | # Useful if your code needs to do things differently depending on which 25 | # experiment is running. 26 | NAME = None # Override in sub-classes 27 | 28 | # NUMBER OF GPUs to use. For CPU training, use 1 29 | GPU_COUNT = 1 30 | 31 | # Number of images to train with on each GPU. A 12GB GPU can typically 32 | # handle 2 images of 1024x1024px. 33 | # Adjust based on your GPU memory and image sizes. Use the highest 34 | # number that your GPU can handle for best performance. 35 | IMAGES_PER_GPU = 2 36 | 37 | # Number of training steps per epoch 38 | # This doesn't need to match the size of the training set. Tensorboard 39 | # updates are saved at the end of each epoch, so setting this to a 40 | # smaller number means getting more frequent TensorBoard updates. 41 | # Validation stats are also calculated at each epoch end and they 42 | # might take a while, so don't set this too small to avoid spending 43 | # a lot of time on validation stats. 44 | STEPS_PER_EPOCH = 1000 45 | 46 | # Number of validation steps to run at the end of every training epoch. 47 | # A bigger number improves accuracy of validation stats, but slows 48 | # down the training. 49 | VALIDATION_STEPS = 50 50 | 51 | # Backbone network architecture 52 | # Supported values are: resnet50, resnet101 53 | BACKBONE = "resnet101" 54 | 55 | # The strides of each layer of the FPN Pyramid. These values 56 | # are based on a Resnet101 backbone. 57 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 58 | 59 | # Number of classification classes (including background) 60 | NUM_CLASSES = 1 # Override in sub-classes 61 | 62 | # Length of square anchor side in pixels 63 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 64 | # RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) 65 | # Ratios of anchors at each cell (width/height) 66 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 67 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 68 | 69 | # Anchor stride 70 | # If 1 then anchors are created for each cell in the backbone feature map. 71 | # If 2, then anchors are created for every other cell, and so on. 72 | RPN_ANCHOR_STRIDE = 1 73 | 74 | # Non-max suppression threshold to filter RPN proposals. 75 | # You can increase this during training to generate more propsals. 76 | RPN_NMS_THRESHOLD = 0.7 77 | 78 | # How many anchors per image to use for RPN training 79 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 80 | 81 | # ROIs kept after non-maximum supression (training and inference) 82 | POST_NMS_ROIS_TRAINING = 2000 83 | POST_NMS_ROIS_INFERENCE = 1000 84 | 85 | # If enabled, resizes instance masks to a smaller size to reduce 86 | # memory load. Recommended when using high-resolution images. 87 | USE_MINI_MASK = True 88 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 89 | PIXEL_MASK_SHAPE = 256 90 | # 91 | WORD_STEP = 15 92 | W_EMB_DIM = 300 93 | VOCAB_SIZE = 6951#6277# 94 | # Input image resizing 95 | # Generally, use the "square" resizing mode for training and inferencing 96 | # and it should work well in most cases. In this mode, images are scaled 97 | # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the 98 | # scaling doesn't make the long side > IMAGE_MAX_DIM. 
Then the image is 99 | # padded with zeros to make it a square so multiple images can be put 100 | # in one batch. 101 | # Available resizing modes: 102 | # none: No resizing or padding. Return the image unchanged. 103 | # square: Resize and pad with zeros to get a square image 104 | # of size [max_dim, max_dim]. 105 | # pad64: Pads width and height with zeros to make them multiples of 64. 106 | # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales 107 | # up before padding. IMAGE_MAX_DIM is ignored in this mode. 108 | # The multiple of 64 is needed to ensure smooth scaling of feature 109 | # maps up and down the 6 levels of the FPN pyramid (2**6=64). 110 | # crop: Picks random crops from the image. First, scales the image based 111 | # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of 112 | # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only. 113 | # IMAGE_MAX_DIM is not used in this mode. 114 | IMAGE_RESIZE_MODE = "square" 115 | IMAGE_MIN_DIM =800 116 | IMAGE_MAX_DIM = 1024 117 | # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further 118 | # up scaling. For example, if set to 2 then images are scaled up to double 119 | # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it. 120 | # Howver, in 'square' mode, it can be overruled by IMAGE_MAX_DIM. 121 | IMAGE_MIN_SCALE = 0 122 | 123 | # Image mean (RGB) 124 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 125 | 126 | # Number of ROIs per image to feed to classifier/mask heads 127 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 128 | # enough positive proposals to fill this and keep a positive:negative 129 | # ratio of 1:3. You can increase the number of proposals by adjusting 130 | # the RPN NMS threshold. 131 | TRAIN_ROIS_PER_IMAGE = 100 132 | 133 | # Percent of positive ROIs used to train classifier/mask heads 134 | ROI_POSITIVE_RATIO = 0.33 135 | 136 | # Pooled ROIs 137 | POOL_SIZE = 7 138 | MASK_POOL_SIZE = 14 139 | 140 | # Shape of output mask 141 | # To change this you also need to change the neural network mask branch 142 | MASK_SHAPE = [28, 28] 143 | 144 | # Maximum number of ground truth instances to use in one image 145 | MAX_GT_INSTANCES = 100 146 | 147 | # Bounding box refinement standard deviation for RPN and final detections. 148 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 149 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 150 | 151 | # Max number of final detections 152 | DETECTION_MAX_INSTANCES = 100 153 | 154 | # Minimum probability value to accept a detected instance 155 | # ROIs below this threshold are skipped 156 | DETECTION_MIN_CONFIDENCE = 0.7 157 | 158 | # Non-maximum suppression threshold for detection 159 | DETECTION_NMS_THRESHOLD = 0.3 160 | 161 | # Learning rate and momentum 162 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 163 | # weights to explode. Likely due to differences in optimzer 164 | # implementation. 165 | LEARNING_RATE = 0.001#0.001 166 | LEARNING_MOMENTUM = 0.9 167 | 168 | # Weight decay regularization 169 | WEIGHT_DECAY = 0.0001 170 | 171 | # Loss weights for more precise optimization. 172 | # Can be used for R-CNN training setup. 
173 | LOSS_WEIGHTS = { 174 | "rpn_class_loss": 1., 175 | "rpn_bbox_loss": 1., 176 | "mrcnn_class_loss": 1., 177 | "mrcnn_bbox_loss": 1., 178 | "mrcnn_mask_loss": 1., 179 | "mrcnn_pixel_loss": 1., 180 | "mrcnn_final_loss":1., 181 | "caption_loss":0.1 182 | } 183 | 184 | # Use RPN ROIs or externally generated ROIs for training 185 | # Keep this True for most situations. Set to False if you want to train 186 | # the head branches on ROI generated by code rather than the ROIs from 187 | # the RPN. For example, to debug the classifier head without having to 188 | # train the RPN. 189 | USE_RPN_ROIS = True 190 | 191 | # Train or freeze batch normalization layers 192 | # None: Train BN layers. This is the normal mode 193 | # False: Freeze BN layers. Good when using a small batch size 194 | # True: (don't use). Set layer in training mode even when inferencing 195 | TRAIN_BN = False # Defaulting to False since batch size is often small 196 | 197 | # Gradient norm clipping 198 | GRADIENT_CLIP_NORM = 5.0 199 | 200 | def __init__(self): 201 | """Set values of computed attributes.""" 202 | # Effective batch size 203 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 204 | 205 | # Input image size 206 | if self.IMAGE_RESIZE_MODE == "crop": 207 | self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, 3]) 208 | else: 209 | self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 3]) 210 | 211 | # Image meta data length 212 | # See compose_image_meta() for details 213 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES 214 | 215 | def display(self): 216 | """Display Configuration values.""" 217 | print("\nConfigurations:") 218 | for a in dir(self): 219 | if not a.startswith("__") and not callable(getattr(self, a)): 220 | print("{:30} {}".format(a, getattr(self, a))) 221 | print("\n") 222 | -------------------------------------------------------------------------------- /capsal/eval_cap.py: -------------------------------------------------------------------------------- 1 | from mrcnn.coco.pycocoevalcap.tokenizer.ptbtokenizer import PTBTokenizer 2 | from mrcnn.coco.pycocoevalcap.bleu.bleu import Bleu 3 | from mrcnn.coco.pycocoevalcap.meteor.meteor import Meteor 4 | from mrcnn.coco.pycocoevalcap.rouge.rouge import Rouge 5 | from mrcnn.coco.pycocoevalcap.cider.cider import Cider 6 | # Down load the coco evaluation tool at https://pan.baidu.com/s/1mRN_qV7X8ZLUeuARQY3EwQ 7 | 8 | class COCOEvalCap: 9 | def __init__(self, coco, cocoRes): 10 | self.evalImgs = [] 11 | self.eval = {} 12 | self.imgToEval = {} 13 | self.coco = coco 14 | self.cocoRes = cocoRes 15 | 16 | 17 | def evaluate(self,imgIds): 18 | 19 | # imgIds = self.coco.getImgIds() 20 | gts = {} 21 | res = {} 22 | for imgId in imgIds: 23 | gts[imgId] = [k.encode('utf-8') for k in self.coco[str(imgId)]] 24 | res[imgId] = self.cocoRes[imgId] 25 | # imgIds = self.params['image_id'] 26 | # ================================================= 27 | # Set up scorers 28 | # ================================================= 29 | print 'tokenization...' 30 | # tokenizer = PTBTokenizer() 31 | # 32 | # res = tokenizer.tokenize(res) 33 | 34 | # ================================================= 35 | # Set up scorers 36 | # ================================================= 37 | print 'setting up scorers...' 
38 | scorers = [ 39 | (Bleu(4), ["Bleu_1", "Bleu_2", "Bleu_3", "Bleu_4"]), 40 | (Meteor(),"METEOR"), 41 | (Rouge(), "ROUGE_L"), 42 | (Cider(), "CIDEr") 43 | ] 44 | 45 | # ================================================= 46 | # Compute scores 47 | # ================================================= 48 | for scorer, method in scorers: 49 | print 'computing %s score...'%(scorer.method()) 50 | score, scores = scorer.compute_score(gts, res) 51 | if type(method) == list: 52 | for sc, scs, m in zip(score, scores, method): 53 | self.setEval(sc, m) 54 | self.setImgToEvalImgs(scs, gts.keys(), m) 55 | print "%s: %0.3f"%(m, sc) 56 | else: 57 | self.setEval(score, method) 58 | self.setImgToEvalImgs(scores, gts.keys(), method) 59 | print "%s: %0.3f"%(method, score) 60 | self.setEvalImgs() 61 | 62 | def setEval(self, score, method): 63 | self.eval[method] = score 64 | 65 | def setImgToEvalImgs(self, scores, imgIds, method): 66 | for imgId, score in zip(imgIds, scores): 67 | if not imgId in self.imgToEval: 68 | self.imgToEval[imgId] = {} 69 | self.imgToEval[imgId]["image_id"] = imgId 70 | self.imgToEval[imgId][method] = score 71 | 72 | def setEvalImgs(self): 73 | self.evalImgs = [eval for imgId, eval in self.imgToEval.items()] 74 | -------------------------------------------------------------------------------- /capsal/nn.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.contrib.layers as layers 3 | 4 | class NN(object): 5 | def __init__(self): 6 | self.config = Config 7 | self.is_train = False 8 | self.train_cnn = False 9 | self.prepare() 10 | 11 | def prepare(self): 12 | """ Setup the weight initalizers and regularizers. """ 13 | config = self.config 14 | 15 | self.conv_kernel_initializer = layers.xavier_initializer() 16 | 17 | if self.train_cnn and config.conv_kernel_regularizer_scale > 0: 18 | self.conv_kernel_regularizer = layers.l2_regularizer( 19 | scale = config.conv_kernel_regularizer_scale) 20 | else: 21 | self.conv_kernel_regularizer = None 22 | 23 | if self.train_cnn and config.conv_activity_regularizer_scale > 0: 24 | self.conv_activity_regularizer = layers.l1_regularizer( 25 | scale = config.conv_activity_regularizer_scale) 26 | else: 27 | self.conv_activity_regularizer = None 28 | 29 | self.fc_kernel_initializer = tf.random_uniform_initializer( 30 | minval = -config.fc_kernel_initializer_scale, 31 | maxval = config.fc_kernel_initializer_scale) 32 | 33 | if self.is_train and config.fc_kernel_regularizer_scale > 0: 34 | self.fc_kernel_regularizer = layers.l2_regularizer( 35 | scale = config.fc_kernel_regularizer_scale) 36 | else: 37 | self.fc_kernel_regularizer = None 38 | 39 | if self.is_train and config.fc_activity_regularizer_scale > 0: 40 | self.fc_activity_regularizer = layers.l1_regularizer( 41 | scale = config.fc_activity_regularizer_scale) 42 | else: 43 | self.fc_activity_regularizer = None 44 | 45 | def conv2d(self, 46 | inputs, 47 | filters, 48 | kernel_size = (3, 3), 49 | strides = (1, 1), 50 | activation = tf.nn.relu, 51 | use_bias = True, 52 | name = None): 53 | """ 2D Convolution layer. 
""" 54 | if activation is not None: 55 | activity_regularizer = self.conv_activity_regularizer 56 | else: 57 | activity_regularizer = None 58 | return tf.layers.conv2d( 59 | inputs = inputs, 60 | filters = filters, 61 | kernel_size = kernel_size, 62 | strides = strides, 63 | padding='same', 64 | activation = activation, 65 | use_bias = use_bias, 66 | trainable = self.train_cnn, 67 | kernel_initializer = self.conv_kernel_initializer, 68 | kernel_regularizer = self.conv_kernel_regularizer, 69 | activity_regularizer = activity_regularizer, 70 | name = name) 71 | 72 | def max_pool2d(self, 73 | inputs, 74 | pool_size = (2, 2), 75 | strides = (2, 2), 76 | name = None): 77 | """ 2D Max Pooling layer. """ 78 | return tf.layers.max_pooling2d( 79 | inputs = inputs, 80 | pool_size = pool_size, 81 | strides = strides, 82 | padding='same', 83 | name = name) 84 | 85 | def dense(self, 86 | inputs, 87 | units, 88 | activation = tf.tanh, 89 | use_bias = True, 90 | name = None): 91 | """ Fully-connected layer. """ 92 | if activation is not None: 93 | activity_regularizer = self.fc_activity_regularizer 94 | else: 95 | activity_regularizer = None 96 | return tf.layers.dense( 97 | inputs = inputs, 98 | units = units, 99 | activation = activation, 100 | use_bias = use_bias, 101 | trainable = self.is_train, 102 | kernel_initializer = self.fc_kernel_initializer, 103 | kernel_regularizer = self.fc_kernel_regularizer, 104 | activity_regularizer = activity_regularizer, 105 | name = name) 106 | 107 | def dropout(self, 108 | inputs, 109 | name = None): 110 | """ Dropout layer. """ 111 | return tf.layers.dropout( 112 | inputs = inputs, 113 | rate = self.config.fc_drop_rate, 114 | training = self.is_train) 115 | 116 | def batch_norm(self, 117 | inputs, 118 | name = None): 119 | """ Batch normalization layer. """ 120 | return tf.layers.batch_normalization( 121 | inputs = inputs, 122 | training = self.train_cnn, 123 | trainable = self.train_cnn, 124 | name = name 125 | ) 126 | class Config(object): 127 | """ Wrapper class for various (hyper)parameters. 
""" 128 | def __init__(self): 129 | # about the model architecture 130 | self.cnn = 'vgg16' # 'vgg16' or 'resnet50' 131 | self.max_caption_length = 15 132 | self.dim_embedding = 512 133 | self.num_lstm_units = 512 134 | self.num_initalize_layers = 2 # 1 or 2 135 | self.dim_initalize_layer = 512 136 | self.num_attend_layers = 2 # 1 or 2 137 | self.dim_attend_layer = 512 138 | self.num_decode_layers = 2 # 1 or 2 139 | self.dim_decode_layer = 1024 140 | 141 | # about the weight initialization and regularization 142 | self.fc_kernel_initializer_scale = 0.08 143 | self.fc_kernel_regularizer_scale = 1e-4 144 | self.fc_activity_regularizer_scale = 0.0 145 | self.conv_kernel_regularizer_scale = 1e-4 146 | self.conv_activity_regularizer_scale = 0.0 147 | self.fc_drop_rate = 0.5 148 | self.lstm_drop_rate = 0.3 149 | self.attention_loss_factor = 0.01 150 | 151 | # about the optimization 152 | self.num_epochs = 100 153 | self.batch_size = 1#3264 154 | self.optimizer = 'Adam' # 'Adam', 'RMSProp', 'Momentum' or 'SGD' 155 | self.initial_learning_rate = 0.0001#0.0001 156 | self.learning_rate_decay_factor = 1.0 157 | self.num_steps_per_decay = 100000 158 | self.clip_gradients = 5.0 159 | self.momentum = 0.0 160 | self.use_nesterov = True 161 | self.decay = 0.9 162 | self.centered = True 163 | self.beta1 = 0.9 164 | self.beta2 = 0.999 165 | self.epsilon = 1e-6 166 | 167 | # about the saver 168 | self.save_period = 1000 169 | self.save_dir = './models/' 170 | self.summary_dir = './summary/' 171 | 172 | # about the vocabulary 173 | self.vocabulary_file = './vocabulary.csv' 174 | self.vocabulary_size = 5000 175 | 176 | # about the training 177 | self.train_feature_dir = '/home/zhanglu/Mask_RCNN_new/logs/feat/' 178 | self.train_image_dir = '/home/zhanglu/Documents/Referring Image Segmentation/MSCOCO/2014/train2014/' 179 | self.train_caption_file = '/home/zhanglu/Documents/Referring Image Segmentation/MSCOCO/2014/annotations/captions_train2014.json' 180 | self.temp_annotation_file = './train/anns.csv' 181 | self.temp_data_file = './train/data.npy' 182 | 183 | # about the evaluation 184 | self.eval_feature_dir = '/home/zhanglu/Mask_RCNN_new/val_feat/' 185 | self.eval_image_dir = '/home/zhanglu/Documents/Referring Image Segmentation/MSCOCO/2014/val2014/' 186 | self.eval_caption_file = '/home/zhanglu/Documents/Referring Image Segmentation/MSCOCO/2014/annotations/captions_val2014.json' 187 | self.eval_result_dir = './val/results/' 188 | self.eval_result_file = './val/results.json' 189 | self.save_eval_result_as_image = False 190 | self.eval_my_image_dir = '/home/zhanglu/Mask_RCNN/val/val256/'#/home/zhanglu/Mask_RCNN/train/train256/ 191 | self.eval_my_save_dir = '/home/zhanglu/image_captioning/feat/train/' 192 | # about the testing 193 | self.test_image_dir = './test/images/' 194 | self.test_result_dir = './test/results/' 195 | self.test_result_file = './test/results.csv' 196 | -------------------------------------------------------------------------------- /capsal/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and a small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # Concatenate or average outputs? 93 | # Outputs usually have a batch dimension and we concatenate 94 | # across it. 
If they don't, then the output is likely a loss 95 | # or a metric value that gets averaged across the batch. 96 | # Keras expects losses and metrics to be scalars. 97 | if K.int_shape(outputs[0]) == (): 98 | # Average 99 | m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs) 100 | else: 101 | # Concatenate 102 | m = KL.Concatenate(axis=0, name=name)(outputs) 103 | merged.append(m) 104 | return merged 105 | 106 | 107 | if __name__ == "__main__": 108 | # Testing code below. It creates a simple model to train on MNIST and 109 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 110 | # in TensorBoard. Run it as: 111 | # 112 | # python3 parallel_model.py 113 | 114 | import os 115 | import numpy as np 116 | import keras.optimizers 117 | from keras.datasets import mnist 118 | from keras.preprocessing.image import ImageDataGenerator 119 | 120 | GPU_COUNT = 2 121 | 122 | # Root directory of the project 123 | ROOT_DIR = os.path.abspath("../") 124 | 125 | # Directory to save logs and trained model 126 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 127 | 128 | def build_model(x_train, num_classes): 129 | # Reset default graph. Keras leaves old ops in the graph, 130 | # which are ignored for execution but clutter graph 131 | # visualization in TensorBoard. 132 | tf.reset_default_graph() 133 | 134 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 135 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 136 | name="conv1")(inputs) 137 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 138 | name="conv2")(x) 139 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 140 | x = KL.Flatten(name="flat1")(x) 141 | x = KL.Dense(128, activation='relu', name="dense1")(x) 142 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 143 | 144 | return KM.Model(inputs, x, "digit_classifier_model") 145 | 146 | # Load MNIST Data 147 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 148 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 149 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 150 | 151 | print('x_train shape:', x_train.shape) 152 | print('x_test shape:', x_test.shape) 153 | 154 | # Build data generator and model 155 | datagen = ImageDataGenerator() 156 | model = build_model(x_train, 10) 157 | 158 | # Add multi-GPU support. 
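# ParallelModel (defined above) keeps the original network as `inner_model`,
# splits every input tensor into GPU_COUNT slices on the CPU, runs one replica
# per GPU under a 'tower_%d' name scope, and merges the per-GPU outputs back on
# the CPU: outputs with a batch dimension are concatenated, while scalar
# losses/metrics are averaged. Since __getattribute__ redirects load/save calls
# to the inner model, the resulting checkpoints stay usable on a single GPU.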
159 | model = ParallelModel(model, GPU_COUNT) 160 | 161 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 162 | 163 | model.compile(loss='sparse_categorical_crossentropy', 164 | optimizer=optimizer, metrics=['accuracy']) 165 | 166 | model.summary() 167 | 168 | # Train 169 | model.fit_generator( 170 | datagen.flow(x_train, y_train, batch_size=64), 171 | steps_per_epoch=50, epochs=10, verbose=1, 172 | validation_data=(x_test, y_test), 173 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 174 | write_graph=True)] 175 | ) 176 | -------------------------------------------------------------------------------- /capsal/tfnpytokeras.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from mrcnn.vocabulary import Vocabulary 4 | model_dir = '/home/zhanglu/image_captioning/models/332999.npy' 5 | data_dict = np.load(model_dir).item() 6 | new_dict = {} 7 | keras_indices_order = np.argsort([0,2,1,3]) 8 | for k, v in data_dict.items(): 9 | if 'optimizer' not in k: 10 | if 'lstm' in k and 'kernel' in k: 11 | i = v[:,:512] 12 | j = v[:,512:1024] 13 | c = v[:,1024:1536] 14 | o = v[:,1536:2048] 15 | new_v = np.concatenate([i,c,j,o],axis=1) 16 | kernel = new_v[:1024,:] 17 | recurrent_kernel = new_v[1024:,:] 18 | new_k = 'gcap_lstm/kernel:0' 19 | new_k2 = 'gcap_lstm/recurrent_kernel:0' 20 | new_dict[new_k]=kernel 21 | new_dict[new_k2] = recurrent_kernel 22 | elif 'lstm' in k and 'bias' in k: 23 | i = v[ :512] 24 | j = v[512:1024] 25 | c = v[1024:1536] 26 | o = v[1536:2048] 27 | new_v = np.concatenate([i, c, j, o], axis=0) 28 | new_k = 'gcap_lstm/bias:0' 29 | new_dict[new_k] = new_v 30 | elif 'embedding' in k: 31 | new_k = 'gcap_embedding/embeddings:0' 32 | new_dict[new_k] = v 33 | elif 'attend' in k: 34 | new_k = k.replace('attend/','gcap_attend_') 35 | new_dict[new_k] = v 36 | elif 'initialize' in k: 37 | new_k = k.replace('initialize/', 'gcap_initialize_') 38 | new_dict[new_k] = v 39 | elif 'decode' in k: 40 | new_k = k.replace('decode/', 'gcap_decode_') 41 | new_dict[new_k] = v 42 | elif 'down_imagefeature' in k: 43 | new_k = k.replace('down_imagefeature', 'gcap_down_imagefeature') 44 | new_dict[new_k] = v 45 | np.save('keras_caption2.npy',new_dict) 46 | # import os 47 | # import sys 48 | # import random 49 | # import math 50 | # import cv2 51 | # import numpy as np 52 | # import skimage.io 53 | # import matplotlib 54 | # import matplotlib.pyplot as plt 55 | # import pickle 56 | # import time 57 | # import utils 58 | # import scipy.io as scio 59 | # import json 60 | # from mrcnn.vocabulary import Vocabulary 61 | # def load_coco_data(data_path='/home/zhanglu/Mask_RCNN/train/train', split='train'): 62 | # data_path = os.path.join(data_path, split) 63 | # start_t = time.time() 64 | # data = {} 65 | # 66 | # # data['features'] = hickle.load(os.path.join(data_path, '%s.features.hkl' %split)) 67 | # with open(os.path.join(data_path, '%s.file.names.pkl' % split), 'rb') as f: 68 | # data['file_names'] = pickle.load(f) 69 | # with open(os.path.join(data_path, '%s.captions.pkl' % split), 'rb') as f: 70 | # data['captions'] = pickle.load(f) 71 | # with open(os.path.join(data_path, '%s.annotations.pkl' % split), 'rb') as f: 72 | # data['annotations'] = pickle.load(f) 73 | # with open(os.path.join(data_path, '%s.image.idxs.pkl' % split), 'rb') as f: 74 | # data['image_idxs'] = pickle.load(f) 75 | # 76 | # if split == 'train': 77 | # with open(os.path.join(data_path, 'word_to_idx.pkl'), 'rb') as f: 78 | # 
data['word_to_idx'] = pickle.load(f) 79 | # 80 | # for k, v in data.iteritems(): 81 | # if type(v) == np.ndarray: 82 | # print k, type(v), v.shape, v.dtype 83 | # else: 84 | # print k, type(v), len(v) 85 | # end_t = time.time() 86 | # print "Elapse time: %.2f" % (end_t - start_t) 87 | # return data 88 | # def load_training_sample(lines): 89 | # 90 | # 91 | # 92 | # 93 | # files = [] 94 | # labels = [] 95 | # sals = [] 96 | # for line in lines: 97 | # # labels.append('/home/zhanglu/Documents/dataset/DUTS-TR/DUTS-TR-Mask01-extend/%s' % line.replace('.jpg', '.png')) 98 | # # files.append('/home/zhanglu/Documents/dataset/DUTS-TR/DUTS-TR-Image-extend/%s' % line) 99 | # # 100 | # labels.append('/home/zhanglu/Mask_RCNN/val/gt/%s' % line.replace('.jpg', '.png')) 101 | # files.append('/home/zhanglu/Mask_RCNN/val/val/%s' % line) 102 | # # sals.append('/home/zhanglu/Documents/dataset/DUTS-TR/contour-extend/%s' % line.replace('.jpg','.png')) 103 | # return files, labels 104 | # def save_pickle(data, path): 105 | # with open(path, 'wb') as f: 106 | # pickle.dump(data, f, pickle.HIGHEST_PROTOCOL) 107 | # print ('Saved %s..' %path) 108 | # data = load_coco_data(data_path='/home/zhanglu/Mask_RCNN/pixel-prediction/data', split='train') 109 | # word_to_idx = data['word_to_idx'] 110 | # n_examples = data['captions'].shape[0] 111 | # n_iters_per_epoch = int(np.ceil(float(n_examples))) 112 | # captions = data['captions'] 113 | # image_idxs = data['image_idxs'] 114 | # annotations = data['annotations'] 115 | # cap_sentence = annotations['caption'] 116 | # # vocabulary = Vocabulary(5000, 117 | # # '/home/zhanglu/image_captioning/vocabulary.csv') 118 | # # capidxs = [] 119 | # # for i, capp in enumerate(cap_sentence): 120 | # # idx = vocabulary.process_sentence(capp) 121 | # # capidxs.append(idx) 122 | # img_dir = data['file_names'] 123 | # num = image_idxs.shape[0] 124 | # num = img_dir.shape[0] 125 | # 126 | # # embedding_index = {} 127 | # # f = open('/home/zhanglu/Downloads/glove.6B/glove.6B.300d.txt') 128 | # # a = 0 129 | # # for line in f: 130 | # # values = line.split() 131 | # # word = values[0] 132 | # # coefs = np.asarray(values[1:],dtype='float32') 133 | # # embedding_index[word] = coefs 134 | # # 135 | # # num_words = 6277#6951 136 | # # embedding_matrix = np.zeros((6277,300)) 137 | # # for word, i in word_to_idx.items(): 138 | # # embedding_vector = embedding_index.get(word) 139 | # # if embedding_vector is not None: 140 | # # embedding_matrix[i] = embedding_vector 141 | # # save_pickle(embedding_matrix, 'embedding_matrix2.pkl') 142 | # files,labels = load_training_sample(img_dir) 143 | # in_dir = '/home/zhanglu/Mask_RCNN/seg_fet_val' 144 | # in_dir2 = '/home/zhanglu/Mask_RCNN/train/cls_masks_true' 145 | # save_dir = '/home/zhanglu/Mask_RCNN/proposal-prediction/Result/coco/gtptoposa' 146 | # gt_store = {}; 147 | # vocabulary = Vocabulary(5000, 148 | # '/home/zhanglu/image_captioning/vocabulary.csv') 149 | # word = vocabulary.get_sentence([0]) 150 | # data = np.load('/home/zhanglu/image_captioning/train/data.npy').item() 151 | # word_idxs = data['word_idxs'] 152 | # masks = data['masks'] 153 | # annotations = pd.read_csv('/home/zhanglu/image_captioning/train/anns.csv') 154 | # captions = annotations['caption'].values 155 | # image_ids = annotations['image_id'].values 156 | # image_files = annotations['image_file'].values 157 | # feature_files = annotations['feature_file'].values 158 | # result=[] 159 | # a = np.load('/home/zhanglu/Mask_RCNN/train/train_data_upcap.npy') 160 | # aaa = 0 161 | # for i 
in range(num): 162 | # 163 | # # img_idx = image_idxs[i] 164 | # # file_names = files[img_idx] 165 | # # img_name = img_dir[img_idx] 166 | # # out_dir1 = os.path.join(in_dir, img_name).replace('.jpg', '.npy') 167 | # # out_dir2 = os.path.join(in_dir2, img_name).replace('.jpg', '.mat') 168 | # # 169 | # # cap = captions[i,:] 170 | # # gt = scio.loadmat(out_dir2) 171 | # # 172 | # # gt_mask = gt['new_mask'] 173 | # # 174 | # # gt_ids = np.squeeze(gt['id']) 175 | # # if len(gt_mask.shape) > 2: 176 | # # gt_map = np.max(gt_mask, axis=2) 177 | # # bb = utils.extract_bboxes(gt_mask).astype(np.float32) 178 | # # else: 179 | # # gt_map = gt_mask 180 | # # gt_mask1 = np.reshape(gt_mask, [gt_mask.shape[0], gt_mask.shape[1], 1]) 181 | # # bb = utils.extract_bboxes(gt_mask1).astype(np.float32) 182 | # # gt_mask = gt_mask1 183 | # # result.append({'image_id': gt_ids, 184 | # # 'masks': gt_mask, 185 | # # 'gt': gt_map, 186 | # # 'b_box': bb, 187 | # # 'caption':cap, 188 | # # 'image_name': img_name}) 189 | # print(i) 190 | # ### old 191 | # file_names = files[i] 192 | # img_name = img_dir[i] 193 | # out_dir1 = os.path.join(in_dir, img_name).replace('.jpg', '.npy') 194 | # out_dir2 = os.path.join(in_dir2, img_name).replace('.jpg', '.mat') 195 | # 196 | # gt = scio.loadmat(out_dir2) 197 | # 198 | # gt_mask = gt['new_mask'] 199 | # 200 | # gt_ids = np.squeeze(gt['id']) 201 | # cap_ids = np.where(image_ids == gt_ids) 202 | # caption = captions[cap_ids] 203 | # cap = word_idxs[cap_ids,:] 204 | # cap_mask = masks[cap_ids,:] 205 | # 206 | # if cap.shape[1] !=0: 207 | # aaa = aaa + cap.shape[1] 208 | # if len(gt_mask.shape) > 2: 209 | # gt_map = np.max(gt_mask, axis=2) 210 | # bb = utils.extract_bboxes(gt_mask).astype(np.float32) 211 | # else: 212 | # gt_map = gt_mask 213 | # gt_mask1 = np.reshape(gt_mask, [gt_mask.shape[0], gt_mask.shape[1], 1]) 214 | # bb = utils.extract_bboxes(gt_mask1).astype(np.float32) 215 | # gt_mask = gt_mask1 216 | # result.append({'image_id': gt_ids, 217 | # 'masks': gt_mask, 218 | # 'gt': gt_map, 219 | # 'b_box': bb, 220 | # 'caption':cap, 221 | # 'caption_mask':cap_mask, 222 | # 'image_name': img_name}) 223 | # print(i) 224 | # else: 225 | # print(i) 226 | # 227 | # np.save('/home/zhanglu/Mask_RCNN/train/train_data_upcap2.npy', result) 228 | # 229 | # 230 | # 231 | # 232 | # 233 | # # if not os.path.exists(out_dir1): 234 | # # if img_name !=b: 235 | # # image = skimage.io.imread(os.path.join(IMAGE_DIR, random.choice(file_names))) 236 | # # image = skimage.io.imread(file_names) 237 | # # # Run detection 238 | # # if len(image.shape) <3: 239 | # # image = np.stack((image,image,image),axis=2) 240 | # 241 | # 242 | -------------------------------------------------------------------------------- /capsal/train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import math 5 | import re 6 | import time 7 | import numpy as np 8 | import cv2 9 | import matplotlib 10 | import matplotlib.pyplot as plt 11 | import skimage.color 12 | import skimage.io 13 | 14 | from capsal.config import Config 15 | from capsal import utils 16 | from capsal import model_new10_upcap as modellib 17 | from capsal.eval_cap import COCOEvalCap 18 | import json 19 | 20 | os.environ["CUDA_VISIBLE_DEVICES"]='1' 21 | from capsal.vocabulary import Vocabulary 22 | import skimage.transform 23 | # import skimage 24 | # Root directory of the project 25 | ROOT_DIR = os.getcwd() 26 | 27 | # Directory to save logs and trained model 28 | 
DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 29 | # Local path to trained weights file 30 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 31 | # Download COCO trained weights from Releases if needed 32 | if not os.path.exists(COCO_MODEL_PATH): 33 | utils.download_trained_weights(COCO_MODEL_PATH) 34 | 35 | 36 | class SaliencyConfig(Config): 37 | """Configuration for training on the toy shapes dataset. 38 | Derives from the base Config class and overrides values specific 39 | to the toy shapes dataset. 40 | """ 41 | # Give the configuration a recognizable name 42 | NAME = "saliency" 43 | 44 | # Train on 1 GPU and 8 images per GPU. We can put multiple images on each 45 | # GPU because the images are small. Batch size is 8 (GPUs * images/GPU). 46 | GPU_COUNT = 1 47 | IMAGES_PER_GPU = 1 48 | STEPS_PER_EPOCH = 5265 // IMAGES_PER_GPU#25256 5265 49 | VALIDATION_STEPS = 100 // IMAGES_PER_GPU 50 | TRAIN_ROIS_PER_IMAGE = 200 51 | # Number of classes (including background) 52 | NUM_CLASSES = 1 + 1 # background + 3 shapes 53 | DETECTION_MIN_CONFIDENCE = 0.8 54 | # Use small images for faster training. Set the limits of the small side 55 | # the large side, and that determines the image shape. 56 | 57 | # 58 | # # Use smaller anchors because our image and objects are small 59 | # RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 60 | # 61 | # # Reduce training ROIs per image because the images are small and have 62 | # # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 63 | # TRAIN_ROIS_PER_IMAGE = 32 64 | # 65 | # # Use a small epoch since the data is simple 66 | # STEPS_PER_EPOCH = 100 67 | # 68 | # # use small validation steps since the epoch is small 69 | 70 | class SaliencyDataset(utils.Dataset): 71 | def load_sal(self, subset): 72 | """Load the saliency dataset for train or validation. 73 | dataset_dir: The root directory of the saliency dataset.. 74 | subset: train or val. 
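Each record in ./data/train.npy or ./data/val.npy is a dict with the keys
'image_id', 'image_name', 'masks' and 'gt' (training records also carry
'caption' and 'caption_mask').

Typical preparation (an illustrative sketch following the utils.Dataset
workflow):

    dataset_train = SaliencyDataset()
    dataset_train.load_sal('train')
    dataset_train.prepare()
    dataset_val = SaliencyDataset()
    dataset_val.load_sal('val')
    dataset_val.prepare()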
75 | """ 76 | # Add classes 77 | self.add_class("saliency", 1, "foreground") 78 | if subset == 'train': 79 | sal_dataset = np.load('./data/train.npy',encoding='latin1') 80 | else: 81 | sal_dataset = np.load('./data/val.npy',encoding='latin1') 82 | self.sal_data = sal_dataset 83 | 84 | for sal_info in sal_dataset: 85 | 86 | image_id = int(sal_info['image_id']) 87 | image_name = sal_info['image_name'] 88 | masks = sal_info['masks'].astype(np.int32) 89 | gt = sal_info['gt'].astype(np.float32) 90 | if subset == 'train': 91 | caption = sal_info['caption'].astype(np.int32) 92 | 93 | caption_mask = sal_info['caption_mask'].astype(np.float32) 94 | # b_box = float(sal_info['b_box']) 95 | if subset == 'train': 96 | dataset_dir = './data/train_img_gt/image/' 97 | self.add_image("saliency", image_id=image_id, path=os.path.join(dataset_dir, image_name), 98 | mask=masks, image_name=image_name, gt=gt, caption=caption, caption_mask=caption_mask) 99 | else: 100 | dataset_dir = './data/val_img_gt/image/' 101 | self.add_image("saliency", image_id=image_id, path=os.path.join(dataset_dir, image_name), 102 | mask=masks, image_name=image_name, gt=gt) 103 | # 104 | def load_mask(self,image_id): 105 | info = self.image_info[image_id] 106 | gt = info['gt'] 107 | # getmask 108 | mask = info['mask'] 109 | return mask, np.ones([mask.shape[-1]],dtype=np.int32) 110 | def load_caption(self,image_id): 111 | info = self.image_info[image_id] 112 | caption = info['caption'] 113 | caption_mask = info['caption_mask'] 114 | # caption = np.zeros((2,15)) 115 | return caption, caption_mask 116 | def image_reference(self,image_id): 117 | #':return the path og the image' 118 | info = self.image_info[image_id] 119 | if info["source"] == "saliency": 120 | return info['id'] 121 | else: 122 | super(self.__class__).image_reference(self, image_id) 123 | def load_img_list(dataset): 124 | 125 | if dataset == 'coco': 126 | path = '/home/zhanglu/Mask_RCNN/val/val' 127 | elif dataset == 'HKU-IS': 128 | path = './dataset/HKU-IS/HKU-IS_Image' 129 | elif dataset == 'PASCAL-S': 130 | path = './dataset/pascal-s/PASCAL_S-Image' 131 | elif dataset == 'DUT': 132 | path = './dataset/DUTS-TR/DUTS/DUT-test/DUT-test-Image' 133 | elif dataset == 'THUS': 134 | path = './dataset/THUR/THUR-Image' 135 | elif dataset == 'SOC': 136 | path = './dataset/SOC6K_Release/' 137 | 138 | imgs = os.listdir(path) 139 | 140 | return path, imgs 141 | def predict2(model): 142 | datasets = ['coco']#'coco','PASCAL-S','SOC','ECSSD','DUT','THUS','HKU-IS' 143 | for dataset in datasets: 144 | print(dataset) 145 | path, imgs = load_img_list(dataset) 146 | 147 | save_dir = './result' 148 | save_dir1 = save_dir + '/result1'+'_'+dataset +'/' 149 | if not os.path.exists(save_dir1): 150 | os.mkdir(save_dir1) 151 | save_dir2 = save_dir + '/result_pixel1'+'_'+dataset +'/' 152 | if not os.path.exists(save_dir2): 153 | os.mkdir(save_dir2) 154 | save_dir3 = save_dir + '/combine1'+'_'+dataset +'/' 155 | if not os.path.exists(save_dir3): 156 | os.mkdir(save_dir3) 157 | save_dir4 = save_dir + '/caption' + '_' + dataset + '/' 158 | if not os.path.exists(save_dir4): 159 | os.mkdir(save_dir4) 160 | idx = 0 161 | 162 | for f_img in imgs: 163 | print(idx) 164 | image_name = f_img 165 | 166 | 167 | image = skimage.io.imread(os.path.join(path, f_img)) 168 | # If grayscale. Convert to RGB for consistency. 
169 | if image.ndim != 3: 170 | image = skimage.color.gray2rgb(image) 171 | # If has an alpha channel, remove it for consistency 172 | if image.shape[-1] == 4: 173 | image = image[..., :3] 174 | if image.shape[0] > 1024 or image.shape[1] > 1024: 175 | image = skimage.transform.resize(image,(800,800),preserve_range=1) 176 | image = image.astype(np.uint8) 177 | r = model.detect([image], verbose=0)[0] 178 | # visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], 179 | # class_names, r['scores']) 180 | score_masks = r['proposal'].astype(np.float32) 181 | score_masks = np.squeeze(score_masks) 182 | pixel_mask = r['pixel'].astype(np.float32) 183 | combine_mask = r['combine'].astype(np.float32) 184 | 185 | 186 | cv2.imwrite(save_dir1 + image_name, score_masks * 255) 187 | cv2.imwrite(save_dir2 + image_name, pixel_mask * 255) 188 | cv2.imwrite(save_dir3 + image_name, combine_mask * 255) 189 | idx = idx +1 190 | 191 | def predict(dataset,model,save_dir): 192 | class_names = ['BG','foreground'] 193 | image_ids = dataset.image_ids 194 | save_dir = './result' 195 | save_dir1 = save_dir + '/result/' 196 | if not os.path.exists(save_dir1): 197 | os.mkdir(save_dir1) 198 | save_dir2 = save_dir + '/result_pixel/' 199 | if not os.path.exists(save_dir2): 200 | os.mkdir(save_dir2) 201 | save_dir3 = save_dir + '/combine/' 202 | if not os.path.exists(save_dir3): 203 | os.mkdir(save_dir3) 204 | # save_dir4 = save_dir + '/combine4/' 205 | # if not os.path.exists(save_dir4): 206 | # os.mkdir(save_dir4) 207 | vocabulary = Vocabulary(5000, 208 | './data/vocabulary.csv') 209 | ids =[] 210 | caption = {} 211 | for image_id in image_ids: 212 | word_out = [] 213 | print(image_id) 214 | image = dataset.load_image(image_id) 215 | 216 | image_name = dataset.image_info[image_id]['image_name'] 217 | img_name2, ext = os.path.splitext(image_name) 218 | final = np.zeros((image.shape[0],image.shape[1])) 219 | final_pro = np.zeros((image.shape[0], image.shape[1])) 220 | final_combine = np.zeros((image.shape[0], image.shape[1])) 221 | id = dataset.image_info[image_id]['id'] 222 | ids.append(id) 223 | 224 | 225 | 226 | r = model.detect([image], verbose=0)[0] 227 | 228 | cap_id = np.squeeze(r['word']).astype(np.int) 229 | word = vocabulary.get_sentence(cap_id) 230 | word_out.append(word.replace('.','')) 231 | caption[id] = word_out 232 | 233 | score_masks = r['proposal'].astype(np.float32) 234 | score_masks = np.squeeze(score_masks) 235 | out_name = save_dir1 + img_name2 + '.jpg' 236 | cv2.imwrite(out_name, score_masks * 255) 237 | pixel_mask = r['pixel'].astype(np.float32) 238 | out_name = save_dir2 + img_name2 + '.jpg' 239 | cv2.imwrite(out_name, pixel_mask * 255) 240 | combine_mask = r['combine'].astype(np.float32) 241 | out_name = save_dir3 + img_name2 + '.jpg' 242 | cv2.imwrite(out_name, combine_mask * 255) 243 | caption_gt = json.load(open('./data/caption_gt.json'), encoding='utf-8') 244 | ceval = COCOEvalCap(caption_gt, caption) 245 | ceval.evaluate(ids) 246 | 247 | if __name__ == '__main__': 248 | import argparse 249 | 250 | # Parse command line arguments 251 | parser = argparse.ArgumentParser( 252 | description='Train Mask R-CNN on MS COCO.') 253 | parser.add_argument("--command", 254 | default='evaluate', required=False, 255 | metavar="", 256 | help="'train' or 'evaluate' on MS COCO") 257 | parser.add_argument('--dataset', required=False, 258 | default='', 259 | metavar="/path/to/coco/", 260 | help='Directory of the MS-COCO dataset') 261 | 262 | parser.add_argument('--model', required=False, 263 | 
default='/home/zhanglu/Mask_RCNN_new/logs/saliency20181122T1118/mask_rcnn_saliency_0020.h5',#', 264 | metavar="/path/to/weights.h5", 265 | help="Path to weights .h5 file or 'coco'") 266 | parser.add_argument('--logs', required=False, 267 | default=DEFAULT_LOGS_DIR, 268 | metavar="/path/to/logs/", 269 | help='Logs and checkpoints directory (default=logs/)') 270 | parser.add_argument('--limit', required=False, 271 | default=500, 272 | metavar="", 273 | help='Images to use for evaluation (default=500)') 274 | parser.add_argument('--download', required=False, 275 | default=False, 276 | metavar="", 277 | help='Automatically download and unzip MS-COCO files (default=False)', 278 | type=bool) 279 | args = parser.parse_args() 280 | print("Command: ", args.command) 281 | print("Model: ", args.model) 282 | print("Dataset: ", args.dataset) 283 | print("Logs: ", args.logs) 284 | print("Auto Download: ", args.download) 285 | 286 | # Configurations 287 | if args.command == "train": 288 | config = SaliencyConfig() 289 | else: 290 | class InferenceConfig(SaliencyConfig): 291 | # Set batch size to 1 since we'll be running inference on 292 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU 293 | GPU_COUNT = 1 294 | IMAGES_PER_GPU = 1 295 | DETECTION_MIN_CONFIDENCE = 0.8 296 | config = InferenceConfig() 297 | config.display() 298 | 299 | # Create model 300 | if args.command == "train": 301 | model = modellib.MaskRCNN(mode="training", config=config, 302 | model_dir=args.logs) 303 | else: 304 | model = modellib.MaskRCNN(mode="inference", config=config, 305 | model_dir=args.logs) 306 | 307 | # Select weights file to load 308 | if args.model.lower() == "coco": 309 | model_path = COCO_MODEL_PATH 310 | elif args.model.lower() == "last": 311 | # Find last trained weights 312 | model_path = model.find_last()[1] 313 | elif args.model.lower() == "imagenet": 314 | # Start from ImageNet trained weights 315 | model_path = model.get_imagenet_weights() 316 | else: 317 | model_path = args.model 318 | 319 | # Load weights 320 | print("Loading weights ", model_path) 321 | 322 | if args.model.lower() == "coco": 323 | # Load weights trained on MS COCO, but skip layers that 324 | # are different due to the different number of classes 325 | # See README for instructions to download the COCO weights 326 | model.load_weights(model_path, by_name=True, 327 | exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 328 | "mrcnn_bbox", "mrcnn_mask"]) 329 | 330 | else: 331 | model.load_weights(model_path, by_name= True) 332 | 333 | 334 | model.train(dataset_train, dataset_val, 335 | learning_rate=config.LEARNING_RATE, 336 | epochs=40, 337 | layers='heads', 338 | augmentation=augmentation) 339 | 340 | 341 | 342 | model.train(dataset_train, dataset_val, 343 | learning_rate=config.LEARNING_RATE/10, 344 | epochs=120, 345 | layers='all', 346 | augmentation=augmentation) 347 | 348 | 349 | 350 | -------------------------------------------------------------------------------- /capsal/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 
6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import math 13 | import random 14 | import numpy as np 15 | import tensorflow as tf 16 | import scipy 17 | import skimage.color 18 | import skimage.io 19 | import skimage.transform 20 | # import urllib2 21 | import shutil 22 | import warnings 23 | 24 | # URL from which to download the latest COCO trained weights 25 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 26 | 27 | def resize_bbox(bbox_1f,input_image): 28 | # oh_masks = (skimage.transform.resize(oh_masks, (512, 512), 29 | # order=1, mode="constant", preserve_range=True)).astype(np.uint8) 30 | bbox_1f = np.squeeze(bbox_1f) 31 | h = bbox_1f[2] - bbox_1f[0] 32 | w = bbox_1f[3] - bbox_1f[1] 33 | c_y = (bbox_1f[2] - h / 2) 34 | c_x = (bbox_1f[3] - w / 2) 35 | new_y1 = np.maximum((c_y - np.round(h * 0.75)), 0).astype(np.int32) 36 | new_x1 = np.maximum((c_x - np.round(w * 0.75)), 0).astype(np.int32) 37 | new_y2 = np.minimum(c_y + np.round(h * 0.75), input_image.shape[0]).astype(np.int32) 38 | new_x2 = np.minimum((c_x + np.round(w * 0.75)), input_image.shape[1]).astype(np.int32) 39 | return [new_y1,new_x1,new_y2,new_x2] 40 | ############################################################ 41 | # Bounding Boxes 42 | ############################################################ 43 | import re 44 | # Backport Python 3.4's regular expression "fullmatch()" to Python 2 45 | def fullmatch(regex, string, flags=0): 46 | """Emulate python-3.4 re.fullmatch().""" 47 | return re.match("(?:" + regex + r")\Z", string, flags=flags) 48 | 49 | def extract_bboxes(mask): 50 | """Compute bounding boxes from masks. 51 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 52 | 53 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 54 | """ 55 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 56 | for i in range(mask.shape[-1]): 57 | m = mask[:, :, i] 58 | # Bounding box. 59 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 60 | vertical_indicies = np.where(np.any(m, axis=1))[0] 61 | if horizontal_indicies.shape[0]: 62 | x1, x2 = horizontal_indicies[[0, -1]] 63 | y1, y2 = vertical_indicies[[0, -1]] 64 | # x2 and y2 should not be part of the box. Increment by 1. 65 | x2 += 1 66 | y2 += 1 67 | else: 68 | # No mask for this instance. Might happen due to 69 | # resizing or cropping. Set bbox to zeros 70 | x1, x2, y1, y2 = 0, 0, 0, 0 71 | boxes[i] = np.array([y1, x1, y2, x2]) 72 | return boxes.astype(np.int32) 73 | 74 | 75 | def compute_iou(box, boxes, box_area, boxes_area): 76 | """Calculates IoU of the given box with the array of the given boxes. 77 | box: 1D vector [y1, x1, y2, x2] 78 | boxes: [boxes_count, (y1, x1, y2, x2)] 79 | box_area: float. the area of 'box' 80 | boxes_area: array of length boxes_count. 81 | 82 | Note: the areas are passed in rather than calculated here for 83 | efficency. Calculate once in the caller to avoid duplicate work. 84 | """ 85 | # Calculate intersection areas 86 | y1 = np.maximum(box[0], boxes[:, 0]) 87 | y2 = np.minimum(box[2], boxes[:, 2]) 88 | x1 = np.maximum(box[1], boxes[:, 1]) 89 | x2 = np.minimum(box[3], boxes[:, 3]) 90 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 91 | union = box_area + boxes_area[:] - intersection[:] 92 | iou = (intersection) / (union) 93 | return iou 94 | 95 | 96 | def compute_overlaps(boxes1, boxes2): 97 | """Computes IoU overlaps between two sets of boxes. 
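For example (illustrative): with boxes1 = [[0, 0, 2, 2]] and boxes2 = [[1, 1, 3, 3]],
the intersection area is 1 and the union is 4 + 4 - 1 = 7, so the returned
matrix is [[1/7]], about [[0.143]].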
98 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 99 | 100 | For better performance, pass the largest set first and the smaller second. 101 | """ 102 | # Areas of anchors and GT boxes 103 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 104 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 105 | 106 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 107 | # Each cell contains the IoU value. 108 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 109 | for i in range(overlaps.shape[1]): 110 | box2 = boxes2[i] 111 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 112 | return overlaps 113 | 114 | 115 | def compute_overlaps_masks(masks1, masks2): 116 | '''Computes IoU overlaps between two sets of masks. 117 | masks1, masks2: [Height, Width, instances] 118 | ''' 119 | # flatten masks 120 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 121 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 122 | area1 = np.sum(masks1, axis=0) 123 | area2 = np.sum(masks2, axis=0) 124 | 125 | # intersections and union 126 | intersections = np.dot(masks1.T, masks2) 127 | union = area1[:, None] + area2[None, :] - intersections 128 | overlaps = intersections / union 129 | 130 | return overlaps 131 | 132 | 133 | def non_max_suppression(boxes, scores, threshold): 134 | """Performs non-maximum supression and returns indicies of kept boxes. 135 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 136 | scores: 1-D array of box scores. 137 | threshold: Float. IoU threshold to use for filtering. 138 | """ 139 | assert boxes.shape[0] > 0 140 | if boxes.dtype.kind != "f": 141 | boxes = boxes.astype(np.float32) 142 | 143 | # Compute box areas 144 | y1 = boxes[:, 0] 145 | x1 = boxes[:, 1] 146 | y2 = boxes[:, 2] 147 | x2 = boxes[:, 3] 148 | area = (y2 - y1) * (x2 - x1) 149 | 150 | # Get indicies of boxes sorted by scores (highest first) 151 | ixs = scores.argsort()[::-1] 152 | 153 | pick = [] 154 | while len(ixs) > 0: 155 | # Pick top box and add its index to the list 156 | i = ixs[0] 157 | pick.append(i) 158 | # Compute IoU of the picked box with the rest 159 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 160 | # Identify boxes with IoU over the threshold. This 161 | # returns indicies into ixs[1:], so add 1 to get 162 | # indicies into ixs. 163 | remove_ixs = np.where(iou > threshold)[0] + 1 164 | # Remove indicies of the picked and overlapped boxes. 165 | ixs = np.delete(ixs, remove_ixs) 166 | ixs = np.delete(ixs, 0) 167 | return np.array(pick, dtype=np.int32) 168 | 169 | 170 | def apply_box_deltas(boxes, deltas): 171 | """Applies the given deltas to the given boxes. 172 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 
173 | deltas: [N, (dy, dx, log(dh), log(dw))] 174 | """ 175 | boxes = boxes.astype(np.float32) 176 | # Convert to y, x, h, w 177 | height = boxes[:, 2] - boxes[:, 0] 178 | width = boxes[:, 3] - boxes[:, 1] 179 | center_y = boxes[:, 0] + 0.5 * height 180 | center_x = boxes[:, 1] + 0.5 * width 181 | # Apply deltas 182 | center_y += deltas[:, 0] * height 183 | center_x += deltas[:, 1] * width 184 | height *= np.exp(deltas[:, 2]) 185 | width *= np.exp(deltas[:, 3]) 186 | # Convert back to y1, x1, y2, x2 187 | y1 = center_y - 0.5 * height 188 | x1 = center_x - 0.5 * width 189 | y2 = y1 + height 190 | x2 = x1 + width 191 | return np.stack([y1, x1, y2, x2], axis=1) 192 | 193 | 194 | def box_refinement_graph(box, gt_box): 195 | """Compute refinement needed to transform box to gt_box. 196 | box and gt_box are [N, (y1, x1, y2, x2)] 197 | """ 198 | box = tf.cast(box, tf.float32) 199 | gt_box = tf.cast(gt_box, tf.float32) 200 | 201 | height = box[:, 2] - box[:, 0] 202 | width = box[:, 3] - box[:, 1] 203 | center_y = box[:, 0] + 0.5 * height 204 | center_x = box[:, 1] + 0.5 * width 205 | 206 | gt_height = gt_box[:, 2] - gt_box[:, 0] 207 | gt_width = gt_box[:, 3] - gt_box[:, 1] 208 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 209 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 210 | 211 | dy = (gt_center_y - center_y) / height 212 | dx = (gt_center_x - center_x) / width 213 | dh = tf.log(gt_height / height) 214 | dw = tf.log(gt_width / width) 215 | 216 | result = tf.stack([dy, dx, dh, dw], axis=1) 217 | return result 218 | 219 | 220 | def box_refinement(box, gt_box): 221 | """Compute refinement needed to transform box to gt_box. 222 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 223 | assumed to be outside the box. 224 | """ 225 | box = box.astype(np.float32) 226 | gt_box = gt_box.astype(np.float32) 227 | 228 | height = box[:, 2] - box[:, 0] 229 | width = box[:, 3] - box[:, 1] 230 | center_y = box[:, 0] + 0.5 * height 231 | center_x = box[:, 1] + 0.5 * width 232 | 233 | gt_height = gt_box[:, 2] - gt_box[:, 0] 234 | gt_width = gt_box[:, 3] - gt_box[:, 1] 235 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 236 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 237 | 238 | dy = (gt_center_y - center_y) / height 239 | dx = (gt_center_x - center_x) / width 240 | dh = np.log(gt_height / height) 241 | dw = np.log(gt_width / width) 242 | 243 | return np.stack([dy, dx, dh, dw], axis=1) 244 | 245 | 246 | ############################################################ 247 | # Dataset 248 | ############################################################ 249 | 250 | class Dataset(object): 251 | """The base class for dataset classes. 252 | To use it, create a new class that adds functions specific to the dataset 253 | you want to use. For example: 254 | 255 | class CatsAndDogsDataset(Dataset): 256 | def load_cats_and_dogs(self): 257 | ... 258 | def load_mask(self, image_id): 259 | ... 260 | def image_reference(self, image_id): 261 | ... 262 | 263 | See COCODataset and ShapesDataset as examples. 264 | """ 265 | 266 | def __init__(self, class_map=None): 267 | self._image_ids = [] 268 | self.image_info = [] 269 | # Background is always the first class 270 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 271 | self.source_class_ids = {} 272 | 273 | def add_class(self, source, class_id, class_name): 274 | assert "." not in source, "Source name cannot contain a dot" 275 | # Does the class exist already? 
276 | for info in self.class_info: 277 | if info['source'] == source and info["id"] == class_id: 278 | # source.class_id combination already available, skip 279 | return 280 | # Add the class 281 | self.class_info.append({ 282 | "source": source, 283 | "id": class_id, 284 | "name": class_name, 285 | }) 286 | 287 | def add_image(self, source, image_id, path, **kwargs): 288 | image_info = { 289 | "id": image_id, 290 | "source": source, 291 | "path": path, 292 | } 293 | image_info.update(kwargs) 294 | self.image_info.append(image_info) 295 | 296 | def image_reference(self, image_id): 297 | """Return a link to the image in its source Website or details about 298 | the image that help looking it up or debugging it. 299 | 300 | Override for your dataset, but pass to this function 301 | if you encounter images not in your dataset. 302 | """ 303 | return "" 304 | 305 | def prepare(self, class_map=None): 306 | """Prepares the Dataset class for use. 307 | 308 | TODO: class map is not supported yet. When done, it should handle mapping 309 | classes from different datasets to the same class ID. 310 | """ 311 | 312 | def clean_name(name): 313 | """Returns a shorter version of object names for cleaner display.""" 314 | return ",".join(name.split(",")[:1]) 315 | 316 | # Build (or rebuild) everything else from the info dicts. 317 | self.num_classes = len(self.class_info) 318 | self.class_ids = np.arange(self.num_classes) 319 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 320 | self.num_images = len(self.image_info) 321 | self._image_ids = np.arange(self.num_images) 322 | 323 | # Mapping from source class and image IDs to internal IDs 324 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 325 | for info, id in zip(self.class_info, self.class_ids)} 326 | self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id 327 | for info, id in zip(self.image_info, self.image_ids)} 328 | 329 | # Map sources to class_ids they support 330 | self.sources = list(set([i['source'] for i in self.class_info])) 331 | self.source_class_ids = {} 332 | # Loop over datasets 333 | for source in self.sources: 334 | self.source_class_ids[source] = [] 335 | # Find classes that belong to this dataset 336 | for i, info in enumerate(self.class_info): 337 | # Include BG class in all datasets 338 | if i == 0 or source == info['source']: 339 | self.source_class_ids[source].append(i) 340 | 341 | def map_source_class_id(self, source_class_id): 342 | """Takes a source class ID and returns the int class ID assigned to it. 343 | 344 | For example: 345 | dataset.map_source_class_id("coco.12") -> 23 346 | """ 347 | return self.class_from_source_map[source_class_id] 348 | 349 | def get_source_class_id(self, class_id, source): 350 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 351 | info = self.class_info[class_id] 352 | assert info['source'] == source 353 | return info['id'] 354 | 355 | def append_data(self, class_info, image_info): 356 | self.external_to_class_id = {} 357 | for i, c in enumerate(self.class_info): 358 | for ds, id in c["map"]: 359 | self.external_to_class_id[ds + str(id)] = i 360 | 361 | # Map external image IDs to internal ones. 
362 | self.external_to_image_id = {} 363 | for i, info in enumerate(self.image_info): 364 | self.external_to_image_id[info["ds"] + str(info["id"])] = i 365 | 366 | @property 367 | def image_ids(self): 368 | return self._image_ids 369 | 370 | def source_image_link(self, image_id): 371 | """Returns the path or URL to the image. 372 | Override this to return a URL to the image if it's availble online for easy 373 | debugging. 374 | """ 375 | return self.image_info[image_id]["path"] 376 | 377 | def load_image(self, image_id): 378 | """Load the specified image and return a [H,W,3] Numpy array. 379 | """ 380 | # Load image 381 | image = skimage.io.imread(self.image_info[image_id]['path']) 382 | # If grayscale. Convert to RGB for consistency. 383 | if image.ndim != 3: 384 | image = skimage.color.gray2rgb(image) 385 | # If has an alpha channel, remove it for consistency 386 | if image.shape[-1] == 4: 387 | image = image[..., :3] 388 | return image 389 | 390 | def load_mask(self, image_id): 391 | """Load instance masks for the given image. 392 | 393 | Different datasets use different ways to store masks. Override this 394 | method to load instance masks and return them in the form of am 395 | array of binary masks of shape [height, width, instances]. 396 | 397 | Returns: 398 | masks: A bool array of shape [height, width, instance count] with 399 | a binary mask per instance. 400 | class_ids: a 1D array of class IDs of the instance masks. 401 | """ 402 | # Override this function to load a mask from your dataset. 403 | # Otherwise, it returns an empty mask. 404 | mask = np.empty([0, 0, 0]) 405 | class_ids = np.empty([0], np.int32) 406 | return mask, class_ids 407 | 408 | 409 | def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"): 410 | """Resizes an image keeping the aspect ratio unchanged. 411 | 412 | min_dim: if provided, resizes the image such that it's smaller 413 | dimension == min_dim 414 | max_dim: if provided, ensures that the image longest side doesn't 415 | exceed this value. 416 | min_scale: if provided, ensure that the image is scaled up by at least 417 | this percent even if min_dim doesn't require it. 418 | mode: Resizing mode. 419 | none: No resizing. Return the image unchanged. 420 | square: Resize and pad with zeros to get a square image 421 | of size [max_dim, max_dim]. 422 | pad64: Pads width and height with zeros to make them multiples of 64. 423 | If min_dim or min_scale are provided, it scales the image up 424 | before padding. max_dim is ignored in this mode. 425 | The multiple of 64 is needed to ensure smooth scaling of feature 426 | maps up and down the 6 levels of the FPN pyramid (2**6=64). 427 | crop: Picks random crops from the image. First, scales the image based 428 | on min_dim and min_scale, then picks a random crop of 429 | size min_dim x min_dim. Can be used in training only. 430 | max_dim is not used in this mode. 431 | 432 | Returns: 433 | image: the resized image 434 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 435 | be inserted in the returned image. If so, this window is the 436 | coordinates of the image part of the full image (excluding 437 | the padding). The x2, y2 pixels are not included. 438 | scale: The scale factor used to resize the image 439 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 440 | """ 441 | # Keep track of image dtype and return results in the same dtype 442 | image_dtype = image.dtype 443 | # Default window (y1, x1, y2, x2) and default scale == 1. 
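# Worked example (illustrative) for mode="square": a 600x800 image with
# min_dim=800 and max_dim=1024 first gets scale = 800/600 (about 1.333); since
# round(800 * 1.333) = 1067 exceeds max_dim, the scale drops to 1024/800 = 1.28,
# giving a 768x1024 resize. Padding 128 zero rows on top and bottom then yields
# a 1024x1024 image with window = (128, 0, 896, 1024).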
444 | h, w = image.shape[:2] 445 | window = (0, 0, h, w) 446 | scale = 1 447 | padding = [(0, 0), (0, 0), (0, 0)] 448 | crop = None 449 | 450 | if mode == "none": 451 | return image, window, scale, padding, crop 452 | 453 | # Scale? 454 | if min_dim: 455 | # Scale up but not down 456 | scale = max(1, min_dim / min(h, w)) 457 | if min_scale and scale < min_scale: 458 | scale = min_scale 459 | 460 | # Does it exceed max dim? 461 | if max_dim and mode == "square": 462 | image_max = float(max(h, w)) 463 | if round(image_max * scale) > max_dim: 464 | scale = max_dim / image_max 465 | 466 | # Resize image using bilinear interpolation 467 | if scale != 1 : 468 | image = skimage.transform.resize( 469 | image, (round(h * scale), round(w * scale)), 470 | order=1, mode="constant", preserve_range=True) 471 | 472 | # Need padding or cropping? 473 | if mode == "square": 474 | # Get new height and width 475 | h, w = image.shape[:2] 476 | top_pad = (max_dim - h) // 2 477 | bottom_pad = max_dim - h - top_pad 478 | left_pad = (max_dim - w) // 2 479 | right_pad = max_dim - w - left_pad 480 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 481 | image = np.pad(image, padding, mode='constant', constant_values=0) 482 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 483 | elif mode == "pad64": 484 | h, w = image.shape[:2] 485 | # Both sides must be divisible by 64 486 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" 487 | # Height 488 | if h % 64 > 0: 489 | max_h = h - (h % 64) + 64 490 | top_pad = (max_h - h) // 2 491 | bottom_pad = max_h - h - top_pad 492 | else: 493 | top_pad = bottom_pad = 0 494 | # Width 495 | if w % 64 > 0: 496 | max_w = w - (w % 64) + 64 497 | left_pad = (max_w - w) // 2 498 | right_pad = max_w - w - left_pad 499 | else: 500 | left_pad = right_pad = 0 501 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 502 | image = np.pad(image, padding, mode='constant', constant_values=0) 503 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 504 | elif mode == "crop": 505 | # Pick a random crop 506 | h, w = image.shape[:2] 507 | y = random.randint(0, (h - min_dim)) 508 | x = random.randint(0, (w - min_dim)) 509 | crop = (y, x, min_dim, min_dim) 510 | image = image[y:y + min_dim, x:x + min_dim] 511 | window = (0, 0, min_dim, min_dim) 512 | elif mode == "resize": 513 | image = skimage.transform.resize( 514 | image, (max_dim, max_dim), 515 | order=1, mode="constant", preserve_range=True) 516 | window = (0, 0, max_dim, max_dim) 517 | scale = 1 518 | else: 519 | raise Exception("Mode {} not supported".format(mode)) 520 | return image.astype(image_dtype), window, scale, padding, crop 521 | 522 | 523 | def resize_mask(mask, scale, padding, crop=None): 524 | """Resizes a mask using the given scale and padding. 525 | Typically, you get the scale and padding from resize_image() to 526 | ensure both, the image and the mask, are resized consistently. 
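For example (an illustrative sketch; `image` and `mask` are assumed to come from
load_image() / load_mask()):

    image, window, scale, padding, crop = resize_image(
        image, min_dim=800, max_dim=1024, mode="square")
    mask = resize_mask(mask, scale, padding, crop)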
527 | 528 | scale: mask scaling factor 529 | padding: Padding to add to the mask in the form 530 | [(top, bottom), (left, right), (0, 0)] 531 | """ 532 | # Suppress warning from scipy 0.13.0, the output shape of zoom() is 533 | # calculated with round() instead of int() 534 | 535 | 536 | with warnings.catch_warnings(): 537 | warnings.simplefilter("ignore") 538 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 539 | if crop is not None: 540 | y, x, h, w = crop 541 | mask = mask[y:y + h, x:x + w] 542 | else: 543 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 544 | 545 | return mask 546 | 547 | 548 | def minimize_mask(bbox, mask, mini_shape): 549 | """Resize masks to a smaller version to reduce memory load. 550 | Mini-masks can be resized back to image scale using expand_masks() 551 | 552 | See inspect_data.ipynb notebook for more details. 553 | """ 554 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 555 | for i in range(mask.shape[-1]): 556 | # Pick slice and cast to bool in case load_mask() returned wrong dtype 557 | m = mask[:, :, i].astype(bool) 558 | y1, x1, y2, x2 = bbox[i][:4] 559 | m = m[y1:y2, x1:x2] 560 | if m.size == 0: 561 | raise Exception("Invalid bounding box with area of zero") 562 | # Resize with bilinear interpolation 563 | m = skimage.transform.resize(m, mini_shape, order=1, mode="constant") 564 | mini_mask[:, :, i] = np.around(m).astype(np.bool) 565 | return mini_mask 566 | 567 | 568 | def expand_mask(bbox, mini_mask, image_shape): 569 | """Resizes mini masks back to image size. Reverses the change 570 | of minimize_mask(). 571 | 572 | See inspect_data.ipynb notebook for more details. 573 | """ 574 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 575 | for i in range(mask.shape[-1]): 576 | m = mini_mask[:, :, i] 577 | y1, x1, y2, x2 = bbox[i][:4] 578 | h = y2 - y1 579 | w = x2 - x1 580 | # Resize with bilinear interpolation 581 | m = skimage.transform.resize(m, (h, w), order=1, mode="constant") 582 | mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) 583 | return mask 584 | 585 | 586 | # TODO: Build and use this function to reduce code duplication 587 | def mold_mask(mask, config): 588 | pass 589 | 590 | 591 | def unmold_mask(mask, bbox, image_shape): 592 | """Converts a mask generated by the neural network to a format similar 593 | to its original shape. 594 | mask: [height, width] of type float. A small, typically 28x28 mask. 595 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 596 | 597 | Returns a binary mask with the same size as the original image. 598 | """ 599 | threshold = 0.5 600 | y1, x1, y2, x2 = bbox 601 | mask = skimage.transform.resize(mask, (y2 - y1, x2 - x1), order=1,mode="constant") 602 | mask = np.where(mask >= threshold, 1, 0).astype(np.bool) 603 | 604 | # Put the mask in the right location. 605 | full_mask = np.zeros(image_shape[:2], dtype=np.bool) 606 | full_mask[y1:y2, x1:x2] = mask 607 | return full_mask 608 | 609 | 610 | ############################################################ 611 | # Anchors 612 | ############################################################ 613 | 614 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 615 | """ 616 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 617 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 618 | shape: [height, width] spatial shape of the feature map over which 619 | to generate anchors. 
620 | feature_stride: Stride of the feature map relative to the image in pixels. 621 | anchor_stride: Stride of anchors on the feature map. For example, if the 622 | value is 2 then generate anchors for every other feature map pixel. 623 | """ 624 | # Get all combinations of scales and ratios 625 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 626 | scales = scales.flatten() 627 | ratios = ratios.flatten() 628 | 629 | # Enumerate heights and widths from scales and ratios 630 | heights = scales / np.sqrt(ratios) 631 | widths = scales * np.sqrt(ratios) 632 | 633 | # Enumerate shifts in feature space 634 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 635 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 636 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 637 | 638 | # Enumerate combinations of shifts, widths, and heights 639 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 640 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 641 | 642 | # Reshape to get a list of (y, x) and a list of (h, w) 643 | box_centers = np.stack( 644 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 645 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 646 | 647 | # Convert to corner coordinates (y1, x1, y2, x2) 648 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 649 | box_centers + 0.5 * box_sizes], axis=1) 650 | return boxes 651 | 652 | 653 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 654 | anchor_stride): 655 | """Generate anchors at different levels of a feature pyramid. Each scale 656 | is associated with a level of the pyramid, but each ratio is used in 657 | all levels of the pyramid. 658 | 659 | Returns: 660 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 661 | with the same order of the given scales. So, anchors of scale[0] come 662 | first, then anchors of scale[1], and so on. 663 | """ 664 | # Anchors 665 | # [anchor_count, (y1, x1, y2, x2)] 666 | anchors = [] 667 | for i in range(len(scales)): 668 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 669 | feature_strides[i], anchor_stride)) 670 | return np.concatenate(anchors, axis=0) 671 | 672 | 673 | ############################################################ 674 | # Miscellaneous 675 | ############################################################ 676 | 677 | def trim_zeros(x): 678 | """It's common to have tensors larger than the available data and 679 | pad with zeros. This function removes rows that are all zeros. 680 | 681 | x: [rows, columns]. 682 | """ 683 | assert len(x.shape) == 2 684 | return x[~np.all(x == 0, axis=1)] 685 | 686 | 687 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 688 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 689 | iou_threshold=0.5, score_threshold=0.0): 690 | """Finds matches between prediction and ground truth instances. 691 | 692 | Returns: 693 | gt_match: 1-D array. For each GT box it has the index of the matched 694 | predicted box. 695 | pred_match: 1-D array. For each predicted box, it has the index of 696 | the matched ground truth box. 697 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
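Note: in the matching loop below, the guard `if gt_match[j] > 0` never skips the
ground-truth box at index 0, because the stored match value is a prediction
index that can itself be 0; a GT box at index 0 may therefore be matched by more
than one prediction. Newer versions of the upstream Mask R-CNN utilities use
`gt_match[j] > -1` for this check.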
698 | """ 699 | # Trim zero padding 700 | # TODO: cleaner to do zero unpadding upstream 701 | gt_boxes = trim_zeros(gt_boxes) 702 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 703 | pred_boxes = trim_zeros(pred_boxes) 704 | pred_scores = pred_scores[:pred_boxes.shape[0]] 705 | # Sort predictions by score from high to low 706 | indices = np.argsort(pred_scores)[::-1] 707 | pred_boxes = pred_boxes[indices] 708 | pred_class_ids = pred_class_ids[indices] 709 | pred_scores = pred_scores[indices] 710 | pred_masks = pred_masks[..., indices] 711 | 712 | # Compute IoU overlaps [pred_masks, gt_masks] 713 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 714 | 715 | # Loop through predictions and find matching ground truth boxes 716 | match_count = 0 717 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 718 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 719 | for i in range(len(pred_boxes)): 720 | # Find best matching ground truth box 721 | # 1. Sort matches by score 722 | sorted_ixs = np.argsort(overlaps[i])[::-1] 723 | # 2. Remove low scores 724 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 725 | if low_score_idx.size > 0: 726 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 727 | # 3. Find the match 728 | for j in sorted_ixs: 729 | # If ground truth box is already matched, go to next one 730 | if gt_match[j] > 0: 731 | continue 732 | # If we reach IoU smaller than the threshold, end the loop 733 | iou = overlaps[i, j] 734 | if iou < iou_threshold: 735 | break 736 | # Do we have a match? 737 | if pred_class_ids[i] == gt_class_ids[j]: 738 | match_count += 1 739 | gt_match[j] = i 740 | pred_match[i] = j 741 | break 742 | 743 | return gt_match, pred_match, overlaps 744 | 745 | 746 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 747 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 748 | iou_threshold=0.5): 749 | """Compute Average Precision at a set IoU threshold (default 0.5). 750 | 751 | Returns: 752 | mAP: Mean Average Precision 753 | precisions: List of precisions at different class score thresholds. 754 | recalls: List of recall values at different class score thresholds. 755 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 756 | """ 757 | # Get matches and overlaps 758 | gt_match, pred_match, overlaps = compute_matches( 759 | gt_boxes, gt_class_ids, gt_masks, 760 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 761 | iou_threshold) 762 | 763 | # Compute precision and recall at each prediction box step 764 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 765 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 766 | 767 | # Pad with start and end values to simplify the math 768 | precisions = np.concatenate([[0], precisions, [0]]) 769 | recalls = np.concatenate([[0], recalls, [1]]) 770 | 771 | # Ensure precision values decrease but don't increase. This way, the 772 | # precision value at each recall threshold is the maximum it can be 773 | # for all following recall thresholds, as specified by the VOC paper. 
774 |     for i in range(len(precisions) - 2, -1, -1):
775 |         precisions[i] = np.maximum(precisions[i], precisions[i + 1])
776 | 
777 |     # Compute mean AP over recall range
778 |     indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
779 |     mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
780 |                  precisions[indices])
781 | 
782 |     return mAP, precisions, recalls, overlaps
783 | 
784 | 
785 | def compute_ap_range(gt_box, gt_class_id, gt_mask,
786 |                      pred_box, pred_class_id, pred_score, pred_mask,
787 |                      iou_thresholds=None, verbose=1):
788 |     """Compute AP over a range of IoU thresholds. Default range is 0.5-0.95."""
789 |     # Default is 0.5 to 0.95 with increments of 0.05
790 |     iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05)
791 | 
792 |     # Compute AP over range of IoU thresholds
793 |     AP = []
794 |     for iou_threshold in iou_thresholds:
795 |         ap, precisions, recalls, overlaps =\
796 |             compute_ap(gt_box, gt_class_id, gt_mask,
797 |                        pred_box, pred_class_id, pred_score, pred_mask,
798 |                        iou_threshold=iou_threshold)
799 |         if verbose:
800 |             print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap))
801 |         AP.append(ap)
802 |     AP = np.array(AP).mean()
803 |     if verbose:
804 |         print("AP @{:.2f}-{:.2f}:\t {:.3f}".format(
805 |             iou_thresholds[0], iou_thresholds[-1], AP))
806 |     return AP
807 | 
808 | 
809 | def compute_recall(pred_boxes, gt_boxes, iou):
810 |     """Compute the recall at the given IoU threshold. It's an indication
811 |     of how many GT boxes were found by the given prediction boxes.
812 | 
813 |     pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
814 |     gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
815 |     """
816 |     # Measure overlaps
817 |     overlaps = compute_overlaps(pred_boxes, gt_boxes)
818 |     iou_max = np.max(overlaps, axis=1)
819 |     iou_argmax = np.argmax(overlaps, axis=1)
820 |     positive_ids = np.where(iou_max >= iou)[0]
821 |     matched_gt_boxes = iou_argmax[positive_ids]
822 | 
823 |     recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
824 |     return recall, positive_ids
825 | 
826 | 
827 | # ## Batch Slicing
828 | # Some custom layers support a batch size of 1 only, and require a lot of work
829 | # to support batches greater than 1. This function slices an input tensor
830 | # across the batch dimension and feeds batches of size 1. Effectively,
831 | # an easy way to support batches > 1 quickly with little code modification.
832 | # In the long run, it's more efficient to modify the code to support large
833 | # batches and get rid of this function. Consider this a temporary solution.
834 | def batch_slice(inputs, graph_fn, batch_size, names=None):
835 |     """Splits inputs into slices and feeds each slice to a copy of the given
836 |     computation graph and then combines the results. It allows you to run a
837 |     graph on a batch of inputs even if the graph is written to support one
838 |     instance only.
839 | 
840 |     inputs: list of tensors. All must have the same first dimension length
841 |     graph_fn: A function that returns a TF tensor that's part of a graph.
842 |     batch_size: number of slices to divide the data into.
843 |     names: If provided, assigns names to the resulting tensors.
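
    Illustrative usage (a sketch; the `rois` name and shapes are assumed):
    with rois of shape [batch, num_rois, 4] and a graph_fn that expects a
    single [num_rois, 4] tensor,

        outputs = batch_slice([rois], lambda r: tf.identity(r), batch_size)

    applies graph_fn to each of the batch_size slices and stacks the
    per-slice results back along a new batch dimension.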
844 | """ 845 | if not isinstance(inputs, list): 846 | inputs = [inputs] 847 | 848 | outputs = [] 849 | for i in range(batch_size): 850 | inputs_slice = [x[i] for x in inputs] 851 | output_slice = graph_fn(*inputs_slice) 852 | if not isinstance(output_slice, (tuple, list)): 853 | output_slice = [output_slice] 854 | outputs.append(output_slice) 855 | # Change outputs from a list of slices where each is 856 | # a list of outputs to a list of outputs and each has 857 | # a list of slices 858 | outputs = list(zip(*outputs)) 859 | 860 | if names is None: 861 | names = [None] * len(outputs) 862 | 863 | result = [tf.stack(o, axis=0, name=n) 864 | for o, n in zip(outputs, names)] 865 | if len(result) == 1: 866 | result = result[0] 867 | 868 | return result 869 | 870 | 871 | # def download_trained_weights(coco_model_path, verbose=1): 872 | # """Download COCO trained weights from Releases. 873 | # 874 | # coco_model_path: local path of COCO trained weights 875 | # """ 876 | # if verbose > 0: 877 | # print("Downloading pretrained model to " + coco_model_path + " ...") 878 | # with urllib2.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 879 | # shutil.copyfileobj(resp, out) 880 | # if verbose > 0: 881 | # print("... done downloading pretrained model!") 882 | 883 | 884 | def norm_boxes(boxes, shape): 885 | """Converts boxes from pixel coordinates to normalized coordinates. 886 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 887 | shape: [..., (height, width)] in pixels 888 | 889 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 890 | coordinates it's inside the box. 891 | 892 | Returns: 893 | [N, (y1, x1, y2, x2)] in normalized coordinates 894 | """ 895 | h, w = shape 896 | scale = np.array([h - 1, w - 1, h - 1, w - 1]).astype(np.float32) 897 | shift = np.array([0, 0, 1, 1]).astype(np.float32) 898 | return np.divide((boxes - shift), scale).astype(np.float32) 899 | 900 | 901 | def denorm_boxes(boxes, shape): 902 | """Converts boxes from normalized coordinates to pixel coordinates. 903 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 904 | shape: [..., (height, width)] in pixels 905 | 906 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 907 | coordinates it's inside the box. 
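
    For example (illustrative values): with shape (100, 200), the normalized
    box [0.0, 0.0, 1.0, 1.0] maps to the pixel box [0, 0, 100, 200], and
    norm_boxes maps that pixel box back to [0.0, 0.0, 1.0, 1.0].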
908 | 909 | Returns: 910 | [N, (y1, x1, y2, x2)] in pixel coordinates 911 | """ 912 | h, w = shape 913 | scale = np.array([h - 1, w - 1, h - 1, w - 1]).astype(np.float32) 914 | shift = np.array([0, 0, 1, 1]).astype(np.float32) 915 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 916 | -------------------------------------------------------------------------------- /capsal/val_list.txt: -------------------------------------------------------------------------------- 1 | COCO_val2014_000000000192.jpg 2 | COCO_val2014_000000000397.jpg 3 | COCO_val2014_000000000589.jpg 4 | COCO_val2014_000000000675.jpg 5 | COCO_val2014_000000000692.jpg 6 | COCO_val2014_000000001000.jpg 7 | COCO_val2014_000000001369.jpg 8 | COCO_val2014_000000001561.jpg 9 | COCO_val2014_000000001668.jpg 10 | COCO_val2014_000000001675.jpg 11 | COCO_val2014_000000001700.jpg 12 | COCO_val2014_000000002006.jpg 13 | COCO_val2014_000000002153.jpg 14 | COCO_val2014_000000002302.jpg 15 | COCO_val2014_000000002640.jpg 16 | COCO_val2014_000000002839.jpg 17 | COCO_val2014_000000003001.jpg 18 | COCO_val2014_000000003093.jpg 19 | COCO_val2014_000000003192.jpg 20 | COCO_val2014_000000003326.jpg 21 | COCO_val2014_000000003716.jpg 22 | COCO_val2014_000000003786.jpg 23 | COCO_val2014_000000004795.jpg 24 | COCO_val2014_000000004988.jpg 25 | COCO_val2014_000000005038.jpg 26 | COCO_val2014_000000005105.jpg 27 | COCO_val2014_000000005385.jpg 28 | COCO_val2014_000000005617.jpg 29 | COCO_val2014_000000005713.jpg 30 | COCO_val2014_000000006712.jpg 31 | COCO_val2014_000000007298.jpg 32 | COCO_val2014_000000007511.jpg 33 | COCO_val2014_000000008594.jpg 34 | COCO_val2014_000000008665.jpg 35 | COCO_val2014_000000009002.jpg 36 | COCO_val2014_000000009769.jpg 37 | COCO_val2014_000000010216.jpg 38 | COCO_val2014_000000010393.jpg 39 | COCO_val2014_000000011291.jpg 40 | COCO_val2014_000000011696.jpg 41 | COCO_val2014_000000011796.jpg 42 | COCO_val2014_000000012085.jpg 43 | COCO_val2014_000000012209.jpg 44 | COCO_val2014_000000012443.jpg 45 | COCO_val2014_000000012764.jpg 46 | COCO_val2014_000000012991.jpg 47 | COCO_val2014_000000013081.jpg 48 | COCO_val2014_000000013729.jpg 49 | COCO_val2014_000000014108.jpg 50 | COCO_val2014_000000014226.jpg 51 | COCO_val2014_000000014248.jpg 52 | COCO_val2014_000000015157.jpg 53 | COCO_val2014_000000015345.jpg 54 | COCO_val2014_000000015497.jpg 55 | COCO_val2014_000000015725.jpg 56 | COCO_val2014_000000015846.jpg 57 | COCO_val2014_000000015956.jpg 58 | COCO_val2014_000000017018.jpg 59 | COCO_val2014_000000017095.jpg 60 | COCO_val2014_000000017207.jpg 61 | COCO_val2014_000000017311.jpg 62 | COCO_val2014_000000017313.jpg 63 | COCO_val2014_000000017425.jpg 64 | COCO_val2014_000000017667.jpg 65 | COCO_val2014_000000018150.jpg 66 | COCO_val2014_000000018224.jpg 67 | COCO_val2014_000000018699.jpg 68 | COCO_val2014_000000018903.jpg 69 | COCO_val2014_000000019158.jpg 70 | COCO_val2014_000000019176.jpg 71 | COCO_val2014_000000019432.jpg 72 | COCO_val2014_000000020161.jpg 73 | COCO_val2014_000000020273.jpg 74 | COCO_val2014_000000020410.jpg 75 | COCO_val2014_000000020671.jpg 76 | COCO_val2014_000000020784.jpg 77 | COCO_val2014_000000020925.jpg 78 | COCO_val2014_000000021979.jpg 79 | COCO_val2014_000000023121.jpg 80 | COCO_val2014_000000024112.jpg 81 | COCO_val2014_000000024223.jpg 82 | COCO_val2014_000000024243.jpg 83 | COCO_val2014_000000025123.jpg 84 | COCO_val2014_000000025282.jpg 85 | COCO_val2014_000000025393.jpg 86 | COCO_val2014_000000025560.jpg 87 | COCO_val2014_000000025603.jpg 88 | 
COCO_val2014_000000025860.jpg 89 | COCO_val2014_000000025997.jpg 90 | COCO_val2014_000000026730.jpg 91 | COCO_val2014_000000026768.jpg 92 | COCO_val2014_000000027478.jpg 93 | COCO_val2014_000000027493.jpg 94 | COCO_val2014_000000027578.jpg 95 | COCO_val2014_000000028273.jpg 96 | COCO_val2014_000000028719.jpg 97 | COCO_val2014_000000028874.jpg 98 | COCO_val2014_000000029059.jpg 99 | COCO_val2014_000000030534.jpg 100 | COCO_val2014_000000030657.jpg 101 | COCO_val2014_000000031569.jpg 102 | COCO_val2014_000000032001.jpg 103 | COCO_val2014_000000032300.jpg 104 | COCO_val2014_000000032777.jpg 105 | COCO_val2014_000000032887.jpg 106 | COCO_val2014_000000032965.jpg 107 | COCO_val2014_000000033109.jpg 108 | COCO_val2014_000000033377.jpg 109 | COCO_val2014_000000033645.jpg 110 | COCO_val2014_000000033652.jpg 111 | COCO_val2014_000000033835.jpg 112 | COCO_val2014_000000033904.jpg 113 | COCO_val2014_000000034196.jpg 114 | COCO_val2014_000000034438.jpg 115 | COCO_val2014_000000034452.jpg 116 | COCO_val2014_000000034464.jpg 117 | COCO_val2014_000000034471.jpg 118 | COCO_val2014_000000034500.jpg 119 | COCO_val2014_000000034580.jpg 120 | COCO_val2014_000000034657.jpg 121 | COCO_val2014_000000034786.jpg 122 | COCO_val2014_000000035429.jpg 123 | COCO_val2014_000000035726.jpg 124 | COCO_val2014_000000035825.jpg 125 | COCO_val2014_000000035894.jpg 126 | COCO_val2014_000000035975.jpg 127 | COCO_val2014_000000036077.jpg 128 | COCO_val2014_000000036508.jpg 129 | COCO_val2014_000000036810.jpg 130 | COCO_val2014_000000037017.jpg 131 | COCO_val2014_000000037403.jpg 132 | COCO_val2014_000000037616.jpg 133 | COCO_val2014_000000037670.jpg 134 | COCO_val2014_000000037729.jpg 135 | COCO_val2014_000000037751.jpg 136 | COCO_val2014_000000037988.jpg 137 | COCO_val2014_000000038073.jpg 138 | COCO_val2014_000000038769.jpg 139 | COCO_val2014_000000039081.jpg 140 | COCO_val2014_000000039589.jpg 141 | COCO_val2014_000000039718.jpg 142 | COCO_val2014_000000039769.jpg 143 | COCO_val2014_000000040602.jpg 144 | COCO_val2014_000000041110.jpg 145 | COCO_val2014_000000041279.jpg 146 | COCO_val2014_000000042091.jpg 147 | COCO_val2014_000000042260.jpg 148 | COCO_val2014_000000042399.jpg 149 | COCO_val2014_000000043305.jpg 150 | COCO_val2014_000000043345.jpg 151 | COCO_val2014_000000043670.jpg 152 | COCO_val2014_000000043961.jpg 153 | COCO_val2014_000000044136.jpg 154 | COCO_val2014_000000044171.jpg 155 | COCO_val2014_000000044269.jpg 156 | COCO_val2014_000000044347.jpg 157 | COCO_val2014_000000044621.jpg 158 | COCO_val2014_000000045197.jpg 159 | COCO_val2014_000000045594.jpg 160 | COCO_val2014_000000045721.jpg 161 | COCO_val2014_000000046048.jpg 162 | COCO_val2014_000000046101.jpg 163 | COCO_val2014_000000046252.jpg 164 | COCO_val2014_000000047263.jpg 165 | COCO_val2014_000000047361.jpg 166 | COCO_val2014_000000047511.jpg 167 | COCO_val2014_000000047916.jpg 168 | COCO_val2014_000000048281.jpg 169 | COCO_val2014_000000049194.jpg 170 | COCO_val2014_000000049731.jpg 171 | COCO_val2014_000000050148.jpg 172 | COCO_val2014_000000050485.jpg 173 | COCO_val2014_000000050561.jpg 174 | COCO_val2014_000000050627.jpg 175 | COCO_val2014_000000050752.jpg 176 | COCO_val2014_000000051203.jpg 177 | COCO_val2014_000000051500.jpg 178 | COCO_val2014_000000051628.jpg 179 | COCO_val2014_000000052290.jpg 180 | COCO_val2014_000000052664.jpg 181 | COCO_val2014_000000053328.jpg 182 | COCO_val2014_000000053629.jpg 183 | COCO_val2014_000000054091.jpg 184 | COCO_val2014_000000054355.jpg 185 | COCO_val2014_000000054533.jpg 186 | COCO_val2014_000000055466.jpg 187 | 
COCO_val2014_000000055528.jpg 188 | COCO_val2014_000000055768.jpg 189 | COCO_val2014_000000056002.jpg 190 | COCO_val2014_000000056433.jpg 191 | COCO_val2014_000000056724.jpg 192 | COCO_val2014_000000056960.jpg 193 | COCO_val2014_000000057508.jpg 194 | COCO_val2014_000000057672.jpg 195 | COCO_val2014_000000057827.jpg 196 | COCO_val2014_000000057878.jpg 197 | COCO_val2014_000000058304.jpg 198 | COCO_val2014_000000058350.jpg 199 | COCO_val2014_000000058690.jpg 200 | COCO_val2014_000000059015.jpg 201 | COCO_val2014_000000059201.jpg 202 | COCO_val2014_000000059666.jpg 203 | COCO_val2014_000000059744.jpg 204 | COCO_val2014_000000060125.jpg 205 | COCO_val2014_000000060128.jpg 206 | COCO_val2014_000000060448.jpg 207 | COCO_val2014_000000060641.jpg 208 | COCO_val2014_000000060835.jpg 209 | COCO_val2014_000000061108.jpg 210 | COCO_val2014_000000061171.jpg 211 | COCO_val2014_000000061693.jpg 212 | COCO_val2014_000000061735.jpg 213 | COCO_val2014_000000062703.jpg 214 | COCO_val2014_000000063328.jpg 215 | COCO_val2014_000000063525.jpg 216 | COCO_val2014_000000064015.jpg 217 | COCO_val2014_000000064121.jpg 218 | COCO_val2014_000000064909.jpg 219 | COCO_val2014_000000065008.jpg 220 | COCO_val2014_000000066072.jpg 221 | COCO_val2014_000000066397.jpg 222 | COCO_val2014_000000066675.jpg 223 | COCO_val2014_000000066769.jpg 224 | COCO_val2014_000000067065.jpg 225 | COCO_val2014_000000067106.jpg 226 | COCO_val2014_000000067122.jpg 227 | COCO_val2014_000000067975.jpg 228 | COCO_val2014_000000068005.jpg 229 | COCO_val2014_000000068059.jpg 230 | COCO_val2014_000000068093.jpg 231 | COCO_val2014_000000068387.jpg 232 | COCO_val2014_000000068435.jpg 233 | COCO_val2014_000000069015.jpg 234 | COCO_val2014_000000069544.jpg 235 | COCO_val2014_000000069827.jpg 236 | COCO_val2014_000000069914.jpg 237 | COCO_val2014_000000071136.jpg 238 | COCO_val2014_000000071302.jpg 239 | COCO_val2014_000000071563.jpg 240 | COCO_val2014_000000071938.jpg 241 | COCO_val2014_000000072376.jpg 242 | COCO_val2014_000000072582.jpg 243 | COCO_val2014_000000072657.jpg 244 | COCO_val2014_000000072795.jpg 245 | COCO_val2014_000000072810.jpg 246 | COCO_val2014_000000072813.jpg 247 | COCO_val2014_000000072944.jpg 248 | COCO_val2014_000000073256.jpg 249 | COCO_val2014_000000073753.jpg 250 | COCO_val2014_000000074256.jpg 251 | COCO_val2014_000000075023.jpg 252 | COCO_val2014_000000075527.jpg 253 | COCO_val2014_000000075696.jpg 254 | COCO_val2014_000000076416.jpg 255 | COCO_val2014_000000076468.jpg 256 | COCO_val2014_000000076595.jpg 257 | COCO_val2014_000000076619.jpg 258 | COCO_val2014_000000076844.jpg 259 | COCO_val2014_000000076942.jpg 260 | COCO_val2014_000000077811.jpg 261 | COCO_val2014_000000078060.jpg 262 | COCO_val2014_000000078188.jpg 263 | COCO_val2014_000000078696.jpg 264 | COCO_val2014_000000078823.jpg 265 | COCO_val2014_000000079860.jpg 266 | COCO_val2014_000000080131.jpg 267 | COCO_val2014_000000080698.jpg 268 | COCO_val2014_000000080714.jpg 269 | COCO_val2014_000000080895.jpg 270 | COCO_val2014_000000081035.jpg 271 | COCO_val2014_000000081079.jpg 272 | COCO_val2014_000000081210.jpg 273 | COCO_val2014_000000081394.jpg 274 | COCO_val2014_000000081738.jpg 275 | COCO_val2014_000000082174.jpg 276 | COCO_val2014_000000082456.jpg 277 | COCO_val2014_000000082787.jpg 278 | COCO_val2014_000000083761.jpg 279 | COCO_val2014_000000083925.jpg 280 | COCO_val2014_000000084170.jpg 281 | COCO_val2014_000000084362.jpg 282 | COCO_val2014_000000084540.jpg 283 | COCO_val2014_000000084650.jpg 284 | COCO_val2014_000000085183.jpg 285 | 
COCO_val2014_000000085195.jpg 286 | COCO_val2014_000000085390.jpg 287 | COCO_val2014_000000085795.jpg 288 | COCO_val2014_000000085814.jpg 289 | COCO_val2014_000000085826.jpg 290 | COCO_val2014_000000086011.jpg 291 | COCO_val2014_000000086646.jpg 292 | COCO_val2014_000000086755.jpg 293 | COCO_val2014_000000087144.jpg 294 | COCO_val2014_000000087393.jpg 295 | COCO_val2014_000000087740.jpg 296 | COCO_val2014_000000088183.jpg 297 | COCO_val2014_000000088252.jpg 298 | COCO_val2014_000000088349.jpg 299 | COCO_val2014_000000088513.jpg 300 | COCO_val2014_000000089210.jpg 301 | COCO_val2014_000000089362.jpg 302 | COCO_val2014_000000089378.jpg 303 | COCO_val2014_000000089589.jpg 304 | COCO_val2014_000000090659.jpg 305 | COCO_val2014_000000091283.jpg 306 | COCO_val2014_000000091318.jpg 307 | COCO_val2014_000000091326.jpg 308 | COCO_val2014_000000091500.jpg 309 | COCO_val2014_000000091909.jpg 310 | COCO_val2014_000000092801.jpg 311 | COCO_val2014_000000092815.jpg 312 | COCO_val2014_000000093353.jpg 313 | COCO_val2014_000000094422.jpg 314 | COCO_val2014_000000095251.jpg 315 | COCO_val2014_000000095297.jpg 316 | COCO_val2014_000000096048.jpg 317 | COCO_val2014_000000096215.jpg 318 | COCO_val2014_000000096226.jpg 319 | COCO_val2014_000000096288.jpg 320 | COCO_val2014_000000096539.jpg 321 | COCO_val2014_000000097036.jpg 322 | COCO_val2014_000000097479.jpg 323 | COCO_val2014_000000097679.jpg 324 | COCO_val2014_000000097964.jpg 325 | COCO_val2014_000000098003.jpg 326 | COCO_val2014_000000098520.jpg 327 | COCO_val2014_000000098839.jpg 328 | COCO_val2014_000000098979.jpg 329 | COCO_val2014_000000099026.jpg 330 | COCO_val2014_000000099341.jpg 331 | COCO_val2014_000000099567.jpg 332 | COCO_val2014_000000099584.jpg 333 | COCO_val2014_000000099875.jpg 334 | COCO_val2014_000000100238.jpg 335 | COCO_val2014_000000100306.jpg 336 | COCO_val2014_000000100977.jpg 337 | COCO_val2014_000000101762.jpg 338 | COCO_val2014_000000101969.jpg 339 | COCO_val2014_000000102240.jpg 340 | COCO_val2014_000000102837.jpg 341 | COCO_val2014_000000103122.jpg 342 | COCO_val2014_000000103509.jpg 343 | COCO_val2014_000000103558.jpg 344 | COCO_val2014_000000103806.jpg 345 | COCO_val2014_000000103855.jpg 346 | COCO_val2014_000000103870.jpg 347 | COCO_val2014_000000104091.jpg 348 | COCO_val2014_000000104150.jpg 349 | COCO_val2014_000000104625.jpg 350 | COCO_val2014_000000105052.jpg 351 | COCO_val2014_000000106389.jpg 352 | COCO_val2014_000000106617.jpg 353 | COCO_val2014_000000106810.jpg 354 | COCO_val2014_000000107304.jpg 355 | COCO_val2014_000000108425.jpg 356 | COCO_val2014_000000108681.jpg 357 | COCO_val2014_000000108803.jpg 358 | COCO_val2014_000000109291.jpg 359 | COCO_val2014_000000111448.jpg 360 | COCO_val2014_000000111593.jpg 361 | COCO_val2014_000000112093.jpg 362 | COCO_val2014_000000112298.jpg 363 | COCO_val2014_000000112818.jpg 364 | COCO_val2014_000000112830.jpg 365 | COCO_val2014_000000113271.jpg 366 | COCO_val2014_000000113473.jpg 367 | COCO_val2014_000000113707.jpg 368 | COCO_val2014_000000113736.jpg 369 | COCO_val2014_000000113860.jpg 370 | COCO_val2014_000000114158.jpg 371 | COCO_val2014_000000114481.jpg 372 | COCO_val2014_000000114684.jpg 373 | COCO_val2014_000000114790.jpg 374 | COCO_val2014_000000115721.jpg 375 | COCO_val2014_000000115875.jpg 376 | COCO_val2014_000000116046.jpg 377 | COCO_val2014_000000116405.jpg 378 | COCO_val2014_000000117112.jpg 379 | COCO_val2014_000000117310.jpg 380 | COCO_val2014_000000118181.jpg 381 | COCO_val2014_000000118542.jpg 382 | COCO_val2014_000000118911.jpg 383 | 
COCO_val2014_000000119120.jpg 384 | COCO_val2014_000000119192.jpg 385 | COCO_val2014_000000119402.jpg 386 | COCO_val2014_000000119581.jpg 387 | COCO_val2014_000000119641.jpg 388 | COCO_val2014_000000119939.jpg 389 | COCO_val2014_000000120021.jpg 390 | COCO_val2014_000000120356.jpg 391 | COCO_val2014_000000120420.jpg 392 | COCO_val2014_000000121442.jpg 393 | COCO_val2014_000000121788.jpg 394 | COCO_val2014_000000122467.jpg 395 | COCO_val2014_000000123480.jpg 396 | COCO_val2014_000000123633.jpg 397 | COCO_val2014_000000123867.jpg 398 | COCO_val2014_000000125228.jpg 399 | COCO_val2014_000000126107.jpg 400 | COCO_val2014_000000126340.jpg 401 | COCO_val2014_000000126631.jpg 402 | COCO_val2014_000000127141.jpg 403 | COCO_val2014_000000127535.jpg 404 | COCO_val2014_000000127702.jpg 405 | COCO_val2014_000000128515.jpg 406 | COCO_val2014_000000128675.jpg 407 | COCO_val2014_000000129100.jpg 408 | COCO_val2014_000000129492.jpg 409 | COCO_val2014_000000129502.jpg 410 | COCO_val2014_000000129571.jpg 411 | COCO_val2014_000000129864.jpg 412 | COCO_val2014_000000129897.jpg 413 | COCO_val2014_000000130043.jpg 414 | COCO_val2014_000000130826.jpg 415 | COCO_val2014_000000131131.jpg 416 | COCO_val2014_000000131714.jpg 417 | COCO_val2014_000000131743.jpg 418 | COCO_val2014_000000132189.jpg 419 | COCO_val2014_000000132272.jpg 420 | COCO_val2014_000000132612.jpg 421 | COCO_val2014_000000133327.jpg 422 | COCO_val2014_000000133963.jpg 423 | COCO_val2014_000000134096.jpg 424 | COCO_val2014_000000134160.jpg 425 | COCO_val2014_000000134366.jpg 426 | COCO_val2014_000000134856.jpg 427 | COCO_val2014_000000134886.jpg 428 | COCO_val2014_000000135155.jpg 429 | COCO_val2014_000000135356.jpg 430 | COCO_val2014_000000135914.jpg 431 | COCO_val2014_000000135975.jpg 432 | COCO_val2014_000000136227.jpg 433 | COCO_val2014_000000136720.jpg 434 | COCO_val2014_000000136938.jpg 435 | COCO_val2014_000000137297.jpg 436 | COCO_val2014_000000137412.jpg 437 | COCO_val2014_000000137826.jpg 438 | COCO_val2014_000000137961.jpg 439 | COCO_val2014_000000138821.jpg 440 | COCO_val2014_000000138975.jpg 441 | COCO_val2014_000000139428.jpg 442 | COCO_val2014_000000139721.jpg 443 | COCO_val2014_000000139856.jpg 444 | COCO_val2014_000000139872.jpg 445 | COCO_val2014_000000140006.jpg 446 | COCO_val2014_000000140017.jpg 447 | COCO_val2014_000000140487.jpg 448 | COCO_val2014_000000140783.jpg 449 | COCO_val2014_000000141158.jpg 450 | COCO_val2014_000000141821.jpg 451 | COCO_val2014_000000142454.jpg 452 | COCO_val2014_000000142742.jpg 453 | COCO_val2014_000000143224.jpg 454 | COCO_val2014_000000143275.jpg 455 | COCO_val2014_000000143320.jpg 456 | COCO_val2014_000000144228.jpg 457 | COCO_val2014_000000144379.jpg 458 | COCO_val2014_000000144863.jpg 459 | COCO_val2014_000000144874.jpg 460 | COCO_val2014_000000144878.jpg 461 | COCO_val2014_000000144938.jpg 462 | COCO_val2014_000000145824.jpg 463 | COCO_val2014_000000146120.jpg 464 | COCO_val2014_000000148193.jpg 465 | COCO_val2014_000000148329.jpg 466 | COCO_val2014_000000148392.jpg 467 | COCO_val2014_000000148719.jpg 468 | COCO_val2014_000000149284.jpg 469 | COCO_val2014_000000149500.jpg 470 | COCO_val2014_000000149676.jpg 471 | COCO_val2014_000000150235.jpg 472 | COCO_val2014_000000150358.jpg 473 | COCO_val2014_000000150599.jpg 474 | COCO_val2014_000000150639.jpg 475 | COCO_val2014_000000150686.jpg 476 | COCO_val2014_000000150986.jpg 477 | COCO_val2014_000000151084.jpg 478 | COCO_val2014_000000151480.jpg 479 | COCO_val2014_000000151742.jpg 480 | COCO_val2014_000000151988.jpg 481 | 
COCO_val2014_000000152103.jpg 482 | COCO_val2014_000000152598.jpg 483 | COCO_val2014_000000152962.jpg 484 | COCO_val2014_000000153520.jpg 485 | COCO_val2014_000000153717.jpg 486 | COCO_val2014_000000153931.jpg 487 | COCO_val2014_000000154971.jpg 488 | COCO_val2014_000000156100.jpg 489 | COCO_val2014_000000156324.jpg 490 | COCO_val2014_000000156572.jpg 491 | COCO_val2014_000000156643.jpg 492 | COCO_val2014_000000156687.jpg 493 | COCO_val2014_000000156974.jpg 494 | COCO_val2014_000000157001.jpg 495 | COCO_val2014_000000157516.jpg 496 | COCO_val2014_000000158333.jpg 497 | COCO_val2014_000000158922.jpg 498 | COCO_val2014_000000159223.jpg 499 | COCO_val2014_000000159399.jpg 500 | COCO_val2014_000000159609.jpg 501 | COCO_val2014_000000159627.jpg 502 | COCO_val2014_000000160195.jpg 503 | COCO_val2014_000000160330.jpg 504 | COCO_val2014_000000160836.jpg 505 | COCO_val2014_000000160860.jpg 506 | COCO_val2014_000000161772.jpg 507 | COCO_val2014_000000161940.jpg 508 | COCO_val2014_000000162627.jpg 509 | COCO_val2014_000000162952.jpg 510 | COCO_val2014_000000162998.jpg 511 | COCO_val2014_000000163112.jpg 512 | COCO_val2014_000000163518.jpg 513 | COCO_val2014_000000163611.jpg 514 | COCO_val2014_000000163831.jpg 515 | COCO_val2014_000000164005.jpg 516 | COCO_val2014_000000164104.jpg 517 | COCO_val2014_000000164602.jpg 518 | COCO_val2014_000000164635.jpg 519 | COCO_val2014_000000164780.jpg 520 | COCO_val2014_000000165056.jpg 521 | COCO_val2014_000000165522.jpg 522 | COCO_val2014_000000165643.jpg 523 | COCO_val2014_000000166287.jpg 524 | COCO_val2014_000000166948.jpg 525 | COCO_val2014_000000167347.jpg 526 | COCO_val2014_000000167696.jpg 527 | COCO_val2014_000000167854.jpg 528 | COCO_val2014_000000168031.jpg 529 | COCO_val2014_000000168662.jpg 530 | COCO_val2014_000000168713.jpg 531 | COCO_val2014_000000168781.jpg 532 | COCO_val2014_000000168890.jpg 533 | COCO_val2014_000000170340.jpg 534 | COCO_val2014_000000170640.jpg 535 | COCO_val2014_000000170784.jpg 536 | COCO_val2014_000000171192.jpg 537 | COCO_val2014_000000171201.jpg 538 | COCO_val2014_000000171241.jpg 539 | COCO_val2014_000000171464.jpg 540 | COCO_val2014_000000171753.jpg 541 | COCO_val2014_000000171757.jpg 542 | COCO_val2014_000000171809.jpg 543 | COCO_val2014_000000171962.jpg 544 | COCO_val2014_000000172021.jpg 545 | COCO_val2014_000000172556.jpg 546 | COCO_val2014_000000172648.jpg 547 | COCO_val2014_000000172877.jpg 548 | COCO_val2014_000000172995.jpg 549 | COCO_val2014_000000173008.jpg 550 | COCO_val2014_000000173514.jpg 551 | COCO_val2014_000000173693.jpg 552 | COCO_val2014_000000173997.jpg 553 | COCO_val2014_000000174070.jpg 554 | COCO_val2014_000000174113.jpg 555 | COCO_val2014_000000174511.jpg 556 | COCO_val2014_000000174888.jpg 557 | COCO_val2014_000000175102.jpg 558 | COCO_val2014_000000175653.jpg 559 | COCO_val2014_000000175878.jpg 560 | COCO_val2014_000000175908.jpg 561 | COCO_val2014_000000176180.jpg 562 | COCO_val2014_000000176226.jpg 563 | COCO_val2014_000000176328.jpg 564 | COCO_val2014_000000176606.jpg 565 | COCO_val2014_000000177323.jpg 566 | COCO_val2014_000000177366.jpg 567 | COCO_val2014_000000178691.jpg 568 | COCO_val2014_000000178941.jpg 569 | COCO_val2014_000000179415.jpg 570 | COCO_val2014_000000179441.jpg 571 | COCO_val2014_000000179727.jpg 572 | COCO_val2014_000000180366.jpg 573 | COCO_val2014_000000180383.jpg 574 | COCO_val2014_000000180521.jpg 575 | COCO_val2014_000000180584.jpg 576 | COCO_val2014_000000180653.jpg 577 | COCO_val2014_000000180764.jpg 578 | COCO_val2014_000000181449.jpg 579 | 
COCO_val2014_000000181627.jpg 580 | COCO_val2014_000000181655.jpg 581 | COCO_val2014_000000182236.jpg 582 | COCO_val2014_000000182523.jpg 583 | COCO_val2014_000000183364.jpg 584 | COCO_val2014_000000183905.jpg 585 | COCO_val2014_000000184485.jpg 586 | COCO_val2014_000000184621.jpg 587 | COCO_val2014_000000185472.jpg 588 | COCO_val2014_000000185781.jpg 589 | COCO_val2014_000000185936.jpg 590 | COCO_val2014_000000186036.jpg 591 | COCO_val2014_000000186095.jpg 592 | COCO_val2014_000000186697.jpg 593 | COCO_val2014_000000187072.jpg 594 | COCO_val2014_000000187177.jpg 595 | COCO_val2014_000000187240.jpg 596 | COCO_val2014_000000187543.jpg 597 | COCO_val2014_000000188460.jpg 598 | COCO_val2014_000000188824.jpg 599 | COCO_val2014_000000188865.jpg 600 | COCO_val2014_000000188958.jpg 601 | COCO_val2014_000000189244.jpg 602 | COCO_val2014_000000189368.jpg 603 | COCO_val2014_000000190218.jpg 604 | COCO_val2014_000000190829.jpg 605 | COCO_val2014_000000191218.jpg 606 | COCO_val2014_000000191225.jpg 607 | COCO_val2014_000000191691.jpg 608 | COCO_val2014_000000191846.jpg 609 | COCO_val2014_000000191874.jpg 610 | COCO_val2014_000000191925.jpg 611 | COCO_val2014_000000191949.jpg 612 | COCO_val2014_000000192007.jpg 613 | COCO_val2014_000000192154.jpg 614 | COCO_val2014_000000192296.jpg 615 | COCO_val2014_000000192932.jpg 616 | COCO_val2014_000000193674.jpg 617 | COCO_val2014_000000194203.jpg 618 | COCO_val2014_000000194414.jpg 619 | COCO_val2014_000000194532.jpg 620 | COCO_val2014_000000195800.jpg 621 | COCO_val2014_000000195815.jpg 622 | COCO_val2014_000000195817.jpg 623 | COCO_val2014_000000196430.jpg 624 | COCO_val2014_000000196664.jpg 625 | COCO_val2014_000000196715.jpg 626 | COCO_val2014_000000196759.jpg 627 | COCO_val2014_000000196815.jpg 628 | COCO_val2014_000000196843.jpg 629 | COCO_val2014_000000196885.jpg 630 | COCO_val2014_000000197097.jpg 631 | COCO_val2014_000000197130.jpg 632 | COCO_val2014_000000197809.jpg 633 | COCO_val2014_000000197997.jpg 634 | COCO_val2014_000000198223.jpg 635 | COCO_val2014_000000198492.jpg 636 | COCO_val2014_000000198530.jpg 637 | COCO_val2014_000000198563.jpg 638 | COCO_val2014_000000198775.jpg 639 | COCO_val2014_000000199127.jpg 640 | COCO_val2014_000000199158.jpg 641 | COCO_val2014_000000199842.jpg 642 | COCO_val2014_000000199951.jpg 643 | COCO_val2014_000000199989.jpg 644 | COCO_val2014_000000200381.jpg 645 | COCO_val2014_000000200421.jpg 646 | COCO_val2014_000000200541.jpg 647 | COCO_val2014_000000200839.jpg 648 | COCO_val2014_000000201463.jpg 649 | COCO_val2014_000000202981.jpg 650 | COCO_val2014_000000203294.jpg 651 | COCO_val2014_000000203865.jpg 652 | COCO_val2014_000000204432.jpg 653 | COCO_val2014_000000204507.jpg 654 | COCO_val2014_000000204650.jpg 655 | COCO_val2014_000000204887.jpg 656 | COCO_val2014_000000205573.jpg 657 | COCO_val2014_000000205636.jpg 658 | COCO_val2014_000000205729.jpg 659 | COCO_val2014_000000206049.jpg 660 | COCO_val2014_000000207151.jpg 661 | COCO_val2014_000000207826.jpg 662 | COCO_val2014_000000208793.jpg 663 | COCO_val2014_000000208955.jpg 664 | COCO_val2014_000000209044.jpg 665 | COCO_val2014_000000209357.jpg 666 | COCO_val2014_000000209468.jpg 667 | COCO_val2014_000000210522.jpg 668 | COCO_val2014_000000210782.jpg 669 | COCO_val2014_000000210795.jpg 670 | COCO_val2014_000000210990.jpg 671 | COCO_val2014_000000211825.jpg 672 | COCO_val2014_000000213728.jpg 673 | COCO_val2014_000000214491.jpg 674 | COCO_val2014_000000214727.jpg 675 | COCO_val2014_000000214984.jpg 676 | COCO_val2014_000000215482.jpg 677 | 
COCO_val2014_000000215723.jpg 678 | COCO_val2014_000000215982.jpg 679 | COCO_val2014_000000216103.jpg 680 | COCO_val2014_000000216161.jpg 681 | COCO_val2014_000000216277.jpg 682 | COCO_val2014_000000217016.jpg 683 | COCO_val2014_000000218205.jpg 684 | COCO_val2014_000000218988.jpg 685 | COCO_val2014_000000219063.jpg 686 | COCO_val2014_000000219075.jpg 687 | COCO_val2014_000000219135.jpg 688 | COCO_val2014_000000219909.jpg 689 | COCO_val2014_000000221303.jpg 690 | COCO_val2014_000000221562.jpg 691 | COCO_val2014_000000221932.jpg 692 | COCO_val2014_000000222146.jpg 693 | COCO_val2014_000000222228.jpg 694 | COCO_val2014_000000222317.jpg 695 | COCO_val2014_000000222512.jpg 696 | COCO_val2014_000000222662.jpg 697 | COCO_val2014_000000223414.jpg 698 | COCO_val2014_000000223757.jpg 699 | COCO_val2014_000000224104.jpg 700 | COCO_val2014_000000224126.jpg 701 | COCO_val2014_000000224530.jpg 702 | COCO_val2014_000000224554.jpg 703 | COCO_val2014_000000225558.jpg 704 | COCO_val2014_000000227044.jpg 705 | COCO_val2014_000000227204.jpg 706 | COCO_val2014_000000227428.jpg 707 | COCO_val2014_000000227440.jpg 708 | COCO_val2014_000000227478.jpg 709 | COCO_val2014_000000227491.jpg 710 | COCO_val2014_000000227851.jpg 711 | COCO_val2014_000000227940.jpg 712 | COCO_val2014_000000228013.jpg 713 | COCO_val2014_000000228306.jpg 714 | COCO_val2014_000000228309.jpg 715 | COCO_val2014_000000229647.jpg 716 | COCO_val2014_000000229740.jpg 717 | COCO_val2014_000000229889.jpg 718 | COCO_val2014_000000230096.jpg 719 | COCO_val2014_000000230585.jpg 720 | COCO_val2014_000000230679.jpg 721 | COCO_val2014_000000230964.jpg 722 | COCO_val2014_000000231028.jpg 723 | COCO_val2014_000000231281.jpg 724 | COCO_val2014_000000231343.jpg 725 | COCO_val2014_000000231408.jpg 726 | COCO_val2014_000000231616.jpg 727 | COCO_val2014_000000232121.jpg 728 | COCO_val2014_000000232185.jpg 729 | COCO_val2014_000000232432.jpg 730 | COCO_val2014_000000232538.jpg 731 | COCO_val2014_000000232760.jpg 732 | COCO_val2014_000000232849.jpg 733 | COCO_val2014_000000233005.jpg 734 | COCO_val2014_000000233266.jpg 735 | COCO_val2014_000000233365.jpg 736 | COCO_val2014_000000233868.jpg 737 | COCO_val2014_000000235832.jpg 738 | COCO_val2014_000000235964.jpg 739 | COCO_val2014_000000236925.jpg 740 | COCO_val2014_000000236945.jpg 741 | COCO_val2014_000000239444.jpg 742 | COCO_val2014_000000240210.jpg 743 | COCO_val2014_000000240495.jpg 744 | COCO_val2014_000000240903.jpg 745 | COCO_val2014_000000241113.jpg 746 | COCO_val2014_000000241837.jpg 747 | COCO_val2014_000000242297.jpg 748 | COCO_val2014_000000242570.jpg 749 | COCO_val2014_000000242610.jpg 750 | COCO_val2014_000000244050.jpg 751 | COCO_val2014_000000244060.jpg 752 | COCO_val2014_000000245460.jpg 753 | COCO_val2014_000000245997.jpg 754 | COCO_val2014_000000246004.jpg 755 | COCO_val2014_000000247259.jpg 756 | COCO_val2014_000000247264.jpg 757 | COCO_val2014_000000247407.jpg 758 | COCO_val2014_000000247484.jpg 759 | COCO_val2014_000000247720.jpg 760 | COCO_val2014_000000248235.jpg 761 | COCO_val2014_000000248457.jpg 762 | COCO_val2014_000000248559.jpg 763 | COCO_val2014_000000249599.jpg 764 | COCO_val2014_000000250893.jpg 765 | COCO_val2014_000000250951.jpg 766 | COCO_val2014_000000251395.jpg 767 | COCO_val2014_000000251741.jpg 768 | COCO_val2014_000000251798.jpg 769 | COCO_val2014_000000251920.jpg 770 | COCO_val2014_000000252354.jpg 771 | COCO_val2014_000000252388.jpg 772 | COCO_val2014_000000252748.jpg 773 | COCO_val2014_000000253408.jpg 774 | COCO_val2014_000000253770.jpg 775 | 
COCO_val2014_000000256192.jpg 776 | COCO_val2014_000000256628.jpg 777 | COCO_val2014_000000256787.jpg 778 | COCO_val2014_000000256838.jpg 779 | COCO_val2014_000000257058.jpg 780 | COCO_val2014_000000257060.jpg 781 | COCO_val2014_000000257971.jpg 782 | COCO_val2014_000000258850.jpg 783 | COCO_val2014_000000259049.jpg 784 | COCO_val2014_000000259690.jpg 785 | COCO_val2014_000000259814.jpg 786 | COCO_val2014_000000260478.jpg 787 | COCO_val2014_000000260925.jpg 788 | COCO_val2014_000000261805.jpg 789 | COCO_val2014_000000262274.jpg 790 | COCO_val2014_000000262323.jpg 791 | COCO_val2014_000000262371.jpg 792 | COCO_val2014_000000262487.jpg 793 | COCO_val2014_000000262576.jpg 794 | COCO_val2014_000000262587.jpg 795 | COCO_val2014_000000262658.jpg 796 | COCO_val2014_000000263052.jpg 797 | COCO_val2014_000000263351.jpg 798 | COCO_val2014_000000264976.jpg 799 | COCO_val2014_000000265176.jpg 800 | COCO_val2014_000000265256.jpg 801 | COCO_val2014_000000265552.jpg 802 | COCO_val2014_000000265611.jpg 803 | COCO_val2014_000000265719.jpg 804 | COCO_val2014_000000266434.jpg 805 | COCO_val2014_000000267224.jpg 806 | COCO_val2014_000000267300.jpg 807 | COCO_val2014_000000267684.jpg 808 | COCO_val2014_000000268363.jpg 809 | COCO_val2014_000000268435.jpg 810 | COCO_val2014_000000269358.jpg 811 | COCO_val2014_000000270066.jpg 812 | COCO_val2014_000000270744.jpg 813 | COCO_val2014_000000271643.jpg 814 | COCO_val2014_000000272153.jpg 815 | COCO_val2014_000000273579.jpg 816 | COCO_val2014_000000273855.jpg 817 | COCO_val2014_000000274216.jpg 818 | COCO_val2014_000000274687.jpg 819 | COCO_val2014_000000275449.jpg 820 | COCO_val2014_000000275611.jpg 821 | COCO_val2014_000000276024.jpg 822 | COCO_val2014_000000276413.jpg 823 | COCO_val2014_000000276986.jpg 824 | COCO_val2014_000000277326.jpg 825 | COCO_val2014_000000277642.jpg 826 | COCO_val2014_000000277793.jpg 827 | COCO_val2014_000000277869.jpg 828 | COCO_val2014_000000278134.jpg 829 | COCO_val2014_000000278161.jpg 830 | COCO_val2014_000000278449.jpg 831 | COCO_val2014_000000278705.jpg 832 | COCO_val2014_000000278771.jpg 833 | COCO_val2014_000000278966.jpg 834 | COCO_val2014_000000279140.jpg 835 | COCO_val2014_000000279386.jpg 836 | COCO_val2014_000000279491.jpg 837 | COCO_val2014_000000280858.jpg 838 | COCO_val2014_000000281688.jpg 839 | COCO_val2014_000000281758.jpg 840 | COCO_val2014_000000282553.jpg 841 | COCO_val2014_000000283012.jpg 842 | COCO_val2014_000000283203.jpg 843 | COCO_val2014_000000283286.jpg 844 | COCO_val2014_000000283471.jpg 845 | COCO_val2014_000000283498.jpg 846 | COCO_val2014_000000284152.jpg 847 | COCO_val2014_000000284623.jpg 848 | COCO_val2014_000000285258.jpg 849 | COCO_val2014_000000285534.jpg 850 | COCO_val2014_000000286089.jpg 851 | COCO_val2014_000000287033.jpg 852 | COCO_val2014_000000287235.jpg 853 | COCO_val2014_000000287387.jpg 854 | COCO_val2014_000000287402.jpg 855 | COCO_val2014_000000287649.jpg 856 | COCO_val2014_000000287741.jpg 857 | COCO_val2014_000000288442.jpg 858 | COCO_val2014_000000288639.jpg 859 | COCO_val2014_000000288944.jpg 860 | COCO_val2014_000000288955.jpg 861 | COCO_val2014_000000289318.jpg 862 | COCO_val2014_000000289813.jpg 863 | COCO_val2014_000000289941.jpg 864 | COCO_val2014_000000290113.jpg 865 | COCO_val2014_000000290839.jpg 866 | COCO_val2014_000000290979.jpg 867 | COCO_val2014_000000292118.jpg 868 | COCO_val2014_000000292463.jpg 869 | COCO_val2014_000000292931.jpg 870 | COCO_val2014_000000293027.jpg 871 | COCO_val2014_000000293125.jpg 872 | COCO_val2014_000000293554.jpg 873 | 
COCO_val2014_000000293625.jpg 874 | COCO_val2014_000000294134.jpg 875 | COCO_val2014_000000295316.jpg 876 | COCO_val2014_000000295403.jpg 877 | COCO_val2014_000000295420.jpg 878 | COCO_val2014_000000296056.jpg 879 | COCO_val2014_000000296176.jpg 880 | COCO_val2014_000000296222.jpg 881 | COCO_val2014_000000296224.jpg 882 | COCO_val2014_000000296243.jpg 883 | COCO_val2014_000000296604.jpg 884 | COCO_val2014_000000296825.jpg 885 | COCO_val2014_000000296907.jpg 886 | COCO_val2014_000000296988.jpg 887 | COCO_val2014_000000296997.jpg 888 | COCO_val2014_000000298261.jpg 889 | COCO_val2014_000000298578.jpg 890 | COCO_val2014_000000299074.jpg 891 | COCO_val2014_000000299207.jpg 892 | COCO_val2014_000000299887.jpg 893 | COCO_val2014_000000300066.jpg 894 | COCO_val2014_000000300330.jpg 895 | COCO_val2014_000000300413.jpg 896 | COCO_val2014_000000300437.jpg 897 | COCO_val2014_000000300472.jpg 898 | COCO_val2014_000000300538.jpg 899 | COCO_val2014_000000301082.jpg 900 | COCO_val2014_000000301582.jpg 901 | COCO_val2014_000000301735.jpg 902 | COCO_val2014_000000301867.jpg 903 | COCO_val2014_000000302275.jpg 904 | COCO_val2014_000000303024.jpg 905 | COCO_val2014_000000303219.jpg 906 | COCO_val2014_000000303305.jpg 907 | COCO_val2014_000000303413.jpg 908 | COCO_val2014_000000304083.jpg 909 | COCO_val2014_000000304917.jpg 910 | COCO_val2014_000000304941.jpg 911 | COCO_val2014_000000305050.jpg 912 | COCO_val2014_000000306395.jpg 913 | COCO_val2014_000000306467.jpg 914 | COCO_val2014_000000306974.jpg 915 | COCO_val2014_000000307511.jpg 916 | COCO_val2014_000000307800.jpg 917 | COCO_val2014_000000308101.jpg 918 | COCO_val2014_000000308274.jpg 919 | COCO_val2014_000000309237.jpg 920 | COCO_val2014_000000309514.jpg 921 | COCO_val2014_000000309655.jpg 922 | COCO_val2014_000000309852.jpg 923 | COCO_val2014_000000310035.jpg 924 | COCO_val2014_000000310200.jpg 925 | COCO_val2014_000000310735.jpg 926 | COCO_val2014_000000311309.jpg 927 | COCO_val2014_000000311327.jpg 928 | COCO_val2014_000000311531.jpg 929 | COCO_val2014_000000311846.jpg 930 | COCO_val2014_000000311891.jpg 931 | COCO_val2014_000000312056.jpg 932 | COCO_val2014_000000312868.jpg 933 | COCO_val2014_000000313034.jpg 934 | COCO_val2014_000000313246.jpg 935 | COCO_val2014_000000313337.jpg 936 | COCO_val2014_000000313562.jpg 937 | COCO_val2014_000000313593.jpg 938 | COCO_val2014_000000314023.jpg 939 | COCO_val2014_000000314504.jpg 940 | COCO_val2014_000000314694.jpg 941 | COCO_val2014_000000314876.jpg 942 | COCO_val2014_000000315565.jpg 943 | COCO_val2014_000000316189.jpg 944 | COCO_val2014_000000316499.jpg 945 | COCO_val2014_000000316526.jpg 946 | COCO_val2014_000000316617.jpg 947 | COCO_val2014_000000318585.jpg 948 | COCO_val2014_000000318888.jpg 949 | COCO_val2014_000000319743.jpg 950 | COCO_val2014_000000320428.jpg 951 | COCO_val2014_000000320641.jpg 952 | COCO_val2014_000000321214.jpg 953 | COCO_val2014_000000321332.jpg 954 | COCO_val2014_000000321861.jpg 955 | COCO_val2014_000000322482.jpg 956 | COCO_val2014_000000322848.jpg 957 | COCO_val2014_000000323288.jpg 958 | COCO_val2014_000000323327.jpg 959 | COCO_val2014_000000323525.jpg 960 | COCO_val2014_000000323682.jpg 961 | COCO_val2014_000000324258.jpg 962 | COCO_val2014_000000324338.jpg 963 | COCO_val2014_000000324500.jpg 964 | COCO_val2014_000000325374.jpg 965 | COCO_val2014_000000326863.jpg 966 | COCO_val2014_000000326937.jpg 967 | COCO_val2014_000000326970.jpg 968 | COCO_val2014_000000327255.jpg 969 | COCO_val2014_000000328008.jpg 970 | COCO_val2014_000000328200.jpg 971 | 
COCO_val2014_000000328289.jpg 972 | COCO_val2014_000000328421.jpg 973 | COCO_val2014_000000328433.jpg 974 | COCO_val2014_000000329373.jpg 975 | COCO_val2014_000000329573.jpg 976 | COCO_val2014_000000330248.jpg 977 | COCO_val2014_000000330500.jpg 978 | COCO_val2014_000000330522.jpg 979 | COCO_val2014_000000331627.jpg 980 | COCO_val2014_000000332159.jpg 981 | COCO_val2014_000000332407.jpg 982 | COCO_val2014_000000332844.jpg 983 | COCO_val2014_000000332845.jpg 984 | COCO_val2014_000000332914.jpg 985 | COCO_val2014_000000334034.jpg 986 | COCO_val2014_000000334178.jpg 987 | COCO_val2014_000000334332.jpg 988 | COCO_val2014_000000336182.jpg 989 | COCO_val2014_000000336862.jpg 990 | COCO_val2014_000000337160.jpg 991 | COCO_val2014_000000337180.jpg 992 | COCO_val2014_000000337233.jpg 993 | COCO_val2014_000000337498.jpg 994 | COCO_val2014_000000337551.jpg 995 | COCO_val2014_000000338625.jpg 996 | COCO_val2014_000000340069.jpg 997 | COCO_val2014_000000340420.jpg 998 | COCO_val2014_000000340665.jpg 999 | COCO_val2014_000000340804.jpg 1000 | COCO_val2014_000000341401.jpg 1001 | COCO_val2014_000000341603.jpg 1002 | COCO_val2014_000000342293.jpg 1003 | COCO_val2014_000000342367.jpg 1004 | COCO_val2014_000000342770.jpg 1005 | COCO_val2014_000000343504.jpg 1006 | COCO_val2014_000000343552.jpg 1007 | COCO_val2014_000000344029.jpg 1008 | COCO_val2014_000000344125.jpg 1009 | COCO_val2014_000000344271.jpg 1010 | COCO_val2014_000000344548.jpg 1011 | COCO_val2014_000000344881.jpg 1012 | COCO_val2014_000000345356.jpg 1013 | COCO_val2014_000000346207.jpg 1014 | COCO_val2014_000000346589.jpg 1015 | COCO_val2014_000000346759.jpg 1016 | COCO_val2014_000000346904.jpg 1017 | COCO_val2014_000000347544.jpg 1018 | COCO_val2014_000000347766.jpg 1019 | COCO_val2014_000000347930.jpg 1020 | COCO_val2014_000000347995.jpg 1021 | COCO_val2014_000000348186.jpg 1022 | COCO_val2014_000000348730.jpg 1023 | COCO_val2014_000000349101.jpg 1024 | COCO_val2014_000000349888.jpg 1025 | COCO_val2014_000000350073.jpg 1026 | COCO_val2014_000000350231.jpg 1027 | COCO_val2014_000000350389.jpg 1028 | COCO_val2014_000000350639.jpg 1029 | COCO_val2014_000000351331.jpg 1030 | COCO_val2014_000000351489.jpg 1031 | COCO_val2014_000000353028.jpg 1032 | COCO_val2014_000000353149.jpg 1033 | COCO_val2014_000000353510.jpg 1034 | COCO_val2014_000000353953.jpg 1035 | COCO_val2014_000000353982.jpg 1036 | COCO_val2014_000000356733.jpg 1037 | COCO_val2014_000000357229.jpg 1038 | COCO_val2014_000000357317.jpg 1039 | COCO_val2014_000000357578.jpg 1040 | COCO_val2014_000000357633.jpg 1041 | COCO_val2014_000000357888.jpg 1042 | COCO_val2014_000000358231.jpg 1043 | COCO_val2014_000000358525.jpg 1044 | COCO_val2014_000000358765.jpg 1045 | COCO_val2014_000000358976.jpg 1046 | COCO_val2014_000000359164.jpg 1047 | COCO_val2014_000000359219.jpg 1048 | COCO_val2014_000000359715.jpg 1049 | COCO_val2014_000000360073.jpg 1050 | COCO_val2014_000000360216.jpg 1051 | COCO_val2014_000000360661.jpg 1052 | COCO_val2014_000000360878.jpg 1053 | COCO_val2014_000000361073.jpg 1054 | COCO_val2014_000000361238.jpg 1055 | COCO_val2014_000000361382.jpg 1056 | COCO_val2014_000000361521.jpg 1057 | COCO_val2014_000000362219.jpg 1058 | COCO_val2014_000000362640.jpg 1059 | COCO_val2014_000000362941.jpg 1060 | COCO_val2014_000000362951.jpg 1061 | COCO_val2014_000000363415.jpg 1062 | COCO_val2014_000000363581.jpg 1063 | COCO_val2014_000000363767.jpg 1064 | COCO_val2014_000000364266.jpg 1065 | COCO_val2014_000000364284.jpg 1066 | COCO_val2014_000000364608.jpg 1067 | COCO_val2014_000000364745.jpg 
1068 | COCO_val2014_000000366499.jpg 1069 | COCO_val2014_000000366659.jpg 1070 | COCO_val2014_000000367142.jpg 1071 | COCO_val2014_000000367310.jpg 1072 | COCO_val2014_000000367398.jpg 1073 | COCO_val2014_000000367468.jpg 1074 | COCO_val2014_000000368193.jpg 1075 | COCO_val2014_000000368648.jpg 1076 | COCO_val2014_000000368855.jpg 1077 | COCO_val2014_000000368961.jpg 1078 | COCO_val2014_000000368980.jpg 1079 | COCO_val2014_000000369584.jpg 1080 | COCO_val2014_000000369685.jpg 1081 | COCO_val2014_000000370209.jpg 1082 | COCO_val2014_000000370266.jpg 1083 | COCO_val2014_000000370475.jpg 1084 | COCO_val2014_000000370953.jpg 1085 | COCO_val2014_000000371054.jpg 1086 | COCO_val2014_000000371365.jpg 1087 | COCO_val2014_000000371890.jpg 1088 | COCO_val2014_000000373424.jpg 1089 | COCO_val2014_000000375184.jpg 1090 | COCO_val2014_000000375285.jpg 1091 | COCO_val2014_000000375407.jpg 1092 | COCO_val2014_000000375493.jpg 1093 | COCO_val2014_000000376295.jpg 1094 | COCO_val2014_000000376549.jpg 1095 | COCO_val2014_000000376773.jpg 1096 | COCO_val2014_000000376959.jpg 1097 | COCO_val2014_000000377097.jpg 1098 | COCO_val2014_000000378968.jpg 1099 | COCO_val2014_000000379882.jpg 1100 | COCO_val2014_000000381416.jpg 1101 | COCO_val2014_000000382668.jpg 1102 | COCO_val2014_000000382737.jpg 1103 | COCO_val2014_000000383448.jpg 1104 | COCO_val2014_000000384531.jpg 1105 | COCO_val2014_000000384670.jpg 1106 | COCO_val2014_000000386525.jpg 1107 | COCO_val2014_000000386553.jpg 1108 | COCO_val2014_000000386912.jpg 1109 | COCO_val2014_000000388299.jpg 1110 | COCO_val2014_000000388677.jpg 1111 | COCO_val2014_000000388829.jpg 1112 | COCO_val2014_000000389258.jpg 1113 | COCO_val2014_000000390130.jpg 1114 | COCO_val2014_000000391862.jpg 1115 | COCO_val2014_000000394131.jpg 1116 | COCO_val2014_000000394510.jpg 1117 | COCO_val2014_000000395402.jpg 1118 | COCO_val2014_000000396541.jpg 1119 | COCO_val2014_000000396845.jpg 1120 | COCO_val2014_000000397045.jpg 1121 | COCO_val2014_000000397433.jpg 1122 | COCO_val2014_000000397636.jpg 1123 | COCO_val2014_000000398423.jpg 1124 | COCO_val2014_000000398489.jpg 1125 | COCO_val2014_000000398534.jpg 1126 | COCO_val2014_000000398758.jpg 1127 | COCO_val2014_000000398992.jpg 1128 | COCO_val2014_000000399879.jpg 1129 | COCO_val2014_000000400238.jpg 1130 | COCO_val2014_000000401446.jpg 1131 | COCO_val2014_000000402077.jpg 1132 | COCO_val2014_000000402405.jpg 1133 | COCO_val2014_000000403294.jpg 1134 | COCO_val2014_000000403500.jpg 1135 | COCO_val2014_000000403639.jpg 1136 | COCO_val2014_000000404444.jpg 1137 | COCO_val2014_000000404517.jpg 1138 | COCO_val2014_000000405736.jpg 1139 | COCO_val2014_000000406403.jpg 1140 | COCO_val2014_000000407056.jpg 1141 | COCO_val2014_000000407868.jpg 1142 | COCO_val2014_000000408235.jpg 1143 | COCO_val2014_000000409921.jpg 1144 | COCO_val2014_000000410319.jpg 1145 | COCO_val2014_000000410942.jpg 1146 | COCO_val2014_000000411674.jpg 1147 | COCO_val2014_000000412247.jpg 1148 | COCO_val2014_000000413044.jpg 1149 | COCO_val2014_000000413079.jpg 1150 | COCO_val2014_000000413146.jpg 1151 | COCO_val2014_000000413339.jpg 1152 | COCO_val2014_000000413358.jpg 1153 | COCO_val2014_000000414102.jpg 1154 | COCO_val2014_000000414744.jpg 1155 | COCO_val2014_000000416543.jpg 1156 | COCO_val2014_000000416930.jpg 1157 | COCO_val2014_000000416960.jpg 1158 | COCO_val2014_000000417430.jpg 1159 | COCO_val2014_000000417632.jpg 1160 | COCO_val2014_000000418109.jpg 1161 | COCO_val2014_000000418680.jpg 1162 | COCO_val2014_000000418893.jpg 1163 | COCO_val2014_000000419089.jpg 
1164 | COCO_val2014_000000419789.jpg 1165 | COCO_val2014_000000421188.jpg 1166 | COCO_val2014_000000421564.jpg 1167 | COCO_val2014_000000422700.jpg 1168 | COCO_val2014_000000422918.jpg 1169 | COCO_val2014_000000424866.jpg 1170 | COCO_val2014_000000425324.jpg 1171 | COCO_val2014_000000426259.jpg 1172 | COCO_val2014_000000426917.jpg 1173 | COCO_val2014_000000427034.jpg 1174 | COCO_val2014_000000427055.jpg 1175 | COCO_val2014_000000427118.jpg 1176 | COCO_val2014_000000427476.jpg 1177 | COCO_val2014_000000427615.jpg 1178 | COCO_val2014_000000430319.jpg 1179 | COCO_val2014_000000431023.jpg 1180 | COCO_val2014_000000431364.jpg 1181 | COCO_val2014_000000432234.jpg 1182 | COCO_val2014_000000432519.jpg 1183 | COCO_val2014_000000432906.jpg 1184 | COCO_val2014_000000433499.jpg 1185 | COCO_val2014_000000433547.jpg 1186 | COCO_val2014_000000433574.jpg 1187 | COCO_val2014_000000434548.jpg 1188 | COCO_val2014_000000435391.jpg 1189 | COCO_val2014_000000435922.jpg 1190 | COCO_val2014_000000437240.jpg 1191 | COCO_val2014_000000437325.jpg 1192 | COCO_val2014_000000437494.jpg 1193 | COCO_val2014_000000437540.jpg 1194 | COCO_val2014_000000438364.jpg 1195 | COCO_val2014_000000440060.jpg 1196 | COCO_val2014_000000440779.jpg 1197 | COCO_val2014_000000441156.jpg 1198 | COCO_val2014_000000441196.jpg 1199 | COCO_val2014_000000441240.jpg 1200 | COCO_val2014_000000441511.jpg 1201 | COCO_val2014_000000441862.jpg 1202 | COCO_val2014_000000441874.jpg 1203 | COCO_val2014_000000443167.jpg 1204 | COCO_val2014_000000444913.jpg 1205 | COCO_val2014_000000445309.jpg 1206 | COCO_val2014_000000445643.jpg 1207 | COCO_val2014_000000446626.jpg 1208 | COCO_val2014_000000447187.jpg 1209 | COCO_val2014_000000447602.jpg 1210 | COCO_val2014_000000448897.jpg 1211 | COCO_val2014_000000449888.jpg 1212 | COCO_val2014_000000450724.jpg 1213 | COCO_val2014_000000450790.jpg 1214 | COCO_val2014_000000451123.jpg 1215 | COCO_val2014_000000451144.jpg 1216 | COCO_val2014_000000451345.jpg 1217 | COCO_val2014_000000451559.jpg 1218 | COCO_val2014_000000452966.jpg 1219 | COCO_val2014_000000453001.jpg 1220 | COCO_val2014_000000453334.jpg 1221 | COCO_val2014_000000453400.jpg 1222 | COCO_val2014_000000454252.jpg 1223 | COCO_val2014_000000455384.jpg 1224 | COCO_val2014_000000456254.jpg 1225 | COCO_val2014_000000457217.jpg 1226 | COCO_val2014_000000458119.jpg 1227 | COCO_val2014_000000458485.jpg 1228 | COCO_val2014_000000458755.jpg 1229 | COCO_val2014_000000459598.jpg 1230 | COCO_val2014_000000460333.jpg 1231 | COCO_val2014_000000460646.jpg 1232 | COCO_val2014_000000460982.jpg 1233 | COCO_val2014_000000461410.jpg 1234 | COCO_val2014_000000461775.jpg 1235 | COCO_val2014_000000461963.jpg 1236 | COCO_val2014_000000462424.jpg 1237 | COCO_val2014_000000463633.jpg 1238 | COCO_val2014_000000463703.jpg 1239 | COCO_val2014_000000463836.jpg 1240 | COCO_val2014_000000464089.jpg 1241 | COCO_val2014_000000465129.jpg 1242 | COCO_val2014_000000465735.jpg 1243 | COCO_val2014_000000466211.jpg 1244 | COCO_val2014_000000467791.jpg 1245 | COCO_val2014_000000468917.jpg 1246 | COCO_val2014_000000468965.jpg 1247 | COCO_val2014_000000469982.jpg 1248 | COCO_val2014_000000470095.jpg 1249 | COCO_val2014_000000472530.jpg 1250 | COCO_val2014_000000474078.jpg 1251 | COCO_val2014_000000474119.jpg 1252 | COCO_val2014_000000474751.jpg 1253 | COCO_val2014_000000476810.jpg 1254 | COCO_val2014_000000476856.jpg 1255 | COCO_val2014_000000477112.jpg 1256 | COCO_val2014_000000478906.jpg 1257 | COCO_val2014_000000479948.jpg 1258 | COCO_val2014_000000481010.jpg 1259 | COCO_val2014_000000481285.jpg 
1260 | COCO_val2014_000000482275.jpg 1261 | COCO_val2014_000000482319.jpg 1262 | COCO_val2014_000000482585.jpg 1263 | COCO_val2014_000000482589.jpg 1264 | COCO_val2014_000000485139.jpg 1265 | COCO_val2014_000000485201.jpg 1266 | COCO_val2014_000000485485.jpg 1267 | COCO_val2014_000000485489.jpg 1268 | COCO_val2014_000000485673.jpg 1269 | COCO_val2014_000000485822.jpg 1270 | COCO_val2014_000000486369.jpg 1271 | COCO_val2014_000000487002.jpg 1272 | COCO_val2014_000000487282.jpg 1273 | COCO_val2014_000000487375.jpg 1274 | COCO_val2014_000000487498.jpg 1275 | COCO_val2014_000000487925.jpg 1276 | COCO_val2014_000000488346.jpg 1277 | COCO_val2014_000000488463.jpg 1278 | COCO_val2014_000000488942.jpg 1279 | COCO_val2014_000000488977.jpg 1280 | COCO_val2014_000000489014.jpg 1281 | COCO_val2014_000000491213.jpg 1282 | COCO_val2014_000000491784.jpg 1283 | COCO_val2014_000000492084.jpg 1284 | COCO_val2014_000000492284.jpg 1285 | COCO_val2014_000000492886.jpg 1286 | COCO_val2014_000000493004.jpg 1287 | COCO_val2014_000000493102.jpg 1288 | COCO_val2014_000000493196.jpg 1289 | COCO_val2014_000000493218.jpg 1290 | COCO_val2014_000000493295.jpg 1291 | COCO_val2014_000000493724.jpg 1292 | COCO_val2014_000000494144.jpg 1293 | COCO_val2014_000000494345.jpg 1294 | COCO_val2014_000000494456.jpg 1295 | COCO_val2014_000000494957.jpg 1296 | COCO_val2014_000000496081.jpg 1297 | COCO_val2014_000000496252.jpg 1298 | COCO_val2014_000000496379.jpg 1299 | COCO_val2014_000000497002.jpg 1300 | COCO_val2014_000000497049.jpg 1301 | COCO_val2014_000000497365.jpg 1302 | COCO_val2014_000000497801.jpg 1303 | COCO_val2014_000000499266.jpg 1304 | COCO_val2014_000000503101.jpg 1305 | COCO_val2014_000000504621.jpg 1306 | COCO_val2014_000000505845.jpg 1307 | COCO_val2014_000000505899.jpg 1308 | COCO_val2014_000000506656.jpg 1309 | COCO_val2014_000000506933.jpg 1310 | COCO_val2014_000000507330.jpg 1311 | COCO_val2014_000000508101.jpg 1312 | COCO_val2014_000000508440.jpg 1313 | COCO_val2014_000000508949.jpg 1314 | COCO_val2014_000000510358.jpg 1315 | COCO_val2014_000000510798.jpg 1316 | COCO_val2014_000000511674.jpg 1317 | COCO_val2014_000000512116.jpg 1318 | COCO_val2014_000000512442.jpg 1319 | COCO_val2014_000000513136.jpg 1320 | COCO_val2014_000000514049.jpg 1321 | COCO_val2014_000000514083.jpg 1322 | COCO_val2014_000000514376.jpg 1323 | COCO_val2014_000000515226.jpg 1324 | COCO_val2014_000000515247.jpg 1325 | COCO_val2014_000000515777.jpg 1326 | COCO_val2014_000000516677.jpg 1327 | COCO_val2014_000000516786.jpg 1328 | COCO_val2014_000000516856.jpg 1329 | COCO_val2014_000000517306.jpg 1330 | COCO_val2014_000000518383.jpg 1331 | COCO_val2014_000000518721.jpg 1332 | COCO_val2014_000000520531.jpg 1333 | COCO_val2014_000000520832.jpg 1334 | COCO_val2014_000000520982.jpg 1335 | COCO_val2014_000000521008.jpg 1336 | COCO_val2014_000000521819.jpg 1337 | COCO_val2014_000000522380.jpg 1338 | COCO_val2014_000000522935.jpg 1339 | COCO_val2014_000000523527.jpg 1340 | COCO_val2014_000000523871.jpg 1341 | COCO_val2014_000000524187.jpg 1342 | COCO_val2014_000000525272.jpg 1343 | COCO_val2014_000000525373.jpg 1344 | COCO_val2014_000000525587.jpg 1345 | COCO_val2014_000000526186.jpg 1346 | COCO_val2014_000000527733.jpg 1347 | COCO_val2014_000000527961.jpg 1348 | COCO_val2014_000000527995.jpg 1349 | COCO_val2014_000000528098.jpg 1350 | COCO_val2014_000000528201.jpg 1351 | COCO_val2014_000000528729.jpg 1352 | COCO_val2014_000000529355.jpg 1353 | COCO_val2014_000000529838.jpg 1354 | COCO_val2014_000000529944.jpg 1355 | COCO_val2014_000000531286.jpg 
1356 | COCO_val2014_000000531510.jpg 1357 | COCO_val2014_000000531532.jpg 1358 | COCO_val2014_000000531967.jpg 1359 | COCO_val2014_000000532003.jpg 1360 | COCO_val2014_000000532058.jpg 1361 | COCO_val2014_000000532142.jpg 1362 | COCO_val2014_000000533408.jpg 1363 | COCO_val2014_000000534669.jpg 1364 | COCO_val2014_000000535080.jpg 1365 | COCO_val2014_000000535579.jpg 1366 | COCO_val2014_000000535889.jpg 1367 | COCO_val2014_000000536321.jpg 1368 | COCO_val2014_000000537991.jpg 1369 | COCO_val2014_000000538054.jpg 1370 | COCO_val2014_000000539683.jpg 1371 | COCO_val2014_000000539808.jpg 1372 | COCO_val2014_000000540697.jpg 1373 | COCO_val2014_000000541613.jpg 1374 | COCO_val2014_000000542079.jpg 1375 | COCO_val2014_000000542205.jpg 1376 | COCO_val2014_000000543231.jpg 1377 | COCO_val2014_000000543347.jpg 1378 | COCO_val2014_000000543570.jpg 1379 | COCO_val2014_000000544876.jpg 1380 | COCO_val2014_000000545002.jpg 1381 | COCO_val2014_000000546029.jpg 1382 | COCO_val2014_000000546052.jpg 1383 | COCO_val2014_000000546792.jpg 1384 | COCO_val2014_000000546896.jpg 1385 | COCO_val2014_000000546928.jpg 1386 | COCO_val2014_000000548164.jpg 1387 | COCO_val2014_000000548336.jpg 1388 | COCO_val2014_000000549427.jpg 1389 | COCO_val2014_000000550405.jpg 1390 | COCO_val2014_000000551334.jpg 1391 | COCO_val2014_000000551336.jpg 1392 | COCO_val2014_000000551794.jpg 1393 | COCO_val2014_000000551921.jpg 1394 | COCO_val2014_000000551974.jpg 1395 | COCO_val2014_000000552945.jpg 1396 | COCO_val2014_000000553561.jpg 1397 | COCO_val2014_000000555131.jpg 1398 | COCO_val2014_000000555898.jpg 1399 | COCO_val2014_000000557117.jpg 1400 | COCO_val2014_000000557150.jpg 1401 | COCO_val2014_000000557163.jpg 1402 | COCO_val2014_000000557324.jpg 1403 | COCO_val2014_000000557517.jpg 1404 | COCO_val2014_000000557780.jpg 1405 | COCO_val2014_000000557926.jpg 1406 | COCO_val2014_000000558171.jpg 1407 | COCO_val2014_000000558457.jpg 1408 | COCO_val2014_000000558498.jpg 1409 | COCO_val2014_000000558539.jpg 1410 | COCO_val2014_000000558587.jpg 1411 | COCO_val2014_000000559277.jpg 1412 | COCO_val2014_000000561004.jpg 1413 | COCO_val2014_000000561116.jpg 1414 | COCO_val2014_000000561128.jpg 1415 | COCO_val2014_000000561393.jpg 1416 | COCO_val2014_000000561745.jpg 1417 | COCO_val2014_000000561780.jpg 1418 | COCO_val2014_000000562289.jpg 1419 | COCO_val2014_000000562803.jpg 1420 | COCO_val2014_000000562856.jpg 1421 | COCO_val2014_000000562906.jpg 1422 | COCO_val2014_000000563615.jpg 1423 | COCO_val2014_000000563909.jpg 1424 | COCO_val2014_000000564166.jpg 1425 | COCO_val2014_000000564489.jpg 1426 | COCO_val2014_000000564743.jpg 1427 | COCO_val2014_000000564936.jpg 1428 | COCO_val2014_000000565085.jpg 1429 | COCO_val2014_000000565575.jpg 1430 | COCO_val2014_000000566364.jpg 1431 | COCO_val2014_000000566672.jpg 1432 | COCO_val2014_000000566941.jpg 1433 | COCO_val2014_000000567494.jpg 1434 | COCO_val2014_000000567740.jpg 1435 | COCO_val2014_000000569543.jpg 1436 | COCO_val2014_000000570773.jpg 1437 | COCO_val2014_000000571598.jpg 1438 | COCO_val2014_000000572453.jpg 1439 | COCO_val2014_000000572790.jpg 1440 | COCO_val2014_000000572825.jpg 1441 | COCO_val2014_000000572900.jpg 1442 | COCO_val2014_000000573258.jpg 1443 | COCO_val2014_000000573753.jpg 1444 | COCO_val2014_000000573935.jpg 1445 | COCO_val2014_000000574108.jpg 1446 | COCO_val2014_000000574315.jpg 1447 | COCO_val2014_000000574350.jpg 1448 | COCO_val2014_000000575135.jpg 1449 | COCO_val2014_000000575719.jpg 1450 | COCO_val2014_000000575915.jpg 1451 | COCO_val2014_000000577077.jpg 
1452 | COCO_val2014_000000577783.jpg 1453 | COCO_val2014_000000578210.jpg 1454 | COCO_val2014_000000578324.jpg 1455 | COCO_val2014_000000578391.jpg 1456 | COCO_val2014_000000578813.jpg 1457 | COCO_val2014_000000579815.jpg 1458 | COCO_val2014_000000579822.jpg 1459 | COCO_val2014_000000581451.jpg 1460 | -------------------------------------------------------------------------------- /capsal/visualize.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Display and Visualization Functions. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import os 11 | import sys 12 | import logging 13 | import random 14 | import itertools 15 | import colorsys 16 | 17 | import numpy as np 18 | from skimage.measure import find_contours 19 | import matplotlib.pyplot as plt 20 | from matplotlib import patches, lines 21 | from matplotlib.patches import Polygon 22 | import IPython.display 23 | 24 | # Root directory of the project 25 | ROOT_DIR = os.path.abspath("../") 26 | 27 | # Import Mask RCNN 28 | sys.path.append(ROOT_DIR) # To find local version of the library 29 | from mrcnn import utils 30 | 31 | 32 | ############################################################ 33 | # Visualization 34 | ############################################################ 35 | 36 | def display_images(images, titles=None, cols=4, cmap=None, norm=None, 37 | interpolation=None): 38 | """Display the given set of images, optionally with titles. 39 | images: list or array of image tensors in HWC format. 40 | titles: optional. A list of titles to display with each image. 41 | cols: number of images per row 42 | cmap: Optional. Color map to use. For example, "Blues". 43 | norm: Optional. A Normalize instance to map values to colors. 44 | interpolation: Optional. Image interporlation to use for display. 45 | """ 46 | titles = titles if titles is not None else [""] * len(images) 47 | rows = len(images) // cols + 1 48 | plt.figure(figsize=(14, 14 * rows // cols)) 49 | i = 1 50 | for image, title in zip(images, titles): 51 | plt.subplot(rows, cols, i) 52 | plt.title(title, fontsize=9) 53 | plt.axis('off') 54 | plt.imshow(image.astype(np.uint8), cmap=cmap, 55 | norm=norm, interpolation=interpolation) 56 | i += 1 57 | plt.show() 58 | 59 | 60 | def random_colors(N, bright=True): 61 | """ 62 | Generate random colors. 63 | To get visually distinct colors, generate them in HSV space then 64 | convert to RGB. 65 | """ 66 | brightness = 1.0 if bright else 0.7 67 | hsv = [(i / N, 1, brightness) for i in range(N)] 68 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 69 | random.shuffle(colors) 70 | return colors 71 | 72 | 73 | def apply_mask(image, mask, color, alpha=0.5): 74 | """Apply the given mask to the image. 75 | """ 76 | for c in range(3): 77 | image[:, :, c] = np.where(mask == 1, 78 | image[:, :, c] * 79 | (1 - alpha) + alpha * color[c] * 255, 80 | image[:, :, c]) 81 | return image 82 | 83 | 84 | def display_instances(image, boxes, masks, class_ids, class_names, 85 | scores=None, title="", 86 | figsize=(16, 16), ax=None, 87 | show_mask=True, show_bbox=True, 88 | colors=None, captions=None): 89 | """ 90 | boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. 
91 | masks: [height, width, num_instances] 92 | class_ids: [num_instances] 93 | class_names: list of class names of the dataset 94 | scores: (optional) confidence scores for each box 95 | title: (optional) Figure title 96 | show_mask, show_bbox: To show masks and bounding boxes or not 97 | figsize: (optional) the size of the image 98 | colors: (optional) An array or colors to use with each object 99 | captions: (optional) A list of strings to use as captions for each object 100 | """ 101 | # Number of instances 102 | N = boxes.shape[0] 103 | if not N: 104 | print("\n*** No instances to display *** \n") 105 | else: 106 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] 107 | 108 | # If no axis is passed, create one and automatically call show() 109 | auto_show = False 110 | if not ax: 111 | _, ax = plt.subplots(1, figsize=figsize) 112 | auto_show = True 113 | 114 | # Generate random colors 115 | colors = colors or random_colors(N) 116 | 117 | # Show area outside image boundaries. 118 | height, width = image.shape[:2] 119 | ax.set_ylim(height + 10, -10) 120 | ax.set_xlim(-10, width + 10) 121 | ax.axis('off') 122 | ax.set_title(title) 123 | 124 | masked_image = image.astype(np.uint32).copy() 125 | for i in range(N): 126 | color = colors[i] 127 | 128 | # Bounding box 129 | if not np.any(boxes[i]): 130 | # Skip this instance. Has no bbox. Likely lost in image cropping. 131 | continue 132 | y1, x1, y2, x2 = boxes[i] 133 | if show_bbox: 134 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 135 | alpha=0.7, linestyle="dashed", 136 | edgecolor=color, facecolor='none') 137 | ax.add_patch(p) 138 | 139 | # Label 140 | # if not captions: 141 | # class_id = class_ids[i] 142 | # score = scores[i] if scores is not None else None 143 | # label = class_names[class_id] 144 | # x = random.randint(x1, (x1 + x2) // 2) 145 | # caption = "{} {:.3f}".format(label, score) if score else label 146 | # else: 147 | # caption = captions[i] 148 | # ax.text(x1, y1 + 8, caption, 149 | # color='w', size=11, backgroundcolor="none") 150 | 151 | # Mask 152 | mask = masks[:, :, i] 153 | if show_mask: 154 | masked_image = apply_mask(masked_image, mask, color) 155 | 156 | # Mask Polygon 157 | # Pad to ensure proper polygons for masks that touch image edges. 158 | padded_mask = np.zeros( 159 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 160 | padded_mask[1:-1, 1:-1] = mask 161 | contours = find_contours(padded_mask, 0.5) 162 | for verts in contours: 163 | # Subtract the padding and flip (y, x) to (x, y) 164 | verts = np.fliplr(verts) - 1 165 | p = Polygon(verts, facecolor="none", edgecolor=color) 166 | ax.add_patch(p) 167 | ax.imshow(masked_image.astype(np.uint8)) 168 | # if auto_show: 169 | # plt.show() 170 | 171 | 172 | def display_differences(image, 173 | gt_box, gt_class_id, gt_mask, 174 | pred_box, pred_class_id, pred_score, pred_mask, 175 | class_names, title="", ax=None, 176 | show_mask=True, show_box=True, 177 | iou_threshold=0.5, score_threshold=0.5): 178 | """Display ground truth and prediction instances on the same image.""" 179 | # Match predictions to ground truth 180 | gt_match, pred_match, overlaps = utils.compute_matches( 181 | gt_box, gt_class_id, gt_mask, 182 | pred_box, pred_class_id, pred_score, pred_mask, 183 | iou_threshold=iou_threshold, score_threshold=score_threshold) 184 | # Ground truth = green. 
Predictions = red 185 | colors = [(0, 1, 0, .8)] * len(gt_match)\ 186 | + [(1, 0, 0, 1)] * len(pred_match) 187 | # Concatenate GT and predictions 188 | class_ids = np.concatenate([gt_class_id, pred_class_id]) 189 | scores = np.concatenate([np.zeros([len(gt_match)]), pred_score]) 190 | boxes = np.concatenate([gt_box, pred_box]) 191 | masks = np.concatenate([gt_mask, pred_mask], axis=-1) 192 | # Captions per instance show score/IoU 193 | captions = ["" for m in gt_match] + ["{:.2f} / {:.2f}".format( 194 | pred_score[i], 195 | (overlaps[i, int(pred_match[i])] 196 | if pred_match[i] > -1 else overlaps[i].max())) 197 | for i in range(len(pred_match))] 198 | # Set title if not provided 199 | title = title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU" 200 | # Display 201 | display_instances( 202 | image, 203 | boxes, masks, class_ids, 204 | class_names, scores, ax=ax, 205 | show_bbox=show_box, show_mask=show_mask, 206 | colors=colors, captions=captions, 207 | title=title) 208 | 209 | 210 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 211 | """ 212 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 213 | proposals: [n, 4] the same anchors but refined to fit objects better. 214 | """ 215 | masked_image = image.copy() 216 | 217 | # Pick random anchors in case there are too many. 218 | ids = np.arange(rois.shape[0], dtype=np.int32) 219 | ids = np.random.choice( 220 | ids, limit, replace=False) if ids.shape[0] > limit else ids 221 | 222 | fig, ax = plt.subplots(1, figsize=(12, 12)) 223 | if rois.shape[0] > limit: 224 | plt.title("Showing {} random ROIs out of {}".format( 225 | len(ids), rois.shape[0])) 226 | else: 227 | plt.title("{} ROIs".format(len(ids))) 228 | 229 | # Show area outside image boundaries. 230 | ax.set_ylim(image.shape[0] + 20, -20) 231 | ax.set_xlim(-50, image.shape[1] + 20) 232 | ax.axis('off') 233 | 234 | for i, id in enumerate(ids): 235 | color = np.random.rand(3) 236 | class_id = class_ids[id] 237 | # ROI 238 | y1, x1, y2, x2 = rois[id] 239 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 240 | edgecolor=color if class_id else "gray", 241 | facecolor='none', linestyle="dashed") 242 | ax.add_patch(p) 243 | # Refined ROI 244 | if class_id: 245 | ry1, rx1, ry2, rx2 = refined_rois[id] 246 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 247 | edgecolor=color, facecolor='none') 248 | ax.add_patch(p) 249 | # Connect the top-left corners of the anchor and proposal for easy visualization 250 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 251 | 252 | # Label 253 | label = class_names[class_id] 254 | ax.text(rx1, ry1 + 8, "{}".format(label), 255 | color='w', size=11, backgroundcolor="none") 256 | 257 | # Mask 258 | m = utils.unmold_mask(mask[id], rois[id] 259 | [:4].astype(np.int32), image.shape) 260 | masked_image = apply_mask(masked_image, m, color) 261 | 262 | ax.imshow(masked_image) 263 | 264 | # Print stats 265 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 266 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 267 | print("Positive Ratio: {:.2f}".format( 268 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 269 | 270 | 271 | # TODO: Replace with matplotlib equivalent? 272 | def draw_box(image, box, color): 273 | """Draw 3-pixel width bounding boxes on the given image array. 274 | color: list of 3 int values for RGB. 
275 | """ 276 | y1, x1, y2, x2 = box 277 | image[y1:y1 + 2, x1:x2] = color 278 | image[y2:y2 + 2, x1:x2] = color 279 | image[y1:y2, x1:x1 + 2] = color 280 | image[y1:y2, x2:x2 + 2] = color 281 | return image 282 | 283 | 284 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 285 | """Display the given image and the top few class masks.""" 286 | to_display = [] 287 | titles = [] 288 | to_display.append(image) 289 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 290 | # Pick top prominent classes in this image 291 | unique_class_ids = np.unique(class_ids) 292 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 293 | for i in unique_class_ids] 294 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 295 | key=lambda r: r[1], reverse=True) if v[1] > 0] 296 | # Generate images and titles 297 | for i in range(limit): 298 | class_id = top_ids[i] if i < len(top_ids) else -1 299 | # Pull masks of instances belonging to the same class. 300 | m = mask[:, :, np.where(class_ids == class_id)[0]] 301 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 302 | to_display.append(m) 303 | titles.append(class_names[class_id] if class_id != -1 else "-") 304 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 305 | 306 | 307 | def plot_precision_recall(AP, precisions, recalls): 308 | """Draw the precision-recall curve. 309 | 310 | AP: Average precision at IoU >= 0.5 311 | precisions: list of precision values 312 | recalls: list of recall values 313 | """ 314 | # Plot the Precision-Recall curve 315 | _, ax = plt.subplots(1) 316 | ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP)) 317 | ax.set_ylim(0, 1.1) 318 | ax.set_xlim(0, 1.1) 319 | _ = ax.plot(recalls, precisions) 320 | 321 | 322 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 323 | overlaps, class_names, threshold=0.5): 324 | """Draw a grid showing how ground truth objects are classified. 325 | gt_class_ids: [N] int. Ground truth class IDs 326 | pred_class_id: [N] int. Predicted class IDs 327 | pred_scores: [N] float. The probability scores of predicted classes 328 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictins and GT boxes. 329 | class_names: list of all class names in the dataset 330 | threshold: Float. The prediction probability required to predict a class 331 | """ 332 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 333 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 334 | 335 | plt.figure(figsize=(12, 10)) 336 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 337 | plt.yticks(np.arange(len(pred_class_ids)), 338 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 339 | for i, id in enumerate(pred_class_ids)]) 340 | plt.xticks(np.arange(len(gt_class_ids)), 341 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 342 | 343 | thresh = overlaps.max() / 2. 
344 | for i, j in itertools.product(range(overlaps.shape[0]), 345 | range(overlaps.shape[1])): 346 | text = "" 347 | if overlaps[i, j] > threshold: 348 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 349 | color = ("white" if overlaps[i, j] > thresh 350 | else "black" if overlaps[i, j] > 0 351 | else "grey") 352 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 353 | horizontalalignment="center", verticalalignment="center", 354 | fontsize=9, color=color) 355 | 356 | plt.tight_layout() 357 | plt.xlabel("Ground Truth") 358 | plt.ylabel("Predictions") 359 | 360 | 361 | def draw_boxes(image, boxes=None, refined_boxes=None, 362 | masks=None, captions=None, visibilities=None, 363 | title="", ax=None): 364 | """Draw bounding boxes and segmentation masks with differnt 365 | customizations. 366 | 367 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 368 | refined_boxes: Like boxes, but draw with solid lines to show 369 | that they're the result of refining 'boxes'. 370 | masks: [N, height, width] 371 | captions: List of N titles to display on each box 372 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 373 | prominant each bounding box should be. 374 | title: An optional title to show over the image 375 | ax: (optional) Matplotlib axis to draw on. 376 | """ 377 | # Number of boxes 378 | assert boxes is not None or refined_boxes is not None 379 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 380 | 381 | # Matplotlib Axis 382 | if not ax: 383 | _, ax = plt.subplots(1, figsize=(12, 12)) 384 | 385 | # Generate random colors 386 | colors = random_colors(N) 387 | 388 | # Show area outside image boundaries. 389 | margin = image.shape[0] // 10 390 | ax.set_ylim(image.shape[0] + margin, -margin) 391 | ax.set_xlim(-margin, image.shape[1] + margin) 392 | ax.axis('off') 393 | 394 | ax.set_title(title) 395 | 396 | masked_image = image.astype(np.uint32).copy() 397 | for i in range(N): 398 | # Box visibility 399 | visibility = visibilities[i] if visibilities is not None else 1 400 | if visibility == 0: 401 | color = "gray" 402 | style = "dotted" 403 | alpha = 0.5 404 | elif visibility == 1: 405 | color = colors[i] 406 | style = "dotted" 407 | alpha = 1 408 | elif visibility == 2: 409 | color = colors[i] 410 | style = "solid" 411 | alpha = 1 412 | 413 | # Boxes 414 | if boxes is not None: 415 | if not np.any(boxes[i]): 416 | # Skip this instance. Has no bbox. Likely lost in cropping. 
417 | continue 418 | y1, x1, y2, x2 = boxes[i] 419 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 420 | alpha=alpha, linestyle=style, 421 | edgecolor=color, facecolor='none') 422 | ax.add_patch(p) 423 | 424 | # Refined boxes 425 | if refined_boxes is not None and visibility > 0: 426 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 427 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 428 | edgecolor=color, facecolor='none') 429 | ax.add_patch(p) 430 | # Connect the top-left corners of the anchor and proposal 431 | if boxes is not None: 432 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 433 | 434 | # Captions 435 | if captions is not None: 436 | caption = captions[i] 437 | # If there are refined boxes, display captions on them 438 | if refined_boxes is not None: 439 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 440 | x = random.randint(x1, (x1 + x2) // 2) 441 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 442 | color='w', backgroundcolor="none", 443 | bbox={'facecolor': color, 'alpha': 0.5, 444 | 'pad': 2, 'edgecolor': 'none'}) 445 | 446 | # Masks 447 | if masks is not None: 448 | mask = masks[:, :, i] 449 | masked_image = apply_mask(masked_image, mask, color) 450 | # Mask Polygon 451 | # Pad to ensure proper polygons for masks that touch image edges. 452 | padded_mask = np.zeros( 453 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 454 | padded_mask[1:-1, 1:-1] = mask 455 | contours = find_contours(padded_mask, 0.5) 456 | for verts in contours: 457 | # Subtract the padding and flip (y, x) to (x, y) 458 | verts = np.fliplr(verts) - 1 459 | p = Polygon(verts, facecolor="none", edgecolor=color) 460 | ax.add_patch(p) 461 | ax.imshow(masked_image.astype(np.uint8)) 462 | 463 | 464 | def display_table(table): 465 | """Display values in a table format. 466 | table: an iterable of rows, and each row is an iterable of values. 467 | """ 468 | html = "" 469 | for row in table: 470 | row_html = "" 471 | for col in row: 472 | row_html += "<td>{:40}</td>".format(str(col)) 473 | html += "<tr>" + row_html + "</tr>" 474 | html = "<table>" + html + "</table>"
475 | IPython.display.display(IPython.display.HTML(html)) 476 | 477 | 478 | def display_weight_stats(model): 479 | """Scans all the weights in the model and returns a list of tuples 480 | that contain stats about each weight. 481 | """ 482 | layers = model.get_trainable_layers() 483 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 484 | for l in layers: 485 | weight_values = l.get_weights() # list of Numpy arrays 486 | weight_tensors = l.weights # list of TF tensors 487 | for i, w in enumerate(weight_values): 488 | weight_name = weight_tensors[i].name 489 | # Detect problematic layers. Exclude biases of conv layers. 490 | alert = "" 491 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 492 | alert += "*** dead?" 493 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 494 | alert += "*** Overflow?" 495 | # Add row 496 | table.append([ 497 | weight_name + alert, 498 | str(w.shape), 499 | "{:+9.4f}".format(w.min()), 500 | "{:+10.4f}".format(w.max()), 501 | "{:+9.4f}".format(w.std()), 502 | ]) 503 | display_table(table) 504 | -------------------------------------------------------------------------------- /capsal/vocabulary.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import pandas as pd 4 | from tqdm import tqdm 5 | import string 6 | from nltk.tokenize import word_tokenize 7 | class Vocabulary(object): 8 | def __init__(self, size, save_file=None): 9 | self.words = [] 10 | self.word2idx = {} 11 | self.word_frequencies = [] 12 | self.size = size 13 | if save_file is not None: 14 | self.load(save_file) 15 | 16 | def build(self, sentences): 17 | """ Build the vocabulary and compute the frequency of each word. """ 18 | word_counts = {} 19 | for sentence in tqdm(sentences): 20 | for w in word_tokenize(sentence.lower()): 21 | word_counts[w] = word_counts.get(w, 0) + 1.0 22 | 23 | assert self.size-1 <= len(word_counts.keys()) 24 | self.words.append('') 25 | self.word2idx[''] = 0 26 | self.word_frequencies.append(1.0) 27 | 28 | word_counts = sorted(list(word_counts.items()), 29 | key=lambda x: x[1], 30 | reverse=True) 31 | 32 | for idx in range(self.size-1): 33 | word, frequency = word_counts[idx] 34 | self.words.append(word) 35 | self.word2idx[word] = idx + 1 36 | self.word_frequencies.append(frequency) 37 | 38 | self.word_frequencies = np.array(self.word_frequencies) 39 | self.word_frequencies /= np.sum(self.word_frequencies) 40 | self.word_frequencies = np.log(self.word_frequencies) 41 | self.word_frequencies -= np.max(self.word_frequencies) 42 | 43 | def process_sentence(self, sentence): 44 | """ Tokenize a sentence, and translate each token into its index 45 | in the vocabulary. """ 46 | words = word_tokenize(sentence.lower()) 47 | word_idxs = [self.word2idx[w] for w in words] 48 | return word_idxs 49 | 50 | def get_sentence(self, idxs): 51 | """ Translate a vector of indices into a sentence. """ 52 | words = [self.words[i] for i in idxs] 53 | if words[-1] != '.': 54 | words.append('.') 55 | length = np.argmax(np.array(words)=='.') + 1 56 | words = words[:length] 57 | sentence = "".join([" "+w if not w.startswith("'") \ 58 | and w not in string.punctuation \ 59 | else w for w in words]).strip() 60 | return sentence 61 | 62 | def save(self, save_file): 63 | """ Save the vocabulary to a file.
""" 64 | data = pd.DataFrame({'word': self.words, 65 | 'index': list(range(self.size)), 66 | 'frequency': self.word_frequencies}) 67 | data.to_csv(save_file) 68 | 69 | def load(self, save_file): 70 | """ Load the vocabulary from a file. """ 71 | assert os.path.exists(save_file) 72 | data = pd.read_csv(save_file) 73 | self.words = data['word'].values 74 | self.word2idx = {self.words[i]:i for i in range(self.size)} 75 | self.word_frequencies = data['frequency'].values 76 | -------------------------------------------------------------------------------- /data/ReadMe.md: -------------------------------------------------------------------------------- 1 | ## Instruction 2 | Download the COCO-CapSal dataset from [BaiduYun](https://pan.baidu.com/s/1iU8A-RII7rvOG9KHz5Dysg) or [Google](). We provide the images, ground truth, image captions and instance masks. 3 | * `train_img_gt.zip`: images and GT of training set. 4 | * `val_img_gt.zip`: images and GT for validation set. 5 | * `train.zip`: image captions and instance masks for training set. 6 | * `val.zip`: image captions and instance masks for validation set. 7 | 8 | `train.zip` contains `vocabulary.csv` (dictionary of captions) and `train.npy` (captions and instance masks). 9 | `val.zip` contains `caption_gt.json` (gt captions for evaluation) and `val.npy` (captions and instance masks). 10 | `train/val.npy` contains `image_id` (image index), `image_name` (image name), `gt` (GT saliency map), `masks` (instance masks), `b_box` (bounding box), `captionw` (captions). 11 | For training, first run `preprocessing.py` to preprocess the caption data. 12 | -------------------------------------------------------------------------------- /data/preprocessing.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | from tqdm import tqdm 4 | import pandas as pd 5 | import json 6 | from mrcnn.vocabulary import Vocabulary 7 | from nltk.tokenize import word_tokenize 8 | data = np.load('train.npy') 9 | vocabulary = Vocabulary(5000,'vocabulary.csv') 10 | caps = [] 11 | train_update=[] 12 | for dd in data: 13 | 14 | word = dd['captionw'] 15 | word_idxs = [] 16 | masks = [] 17 | for w in word: 18 | current_word_idxs_ = vocabulary.process_sentence(w) 19 | current_num_words = len(current_word_idxs_) 20 | current_word_idxs = np.zeros(15,dtype=np.int32) 21 | current_masks = np.zeros(15) 22 | current_word_idxs[:current_num_words] = np.array(current_word_idxs_) 23 | current_masks[:current_num_words] = 1.0 24 | word_idxs.append(current_word_idxs) 25 | masks.append(current_masks) 26 | word_idxs = np.array([word_idxs]) 27 | word_masks = np.array([masks]) 28 | dd['caption']=word_idxs 29 | dd['caption_mask']=word_masks 30 | train_update.append(dd) 31 | np.save('./data/train_update.npy',train_update) 32 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy 2 | scipy 3 | Pillow 4 | cython 5 | matplotlib 6 | scikit-image 7 | tensorflow>=1.3.0 8 | keras>=2.0.8 9 | opencv-python 10 | h5py 11 | imgaug 12 | IPython[all] -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | license-file = LICENSE 4 | requirements-file = requirements.txt 
-------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | """ 2 | The build/compilations setup 3 | 4 | >> pip install -r requirements.txt 5 | >> python setup.py install 6 | """ 7 | import pip 8 | import logging 9 | import pkg_resources 10 | try: 11 | from setuptools import setup 12 | except ImportError: 13 | from distutils.core import setup 14 | 15 | 16 | def _parse_requirements(file_path): 17 | pip_ver = pkg_resources.get_distribution('pip').version 18 | pip_version = list(map(int, pip_ver.split('.')[:2])) 19 | if pip_version >= [6, 0]: 20 | raw = pip.req.parse_requirements(file_path, 21 | session=pip.download.PipSession()) 22 | else: 23 | raw = pip.req.parse_requirements(file_path) 24 | return [str(i.req) for i in raw] 25 | 26 | 27 | # parse_requirements() returns generator of pip.req.InstallRequirement objects 28 | try: 29 | install_reqs = _parse_requirements("requirements.txt") 30 | except Exception: 31 | logging.warning('Fail load requirements file, so using default ones.') 32 | install_reqs = [] 33 | 34 | setup( 35 | name='mask-rcnn', 36 | version='2.1', 37 | url='https://github.com/matterport/Mask_RCNN', 38 | author='Matterport', 39 | author_email='waleed.abdulla@gmail.com', 40 | license='MIT', 41 | description='Mask R-CNN for object detection and instance segmentation', 42 | packages=["mrcnn"], 43 | install_requires=install_reqs, 44 | include_package_data=True, 45 | python_requires='>=3.4', 46 | long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow. 47 | The model generates bounding boxes and segmentation masks for each instance of an object in the image. 48 | It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""", 49 | classifiers=[ 50 | "Development Status :: 5 - Production/Stable", 51 | "Environment :: Console", 52 | "Intended Audience :: Developers", 53 | "Intended Audience :: Information Technology", 54 | "Intended Audience :: Education", 55 | "Intended Audience :: Science/Research", 56 | "License :: OSI Approved :: MIT License", 57 | "Natural Language :: English", 58 | "Operating System :: OS Independent", 59 | "Topic :: Scientific/Engineering :: Artificial Intelligence", 60 | "Topic :: Scientific/Engineering :: Image Recognition", 61 | "Topic :: Scientific/Engineering :: Visualization", 62 | "Topic :: Scientific/Engineering :: Image Segmentation", 63 | 'Programming Language :: Python :: 3.4', 64 | 'Programming Language :: Python :: 3.5', 65 | 'Programming Language :: Python :: 3.6', 66 | ], 67 | keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras", 68 | ) 69 | -------------------------------------------------------------------------------- /test_capsal.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import math 5 | import re 6 | import time 7 | import numpy as np 8 | import cv2 9 | import matplotlib 10 | import matplotlib.pyplot as plt 11 | import skimage.color 12 | import skimage.io 13 | 14 | from capsal.config import Config 15 | from capsal import utils 16 | from capsal import model_new10_upcap11 as modellib 17 | from capsal.eval_cap import COCOEvalCap 18 | import json 19 | 20 | os.environ["CUDA_VISIBLE_DEVICES"]='1' 21 | from capsal.vocabulary import Vocabulary 22 | import skimage.transform 23 | # import skimage 24 | # Root directory of the project 25 | ROOT_DIR = os.getcwd() 
26 | 27 | # Directory to save logs and trained model 28 | DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") 29 | # Local path to trained weights file 30 | COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") 31 | # Download COCO trained weights from Releases if needed 32 | if not os.path.exists(COCO_MODEL_PATH): 33 | utils.download_trained_weights(COCO_MODEL_PATH) 34 | 35 | 36 | class SaliencyConfig(Config): 37 | """Configuration for training on the COCO-CapSal saliency dataset. 38 | Derives from the base Config class and overrides values specific 39 | to salient object detection. 40 | """ 41 | # Give the configuration a recognizable name 42 | NAME = "saliency" 43 | 44 | # Train on 1 GPU and 1 image per GPU. 45 | # Batch size is 1 (GPUs * images/GPU). 46 | GPU_COUNT = 1 47 | IMAGES_PER_GPU = 1 48 | STEPS_PER_EPOCH = 5265 // IMAGES_PER_GPU  # 5265 training images 49 | VALIDATION_STEPS = 100 // IMAGES_PER_GPU 50 | TRAIN_ROIS_PER_IMAGE = 200 51 | # Number of classes (including background) 52 | NUM_CLASSES = 1 + 1 # background + salient foreground 53 | DETECTION_MIN_CONFIDENCE = 0.8 54 | # Use small images for faster training. Set the limits of the small side 55 | # and the large side, and that determines the image shape. 56 | 57 | # 58 | # # Use smaller anchors because our image and objects are small 59 | # RPN_ANCHOR_SCALES = (8, 16, 32, 64, 128) # anchor side in pixels 60 | # 61 | # # Reduce training ROIs per image because the images are small and have 62 | # # few objects. Aim to allow ROI sampling to pick 33% positive ROIs. 63 | # TRAIN_ROIS_PER_IMAGE = 32 64 | # 65 | # # Use a small epoch since the data is simple 66 | # STEPS_PER_EPOCH = 100 67 | # 68 | # # use small validation steps since the epoch is small 69 | 70 | class SaliencyDataset(utils.Dataset): 71 | def load_sal(self, subset): 72 | """Load the saliency dataset for train or validation. 73 | Images and annotations are read from fixed paths under ./data/. 74 | subset: train or val.
75 | """ 76 | # Add classes 77 | self.add_class("saliency", 1, "foreground") 78 | if subset == 'train': 79 | sal_dataset = np.load('./data/train.npy',encoding='latin1') 80 | else: 81 | sal_dataset = np.load('./data/val.npy',encoding='latin1') 82 | self.sal_data = sal_dataset 83 | 84 | for sal_info in sal_dataset: 85 | 86 | image_id = int(sal_info['image_id']) 87 | image_name = sal_info['image_name'] 88 | masks = sal_info['masks'].astype(np.int32) 89 | gt = sal_info['gt'].astype(np.float32) 90 | if subset == 'train': 91 | caption = sal_info['caption'].astype(np.int32) 92 | 93 | caption_mask = sal_info['caption_mask'].astype(np.float32) 94 | # b_box = float(sal_info['b_box']) 95 | if subset == 'train': 96 | dataset_dir = './data/train_img_gt/image/' 97 | self.add_image("saliency", image_id=image_id, path=os.path.join(dataset_dir, image_name), 98 | mask=masks, image_name=image_name, gt=gt, caption=caption, caption_mask=caption_mask) 99 | else: 100 | dataset_dir = './data/val_img_gt/image/' 101 | self.add_image("saliency", image_id=image_id, path=os.path.join(dataset_dir, image_name), 102 | mask=masks, image_name=image_name, gt=gt) 103 | # 104 | def load_mask(self,image_id): 105 | info = self.image_info[image_id] 106 | gt = info['gt'] 107 | # getmask 108 | mask = info['mask'] 109 | return mask, np.ones([mask.shape[-1]],dtype=np.int32) 110 | def load_caption(self,image_id): 111 | info = self.image_info[image_id] 112 | caption = info['caption'] 113 | caption_mask = info['caption_mask'] 114 | # caption = np.zeros((2,15)) 115 | return caption, caption_mask 116 | def image_reference(self,image_id): 117 | #':return the path og the image' 118 | info = self.image_info[image_id] 119 | if info["source"] == "saliency": 120 | return info['id'] 121 | else: 122 | super(self.__class__).image_reference(self, image_id) 123 | def load_img_list(dataset): 124 | 125 | if dataset == 'coco': 126 | path = '/home/zhanglu/Mask_RCNN/val/val' 127 | elif dataset == 'HKU-IS': 128 | path = './dataset/HKU-IS/HKU-IS_Image' 129 | elif dataset == 'PASCAL-S': 130 | path = './dataset/pascal-s/PASCAL_S-Image' 131 | elif dataset == 'DUT': 132 | path = './dataset/DUTS-TR/DUTS/DUT-test/DUT-test-Image' 133 | elif dataset == 'THUS': 134 | path = './dataset/THUR/THUR-Image' 135 | elif dataset == 'SOC': 136 | path = './dataset/SOC6K_Release/' 137 | 138 | imgs = os.listdir(path) 139 | 140 | return path, imgs 141 | def predict2(model): 142 | datasets = ['coco']#'coco','PASCAL-S','SOC','ECSSD','DUT','THUS','HKU-IS' 143 | for dataset in datasets: 144 | print(dataset) 145 | path, imgs = load_img_list(dataset) 146 | 147 | save_dir = './result' 148 | save_dir1 = save_dir + '/result1'+'_'+dataset +'/' 149 | if not os.path.exists(save_dir1): 150 | os.mkdir(save_dir1) 151 | save_dir2 = save_dir + '/result_pixel1'+'_'+dataset +'/' 152 | if not os.path.exists(save_dir2): 153 | os.mkdir(save_dir2) 154 | save_dir3 = save_dir + '/combine1'+'_'+dataset +'/' 155 | if not os.path.exists(save_dir3): 156 | os.mkdir(save_dir3) 157 | save_dir4 = save_dir + '/caption' + '_' + dataset + '/' 158 | if not os.path.exists(save_dir4): 159 | os.mkdir(save_dir4) 160 | idx = 0 161 | 162 | for f_img in imgs: 163 | print(idx) 164 | image_name = f_img 165 | 166 | 167 | image = skimage.io.imread(os.path.join(path, f_img)) 168 | # If grayscale. Convert to RGB for consistency. 
169 | if image.ndim != 3: 170 | image = skimage.color.gray2rgb(image) 171 | # If has an alpha channel, remove it for consistency 172 | if image.shape[-1] == 4: 173 | image = image[..., :3] 174 | if image.shape[0] > 1024 or image.shape[1] > 1024: 175 | image = skimage.transform.resize(image,(800,800),preserve_range=1) 176 | image = image.astype(np.uint8) 177 | r = model.detect([image], verbose=0)[0] 178 | # visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], 179 | # class_names, r['scores']) 180 | score_masks = r['proposal'].astype(np.float32) 181 | score_masks = np.squeeze(score_masks) 182 | pixel_mask = r['pixel'].astype(np.float32) 183 | combine_mask = r['combine'].astype(np.float32) 184 | 185 | 186 | cv2.imwrite(save_dir1 + image_name, score_masks * 255) 187 | cv2.imwrite(save_dir2 + image_name, pixel_mask * 255) 188 | cv2.imwrite(save_dir3 + image_name, combine_mask * 255) 189 | idx = idx +1 190 | 191 | def predict(dataset,model,save_dir): 192 | class_names = ['BG','foreground'] 193 | image_ids = dataset.image_ids 194 | save_dir = './result' 195 | save_dir1 = save_dir + '/result/' 196 | if not os.path.exists(save_dir1): 197 | os.mkdir(save_dir1) 198 | save_dir2 = save_dir + '/result_pixel/' 199 | if not os.path.exists(save_dir2): 200 | os.mkdir(save_dir2) 201 | save_dir3 = save_dir + '/combine/' 202 | if not os.path.exists(save_dir3): 203 | os.mkdir(save_dir3) 204 | # save_dir4 = save_dir + '/combine4/' 205 | # if not os.path.exists(save_dir4): 206 | # os.mkdir(save_dir4) 207 | vocabulary = Vocabulary(5000, 208 | './data/vocabulary.csv') 209 | ids =[] 210 | caption = {} 211 | for image_id in image_ids: 212 | word_out = [] 213 | print(image_id) 214 | image = dataset.load_image(image_id) 215 | 216 | image_name = dataset.image_info[image_id]['image_name'] 217 | img_name2, ext = os.path.splitext(image_name) 218 | final = np.zeros((image.shape[0],image.shape[1])) 219 | final_pro = np.zeros((image.shape[0], image.shape[1])) 220 | final_combine = np.zeros((image.shape[0], image.shape[1])) 221 | id = dataset.image_info[image_id]['id'] 222 | ids.append(id) 223 | 224 | 225 | 226 | r = model.detect([image], verbose=0)[0] 227 | 228 | cap_id = np.squeeze(r['word']).astype(np.int) 229 | word = vocabulary.get_sentence(cap_id) 230 | word_out.append(word.replace('.','')) 231 | caption[id] = word_out 232 | 233 | score_masks = r['proposal'].astype(np.float32) 234 | score_masks = np.squeeze(score_masks) 235 | out_name = save_dir1 + img_name2 + '.jpg' 236 | cv2.imwrite(out_name, score_masks * 255) 237 | pixel_mask = r['pixel'].astype(np.float32) 238 | out_name = save_dir2 + img_name2 + '.jpg' 239 | cv2.imwrite(out_name, pixel_mask * 255) 240 | combine_mask = r['combine'].astype(np.float32) 241 | out_name = save_dir3 + img_name2 + '.jpg' 242 | cv2.imwrite(out_name, combine_mask * 255) 243 | caption_gt = json.load(open('./data/caption_gt.json'), encoding='utf-8') 244 | ceval = COCOEvalCap(caption_gt, caption) 245 | ceval.evaluate(ids) 246 | 247 | if __name__ == '__main__': 248 | import argparse 249 | 250 | # Parse command line arguments 251 | parser = argparse.ArgumentParser( 252 | description='Train Mask R-CNN on MS COCO.') 253 | parser.add_argument("--command", 254 | default='evaluate', required=False, 255 | metavar="", 256 | help="'train' or 'evaluate' on MS COCO") 257 | parser.add_argument('--dataset', required=False, 258 | default='', 259 | metavar="/path/to/coco/", 260 | help='Directory of the MS-COCO dataset') 261 | 262 | parser.add_argument('--model', required=False, 263 | 
default='/home/zhanglu/Mask_RCNN_new/logs/saliency20181122T1118/mask_rcnn_saliency_0020.h5',#', 264 | metavar="/path/to/weights.h5", 265 | help="Path to weights .h5 file or 'coco'") 266 | parser.add_argument('--logs', required=False, 267 | default=DEFAULT_LOGS_DIR, 268 | metavar="/path/to/logs/", 269 | help='Logs and checkpoints directory (default=logs/)') 270 | parser.add_argument('--limit', required=False, 271 | default=500, 272 | metavar="", 273 | help='Images to use for evaluation (default=500)') 274 | parser.add_argument('--download', required=False, 275 | default=False, 276 | metavar="", 277 | help='Automatically download and unzip MS-COCO files (default=False)', 278 | type=bool) 279 | args = parser.parse_args() 280 | print("Command: ", args.command) 281 | print("Model: ", args.model) 282 | print("Dataset: ", args.dataset) 283 | print("Logs: ", args.logs) 284 | print("Auto Download: ", args.download) 285 | 286 | # Configurations 287 | if args.command == "train": 288 | config = SaliencyConfig() 289 | else: 290 | class InferenceConfig(SaliencyConfig): 291 | # Set batch size to 1 since we'll be running inference on 292 | # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU 293 | GPU_COUNT = 1 294 | IMAGES_PER_GPU = 1 295 | DETECTION_MIN_CONFIDENCE = 0.8 296 | config = InferenceConfig() 297 | config.display() 298 | 299 | # Create model 300 | if args.command == "train": 301 | model = modellib.MaskRCNN(mode="training", config=config, 302 | model_dir=args.logs) 303 | else: 304 | model = modellib.MaskRCNN(mode="inference", config=config, 305 | model_dir=args.logs) 306 | 307 | # Select weights file to load 308 | if args.model.lower() == "coco": 309 | model_path = COCO_MODEL_PATH 310 | elif args.model.lower() == "last": 311 | # Find last trained weights 312 | model_path = model.find_last()[1] 313 | elif args.model.lower() == "imagenet": 314 | # Start from ImageNet trained weights 315 | model_path = model.get_imagenet_weights() 316 | else: 317 | model_path = args.model 318 | 319 | # Load weights 320 | print("Loading weights ", model_path) 321 | 322 | if args.model.lower() == "coco": 323 | # Load weights trained on MS COCO, but skip layers that 324 | # are different due to the different number of classes 325 | # See README for instructions to download the COCO weights 326 | model.load_weights(model_path, by_name=True, 327 | exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 328 | "mrcnn_bbox", "mrcnn_mask"]) 329 | 330 | else: 331 | model.load_weights(model_path, by_name= True) 332 | 333 | 334 | # Validation dataset 335 | dataset_val = SaliencyDataset() 336 | dataset_val.load_sal('val') 337 | dataset_val.prepare() 338 | print("Running COCO evaluation on {} images.".format(1459)) 339 | predict2(model) 340 | 341 | --------------------------------------------------------------------------------
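A short, hedged usage sketch (not part of the repository): the annotation files described in `data/ReadMe.md` store one dict per image, and the fields read below (`image_id`, `image_name`, `gt`, `masks`, `captionw`) are the ones that ReadMe documents and that `SaliencyDataset.load_sal` in `test_capsal.py` consumes. The `encoding='latin1'` argument mirrors the load call used in `test_capsal.py`; `allow_pickle=True` is only needed on newer NumPy versions.

```python
import numpy as np

# Each entry of ./data/val.npy (or train.npy) is a per-image dict; see data/ReadMe.md.
records = np.load('./data/val.npy', encoding='latin1', allow_pickle=True)

rec = records[0]
print(rec['image_id'], rec['image_name'])  # image index and file name
print(rec['gt'].shape)                     # ground-truth saliency map, H x W
print(rec['masks'].shape)                  # instance masks, H x W x num_instances
print(rec['captionw'])                     # raw caption strings for this image
```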