├── 01_pascal.py ├── 02_pascal_alexnet.py ├── 03_pascal_vgg16.py ├── 04_pascal_vggfinetune.py ├── README.md └── eval.py /01_pascal.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Feb 26 03:57:30 2018 5 | 6 | @author: snigdha 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | # Imports 14 | import sys 15 | import numpy as np 16 | import tensorflow as tf 17 | import argparse 18 | #import os.path as osp 19 | from PIL import Image 20 | from functools import partial 21 | import pickle 22 | 23 | from eval import compute_map 24 | #import models 25 | 26 | tf.logging.set_verbosity(tf.logging.INFO) 27 | 28 | CLASS_NAMES = [ 29 | 'aeroplane', 30 | 'bicycle', 31 | 'bird', 32 | 'boat', 33 | 'bottle', 34 | 'bus', 35 | 'car', 36 | 'cat', 37 | 'chair', 38 | 'cow', 39 | 'diningtable', 40 | 'dog', 41 | 'horse', 42 | 'motorbike', 43 | 'person', 44 | 'pottedplant', 45 | 'sheep', 46 | 'sofa', 47 | 'train', 48 | 'tvmonitor', 49 | ] 50 | 51 | 52 | def cnn_model_fn(features, labels, mode, num_classes=20): 53 | # Write this function 54 | # """Model function for CNN.""" 55 | # Input Layer 56 | input_layer = tf.reshape(features["x"], [-1, 256, 256, 3]) 57 | 58 | # Convolutional Layer #1 59 | conv1 = tf.layers.conv2d( 60 | inputs=input_layer, 61 | filters=32, 62 | kernel_size=[5, 5], 63 | padding="same", 64 | activation=tf.nn.relu) 65 | 66 | # Pooling Layer #1 67 | pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2) 68 | 69 | # Convolutional Layer #2 and Pooling Layer #2 70 | conv2 = tf.layers.conv2d( 71 | inputs=pool1, 72 | filters=64, 73 | kernel_size=[5, 5], 74 | padding="same", 75 | activation=tf.nn.relu) 76 | pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2) 77 | 78 | # Dense Layer 79 | pool2_flat = tf.reshape(pool2, [-1, 64 * 64 * 64]) 80 | dense = tf.layers.dense(inputs=pool2_flat, units=1024, 81 | activation=tf.nn.relu) 82 | dropout = tf.layers.dropout( 83 | inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN) 84 | 85 | # Logits Layer 86 | logits = tf.layers.dense(inputs=dropout, units=20) 87 | 88 | predictions = { 89 | # Generate predictions (for PREDICT and EVAL mode) 90 | "classes": tf.argmax(input=logits, axis=1), 91 | # Add `softmax_tensor` to the graph. It is used for PREDICT and by the 92 | # `logging_hook`. 93 | "probabilities": tf.sigmoid(logits, name="softmax_tensor") 94 | } 95 | 96 | if mode == tf.estimator.ModeKeys.PREDICT: 97 | return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) 98 | 99 | # Calculate Loss (for both TRAIN and EVAL modes) 100 | 101 | loss = tf.identity(tf.losses.sigmoid_cross_entropy( 102 | labels, logits=logits), name='loss') 103 | 104 | # Configure the Training Op (for TRAIN mode) 105 | if mode == tf.estimator.ModeKeys.TRAIN: 106 | optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001) 107 | train_op = optimizer.minimize( 108 | loss=loss, 109 | global_step=tf.train.get_global_step()) 110 | return tf.estimator.EstimatorSpec( 111 | mode=mode, loss=loss, train_op=train_op) 112 | 113 | # Add evaluation metrics (for EVAL mode) 114 | eval_metric_ops = { 115 | "accuracy": tf.metrics.accuracy( 116 | labels=labels, predictions=predictions)} 117 | return tf.estimator.EstimatorSpec( 118 | mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) 119 | 120 | 121 | def load_pascal(data_dir, split='train'): 122 | 123 | """ 124 | Function to read images from PASCAL data folder. 125 | Args: 126 | data_dir (str): Path to the VOC2007 directory. 127 | split (str): train/val/trainval split to use. 128 | Returns: 129 | images (np.ndarray): Return a np.float32 array of 130 | shape (N, H, W, 3), where H, W are 224px each, 131 | and each image is in RGB format. 132 | labels (np.ndarray): An array of shape (N, 20) of 133 | type np.int32, with 0s and 1s; 1s for classes that 134 | are active in that image. 135 | """ 136 | 137 | 138 | sub_dir1 = '/ImageSets/Main/' 139 | sub_dir2 = '/JPEGImages/' 140 | f1 = open(data_dir+sub_dir1+"aeroplane"+"_"+split+".txt", 'r') 141 | 142 | img = [] 143 | 144 | for line1 in f1: 145 | g1 = line1.strip().split(' ') 146 | img.append(g1[0]) 147 | 148 | num =len(img) 149 | print("num",num) 150 | 151 | w = np.int32(np.zeros((num,20))) 152 | l = np.int32(np.zeros((num,20))) 153 | 154 | print("Entering the loop for weights and labels") 155 | 156 | cnt = 0 157 | for i in range(0,20): 158 | 159 | f2 = open(data_dir + '/ImageSets/Main/'+CLASS_NAMES[i]+'_'+split+'.txt') 160 | a1 = f2.read().split() 161 | t = a1[1::2] 162 | tt = np.int32(t) 163 | ttt = tt.reshape(1,num) 164 | w[:,cnt] = np.int32(np.abs(ttt)) 165 | l[:,cnt] = ttt.clip(min = 0) 166 | cnt = cnt + 1 167 | 168 | 169 | labels = np.int32(l) 170 | weights = np.int32(w) 171 | print("Entering the loop for images") 172 | arr = [] 173 | for j in img: 174 | 175 | im = Image.open(data_dir+sub_dir2+ j +'.jpg') 176 | im = im.resize((256, 256), Image.ANTIALIAS) 177 | arr.append(np.float32(im)) 178 | 179 | image_ar = np.float32(arr) 180 | return (image_ar,labels,weights) 181 | 182 | 183 | def parse_args(): 184 | parser = argparse.ArgumentParser( 185 | description='Train a classifier in tensorflow!') 186 | parser.add_argument( 187 | 'data_dir', type=str, default='data/VOC2007', 188 | help='Path to PASCAL data storage') 189 | if len(sys.argv) == 1: 190 | parser.print_help() 191 | sys.exit(1) 192 | args = parser.parse_args() 193 | return args 194 | 195 | 196 | def _get_el(arr, i): 197 | try: 198 | return arr[i] 199 | except IndexError: 200 | return arr 201 | 202 | from tensorflow.core.framework import summary_pb2 203 | def summary_var(log_dir, name, val, step): 204 | writer = tf.summary.FileWriterCache.get(log_dir) 205 | summary_proto = summary_pb2.Summary() 206 | value = summary_proto.value.add() 207 | value.tag = name 208 | value.simple_value = float(val) 209 | writer.add_summary(summary_proto, step) 210 | writer.flush() 211 | 212 | def main(): 213 | args = parse_args() 214 | #Load training and eval data 215 | train_data, train_labels, train_weights = load_pascal( 216 | args.data_dir, split='trainval') 217 | eval_data, eval_labels, eval_weights = load_pascal( 218 | args.data_dir, split='test') 219 | 220 | print ("Done loading weights") 221 | 222 | pascal_classifier = tf.estimator.Estimator( 223 | model_fn=partial(cnn_model_fn, 224 | num_classes=train_labels.shape[1]), 225 | model_dir="pascal_model_scratch") 226 | tensors_to_log = {"loss": "loss"} 227 | logging_hook = tf.train.LoggingTensorHook( 228 | tensors=tensors_to_log, every_n_iter=50) 229 | 230 | 231 | list22 = [] 232 | for i in range(0,20): 233 | 234 | # Train the model 235 | train_input_fn = tf.estimator.inputs.numpy_input_fn( 236 | x={"x": train_data, "w": train_weights}, 237 | y=train_labels, 238 | batch_size=10, 239 | num_epochs=None, 240 | shuffle=True) 241 | 242 | pascal_classifier.train( 243 | input_fn=train_input_fn, 244 | steps=50, 245 | hooks=[logging_hook]) 246 | 247 | # Evaluate the model and print results 248 | eval_input_fn = tf.estimator.inputs.numpy_input_fn( 249 | x={"x": eval_data, "w": eval_weights}, 250 | y=eval_labels, 251 | num_epochs=1, 252 | shuffle=False) 253 | 254 | pred = list(pascal_classifier.predict(input_fn=eval_input_fn)) 255 | pred = np.stack([p['probabilities'] for p in pred]) 256 | rand_AP = compute_map( 257 | eval_labels, np.random.random(eval_labels.shape), 258 | eval_weights, average=None) 259 | print('Random AP: {} mAP'.format(np.mean(rand_AP))) 260 | gt_AP = compute_map( 261 | eval_labels, eval_labels, eval_weights, average=None) 262 | print('GT AP: {} mAP'.format(np.mean(gt_AP))) 263 | AP = compute_map(eval_labels, pred, eval_weights, average=None) 264 | print('Obtained {} mAP'.format(np.mean(AP))) 265 | print('per class:') 266 | for cid, cname in enumerate(CLASS_NAMES): 267 | print('{}: {}'.format(cname, _get_el(AP, cid))) 268 | list22.append(np.mean(AP)) 269 | 270 | 271 | summary_var("pascal_model_scratch","mAP",np.mean(AP),i) 272 | 273 | # with open('list11.pkl','wb') as fr1: 274 | # pickle.dump(list11,fr1) 275 | 276 | with open('list22.pkl','wb') as fr2: 277 | pickle.dump(list22,fr2) 278 | 279 | 280 | 281 | if __name__ == "__main__": 282 | main() -------------------------------------------------------------------------------- /02_pascal_alexnet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Mar 3 17:07:30 2018 5 | 6 | @author: snigdha 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | # Imports 14 | import sys 15 | import os 16 | import numpy as np 17 | import tensorflow as tf 18 | import argparse 19 | #import os.path as osp 20 | from PIL import Image 21 | from functools import partial 22 | from collections import defaultdict 23 | import pickle 24 | 25 | from eval import compute_map 26 | #import models 27 | 28 | tf.logging.set_verbosity(tf.logging.INFO) 29 | 30 | CLASS_NAMES = [ 31 | 'aeroplane', 32 | 'bicycle', 33 | 'bird', 34 | 'boat', 35 | 'bottle', 36 | 'bus', 37 | 'car', 38 | 'cat', 39 | 'chair', 40 | 'cow', 41 | 'diningtable', 42 | 'dog', 43 | 'horse', 44 | 'motorbike', 45 | 'person', 46 | 'pottedplant', 47 | 'sheep', 48 | 'sofa', 49 | 'train', 50 | 'tvmonitor', 51 | ] 52 | 53 | def cnn_model_fn(features, labels, mode, num_classes=20): 54 | # Write this function 55 | # """Model function for CNN.""" 56 | # Input Layer 57 | 58 | input_layer = tf.reshape(features["x"], [-1, 224, 224, 3]) 59 | 60 | if mode == tf.estimator.ModeKeys.TRAIN: 61 | flipped = tf.map_fn(lambda image: tf.image.random_flip_left_right(image),features["x"]) 62 | cropped = tf.map_fn(lambda image:tf.random_crop(image,size=[224,224,3]),features["x"]) 63 | 64 | fets = tf.concat([features["x"],flipped,cropped],axis = 0) 65 | #wts = tf.concat([features["w"],features["w"],features["w"]],axis = 0) 66 | lbls = tf.concat([labels,labels,labels],axis = 0) 67 | 68 | feats = tf.random_shuffle(fets,seed = features["x"].shape[0]*3) 69 | #wtgs = tf.random_shuffle(wts,seed = features["x"].shape[0]*3) 70 | lbels = tf.random_shuffle(lbls,seed = features["x"].shape[0]*3) 71 | 72 | features["x"]= feats 73 | input_layer = features["x"] 74 | labels = lbels 75 | 76 | 77 | # Convolutional Layer #1 78 | conv1 = tf.layers.conv2d( 79 | inputs=input_layer, 80 | filters=96, 81 | kernel_size=[11,11], 82 | padding="valid", 83 | strides = 4, 84 | activation=tf.nn.relu, 85 | kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01), 86 | bias_initializer=tf.zeros_initializer() 87 | ) 88 | 89 | 90 | # Pooling Layer #1 91 | pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3], strides=2) 92 | 93 | 94 | # Convolutional Layer #2 95 | conv2 = tf.layers.conv2d( 96 | inputs=pool1, 97 | filters=256, 98 | kernel_size=[5,5], 99 | padding="same", 100 | strides = 1, 101 | activation=tf.nn.relu, 102 | kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01), 103 | bias_initializer=tf.zeros_initializer() 104 | ) 105 | 106 | # Pooling Layer #2 107 | pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[3, 3], strides=2) 108 | 109 | 110 | # Convolutional Layer #3 111 | conv3 = tf.layers.conv2d( 112 | inputs=pool2, 113 | filters=384, 114 | kernel_size=[3, 3], 115 | padding="same", 116 | strides = 1, 117 | activation=tf.nn.relu, 118 | kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01), 119 | bias_initializer=tf.zeros_initializer()) 120 | 121 | # Convolutional Layer #4 122 | conv4 = tf.layers.conv2d( 123 | inputs=conv3, 124 | filters=384, 125 | kernel_size=[3, 3], 126 | padding="same", 127 | strides = 1, 128 | activation=tf.nn.relu, 129 | kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01), 130 | bias_initializer=tf.zeros_initializer()) 131 | 132 | # Convolutional Layer #5 133 | conv5 = tf.layers.conv2d( 134 | inputs=conv4, 135 | filters=256, 136 | kernel_size=[3, 3], 137 | padding="same", 138 | strides = 1, 139 | activation=tf.nn.relu, 140 | kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01), 141 | bias_initializer=tf.zeros_initializer()) 142 | 143 | # Pooling Layer #3 144 | pool3 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2) 145 | 146 | # Dense Layer 147 | pool3_flat = tf.contrib.layers.flatten(pool3) 148 | 149 | dense1 = tf.layers.dense(inputs=pool3_flat, units=4096, 150 | activation=tf.nn.relu, 151 | kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01), 152 | bias_initializer=tf.zeros_initializer() 153 | ) 154 | dropout1 = tf.layers.dropout( 155 | inputs=dense1, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN) 156 | 157 | dense2 = tf.layers.dense(inputs=dropout1, units=4096, 158 | activation=tf.nn.relu, 159 | kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01), 160 | bias_initializer=tf.zeros_initializer(), 161 | ) 162 | dropout2 = tf.layers.dropout( 163 | inputs=dense2, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN) 164 | 165 | 166 | # Logits Layer 167 | logits = tf.layers.dense(inputs=dropout2, units=20) 168 | 169 | predictions = { 170 | # Generate predictions (for PREDICT and EVAL mode) 171 | "classes": tf.argmax(input=logits, axis=1), 172 | # Add `softmax_tensor` to the graph. It is used for PREDICT and by the 173 | # `logging_hook`. 174 | "probabilities": tf.sigmoid(logits, name="sigmoid_tensor") 175 | } 176 | 177 | if mode == tf.estimator.ModeKeys.PREDICT: 178 | return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) 179 | 180 | # Calculate Loss (for both TRAIN and EVAL modes) 181 | 182 | 183 | # Configure the Training Op (for TRAIN mode) 184 | if mode == tf.estimator.ModeKeys.TRAIN: 185 | 186 | loss = tf.identity(tf.losses.sigmoid_cross_entropy( 187 | labels, logits=logits), name='loss') 188 | 189 | decay_learning_rate = tf.train.exponential_decay( 190 | learning_rate = 0.001, 191 | global_step=tf.train.get_global_step(), 192 | decay_steps = 10000, 193 | decay_rate = 0.5, 194 | staircase = True, 195 | name = None) 196 | optimizer = tf.train.MomentumOptimizer(learning_rate=decay_learning_rate, 197 | momentum = 0.9) 198 | 199 | train_op = optimizer.minimize( 200 | loss=loss, 201 | global_step=tf.train.get_global_step()) 202 | return tf.estimator.EstimatorSpec( 203 | mode=mode, loss=loss, train_op=train_op) 204 | 205 | # Add evaluation metrics (for EVAL mode) 206 | eval_metric_ops = { 207 | "accuracy": tf.metrics.accuracy( 208 | labels=labels, predictions=predictions)} 209 | return tf.estimator.EstimatorSpec( 210 | mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) 211 | 212 | def load_pascal(data_dir, split='train'): 213 | 214 | """ 215 | Function to read images from PASCAL data folder. 216 | Args: 217 | data_dir (str): Path to the VOC2007 directory. 218 | split (str): train/val/trainval split to use. 219 | Returns: 220 | images (np.ndarray): Return a np.float32 array of 221 | shape (N, H, W, 3), where H, W are 224px each, 222 | and each image is in RGB format. 223 | labels (np.ndarray): An array of shape (N, 20) of 224 | type np.int32, with 0s and 1s; 1s for classes that 225 | are active in that image. 226 | """ 227 | 228 | 229 | sub_dir1 = '/ImageSets/Main/' 230 | sub_dir2 = '/JPEGImages/' 231 | f1 = open(data_dir+sub_dir1+"aeroplane"+"_"+split+".txt", 'r') 232 | 233 | img = [] 234 | 235 | for line1 in f1: 236 | g1 = line1.strip().split(' ') 237 | img.append(g1[0]) 238 | 239 | num =len(img) 240 | print("num",num) 241 | 242 | w = np.int32(np.zeros((num,20))) 243 | l = np.int32(np.zeros((num,20))) 244 | 245 | print("Entering the loop for weights and labels") 246 | 247 | cnt = 0 248 | for i in range(0,20): 249 | 250 | f2 = open(data_dir + '/ImageSets/Main/'+CLASS_NAMES[i]+'_'+split+'.txt') 251 | a1 = f2.read().split() 252 | t = a1[1::2] 253 | tt = np.int32(t) 254 | ttt = tt.reshape(1,num) 255 | w[:,cnt] = np.int32(np.abs(ttt)) 256 | l[:,cnt] = ttt.clip(min = 0) 257 | cnt = cnt + 1 258 | 259 | 260 | labels = np.int32(l) 261 | weights = np.int32(w) 262 | print("Entering the loop for images") 263 | arr = [] 264 | for j in img: 265 | 266 | im = Image.open(data_dir+sub_dir2+ j +'.jpg') 267 | im = im.resize((256, 256), Image.ANTIALIAS) 268 | arr.append(np.float32(im)) 269 | 270 | image_ar = np.float32(arr) 271 | return (image_ar,labels,weights) 272 | 273 | 274 | 275 | def parse_args(): 276 | parser = argparse.ArgumentParser( 277 | description='Train a classifier in tensorflow!') 278 | parser.add_argument( 279 | 'data_dir', type=str, default='data/VOC2007', 280 | help='Path to PASCAL data storage') 281 | if len(sys.argv) == 1: 282 | parser.print_help() 283 | sys.exit(1) 284 | args = parser.parse_args() 285 | return args 286 | 287 | 288 | def _get_el(arr, i): 289 | try: 290 | return arr[i] 291 | except IndexError: 292 | return arr 293 | 294 | from tensorflow.core.framework import summary_pb2 295 | def summary_var(log_dir, name, val, step): 296 | writer = tf.summary.FileWriterCache.get(log_dir) 297 | summary_proto = summary_pb2.Summary() 298 | value = summary_proto.value.add() 299 | value.tag = name 300 | value.simple_value = float(val) 301 | writer.add_summary(summary_proto, step) 302 | writer.flush() 303 | 304 | 305 | def main(): 306 | args = parse_args() 307 | # Load training and eval data 308 | train_data, train_labels, train_weights = load_pascal( 309 | args.data_dir, split='trainval') 310 | eval_data, eval_labels, eval_weights = load_pascal( 311 | args.data_dir, split='test') 312 | 313 | 314 | 315 | pascal_classifier = tf.estimator.Estimator( 316 | model_fn=partial(cnn_model_fn, 317 | num_classes=train_labels.shape[1]), 318 | model_dir="pascal_alexnet") 319 | tensors_to_log = {"loss": "loss"} 320 | logging_hook = tf.train.LoggingTensorHook( 321 | tensors=tensors_to_log, every_n_iter=400) 322 | 323 | 324 | 325 | mAP = [] 326 | for i in range(0,100): 327 | 328 | # Train the model 329 | train_input_fn = tf.estimator.inputs.numpy_input_fn( 330 | x={"x": train_data, "w": train_weights}, 331 | y=train_labels, 332 | batch_size=10, 333 | num_epochs=None, 334 | shuffle=True) 335 | 336 | pascal_classifier.train( 337 | input_fn=train_input_fn, 338 | steps=400, 339 | hooks=[logging_hook]) 340 | 341 | 342 | # Evaluate the model and print results 343 | eval_input_fn = tf.estimator.inputs.numpy_input_fn( 344 | x={"x": eval_data, "w": eval_weights}, 345 | y=eval_labels, 346 | num_epochs=1, 347 | shuffle=False) 348 | 349 | pred = list(pascal_classifier.predict(input_fn=eval_input_fn)) 350 | pred = np.stack([p['probabilities'] for p in pred]) 351 | rand_AP = compute_map( 352 | eval_labels, np.random.random(eval_labels.shape), 353 | eval_weights, average=None) 354 | print('Random AP: {} mAP'.format(np.mean(rand_AP))) 355 | gt_AP = compute_map( 356 | eval_labels, eval_labels, eval_weights, average=None) 357 | print('GT AP: {} mAP'.format(np.mean(gt_AP))) 358 | AP = compute_map(eval_labels, pred, eval_weights, average=None) 359 | print('Obtained {} mAP'.format(np.mean(AP))) 360 | print('per class:') 361 | for cid, cname in enumerate(CLASS_NAMES): 362 | print('{}: {}'.format(cname, _get_el(AP, cid))) 363 | mAP.append(np.mean(AP)) 364 | 365 | 366 | summary_var("pascal_alexnet","mAP",np.mean(AP),i*400) 367 | 368 | 369 | with open('map.pkl','wb') as fr2: 370 | pickle.dump(mAP,fr2) 371 | 372 | 373 | 374 | 375 | if __name__ == "__main__": 376 | main() -------------------------------------------------------------------------------- /03_pascal_vgg16.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Sat Mar 3 17:07:30 2018 5 | 6 | @author: snigdha 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | # Imports 14 | import sys 15 | import os 16 | import numpy as np 17 | import tensorflow as tf 18 | import argparse 19 | #import os.path as osp 20 | from PIL import Image 21 | from functools import partial 22 | from collections import defaultdict 23 | import pickle 24 | 25 | from eval import compute_map 26 | #import models 27 | 28 | tf.logging.set_verbosity(tf.logging.INFO) 29 | 30 | CLASS_NAMES = [ 31 | 'aeroplane', 32 | 'bicycle', 33 | 'bird', 34 | 'boat', 35 | 'bottle', 36 | 'bus', 37 | 'car', 38 | 'cat', 39 | 'chair', 40 | 'cow', 41 | 'diningtable', 42 | 'dog', 43 | 'horse', 44 | 'motorbike', 45 | 'person', 46 | 'pottedplant', 47 | 'sheep', 48 | 'sofa', 49 | 'train', 50 | 'tvmonitor', 51 | ] 52 | 53 | def cnn_model_fn(features, labels, mode, num_classes=20): 54 | # Write this function 55 | # """Model function for CNN.""" 56 | # Input Layer 57 | 58 | input_layer = tf.reshape(features["x"], [-1, 224, 224, 3]) 59 | 60 | if mode == tf.estimator.ModeKeys.TRAIN: 61 | flipped = tf.map_fn(lambda image: tf.image.random_flip_left_right(image),features["x"]) 62 | cropped = tf.map_fn(lambda image:tf.random_crop(image,size=[224,224,3]),features["x"]) 63 | 64 | fets = tf.concat([features["x"],flipped,cropped],axis = 0) 65 | #wts = tf.concat([features["w"],features["w"],features["w"]],axis = 0) 66 | lbls = tf.concat([labels,labels,labels],axis = 0) 67 | 68 | feats = tf.random_shuffle(fets,seed = features["x"].shape[0]*3) 69 | #wtgs = tf.random_shuffle(wts,seed = features["x"].shape[0]*3) 70 | lbels = tf.random_shuffle(lbls,seed = features["x"].shape[0]*3) 71 | 72 | features["x"]= feats 73 | input_layer = features["x"] 74 | labels = lbels 75 | 76 | tf.summary.image("Training_images",input_layer) 77 | 78 | # Convolutional Layer #1 79 | conv1 = tf.layers.conv2d( 80 | inputs=input_layer, 81 | filters=64, 82 | kernel_size=[3,3], 83 | padding="same", 84 | strides = 1, 85 | activation=tf.nn.relu 86 | ) 87 | 88 | # Convolutional Layer #2 89 | conv2 = tf.layers.conv2d( 90 | inputs=conv1, 91 | filters=64, 92 | kernel_size=[3,3], 93 | padding="same", 94 | strides = 1, 95 | activation=tf.nn.relu) 96 | 97 | # Pooling Layer #1 98 | pool1 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2) 99 | 100 | # Convolutional Layer #3 101 | conv3 = tf.layers.conv2d( 102 | inputs=pool1, 103 | filters=128, 104 | kernel_size=[3, 3], 105 | padding="same", 106 | strides = 1, 107 | activation=tf.nn.relu) 108 | 109 | # Convolutional Layer #4 110 | conv4 = tf.layers.conv2d( 111 | inputs=conv3, 112 | filters=128, 113 | kernel_size=[3, 3], 114 | padding="same", 115 | strides = 1, 116 | activation=tf.nn.relu 117 | ) 118 | 119 | #Pooling layer 2 120 | pool2 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2) 121 | 122 | # Convolutional Layer #5 123 | conv5 = tf.layers.conv2d( 124 | inputs=pool2, 125 | filters=256, 126 | kernel_size=[3, 3], 127 | padding="same", 128 | strides = 1, 129 | activation=tf.nn.relu, 130 | ) 131 | 132 | # Convolutional Layer #6 133 | conv6 = tf.layers.conv2d( 134 | inputs=conv5, 135 | filters=256, 136 | kernel_size=[3, 3], 137 | padding="same", 138 | strides = 1, 139 | activation=tf.nn.relu, 140 | ) 141 | 142 | # Convolutional Layer #7 143 | conv7 = tf.layers.conv2d( 144 | inputs=conv6, 145 | filters=256, 146 | kernel_size=[3, 3], 147 | padding="same", 148 | strides = 1, 149 | activation=tf.nn.relu, 150 | ) 151 | 152 | #Pooling layer 3 153 | pool3 = tf.layers.max_pooling2d(inputs=conv7, pool_size=[2, 2], strides=2) 154 | 155 | # Convolutional Layer #8 156 | conv8 = tf.layers.conv2d( 157 | inputs=pool3, 158 | filters=512, 159 | kernel_size=[3, 3], 160 | padding="same", 161 | strides = 1, 162 | activation=tf.nn.relu 163 | ) 164 | 165 | # Convolutional Layer #9 166 | conv9 = tf.layers.conv2d( 167 | inputs=conv8, 168 | filters=512, 169 | kernel_size=[3, 3], 170 | padding="same", 171 | strides = 1, 172 | activation=tf.nn.relu, 173 | ) 174 | 175 | # Convolutional Layer #10 176 | conv10 = tf.layers.conv2d( 177 | inputs=conv9, 178 | filters=512, 179 | kernel_size=[3, 3], 180 | padding="same", 181 | strides = 1, 182 | activation=tf.nn.relu 183 | ) 184 | 185 | #Pooling layer 4 186 | pool4 = tf.layers.max_pooling2d(inputs=conv10, pool_size=[2, 2], strides=2) 187 | 188 | 189 | # Convolutional Layer #11 190 | conv11 = tf.layers.conv2d( 191 | inputs=pool4, 192 | filters=512, 193 | kernel_size=[3, 3], 194 | padding="same", 195 | strides = 1, 196 | activation=tf.nn.relu 197 | ) 198 | 199 | # Convolutional Layer #12 200 | conv12 = tf.layers.conv2d( 201 | inputs=conv11, 202 | filters=512, 203 | kernel_size=[3, 3], 204 | padding="same", 205 | strides = 1, 206 | activation=tf.nn.relu 207 | ) 208 | 209 | # Convolutional Layer #13 210 | conv13 = tf.layers.conv2d( 211 | inputs=conv12, 212 | filters=512, 213 | kernel_size=[3, 3], 214 | padding="same", 215 | strides = 1, 216 | activation=tf.nn.relu 217 | ) 218 | 219 | #Pooling layer 5 220 | pool5 = tf.layers.max_pooling2d(inputs=conv13, pool_size=[2, 2], strides=2) 221 | 222 | 223 | # Dense Layer 224 | pool5_flat = tf.contrib.layers.flatten(pool5) 225 | 226 | dense1 = tf.layers.dense(inputs=pool5_flat, units=4096, 227 | activation=tf.nn.relu) 228 | 229 | dropout1 = tf.layers.dropout( 230 | inputs=dense1, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN) 231 | 232 | dense2 = tf.layers.dense(inputs=dropout1, units=4096, 233 | activation=tf.nn.relu) 234 | 235 | dropout2 = tf.layers.dropout( 236 | inputs=dense2, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN) 237 | 238 | 239 | dense3 = tf.layers.dense(inputs=dropout2, units=1000, 240 | activation = tf.nn.relu) 241 | 242 | # Logits Layer 243 | logits = tf.layers.dense(inputs=dense3, units=20) 244 | 245 | predictions = { 246 | # Generate predictions (for PREDICT and EVAL mode) 247 | "classes": tf.argmax(input=logits, axis=1), 248 | # Add `softmax_tensor` to the graph. It is used for PREDICT and by the 249 | # `logging_hook`. 250 | "probabilities": tf.sigmoid(logits, name="sigmoid_tensor") 251 | } 252 | 253 | if mode == tf.estimator.ModeKeys.PREDICT: 254 | return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) 255 | 256 | # Calculate Loss (for both TRAIN and EVAL modes) 257 | 258 | 259 | # Configure the Training Op (for TRAIN mode) 260 | if mode == tf.estimator.ModeKeys.TRAIN: 261 | 262 | loss = tf.identity(tf.losses.sigmoid_cross_entropy( 263 | labels, logits=logits), name='loss') 264 | 265 | decay_learning_rate = tf.train.exponential_decay( 266 | learning_rate = 0.001, 267 | global_step=tf.train.get_global_step(), 268 | decay_steps = 10000, 269 | decay_rate = 0.5, 270 | staircase = False, 271 | name = None) 272 | optimizer = tf.train.MomentumOptimizer(learning_rate=decay_learning_rate, 273 | momentum = 0.9) 274 | 275 | tf.summary.scalar("decayed_learning_rate",decay_learning_rate) 276 | 277 | grads_and_vars= optimizer.compute_gradients(loss) 278 | 279 | for g, v in grads_and_vars: 280 | if g is not None: 281 | #print(format(v.name)) 282 | tf.summary.histogram("{}/grad_histogram".format(v.name), g) 283 | 284 | train_op = optimizer.minimize( 285 | loss=loss, 286 | global_step=tf.train.get_global_step()) 287 | 288 | return tf.estimator.EstimatorSpec( 289 | mode=mode, loss=loss, train_op=train_op) 290 | 291 | 292 | # Add evaluation metrics (for EVAL mode) 293 | eval_metric_ops = { 294 | "accuracy": tf.metrics.accuracy( 295 | labels=labels, predictions=predictions["classes"])} 296 | return tf.estimator.EstimatorSpec( 297 | mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) 298 | 299 | 300 | def parse_args(): 301 | parser = argparse.ArgumentParser( 302 | description='Train a classifier in tensorflow!') 303 | parser.add_argument( 304 | 'data_dir', type=str, default='data/VOC2007', 305 | help='Path to PASCAL data storage') 306 | if len(sys.argv) == 1: 307 | parser.print_help() 308 | sys.exit(1) 309 | args = parser.parse_args() 310 | return args 311 | 312 | 313 | def _get_el(arr, i): 314 | try: 315 | return arr[i] 316 | except IndexError: 317 | return arr 318 | 319 | from tensorflow.core.framework import summary_pb2 320 | def summary_var(log_dir, name, val, step): 321 | writer = tf.summary.FileWriterCache.get(log_dir) 322 | summary_proto = summary_pb2.Summary() 323 | value = summary_proto.value.add() 324 | value.tag = name 325 | value.simple_value = float(val) 326 | writer.add_summary(summary_proto, step) 327 | writer.flush() 328 | 329 | def load_pascal(data_dir, split='train'): 330 | 331 | """ 332 | Function to read images from PASCAL data folder. 333 | Args: 334 | data_dir (str): Path to the VOC2007 directory. 335 | split (str): train/val/trainval split to use. 336 | Returns: 337 | images (np.ndarray): Return a np.float32 array of 338 | shape (N, H, W, 3), where H, W are 224px each, 339 | and each image is in RGB format. 340 | labels (np.ndarray): An array of shape (N, 20) of 341 | type np.int32, with 0s and 1s; 1s for classes that 342 | are active in that image. 343 | """ 344 | 345 | 346 | sub_dir1 = '/ImageSets/Main/' 347 | sub_dir2 = '/JPEGImages/' 348 | f1 = open(data_dir+sub_dir1+"aeroplane"+"_"+split+".txt", 'r') 349 | 350 | img = [] 351 | 352 | for line1 in f1: 353 | g1 = line1.strip().split(' ') 354 | img.append(g1[0]) 355 | 356 | num =len(img) 357 | print("num",num) 358 | 359 | w = np.int32(np.zeros((num,20))) 360 | l = np.int32(np.zeros((num,20))) 361 | 362 | print("Entering the loop for weights and labels") 363 | 364 | cnt = 0 365 | for i in range(0,20): 366 | 367 | f2 = open(data_dir + '/ImageSets/Main/'+CLASS_NAMES[i]+'_'+split+'.txt') 368 | a1 = f2.read().split() 369 | t = a1[1::2] 370 | tt = np.int32(t) 371 | ttt = tt.reshape(1,num) 372 | w[:,cnt] = np.int32(np.abs(ttt)) 373 | l[:,cnt] = ttt.clip(min = 0) 374 | cnt = cnt + 1 375 | 376 | 377 | labels = np.int32(l) 378 | weights = np.int32(w) 379 | print("Entering the loop for images") 380 | arr = [] 381 | for j in img: 382 | 383 | im = Image.open(data_dir+sub_dir2+ j +'.jpg') 384 | im = im.resize((256, 256), Image.ANTIALIAS) 385 | arr.append(np.float32(im)) 386 | 387 | image_ar = np.float32(arr) 388 | return (image_ar,labels,weights) 389 | 390 | 391 | def main(): 392 | args = parse_args() 393 | # Load training and eval data 394 | train_data, train_labels, train_weights = load_pascal( 395 | args.data_dir, split='trainval') 396 | eval_data, eval_labels, eval_weights = load_pascal( 397 | args.data_dir, split='test') 398 | 399 | 400 | pascal_classifier = tf.estimator.Estimator( 401 | model_fn=partial(cnn_model_fn, 402 | num_classes=train_labels.shape[1]), 403 | model_dir="pascal_vgg") 404 | tensors_to_log = {"loss": "loss"} 405 | logging_hook = tf.train.LoggingTensorHook( 406 | tensors=tensors_to_log, every_n_iter=400) 407 | 408 | list22 = [] 409 | for i in range(0,100): 410 | 411 | # Train the model 412 | train_input_fn = tf.estimator.inputs.numpy_input_fn( 413 | x={"x": train_data, "w": train_weights}, 414 | y=train_labels, 415 | batch_size=10, 416 | num_epochs=None, 417 | shuffle=True) 418 | 419 | pascal_classifier.train( 420 | input_fn=train_input_fn, 421 | steps=400, 422 | hooks=[logging_hook]) 423 | 424 | # Evaluate the model and print results 425 | eval_input_fn = tf.estimator.inputs.numpy_input_fn( 426 | x={"x": eval_data, "w": eval_weights}, 427 | y=eval_labels, 428 | num_epochs=1, 429 | shuffle=False) 430 | 431 | pred = list(pascal_classifier.predict(input_fn=eval_input_fn)) 432 | pred = np.stack([p['probabilities'] for p in pred]) 433 | rand_AP = compute_map( 434 | eval_labels, np.random.random(eval_labels.shape), 435 | eval_weights, average=None) 436 | print('Random AP: {} mAP'.format(np.mean(rand_AP))) 437 | gt_AP = compute_map( 438 | eval_labels, eval_labels, eval_weights, average=None) 439 | print('GT AP: {} mAP'.format(np.mean(gt_AP))) 440 | AP = compute_map(eval_labels, pred, eval_weights, average=None) 441 | print('Obtained {} mAP'.format(np.mean(AP))) 442 | print('per class:') 443 | for cid, cname in enumerate(CLASS_NAMES): 444 | print('{}: {}'.format(cname, _get_el(AP, cid))) 445 | list22.append(np.mean(AP)) 446 | 447 | summary_var("pascal_vgg","mAP",np.mean(AP),i*400) 448 | 449 | with open('list22.pkl','wb') as fr2: 450 | pickle.dump(list22,fr2) 451 | 452 | 453 | 454 | 455 | if __name__ == "__main__": 456 | main() -------------------------------------------------------------------------------- /04_pascal_vggfinetune.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | """ 4 | Created on Mon Mar 5 02:07:46 2018 5 | 6 | @author: snigdha 7 | """ 8 | 9 | from __future__ import absolute_import 10 | from __future__ import division 11 | from __future__ import print_function 12 | 13 | # Imports 14 | import sys 15 | import os 16 | import numpy as np 17 | import tensorflow as tf 18 | import argparse 19 | #import os.path as osp 20 | from PIL import Image 21 | from functools import partial 22 | from collections import defaultdict 23 | import pickle 24 | 25 | from eval import compute_map 26 | #import models 27 | 28 | tf.logging.set_verbosity(tf.logging.INFO) 29 | 30 | CLASS_NAMES = [ 31 | 'aeroplane', 32 | 'bicycle', 33 | 'bird', 34 | 'boat', 35 | 'bottle', 36 | 'bus', 37 | 'car', 38 | 'cat', 39 | 'chair', 40 | 'cow', 41 | 'diningtable', 42 | 'dog', 43 | 'horse', 44 | 'motorbike', 45 | 'person', 46 | 'pottedplant', 47 | 'sheep', 48 | 'sofa', 49 | 'train', 50 | 'tvmonitor', 51 | ] 52 | 53 | rdr = tf.train.NewCheckpointReader("./vgg_16.ckpt") 54 | 55 | def cnn_model_fn(features, labels, mode, num_classes=20): 56 | # Write this function 57 | # """Model function for CNN.""" 58 | # Input Layer 59 | 60 | input_layer = tf.reshape(features["x"], [-1, 224, 224, 3]) 61 | 62 | if mode == tf.estimator.ModeKeys.TRAIN: 63 | flipped = tf.map_fn(lambda image: tf.image.random_flip_left_right(image),features["x"]) 64 | cropped = tf.map_fn(lambda image:tf.random_crop(image,size=[224,224,3]),features["x"]) 65 | 66 | fets = tf.concat([features["x"],flipped,cropped],axis = 0) 67 | #wts = tf.concat([features["w"],features["w"],features["w"]],axis = 0) 68 | lbls = tf.concat([labels,labels,labels],axis = 0) 69 | 70 | feats = tf.random_shuffle(fets,seed = features["x"].shape[0]*3) 71 | #wtgs = tf.random_shuffle(wts,seed = features["x"].shape[0]*3) 72 | lbels = tf.random_shuffle(lbls,seed = features["x"].shape[0]*3) 73 | 74 | features["x"]= feats 75 | input_layer = features["x"] 76 | labels = lbels 77 | 78 | tf.summary.image("Training_images",input_layer) 79 | 80 | # Convolutional Layer #1 81 | conv1 = tf.layers.conv2d( 82 | inputs=input_layer, 83 | filters=64, 84 | kernel_size=[3,3], 85 | padding="same", 86 | strides = 1, 87 | activation=tf.nn.relu, 88 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv1/conv1_1/weights'),verify_shape=True), 89 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv1/conv1_1/biases'),verify_shape=True)) 90 | 91 | 92 | # Convolutional Layer #2 93 | conv2 = tf.layers.conv2d( 94 | inputs=conv1, 95 | filters=64, 96 | kernel_size=[3,3], 97 | padding="same", 98 | strides = 1, 99 | activation=tf.nn.relu, 100 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv1/conv1_2/weights'),verify_shape=True), 101 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv1/conv1_2/biases'),verify_shape=True)) 102 | 103 | # Pooling Layer #1 104 | pool1 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2) 105 | 106 | # Convolutional Layer #3 107 | conv3 = tf.layers.conv2d( 108 | inputs=pool1, 109 | filters=128, 110 | kernel_size=[3, 3], 111 | padding="same", 112 | strides = 1, 113 | activation=tf.nn.relu, 114 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv2/conv2_1/weights'),verify_shape=True), 115 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv2/conv2_1/biases'),verify_shape=True)) 116 | 117 | # Convolutional Layer #4 118 | conv4 = tf.layers.conv2d( 119 | inputs=conv3, 120 | filters=128, 121 | kernel_size=[3, 3], 122 | padding="same", 123 | strides = 1, 124 | activation=tf.nn.relu, 125 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv2/conv2_2/weights'),verify_shape=True), 126 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv2/conv2_2/biases'),verify_shape=True)) 127 | 128 | #Pooling layer 2 129 | pool2 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2) 130 | 131 | # Convolutional Layer #5 132 | conv5 = tf.layers.conv2d( 133 | inputs=pool2, 134 | filters=256, 135 | kernel_size=[3, 3], 136 | padding="same", 137 | strides = 1, 138 | activation=tf.nn.relu, 139 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_1/weights'),verify_shape=True), 140 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_1/biases'),verify_shape=True)) 141 | 142 | # Convolutional Layer #6 143 | conv6 = tf.layers.conv2d( 144 | inputs=conv5, 145 | filters=256, 146 | kernel_size=[3, 3], 147 | padding="same", 148 | strides = 1, 149 | activation=tf.nn.relu, 150 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_2/weights'),verify_shape=True), 151 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_2/biases'),verify_shape=True)) 152 | 153 | 154 | # Convolutional Layer #7 155 | conv7 = tf.layers.conv2d( 156 | inputs=conv6, 157 | filters=256, 158 | kernel_size=[3, 3], 159 | padding="same", 160 | strides = 1, 161 | activation=tf.nn.relu, 162 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_3/weights'),verify_shape=True), 163 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_3/biases'),verify_shape=True)) 164 | 165 | #Pooling layer 3 166 | pool3 = tf.layers.max_pooling2d(inputs=conv7, pool_size=[2, 2], strides=2) 167 | 168 | # Convolutional Layer #8 169 | conv8 = tf.layers.conv2d( 170 | inputs=pool3, 171 | filters=512, 172 | kernel_size=[3, 3], 173 | padding="same", 174 | strides = 1, 175 | activation=tf.nn.relu, 176 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_1/weights'),verify_shape=True), 177 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_1/biases'),verify_shape=True)) 178 | 179 | # Convolutional Layer #9 180 | conv9 = tf.layers.conv2d( 181 | inputs=conv8, 182 | filters=512, 183 | kernel_size=[3, 3], 184 | padding="same", 185 | strides = 1, 186 | activation=tf.nn.relu, 187 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_2/weights'),verify_shape=True), 188 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_2/biases'),verify_shape=True)) 189 | 190 | 191 | # Convolutional Layer #10 192 | conv10 = tf.layers.conv2d( 193 | inputs=conv9, 194 | filters=512, 195 | kernel_size=[3, 3], 196 | padding="same", 197 | strides = 1, 198 | activation=tf.nn.relu, 199 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_3/weights'),verify_shape=True), 200 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_3/biases'),verify_shape=True)) 201 | 202 | 203 | #Pooling layer 4 204 | pool4 = tf.layers.max_pooling2d(inputs=conv10, pool_size=[2, 2], strides=2) 205 | 206 | 207 | # Convolutional Layer #11 208 | conv11 = tf.layers.conv2d( 209 | inputs=pool4, 210 | filters=512, 211 | kernel_size=[3, 3], 212 | padding="same", 213 | strides = 1, 214 | activation=tf.nn.relu, 215 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_1/weights'),verify_shape=True), 216 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_1/biases'),verify_shape=True)) 217 | 218 | 219 | 220 | # Convolutional Layer #12 221 | conv12 = tf.layers.conv2d( 222 | inputs=conv11, 223 | filters=512, 224 | kernel_size=[3, 3], 225 | padding="same", 226 | strides = 1, 227 | activation=tf.nn.relu, 228 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_2/weights'),verify_shape=True), 229 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_2/biases'),verify_shape=True)) 230 | 231 | 232 | # Convolutional Layer #13 233 | conv13 = tf.layers.conv2d( 234 | inputs=conv12, 235 | filters=512, 236 | kernel_size=[3, 3], 237 | padding="same", 238 | strides = 1, 239 | activation=tf.nn.relu, 240 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_3/weights'),verify_shape=True), 241 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_3/biases'),verify_shape=True)) 242 | 243 | 244 | #Pooling layer 5 245 | pool5 = tf.layers.max_pooling2d(inputs=conv13, pool_size=[2, 2], strides=2) 246 | 247 | 248 | dense1 = tf.layers.conv2d(inputs=pool5, 249 | activation=tf.nn.relu, 250 | filters=4096, # this specifies the number of channels in the output layer 251 | kernel_size=[7, 7], 252 | strides=[1,1], 253 | padding="same", 254 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/fc6/weights'),verify_shape=True), 255 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/fc6/biases'),verify_shape=True)) 256 | 257 | 258 | dropout1 = tf.layers.dropout( 259 | inputs=dense1, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN) 260 | 261 | 262 | dense2 = tf.layers.conv2d(inputs=dropout1, 263 | filters=4096, # this specifies the number of channels in the output layer 264 | kernel_size=[1, 1], 265 | strides=[1,1], 266 | padding="same", 267 | activation=tf.nn.relu, 268 | kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/fc7/weights'),verify_shape=True), 269 | bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/fc7/biases'),verify_shape=True)) 270 | 271 | 272 | dropout2 = tf.layers.dropout( 273 | inputs=dense2, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN) 274 | 275 | 276 | dense3 = tf.layers.conv2d(inputs=dropout2, 277 | filters=1000, # this specifies the number of channels in the output layer 278 | kernel_size=[1, 1], 279 | strides=[1,1], 280 | padding="same", 281 | activation=tf.nn.relu) 282 | 283 | # Logits Layer 284 | logits = tf.layers.dense(inputs=tf.contrib.layers.flatten(dense3), units=20) 285 | 286 | 287 | predictions = { 288 | # Generate predictions (for PREDICT and EVAL mode) 289 | "classes": tf.argmax(input=logits, axis=1), 290 | # Add `softmax_tensor` to the graph. It is used for PREDICT and by the 291 | # `logging_hook`. 292 | "probabilities": tf.sigmoid(logits, name="sigmoid_tensor") 293 | } 294 | 295 | if mode == tf.estimator.ModeKeys.PREDICT: 296 | return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions) 297 | 298 | # Calculate Loss (for both TRAIN and EVAL modes) 299 | 300 | 301 | # Configure the Training Op (for TRAIN mode) 302 | if mode == tf.estimator.ModeKeys.TRAIN: 303 | 304 | loss = tf.identity(tf.losses.sigmoid_cross_entropy( 305 | labels, logits=logits), name='loss') 306 | 307 | decay_learning_rate = tf.train.exponential_decay( 308 | learning_rate = 0.0001, 309 | global_step=tf.train.get_global_step(), 310 | decay_steps = 1000, 311 | decay_rate = 0.5, 312 | staircase = False, 313 | name = None) 314 | optimizer = tf.train.MomentumOptimizer(learning_rate=decay_learning_rate, 315 | momentum = 0.9) 316 | 317 | tf.summary.scalar("decayed_learning_rate",decay_learning_rate) 318 | 319 | grads_and_vars= optimizer.compute_gradients(loss) 320 | 321 | for g, v in grads_and_vars: 322 | if g is not None: 323 | #print(format(v.name)) 324 | tf.summary.histogram("{}/grad_histogram".format(v.name), g) 325 | 326 | train_op = optimizer.minimize( 327 | loss=loss, 328 | global_step=tf.train.get_global_step()) 329 | 330 | return tf.estimator.EstimatorSpec( 331 | mode=mode, loss=loss, train_op=train_op) 332 | 333 | 334 | # Add evaluation metrics (for EVAL mode) 335 | eval_metric_ops = { 336 | "accuracy": tf.metrics.accuracy( 337 | labels=labels, predictions=predictions["classes"])} 338 | return tf.estimator.EstimatorSpec( 339 | mode=mode, loss=loss, eval_metric_ops=eval_metric_ops) 340 | 341 | 342 | def parse_args(): 343 | parser = argparse.ArgumentParser( 344 | description='Train a classifier in tensorflow!') 345 | parser.add_argument( 346 | 'data_dir', type=str, default='data/VOC2007', 347 | help='Path to PASCAL data storage') 348 | if len(sys.argv) == 1: 349 | parser.print_help() 350 | sys.exit(1) 351 | args = parser.parse_args() 352 | return args 353 | 354 | 355 | def _get_el(arr, i): 356 | try: 357 | return arr[i] 358 | except IndexError: 359 | return arr 360 | 361 | from tensorflow.core.framework import summary_pb2 362 | def summary_var(log_dir, name, val, step): 363 | writer = tf.summary.FileWriterCache.get(log_dir) 364 | summary_proto = summary_pb2.Summary() 365 | value = summary_proto.value.add() 366 | value.tag = name 367 | value.simple_value = float(val) 368 | writer.add_summary(summary_proto, step) 369 | writer.flush() 370 | 371 | 372 | def load_pascal(data_dir, split='train'): 373 | 374 | """ 375 | Function to read images from PASCAL data folder. 376 | Args: 377 | data_dir (str): Path to the VOC2007 directory. 378 | split (str): train/val/trainval split to use. 379 | Returns: 380 | images (np.ndarray): Return a np.float32 array of 381 | shape (N, H, W, 3), where H, W are 224px each, 382 | and each image is in RGB format. 383 | labels (np.ndarray): An array of shape (N, 20) of 384 | type np.int32, with 0s and 1s; 1s for classes that 385 | are active in that image. 386 | """ 387 | 388 | 389 | sub_dir1 = '/ImageSets/Main/' 390 | sub_dir2 = '/JPEGImages/' 391 | f1 = open(data_dir+sub_dir1+"aeroplane"+"_"+split+".txt", 'r') 392 | 393 | img = [] 394 | 395 | for line1 in f1: 396 | g1 = line1.strip().split(' ') 397 | img.append(g1[0]) 398 | 399 | num =len(img) 400 | print("num",num) 401 | 402 | w = np.int32(np.zeros((num,20))) 403 | l = np.int32(np.zeros((num,20))) 404 | 405 | print("Entering the loop for weights and labels") 406 | 407 | cnt = 0 408 | for i in range(0,20): 409 | 410 | f2 = open(data_dir + '/ImageSets/Main/'+CLASS_NAMES[i]+'_'+split+'.txt') 411 | a1 = f2.read().split() 412 | t = a1[1::2] 413 | tt = np.int32(t) 414 | ttt = tt.reshape(1,num) 415 | w[:,cnt] = np.int32(np.abs(ttt)) 416 | l[:,cnt] = ttt.clip(min = 0) 417 | cnt = cnt + 1 418 | 419 | 420 | labels = np.int32(l) 421 | weights = np.int32(w) 422 | print("Entering the loop for images") 423 | arr = [] 424 | for j in img: 425 | 426 | im = Image.open(data_dir+sub_dir2+ j +'.jpg') 427 | im = im.resize((256, 256), Image.ANTIALIAS) 428 | arr.append(np.float32(im)) 429 | 430 | image_ar = np.float32(arr) 431 | return (image_ar,labels,weights) 432 | 433 | def main(): 434 | args = parse_args() 435 | # Load training and eval data 436 | train_data, train_labels, train_weights = load_pascal( 437 | args.data_dir, split='trainval') 438 | eval_data, eval_labels, eval_weights = load_pascal( 439 | args.data_dir, split='test') 440 | 441 | 442 | pascal_classifier = tf.estimator.Estimator( 443 | model_fn=partial(cnn_model_fn, 444 | num_classes=train_labels.shape[1]), 445 | model_dir="pascal_vggfinetune") 446 | tensors_to_log = {"loss": "loss"} 447 | logging_hook = tf.train.LoggingTensorHook( 448 | tensors=tensors_to_log, every_n_iter=400) 449 | 450 | list22 = [] 451 | for i in range(0,10): 452 | 453 | # Train the model 454 | train_input_fn = tf.estimator.inputs.numpy_input_fn( 455 | x={"x": train_data, "w": train_weights}, 456 | y=train_labels, 457 | batch_size=10, 458 | num_epochs=None, 459 | shuffle=True) 460 | 461 | pascal_classifier.train( 462 | input_fn=train_input_fn, 463 | steps=400, 464 | hooks=[logging_hook]) 465 | 466 | # Evaluate the model and print results 467 | eval_input_fn = tf.estimator.inputs.numpy_input_fn( 468 | x={"x": eval_data, "w": eval_weights}, 469 | y=eval_labels, 470 | num_epochs=1, 471 | shuffle=False) 472 | 473 | pred = list(pascal_classifier.predict(input_fn=eval_input_fn)) 474 | pred = np.stack([p['probabilities'] for p in pred]) 475 | rand_AP = compute_map( 476 | eval_labels, np.random.random(eval_labels.shape), 477 | eval_weights, average=None) 478 | print('Random AP: {} mAP'.format(np.mean(rand_AP))) 479 | gt_AP = compute_map( 480 | eval_labels, eval_labels, eval_weights, average=None) 481 | print('GT AP: {} mAP'.format(np.mean(gt_AP))) 482 | AP = compute_map(eval_labels, pred, eval_weights, average=None) 483 | print('Obtained {} mAP'.format(np.mean(AP))) 484 | print('per class:') 485 | for cid, cname in enumerate(CLASS_NAMES): 486 | print('{}: {}'.format(cname, _get_el(AP, cid))) 487 | list22.append(np.mean(AP)) 488 | summary_var("pascal_vggfinetune","mAP",np.mean(AP),i*400) 489 | 490 | with open('list22.pkl','wb') as fr2: 491 | pickle.dump(list22,fr2) 492 | 493 | 494 | 495 | 496 | if __name__ == "__main__": 497 | main() -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multi-label Image Classification using Tensorflow 2 | Implementation of simple CNN on MNIST, VGG16 and Alexnet on Pascal VOC dataset 3 | 4 | 1) 00_mnist.py: Contains code for MNIST 10-digit classification in Tensorflow 5 | 6 | 2) 01_pascal.py: CNN architecture for MNIST on Pascal VOC dataset 7 | 8 | 3) 02_pascal_alexnet.py: Alexnet on Pascal VOC 9 | 10 | 4) 03_pascal_vgg16.py: VGG16 on Pascal VOC from scratch 11 | 12 | 5) 04_pascal_vggfinetune.py : Fine-tuning VGG16 on Pascal VOC using pre-trained weights 13 | 14 | 6) 5a_conv1.py: Script to generate conv1 visualisation features. 15 | gist_cifar10_train.py : Needed to run 5a_conv1.py 16 | 17 | i) Place 5a_conv1.py in the created directory containing the ckpt files(obtained from train). 18 | ii) Run 5a_conv1.py to obtain a folder containing the tensor board object. 19 | iii) Run tensor board —logdir= image_filters 20 | -------------------------------------------------------------------------------- /eval.py: -------------------------------------------------------------------------------- 1 | import sklearn.metrics 2 | 3 | 4 | def compute_map(gt, pred, valid, average=None): 5 | """ 6 | Compute the multi-label classification accuracy. 7 | gt (np.ndarray): Shape Nx20, 0 or 1, 1 if the object i is present in that 8 | image. 9 | pred (np.ndarray): Shape Nx20, probability of that object in the image 10 | (output probablitiy). 11 | valid (np.ndarray): Shape Nx20, 0 if you want to ignore that class for that 12 | image. Some objects are labeled as ambiguous. 13 | """ 14 | nclasses = gt.shape[1] 15 | all_ap = [] 16 | for cid in range(nclasses): 17 | gt_cls = gt[:, cid][valid[:, cid] > 0].astype('float32') 18 | pred_cls = pred[:, cid][valid[:, cid] > 0].astype('float32') 19 | # As per PhilK. code: 20 | # https://github.com/philkr/voc-classification/blob/master/src/train_cls.py 21 | pred_cls -= 1e-5 * gt_cls 22 | ap = sklearn.metrics.average_precision_score( 23 | gt_cls, pred_cls, average=average) 24 | all_ap.append(ap) 25 | return all_ap 26 | --------------------------------------------------------------------------------