├── 01_pascal.py
├── 02_pascal_alexnet.py
├── 03_pascal_vgg16.py
├── 04_pascal_vggfinetune.py
├── README.md
└── eval.py


/01_pascal.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Mon Feb 26 03:57:30 2018
  5 | 
  6 | @author: snigdha
  7 | """
  8 | 
  9 | from __future__ import absolute_import
 10 | from __future__ import division
 11 | from __future__ import print_function
 12 | 
 13 | # Imports
 14 | import sys
 15 | import numpy as np
 16 | import tensorflow as tf
 17 | import argparse
 18 | #import os.path as osp
 19 | from PIL import Image
 20 | from functools import partial
 21 | import pickle
 22 | 
 23 | from eval import compute_map
 24 | #import models
 25 | 
 26 | tf.logging.set_verbosity(tf.logging.INFO)
 27 | 
 28 | CLASS_NAMES = [
 29 |     'aeroplane',
 30 |     'bicycle',
 31 |     'bird',
 32 |     'boat',
 33 |     'bottle',
 34 |     'bus',
 35 |     'car',
 36 |     'cat',
 37 |     'chair',
 38 |     'cow',
 39 |     'diningtable',
 40 |     'dog',
 41 |     'horse',
 42 |     'motorbike',
 43 |     'person',
 44 |     'pottedplant',
 45 |     'sheep',
 46 |     'sofa',
 47 |     'train',
 48 |     'tvmonitor',
 49 | ]
 50 | 
 51 | 
 52 | def cnn_model_fn(features, labels, mode, num_classes=20):
 53 |     # Write this function
 54 |     # """Model function for CNN."""
 55 |     # Input Layer
 56 |     input_layer = tf.reshape(features["x"], [-1, 256, 256, 3])
 57 | 
 58 |     # Convolutional Layer #1
 59 |     conv1 = tf.layers.conv2d(
 60 |         inputs=input_layer,
 61 |         filters=32,
 62 |         kernel_size=[5, 5],
 63 |         padding="same",
 64 |         activation=tf.nn.relu)
 65 | 
 66 |     # Pooling Layer #1
 67 |     pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[2, 2], strides=2)
 68 | 
 69 |     # Convolutional Layer #2 and Pooling Layer #2
 70 |     conv2 = tf.layers.conv2d(
 71 |         inputs=pool1,
 72 |         filters=64,
 73 |         kernel_size=[5, 5],
 74 |         padding="same",
 75 |         activation=tf.nn.relu)
 76 |     pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
 77 | 
 78 |     # Dense Layer
 79 |     pool2_flat = tf.reshape(pool2, [-1, 64 * 64 * 64])
 80 |     dense = tf.layers.dense(inputs=pool2_flat, units=1024,
 81 |                             activation=tf.nn.relu)
 82 |     dropout = tf.layers.dropout(
 83 |         inputs=dense, rate=0.4, training=mode == tf.estimator.ModeKeys.TRAIN)
 84 | 
 85 |     # Logits Layer
 86 |     logits = tf.layers.dense(inputs=dropout, units=20)
 87 | 
 88 |     predictions = {
 89 |         # Generate predictions (for PREDICT and EVAL mode)
 90 |         "classes": tf.argmax(input=logits, axis=1),
 91 |         # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
 92 |         # `logging_hook`.
 93 |         "probabilities": tf.sigmoid(logits, name="softmax_tensor")
 94 |     }
 95 | 
 96 |     if mode == tf.estimator.ModeKeys.PREDICT:
 97 |         return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
 98 | 
 99 |     # Calculate Loss (for both TRAIN and EVAL modes)
100 |     
101 |     loss = tf.identity(tf.losses.sigmoid_cross_entropy(
102 |                  labels, logits=logits), name='loss')
103 | 
104 |     # Configure the Training Op (for TRAIN mode)
105 |     if mode == tf.estimator.ModeKeys.TRAIN:
106 |         optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001)
107 |         train_op = optimizer.minimize(
108 |             loss=loss,
109 |             global_step=tf.train.get_global_step())
110 |         return tf.estimator.EstimatorSpec(
111 |             mode=mode, loss=loss, train_op=train_op)
112 | 
113 |     # Add evaluation metrics (for EVAL mode)
114 |     eval_metric_ops = {
115 |         "accuracy": tf.metrics.accuracy(
116 |             labels=labels, predictions=predictions)}
117 |     return tf.estimator.EstimatorSpec(
118 |         mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
119 | 
120 | 
def load_pascal(data_dir, split='train'):
    """Read images, labels and weights from a PASCAL data folder.

    Args:
        data_dir (str): Path to the VOC2007 directory.
        split (str): Split name used to build the
            ``ImageSets/Main/<class>_<split>.txt`` file names.

    Returns:
        images (np.ndarray): np.float32 array of shape (N, 256, 256, 3);
            every image is converted to RGB and resized to 256x256.
        labels (np.ndarray): np.int32 array of shape (N, num_classes) with
            the per-class flag clipped at 0 (positive flags stay, negative
            flags become 0).
        weights (np.ndarray): np.int32 array of shape (N, num_classes) with
            the absolute value of the per-class flag, so a flag of 0 gives
            a weight of 0.
    """
    sub_dir1 = '/ImageSets/Main/'
    sub_dir2 = '/JPEGImages/'

    # The first column of any class file lists the image ids of the split.
    with open(data_dir + sub_dir1 + "aeroplane" + "_" + split + ".txt", 'r') as f1:
        img = [line1.strip().split(' ')[0] for line1 in f1]

    num = len(img)
    print("num", num)

    num_classes = len(CLASS_NAMES)
    weights = np.zeros((num, num_classes), dtype=np.int32)
    labels = np.zeros((num, num_classes), dtype=np.int32)

    print("Entering the loop for weights and labels")

    for col, cname in enumerate(CLASS_NAMES):
        with open(data_dir + sub_dir1 + cname + '_' + split + '.txt') as f2:
            tokens = f2.read().split()
        # Tokens alternate "<image id> <flag>"; keep only the flags.
        flags = np.int32(tokens[1::2])
        weights[:, col] = np.abs(flags)
        labels[:, col] = flags.clip(min=0)

    print("Entering the loop for images")
    arr = []
    for j in img:
        with Image.open(data_dir + sub_dir2 + j + '.jpg') as im:
            # convert() guarantees 3 channels so every array stacks to
            # (256, 256, 3); LANCZOS is the filter ANTIALIAS aliased.
            resized = im.convert('RGB').resize((256, 256), Image.LANCZOS)
        arr.append(np.float32(resized))

    image_ar = np.float32(arr)
    return (image_ar, labels, weights)
181 |     
182 | 
def parse_args():
    """Parse the command line.

    When the script is started without any argument the usage text is
    printed and the process exits with status 1.

    Returns:
        The parsed ``argparse.Namespace`` with a ``data_dir`` attribute.
    """
    arg_parser = argparse.ArgumentParser(
        description='Train a classifier in tensorflow!')
    arg_parser.add_argument(
        'data_dir',
        type=str,
        default='data/VOC2007',
        help='Path to PASCAL data storage')

    launched_without_arguments = len(sys.argv) == 1
    if launched_without_arguments:
        arg_parser.print_help()
        sys.exit(1)

    return arg_parser.parse_args()
194 | 
195 | 
def _get_el(arr, i):
    """Return ``arr[i]``; fall back to ``arr`` itself on IndexError."""
    try:
        element = arr[i]
    except IndexError:
        element = arr
    return element
201 | 
202 | from tensorflow.core.framework import summary_pb2
def summary_var(log_dir, name, val, step):
    """Write a single scalar summary to the event writer for ``log_dir``.

    Args:
        log_dir: directory whose cached ``FileWriter`` receives the summary.
        name: tag of the scalar.
        val: value to record; converted with ``float``.
        step: step number attached to the summary.
    """
    event_writer = tf.summary.FileWriterCache.get(log_dir)
    scalar = summary_pb2.Summary.Value(tag=name, simple_value=float(val))
    event_writer.add_summary(summary_pb2.Summary(value=[scalar]), step)
    # Flush right away so the point is on disk after every call.
    event_writer.flush()
211 | 
def main():
    """Train the scratch CNN and track its mAP on the test split.

    Runs 20 rounds of 50 training steps. After each round the model predicts
    on the 'test' split, the random / ground-truth / obtained mAP values are
    printed, and the obtained mAP is written as a summary. The list of
    per-round mAP values is pickled to ``list22.pkl`` at the end.
    """
    model_dir = "pascal_model_scratch"
    num_rounds = 20
    steps_per_round = 50

    args = parse_args()
    # Load training and eval data
    train_data, train_labels, train_weights = load_pascal(
        args.data_dir, split='trainval')
    eval_data, eval_labels, eval_weights = load_pascal(
        args.data_dir, split='test')

    print ("Done loading weights")

    pascal_classifier = tf.estimator.Estimator(
        model_fn=partial(cnn_model_fn,
                         num_classes=train_labels.shape[1]),
        model_dir=model_dir)
    tensors_to_log = {"loss": "loss"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=50)

    # The input functions do not depend on the round, so build them once.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data, "w": train_weights},
        y=train_labels,
        batch_size=10,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data, "w": eval_weights},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)

    list22 = []
    for i in range(num_rounds):
        # Train the model
        pascal_classifier.train(
            input_fn=train_input_fn,
            steps=steps_per_round,
            hooks=[logging_hook])

        # Evaluate the model and print results
        pred = np.stack(
            [p['probabilities']
             for p in pascal_classifier.predict(input_fn=eval_input_fn)])
        rand_AP = compute_map(
            eval_labels, np.random.random(eval_labels.shape),
            eval_weights, average=None)
        print('Random AP: {} mAP'.format(np.mean(rand_AP)))
        gt_AP = compute_map(
            eval_labels, eval_labels, eval_weights, average=None)
        print('GT AP: {} mAP'.format(np.mean(gt_AP)))
        AP = compute_map(eval_labels, pred, eval_weights, average=None)
        mean_ap = np.mean(AP)
        print('Obtained {} mAP'.format(mean_ap))
        print('per class:')
        for cid, cname in enumerate(CLASS_NAMES):
            print('{}: {}'.format(cname, _get_el(AP, cid)))
        list22.append(mean_ap)

        # NOTE(review): the summary step is the round index, not the
        # training global step -- confirm this is the intended x-axis.
        summary_var(model_dir, "mAP", mean_ap, i)

    with open('list22.pkl', 'wb') as fr2:
        pickle.dump(list22, fr2)
278 |             
279 | 
280 | 
281 | if __name__ == "__main__":
282 |     main()


--------------------------------------------------------------------------------
/02_pascal_alexnet.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat Mar  3 17:07:30 2018
  5 | 
  6 | @author: snigdha
  7 | """
  8 | 
  9 | from __future__ import absolute_import
 10 | from __future__ import division
 11 | from __future__ import print_function
 12 | 
 13 | # Imports
 14 | import sys
 15 | import os
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | import argparse
 19 | #import os.path as osp
 20 | from PIL import Image
 21 | from functools import partial
 22 | from collections import defaultdict
 23 | import pickle
 24 | 
 25 | from eval import compute_map
 26 | #import models
 27 | 
 28 | tf.logging.set_verbosity(tf.logging.INFO)
 29 | 
 30 | CLASS_NAMES = [
 31 |     'aeroplane',
 32 |     'bicycle',
 33 |     'bird',
 34 |     'boat',
 35 |     'bottle',
 36 |     'bus',
 37 |     'car',
 38 |     'cat',
 39 |     'chair',
 40 |     'cow',
 41 |     'diningtable',
 42 |     'dog',
 43 |     'horse',
 44 |     'motorbike',
 45 |     'person',
 46 |     'pottedplant',
 47 |     'sheep',
 48 |     'sofa',
 49 |     'train',
 50 |     'tvmonitor',
 51 | ]
 52 | 
 53 | def cnn_model_fn(features, labels, mode, num_classes=20):
 54 |     # Write this function
 55 |     # """Model function for CNN."""
 56 |     # Input Layer
 57 |     
 58 |     input_layer = tf.reshape(features["x"], [-1, 224, 224, 3])
 59 |     
 60 |     if mode == tf.estimator.ModeKeys.TRAIN:
 61 |         flipped = tf.map_fn(lambda image: tf.image.random_flip_left_right(image),features["x"])
 62 |         cropped = tf.map_fn(lambda image:tf.random_crop(image,size=[224,224,3]),features["x"])
 63 |         
 64 |         fets = tf.concat([features["x"],flipped,cropped],axis = 0)
 65 |         #wts = tf.concat([features["w"],features["w"],features["w"]],axis = 0)
 66 |         lbls = tf.concat([labels,labels,labels],axis = 0)
 67 |         
 68 |         feats = tf.random_shuffle(fets,seed = features["x"].shape[0]*3)
 69 |         #wtgs = tf.random_shuffle(wts,seed = features["x"].shape[0]*3)
 70 |         lbels = tf.random_shuffle(lbls,seed = features["x"].shape[0]*3)
 71 |         
 72 |         features["x"]= feats
 73 |         input_layer = features["x"]
 74 |         labels = lbels
 75 |     
 76 | 
 77 |     # Convolutional Layer #1
 78 |     conv1 = tf.layers.conv2d(
 79 |         inputs=input_layer,
 80 |         filters=96,
 81 |         kernel_size=[11,11],
 82 |         padding="valid",
 83 |         strides = 4,
 84 |         activation=tf.nn.relu,
 85 |         kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01),
 86 |         bias_initializer=tf.zeros_initializer()
 87 |         )
 88 |     
 89 |     
 90 |     # Pooling Layer #1
 91 |     pool1 = tf.layers.max_pooling2d(inputs=conv1, pool_size=[3, 3], strides=2)
 92 |     
 93 |     
 94 |     # Convolutional Layer #2
 95 |     conv2 = tf.layers.conv2d(
 96 |         inputs=pool1,
 97 |         filters=256,
 98 |         kernel_size=[5,5],
 99 |         padding="same",
100 |         strides = 1,
101 |         activation=tf.nn.relu,
102 |         kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01),
103 |         bias_initializer=tf.zeros_initializer()
104 |         )
105 | 
106 |     # Pooling Layer #2
107 |     pool2 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[3, 3], strides=2)
108 |     
109 | 
110 |     # Convolutional Layer #3 
111 |     conv3 = tf.layers.conv2d(
112 |         inputs=pool2,
113 |         filters=384,
114 |         kernel_size=[3, 3],
115 |         padding="same",
116 |         strides = 1,
117 |         activation=tf.nn.relu,
118 |         kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01),
119 |         bias_initializer=tf.zeros_initializer())
120 |     
121 |     # Convolutional Layer #4 
122 |     conv4 = tf.layers.conv2d(
123 |         inputs=conv3,
124 |         filters=384,
125 |         kernel_size=[3, 3],
126 |         padding="same",
127 |         strides = 1,
128 |         activation=tf.nn.relu,
129 |         kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01),
130 |         bias_initializer=tf.zeros_initializer())
131 |     
132 |     # Convolutional Layer #5 
133 |     conv5 = tf.layers.conv2d(
134 |         inputs=conv4,
135 |         filters=256,
136 |         kernel_size=[3, 3],
137 |         padding="same",
138 |         strides = 1,
139 |         activation=tf.nn.relu,
140 |         kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01),
141 |         bias_initializer=tf.zeros_initializer())
142 |     
143 |     # Pooling Layer #3
144 |     pool3 = tf.layers.max_pooling2d(inputs=conv5, pool_size=[3, 3], strides=2)   
145 |     
146 |     # Dense Layer
147 |     pool3_flat = tf.contrib.layers.flatten(pool3)
148 |     
149 |     dense1 = tf.layers.dense(inputs=pool3_flat, units=4096,
150 |                             activation=tf.nn.relu,
151 |                             kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01),
152 |                             bias_initializer=tf.zeros_initializer()
153 |                             )
154 |     dropout1 = tf.layers.dropout(
155 |         inputs=dense1, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)
156 |     
157 |     dense2 = tf.layers.dense(inputs=dropout1, units=4096,
158 |                             activation=tf.nn.relu,
159 |                             kernel_initializer=tf.truncated_normal_initializer(mean = 0.0,stddev=0.01),
160 |                             bias_initializer=tf.zeros_initializer(),
161 |                             )
162 |     dropout2 = tf.layers.dropout(
163 |         inputs=dense2, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)
164 |               
165 |     
166 |     # Logits Layer
167 |     logits = tf.layers.dense(inputs=dropout2, units=20)
168 | 
169 |     predictions = {
170 |         # Generate predictions (for PREDICT and EVAL mode)
171 |         "classes": tf.argmax(input=logits, axis=1),
172 |         # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
173 |         # `logging_hook`.
174 |         "probabilities": tf.sigmoid(logits, name="sigmoid_tensor")
175 |     }
176 | 
177 |     if mode == tf.estimator.ModeKeys.PREDICT:
178 |         return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
179 | 
180 |     # Calculate Loss (for both TRAIN and EVAL modes)
181 |     
182 | 
183 |     # Configure the Training Op (for TRAIN mode)
184 |     if mode == tf.estimator.ModeKeys.TRAIN:
185 |         
186 |         loss = tf.identity(tf.losses.sigmoid_cross_entropy(
187 |         labels, logits=logits), name='loss')
188 |         
189 |         decay_learning_rate = tf.train.exponential_decay(
190 |                 learning_rate = 0.001,
191 |                 global_step=tf.train.get_global_step(),
192 |                 decay_steps = 10000,
193 |                 decay_rate = 0.5,
194 |                 staircase = True,
195 |                 name = None)
196 |         optimizer = tf.train.MomentumOptimizer(learning_rate=decay_learning_rate,
197 |                                                          momentum = 0.9)
198 |         
199 |         train_op = optimizer.minimize(
200 |             loss=loss,
201 |             global_step=tf.train.get_global_step())
202 |         return tf.estimator.EstimatorSpec(
203 |             mode=mode, loss=loss, train_op=train_op)
204 | 
205 |     # Add evaluation metrics (for EVAL mode)
206 |     eval_metric_ops = {
207 |         "accuracy": tf.metrics.accuracy(
208 |             labels=labels, predictions=predictions)}
209 |     return tf.estimator.EstimatorSpec(
210 |         mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
211 | 
def load_pascal(data_dir, split='train'):
    """Read images, labels and weights from a PASCAL data folder.

    Args:
        data_dir (str): Path to the VOC2007 directory.
        split (str): Split name used to build the
            ``ImageSets/Main/<class>_<split>.txt`` file names.

    Returns:
        images (np.ndarray): np.float32 array of shape (N, 256, 256, 3);
            every image is converted to RGB and resized to 256x256.
        labels (np.ndarray): np.int32 array of shape (N, num_classes) with
            the per-class flag clipped at 0 (positive flags stay, negative
            flags become 0).
        weights (np.ndarray): np.int32 array of shape (N, num_classes) with
            the absolute value of the per-class flag, so a flag of 0 gives
            a weight of 0.
    """
    sub_dir1 = '/ImageSets/Main/'
    sub_dir2 = '/JPEGImages/'

    # The first column of any class file lists the image ids of the split.
    with open(data_dir + sub_dir1 + "aeroplane" + "_" + split + ".txt", 'r') as f1:
        img = [line1.strip().split(' ')[0] for line1 in f1]

    num = len(img)
    print("num", num)

    num_classes = len(CLASS_NAMES)
    weights = np.zeros((num, num_classes), dtype=np.int32)
    labels = np.zeros((num, num_classes), dtype=np.int32)

    print("Entering the loop for weights and labels")

    for col, cname in enumerate(CLASS_NAMES):
        with open(data_dir + sub_dir1 + cname + '_' + split + '.txt') as f2:
            tokens = f2.read().split()
        # Tokens alternate "<image id> <flag>"; keep only the flags.
        flags = np.int32(tokens[1::2])
        weights[:, col] = np.abs(flags)
        labels[:, col] = flags.clip(min=0)

    print("Entering the loop for images")
    arr = []
    for j in img:
        with Image.open(data_dir + sub_dir2 + j + '.jpg') as im:
            # convert() guarantees 3 channels so every array stacks to
            # (256, 256, 3); LANCZOS is the filter ANTIALIAS aliased.
            resized = im.convert('RGB').resize((256, 256), Image.LANCZOS)
        arr.append(np.float32(resized))

    image_ar = np.float32(arr)
    return (image_ar, labels, weights)
272 |         
273 | 
274 | 
def parse_args():
    """Parse the command line.

    When the script is started without any argument the usage text is
    printed and the process exits with status 1.

    Returns:
        The parsed ``argparse.Namespace`` with a ``data_dir`` attribute.
    """
    arg_parser = argparse.ArgumentParser(
        description='Train a classifier in tensorflow!')
    arg_parser.add_argument(
        'data_dir',
        type=str,
        default='data/VOC2007',
        help='Path to PASCAL data storage')

    launched_without_arguments = len(sys.argv) == 1
    if launched_without_arguments:
        arg_parser.print_help()
        sys.exit(1)

    return arg_parser.parse_args()
286 | 
287 | 
def _get_el(arr, i):
    """Return ``arr[i]``; fall back to ``arr`` itself on IndexError."""
    try:
        element = arr[i]
    except IndexError:
        element = arr
    return element
293 |     
294 | from tensorflow.core.framework import summary_pb2
def summary_var(log_dir, name, val, step):
    """Write a single scalar summary to the event writer for ``log_dir``.

    Args:
        log_dir: directory whose cached ``FileWriter`` receives the summary.
        name: tag of the scalar.
        val: value to record; converted with ``float``.
        step: step number attached to the summary.
    """
    event_writer = tf.summary.FileWriterCache.get(log_dir)
    scalar = summary_pb2.Summary.Value(tag=name, simple_value=float(val))
    event_writer.add_summary(summary_pb2.Summary(value=[scalar]), step)
    # Flush right away so the point is on disk after every call.
    event_writer.flush()
303 | 
304 | 
def main():
    """Train the AlexNet-style model and track its mAP on the test split.

    Runs 100 rounds of 400 training steps. After each round the model
    predicts on the 'test' split, the random / ground-truth / obtained mAP
    values are printed, and the obtained mAP is written as a summary. The
    list of per-round mAP values is pickled to ``map.pkl`` at the end.
    """
    model_dir = "pascal_alexnet"
    num_rounds = 100
    steps_per_round = 400

    args = parse_args()
    # Load training and eval data
    train_data, train_labels, train_weights = load_pascal(
        args.data_dir, split='trainval')
    eval_data, eval_labels, eval_weights = load_pascal(
        args.data_dir, split='test')

    pascal_classifier = tf.estimator.Estimator(
        model_fn=partial(cnn_model_fn,
                         num_classes=train_labels.shape[1]),
        model_dir=model_dir)
    tensors_to_log = {"loss": "loss"}
    logging_hook = tf.train.LoggingTensorHook(
        tensors=tensors_to_log, every_n_iter=400)

    # The input functions do not depend on the round, so build them once.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data, "w": train_weights},
        y=train_labels,
        batch_size=10,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data, "w": eval_weights},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)

    mAP = []
    for i in range(num_rounds):
        # Train the model
        pascal_classifier.train(
            input_fn=train_input_fn,
            steps=steps_per_round,
            hooks=[logging_hook])

        # Evaluate the model and print results
        pred = np.stack(
            [p['probabilities']
             for p in pascal_classifier.predict(input_fn=eval_input_fn)])
        rand_AP = compute_map(
            eval_labels, np.random.random(eval_labels.shape),
            eval_weights, average=None)
        print('Random AP: {} mAP'.format(np.mean(rand_AP)))
        gt_AP = compute_map(
            eval_labels, eval_labels, eval_weights, average=None)
        print('GT AP: {} mAP'.format(np.mean(gt_AP)))
        AP = compute_map(eval_labels, pred, eval_weights, average=None)
        mean_ap = np.mean(AP)
        print('Obtained {} mAP'.format(mean_ap))
        print('per class:')
        for cid, cname in enumerate(CLASS_NAMES):
            print('{}: {}'.format(cname, _get_el(AP, cid)))
        mAP.append(mean_ap)

        # Summary step is the number of steps requested before this round.
        summary_var(model_dir, "mAP", mean_ap, i * steps_per_round)

    with open('map.pkl', 'wb') as fr2:
        pickle.dump(mAP, fr2)
371 |   
372 |             
373 | 
374 | 
375 | if __name__ == "__main__":
376 |     main()


--------------------------------------------------------------------------------
/03_pascal_vgg16.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Sat Mar  3 17:07:30 2018
  5 | 
  6 | @author: snigdha
  7 | """
  8 | 
  9 | from __future__ import absolute_import
 10 | from __future__ import division
 11 | from __future__ import print_function
 12 | 
 13 | # Imports
 14 | import sys
 15 | import os
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | import argparse
 19 | #import os.path as osp
 20 | from PIL import Image
 21 | from functools import partial
 22 | from collections import defaultdict
 23 | import pickle
 24 | 
 25 | from eval import compute_map
 26 | #import models
 27 | 
 28 | tf.logging.set_verbosity(tf.logging.INFO)
 29 | 
 30 | CLASS_NAMES = [
 31 |     'aeroplane',
 32 |     'bicycle',
 33 |     'bird',
 34 |     'boat',
 35 |     'bottle',
 36 |     'bus',
 37 |     'car',
 38 |     'cat',
 39 |     'chair',
 40 |     'cow',
 41 |     'diningtable',
 42 |     'dog',
 43 |     'horse',
 44 |     'motorbike',
 45 |     'person',
 46 |     'pottedplant',
 47 |     'sheep',
 48 |     'sofa',
 49 |     'train',
 50 |     'tvmonitor',
 51 | ]
 52 | 
 53 | def cnn_model_fn(features, labels, mode, num_classes=20):
 54 |     # Write this function
 55 |     # """Model function for CNN."""
 56 |     # Input Layer
 57 |     
 58 |     input_layer = tf.reshape(features["x"], [-1, 224, 224, 3])
 59 |     
 60 |     if mode == tf.estimator.ModeKeys.TRAIN:
 61 |         flipped = tf.map_fn(lambda image: tf.image.random_flip_left_right(image),features["x"])
 62 |         cropped = tf.map_fn(lambda image:tf.random_crop(image,size=[224,224,3]),features["x"])
 63 |         
 64 |         fets = tf.concat([features["x"],flipped,cropped],axis = 0)
 65 |         #wts = tf.concat([features["w"],features["w"],features["w"]],axis = 0)
 66 |         lbls = tf.concat([labels,labels,labels],axis = 0)
 67 |         
 68 |         feats = tf.random_shuffle(fets,seed = features["x"].shape[0]*3)
 69 |         #wtgs = tf.random_shuffle(wts,seed = features["x"].shape[0]*3)
 70 |         lbels = tf.random_shuffle(lbls,seed = features["x"].shape[0]*3)
 71 |         
 72 |         features["x"]= feats
 73 |         input_layer = features["x"]
 74 |         labels = lbels
 75 |     
 76 |     tf.summary.image("Training_images",input_layer)
 77 |     
 78 |     # Convolutional Layer #1
 79 |     conv1 = tf.layers.conv2d(
 80 |         inputs=input_layer,
 81 |         filters=64,
 82 |         kernel_size=[3,3],
 83 |         padding="same",
 84 |         strides = 1,
 85 |         activation=tf.nn.relu     
 86 |         )
 87 | 
 88 |     # Convolutional Layer #2
 89 |     conv2 = tf.layers.conv2d(
 90 |         inputs=conv1,
 91 |         filters=64,
 92 |         kernel_size=[3,3],
 93 |         padding="same",
 94 |         strides = 1,
 95 |         activation=tf.nn.relu)
 96 | 
 97 |     # Pooling Layer #1
 98 |     pool1 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
 99 | 
100 |     # Convolutional Layer #3 
101 |     conv3 = tf.layers.conv2d(
102 |         inputs=pool1,
103 |         filters=128,
104 |         kernel_size=[3, 3],
105 |         padding="same",
106 |         strides = 1,
107 |         activation=tf.nn.relu)
108 |     
109 |     # Convolutional Layer #4 
110 |     conv4 = tf.layers.conv2d(
111 |         inputs=conv3,
112 |         filters=128,
113 |         kernel_size=[3, 3],
114 |         padding="same",
115 |         strides = 1,
116 |         activation=tf.nn.relu
117 |         )
118 |     
119 |     #Pooling layer 2
120 |     pool2 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2)
121 |     
122 |     # Convolutional Layer #5 
123 |     conv5 = tf.layers.conv2d(
124 |         inputs=pool2,
125 |         filters=256,
126 |         kernel_size=[3, 3],
127 |         padding="same",
128 |         strides = 1,
129 |         activation=tf.nn.relu,
130 |         )
131 |     
132 |     # Convolutional Layer #6 
133 |     conv6 = tf.layers.conv2d(
134 |         inputs=conv5,
135 |         filters=256,
136 |         kernel_size=[3, 3],
137 |         padding="same",
138 |         strides = 1,
139 |         activation=tf.nn.relu,
140 |         )
141 |     
142 |     # Convolutional Layer #7 
143 |     conv7 = tf.layers.conv2d(
144 |         inputs=conv6,
145 |         filters=256,
146 |         kernel_size=[3, 3],
147 |         padding="same",
148 |         strides = 1,
149 |         activation=tf.nn.relu,
150 |         )
151 |     
152 |     #Pooling layer 3
153 |     pool3 = tf.layers.max_pooling2d(inputs=conv7, pool_size=[2, 2], strides=2)
154 |     
155 |     # Convolutional Layer #8 
156 |     conv8 = tf.layers.conv2d(
157 |         inputs=pool3,
158 |         filters=512,
159 |         kernel_size=[3, 3],
160 |         padding="same",
161 |         strides = 1,
162 |         activation=tf.nn.relu
163 |         )
164 |     
165 |     # Convolutional Layer #9
166 |     conv9 = tf.layers.conv2d(
167 |         inputs=conv8,
168 |         filters=512,
169 |         kernel_size=[3, 3],
170 |         padding="same",
171 |         strides = 1,
172 |         activation=tf.nn.relu,
173 |         )
174 |     
175 |     # Convolutional Layer #10
176 |     conv10 = tf.layers.conv2d(
177 |         inputs=conv9,
178 |         filters=512,
179 |         kernel_size=[3, 3],
180 |         padding="same",
181 |         strides = 1,
182 |         activation=tf.nn.relu
183 |         )
184 |     
185 |     #Pooling layer 4
186 |     pool4 = tf.layers.max_pooling2d(inputs=conv10, pool_size=[2, 2], strides=2)
187 |     
188 |     
189 |     # Convolutional Layer #11
190 |     conv11 = tf.layers.conv2d(
191 |         inputs=pool4,
192 |         filters=512,
193 |         kernel_size=[3, 3],
194 |         padding="same",
195 |         strides = 1,
196 |         activation=tf.nn.relu
197 |         )
198 |     
199 |     # Convolutional Layer #12
200 |     conv12 = tf.layers.conv2d(
201 |         inputs=conv11,
202 |         filters=512,
203 |         kernel_size=[3, 3],
204 |         padding="same",
205 |         strides = 1,
206 |         activation=tf.nn.relu
207 |         )
208 |     
209 |     # Convolutional Layer #13
210 |     conv13 = tf.layers.conv2d(
211 |         inputs=conv12,
212 |         filters=512,
213 |         kernel_size=[3, 3],
214 |         padding="same",
215 |         strides = 1,
216 |         activation=tf.nn.relu
217 |         )
218 |     
219 |     #Pooling layer 5
220 |     pool5 = tf.layers.max_pooling2d(inputs=conv13, pool_size=[2, 2], strides=2)
221 |     
222 |     
223 |     # Dense Layer
224 |     pool5_flat = tf.contrib.layers.flatten(pool5)
225 |     
226 |     dense1 = tf.layers.dense(inputs=pool5_flat, units=4096,
227 |                             activation=tf.nn.relu)                       
228 |                             
229 |     dropout1 = tf.layers.dropout(
230 |         inputs=dense1, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)
231 |     
232 |     dense2 = tf.layers.dense(inputs=dropout1, units=4096,
233 |                             activation=tf.nn.relu)
234 |                             
235 |     dropout2 = tf.layers.dropout(
236 |         inputs=dense2, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)
237 |     
238 |     
239 |     dense3 = tf.layers.dense(inputs=dropout2, units=1000,
240 |                              activation = tf.nn.relu)
241 |                              
242 |     # Logits Layer
243 |     logits = tf.layers.dense(inputs=dense3, units=20)
244 | 
245 |     predictions = {
246 |         # Generate predictions (for PREDICT and EVAL mode)
247 |         "classes": tf.argmax(input=logits, axis=1),
248 |         # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
249 |         # `logging_hook`.
250 |         "probabilities": tf.sigmoid(logits, name="sigmoid_tensor")
251 |     }
252 | 
253 |     if mode == tf.estimator.ModeKeys.PREDICT:
254 |         return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
255 | 
256 |     # Calculate Loss (for both TRAIN and EVAL modes)
257 |     
258 |     
259 |     # Configure the Training Op (for TRAIN mode)
260 |     if mode == tf.estimator.ModeKeys.TRAIN:
261 |         
262 |         loss = tf.identity(tf.losses.sigmoid_cross_entropy(
263 |         labels, logits=logits), name='loss')
264 |         
265 |         decay_learning_rate = tf.train.exponential_decay(
266 |                 learning_rate = 0.001,
267 |                 global_step=tf.train.get_global_step(),
268 |                 decay_steps = 10000,
269 |                 decay_rate = 0.5,
270 |                 staircase = False,
271 |                 name = None)
272 |         optimizer = tf.train.MomentumOptimizer(learning_rate=decay_learning_rate,
273 |                                                          momentum = 0.9)
274 | 
275 |         tf.summary.scalar("decayed_learning_rate",decay_learning_rate)
276 |         
277 |         grads_and_vars= optimizer.compute_gradients(loss)
278 |         
279 |         for g, v in grads_and_vars:
280 |             if g is not None:
281 |                 #print(format(v.name))
282 |                 tf.summary.histogram("{}/grad_histogram".format(v.name), g)
283 |                 
284 |         train_op = optimizer.minimize(
285 |             loss=loss,
286 |             global_step=tf.train.get_global_step())
287 |         
288 |         return tf.estimator.EstimatorSpec(
289 |             mode=mode, loss=loss, train_op=train_op)
290 |         
291 | 
292 |     # Add evaluation metrics (for EVAL mode)
293 |     eval_metric_ops = {
294 |         "accuracy": tf.metrics.accuracy(
295 |             labels=labels, predictions=predictions["classes"])}
296 |     return tf.estimator.EstimatorSpec(
297 |         mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
298 |     
299 |     
def parse_args():
    """Parse the command line and return the resulting namespace.

    The parser knows a single positional argument, ``data_dir``.  When the
    script is started without any command-line argument the usage text is
    printed and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(
        description='Train a classifier in tensorflow!')
    cli.add_argument(
        'data_dir',
        type=str,
        default='data/VOC2007',
        help='Path to PASCAL data storage')

    called_without_arguments = len(sys.argv) == 1
    if called_without_arguments:
        cli.print_help()
        sys.exit(1)

    return cli.parse_args()
311 | 
312 | 
313 | def _get_el(arr, i):
314 |     try:
315 |         return arr[i]
316 |     except IndexError:
317 |         return arr
318 |     
319 | from tensorflow.core.framework import summary_pb2
def summary_var(log_dir, name, val, step):
    """Write a single scalar summary to the event writer of ``log_dir``.

    Args:
        log_dir: Directory whose cached ``FileWriter`` receives the summary.
        name: Tag under which the value is stored.
        val: Value to record; it is converted with ``float``.
        step: Global step the value is attached to.
    """
    event_writer = tf.summary.FileWriterCache.get(log_dir)
    scalar = summary_pb2.Summary.Value(tag=name, simple_value=float(val))
    event_writer.add_summary(summary_pb2.Summary(value=[scalar]), step)
    # Flush right away so the value is on disk after every call.
    event_writer.flush()
328 | 
def load_pascal(data_dir, split='train', image_size=256):
    """
    Read the images, labels and validity weights of one PASCAL VOC split.

    Args:
        data_dir (str): Path to the VOC2007 directory.
        split (str): Name of the split; selects the
            ``ImageSets/Main/<class>_<split>.txt`` files that are read.
        image_size (int): Side length every image is resized to.
    Returns:
        images (np.ndarray): np.float32 array of shape
            (N, image_size, image_size, 3) in RGB format.
        labels (np.ndarray): np.int32 array of shape (N, num_classes);
            1 where the flag in the class file is positive, 0 otherwise.
        weights (np.ndarray): np.int32 array of shape (N, num_classes)
            holding the absolute value of the flag, so entries whose flag
            is 0 get weight 0.
    """
    list_dir = data_dir + '/ImageSets/Main/'
    image_dir = data_dir + '/JPEGImages/'

    # Every class file lists the same image ids; take them from the first.
    with open(list_dir + "aeroplane" + "_" + split + ".txt", 'r') as id_file:
        img = [line.split()[0] for line in id_file if line.strip()]

    num = len(img)
    print("num", num)

    print("Entering the loop for weights and labels")
    flags = np.zeros((num, len(CLASS_NAMES)), dtype=np.int32)
    for col, class_name in enumerate(CLASS_NAMES):
        with open(list_dir + class_name + '_' + split + '.txt') as class_file:
            tokens = class_file.read().split()
        # Tokens alternate "<image id> <flag>"; keep the flags only.  The
        # assignment raises ValueError if the file does not hold `num` flags.
        flags[:, col] = np.int32(tokens[1::2])

    labels = flags.clip(min=0)
    weights = np.abs(flags)

    print("Entering the loop for images")
    arr = []
    for image_id in img:
        with Image.open(image_dir + image_id + '.jpg') as im:
            # LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
            # which newer Pillow releases no longer provide.
            resized = im.convert('RGB').resize(
                (image_size, image_size), Image.LANCZOS)
            arr.append(np.float32(resized))

    image_ar = np.float32(arr)
    return (image_ar, labels, weights)
389 | 
390 | 
def main():
    """Train the VGG-16 model on PASCAL VOC and track its test-set mAP.

    Training runs in rounds of a fixed number of steps.  After every round
    the test set is scored, the per-class average precision is printed and
    the mean AP is written as a scalar summary into the model directory.
    The list of mean APs is pickled to ``list22.pkl`` at the end.
    """
    model_dir = "pascal_vgg"
    steps_per_round = 400
    num_rounds = 100

    args = parse_args()
    # Load training and eval data
    train_data, train_labels, train_weights = load_pascal(
        args.data_dir, split='trainval')
    eval_data, eval_labels, eval_weights = load_pascal(
        args.data_dir, split='test')

    pascal_classifier = tf.estimator.Estimator(
        model_fn=partial(cnn_model_fn,
                         num_classes=train_labels.shape[1]),
        model_dir=model_dir)
    logging_hook = tf.train.LoggingTensorHook(
        tensors={"loss": "loss"}, every_n_iter=steps_per_round)

    # The input functions do not depend on the round, so build them once.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data, "w": train_weights},
        y=train_labels,
        batch_size=10,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data, "w": eval_weights},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)

    list22 = []
    for round_idx in range(num_rounds):
        # Train the model
        pascal_classifier.train(
            input_fn=train_input_fn,
            steps=steps_per_round,
            hooks=[logging_hook])

        # Evaluate the model and print results
        pred = np.stack(
            [p['probabilities']
             for p in pascal_classifier.predict(input_fn=eval_input_fn)])

        rand_AP = compute_map(
            eval_labels, np.random.random(eval_labels.shape),
            eval_weights, average=None)
        print('Random AP: {} mAP'.format(np.mean(rand_AP)))
        gt_AP = compute_map(
            eval_labels, eval_labels, eval_weights, average=None)
        print('GT AP: {} mAP'.format(np.mean(gt_AP)))
        AP = compute_map(eval_labels, pred, eval_weights, average=None)
        mean_ap = np.mean(AP)
        print('Obtained {} mAP'.format(mean_ap))
        print('per class:')
        for cid, cname in enumerate(CLASS_NAMES):
            print('{}: {}'.format(cname, _get_el(AP, cid)))
        list22.append(mean_ap)

        # This score was measured after (round_idx + 1) training rounds, so
        # it is logged at that step count rather than one round earlier.
        # NOTE(review): assumes training started from step 0; a run resumed
        # from an existing checkpoint would need the real global step.
        summary_var(model_dir, "mAP", mean_ap,
                    (round_idx + 1) * steps_per_round)

    with open('list22.pkl', 'wb') as fr2:
        pickle.dump(list22, fr2)


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/04_pascal_vggfinetune.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | # -*- coding: utf-8 -*-
  3 | """
  4 | Created on Mon Mar  5 02:07:46 2018
  5 | 
  6 | @author: snigdha
  7 | """
  8 | 
  9 | from __future__ import absolute_import
 10 | from __future__ import division
 11 | from __future__ import print_function
 12 | 
 13 | # Imports
 14 | import sys
 15 | import os
 16 | import numpy as np
 17 | import tensorflow as tf
 18 | import argparse
 19 | #import os.path as osp
 20 | from PIL import Image
 21 | from functools import partial
 22 | from collections import defaultdict
 23 | import pickle
 24 | 
 25 | from eval import compute_map
 26 | #import models
 27 | 
# Show TensorFlow log messages of level INFO and above.
tf.logging.set_verbosity(tf.logging.INFO)

# Class names of the data set.  load_pascal reads one
# '<class>_<split>.txt' file per entry, so the order of this list is the
# order of the label/weight columns.
CLASS_NAMES = [
    'aeroplane',
    'bicycle',
    'bird',
    'boat',
    'bottle',
    'bus',
    'car',
    'cat',
    'chair',
    'cow',
    'diningtable',
    'dog',
    'horse',
    'motorbike',
    'person',
    'pottedplant',
    'sheep',
    'sofa',
    'train',
    'tvmonitor',
]

# Checkpoint reader opened when the module is imported; it expects
# "vgg_16.ckpt" in the current working directory.  cnn_model_fn reads the
# 'vgg_16/...' weights and biases through rdr.get_tensor.
rdr = tf.train.NewCheckpointReader("./vgg_16.ckpt")
 54 | 
 55 | def cnn_model_fn(features, labels, mode, num_classes=20):
 56 |     # Write this function
 57 |     # """Model function for CNN."""
 58 |     # Input Layer
 59 |     
 60 |     input_layer = tf.reshape(features["x"], [-1, 224, 224, 3])
 61 |     
 62 |     if mode == tf.estimator.ModeKeys.TRAIN:
 63 |         flipped = tf.map_fn(lambda image: tf.image.random_flip_left_right(image),features["x"])
 64 |         cropped = tf.map_fn(lambda image:tf.random_crop(image,size=[224,224,3]),features["x"])
 65 |         
 66 |         fets = tf.concat([features["x"],flipped,cropped],axis = 0)
 67 |         #wts = tf.concat([features["w"],features["w"],features["w"]],axis = 0)
 68 |         lbls = tf.concat([labels,labels,labels],axis = 0)
 69 |         
 70 |         feats = tf.random_shuffle(fets,seed = features["x"].shape[0]*3)
 71 |         #wtgs = tf.random_shuffle(wts,seed = features["x"].shape[0]*3)
 72 |         lbels = tf.random_shuffle(lbls,seed = features["x"].shape[0]*3)
 73 |         
 74 |         features["x"]= feats
 75 |         input_layer = features["x"]
 76 |         labels = lbels
 77 |     
 78 |     tf.summary.image("Training_images",input_layer)
 79 |     
 80 |     # Convolutional Layer #1
 81 |     conv1 = tf.layers.conv2d(
 82 |         inputs=input_layer,
 83 |         filters=64,
 84 |         kernel_size=[3,3],
 85 |         padding="same",
 86 |         strides = 1,
 87 |         activation=tf.nn.relu,
 88 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv1/conv1_1/weights'),verify_shape=True),
 89 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv1/conv1_1/biases'),verify_shape=True))
 90 | 
 91 | 
 92 |     # Convolutional Layer #2
 93 |     conv2 = tf.layers.conv2d(
 94 |         inputs=conv1,
 95 |         filters=64,
 96 |         kernel_size=[3,3],
 97 |         padding="same",
 98 |         strides = 1,
 99 |         activation=tf.nn.relu,
100 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv1/conv1_2/weights'),verify_shape=True),
101 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv1/conv1_2/biases'),verify_shape=True))
102 | 
103 |     # Pooling Layer #1
104 |     pool1 = tf.layers.max_pooling2d(inputs=conv2, pool_size=[2, 2], strides=2)
105 | 
106 |     # Convolutional Layer #3 
107 |     conv3 = tf.layers.conv2d(
108 |         inputs=pool1,
109 |         filters=128,
110 |         kernel_size=[3, 3],
111 |         padding="same",
112 |         strides = 1,
113 |         activation=tf.nn.relu,
114 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv2/conv2_1/weights'),verify_shape=True),
115 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv2/conv2_1/biases'),verify_shape=True))
116 |     
117 |     # Convolutional Layer #4 
118 |     conv4 = tf.layers.conv2d(
119 |         inputs=conv3,
120 |         filters=128,
121 |         kernel_size=[3, 3],
122 |         padding="same",
123 |         strides = 1,
124 |         activation=tf.nn.relu,
125 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv2/conv2_2/weights'),verify_shape=True),
126 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv2/conv2_2/biases'),verify_shape=True))
127 |     
128 |     #Pooling layer 2
129 |     pool2 = tf.layers.max_pooling2d(inputs=conv4, pool_size=[2, 2], strides=2)
130 |     
131 |     # Convolutional Layer #5 
132 |     conv5 = tf.layers.conv2d(
133 |         inputs=pool2,
134 |         filters=256,
135 |         kernel_size=[3, 3],
136 |         padding="same",
137 |         strides = 1,
138 |         activation=tf.nn.relu,
139 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_1/weights'),verify_shape=True),
140 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_1/biases'),verify_shape=True))
141 |     
142 |     # Convolutional Layer #6 
143 |     conv6 = tf.layers.conv2d(
144 |         inputs=conv5,
145 |         filters=256,
146 |         kernel_size=[3, 3],
147 |         padding="same",
148 |         strides = 1,
149 |         activation=tf.nn.relu,
150 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_2/weights'),verify_shape=True),
151 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_2/biases'),verify_shape=True))
152 | 
153 |     
154 |     # Convolutional Layer #7 
155 |     conv7 = tf.layers.conv2d(
156 |         inputs=conv6,
157 |         filters=256,
158 |         kernel_size=[3, 3],
159 |         padding="same",
160 |         strides = 1,
161 |         activation=tf.nn.relu,
162 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_3/weights'),verify_shape=True),
163 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv3/conv3_3/biases'),verify_shape=True))
164 |     
165 |     #Pooling layer 3
166 |     pool3 = tf.layers.max_pooling2d(inputs=conv7, pool_size=[2, 2], strides=2)
167 |     
168 |     # Convolutional Layer #8 
169 |     conv8 = tf.layers.conv2d(
170 |         inputs=pool3,
171 |         filters=512,
172 |         kernel_size=[3, 3],
173 |         padding="same",
174 |         strides = 1,
175 |         activation=tf.nn.relu,
176 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_1/weights'),verify_shape=True),
177 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_1/biases'),verify_shape=True))
178 |     
179 |     # Convolutional Layer #9
180 |     conv9 = tf.layers.conv2d(
181 |         inputs=conv8,
182 |         filters=512,
183 |         kernel_size=[3, 3],
184 |         padding="same",
185 |         strides = 1,
186 |         activation=tf.nn.relu,
187 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_2/weights'),verify_shape=True),
188 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_2/biases'),verify_shape=True))
189 |        
190 |     
191 |     # Convolutional Layer #10
192 |     conv10 = tf.layers.conv2d(
193 |         inputs=conv9,
194 |         filters=512,
195 |         kernel_size=[3, 3],
196 |         padding="same",
197 |         strides = 1,
198 |         activation=tf.nn.relu,
199 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_3/weights'),verify_shape=True),
200 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv4/conv4_3/biases'),verify_shape=True))
201 |         
202 |     
203 |     #Pooling layer 4
204 |     pool4 = tf.layers.max_pooling2d(inputs=conv10, pool_size=[2, 2], strides=2)
205 |     
206 |     
207 |     # Convolutional Layer #11
208 |     conv11 = tf.layers.conv2d(
209 |         inputs=pool4,
210 |         filters=512,
211 |         kernel_size=[3, 3],
212 |         padding="same",
213 |         strides = 1,
214 |         activation=tf.nn.relu,
215 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_1/weights'),verify_shape=True),
216 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_1/biases'),verify_shape=True))
217 |         
218 |         
219 |     
220 |     # Convolutional Layer #12
221 |     conv12 = tf.layers.conv2d(
222 |         inputs=conv11,
223 |         filters=512,
224 |         kernel_size=[3, 3],
225 |         padding="same",
226 |         strides = 1,
227 |         activation=tf.nn.relu,
228 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_2/weights'),verify_shape=True),
229 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_2/biases'),verify_shape=True))
230 | 
231 |            
232 |     # Convolutional Layer #13
233 |     conv13 = tf.layers.conv2d(
234 |         inputs=conv12,
235 |         filters=512,
236 |         kernel_size=[3, 3],
237 |         padding="same",
238 |         strides = 1,
239 |         activation=tf.nn.relu,
240 |         kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_3/weights'),verify_shape=True),
241 |         bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/conv5/conv5_3/biases'),verify_shape=True))
242 | 
243 |     
244 |     #Pooling layer 5
245 |     pool5 = tf.layers.max_pooling2d(inputs=conv13, pool_size=[2, 2], strides=2)
246 |     
247 |     
248 |     dense1 = tf.layers.conv2d(inputs=pool5, 
249 |                             activation=tf.nn.relu,
250 |                             filters=4096,  # this specifies the number of channels in the output layer
251 |                             kernel_size=[7, 7],
252 |                             strides=[1,1],
253 |                             padding="same",
254 |                             kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/fc6/weights'),verify_shape=True),
255 |                             bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/fc6/biases'),verify_shape=True))
256 | 
257 | 
258 |     dropout1 = tf.layers.dropout(
259 |         inputs=dense1, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)
260 | 
261 | 
262 |     dense2 = tf.layers.conv2d(inputs=dropout1, 
263 |                             filters=4096,  # this specifies the number of channels in the output layer
264 |                             kernel_size=[1, 1],
265 |                             strides=[1,1],
266 |                             padding="same",
267 |                             activation=tf.nn.relu,
268 |                             kernel_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/fc7/weights'),verify_shape=True),
269 |                             bias_initializer=tf.constant_initializer(value=rdr.get_tensor('vgg_16/fc7/biases'),verify_shape=True))
270 | 
271 | 
272 |     dropout2 = tf.layers.dropout(
273 |         inputs=dense2, rate=0.5, training=mode == tf.estimator.ModeKeys.TRAIN)
274 | 
275 |     
276 |     dense3 = tf.layers.conv2d(inputs=dropout2, 
277 |                             filters=1000,  # this specifies the number of channels in the output layer
278 |                             kernel_size=[1, 1],
279 |                             strides=[1,1],
280 |                             padding="same",
281 |                             activation=tf.nn.relu)
282 |     
283 |     # Logits Layer
284 |     logits = tf.layers.dense(inputs=tf.contrib.layers.flatten(dense3), units=20)
285 | 
286 | 
287 |     predictions = {
288 |         # Generate predictions (for PREDICT and EVAL mode)
289 |         "classes": tf.argmax(input=logits, axis=1),
290 |         # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
291 |         # `logging_hook`.
292 |         "probabilities": tf.sigmoid(logits, name="sigmoid_tensor")
293 |     }
294 | 
295 |     if mode == tf.estimator.ModeKeys.PREDICT:
296 |         return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)
297 | 
298 |     # Calculate Loss (for both TRAIN and EVAL modes)
299 |     
300 |     
301 |     # Configure the Training Op (for TRAIN mode)
302 |     if mode == tf.estimator.ModeKeys.TRAIN:
303 |         
304 |         loss = tf.identity(tf.losses.sigmoid_cross_entropy(
305 |         labels, logits=logits), name='loss')
306 |         
307 |         decay_learning_rate = tf.train.exponential_decay(
308 |                 learning_rate = 0.0001,
309 |                 global_step=tf.train.get_global_step(),
310 |                 decay_steps = 1000,
311 |                 decay_rate = 0.5,
312 |                 staircase = False,
313 |                 name = None)
314 |         optimizer = tf.train.MomentumOptimizer(learning_rate=decay_learning_rate,
315 |                                                          momentum = 0.9)
316 | 
317 |         tf.summary.scalar("decayed_learning_rate",decay_learning_rate)
318 |         
319 |         grads_and_vars= optimizer.compute_gradients(loss)
320 |         
321 |         for g, v in grads_and_vars:
322 |             if g is not None:
323 |                 #print(format(v.name))
324 |                 tf.summary.histogram("{}/grad_histogram".format(v.name), g)
325 |                 
326 |         train_op = optimizer.minimize(
327 |             loss=loss,
328 |             global_step=tf.train.get_global_step())
329 |         
330 |         return tf.estimator.EstimatorSpec(
331 |             mode=mode, loss=loss, train_op=train_op)
332 |         
333 | 
334 |     # Add evaluation metrics (for EVAL mode)
335 |     eval_metric_ops = {
336 |         "accuracy": tf.metrics.accuracy(
337 |             labels=labels, predictions=predictions["classes"])}
338 |     return tf.estimator.EstimatorSpec(
339 |         mode=mode, loss=loss, eval_metric_ops=eval_metric_ops)
340 |     
341 |     
def parse_args():
    """Parse the command line and return the resulting namespace.

    The parser knows a single positional argument, ``data_dir``.  When the
    script is started without any command-line argument the usage text is
    printed and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(
        description='Train a classifier in tensorflow!')
    cli.add_argument(
        'data_dir',
        type=str,
        default='data/VOC2007',
        help='Path to PASCAL data storage')

    called_without_arguments = len(sys.argv) == 1
    if called_without_arguments:
        cli.print_help()
        sys.exit(1)

    return cli.parse_args()
353 | 
354 | 
355 | def _get_el(arr, i):
356 |     try:
357 |         return arr[i]
358 |     except IndexError:
359 |         return arr
360 |     
361 | from tensorflow.core.framework import summary_pb2
def summary_var(log_dir, name, val, step):
    """Write a single scalar summary to the event writer of ``log_dir``.

    Args:
        log_dir: Directory whose cached ``FileWriter`` receives the summary.
        name: Tag under which the value is stored.
        val: Value to record; it is converted with ``float``.
        step: Global step the value is attached to.
    """
    event_writer = tf.summary.FileWriterCache.get(log_dir)
    scalar = summary_pb2.Summary.Value(tag=name, simple_value=float(val))
    event_writer.add_summary(summary_pb2.Summary(value=[scalar]), step)
    # Flush right away so the value is on disk after every call.
    event_writer.flush()
370 | 
371 | 
def load_pascal(data_dir, split='train', image_size=256):
    """
    Read the images, labels and validity weights of one PASCAL VOC split.

    Args:
        data_dir (str): Path to the VOC2007 directory.
        split (str): Name of the split; selects the
            ``ImageSets/Main/<class>_<split>.txt`` files that are read.
        image_size (int): Side length every image is resized to.
    Returns:
        images (np.ndarray): np.float32 array of shape
            (N, image_size, image_size, 3) in RGB format.
        labels (np.ndarray): np.int32 array of shape (N, num_classes);
            1 where the flag in the class file is positive, 0 otherwise.
        weights (np.ndarray): np.int32 array of shape (N, num_classes)
            holding the absolute value of the flag, so entries whose flag
            is 0 get weight 0.
    """
    list_dir = data_dir + '/ImageSets/Main/'
    image_dir = data_dir + '/JPEGImages/'

    # Every class file lists the same image ids; take them from the first.
    with open(list_dir + "aeroplane" + "_" + split + ".txt", 'r') as id_file:
        img = [line.split()[0] for line in id_file if line.strip()]

    num = len(img)
    print("num", num)

    print("Entering the loop for weights and labels")
    flags = np.zeros((num, len(CLASS_NAMES)), dtype=np.int32)
    for col, class_name in enumerate(CLASS_NAMES):
        with open(list_dir + class_name + '_' + split + '.txt') as class_file:
            tokens = class_file.read().split()
        # Tokens alternate "<image id> <flag>"; keep the flags only.  The
        # assignment raises ValueError if the file does not hold `num` flags.
        flags[:, col] = np.int32(tokens[1::2])

    labels = flags.clip(min=0)
    weights = np.abs(flags)

    print("Entering the loop for images")
    arr = []
    for image_id in img:
        with Image.open(image_dir + image_id + '.jpg') as im:
            # LANCZOS is the filter formerly exposed as Image.ANTIALIAS,
            # which newer Pillow releases no longer provide.
            resized = im.convert('RGB').resize(
                (image_size, image_size), Image.LANCZOS)
            arr.append(np.float32(resized))

    image_ar = np.float32(arr)
    return (image_ar, labels, weights)
432 | 
def main():
    """Fine-tune VGG-16 on PASCAL VOC and track its test-set mAP.

    Training runs in rounds of a fixed number of steps.  After every round
    the test set is scored, the per-class average precision is printed and
    the mean AP is written as a scalar summary into the model directory.
    The list of mean APs is pickled to ``list22.pkl`` at the end.
    """
    model_dir = "pascal_vggfinetune"
    steps_per_round = 400
    num_rounds = 10

    args = parse_args()
    # Load training and eval data
    train_data, train_labels, train_weights = load_pascal(
        args.data_dir, split='trainval')
    eval_data, eval_labels, eval_weights = load_pascal(
        args.data_dir, split='test')

    pascal_classifier = tf.estimator.Estimator(
        model_fn=partial(cnn_model_fn,
                         num_classes=train_labels.shape[1]),
        model_dir=model_dir)
    logging_hook = tf.train.LoggingTensorHook(
        tensors={"loss": "loss"}, every_n_iter=steps_per_round)

    # The input functions do not depend on the round, so build them once.
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": train_data, "w": train_weights},
        y=train_labels,
        batch_size=10,
        num_epochs=None,
        shuffle=True)
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": eval_data, "w": eval_weights},
        y=eval_labels,
        num_epochs=1,
        shuffle=False)

    list22 = []
    for round_idx in range(num_rounds):
        # Train the model
        pascal_classifier.train(
            input_fn=train_input_fn,
            steps=steps_per_round,
            hooks=[logging_hook])

        # Evaluate the model and print results
        pred = np.stack(
            [p['probabilities']
             for p in pascal_classifier.predict(input_fn=eval_input_fn)])

        rand_AP = compute_map(
            eval_labels, np.random.random(eval_labels.shape),
            eval_weights, average=None)
        print('Random AP: {} mAP'.format(np.mean(rand_AP)))
        gt_AP = compute_map(
            eval_labels, eval_labels, eval_weights, average=None)
        print('GT AP: {} mAP'.format(np.mean(gt_AP)))
        AP = compute_map(eval_labels, pred, eval_weights, average=None)
        mean_ap = np.mean(AP)
        print('Obtained {} mAP'.format(mean_ap))
        print('per class:')
        for cid, cname in enumerate(CLASS_NAMES):
            print('{}: {}'.format(cname, _get_el(AP, cid)))
        list22.append(mean_ap)

        # This score was measured after (round_idx + 1) training rounds, so
        # it is logged at that step count rather than one round earlier.
        # NOTE(review): assumes training started from step 0; a run resumed
        # from an existing checkpoint would need the real global step.
        summary_var(model_dir, "mAP", mean_ap,
                    (round_idx + 1) * steps_per_round)

    with open('list22.pkl', 'wb') as fr2:
        pickle.dump(list22, fr2)


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Multi-label Image Classification using Tensorflow
 2 | Implementation of simple CNN on MNIST, VGG16 and Alexnet on Pascal VOC dataset
 3 | 
 4 | 1) 00_mnist.py: Contains code for MNIST 10-digit classification in Tensorflow
 5 | 
 6 | 2) 01_pascal.py: The simple CNN architecture from the MNIST example, applied to the Pascal VOC dataset
 7 | 
 8 | 3) 02_pascal_alexnet.py: Alexnet on Pascal VOC
 9 | 
10 | 4) 03_pascal_vgg16.py: VGG16 on Pascal VOC from scratch
11 | 
12 | 5) 04_pascal_vggfinetune.py : Fine-tuning VGG16 on Pascal VOC using pre-trained weights
13 | 
14 | 6) 5a_conv1.py: Script to generate conv1 visualisation features. 
15 | 	gist_cifar10_train.py : Needed to run 5a_conv1.py
16 | 
17 | i) Place 5a_conv1.py in the directory created by training, which contains the ckpt files.
18 | ii) Run 5a_conv1.py to obtain a folder containing the TensorBoard output.
19 | iii) Run `tensorboard --logdir=image_filters`
20 | 


--------------------------------------------------------------------------------
/eval.py:
--------------------------------------------------------------------------------
 1 | import sklearn.metrics
 2 | 
 3 | 
 4 | def compute_map(gt, pred, valid, average=None):
 5 |     """
 6 |     Compute the multi-label classification accuracy.
 7 |     gt (np.ndarray): Shape Nx20, 0 or 1, 1 if the object i is present in that
 8 |         image.
 9 |     pred (np.ndarray): Shape Nx20, probability of that object in the image
10 |         (output probablitiy).
11 |     valid (np.ndarray): Shape Nx20, 0 if you want to ignore that class for that
12 |         image. Some objects are labeled as ambiguous.
13 |     """
14 |     nclasses = gt.shape[1]
15 |     all_ap = []
16 |     for cid in range(nclasses):
17 |         gt_cls = gt[:, cid][valid[:, cid] > 0].astype('float32')
18 |         pred_cls = pred[:, cid][valid[:, cid] > 0].astype('float32')
19 |         # As per PhilK. code:
20 |         # https://github.com/philkr/voc-classification/blob/master/src/train_cls.py
21 |         pred_cls -= 1e-5 * gt_cls
22 |         ap = sklearn.metrics.average_precision_score(
23 |             gt_cls, pred_cls, average=average)
24 |         all_ap.append(ap)
25 |     return all_ap
26 | 


--------------------------------------------------------------------------------