├── README.md ├── download_dataset_and_weights.sh └── vgg16_example.py /README.md: -------------------------------------------------------------------------------- 1 | # VGG16-Image-Retrieval 2 | Uses TensorFlow and FC2 features to match test images to the same category given a query image as input 3 | 4 | How to use: 5 | * Run the download_dataset_and_weights.sh script 6 | * ./download_dataset_and_weights.sh 7 | * Simply execute the vgg16_example.py file 8 | * python vgg16_example.py 9 | 10 | Once it is up and running, every so often you will see prints similar to the following: 11 | 12 | Current Correct list: ['image_00748.jpg', 'image_00749.jpg', 'image_00750.jpg', 'image_00751.jpg'] 13 | 14 | Matches are: 15 | * Distance: 2.45867538452, FileName: image_00748.jpg 16 | * Distance: 22.8059749603, FileName: image_00751.jpg 17 | * Distance: 37.0147171021, FileName: image_00750.jpg 18 | * Distance: 38.9278831482, FileName: image_00749.jpg 19 | * Precision@4: 1.0 20 | 21 | This shows the similarity distance between the query and the matches as well as the filename of the matched images. 22 | This also shows the precision@4 compared to the ground truth. 
# (README.md, continued)
# A file containing these prints as well as final results
# will be created as "Last_Run.txt"
#
# --------------------------------------------------------------------------------
# /download_dataset_and_weights.sh:
# --------------------------------------------------------------------------------
# wget https://github.com/kentsommer/VGG16-Image-Retrieval/releases/download/v1.0/Dataset_Directory.zip
# wget https://github.com/kentsommer/VGG16-Image-Retrieval/releases/download/v1.0/vgg16_weights.npz
#
# unzip Dataset_Directory.zip
# rm -rf Dataset_Directory.zip
#
# --------------------------------------------------------------------------------
# /vgg16_example.py:
# --------------------------------------------------------------------------------
########################################################################################
# Davi Frossard, 2016                                                                  #
# VGG16 implementation in TensorFlow                                                   #
# Details:                                                                             #
# http://www.cs.toronto.edu/~frossard/post/vgg16/                                      #
#                                                                                      #
# Model from https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md     #
# Weights from Caffe converted using https://github.com/ethereon/caffe-tensorflow      #
########################################################################################
# Kent Sommer, 2016                                                                    #
# Modified for Image retrieval on the UKBenchmark image set using fc2 features         #
########################################################################################

import heapq
import time
from math import sqrt
from os import listdir
from os.path import basename, join, splitext

import numpy as np
import scipy.spatial.distance
import tensorflow as tf
# NOTE(review): scipy.misc.imread/imresize were deprecated in SciPy 1.0 and
# removed in later releases; this script needs a SciPy version that still
# ships them (replacing them would require a new third-party dependency).
from scipy.misc import imread, imresize

try:
    read_input = raw_input  # Python 2
except NameError:
    read_input = input  # Python 3

# Number of matches retrieved per query; also the ground-truth group size.
TOP_K = 4
# Spatial size every image is resized to before being fed to the network.
IMAGE_SIZE = (224, 224)
# Tensor used as the image descriptor. If you want, change fc2 to fc1
# ('fc1/Relu:0').
FEATURE_LAYER = 'fc2/Relu:0'
WEIGHTS_FILE = 'vgg16_weights.npz'
DATA_DIR_QUERY = 'Dataset_Directory/query'
DATA_DIR_TEST = 'Dataset_Directory/test'
RESULTS_FILE = "Last_Run.txt"

# Maps the distance name typed by the user to the SciPy implementation.
DISTANCE_FUNCTIONS = {
    "euc": scipy.spatial.distance.euclidean,
    "cos": scipy.spatial.distance.cosine,
    "chev": scipy.spatial.distance.chebyshev,
}


class vgg16:
    """VGG16 graph: 13 conv layers, 5 max-pools and 3 fully connected layers.

    Building an instance adds the whole network to the default TensorFlow
    graph. Layer outputs are exposed as attributes (``conv1_1`` ...
    ``pool5``, ``fc1``, ``fc2``, ``fc3l``, ``probs``) and every weight/bias
    variable is appended to ``parameters`` in creation order.
    """

    def __init__(self, imgs, weights=None, sess=None):
        """Build the network on top of *imgs*.

        Args:
            imgs: input image tensor; the per-channel mean is subtracted
                from it before the first convolution.
            weights: optional path to an ``.npz`` weight file.
            sess: optional session; weights are loaded only when both
                *weights* and *sess* are given.
        """
        self.imgs = imgs
        self.convlayers()
        self.fc_layers()
        self.probs = tf.nn.softmax(self.fc3l)
        if weights is not None and sess is not None:
            self.load_weights(weights, sess)

    def _conv(self, name, inputs, in_channels, out_channels):
        """Add a 3x3, stride-1, SAME-padded conv + bias + ReLU layer.

        The kernel and bias variables are appended to ``self.parameters``
        (kernel first), which is the order ``load_weights`` relies on.
        """
        with tf.name_scope(name) as scope:
            kernel = tf.Variable(
                tf.truncated_normal([3, 3, in_channels, out_channels],
                                    dtype=tf.float32, stddev=1e-1),
                name='weights')
            conv = tf.nn.conv2d(inputs, kernel, [1, 1, 1, 1], padding='SAME')
            biases = tf.Variable(
                tf.constant(0.0, shape=[out_channels], dtype=tf.float32),
                trainable=True, name='biases')
            out = tf.nn.bias_add(conv, biases)
            activation = tf.nn.relu(out, name=scope)
            self.parameters += [kernel, biases]
        return activation

    def _pool(self, name, inputs):
        """Add a 2x2, stride-2, SAME-padded max-pool layer."""
        return tf.nn.max_pool(inputs,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME',
                              name=name)

    def convlayers(self):
        """Build the preprocessing step and the five convolutional stages."""
        self.parameters = []

        # zero-mean input
        with tf.name_scope('preprocess'):
            mean = tf.constant([123.68, 116.779, 103.939], dtype=tf.float32,
                               shape=[1, 1, 1, 3], name='img_mean')
            images = self.imgs - mean

        # stage 1
        self.conv1_1 = self._conv('conv1_1', images, 3, 64)
        self.conv1_2 = self._conv('conv1_2', self.conv1_1, 64, 64)
        self.pool1 = self._pool('pool1', self.conv1_2)

        # stage 2
        self.conv2_1 = self._conv('conv2_1', self.pool1, 64, 128)
        self.conv2_2 = self._conv('conv2_2', self.conv2_1, 128, 128)
        self.pool2 = self._pool('pool2', self.conv2_2)

        # stage 3
        self.conv3_1 = self._conv('conv3_1', self.pool2, 128, 256)
        self.conv3_2 = self._conv('conv3_2', self.conv3_1, 256, 256)
        self.conv3_3 = self._conv('conv3_3', self.conv3_2, 256, 256)
        self.pool3 = self._pool('pool3', self.conv3_3)

        # stage 4
        self.conv4_1 = self._conv('conv4_1', self.pool3, 256, 512)
        self.conv4_2 = self._conv('conv4_2', self.conv4_1, 512, 512)
        self.conv4_3 = self._conv('conv4_3', self.conv4_2, 512, 512)
        self.pool4 = self._pool('pool4', self.conv4_3)

        # stage 5
        self.conv5_1 = self._conv('conv5_1', self.pool4, 512, 512)
        self.conv5_2 = self._conv('conv5_2', self.conv5_1, 512, 512)
        self.conv5_3 = self._conv('conv5_3', self.conv5_2, 512, 512)
        # Previously this op was (mis)named 'pool4', duplicating stage 4.
        self.pool5 = self._pool('pool5', self.conv5_3)

    def _fc_variables(self, n_in, n_out):
        """Create and register the weight/bias variables of one fc layer.

        Must be called inside the layer's name scope so the variables are
        named ``<scope>/weights`` and ``<scope>/biases``.
        """
        weights = tf.Variable(tf.truncated_normal([n_in, n_out],
                                                  dtype=tf.float32,
                                                  stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(1.0, shape=[n_out], dtype=tf.float32),
                             trainable=True, name='biases')
        self.parameters += [weights, biases]
        return weights, biases

    def fc_layers(self):
        """Build fc1, fc2 (both ReLU) and the fc3 logits layer."""
        # fc1
        with tf.name_scope('fc1'):
            shape = int(np.prod(self.pool5.get_shape()[1:]))
            fc1w, fc1b = self._fc_variables(shape, 4096)
            pool5_flat = tf.reshape(self.pool5, [-1, shape])
            fc1l = tf.nn.bias_add(tf.matmul(pool5_flat, fc1w), fc1b)
            self.fc1 = tf.nn.relu(fc1l)

        # fc2
        # The ReLU keeps its default op name so the descriptor tensor is
        # still reachable as 'fc2/Relu:0'.
        with tf.name_scope('fc2'):
            fc2w, fc2b = self._fc_variables(4096, 4096)
            fc2l = tf.nn.bias_add(tf.matmul(self.fc1, fc2w), fc2b)
            self.fc2 = tf.nn.relu(fc2l)

        # fc3
        with tf.name_scope('fc3'):
            fc3w, fc3b = self._fc_variables(4096, 1000)
            self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)

    def load_weights(self, weight_file, sess):
        """Assign the arrays stored in *weight_file* to ``self.parameters``.

        The archive's keys are sorted and the i-th key is assigned to the
        i-th parameter, so the sorted key order has to match the order in
        which the variables were created.
        """
        print('Load weights...')
        # The context manager closes the .npz archive once loading is done.
        with np.load(weight_file) as weights:
            for i, k in enumerate(sorted(weights.keys())):
                sess.run(self.parameters[i].assign(weights[k]))
        print('Load complete.')


def square_rooted(x):
    """Return the Euclidean norm of *x*, rounded to 3 decimals."""
    return round(sqrt(sum([a * a for a in x])), 3)


def cosine_similarity(x, y):
    """Return the cosine similarity of *x* and *y*, rounded to 3 decimals.

    The norms are used unrounded: rounding them first (as ``square_rooted``
    does) loses precision and turns small non-zero vectors into a zero
    denominator.

    Raises:
        ZeroDivisionError: if either vector has zero norm.
    """
    numerator = sum(a * b for a, b in zip(x, y))
    denominator = sqrt(sum(a * a for a in x)) * sqrt(sum(b * b for b in y))
    return round(numerator / float(denominator), 3)


def expected_matches(query_path, group_size=TOP_K):
    """Return the ground-truth file names for *query_path*.

    The query ``image_<n>.jpg`` is expected to match the *group_size*
    consecutive images ``image_<n>.jpg`` ... ``image_<n+group_size-1>.jpg``.
    """
    stem = splitext(basename(query_path))[0]
    first_id = int(stem.rsplit("_", 1)[-1])
    return ["image_" + str(n).zfill(5) + ".jpg"
            for n in range(first_id, first_id + group_size)]


def extract_feature(sess, layer, imgs, image_path):
    """Run *image_path* through the network and return *layer* as a 1-D array.

    The result is flattened because the scipy.spatial.distance functions
    are documented for 1-D vectors.
    """
    image = imresize(imread(image_path), IMAGE_SIZE)
    feature = sess.run(layer, feed_dict={imgs: [image]})
    return np.array(feature).ravel()


def _report(log_file, message, end="\n"):
    """Print *message* and append ``message + end`` to *log_file*."""
    print(message)
    log_file.write(message + end)


def main():
    """Match every query image against the test set and log precision@K."""
    dist_type = read_input("Enter distance algorithm (euc, cos, chev): \n") or "euc"
    print("distance type selected: " + dist_type)
    if dist_type not in DISTANCE_FUNCTIONS:
        raise ValueError("unknown distance algorithm: " + repr(dist_type)
                         + " (expected one of euc, cos, chev)")
    distance = DISTANCE_FUNCTIONS[dist_type]

    # Sorted so that the order of the log does not depend on the filesystem.
    datalist_query = sorted(join(DATA_DIR_QUERY, f) for f in listdir(DATA_DIR_QUERY))
    datalist_test = sorted(join(DATA_DIR_TEST, f) for f in listdir(DATA_DIR_TEST))

    with tf.Session() as sess:
        imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
        vgg = vgg16(imgs, WEIGHTS_FILE, sess)
        layer = sess.graph.get_tensor_by_name(FEATURE_LAYER)

        t0 = time.time()
        feat_cache = {}  # test image path -> descriptor, filled on first use
        total_precision = 0.0

        with open(RESULTS_FILE, 'w') as fp:
            for query_path in datalist_query:
                cor_list = expected_matches(query_path)
                _report(fp, "Current Correct list: " + str(cor_list))

                feature_query = extract_feature(sess, layer, vgg.imgs, query_path)

                # (distance, path) pairs: unlike a dict keyed by distance,
                # two test images at the same distance are both kept.
                scored = []
                for test_path in datalist_test:
                    if test_path not in feat_cache:
                        feat_cache[test_path] = extract_feature(
                            sess, layer, vgg.imgs, test_path)
                    scored.append((distance(feature_query, feat_cache[test_path]),
                                   test_path))

                num_correct = 0.0
                _report(fp, "Matches are: ")
                for dist, test_path in heapq.nsmallest(TOP_K, scored):
                    file_name = basename(test_path)
                    _report(fp, "\t" + "Distance: " + str(dist)
                            + ", FileName: " + file_name)
                    if file_name in cor_list:
                        num_correct += 1

                precision = num_correct / TOP_K
                total_precision += precision
                _report(fp, "\t" + "Precision@" + str(TOP_K) + ": "
                        + str(precision) + "\n\n", end="")

            num_queries = len(datalist_query)
            total = time.time() - t0
            _report(fp, "Total time taken for " + str(num_queries)
                    + " images (minutes): " + str(total / 60))
            average = total_precision / num_queries if num_queries else 0.0
            _report(fp, "Total Precision@" + str(TOP_K) + " Avg: " + str(average),
                    end="")


if __name__ == '__main__':
    main()