├── FineTuneVGG16
│   ├── FineTuneVGG16.py
│   ├── data.py
│   ├── test
│   │   ├── compute_ap
│   │   └── test.py
│   └── vgg.py
└── README.md
--------------------------------------------------------------------------------
/FineTuneVGG16/FineTuneVGG16.py:
--------------------------------------------------------------------------------
import os
import sys

import numpy as np
import tensorflow as tf

from vgg import vgg_16

slim = tf.contrib.slim


class FineTuneVGG16(object):
    def __init__(self, learning_rate, num_classes=1000, is_training=True):
        self.global_step = tf.Variable(0, trainable=False, name='global_step')

        self.batch_imgs = tf.placeholder(tf.float32, (None, None, None, 3))
        self.batch_lbls = tf.placeholder(tf.int32, (None, 1))

        # Keep the end points on the instance so that step() can fetch the
        # logits at inference time.
        _, self.end_points = vgg_16(self.batch_imgs, num_classes, is_training)

        self.loss = tf.losses.sparse_softmax_cross_entropy(
            labels=self.batch_lbls, logits=self.end_points['vgg_16/fc8'])
        gradients = tf.gradients(self.loss, tf.trainable_variables())
        self.update = tf.train.GradientDescentOptimizer(learning_rate).apply_gradients(
            zip(gradients, tf.trainable_variables()), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=99999999)

    def step(self, session, batch_imgs, batch_lbls, is_training):
        input_feed = {self.batch_imgs: batch_imgs, self.batch_lbls: batch_lbls}

        if is_training:
            output_feed = [self.loss, self.update]
        else:
            output_feed = [self.end_points['vgg_16/fc8']]

        outputs = session.run(output_feed, input_feed)
        return outputs[0]


####################################################################################################
tf.app.flags.DEFINE_float("learning_rate", 0.001, "Learning rate.")
tf.app.flags.DEFINE_integer("batch_size", 64, "Batch size to use during training.")
tf.app.flags.DEFINE_string("checkpoint_dir", "tfmodel-FT", "Checkpoint directory.")
tf.app.flags.DEFINE_integer("steps_per_checkpoint", 100, "How many training steps to do per checkpoint.")

FLAGS = tf.app.flags.FLAGS

from data import get_data

init_ckpt = './vgg_16.ckpt'

train, validation = get_data()

model = FineTuneVGG16(FLAGS.learning_rate, 713)

# Restore every pre-trained variable except the classifier (fc8), which must be
# re-initialized because the number of classes changes from 1000 to 713.
exclusions = ['vgg_16/fc8']
variables_to_restore = []
for var in slim.get_model_variables():
    excluded = False
    for exclusion in exclusions:
        if var.op.name.startswith(exclusion):
            excluded = True
            break
    if not excluded:
        variables_to_restore.append(var)

saver = tf.train.Saver(var_list=variables_to_restore)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.restore(sess, init_ckpt)

    # Saver.save fails if the target directory does not exist.
    if not os.path.isdir(FLAGS.checkpoint_dir):
        os.makedirs(FLAGS.checkpoint_dir)

    current_step = 0
    data = train + validation
    while True:
        np.random.shuffle(data)
        # Iterate over full batches; a trailing partial batch is dropped.
        for start, end in zip(range(0, len(data), FLAGS.batch_size),
                              range(FLAGS.batch_size, len(data), FLAGS.batch_size)):
            curr_data = data[start:end]
            imgs = []
            lbls = []
            for (img, lbl) in curr_data:
                imgs.append(img.astype(float))
                lbls.append(lbl)
            step_loss = model.step(sess, np.array(imgs), np.expand_dims(np.array(lbls), axis=1), True)
            current_step += 1
            print("step %d - loss %.3f" % (current_step, step_loss))
            if current_step % FLAGS.steps_per_checkpoint == 0:
                checkpoint_path = os.path.join(FLAGS.checkpoint_dir, 'ckpt')
                model.saver.save(sess, checkpoint_path, global_step=model.global_step)
            sys.stdout.flush()
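
# Note: a minimal inference sketch for the class above -- a hedged example, not
# part of the original script; the checkpoint path and image size below are
# assumptions for illustration only.
#
#   model = FineTuneVGG16(learning_rate=0.0, num_classes=713, is_training=False)
#   with tf.Session() as sess:
#       model.saver.restore(sess, 'tfmodel-FT/ckpt-100')      # assumed checkpoint
#       imgs = np.zeros((1, 362, 272, 3), dtype=np.float32)   # one dummy image
#       lbls = np.zeros((1, 1), dtype=np.int32)               # fed but unused here
#       logits = model.step(sess, imgs, lbls, is_training=False)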
--------------------------------------------------------------------------------
/FineTuneVGG16/data.py:
--------------------------------------------------------------------------------
import os
import sys

import numpy as np
from PIL import Image

IMG_PATH = "3DLandmark"


def get_data():
    """Read every image in image.list; the first image seen for each class is
    held out for validation, the rest go to training."""
    print("Reading data ...")
    sys.stdout.flush()
    train = []
    val = []
    val_flag = []

    for img in open('image.list', 'r'):
        img = img.strip()
        # File names look like '<name>_<label>.<ext>' with 1-based labels.
        arr = img[:-4].split('_')
        label = int(arr[1]) - 1
        im = Image.open(os.path.join(IMG_PATH, img))
        im = im.resize((362, 272))
        im = np.transpose(np.array(im), (1, 0, 2))

        if label not in val_flag:
            val_flag.append(label)
            val.append((im, label))
        else:
            train.append((im, label))

    return train, val
--------------------------------------------------------------------------------
/FineTuneVGG16/test/compute_ap:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hehefan/Image-Retrieval/ec5ac58157032ce846f7ce4859d19a88515e169d/FineTuneVGG16/test/compute_ap
--------------------------------------------------------------------------------
/FineTuneVGG16/test/test.py:
--------------------------------------------------------------------------------
import os
import sys
sys.path.append('../')

import operator
import subprocess  # replaces the removed Python 2 'commands' module

import numpy as np
import tensorflow as tf
from PIL import Image

from vgg import vgg_16

slim = tf.contrib.slim
step = 100
tfmodels = '../tfmodel-FT'
IMG_PATH = "data/Oxford5k"
QRY_PATH = "data/Query"

if not os.path.isdir('ans'):
    os.makedirs('ans')  # per-query ranking files are written here

image_tensor = tf.placeholder(tf.float32, (None, None, None, 3))
logits, end_points = vgg_16(image_tensor, num_classes=713, is_training=False, spatial_squeeze=False)
saver = tf.train.Saver()


def extract_features(sess, image_list, img_dir):
    """Forward each listed image through VGG16 and collect its four descriptors."""
    conv5, pool5, fc6, fc7 = {}, {}, {}, {}
    for img in open(image_list, 'r'):
        img = img.strip()
        im = Image.open(os.path.join(img_dir, img))
        width, height = im.size
        # fc6 is a 7x7 VALID convolution, so both sides must be at least 224.
        width = max(width, 224)
        height = max(height, 224)
        im = im.resize((width, height))
        im = np.expand_dims(np.transpose(np.array(im), (1, 0, 2)), axis=0)
        f1, f2, f3, f4 = sess.run(
            [end_points['conv5'], end_points['pool5'], end_points['fc6'], end_points['fc7']],
            feed_dict={image_tensor: im})
        key = img[:-4]
        conv5[key], pool5[key], fc6[key], fc7[key] = f1[0], f2[0], f3[0], f4[0]
    return conv5, pool5, fc6, fc7


def mean_ap(qry_feats, db_feats):
    """L2-normalize descriptors, rank the database by Euclidean distance to each
    query, write the ranking to ans/, and average the APs from compute_ap."""
    for query, feature in qry_feats.items():
        feature = feature / np.linalg.norm(feature)
        rst = {}
        for img, feat in db_feats.items():
            feat = feat / np.linalg.norm(feat)
            rst[img] = np.linalg.norm(feature - feat)
        rst = sorted(rst.items(), key=operator.itemgetter(1))
        with open('ans/' + query, 'w') as f:
            for (k, v) in rst:
                f.write(k + '\n')
    mAP = 0.0
    num = 0.0
    with open("LIST", "r") as f:
        for line in f:
            line = line.strip()
            _, rst = subprocess.getstatusoutput(
                "./compute_ap data/Groundtruth_files/" + line + " ans/" + line)
            mAP += float(rst)
            num += 1
    return mAP / num


cnt = 0
max_mAP_conv5, max_cnt_conv5 = 0, 0
max_mAP_pool5, max_cnt_pool5 = 0, 0
max_mAP_fc6, max_cnt_fc6 = 0, 0
max_mAP_fc7, max_cnt_fc7 = 0, 0

sess = tf.Session()
while True:
    cnt += step
    if not os.path.isfile(os.path.join(tfmodels, 'ckpt-%d.meta' % cnt)):
        print("%d: %f - %d: %f - %d: %f - %d: %f" % (
            max_cnt_conv5, max_mAP_conv5, max_cnt_pool5, max_mAP_pool5,
            max_cnt_fc6, max_mAP_fc6, max_cnt_fc7, max_mAP_fc7))
        break

    saver.restore(sess, os.path.join(tfmodels, 'ckpt-%d' % cnt))

    conv5, pool5, fc6, fc7 = extract_features(sess, 'data/image.list', IMG_PATH)
    qry_conv5, qry_pool5, qry_fc6, qry_fc7 = extract_features(sess, 'data/query.list', QRY_PATH)

    mAP_conv5 = mean_ap(qry_conv5, conv5)
    if mAP_conv5 > max_mAP_conv5:
        max_mAP_conv5 = mAP_conv5
        max_cnt_conv5 = cnt

    mAP_pool5 = mean_ap(qry_pool5, pool5)
    if mAP_pool5 > max_mAP_pool5:
        max_mAP_pool5 = mAP_pool5
        max_cnt_pool5 = cnt

    mAP_fc6 = mean_ap(qry_fc6, fc6)
    if mAP_fc6 > max_mAP_fc6:
        max_mAP_fc6 = mAP_fc6
        max_cnt_fc6 = cnt
    mAP_fc7 = mean_ap(qry_fc7, fc7)
    if mAP_fc7 > max_mAP_fc7:
        max_mAP_fc7 = mAP_fc7
        max_cnt_fc7 = cnt

    print("STEP: %d - CONV5: %f - POOL5: %f - FC6: %f - FC7: %f" % (
        cnt, mAP_conv5, mAP_pool5, mAP_fc6, mAP_fc7))
    sys.stdout.flush()
--------------------------------------------------------------------------------
/FineTuneVGG16/vgg.py:
--------------------------------------------------------------------------------
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Contains model definitions for versions of the Oxford VGG network.

These model definitions were introduced in the following technical report:

  Very Deep Convolutional Networks For Large-Scale Image Recognition
  Karen Simonyan and Andrew Zisserman
  arXiv technical report, 2015
  PDF: http://arxiv.org/pdf/1409.1556.pdf
  ILSVRC 2014 Slides: http://www.robots.ox.ac.uk/~karen/pdf/ILSVRC_2014.pdf
  CC-BY-4.0

More information can be obtained from the VGG website:
www.robots.ox.ac.uk/~vgg/research/very_deep/

Usage:
  with slim.arg_scope(vgg.vgg_arg_scope()):
    outputs, end_points = vgg.vgg_a(inputs)

  with slim.arg_scope(vgg.vgg_arg_scope()):
    outputs, end_points = vgg.vgg_16(inputs)

@@vgg_a
@@vgg_16
@@vgg_19
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib.framework.python.ops import arg_scope
from tensorflow.contrib.layers.python.layers import layers as layers_lib
from tensorflow.contrib.layers.python.layers import regularizers
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import variable_scope


def vgg_arg_scope(weight_decay=0.0005):
  """Defines the VGG arg scope.

  Args:
    weight_decay: The l2 regularization coefficient.

  Returns:
    An arg_scope.
  """
  with arg_scope(
      [layers.conv2d, layers_lib.fully_connected],
      activation_fn=nn_ops.relu,
      weights_regularizer=regularizers.l2_regularizer(weight_decay),
      biases_initializer=init_ops.zeros_initializer()):
    with arg_scope([layers.conv2d], padding='SAME') as arg_sc:
      return arg_sc


def vgg_a(inputs,
          num_classes=1000,
          is_training=True,
          dropout_keep_prob=0.5,
          spatial_squeeze=True,
          scope='vgg_a'):
  """Oxford Net VGG 11-Layers version A Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
  To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs
      should be squeezed. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_a', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 1, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 1, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 2, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 2, layers.conv2d, 512, [3, 3], scope='conv5')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      net = layers.conv2d(
          net,
          num_classes, [1, 1],
          activation_fn=None,
          normalizer_fn=None,
          scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points


vgg_a.default_image_size = 224


def vgg_16(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_16'):
  """Oxford Net VGG 16-Layers version D Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
  To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs
      should be squeezed. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
  """
  with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
    end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
    with arg_scope(
        [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],
        outputs_collections=end_points_collection):
      net = layers_lib.repeat(
          inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
      net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
      net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
      net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
      # Modification for retrieval: globally max-pooled descriptors are taken
      # after conv5, pool5, fc6 and fc7 and exposed through end_points below.
      conv5 = tf.reduce_max(net, axis=(1, 2))
      net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
      pool5 = tf.reduce_max(net, axis=(1, 2))
      # Use conv2d instead of fully_connected layers.
      net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
      fc6 = tf.reduce_max(net, axis=(1, 2))
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout6')
      net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
      fc7 = tf.reduce_max(net, axis=(1, 2))
      net = layers_lib.dropout(
          net, dropout_keep_prob, is_training=is_training, scope='dropout7')
      # Collapse the remaining spatial map so fc8 yields one logit vector per
      # image regardless of the input resolution.
      net = tf.reduce_max(net, axis=(1, 2), keep_dims=True)
      net = layers.conv2d(
          net,
          num_classes, [1, 1],
          activation_fn=None,
          normalizer_fn=None,
          scope='fc8')
      # Convert end_points_collection into a end_point dict.
      end_points = utils.convert_collection_to_dict(end_points_collection)
      end_points['conv5'] = conv5
      end_points['pool5'] = pool5
      end_points['fc6'] = fc6
      end_points['fc7'] = fc7
      if spatial_squeeze:
        net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
        end_points[sc.name + '/fc8'] = net
      return net, end_points


vgg_16.default_image_size = 224


def vgg_19(inputs,
           num_classes=1000,
           is_training=True,
           dropout_keep_prob=0.5,
           spatial_squeeze=True,
           scope='vgg_19'):
  """Oxford Net VGG 19-Layers version E Example.

  Note: All the fully_connected layers have been transformed to conv2d layers.
  To use in classification mode, resize input to 224x224.

  Args:
    inputs: a tensor of size [batch_size, height, width, channels].
    num_classes: number of predicted classes.
    is_training: whether or not the model is being trained.
    dropout_keep_prob: the probability that activations are kept in the dropout
      layers during training.
    spatial_squeeze: whether or not the spatial dimensions of the outputs
      should be squeezed. Useful to remove unnecessary dimensions for
      classification.
    scope: Optional scope for the variables.

  Returns:
    the last op containing the log predictions and end_points dict.
237 | """ 238 | with variable_scope.variable_scope(scope, 'vgg_19', [inputs]) as sc: 239 | end_points_collection = sc.name + '_end_points' 240 | # Collect outputs for conv2d, fully_connected and max_pool2d. 241 | with arg_scope( 242 | [layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d], 243 | outputs_collections=end_points_collection): 244 | net = layers_lib.repeat( 245 | inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1') 246 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool1') 247 | net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2') 248 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool2') 249 | net = layers_lib.repeat(net, 4, layers.conv2d, 256, [3, 3], scope='conv3') 250 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool3') 251 | net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv4') 252 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool4') 253 | net = layers_lib.repeat(net, 4, layers.conv2d, 512, [3, 3], scope='conv5') 254 | net = layers_lib.max_pool2d(net, [2, 2], scope='pool5') 255 | # Use conv2d instead of fully_connected layers. 256 | net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6') 257 | net = layers_lib.dropout( 258 | net, dropout_keep_prob, is_training=is_training, scope='dropout6') 259 | net = layers.conv2d(net, 4096, [1, 1], scope='fc7') 260 | net = layers_lib.dropout( 261 | net, dropout_keep_prob, is_training=is_training, scope='dropout7') 262 | net = layers.conv2d( 263 | net, 264 | num_classes, [1, 1], 265 | activation_fn=None, 266 | normalizer_fn=None, 267 | scope='fc8') 268 | # Convert end_points_collection into a end_point dict. 269 | end_points = utils.convert_collection_to_dict(end_points_collection) 270 | if spatial_squeeze: 271 | net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed') 272 | end_points[sc.name + '/fc8'] = net 273 | return net, end_points 274 | 275 | 276 | vgg_19.default_image_size = 224 277 | 278 | # Alias 279 | vgg_d = vgg_16 280 | vgg_e = vgg_19 281 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Image-Retrieval 2 | ## 1. Baseline(Performance: [Oxford5k](http://www.robots.ox.ac.uk/~vgg/data/oxbuildings/)) 3 | ### 1.1 VGG16([TensorFlow Pre-trained Model](http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz)) 4 | POOL5: 58.21 - FC6: 61.03 - FC7: 55.54 5 | ### 1.2 VGG16 fine tuned on dataset [CNN Image Retrieval Learns from BoW: Unsupervised Fine-Tuning with Hard Examples](http://cmp.felk.cvut.cz/~radenfil/projects/siamac.html) 6 | POOL5: 0.679571 - FC6: 0.643479 - FC7: 0.569642 7 | --------------------------------------------------------------------------------