├── README.md ├── keras ├── funcs.py ├── lenet5.py └── utils.py └── tf ├── img_trans.py ├── lenet5_model.py ├── lenet_train.py ├── retrieval.py └── retrieval_.py /README.md: -------------------------------------------------------------------------------- 1 | # image-retrieval 2 | Deep Learning for content-based image retrieval with TensorFlow and Keras 3 | 4 | 5 | ## tensorflow version 6 | 7 | **usage: python retrieval.py -h** 8 | 9 | [img_trans](tf/img_trans.py) : 10 | 11 | skimage and keras are required in this file. The following transformations are included: 12 | 13 | - adjust brightness 14 | - shift, rotate, flip, zoom 15 | - dilation, erosion 16 | - add oblique line 17 | - add salt noise 18 | 19 | [lenet5_model](tf/lenet5_model.py) : 20 | 21 | LeNet-5 is a classical CNN model proposed by Yann LeCun. 22 | 23 | See [Gradient-based learning applied to document recognition](https://ieeexplore.ieee.org/abstract/document/726791/) for more details. 24 | 25 | ReLU is used as the activation function for the convolutional and fully-connected layers. A dropout layer is added before softmax in this implementation. 26 | 27 | [lenet_train](tf/lenet_train.py) : 28 | 29 | Train LeNet-5 on MNIST. 30 | 31 | [retrieval](tf/retrieval.py) : 32 | 33 | Extract features of the query image and all retrieval images from the fully-connected layer. 34 | 35 | Perform feature similarity computation for the retrieval task. Cosine similarity is adopted in this implementation. 36 | 37 | [retrieval_](tf/retrieval_.py) : 38 | 39 | Example of using images stored on disk as tf inputs. 40 | 41 | 42 | ## keras version 43 | 44 | [utils](keras/utils.py) : Image preprocessing. 45 | 46 | [funcs](keras/funcs.py) : Image retrieval function. 47 | 48 | [lenet5](keras/lenet5.py) : Implementation of LeNet-5 with Keras. 
49 | -------------------------------------------------------------------------------- /keras/funcs.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | import pandas as pd 4 | from keras.models import Model 5 | from collections import defaultdict 6 | 7 | 8 | def get_outputs(input_data, model): 9 | layer_names = [layer.name for layer in model.layers if 10 | 'flatten' not in layer.name and 'input' not in layer.name 11 | and 'predictions' not in layer.name] 12 | intermediate_layer_model = Model(inputs=model.input, 13 | outputs=[model.get_layer(layer_name).output for layer_name in layer_names]) 14 | layer_outputs = intermediate_layer_model.predict(input_data) 15 | return layer_outputs, layer_names 16 | 17 | 18 | def retrieval(query_data, test_data, query_label, test_labels, model): 19 | ''' 20 | retrieval task 21 | :param query_data: query input 22 | :param test_data: test data set 23 | :param query_label: predicted label of query input 24 | :param test_labels: test label 25 | :param model: investigated model 26 | :return: 27 | df: dataframe 28 | f1: F1 measure 29 | ''' 30 | related = 0 31 | retrieved_related = 0 32 | retrieved = 0 33 | img_index = [] 34 | img_label = [] 35 | similarity = [] 36 | query_outputs = get_outputs(query_data, model)[0] 37 | query_feat = query_outputs[5][0] 38 | 39 | layer_outputs = get_outputs(test_data, model)[0] 40 | layer_output = layer_outputs[5] 41 | 42 | for i in xrange(10000): 43 | test_feat = layer_output[i] 44 | test_label = np.argmax(test_labels[i]) 45 | if query_label == test_label: 46 | related += 1 47 | # use cosine distance as similarity metric 48 | cos = np.dot(query_feat, test_feat) / (np.linalg.norm(query_feat)*np.linalg.norm(test_feat)) 49 | sim = 0.5 + 0.5*cos 50 | if sim >= 0.85: 51 | retrieved += 1 52 | img_index.append(i) 53 | similarity.append(sim) 54 | img_label.append(test_label) 55 | if query_label == test_label: 56 | 
retrieved_related += 1 57 | 58 | # evaluation metric 59 | recall = retrieved_related / related 60 | precision = retrieved_related / retrieved 61 | f1 = recall * precision * 2 / (recall + precision) 62 | 63 | df = pd.DataFrame({'retrieved_index': img_index, 'similarity': similarity, 'label': img_label}) 64 | df = df.sort_values('similarity', ascending=True) 65 | return df, f1 66 | 67 | -------------------------------------------------------------------------------- /keras/lenet5.py: -------------------------------------------------------------------------------- 1 | ''' 2 | LeNet-5 3 | ''' 4 | 5 | 6 | from __future__ import print_function 7 | 8 | from keras.datasets import mnist 9 | from keras.layers import Convolution2D, MaxPooling2D, Input, Dense, Activation, Flatten 10 | from keras.models import Model 11 | from keras.utils import to_categorical 12 | 13 | from configs import bcolors 14 | 15 | 16 | def lenet5(input_tensor=None, train=False): 17 | nb_classes = 10 18 | # convolution kernel size 19 | kernel_size = (5, 5) 20 | 21 | if train: 22 | batch_size = 256 23 | nb_epoch = 10 24 | 25 | # input image dimensions 26 | img_rows, img_cols = 28, 28 27 | 28 | # the data, shuffled and split between train and test sets 29 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 30 | 31 | x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) 32 | x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) 33 | input_shape = (img_rows, img_cols, 1) 34 | 35 | x_train = x_train.astype('float32') 36 | x_test = x_test.astype('float32') 37 | x_train /= 255 38 | x_test /= 255 39 | 40 | # convert class vectors to binary class matrices 41 | y_train = to_categorical(y_train, nb_classes) 42 | y_test = to_categorical(y_test, nb_classes) 43 | 44 | input_tensor = Input(shape=input_shape) 45 | elif input_tensor is None: 46 | print(bcolors.FAIL + 'you have to proved input_tensor when testing') 47 | exit() 48 | 49 | # block1 50 | x = Convolution2D(6, kernel_size, 
activation='relu', padding='same', name='block1_conv1')(input_tensor) 51 | x = MaxPooling2D(pool_size=(2, 2), name='block1_pool1')(x) 52 | 53 | # block2 54 | x = Convolution2D(16, kernel_size, activation='relu', padding='same', name='block2_conv1')(x) 55 | x = MaxPooling2D(pool_size=(2, 2), name='block2_pool1')(x) 56 | 57 | x = Flatten(name='flatten')(x) 58 | x = Dense(120, activation='relu', name='fc1')(x) 59 | x = Dense(84, activation='relu', name='fc2')(x) 60 | x = Dense(nb_classes, name='before_softmax')(x) 61 | x = Activation('softmax', name='predictions')(x) 62 | 63 | model = Model(input_tensor, x) 64 | 65 | if train: 66 | # compiling 67 | model.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy']) 68 | 69 | # trainig 70 | model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=batch_size, epochs=nb_epoch, verbose=1) 71 | # save model 72 | model.save_weights('./lenet5.h5') 73 | score = model.evaluate(x_test, y_test, verbose=0) 74 | print('\n') 75 | print('Overall Test score:', score[0]) 76 | print('Overall Test accuracy:', score[1]) 77 | else: 78 | model.load_weights('./lenet5.h5') 79 | print(bcolors.OKBLUE + 'Model loaded' + bcolors.ENDC) 80 | 81 | return model 82 | 83 | 84 | if __name__ == '__main__': 85 | lenet5(train=True) 86 | -------------------------------------------------------------------------------- /keras/utils.py: -------------------------------------------------------------------------------- 1 | ''' 2 | image transformation 3 | ''' 4 | 5 | import numpy as np 6 | import random 7 | from skimage import draw 8 | from skimage import morphology as mp 9 | from skimage import exposure as ep 10 | from keras.preprocessing import image 11 | 12 | 13 | def deprocess_img(x): 14 | # convert tensor to a valid image 15 | x *= 255 16 | x = np.clip(x, 0, 255).astype('uint8') 17 | return x.reshape(x.shape[1], x.shape[2]) 18 | 19 | 20 | def light(img): 21 | # adjust_gamma(x, gamma) 22 | # gamma > 1 --- darker 23 | # 
gamma < 1 --- brighter 24 | img = img.astype(np.float32) 25 | return ep.adjust_gamma(img, random.uniform(0.7, 1.05)) 26 | 27 | 28 | def shift(img): 29 | # random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0, fill_mode='nearest', cval=0.0) 30 | img = img.reshape(1, 28, 28) 31 | wrg = 0.2 32 | hrg = 0.2 33 | return image.random_shift(img, wrg, hrg).reshape(28, 28) 34 | 35 | 36 | def rotate(img): 37 | # random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0, fill_mode='nearest', cval=0.0) 38 | img = img.reshape(1, 28, 28) 39 | rg = 30 40 | return image.random_rotation(img, rg).reshape(28, 28) 41 | 42 | 43 | def flip(img): 44 | # flip_axis(x, axis) 45 | # horizontal 46 | return image.flip_axis(img, 1) 47 | 48 | 49 | def zoom(img): 50 | # random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0, fill_mode='nearest', cval=0.0) 51 | img = img.reshape(1, 28, 28) 52 | zoom_range = (0.6, 1.3) 53 | return image.random_zoom(img, zoom_range).reshape(28, 28) 54 | 55 | 56 | def dilation(img): 57 | # return greyscale morphological dilation of an image 58 | return mp.dilation(img, mp.square(2, dtype=np.uint8)) 59 | 60 | 61 | def erosion(img): 62 | # return greyscale morphological erosion of an image 63 | return mp.erosion(img, mp.square(2, dtype=np.uint8)) 64 | 65 | 66 | def draw_line(img): 67 | img = img.reshape(28, 28) 68 | # draw a straight line across the digit 69 | r0 = random.randint(1, 27) 70 | c0 = random.randint(1, 4) 71 | r1 = random.randint(1, 27) 72 | c1 = random.randint(24, 27) 73 | rr, cc = draw.line(r0, c0, r1, c1) 74 | img[rr, cc] = 255 75 | return img 76 | 77 | 78 | def s_p_noise(img): 79 | # salt and pepper noise 80 | img = img.reshape(28, 28) 81 | rows, cols = img.shape 82 | for i in range(50): 83 | x = np.random.randint(0, rows) 84 | y = np.random.randint(0, cols) 85 | img[x, y] = 255 86 | x_ = np.random.randint(4, 24) 87 | y_ = np.random.randint(4, 24) 88 | img[x_, y_] = 0 89 | return img 90 | 91 | 92 | 93 | 94 | 
-------------------------------------------------------------------------------- /tf/img_trans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | from skimage import draw 4 | from skimage import morphology as mp 5 | from skimage import exposure as ep 6 | from keras.preprocessing import image 7 | 8 | 9 | def light(img): 10 | # adjust_gamma(x, gamma) 11 | # gamma > 1 --- darker 12 | # gamma < 1 --- brighter 13 | return ep.adjust_gamma(img, random.uniform(0.8, 1.05)) 14 | 15 | 16 | def shift(img): 17 | # random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0, fill_mode='nearest', cval=0.0) 18 | img = img.reshape(1, 28, 28) 19 | wrg = 0.2 20 | hrg = 0.2 21 | return image.random_shift(img, wrg, hrg).reshape(28, 28) 22 | 23 | 24 | def rotate(img): 25 | # random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0, fill_mode='nearest', cval=0.0) 26 | img = img.reshape(1, 28, 28) 27 | rg = 30 28 | return image.random_rotation(img, rg).reshape(28, 28) 29 | 30 | 31 | def flip(img): 32 | # flip_axis(x, axis) 33 | # horizontal 34 | return image.flip_axis(img, 1) 35 | 36 | 37 | def zoom(img): 38 | # random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0, fill_mode='nearest', cval=0.0) 39 | img = img.reshape(1, 28, 28) 40 | zoom_range = (0.6, 1.3) 41 | return image.random_zoom(img, zoom_range).reshape(28, 28) 42 | 43 | 44 | def dilation(img): 45 | # return greyscale morphological dilation of an image 46 | return mp.dilation(img, mp.square(2, dtype=np.uint64)) 47 | 48 | 49 | def erosion(img): 50 | # return greyscale morphological erosion of an image 51 | return mp.erosion(img, mp.square(2, dtype=np.uint64)) 52 | 53 | 54 | def draw_line(img): 55 | # draw a straight line across the digit 56 | r0 = random.randint(0, 28) 57 | c0 = random.randint(0, 4) 58 | r1 = random.randint(0, 28) 59 | c1 = random.randint(24, 28) 60 | rr, cc = draw.line(r0, c0, r1, c1) 61 | img[rr, cc] = 255 62 | 
return img 63 | 64 | 65 | def s_p_noise(img): 66 | # salt and pepper noise 67 | rows, cols = img.shape 68 | for i in range(50): 69 | x = np.random.randint(0, rows) 70 | y = np.random.randint(0, cols) 71 | img[x, y] = 255 72 | return img 73 | -------------------------------------------------------------------------------- /tf/lenet5_model.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | # import numpy as np 3 | 4 | 5 | def weight_init(shape): 6 | initial = tf.truncated_normal(shape, stddev=0.1) 7 | return tf.Variable(initial) 8 | 9 | 10 | def bias_init(shape): 11 | initial = tf.constant(0.1, shape=shape) 12 | return tf.Variable(initial) 13 | 14 | 15 | def conv2d(x, W): 16 | """ 17 | convolution operation 18 | 19 | Args: 20 | x: image input. 4-d tensor of [batch, height, weight, channel] 21 | W: filter. 4-d tensor of [height,weight,num_in,num_out] 22 | """ 23 | return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='VALID') 24 | 25 | 26 | def max_pool(x): 27 | """ 28 | max_pooling operation 29 | 30 | Args: 31 | x: image input. 
4-d tensor 32 | """ 33 | return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID') 34 | 35 | 36 | def conv_layer(x, weight, bias): 37 | """ 38 | convolutional layer 39 | 40 | Args: 41 | x: input layer 42 | weight: the weight of filter 43 | bias: bias 44 | 45 | Return: 46 | h_pool: the feture map after convolution and max pooling 47 | """ 48 | W_conv = weight_init(weight) 49 | b_conv = bias_init(bias) 50 | 51 | h_conv = tf.nn.relu(conv2d(x, W_conv) + b_conv) 52 | h_pool = max_pool(h_conv) 53 | 54 | return h_pool 55 | 56 | 57 | def fc_layer(x, weight, bias): 58 | """ 59 | fully-connected layer 60 | 61 | Args: 62 | x: input layer 63 | weight: weight of fully-connected layer 64 | bias: bias of fully-connected layer 65 | 66 | Return: 67 | h_fc: output of the layer 68 | """ 69 | W_fc = weight_init(weight) 70 | b_fc = weight_init(bias) 71 | 72 | h_fc = tf.nn.relu(tf.matmul(x, W_fc) + b_fc) 73 | 74 | return h_fc 75 | 76 | 77 | def model(x, keep_prob): 78 | """ 79 | construt the LeNet-5 layers 80 | 81 | Args: 82 | x: input 83 | keep_prob: drop out rate 84 | 85 | Return: 86 | l_: probability 87 | """ 88 | 89 | # input layer 90 | # padding the input image to 32*32 91 | x_image = tf.pad(tf.reshape(x, [-1, 28, 28, 1]), [[0, 0], [2, 2], [2, 2], [0, 0]]) 92 | 93 | # Layer 1 94 | h1 = conv_layer(x_image, [5, 5, 1, 6], [6]) 95 | 96 | # Layer 2 97 | h2 = conv_layer(h1, [5, 5, 6, 16], [16]) 98 | 99 | # Layer 3 100 | # without pooling 101 | weight = weight_init([5, 5, 16, 120]) 102 | bias = bias_init([120]) 103 | h3 = tf.nn.relu(conv2d(h2, weight) + bias) 104 | h3_flat = tf.reshape(h3, [-1, 120]) 105 | 106 | # Layer 4 107 | # fully-connected layer 108 | h4 = fc_layer(h3_flat, [120, 84], [84]) 109 | 110 | # Layer 5 111 | # output layer,fully-connected 112 | # l_ : probability vector 113 | h4_drop = tf.nn.dropout(h4, keep_prob) 114 | weight = weight_init([84, 10]) 115 | bias = bias_init([10]) 116 | l_ = tf.nn.softmax(tf.matmul(h4_drop, weight) + bias) 117 | 118 
| return l_ 119 | 120 | 121 | -------------------------------------------------------------------------------- /tf/lenet_train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.examples.tutorials.mnist import input_data 3 | import lenet5_model 4 | 5 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 6 | 7 | 8 | def train(): 9 | """ 10 | train the lenet model on MNIST 11 | """ 12 | 13 | sess = tf.InteractiveSession() 14 | 15 | x = tf.placeholder(tf.float32, shape=[None, 784]) 16 | label = tf.placeholder(tf.float32, shape=[None, 10]) 17 | keep_prob = tf.placeholder(tf.float32) 18 | 19 | l_ = lenet5_model.model(x, keep_prob) 20 | cross_entropy = -tf.reduce_sum(label * tf.log(l_)) 21 | # minimize the cross entropy with lr=0.0001 22 | train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy) 23 | correct_predict = tf.equal(tf.argmax(l_, 1), tf.argmax(label, 1)) 24 | accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32)) 25 | 26 | # create summary operations in tensorboard 27 | tf.summary.scalar('loss', cross_entropy) 28 | tf.summary.scalar('accuracy', accuracy) 29 | 30 | sess.run(tf.global_variables_initializer()) 31 | 32 | # merge all the summary nodes 33 | merged = tf.summary.merge_all() 34 | # write data to local file 35 | summary_writer = tf.summary.FileWriter('./mnistEnv/', graph=sess.graph) 36 | 37 | for i in range(20000): 38 | batch = mnist.train.next_batch(50) 39 | if i % 100 == 0: 40 | # print the log every 100 steps 41 | train_accuracy = accuracy.eval( 42 | feed_dict={ 43 | x: batch[0], 44 | label: batch[1], 45 | keep_prob: 1.0 46 | }) 47 | print("step %d, training accuracy %.4f" % (i, train_accuracy)) 48 | train_step.run(feed_dict={ 49 | x: batch[0], 50 | label: batch[1], 51 | keep_prob: 0.5 52 | }) 53 | 54 | # run the merged node 55 | # my tensorboard at http:// zhang:6006 56 | summary = sess.run(merged, feed_dict={x: batch[0], label: batch[1], 
keep_prob: 1.0}) 57 | summary_writer.add_summary(summary, i) 58 | 59 | # save the model 60 | saver = tf.train.Saver() 61 | save_path = saver.save(sess, "/home/zzy/py/lenet5/lenet5.ckpt") 62 | print("Model Saved in File: ", save_path) 63 | 64 | sess.close() 65 | 66 | 67 | if __name__ == '__main__': 68 | train() 69 | -------------------------------------------------------------------------------- /tf/retrieval.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import tensorflow as tf 3 | import numpy as np 4 | import pandas as pd 5 | import argparse 6 | import cv2 7 | from img_trans import * 8 | from tensorflow.examples.tutorials.mnist import input_data 9 | 10 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 11 | 12 | # read the parameter 13 | # argument parsing 14 | parser = argparse.ArgumentParser(description='Main function for follow-up input generation and nc calculation') 15 | parser.add_argument('query_batch_size', help="batch size of query images", type=int, default=1) 16 | parser.add_argument('trans', help="tranformation type", 17 | choices=['light', 'shift', 'rotate', 'zoom', 18 | 's_p_noise', 'dilation', 'erosion', 'draw_line', 19 | ]) 20 | 21 | args = parser.parse_args() 22 | 23 | def retrieval(): 24 | """ 25 | restore lenet model to conduct retrieval task 26 | """ 27 | sess = tf.InteractiveSession() 28 | 29 | 30 | all_related = 0 # number of all related images 31 | retrieved_related = 0 # number of related images in retrieved ones 32 | retrieved = 0 # number of retrieved image 33 | 34 | 35 | # load meta graph 36 | saver = tf.train.import_meta_graph('./lenet5.ckpt.meta') 37 | saver.restore(sess, tf.train.latest_checkpoint('./')) 38 | print("Model Restored") 39 | 40 | 41 | # get tensors from graph 42 | graph = tf.get_default_graph() 43 | h4 = graph.get_tensor_by_name('Relu_3:0') 44 | x = graph.get_tensor_by_name('Placeholder:0') 45 | label = 
graph.get_tensor_by_name('Placeholder_1:0') 46 | keep_prob = graph.get_tensor_by_name('Placeholder_2:0') 47 | 48 | query_batch_size = args.query_batch_size 49 | batch = mnist.test.next_batch(query_batch_size) 50 | b = h4.eval(feed_dict={x: batch[0], label: batch[1], keep_prob: 1.0}) # feature extracted from fc layer 51 | test_digit = tf.argmax(batch[1], 1).eval() 52 | 53 | precision = [] 54 | recall = [] 55 | 56 | for i in range(query_batch_size): 57 | # restore the test image to 28*28 58 | b1 = tf.reshape(batch[0], [query_batch_size, 28, 28]).eval() 59 | b2 = 255 * b1 60 | b3 = b2.astype(np.uint64) 61 | cv2.imwrite('./pics/source'+str(i)+'.jpg', b3[i]) 62 | 63 | # =====================generate new image============================= 64 | #img = b3[i] 65 | #category = test_digit[i] 66 | #gen_image(i, img, args.trans, category) 67 | #print '%s finished' % str(trans) 68 | 69 | 70 | # ===========================retrieval task================================= 71 | 72 | # here I put in all test images for one batch 73 | batch_ = mnist.test.next_batch(10000) 74 | 75 | # extract the feature of fully-connected layer 76 | feat = h4.eval(feed_dict={x: batch_[0], label: batch_[1], keep_prob: 1.0}) 77 | pred_digit = tf.argmax(batch_[1], 1).eval() 78 | 79 | # restore the image to 28*28 80 | feat1 = tf.reshape(batch_[0], [10000, 28, 28]).eval() 81 | feat2 = 255 * feat1 82 | feat3 = feat2.astype(np.uint64) 83 | 84 | index = [] # index of retrieved images 85 | distance = [] # distance between the feature vectors of test image and retrieved image 86 | 87 | for j in range(10000): 88 | if pred_digit[j] == test_digit[i]: 89 | all_related += 1 90 | 91 | for k in range(10000): 92 | # use cosine distance as similarity metric 93 | cos = np.dot(feat[k], b[i])/(np.linalg.norm(feat[k])*np.linalg.norm(b[i])) 94 | # regularize 95 | sim = 0.5+0.5*cos 96 | if sim >= 0.85: # set a threshold 97 | index.append(k) 98 | similarity.append(sim) 99 | # write the retrieved images 100 | # 
cv2.imwrite('./pics/retrieved/'+str(k)+'.jpg', feat3[k]) 101 | retrieved += 1 102 | if pred_digit[k] == test_digit[i]: 103 | retrieved_related += 1 104 | 105 | ''' 106 | def gen_image(i, img, trans, category): 107 | """ 108 | # generate follow-up image 109 | :param i: image id 110 | :param img: source query image 111 | :param mr: image transformation type 112 | :param category: category of the image 113 | """ 114 | img_source = img.copy() 115 | img_new = eval(trans)(img_source) 116 | path_follow = './pics/' + str(mr) + '/' + str(category) 117 | if not (os.path.exists(path_follow)): 118 | os.makedirs(path_follow) 119 | cv2.imwrite(path_follow + '/' + str(i) + '.jpg', img_new) 120 | ''' 121 | 122 | if __name__ == '__main__': 123 | retrieval() 124 | -------------------------------------------------------------------------------- /tf/retrieval_.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import cv2 3 | import os 4 | import argparse 5 | import tensorflow as tf 6 | import pandas as pd 7 | import numpy as np 8 | 9 | from tensorflow.examples.tutorials.mnist import input_data 10 | mnist = input_data.read_data_sets("MNIST_data/", one_hot=True) 11 | 12 | # restore lenet model to conduct retrieval task 13 | sess = tf.InteractiveSession() 14 | # load meta graph 15 | saver = tf.train.import_meta_graph('./lenet5.ckpt.meta') 16 | saver.restore(sess, tf.train.latest_checkpoint('./')) 17 | print "Model Restored" 18 | 19 | # get tensors from graph 20 | graph = tf.get_default_graph() 21 | # fclayer 1 22 | f6 = graph.get_tensor_by_name('Relu_3:0') 23 | 24 | x = graph.get_tensor_by_name('Placeholder:0') 25 | label = graph.get_tensor_by_name('Placeholder_1:0') 26 | keep_prob = graph.get_tensor_by_name('Placeholder_2:0') 27 | 28 | 29 | # load your own local pics 30 | def read_from_disk(): 31 | """ 32 | read file names and labels 33 | :return: 34 | file_list: file name list 35 | label_list: label list 36 | """ 
37 | 38 | file_list = [] 39 | label_list = [] 40 | name_list = [] 41 | classes = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9'] 42 | 43 | for class_item in classes: 44 | dir_name = './pics/'+class_item 45 | for files in os.listdir(dir_name): 46 | file_list.append(dir_name+'/'+files) 47 | label_list.append(classes.index(class_item)) 48 | name_list.append(files) 49 | return file_list, label_list, name_list 50 | 51 | 52 | def image_init(img_path): 53 | """ 54 | image preprocessing 55 | :param img_path: the path of query image 56 | :return: image tensor 57 | """ 58 | im = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE).astype(np.float32) 59 | x_img = im/float(255) 60 | x_img = np.reshape(x_img, [-1, 784]) 61 | return x_img 62 | 63 | 64 | def label_init(y): 65 | """ 66 | transfer label to one-hot representation 67 | :param y: label list 68 | :return: one-hot tensor 69 | """ 70 | batch_size = tf.size(y) 71 | label_list = tf.expand_dims(y, 1) 72 | indices = tf.expand_dims(tf.range(0, batch_size, 1), 1) 73 | concated = tf.concat([indices, label_list], 1) 74 | label_list = tf.sparse_to_dense(concated, tf.stack([batch_size, 10]), 1.0, 0.0) 75 | return label_list 76 | 77 | 78 | def retrieval(): 79 | 80 | """ 81 | retrieval task on MNIST 82 | """ 83 | 84 | all_related = 0 85 | retrieved_related = 0 86 | retrieved = 0 87 | precision = [] 88 | recall = [] 89 | neuron_coverage = [] 90 | indexes = [] 91 | 92 | file_list, label_list, name_list = read_from_disk() 93 | label_list = label_init(label_list) 94 | 95 | # here I put in all test images for one batch 96 | batch_ = mnist.test.next_batch(10000, shuffle=False) 97 | # extract the feature of fully-connected layer 98 | feat = f6.eval(feed_dict={x: batch_[0], label: batch_[1], keep_prob: 1.0}) 99 | # label of retrieved image 100 | pred_digit = tf.argmax(batch_[1], 1).eval() 101 | 102 | for i in range(0, 1000): 103 | 104 | # read query image 105 | img_path = file_list[i] 106 | x_img = image_init(img_path) 107 | img_name = 
name_list[i] 108 | img_index = int(img_name.split('.')[0]) 109 | indexes.append(img_index) 110 | 111 | # read label 112 | labels = sess.run(label_list) 113 | y_label = labels[i] 114 | y_label = np.reshape(y_label, [-1, 10]) 115 | 116 | query_feat = f6.eval(feed_dict={x: x_img, label: y_label, keep_prob: 1.0}) 117 | query_digit = tf.argmax(y_label, 1).eval() 118 | 119 | 120 | # ===========================retrieval task================================= 121 | 122 | index = [] 123 | similarity = [] 124 | pred_label = [] 125 | 126 | for j in xrange(10000): 127 | if pred_digit[j] == query_digit: 128 | all_related += 1 129 | 130 | for m in xrange(10000): 131 | cos = np.dot(feat[m], query_feat[0])/(np.linalg.norm(feat[m])*np.linalg.norm(query_feat[0])) 132 | sim = 0.5+0.5*cos 133 | if sim >= 0.85: # set a thresholds 134 | index.append(m) 135 | similarity.append(sim) 136 | pred_label.append(pred_digit[m]) 137 | retrieved += 1 138 | if pred_digit[m] == query_digit: 139 | retrieved_related += 1 140 | 141 | df = pd.DataFrame({'retrieved_image_index': index, 'similarity': similarity, 'label': pred_label}) 142 | 143 | # define the evaluation metric 144 | r = retrieved_related / all_related 145 | p = retrieved_related / retrieved 146 | f = r * p * 2 / (r + p) 147 | precision.append(p) 148 | recall.append(r) 149 | 150 | # write precision and recall 151 | df_measure = pd.DataFrame({'index': indexes, 'precision': precision, 'recall': recall}) 152 | df_measure = df_measure.sort_values('index', ascending=True) 153 | df_measure.to_csv('./' + args.mr + '/' + str(i) + '.csv', index=True) 154 | 155 | 156 | if __name__ == '__main__': 157 | retrieval() 158 | --------------------------------------------------------------------------------