├── README.md ├── autoencoder.h5 ├── computed_data └── .gitkeep ├── requirements.txt ├── test_model.py ├── test_results └── .gitkeep └── train_model.py /README.md: -------------------------------------------------------------------------------- 1 | # Unsupervised image retrieval 2 | 3 | This code is related to this [blog post](https://medium.com/p/867a671b7e65) on using a convolutional denoising 4 | autoencoder for content-based image retrieval. 5 | 6 | We use the [keras](https://keras.io) Python machine learning library and the [mnist dataset](http://yann.lecun.com/exdb/mnist/). 7 | 8 | -------------------------------------------------------------------------------- /autoencoder.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdilBaaj/unsupervised-image-retrieval/01cd922a9143bbc7edae4df149f545e8bd57174a/autoencoder.h5 -------------------------------------------------------------------------------- /computed_data/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdilBaaj/unsupervised-image-retrieval/01cd922a9143bbc7edae4df149f545e8bd57174a/computed_data/.gitkeep -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | bleach==1.5.0 2 | cycler==0.10.0 3 | html5lib==0.9999999 4 | Keras==2.0.8 5 | Markdown==2.6.9 6 | matplotlib==2.0.2 7 | numpy==1.13.1 8 | protobuf==3.4.0 9 | pyparsing==2.2.0 10 | python-dateutil==2.6.1 11 | pytz==2017.2 12 | PyYAML==3.12 13 | scipy==0.19.1 14 | six==1.10.0 15 | tensorflow==1.3.0 16 | tensorflow-tensorboard==0.1.5 17 | Werkzeug==0.12.2 18 | -------------------------------------------------------------------------------- /test_model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.models import Model 3 | from 
keras.datasets import mnist 4 | import cv2 5 | from keras.models import load_model 6 | from sklearn.metrics import label_ranking_average_precision_score 7 | import time 8 | 9 | print('Loading mnist dataset') 10 | t0 = time.time() 11 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 12 | x_train = x_train.astype('float32') / 255. 13 | x_test = x_test.astype('float32') / 255. 14 | x_train = np.reshape(x_train, (len(x_train), 28, 28, 1)) # adapt this if using `channels_first` image data format 15 | x_test = np.reshape(x_test, (len(x_test), 28, 28, 1)) # adapt this if using `channels_first` image data format 16 | 17 | noise_factor = 0.5 18 | x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape) 19 | x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape) 20 | 21 | x_train_noisy = np.clip(x_train_noisy, 0., 1.) 22 | x_test_noisy = np.clip(x_test_noisy, 0., 1.) 23 | t1 = time.time() 24 | print('mnist dataset loaded in: ', t1-t0) 25 | 26 | print('Loading model :') 27 | t0 = time.time() 28 | autoencoder = load_model('autoencoder.h5') 29 | encoder = Model(inputs=autoencoder.input, outputs=autoencoder.get_layer('encoder').output) 30 | t1 = time.time() 31 | print('Model loaded in: ', t1-t0) 32 | 33 | scores = [] 34 | 35 | 36 | def retrieve_closest_elements(test_code, test_label, learned_codes): 37 | distances = [] 38 | for code in learned_codes: 39 | distance = np.linalg.norm(code - test_code) 40 | distances.append(distance) 41 | nb_elements = learned_codes.shape[0] 42 | distances = np.array(distances) 43 | learned_code_index = np.arange(nb_elements) 44 | labels = np.copy(y_train).astype('float32') 45 | labels[labels != test_label] = -1 46 | labels[labels == test_label] = 1 47 | labels[labels == -1] = 0 48 | distance_with_labels = np.stack((distances, labels, learned_code_index), axis=-1) 49 | sorted_distance_with_labels = distance_with_labels[distance_with_labels[:, 0].argsort()] 50 | 
51 | sorted_distances = 28 - sorted_distance_with_labels[:, 0] 52 | sorted_labels = sorted_distance_with_labels[:, 1] 53 | sorted_indexes = sorted_distance_with_labels[:, 2] 54 | return sorted_distances, sorted_labels, sorted_indexes 55 | 56 | 57 | def compute_average_precision_score(test_codes, test_labels, learned_codes, n_samples): 58 | out_labels = [] 59 | out_distances = [] 60 | retrieved_elements_indexes = [] 61 | for i in range(len(test_codes)): 62 | sorted_distances, sorted_labels, sorted_indexes = retrieve_closest_elements(test_codes[i], test_labels[i], learned_codes) 63 | out_distances.append(sorted_distances[:n_samples]) 64 | out_labels.append(sorted_labels[:n_samples]) 65 | retrieved_elements_indexes.append(sorted_indexes[:n_samples]) 66 | 67 | out_labels = np.array(out_labels) 68 | out_labels_file_name = 'computed_data/out_labels_{}'.format(n_samples) 69 | np.save(out_labels_file_name, out_labels) 70 | 71 | out_distances_file_name = 'computed_data/out_distances_{}'.format(n_samples) 72 | out_distances = np.array(out_distances) 73 | np.save(out_distances_file_name, out_distances) 74 | score = label_ranking_average_precision_score(out_labels, out_distances) 75 | scores.append(score) 76 | return score 77 | 78 | 79 | def retrieve_closest_images(test_element, test_label, n_samples=10): 80 | learned_codes = encoder.predict(x_train) 81 | learned_codes = learned_codes.reshape(learned_codes.shape[0], 82 | learned_codes.shape[1] * learned_codes.shape[2] * learned_codes.shape[3]) 83 | 84 | test_code = encoder.predict(np.array([test_element])) 85 | test_code = test_code.reshape(test_code.shape[1] * test_code.shape[2] * test_code.shape[3]) 86 | 87 | distances = [] 88 | 89 | for code in learned_codes: 90 | distance = np.linalg.norm(code - test_code) 91 | distances.append(distance) 92 | nb_elements = learned_codes.shape[0] 93 | distances = np.array(distances) 94 | learned_code_index = np.arange(nb_elements) 95 | labels = np.copy(y_train).astype('float32') 96 | 
labels[labels != test_label] = -1 97 | labels[labels == test_label] = 1 98 | labels[labels == -1] = 0 99 | distance_with_labels = np.stack((distances, labels, learned_code_index), axis=-1) 100 | sorted_distance_with_labels = distance_with_labels[distance_with_labels[:, 0].argsort()] 101 | 102 | sorted_distances = 28 - sorted_distance_with_labels[:, 0] 103 | sorted_labels = sorted_distance_with_labels[:, 1] 104 | sorted_indexes = sorted_distance_with_labels[:, 2] 105 | kept_indexes = sorted_indexes[:n_samples] 106 | 107 | score = label_ranking_average_precision_score(np.array([sorted_labels[:n_samples]]), np.array([sorted_distances[:n_samples]])) 108 | 109 | print("Average precision ranking score for tested element is {}".format(score)) 110 | 111 | original_image = x_test[0] 112 | cv2.imshow('original_image', original_image) 113 | retrieved_images = x_train[int(kept_indexes[0]), :] 114 | for i in range(1, n_samples): 115 | retrieved_images = np.hstack((retrieved_images, x_train[int(kept_indexes[i]), :])) 116 | cv2.imshow('Results', retrieved_images) 117 | cv2.waitKey(0) 118 | 119 | cv2.imwrite('test_results/original_image.jpg', 255 * cv2.resize(original_image, (0,0), fx=3, fy=3)) 120 | cv2.imwrite('test_results/retrieved_results.jpg', 255 * cv2.resize(retrieved_images, (0,0), fx=2, fy=2)) 121 | 122 | 123 | def test_model(n_test_samples, n_train_samples): 124 | learned_codes = encoder.predict(x_train) 125 | learned_codes = learned_codes.reshape(learned_codes.shape[0], learned_codes.shape[1] * learned_codes.shape[2] * learned_codes.shape[3]) 126 | test_codes = encoder.predict(x_test) 127 | test_codes = test_codes.reshape(test_codes.shape[0], test_codes.shape[1] * test_codes.shape[2] * test_codes.shape[3]) 128 | indexes = np.arange(len(y_test)) 129 | np.random.shuffle(indexes) 130 | indexes = indexes[:n_test_samples] 131 | 132 | print('Start computing score for {} train samples'.format(n_train_samples)) 133 | t1 = time.time() 134 | score = 
compute_average_precision_score(test_codes[indexes], y_test[indexes], learned_codes, n_train_samples) 135 | t2 = time.time() 136 | print('Score computed in: ', t2-t1) 137 | print('Model score:', score) 138 | 139 | 140 | def plot_denoised_images(): 141 | denoised_images = autoencoder.predict(x_test_noisy.reshape(x_test_noisy.shape[0], x_test_noisy.shape[1], x_test_noisy.shape[2], 1)) 142 | test_img = x_test_noisy[0] 143 | resized_test_img = cv2.resize(test_img, (280, 280)) 144 | cv2.imshow('input', resized_test_img) 145 | cv2.waitKey(0) 146 | output = denoised_images[0] 147 | resized_output = cv2.resize(output, (280, 280)) 148 | cv2.imshow('output', resized_output) 149 | cv2.waitKey(0) 150 | cv2.imwrite('test_results/noisy_image.jpg', 255 * resized_test_img) 151 | cv2.imwrite('test_results/denoised_image.jpg', 255 * resized_output) 152 | 153 | 154 | # To test the whole model 155 | n_test_samples = 1000 156 | n_train_samples = [10, 50, 100, 200, 300, 400, 500, 750, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000, 157 | 20000, 30000, 40000, 50000, 60000] 158 | 159 | 160 | for n_train_sample in n_train_samples: 161 | test_model(n_test_samples, n_train_sample) 162 | 163 | np.save('computed_data/scores', np.array(scores)) 164 | 165 | 166 | # To retrieve closest image 167 | retrieve_closest_images(x_test[0], y_test[0]) 168 | 169 | 170 | # To plot a denoised image 171 | plot_denoised_images() 172 | 173 | -------------------------------------------------------------------------------- /test_results/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AdilBaaj/unsupervised-image-retrieval/01cd922a9143bbc7edae4df149f545e8bd57174a/test_results/.gitkeep -------------------------------------------------------------------------------- /train_model.py: -------------------------------------------------------------------------------- 1 | from keras.layers import Input, Conv2D, MaxPooling2D, 
UpSampling2D, ZeroPadding2D 2 | from keras.models import Model 3 | from keras.callbacks import TensorBoard 4 | from keras.datasets import mnist 5 | import numpy as np 6 | 7 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 8 | 9 | x_train = x_train.astype('float32') / 255. 10 | x_test = x_test.astype('float32') / 255. 11 | x_train = np.reshape(x_train, (len(x_train), 28, 28, 1)) # adapt this if using `channels_first` image data format 12 | x_test = np.reshape(x_test, (len(x_test), 28, 28, 1)) # adapt this if using `channels_first` image data format 13 | 14 | np.save('x_train') 15 | 16 | noise_factor = 0.5 17 | x_train_noisy = x_train + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_train.shape) 18 | x_test_noisy = x_test + noise_factor * np.random.normal(loc=0.0, scale=1.0, size=x_test.shape) 19 | 20 | x_train_noisy = np.clip(x_train_noisy, 0., 1.) 21 | x_test_noisy = np.clip(x_test_noisy, 0., 1.) 22 | 23 | 24 | def train_model(): 25 | input_img = Input(shape=(28, 28, 1)) # adapt this if using `channels_first` image data format 26 | x = Conv2D(16, (3, 3), activation='relu', padding='same')(input_img) 27 | x = MaxPooling2D((2, 2), padding='same')(x) 28 | x = Conv2D(8, (3, 3), activation='relu', padding='same')(x) 29 | x = MaxPooling2D((2, 2), padding='same')(x) 30 | x = Conv2D(8, (3, 3), activation='relu', padding='same')(x) 31 | encoded = MaxPooling2D((2, 2), padding='same', name='encoder')(x) 32 | 33 | # at this point the representation is (4, 4, 8) i.e. 
128-dimensional 34 | 35 | x = Conv2D(8, (3, 3), activation='relu', padding='same')(encoded) 36 | x = UpSampling2D((2, 2))(x) 37 | x = Conv2D(8, (3, 3), activation='relu', padding='same')(x) 38 | x = UpSampling2D((2, 2))(x) 39 | x = Conv2D(16, (3, 3), activation='relu')(x) 40 | x = UpSampling2D((2, 2))(x) 41 | decoded = Conv2D(1, (3, 3), activation='sigmoid', padding='same')(x) 42 | 43 | autoencoder = Model(input_img, decoded) 44 | autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy') 45 | 46 | autoencoder.fit(x_train_noisy, x_train, 47 | epochs=20, 48 | batch_size=128, 49 | shuffle=True, 50 | validation_data=(x_test_noisy, x_test), 51 | callbacks=[TensorBoard(log_dir='/tmp/tb', histogram_freq=0, write_graph=False)]) 52 | 53 | autoencoder.save('autoencoder.h5') 54 | 55 | train_model() 56 | --------------------------------------------------------------------------------