├── Evaluate.py
├── Feature_Extraction.py
├── GUI
│   ├── Output
│   │   ├── Description.txt
│   │   └── Matched-images.txt
│   ├── index.php
│   ├── result.php
│   ├── search.php
│   └── upload.php
├── Loader.py
├── Model Weights
│   └── model_19.h5
├── Model_handling.py
├── Predict.py
├── Predict_bleu_score.py
├── README.md
├── Text_Preprocess.py
├── demo_video.mp4
├── model.png
└── tokenizer.pkl

--------------------------------------------------------------------------------
/Evaluate.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 21 20:41:32 2018

@author: ishaa
"""

import Loader
import Model_handling
from pickle import dump

from keras.preprocessing.text import Tokenizer
from keras.models import load_model
from nltk.translate.bleu_score import corpus_bleu

dataset_root_dir = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\'
code_root_dir = 'D:\\Study\\Machine Learning\\Codes\\Caption Generator\\Reverse-Image-Search\\'


# flatten the dict of per-image captions into one list of caption strings
def to_lines(descriptions):
    all_desc = list()
    for key in descriptions.keys():
        for des in descriptions[key]:
            all_desc.append(des)
    return all_desc

# Fit a tokenizer on the wrapped captions.
# e.g. "boy on horse" is stored as "startseq boy on horse endseq" and later
# expanded into (photo, input sequence) -> next-word training pairs:
#   X1      X2                          y (next word)
#   photo   startseq                    boy
#   photo   startseq, boy               on
#   photo   startseq, boy, on           horse
#   photo   startseq, boy, on, horse    endseq
def create_tokenizer(descriptions):
    lines = to_lines(descriptions)
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts(lines)
    return tokenizer


# calculate the length of the description with the most words
def max_length(descriptions):
    lines = to_lines(descriptions)
    return max(len(d.split()) for d in lines)


# map an integer index back to its word
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None


def evaluate_model(model, descriptions, photos, tokenizer, max_length):
    actual, predicted = list(), list()
    for key, desc_list in descriptions.items():
        # generate a caption for this photo
        yhat = Model_handling.generate_desc(model, tokenizer, photos[key], max_length)
        # store actual and predicted captions
        references = [d.split() for d in desc_list]
        actual.append(references)
        predicted.append(yhat.split())
    # calculate corpus-level BLEU scores
    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))


# load the training set
trainFile = dataset_root_dir + 'Flickr_8k\\Flickr_8k.trainImages.txt'
train = Loader.load_set(trainFile)
print('Dataset: %d' % len(train))
# descriptions
train_descriptions = Loader.load_clean_descriptions(dataset_root_dir + 'descriptions.txt', train)
print('Descriptions: train=%d' % len(train_descriptions))
# photo features
train_features = Loader.load_photo_features(dataset_root_dir + 'features_resnet.pkl', train)
print('Photos: train=%d' % len(train_features))

tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index) + 1
print('Vocabulary Size: %d' % vocab_size)

# determine the maximum sequence length
# (note: this rebinds the name max_length from the helper above to an int)
max_length = max_length(train_descriptions)
print('Description Length: %d' % max_length)

# define the model
model = Model_handling.define_model(vocab_size, max_length)
# train the model: run the epochs manually and save after each one
epochs = 20
steps = len(train_descriptions)
for i in range(epochs):
    # create the data generator
    generator = Model_handling.data_generator(train_descriptions, train_features, tokenizer, max_length)
    # fit for one epoch
    model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    # save model
    model.save(code_root_dir + 'ResNet50\\model_' + str(i) + '.h5')

# load the test set
testImgList = dataset_root_dir + 'Flickr_8k\\Flickr_8k.testImages.txt'
testImg = Loader.load_set(testImgList)
print('Dataset: %d' % len(testImg))
# descriptions
test_descriptions = Loader.load_clean_descriptions(dataset_root_dir + 'descriptions.txt', testImg)
print('Descriptions: test=%d' % len(test_descriptions))
# photo features
test_features = Loader.load_photo_features(dataset_root_dir + 'features_resnet.pkl', testImg)
print('Photos: test=%d' % len(test_features))

# evaluate the final checkpoint on the test set
weights = code_root_dir + 'ResNet50\\model_19.h5'
model = load_model(weights)
evaluate_model(model, test_descriptions, test_features, tokenizer, max_length)

# save the tokenizer for use at prediction time
dump(tokenizer, open(code_root_dir + 'tokenizer_resnet50.pkl', 'wb'))
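The startseq/endseq prefix expansion described in Evaluate.py's comments is easiest to see on a toy corpus. A minimal, self-contained sketch (the two captions are invented for illustration, not taken from Flickr8k):

from keras.preprocessing.text import Tokenizer

# toy corpus of two already-wrapped captions
captions = ['startseq boy on horse endseq',
            'startseq dog in grass endseq']
tok = Tokenizer()
tok.fit_on_texts(captions)

# every prefix of a caption becomes one training pair:
# input = seq[:i] (padded later), target word = seq[i]
seq = tok.texts_to_sequences(['startseq boy on horse endseq'])[0]
for i in range(1, len(seq)):
    print(seq[:i], '->', seq[i])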
--------------------------------------------------------------------------------
/Feature_Extraction.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 21 20:12:55 2018

@author: ishaa
"""


from os import listdir
from pickle import dump
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.models import Model


# extract features from every photo in the directory
def extract_features(directory):
    # load the model
    model = ResNet50()

    # remove the classification layer so the network outputs the
    # 2048-d vector of the layer before it
    model.layers.pop()
    model = Model(inputs=model.inputs, outputs=model.layers[-1].output)

    features = dict()

    # listdir returns the list of files in the directory
    for name in listdir(directory):
        filename = directory + '/' + name
        # load the image at ResNet50's expected 224x224 input size
        image = load_img(filename, target_size=(224, 224))
        # reshape into a 4D single-image batch for the model
        image = img_to_array(image)
        image = image.reshape(1, image.shape[0], image.shape[1], image.shape[2])

        # preprocess_input converts the pixels to the format the
        # pretrained model was trained on
        image = preprocess_input(image)

        # extract the feature vector
        feature = model.predict(image, verbose=0)
        # the image id is the filename without the .jpg extension
        image_id = name.split('.')[0]
        features[image_id] = feature
        print('.')
    return features

directory = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\Flicker8k_Dataset'
features = extract_features(directory)
print('Extracted features: %d' % len(features))
dump(features, open('D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\features_resnet.pkl', 'wb'))
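One caveat on the model-surgery idiom used in extract_features: in newer Keras versions, model.layers.pop() removes the layer from the list but does not rewire the graph, so the network may still emit the 1000-way softmax. A safer way to obtain the same 2048-d pooled features, if you hit that problem, is this alternative sketch (standard Keras constructor arguments):

from keras.applications.resnet50 import ResNet50

# include_top=False drops the classifier entirely; pooling='avg' adds global
# average pooling, so predict() returns a (1, 2048) feature vector directly
model = ResNet50(include_top=False, pooling='avg')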
--------------------------------------------------------------------------------
/GUI/Output/Description.txt:
--------------------------------------------------------------------------------
startseq two children are playing in the grass endseq
--------------------------------------------------------------------------------
/GUI/Output/Matched-images.txt:
--------------------------------------------------------------------------------
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\416106657_cab2a107a5.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3686924335_3c51e8834a.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3363750526_efcedc47a9.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1517721825_10176d0683.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2708686056_1b8f356264.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1772859261_236c09b861.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3004823335_9b82cbd8a7.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\537559285_29be110134.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3333921867_6cc7d7c73d.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\468310111_d9396abcbd.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2479162876_a5ce3306af.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\359837950_9e22ffe6c2.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1773928579_5664a810dc.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3461041826_0e24cdf597.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\505929313_7668f021ab.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3602838407_bf13e49243.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3610683688_bbe6d725ed.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3227148358_f152303584.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3294209955_a1f1e2cc19.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3613800013_5a54968ab0.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3435035138_af32890a4c.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\925491651_57df3a5b36.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2926233397_71e617f3a3.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1392272228_cf104086e6.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2482629385_f370b290d1.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1523984678_edd68464da.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2938747424_64e64784f0.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2196846255_2c1635359a.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2473791980_805c819bd4.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\219301555_17883a51bd.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3767841911_6678052eb6.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2886411666_72d8b12ce4.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3498327617_d2e3db3ee3.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2607462776_78e639d891.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2280525192_81911f2b00.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\460935487_75b2da7854.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1808370027_2088394eb4.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3070011270_390e597783.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2345984157_724823b1e4.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2878272032_fda05ffac7.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\317488612_70ac35493b.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3472364264_dbde5a8d0a.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3040033126_9f4b88261b.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3123463486_f5b36a3624.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\384577800_fc325af410.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2543589122_ec3e55f434.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3354883962_170d19bfe4.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\270724499_107481c88f.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\500446858_125702b296.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1119015538_e8e796281e.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\339350939_6643bfb270.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\247704641_d883902277.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1267711451_e2a754b4f8.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\493621130_152bdd4e91.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3364151356_eecd07a23e.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2541104331_a2d65cfa54.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3028969146_26929ae0e8.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1404832008_68e432665b.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2518508760_68d8df7365.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\1131800850_89c7ffd477.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3647750811_395fbd397e.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\3540416981_4e74f08cbb.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2534502836_7a75305655.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\300314926_0b2e4b64f5.jpg
D:\Study\Machine Learning\DataSets\Image Caption Generator\Flicker8k_Dataset\2677656448_6b7e7702af.jpg
--------------------------------------------------------------------------------
/GUI/index.php:
--------------------------------------------------------------------------------
[HTML page; the markup did not survive extraction. Page title: "Content Based Image Retrieval" (misspelled "Retreival" in the source). Surviving banner text:]
CONTENT BASED IMAGE RETRIEVAL

[The rest of the page (upload form, search form, styles and scripts) was stripped during extraction.]
--------------------------------------------------------------------------------
/GUI/result.php:
--------------------------------------------------------------------------------
[Most of this page's markup was stripped during extraction. Surviving page text:]
RESULT
Input Image:
Similar Images:

[Surviving PHP fragment; the preceding lines that open $inputFile and start the loop over the matched images were lost:]

if (($line = fgets($inputFile)))
{
    echo "\n";
    echo "$line";   // an HTML tag following $line inside the string was stripped
}
else
{
    echo "Image '$image' has no metadata";
}
}
fclose($inputFile);
?>
--------------------------------------------------------------------------------
/GUI/search.php:
--------------------------------------------------------------------------------
[Most of this page's markup was stripped during extraction. Surviving page text:]
RESULT
Input Search Term:
Related Images:

[Surviving PHP fragment, identical in shape to result.php's output loop:]

if (($line = fgets($inputFile)))
{
    echo "\n";
    echo "$line";   // an HTML tag following $line inside the string was stripped
}
else
{
    echo "Image '$image' has no metadata";
}
}
fclose($inputFile);
?>
--------------------------------------------------------------------------------
/GUI/upload.php:
--------------------------------------------------------------------------------
[PHP upload handler; its content was stripped during extraction.]
--------------------------------------------------------------------------------
/Loader.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 5 08:58:40 2018

@author: ishaa
"""

import pickle

def load_doc(filename):
    file = open(filename, 'r')
    text = file.read()
    file.close()
    return text

# load the image identifiers from a split file such as Flickr_8k.trainImages.txt;
# each line holds one filename, e.g. 2513260012_03d33305cf.jpg
def load_set(filename):
    doc = load_doc(filename)
    dataset = list()

    for line in doc.split('\n'):
        if len(line) < 1:
            continue
        # keep the identifier without the .jpg extension
        identifier = line.split('.')[0]
        dataset.append(identifier)
    return set(dataset)

# load the cleaned descriptions, e.g.
# 1000268201_693b08cb0e child in pink dress is climbing up set of stairs in an entry way
# and wrap each one in start and end tokens
def load_clean_descriptions(filename, dataset):
    doc = load_doc(filename)
    descriptions = dict()

    for line in doc.split('\n'):
        tokens = line.split()
        # skip blank or malformed lines
        if len(tokens) < 2:
            continue
        image_id, image_desc = tokens[0], tokens[1:]
        # ignore image ids that are not in the requested split
        if image_id in dataset:
            if image_id not in descriptions:
                descriptions[image_id] = list()
            desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
            descriptions[image_id].append(desc)
    return descriptions


# load photo features from features_resnet.pkl, keeping only the requested split
def load_photo_features(filename, dataset):
    # load all features
    all_features = pickle.load(open(filename, 'rb'))
    # filter features
    features = {k: all_features[k] for k in dataset}
    return features
--------------------------------------------------------------------------------
/Model Weights/model_19.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhinav23dixit/Text-and-Content-Based-Image-Retrieval/4c8c29390c009c8e54bb1e2d882a024d1cfc01ec/Model Weights/model_19.h5
--------------------------------------------------------------------------------
/Model_handling.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 5 09:03:06 2018

@author: ishaa
"""
from numpy import argmax
from numpy import array
from keras.utils import plot_model
from keras.models import Model
from keras.layers import Input
from keras.layers import LSTM
from keras.layers import Embedding
from keras.layers import Dropout
from keras.layers.merge import add
from keras.layers import Dense
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

from keras.applications.resnet50 import ResNet50
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
from keras.applications.resnet50 import preprocess_input

# map an integer index back to its word
def word_for_id(integer, tokenizer):
    for word, index in tokenizer.word_index.items():
        if index == integer:
            return word
    return None


def define_model(vocab_size, max_length):
    # feature extractor model: 2048-d ResNet feature -> 256-d dense
    inputs1 = Input(shape=(2048,))
    fe1 = Dropout(0.4)(inputs1)
    fe2 = Dense(256, activation='relu')(fe1)
    # sequence model: word indices -> embedding -> LSTM state
    inputs2 = Input(shape=(max_length,))
    se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
    se2 = Dropout(0.5)(se1)
    se3 = LSTM(256)(se2)
    # decoder model: merge both representations and predict the next word
    decoder1 = add([fe2, se3])
    decoder2 = Dense(256, activation='relu')(decoder1)
    outputs = Dense(vocab_size, activation='softmax')(decoder2)
    # tie it together: [image, input sequence] -> next word
    model = Model(inputs=[inputs1, inputs2], outputs=outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam')
    # summarize model
    model.summary()
    plot_model(model, to_file='model.png', show_shapes=True)
    return model

# data generator, intended to be used in a call to model.fit_generator()
def data_generator(descriptions, photos, tokenizer, max_length):
    # loop forever over the images
    while 1:
        for key, desc_list in descriptions.items():
            # retrieve the photo feature
            photo = photos[key][0]
            in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo)
            yield [[in_img, in_seq], out_word]


def generate_desc(model, tokenizer, photo, max_length):
    # seed the generation with the start token
    in_text = 'startseq'
    # repeatedly append the next predicted word until 'endseq' (or the length cap)
    for i in range(max_length):
        # encode the text so far as a sequence of integers
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        # pad to the fixed input length
        sequence = pad_sequences([sequence], maxlen=max_length)
        # predict a probability distribution over the vocabulary
        yhat = model.predict([photo, sequence], verbose=0)
        # greedy decoding: take the highest-probability word
        yhat = argmax(yhat)
        # map the integer back to a word
        word = word_for_id(yhat, tokenizer)
        if word is None:
            break
        in_text += ' ' + word
        if word == "endseq":
            break
    return in_text


# create sequences of images, input sequences and output words for an image
def create_sequences(tokenizer, max_length, desc_list, photo):
    X1, X2, y = list(), list(), list()
    # walk through each description for the image
    for desc in desc_list:
        # encode the sequence
        seq = tokenizer.texts_to_sequences([desc])[0]
        # split one sequence into multiple X,y pairs
        for i in range(1, len(seq)):
            # split into input and output pair
            in_seq, out_seq = seq[:i], seq[i]
            # pad input sequence
            in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
            # one-hot encode the output word (7579 is the hard-coded vocabulary
            # size from training; keep it in sync with vocab_size)
            out_seq = to_categorical([out_seq], num_classes=7579)[0]
            # store
            X1.append(photo)
            X2.append(in_seq)
            y.append(out_seq)
    return array(X1), array(X2), array(y)

def extract_features(filename):
    # load the model
    model = ResNet50()
    # re-structure the model to output the 2048-d penultimate features
    model.layers.pop()
    model = Model(inputs=model.inputs, outputs=model.layers[-1].output)
    # load the photo
    image = load_img(filename, target_size=(224, 224))
    # convert the image pixels to a numpy array
    image = img_to_array(image)
    # reshape data for the model
    image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
    # prepare the image for the ResNet model
    image = preprocess_input(image)
    # get features
    feature = model.predict(image, verbose=0)
    return feature
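Pulling these pieces together, single-image captioning at inference time looks like the following. A minimal sketch assuming the tokenizer pickle and the trained weights produced by Evaluate.py sit in the working directory (the paths are illustrative; max_length=34 is the value Predict.py uses):

from pickle import load
from keras.models import load_model
import Model_handling

# illustrative file locations; substitute your own
model = load_model('model_19.h5')
tokenizer = load(open('tokenizer_resnet50.pkl', 'rb'))

photo = Model_handling.extract_features('example.jpg')   # (1, 2048) feature
caption = Model_handling.generate_desc(model, tokenizer, photo, max_length=34)
print(caption)   # e.g. 'startseq two children are playing in the grass endseq'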
--------------------------------------------------------------------------------
/Predict.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 5 13:30:35 2018

@author: ishaa
"""
import Loader
import Text_Preprocess
from pickle import load
from keras.models import load_model
from Model_handling import extract_features
from Model_handling import generate_desc
from shutil import copyfile
import os

from nltk.translate.bleu_score import corpus_bleu

dataset_root_dir = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\'
code_root_dir = 'D:\\Study\\Machine Learning\\Codes\\Caption Generator\\Reverse-Image-Search\\'
weights = code_root_dir + 'ResNet50\\model_19.h5'
model = load_model(weights)

# load the tokenizer
tokenizer = load(open(code_root_dir + 'tokenizer_resnet50.pkl', 'rb'))
# pre-defined max sequence length (from training)
max_length = 34


# load and prepare the uploaded photograph
photo = extract_features('C:\\xampp\\htdocs\\uploads\\file.jpg')
# generate a description and strip the start/end tokens for display
predicted_description = generate_desc(model, tokenizer, photo, max_length)
print_description = ' '.join(predicted_description.split(' ')[1:-1])

desc_file = open('C:\\xampp\\htdocs\\uploads\\description.txt', "w")
desc_file.write(print_description)
desc_file.close()

# compare the predicted caption against every dataset caption and keep the
# images whose captions score highest on a weighted BLEU combination
testFile = dataset_root_dir + 'Flickr_8k\\Flickr_8k.txt'
testImagesLabel = Loader.load_set(testFile)
test_descriptions = Loader.load_clean_descriptions(dataset_root_dir + 'descriptions.txt', testImagesLabel)

matchedFiles = set()

for img in testImagesLabel:
    # cap the result set at roughly 50 images
    if len(matchedFiles) > 50:
        break
    actual, predicted = list(), list()
    yhat = predicted_description.split()
    predicted.append(yhat)
    references = [d.split() for d in test_descriptions[img]]
    actual.append(references)
    bleu_score_1 = corpus_bleu(actual, predicted, weights=(1, 0, 0, 0))
    bleu_score_2 = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
    bleu_score_3 = corpus_bleu(actual, predicted, weights=(0.33, 0.33, 0.34, 0))
    bleu_score_4 = corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25))
    # weighted combination that favours longer n-gram agreement
    bleu_score = (8*bleu_score_4 + 4*bleu_score_3 + 2*bleu_score_2 + bleu_score_1) / 15
    if bleu_score > 0.5:
        matchedFiles.add(img)

path = dataset_root_dir + 'Flicker8k_Dataset\\'

matched_img_file = open('C:\\xampp\\htdocs\\uploads\\matched_images.txt', "w")

# clear out any previously matched images
folder = 'C:\\xampp\\htdocs\\uploads\\matched-images'
for the_file in os.listdir(folder):
    file_path = os.path.join(folder, the_file)
    try:
        if os.path.isfile(file_path):
            os.unlink(file_path)
        # elif os.path.isdir(file_path): shutil.rmtree(file_path)
    except Exception as e:
        print(e)

# copy the matched images over and record their original captions
desc_text = Text_Preprocess.load_text(dataset_root_dir + 'Flickr_8k\\Flickr8k.token.txt')
descriptions = Text_Preprocess.load_description(desc_text)
i = 0
for img in matchedFiles:
    img_path = path + img + '.jpg'
    i += 1
    matched_img_file.write(descriptions[img][0] + '\n')
    copyfile(img_path, folder + '\\' + format(i, '03d') + '.jpg')

matched_img_file.close()
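The ad-hoc retrieval criterion above can be read as a single weighted BLEU score. A standalone sketch of the same combination (the weights and the 0.5 threshold mirror this script; the example tokens are invented):

from nltk.translate.bleu_score import corpus_bleu

def weighted_bleu(references, candidate):
    # references: list of token lists for one image's captions
    # candidate: token list for the predicted caption
    actual, predicted = [references], [candidate]
    b1 = corpus_bleu(actual, predicted, weights=(1, 0, 0, 0))
    b2 = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
    b3 = corpus_bleu(actual, predicted, weights=(0.33, 0.33, 0.34, 0))
    b4 = corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25))
    # 4-gram agreement counts eight times as much as unigram agreement
    return (8*b4 + 4*b3 + 2*b2 + b1) / 15

refs = [['two', 'children', 'are', 'playing', 'in', 'the', 'grass']]
cand = ['two', 'children', 'are', 'playing', 'in', 'the', 'grass']
print(weighted_bleu(refs, cand))   # 1.0 for an exact match; keep the image if > 0.5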
--------------------------------------------------------------------------------
/Predict_bleu_score.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Mon Nov 12 21:54:01 2018

@author: ishaa
"""
import Loader
import string
import Text_Preprocess
from nltk.translate.bleu_score import corpus_bleu
from shutil import copyfile
import os


dataset_root_dir = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\'
code_root_dir = 'D:\\Study\\Machine Learning\\Codes\\Caption Generator\\Reverse-Image-Search\\'


# read the user's search phrase written out by the GUI
input_file = open('C:\\xampp\\htdocs\\uploads\\description.txt', 'r')
predicted_description = input_file.readline()

# clean the phrase the same way the training captions were cleaned:
# lowercase, strip punctuation, drop one-letter and non-alphabetic tokens
table = str.maketrans('', '', string.punctuation)

desc = predicted_description.split()
desc = [word.lower() for word in desc]
desc = [word.translate(table) for word in desc]
desc = [word for word in desc if len(word) > 1]
desc = [word for word in desc if word.isalpha()]
predicted_description = ' '.join(desc)

testFile = dataset_root_dir + 'Flickr_8k\\Flickr_8k.txt'
testImagesLabel = Loader.load_set(testFile)
test_descriptions = Loader.load_clean_descriptions(dataset_root_dir + 'descriptions.txt', testImagesLabel)

matchedFiles = set()

for img in testImagesLabel:
    actual, predicted = list(), list()
    yhat = predicted_description.split()
    predicted.append(yhat)
    references = [d.split() for d in test_descriptions[img]]
    actual.append(references)
    bleu_score_1 = corpus_bleu(actual, predicted, weights=(1, 0, 0, 0))
    bleu_score_2 = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
    bleu_score_3 = corpus_bleu(actual, predicted, weights=(0.33, 0.33, 0.34, 0))
    bleu_score_4 = corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25))
    # weighted combination; note the weights and threshold differ from Predict.py
    bleu_score = (4*bleu_score_4 + 3*bleu_score_3 + 2*bleu_score_2 + bleu_score_1) / 10
    if bleu_score > 0.4:
        matchedFiles.add(img)

path = dataset_root_dir + 'Flicker8k_Dataset\\'

matched_img_file = open('C:\\xampp\\htdocs\\uploads\\matched_images.txt', "w")

# clear out any previously matched images
folder = 'C:\\xampp\\htdocs\\uploads\\matched-images'
for the_file in os.listdir(folder):
    file_path = os.path.join(folder, the_file)
    try:
        if os.path.isfile(file_path):
            os.unlink(file_path)
        # elif os.path.isdir(file_path): shutil.rmtree(file_path)
    except Exception as e:
        print(e)

# copy the matched images over and record their original captions
desc_text = Text_Preprocess.load_text(dataset_root_dir + 'Flickr_8k\\Flickr8k.token.txt')
descriptions = Text_Preprocess.load_description(desc_text)

i = 0
for img in matchedFiles:
    img_path = path + img + '.jpg'
    i += 1
    matched_img_file.write(descriptions[img][0] + '\n')
    copyfile(img_path, folder + '\\' + format(i, '03d') + '.jpg')

matched_img_file.close()
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Text and Content Based Image Retrieval

An image-captioning-based image retrieval model that can be used both via a GUI and from the command line.

## Contents

- [Introduction](#introduction)
- [Features](#features)
- [Demo Video](#video)
- [Contributors](#contributors)
- [Contribute](#contribute)
- [Acknowledgement](#acknowledgement)
- [Usage (command line)](#usage-command-line)

## Introduction
This is a Python-based image retrieval model built around a deep learning image caption generator. It uses a merge model combining a Convolutional Neural Network (CNN) with a Long Short-Term Memory network (LSTM). The dataset used here is the Flickr8k dataset; you can request it [here](https://forms.illinois.edu/sec/1713398).

## Features
The model can be used both via the GUI and from the command line. In both modes, an image can be retrieved by:
* a description of the image you want to retrieve, or
* an image semantically similar to the images you want to retrieve (see the Usage sketch at the end of this README).

Note: the model performs well when the query text or image is semantically similar to the images in the dataset.

## Video
[![Content and Text Based Image Retrieval](https://img.youtube.com/vi/WT5GZaArBpM/maxresdefault.jpg)](https://www.youtube.com/watch?v=WT5GZaArBpM&feature=youtu.be "Content and Text Based Image Retrieval")

## Contributors

[The contributor list was HTML markup and did not survive extraction.]

## Contribute
* Fork this repository and contribute.
* Feel free to report bugs.
* All types of feedback are welcome.

## Acknowledgement
* Thanks to [Keras](https://keras.io/) for providing the implementations of the deep learning building blocks.
* A special thanks to [Machine Learning Mastery](https://machinelearningmastery.com/), without which we couldn't have found the right approach to tackle this problem.
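## Usage (command line)

A sketch of the end-to-end command-line workflow. The scripts use hard-coded Windows paths (under `D:\Study\...` and `C:\xampp\htdocs\uploads\`), so edit those constants to match your machine before running anything:

1. `python Feature_Extraction.py` — extract a 2048-d ResNet50 feature for every dataset image into `features_resnet.pkl`.
2. `python Text_Preprocess.py` — clean the raw Flickr8k token file into `descriptions.txt`.
3. `python Evaluate.py` — train the caption model (one checkpoint saved per epoch), print BLEU scores on the test split, and save the tokenizer.
4. `python Predict.py` — caption an uploaded image and collect dataset images whose captions score high against it, or `python Predict_bleu_score.py` — retrieve images matching a text query.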
--------------------------------------------------------------------------------
/Text_Preprocess.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 21 23:49:33 2018

@author: ishaa
"""

import string


# Token file format, e.g.:
# 1000268201_693b08cb0e.jpg#0 A child in a pink dress is climbing up a set of stairs in an entry way .
# 1000268201_693b08cb0e.jpg#1 A girl going into a wooden building .

def load_text(filename):
    file = open(filename, 'r')
    text = file.read()
    file.close()
    return text

def load_description(doc):
    mapping = dict()
    # one entry per line
    for line in doc.split('\n'):
        tokens = line.split()
        # fewer than two tokens means a blank or malformed entry
        if len(tokens) < 2:
            continue
        # first token is the image id, the rest is the description
        image_id, image_desc = tokens[0], tokens[1:]
        image_id = image_id.split('.')[0]

        # convert the description tokens back to a string
        image_desc = ' '.join(image_desc)
        # collect all descriptions of a given image in one list
        if image_id not in mapping:
            mapping[image_id] = list()
        mapping[image_id].append(image_desc)
    return mapping

# Clean the descriptions in place: lowercase, remove punctuation, drop
# one-letter words and words containing numbers
def clean_description(descriptions):
    # translation table for removing punctuation
    # (param1 chars are replaced by param2 chars; param3 chars are removed)
    table = str.maketrans('', '', string.punctuation)

    for key, desc_list in descriptions.items():
        for i in range(len(desc_list)):
            desc = desc_list[i]
            # tokenize
            desc = desc.split()
            # to lower
            desc = [word.lower() for word in desc]
            # remove punctuation
            desc = [word.translate(table) for word in desc]
            # drop one-letter words
            desc = [word for word in desc if len(word) > 1]
            # drop tokens containing numbers
            desc = [word for word in desc if word.isalpha()]
            # re-join into a description string
            desc_list[i] = ' '.join(desc)

def save_description(descriptions, filename):
    lines = list()
    for key, desc_list in descriptions.items():
        for desc in desc_list:
            lines.append(key + ' ' + desc)
    data = '\n'.join(lines)
    file = open(filename, 'w')
    file.write(data)
    file.close()

# convert the loaded descriptions into a vocabulary of words
def to_vocabulary(descriptions):
    # build a set of every word used across all description strings
    all_desc = set()
    for key in descriptions.keys():
        [all_desc.update(d.split()) for d in descriptions[key]]
    return all_desc

# run the preprocessing only when executed directly; Predict.py and
# Predict_bleu_score.py import this module just for its helper functions
if __name__ == '__main__':
    tokenFile = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\Flickr_8k\\Flickr8k.token.txt'
    # load descriptions
    doc = load_text(tokenFile)
    # parse descriptions
    descriptions = load_description(doc)
    print('Loaded: %d ' % len(descriptions))
    # clean descriptions
    clean_description(descriptions)
    # summarize vocabulary
    vocabulary = to_vocabulary(descriptions)
    print('Vocabulary Size: %d' % len(vocabulary))
    # save to file
    descrOut = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\descriptions.txt'
    save_description(descriptions, descrOut)
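A worked example of this cleaning pass on the first caption from the token-file excerpt above; the output is exactly the cleaned form quoted in Loader.py's comments (self-contained sketch):

import string

table = str.maketrans('', '', string.punctuation)
raw = 'A child in a pink dress is climbing up a set of stairs in an entry way .'

words = raw.split()
words = [w.lower() for w in words]            # lowercase
words = [w.translate(table) for w in words]   # strip punctuation
words = [w for w in words if len(w) > 1]      # drop one-letter words
words = [w for w in words if w.isalpha()]     # drop non-alphabetic tokens
print(' '.join(words))
# -> child in pink dress is climbing up set of stairs in an entry way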
--------------------------------------------------------------------------------
/demo_video.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhinav23dixit/Text-and-Content-Based-Image-Retrieval/4c8c29390c009c8e54bb1e2d882a024d1cfc01ec/demo_video.mp4
--------------------------------------------------------------------------------
/model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhinav23dixit/Text-and-Content-Based-Image-Retrieval/4c8c29390c009c8e54bb1e2d882a024d1cfc01ec/model.png
--------------------------------------------------------------------------------
/tokenizer.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhinav23dixit/Text-and-Content-Based-Image-Retrieval/4c8c29390c009c8e54bb1e2d882a024d1cfc01ec/tokenizer.pkl
--------------------------------------------------------------------------------