";
62 |     }
63 |     else
64 |     {
65 |         echo "Image '$image' has no metadata";
66 |     }
67 | }
68 | fclose($inputFile);
69 |
70 |
71 | ?>
72 |
73 |
74 |
75 |
76 |
--------------------------------------------------------------------------------
/GUI/upload.php:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/Loader.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 5 08:58:40 2018
4 |
5 | @author: ishaa
6 | """
7 |
8 | import pickle
9 |
10 | def load_doc(filename):
11 |     file = open(filename, 'r')
12 |     text = file.read()
13 |     file.close()
14 |     return text
15 |
16 | #Image Identifier --- TrainImage.txt
17 | #2513260012_03d33305cf.jpg
18 | def load_set(filename):
19 |     doc = load_doc(filename)
20 |     dataset = list()
21 | 
22 |     for line in doc.split('\n'):
23 |         if len(line) < 1:
24 |             continue
25 |         identifier = line.split('.')[0]
26 |         dataset.append(identifier)
27 |     return set(dataset)
28 |
29 | #load cleaned description
30 | #1000268201_693b08cb0e child in pink dress is climbing up set of stairs in an entry way
31 | #assign start and end token to the description
32 | def load_clean_descriptions(filename, dataset):
33 |     doc = load_doc(filename)
34 |     descriptions = dict()
35 | 
36 |     for line in doc.split('\n'):
37 |         tokens = line.split()
38 |         # skip empty lines
39 |         if len(tokens) < 1:
40 |             continue
41 |         image_id, image_desc = tokens[0], tokens[1:]
42 |         # ignore image ids that are not in the dataset
43 |         if image_id in dataset:
44 |             if image_id not in descriptions:
45 |                 descriptions[image_id] = list()
46 |             # assign start and end tokens to the description
47 |             desc = 'startseq ' + ' '.join(image_desc) + ' endseq'
48 |             descriptions[image_id].append(desc)
49 |     return descriptions
50 | 
51 | 
52 | # load photo features from features.pkl
53 | def load_photo_features(filename, dataset):
54 |     # load all features
55 |     with open(filename, 'rb') as f:
56 |         all_features = pickle.load(f)
57 |     # keep only the features for images in the dataset
58 |     features = {k: all_features[k] for k in dataset}
59 |     return features
60 | 
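A short usage sketch of the three loaders above. The file names here are illustrative assumptions — substitute the actual split file, cleaned-descriptions file and pickled feature file from your setup:

    # hypothetical file names; adjust paths to your dataset layout
    train = load_set('Flickr_8k.trainImages.txt')
    train_descriptions = load_clean_descriptions('descriptions.txt', train)
    train_features = load_photo_features('features.pkl', train)
    print('Dataset: %d images, %d with descriptions' % (len(train), len(train_descriptions)))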
--------------------------------------------------------------------------------
/Model Weights/model_19.h5:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/abhinav23dixit/Text-and-Content-Based-Image-Retrieval/4c8c29390c009c8e54bb1e2d882a024d1cfc01ec/Model Weights/model_19.h5
--------------------------------------------------------------------------------
/Model_handling.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 5 09:03:06 2018
4 |
5 | @author: ishaa
6 | """
7 | from numpy import argmax
8 | from numpy import array
9 | from keras.utils import plot_model
10 | from keras.models import Model
11 | from keras.layers import Input
12 | from keras.layers import LSTM
13 | from keras.layers import Embedding
14 | from keras.layers import Dropout
15 | from keras.layers.merge import add
16 | from keras.layers import Dense
17 | from keras.preprocessing.sequence import pad_sequences
18 | from keras.utils import to_categorical
19 |
20 | from keras.applications.resnet50 import ResNet50
21 | from keras.preprocessing.image import load_img
22 | from keras.preprocessing.image import img_to_array
23 | from keras.applications.resnet50 import preprocess_input
24 |
25 | #mapping of integer to word
26 | def word_for_id(integer, tokenizer):
27 |     for word, index in tokenizer.word_index.items():
28 |         if index == integer:
29 |             return word
30 |     return None
31 |
32 |
33 | def define_model(vocab_size, max_length):
34 |     # feature extractor model
35 |     inputs1 = Input(shape=(2048,))
36 |     fe1 = Dropout(0.4)(inputs1)
37 |     fe2 = Dense(256, activation='relu')(fe1)
38 |     # sequence model
39 |     inputs2 = Input(shape=(max_length,))
40 |     se1 = Embedding(vocab_size, 256, mask_zero=True)(inputs2)
41 |     se2 = Dropout(0.5)(se1)
42 |     se3 = LSTM(256)(se2)
43 |     # decoder model
44 |     decoder1 = add([fe2, se3])
45 |     decoder2 = Dense(256, activation='relu')(decoder1)
46 |     outputs = Dense(vocab_size, activation='softmax')(decoder2)
47 |     # tie it together: [image, seq] -> word
48 |     model = Model(inputs=[inputs1, inputs2], outputs=outputs)
49 |     model.compile(loss='categorical_crossentropy', optimizer='adam')
50 |     # summarize model
51 |     print(model.summary())
52 |     plot_model(model, to_file='model.png', show_shapes=True)
53 |     return model
54 |
55 | # data generator, intended to be used in a call to model.fit_generator()
56 | def data_generator(descriptions, photos, tokenizer, max_length):
57 |     # loop forever over images
58 |     while True:
59 |         for key, desc_list in descriptions.items():
60 |             # retrieve the photo feature
61 |             photo = photos[key][0]
62 |             in_img, in_seq, out_word = create_sequences(tokenizer, max_length, desc_list, photo)
63 |             yield ([in_img, in_seq], out_word)
64 |
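As the comment above says, the generator is meant to drive model.fit_generator(). A minimal training sketch, assuming descriptions, features, tokenizer, vocab_size and max_length have already been prepared (e.g. with Loader.py and a fitted Tokenizer); the epoch count and save names are illustrative, not taken from this repository:

    model = define_model(vocab_size, max_length)
    steps = len(descriptions)  # one batch per image per epoch
    for i in range(20):
        generator = data_generator(descriptions, features, tokenizer, max_length)
        model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
        model.save('model_' + str(i) + '.h5')  # e.g. model_19.h5 after the last epoch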
65 |
66 | def generate_desc(model, tokenizer, photo, max_length):
67 |     # seed the generation with the start token
68 |     in_text = 'startseq'
69 |     # repeatedly generate the next word until 'endseq' or max_length is reached
70 |     for i in range(max_length):
71 |         # encode the text generated so far as a sequence of integers
72 |         sequence = tokenizer.texts_to_sequences([in_text])[0]
73 |         # pad to the fixed input length
74 |         sequence = pad_sequences([sequence], maxlen=max_length)
75 |         # predict a probability distribution over the vocabulary
76 |         yhat = model.predict([photo, sequence], verbose=0)
77 |         # greedy decoding: take the most probable word index
78 |         yhat = argmax(yhat)
79 |         # map integer to word
80 |         word = word_for_id(yhat, tokenizer)
81 |         if word is None:
82 |             break
83 |         in_text += ' ' + word
84 |         if word == 'endseq':
85 |             break
86 |     return in_text
87 |
88 |
89 | # create sequences of images, input sequences and output words for an image
90 | def create_sequences(tokenizer, max_length, desc_list, photo):
91 |     X1, X2, y = list(), list(), list()
92 |     # walk through each description for the image
93 |     for desc in desc_list:
94 |         # encode the sequence
95 |         seq = tokenizer.texts_to_sequences([desc])[0]
96 |         # split one sequence into multiple X,y pairs
97 |         for i in range(1, len(seq)):
98 |             # split into input and output pair
99 |             in_seq, out_seq = seq[:i], seq[i]
100 |             # pad input sequence
101 |             in_seq = pad_sequences([in_seq], maxlen=max_length)[0]
102 |             # one-hot encode the output word (7579 is the hard-coded training vocabulary size)
103 |             out_seq = to_categorical([out_seq], num_classes=7579)[0]
104 |             # store
105 |             X1.append(photo)
106 |             X2.append(in_seq)
107 |             y.append(out_seq)
108 |     return array(X1), array(X2), array(y)
109 |
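To make the expansion concrete, here is a toy run of create_sequences with made-up data (the caption and feature vector are hypothetical): one four-token caption yields three (photo, partial sequence, next word) training pairs.

    from keras.preprocessing.text import Tokenizer
    import numpy as np

    toy_tokenizer = Tokenizer()
    toy_tokenizer.fit_on_texts(['startseq dog runs endseq'])
    toy_photo = np.zeros(2048)  # stand-in for a real ResNet50 feature vector
    X1, X2, y = create_sequences(toy_tokenizer, 4, ['startseq dog runs endseq'], toy_photo)
    # X2[0] encodes 'startseq' and y[0] one-hot encodes 'dog';
    # X2[1] encodes 'startseq dog' and y[1] encodes 'runs'; and so on.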
110 | def extract_features(filename):
111 |     # load the ResNet50 model pre-trained on ImageNet
112 |     model = ResNet50()
113 |     # drop the final classification layer so the model outputs image features
114 |     model.layers.pop()
115 |     model = Model(inputs=model.inputs, outputs=model.layers[-1].output)
116 |     # load the photo
117 |     image = load_img(filename, target_size=(224, 224))
118 |     # convert the image pixels to a numpy array
119 |     image = img_to_array(image)
120 |     # reshape data for the model (add a batch dimension)
121 |     image = image.reshape((1, image.shape[0], image.shape[1], image.shape[2]))
122 |     # prepare the image for ResNet50
123 |     image = preprocess_input(image)
124 |     # get features
125 |     feature = model.predict(image, verbose=0)
126 |     return feature
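The returned array has shape (1, 2048), matching the Input(shape=(2048,)) branch of define_model. A quick check (the image path is hypothetical):

    feature = extract_features('example.jpg')
    print(feature.shape)  # (1, 2048)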
--------------------------------------------------------------------------------
/Predict.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 5 13:30:35 2018
4 |
5 | @author: ishaa
6 | """
7 | import Loader
8 | import Text_Preprocess
9 | from pickle import load
10 | from keras.models import load_model
11 | from Model_handling import extract_features
12 | from Model_handling import generate_desc
13 | from shutil import copyfile
14 | import os
15 |
16 | from nltk.translate.bleu_score import corpus_bleu
17 | from PIL import Image
18 |
19 | dataset_root_dir = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\'
20 | code_root_dir = 'D:\\Study\\Machine Learning\\Codes\\Caption Generator\\Reverse-Image-Search\\'
21 | weights = code_root_dir + 'ResNet50\\model_19.h5'
22 | model = load_model(weights)
23 |
24 | # load the tokenizer
25 | tokenizer = load(open(code_root_dir + 'tokenizer_resnet50.pkl', 'rb'))
26 | # pre-define the max sequence length (from training)
27 | max_length = 34
28 |
29 |
30 | # load and prepare the photograph
31 | photo = extract_features('C:\\xampp\\htdocs\\uploads\\file.jpg')
32 | # generate description
33 | predicted_description = generate_desc(model, tokenizer, photo, max_length)
34 | print_description = ' '.join(predicted_description.split(' ')[1:-1])  # drop the startseq/endseq tokens
35 |
36 | desc_file = open('C:\\xampp\\htdocs\\uploads\\description.txt',"w")
37 | desc_file.write(print_description)
38 | desc_file.close()
39 |
40 | testFile = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\Flickr_8k\\Flickr_8k.txt'
41 | testImagesLabel = Loader.load_set(testFile)
42 | test_descriptions = Loader.load_clean_descriptions(dataset_root_dir + 'descriptions.txt', testImagesLabel)
43 |
44 | matchedFiles = set()
45 |
46 | for img in testImagesLabel:
47 |     if len(matchedFiles) > 50:
48 |         break
49 |     actual, predicted = list(), list()
50 |     yhat = predicted_description.split()
51 |     predicted.append(yhat)
52 |     references = [d.split() for d in test_descriptions[img]]
53 |     actual.append(references)
54 |     bleu_score_1 = corpus_bleu(actual, predicted, weights=(1, 0, 0, 0))
55 |     bleu_score_2 = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
56 |     bleu_score_3 = corpus_bleu(actual, predicted, weights=(0.33, 0.33, 0.34, 0))
57 |     bleu_score_4 = corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25))
58 |     # weighted combination favouring higher-order n-gram overlap (8+4+2+1 = 15)
59 |     bleu_score = (8*bleu_score_4 + 4*bleu_score_3 + 2*bleu_score_2 + bleu_score_1)/15
60 |     if bleu_score > 0.5:
61 |         matchedFiles.add(img)
62 |
63 | print('Matched %d images' % len(matchedFiles))
64 |
65 | path = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\Flicker8k_Dataset\\'
66 |
67 | matched_img_file = open('C:\\xampp\\htdocs\\uploads\\matched_images.txt',"w")
68 |
69 | folder = 'C:\\xampp\\htdocs\\uploads\\matched-images'
70 | for the_file in os.listdir(folder):
71 |     file_path = os.path.join(folder, the_file)
72 |     try:
73 |         if os.path.isfile(file_path):
74 |             os.unlink(file_path)
75 |         #elif os.path.isdir(file_path): shutil.rmtree(file_path)
76 |     except Exception as e:
77 |         print(e)
78 |
79 | desc_text = Text_Preprocess.load_text(dataset_root_dir + 'Flickr_8k\\Flickr8k.token.txt')
80 | descriptions = Text_Preprocess.load_description(desc_text)
81 | i = 0
82 | for img in matchedFiles:
83 |     img_path = path + img + '.jpg'
84 |     i += 1
85 |     matched_img_file.write(descriptions[img][0] + '\n')
86 |     copyfile(img_path, folder + '\\' + format(i, '03d') + '.jpg')
87 |
88 | # matchedFiles now holds the identifiers of all retrieved images
89 | matched_img_file.close()
90 |
91 |
--------------------------------------------------------------------------------
/Predict_bleu_score.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 12 21:54:01 2018
4 |
5 | @author: ishaa
6 | """
7 | import Loader
8 | import string
9 | import Text_Preprocess
10 | from nltk.translate.bleu_score import corpus_bleu
11 | from shutil import copyfile
12 | import os
13 |
14 |
15 | dataset_root_dir = 'D:\\Study\\Machine Learning\\DataSets\\Image Caption Generator\\'
16 | code_root_dir = 'D:\\Study\\Machine Learning\\Codes\\Caption Generator\\Reverse-Image-Search\\'
17 |
18 |
19 | input_file = open('C:\\xampp\\htdocs\\uploads\\description.txt', 'r')
20 | predicted_description = input_file.readline()
21 |
22 | table = str.maketrans('','',string.punctuation)
23 |
24 | desc = predicted_description.split()
25 | desc = [word.lower() for word in desc]
26 | desc = [word.translate(table) for word in desc]
27 | desc = [word for word in desc if len(word)>1]
28 | desc = [word for word in desc if word.isalpha()]
29 | predicted_description = ' '.join(desc)
30 |
31 | testFile = dataset_root_dir + 'Flickr_8k\\Flickr_8k.txt'
32 | testImagesLabel = Loader.load_set(testFile)
33 | test_descriptions = Loader.load_clean_descriptions(dataset_root_dir + 'descriptions.txt', testImagesLabel)
34 |
35 | matchedFiles = set()
36 |
37 | for img in testImagesLabel:
38 |     actual, predicted = list(), list()
39 |     yhat = predicted_description.split()
40 |     predicted.append(yhat)
41 |     references = [d.split() for d in test_descriptions[img]]
42 |     actual.append(references)
43 |     bleu_score_1 = corpus_bleu(actual, predicted, weights=(1, 0, 0, 0))
44 |     bleu_score_2 = corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0))
45 |     bleu_score_3 = corpus_bleu(actual, predicted, weights=(0.33, 0.33, 0.34, 0))
46 |     bleu_score_4 = corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25))
47 |     # weighted combination favouring higher-order n-gram overlap (4+3+2+1 = 10)
48 |     bleu_score = (4*bleu_score_4 + 3*bleu_score_3 + 2*bleu_score_2 + bleu_score_1)/10
49 |     #print(bleu_score)
50 |     if bleu_score > 0.4:
51 |         matchedFiles.add(img)
52 |
53 | print('Matched %d images' % len(matchedFiles))
54 |
55 | path = 'D:/Study/Machine Learning/DataSets/Image Caption Generator/Flicker8k_Dataset/'
56 |
57 | matched_img_file = open('C:\\xampp\\htdocs\\uploads\\matched_images.txt',"w")
58 |
59 | folder = 'C:\\xampp\\htdocs\\uploads\\matched-images'
60 | for the_file in os.listdir(folder):
61 |     file_path = os.path.join(folder, the_file)
62 |     try:
63 |         if os.path.isfile(file_path):
64 |             os.unlink(file_path)
65 |         #elif os.path.isdir(file_path): shutil.rmtree(file_path)
66 |     except Exception as e:
67 |         print(e)
68 |
69 | desc_text = Text_Preprocess.load_text(dataset_root_dir + 'Flickr_8k\\Flickr8k.token.txt')
70 | descriptions = Text_Preprocess.load_description(desc_text)
71 |
72 | i = 0
73 | for img in matchedFiles:
74 |     img_path = path + img + '.jpg'
75 |     i += 1
76 |     matched_img_file.write(descriptions[img][0] + '\n')
77 |     copyfile(img_path, folder + '\\' + format(i, '03d') + '.jpg')
78 |
79 | # matchedFiles now holds the identifiers of all retrieved images
80 | matched_img_file.close()
81 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Text and Content Based Image Retrieval
2 | 
3 | An image-captioning-based image retrieval model that can be used both via a GUI and the command line.
4 |
5 |
6 | ## Contents
7 |
8 | - [Introduction](#introduction)
9 | - [Features](#features)
10 | - [Demo Video](#video)
11 | - [Contributors](#contributors)
12 | - [Contribute](#contribute)
13 | - [Acknowledgement](#acknowledgement)
14 |
15 | ## Introduction
16 | This is a Python-based image retrieval model built around a deep learning image caption generator. It uses a merge model comprising a Convolutional Neural Network (CNN) and a Long Short-Term Memory network (LSTM). The dataset used here is the Flickr8k dataset. You can request the dataset [here](https://forms.illinois.edu/sec/1713398).
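The two branches are merged by addition: photo features pass through a dense projection, the partial caption passes through an embedding and an LSTM, and the sum is decoded into the next word. A condensed sketch of the architecture implemented in `Model_handling.define_model` (vocabulary size 7579 and caption length 34, as used elsewhere in this repository):

```python
from keras.models import Model
from keras.layers import Input, Dense, Dropout, Embedding, LSTM
from keras.layers.merge import add

image_input = Input(shape=(2048,))    # ResNet50 photo features
image_branch = Dense(256, activation='relu')(Dropout(0.4)(image_input))
caption_input = Input(shape=(34,))    # padded sequence of word indices
embedded = Embedding(7579, 256, mask_zero=True)(caption_input)
caption_branch = LSTM(256)(Dropout(0.5)(embedded))
merged = Dense(256, activation='relu')(add([image_branch, caption_branch]))
next_word = Dense(7579, activation='softmax')(merged)
model = Model(inputs=[image_input, caption_input], outputs=next_word)
```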
17 |
18 | ## Features
19 | This model can be used both via a GUI and the command line. In both modes, images can be retrieved by:
20 | * A text description of the image you want to retrieve
21 | * A query image semantically similar to the images you want to retrieve
22 | 
23 | Note: the model performs well when the query text or image is semantically similar to the images in the dataset.
24 |
25 | ## Video
26 | [](https://www.youtube.com/watch?v=WT5GZaArBpM&feature=youtu.be "Content and Text Based Image Retrieval")
27 |
28 | ## Contributors
29 |