├── LICENSE
├── Prediction.ipynb
├── README.md
├── download_dataset.py
├── figures
│   ├── similar_images.jpg
│   ├── triplet_loss_clusters.jpg
│   └── triplet_loss_model_architecture.png
├── model.py
├── model_triplet
│   ├── checkpoint
│   ├── model.ckpt.data-00000-of-00001
│   ├── model.ckpt.index
│   └── model.ckpt.meta
├── preprocessing.py
├── requirements.txt
├── setup.sh
└── train_triplets.py

/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2025 Shibsankar Das
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Triplet Loss for Image Similarity using TensorFlow
 2 | This repository is an implementation of the following Medium story:
 3 | **[Image similarity using Triplet Loss](https://towardsdatascience.com/image-similarity-using-triplet-loss-3744c0f67973?source=friends_link&sk=ad2d8e0921e7cdaec1e65e6e0474df10)**
 4 | 
 5 | ## Requirements
 6 | - Python 3
 7 | - pip3
 8 | - TensorFlow
 9 | - Matplotlib
10 | - Requests
11 | 
12 | ## Environment Setup
13 | Install the dependency packages listed in requirements.txt:
14 | ```bash
15 | pip install -r requirements.txt
16 | ```
17 | 
18 | ## Training
19 | 1. Download the training dataset by running download_dataset.py:
20 | ```bash
21 | python download_dataset.py
22 | ```
23 | 2. Train the model:
24 | ```bash
25 | python train_triplets.py
26 | ```
27 | ## Prediction
28 | 
29 | Open Prediction.ipynb in Jupyter Notebook to step through the prediction code.
30 | ```
31 | Prediction.ipynb
32 | ```
33 | 
34 | 
35 | 
36 | 
--------------------------------------------------------------------------------
/download_dataset.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import requests
 3 | from zipfile import ZipFile
 4 | 
 5 | dataset_url = "http://aws-proserve-data-science.s3.amazonaws.com/geological_similarity.zip"
 6 | filePath = './data_repository/geological_similarity.zip'
 7 | data_directory = './data_repository'
 8 | 
 9 | if not os.path.exists(data_directory):
10 |     try:
11 |         os.makedirs(data_directory)
12 |         print(data_directory, " created successfully.")
13 |     except OSError:
14 |         print("Unable to create directory at ", data_directory, ". Please create ", data_directory, " manually, then run this file again.")
15 | 
16 | if os.path.exists(filePath):
17 |     os.remove(filePath)
18 | else:
19 |     print("Have to download dataset.")
20 | 
21 | 
22 | r = requests.get(dataset_url, stream=True)
23 | print('Started downloading dataset...')
24 | with open(filePath, "wb") as data:
25 |     for chunk in r.iter_content(chunk_size=1024):
26 |         # writing one chunk at a time to data file
27 | 
28 |         if chunk:
29 |             print('...', end='')
30 |             data.write(chunk)
31 | print('Download finished.')
32 | print('Unzipping File...')
33 | zf = ZipFile(filePath, 'r')
34 | zf.extractall('./data_repository/')
35 | zf.close()
36 | print('Successfully unzipped file. Ready to run model...')
37 | 
--------------------------------------------------------------------------------
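
After download_dataset.py has run, the archive is extracted so that each class of the geological-similarity dataset sits in its own sub-directory under ./data_repository/geological_similarity/, which is the layout preprocessing.py expects (it uses the sub-directory names as labels). A minimal, optional sanity check of that layout might look like this (not part of the repository; the path is the default used by the scripts):

```python
import os

# Default dataset location used by download_dataset.py and train_triplets.py.
data_dir = './data_repository/geological_similarity'

if not os.path.isdir(data_dir):
    raise SystemExit('Dataset not found - run `python download_dataset.py` first.')

# Each sub-directory is one class; print how many images it holds.
for label in sorted(os.listdir(data_dir)):
    class_dir = os.path.join(data_dir, label)
    if os.path.isdir(class_dir):
        print(label, ':', len(os.listdir(class_dir)), 'images')
```
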
/figures/similar_images.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanku-lib/image_triplet_loss/0710e32cef97e48ccda96830d30d30490e86de5f/figures/similar_images.jpg
--------------------------------------------------------------------------------
/figures/triplet_loss_clusters.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanku-lib/image_triplet_loss/0710e32cef97e48ccda96830d30d30490e86de5f/figures/triplet_loss_clusters.jpg
--------------------------------------------------------------------------------
/figures/triplet_loss_model_architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanku-lib/image_triplet_loss/0710e32cef97e48ccda96830d30d30490e86de5f/figures/triplet_loss_model_architecture.png
--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | 
 4 | class TripletLoss:
 5 | 
 6 |     def conv_net(self, x, reuse=False):
 7 |         with tf.name_scope("model"):
 8 |             with tf.variable_scope("conv1") as scope:
 9 |                 net = tf.contrib.layers.conv2d(x, 32, [7, 7], activation_fn=tf.nn.relu, padding='SAME',
10 |                                                weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
11 |                                                scope=scope, reuse=reuse)
12 |                 net = tf.contrib.layers.max_pool2d(net, [2, 2], padding='SAME')
13 | 
14 |             with tf.variable_scope("conv2") as scope:
15 |                 net = tf.contrib.layers.conv2d(net, 64, [5, 5], activation_fn=tf.nn.relu, padding='SAME',
16 |                                                weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
17 |                                                scope=scope, reuse=reuse)
18 |                 net = tf.contrib.layers.max_pool2d(net, [2, 2], padding='SAME')
19 | 
20 |             with tf.variable_scope("conv3") as scope:
21 |                 net = tf.contrib.layers.conv2d(net, 128, [3, 3], activation_fn=tf.nn.relu, padding='SAME',
22 |                                                weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
23 |                                                scope=scope, reuse=reuse)
24 |                 net = tf.contrib.layers.max_pool2d(net, [2, 2], padding='SAME')
25 | 
26 |             with tf.variable_scope("conv4") as scope:
27 |                 net = tf.contrib.layers.conv2d(net, 256, [1, 1], activation_fn=tf.nn.relu, padding='SAME',
28 |                                                weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
29 |                                                scope=scope, reuse=reuse)
30 |                 net = tf.contrib.layers.max_pool2d(net, [2, 2], padding='SAME')
31 | 
32 |             with tf.variable_scope("conv5") as scope:
33 |                 net = tf.contrib.layers.conv2d(net, 28, [1, 1], activation_fn=None, padding='SAME',
34 |                                                weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(),
35 |                                                scope=scope, reuse=reuse)
36 |                 net = tf.contrib.layers.max_pool2d(net, [2, 2], padding='SAME')
37 | 
38 |             net = tf.contrib.layers.flatten(net)
39 | 
40 |         return net
41 | 
42 | 
43 |     def triplet_loss(self, model_anchor, model_positive, model_negative, margin):
44 |         distance1 = tf.sqrt(tf.reduce_sum(tf.pow(model_anchor - model_positive, 2), 1, keepdims=True))
45 |         distance2 = tf.sqrt(tf.reduce_sum(tf.pow(model_anchor - model_negative, 2), 1, keepdims=True))
46 |         return tf.reduce_mean(tf.maximum(distance1 - distance2 + margin, 0))
--------------------------------------------------------------------------------
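
In model.py, conv_net maps each input image to an embedding vector, and triplet_loss implements max(d(anchor, positive) - d(anchor, negative) + margin, 0) averaged over the batch, where d is the Euclidean distance between embeddings. For illustration only, the same computation in plain NumPy (this helper is not used anywhere in the repository):

```python
import numpy as np

def triplet_loss_np(anchor, positive, negative, margin=0.5):
    """NumPy mirror of TripletLoss.triplet_loss, for illustration."""
    d_pos = np.sqrt(np.sum((anchor - positive) ** 2, axis=1))  # anchor-positive distances
    d_neg = np.sqrt(np.sum((anchor - negative) ** 2, axis=1))  # anchor-negative distances
    return np.mean(np.maximum(d_pos - d_neg + margin, 0.0))

# Toy embeddings: each positive is much closer to its anchor than the negative,
# so the hinge is inactive and the loss is 0.
a = np.array([[0.0, 0.0], [1.0, 1.0]])
p = np.array([[0.1, 0.0], [1.0, 1.2]])
n = np.array([[2.0, 2.0], [3.0, 0.0]])
print(triplet_loss_np(a, p, n))  # 0.0
```
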
/model_triplet/checkpoint:
--------------------------------------------------------------------------------
1 | model_checkpoint_path: "model.ckpt"
2 | all_model_checkpoint_paths: "model.ckpt"
3 | 
--------------------------------------------------------------------------------
/model_triplet/model.ckpt.data-00000-of-00001:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanku-lib/image_triplet_loss/0710e32cef97e48ccda96830d30d30490e86de5f/model_triplet/model.ckpt.data-00000-of-00001
--------------------------------------------------------------------------------
/model_triplet/model.ckpt.index:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanku-lib/image_triplet_loss/0710e32cef97e48ccda96830d30d30490e86de5f/model_triplet/model.ckpt.index
--------------------------------------------------------------------------------
/model_triplet/model.ckpt.meta:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sanku-lib/image_triplet_loss/0710e32cef97e48ccda96830d30d30490e86de5f/model_triplet/model.ckpt.meta
--------------------------------------------------------------------------------
/preprocessing.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | from matplotlib.image import imread
 3 | import numpy as np
 4 | 
 5 | 
 6 | class PreProcessing:
 7 | 
 8 |     images_train = np.array([])
 9 |     images_test = np.array([])
10 |     labels_train = np.array([])
11 |     labels_test = np.array([])
12 |     unique_train_label = np.array([])
13 |     map_train_label_indices = dict()
14 | 
15 |     def __init__(self, data_src):
16 |         self.data_src = data_src
17 |         print("Loading Geological Similarity Dataset...")
18 |         self.images_train, self.images_test, self.labels_train, self.labels_test = self.preprocessing(0.9)
19 |         self.unique_train_label = np.unique(self.labels_train)
20 |         self.map_train_label_indices = {label: np.flatnonzero(self.labels_train == label) for label in
21 |                                         self.unique_train_label}
22 |         print('Preprocessing Done. Summary:')
23 |         print("Images train :", self.images_train.shape)
24 |         print("Labels train :", self.labels_train.shape)
25 |         print("Images test  :", self.images_test.shape)
26 |         print("Labels test  :", self.labels_test.shape)
27 |         print("Unique label :", self.unique_train_label)
28 | 
29 |     def normalize(self, x):
30 |         min_val = np.min(x)
31 |         max_val = np.max(x)
32 |         x = (x - min_val) / (max_val - min_val)
33 |         return x
34 | 
35 |     def read_dataset(self):
36 |         X = []
37 |         y = []
38 |         for directory in os.listdir(self.data_src):
39 |             try:
40 |                 for pic in os.listdir(os.path.join(self.data_src, directory)):
41 |                     img = imread(os.path.join(self.data_src, directory, pic))
42 |                     X.append(np.squeeze(np.asarray(img)))
43 |                     y.append(directory)
44 |             except Exception as e:
45 |                 print('Failed to read images from Directory: ', directory)
46 |                 print('Exception Message: ', e)
47 |         print('Dataset loaded successfully.')
48 |         return X, y
49 | 
50 |     def preprocessing(self, train_test_ratio):
51 |         X, y = self.read_dataset()
52 |         labels = list(set(y))
53 |         label_dict = dict(zip(labels, range(len(labels))))
54 |         Y = np.asarray([label_dict[label] for label in y])
55 |         X = [self.normalize(x) for x in X]  # normalize images
56 | 
57 |         shuffle_indices = np.random.permutation(np.arange(len(y)))
58 |         x_shuffled = []
59 |         y_shuffled = []
60 |         for index in shuffle_indices:
61 |             x_shuffled.append(X[index])
62 |             y_shuffled.append(Y[index])
63 | 
64 |         size_of_dataset = len(x_shuffled)
65 |         n_train = int(np.ceil(size_of_dataset * train_test_ratio))
66 |         return np.asarray(x_shuffled[0:n_train]), np.asarray(x_shuffled[n_train:size_of_dataset]), np.asarray(
67 |             y_shuffled[0:n_train]), np.asarray(y_shuffled[
68 |                                                n_train:size_of_dataset])
69 | 
70 | 
71 |     def get_triplets(self):
72 |         label_l, label_r = np.random.choice(self.unique_train_label, 2, replace=False)
73 |         a, p = np.random.choice(self.map_train_label_indices[label_l], 2, replace=False)
74 |         n = np.random.choice(self.map_train_label_indices[label_r])
75 |         return a, p, n
76 | 
77 |     def get_triplets_batch(self, n):
78 |         idxs_a, idxs_p, idxs_n = [], [], []
79 |         for _ in range(n):
80 |             a, p, n = self.get_triplets()
81 |             idxs_a.append(a)
82 |             idxs_p.append(p)
83 |             idxs_n.append(n)
84 |         return self.images_train[idxs_a, :], self.images_train[idxs_p, :], self.images_train[idxs_n, :]
85 | 
86 | 
--------------------------------------------------------------------------------
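
get_triplets draws two distinct labels, samples the anchor and positive from the first and the negative from the second, and get_triplets_batch stacks n such triplets into three image arrays. A short usage sketch (assuming the dataset has already been downloaded to the default location):

```python
from preprocessing import PreProcessing

# Assumes download_dataset.py has already populated this directory.
dataset = PreProcessing('./data_repository/geological_similarity/')

# Draw a small batch of (anchor, positive, negative) image arrays.
anchors, positives, negatives = dataset.get_triplets_batch(8)
print(anchors.shape, positives.shape, negatives.shape)  # each is (8, height, width, channels)
```
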
/requirements.txt:
--------------------------------------------------------------------------------
1 | tensorflow==2.12.1
2 | matplotlib==2.1.2
3 | requests>=2.20.0
--------------------------------------------------------------------------------
/setup.sh:
--------------------------------------------------------------------------------
1 | sudo apt-get update
2 | sudo apt-get install python3.6
3 | sudo apt-get install python3-pip
4 | sudo pip3 install -r requirements.txt
5 | echo "Environment Setup Successful."
6 | echo "Downloading Dataset"
7 | python3 download_dataset.py
8 | echo "SETUP DONE"
--------------------------------------------------------------------------------
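
Note that model.py and train_triplets.py rely on TensorFlow 1.x graph-mode APIs (tf.contrib.layers, tf.app.flags, tf.placeholder, tf.Session), which no longer exist in TensorFlow 2.x, so the tensorflow pin in requirements.txt will not run the training script as written; a 1.x release such as tensorflow==1.15 is needed. A small guard one could add before training (illustrative, not part of the repository):

```python
import tensorflow as tf

# train_triplets.py and model.py use TF 1.x-only APIs (tf.contrib, tf.app.flags,
# tf.placeholder, tf.Session), so fail early with a clear message on TF 2.x.
if not tf.__version__.startswith('1.'):
    raise RuntimeError(
        'This code targets TensorFlow 1.x (found %s); '
        'install a 1.x release such as tensorflow==1.15.' % tf.__version__)
```
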
/train_triplets.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from preprocessing import PreProcessing
 3 | from model import TripletLoss
 4 | 
 5 | flags = tf.app.flags
 6 | FLAGS = flags.FLAGS
 7 | flags.DEFINE_integer('batch_size', 512, 'Batch size.')
 8 | flags.DEFINE_integer('train_iter', 2000, 'Total training iterations')
 9 | flags.DEFINE_integer('step', 50, 'Save a checkpoint every this many iterations')
10 | flags.DEFINE_float('learning_rate', 0.01, 'Learning rate')
11 | flags.DEFINE_float('momentum', 0.99, 'Momentum')
12 | flags.DEFINE_string('model', 'conv_net', 'model to run')
13 | flags.DEFINE_string('data_src', './data_repository/geological_similarity/', 'source of training dataset')
14 | 
15 | if __name__ == "__main__":
16 | 
17 |     # Setup Dataset
18 |     dataset = PreProcessing(FLAGS.data_src)
19 |     model = TripletLoss()
20 |     placeholder_shape = [None] + list(dataset.images_train.shape[1:])
21 |     print("placeholder_shape", placeholder_shape)
22 | 
23 |     # Setup Network
24 |     next_batch = dataset.get_triplets_batch
25 |     anchor_input = tf.placeholder(tf.float32, placeholder_shape, name='anchor_input')
26 |     positive_input = tf.placeholder(tf.float32, placeholder_shape, name='positive_input')
27 |     negative_input = tf.placeholder(tf.float32, placeholder_shape, name='negative_input')
28 | 
29 |     margin = 0.5
30 |     anchor_output = model.conv_net(anchor_input, reuse=False)
31 |     positive_output = model.conv_net(positive_input, reuse=True)
32 |     negative_output = model.conv_net(negative_input, reuse=True)
33 |     loss = model.triplet_loss(anchor_output, positive_output, negative_output, margin)
34 | 
35 |     # Setup Optimizer
36 |     global_step = tf.Variable(0, trainable=False)
37 | 
38 |     train_step = tf.train.MomentumOptimizer(FLAGS.learning_rate, FLAGS.momentum, use_nesterov=True).minimize(loss,
39 |                                                                                                               global_step=global_step)
40 | 
41 |     # Start Training
42 |     saver = tf.train.Saver()
43 |     with tf.Session() as sess:
44 |         sess.run(tf.global_variables_initializer())
45 | 
46 |         # Setup Tensorboard
47 |         tf.summary.scalar('step', global_step)
48 |         tf.summary.scalar('loss', loss)
49 |         for var in tf.trainable_variables():
50 |             tf.summary.histogram(var.op.name, var)
51 |         merged = tf.summary.merge_all()
52 |         writer = tf.summary.FileWriter('train.log', sess.graph)
53 | 
54 |         # Train iter
55 |         for i in range(FLAGS.train_iter):
56 |             batch_anchor, batch_positive, batch_negative = next_batch(FLAGS.batch_size)
57 | 
58 |             _, l, summary_str = sess.run([train_step, loss, merged],
59 |                                          feed_dict={anchor_input: batch_anchor, positive_input: batch_positive, negative_input: batch_negative})
60 | 
61 |             writer.add_summary(summary_str, i)
62 |             print("\r#%d - Loss" % i, l)
63 | 
64 |             if (i + 1) % FLAGS.step == 0:
65 |                 saver.save(sess, "model_triplet/model.ckpt")
66 |         saver.save(sess, "model_triplet/model.ckpt")
67 |         print('Training completed successfully.')
--------------------------------------------------------------------------------
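
For prediction (see Prediction.ipynb), the usual flow with a model like this is to rebuild conv_net, restore the checkpoint saved in model_triplet/, embed a query image together with a pool of reference images, and rank the references by Euclidean distance in embedding space. A rough sketch of that flow is below; it is an assumption about how the notebook works, not a copy of it, and the slice sizes are arbitrary:

```python
import numpy as np
import tensorflow as tf

from model import TripletLoss
from preprocessing import PreProcessing

# Rebuild the embedding network with the same variable scopes used in training.
dataset = PreProcessing('./data_repository/geological_similarity/')
placeholder_shape = [None] + list(dataset.images_train.shape[1:])
img_placeholder = tf.placeholder(tf.float32, placeholder_shape, name='img')
embedding = TripletLoss().conv_net(img_placeholder, reuse=False)

with tf.Session() as sess:
    # Restore the weights written by train_triplets.py.
    tf.train.Saver().restore(sess, 'model_triplet/model.ckpt')

    # Embed a pool of reference images and a single query image.
    ref_vecs = sess.run(embedding, feed_dict={img_placeholder: dataset.images_train[:1000]})
    query_vec = sess.run(embedding, feed_dict={img_placeholder: dataset.images_test[:1]})

    # Smaller distance means more similar.
    dists = np.linalg.norm(ref_vecs - query_vec, axis=1)
    print('Indices of the 5 most similar reference images:', np.argsort(dists)[:5])
```
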