├── README.md
├── input_data.py
└── main.py

/README.md:
--------------------------------------------------------------------------------
# Tensorflow-siamese
* A simple Siamese MLP network implemented in TensorFlow
* Training and test accuracy reach 96.41% and 95.74% after 30 epochs
* About 1.5 seconds per epoch on a Titan Black X
* The MLP network is based on the Keras mnist_siamese example
* Run `python main.py` to train and evaluate
--------------------------------------------------------------------------------
/input_data.py:
--------------------------------------------------------------------------------
"""Functions for downloading and reading MNIST data."""
from __future__ import print_function

import gzip
import os

import numpy

try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'


def maybe_download(filename, work_directory):
    """Download the data from Yann's website, unless it's already here."""
    if not os.path.exists(work_directory):
        os.mkdir(work_directory)
    filepath = os.path.join(work_directory, filename)
    if not os.path.exists(filepath):
        filepath, _ = urlretrieve(SOURCE_URL + filename, filepath)
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    return filepath


def _read32(bytestream):
    # MNIST headers are big-endian 32-bit unsigned integers.
    dt = numpy.dtype(numpy.uint32).newbyteorder('>')
    return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]


def extract_images(filename):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2051:
            raise ValueError(
                'Invalid magic number %d in MNIST image file: %s' %
                (magic, filename))
        num_images = _read32(bytestream)
        rows = _read32(bytestream)
        cols = _read32(bytestream)
        buf = bytestream.read(rows * cols * num_images)
        data = numpy.frombuffer(buf, dtype=numpy.uint8)
        data = data.reshape(num_images, rows, cols, 1)
        return data


def dense_to_one_hot(labels_dense, num_classes=10):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = numpy.arange(num_labels) * num_classes
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot


def extract_labels(filename, one_hot=False):
    """Extract the labels into a 1D uint8 numpy array [index]."""
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        num_items = _read32(bytestream)
        buf = bytestream.read(num_items)
        labels = numpy.frombuffer(buf, dtype=numpy.uint8)
        if one_hot:
            return dense_to_one_hot(labels)
        return labels


class DataSet(object):

    def __init__(self, images, labels, fake_data=False):
        if fake_data:
            self._num_examples = 10000
        else:
            assert images.shape[0] == labels.shape[0], (
                "images.shape: %s labels.shape: %s" % (images.shape,
                                                       labels.shape))
            self._num_examples = images.shape[0]
            # Convert shape from [num examples, rows, columns, depth]
            # to [num examples, rows*columns] (assuming depth == 1).
            assert images.shape[3] == 1
            images = images.reshape(images.shape[0],
                                    images.shape[1] * images.shape[2])
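            # Each 28x28 image is now a flat 784-dimensional row vector,
            # matching the 784-unit input layer of the MLP in main.py.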
            # Convert from [0, 255] -> [0.0, 1.0].
            images = images.astype(numpy.float32)
            images = numpy.multiply(images, 1.0 / 255.0)
        self._images = images
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        return self._images

    @property
    def labels(self):
        return self._labels

    @property
    def num_examples(self):
        return self._num_examples

    @property
    def epochs_completed(self):
        return self._epochs_completed

    def next_batch(self, batch_size, fake_data=False):
        """Return the next `batch_size` examples from this data set."""
        if fake_data:
            fake_image = [1.0 for _ in range(784)]
            fake_label = 0
            return [fake_image for _ in range(batch_size)], [
                fake_label for _ in range(batch_size)]
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle the data
            perm = numpy.arange(self._num_examples)
            numpy.random.shuffle(perm)
            self._images = self._images[perm]
            self._labels = self._labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch
        return self._images[start:end], self._labels[start:end]


def read_data_sets(train_dir, fake_data=False, one_hot=False):

    class DataSets(object):
        pass

    data_sets = DataSets()
    if fake_data:
        data_sets.train = DataSet([], [], fake_data=True)
        data_sets.validation = DataSet([], [], fake_data=True)
        data_sets.test = DataSet([], [], fake_data=True)
        return data_sets

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
    VALIDATION_SIZE = 5000

    local_file = maybe_download(TRAIN_IMAGES, train_dir)
    train_images = extract_images(local_file)
    local_file = maybe_download(TRAIN_LABELS, train_dir)
    train_labels = extract_labels(local_file, one_hot=one_hot)
    local_file = maybe_download(TEST_IMAGES, train_dir)
    test_images = extract_images(local_file)
    local_file = maybe_download(TEST_LABELS, train_dir)
    test_labels = extract_labels(local_file, one_hot=one_hot)

    # The validation split is unused here; uncomment to carve one out.
    #validation_images = train_images[:VALIDATION_SIZE]
    #validation_labels = train_labels[:VALIDATION_SIZE]
    #train_images = train_images[VALIDATION_SIZE:]
    #train_labels = train_labels[VALIDATION_SIZE:]

    data_sets.train = DataSet(train_images, train_labels)
    #data_sets.validation = DataSet(validation_images, validation_labels)
    data_sets.test = DataSet(test_images, test_labels)
    return data_sets
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import math
import pdb
import random
import time

import numpy as np
import tensorflow as tf

import input_data

mnist = input_data.read_data_sets("/tmp/data", one_hot=False)


def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
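    # For every digit class d, consecutive samples (i, i+1) form a positive
    # pair; sample i is also paired with sample i of a randomly chosen other
    # class, so positive and negative pairs stay balanced 1:1.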
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(10)]) - 1
    for d in range(10):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, 10)
            dn = (d + inc) % 10
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)


def mlp(input_, input_dim, output_dim, name="mlp"):
    with tf.variable_scope(name):
        w = tf.get_variable('w', [input_dim, output_dim], tf.float32,
                            tf.random_normal_initializer(mean=0.001, stddev=0.02))
        return tf.nn.relu(tf.matmul(input_, w))


def build_model_mlp(X_, _dropout):
    model = mlpnet(X_, _dropout)
    return model


def mlpnet(image, _dropout):
    # Three 128-unit ReLU layers with dropout; the output is the embedding.
    l1 = mlp(image, 784, 128, name='l1')
    l1 = tf.nn.dropout(l1, _dropout)
    l2 = mlp(l1, 128, 128, name='l2')
    l2 = tf.nn.dropout(l2, _dropout)
    l3 = mlp(l2, 128, 128, name='l3')
    return l3


def contrastive_loss(y, d):
    # Contrastive loss with margin 1:
    #   L = (1/2N) * sum_i [ y_i * d_i^2 + (1 - y_i) * max(1 - d_i, 0)^2 ]
    # Similar pairs (y=1) are pulled together; dissimilar pairs (y=0) are
    # pushed apart until their distance exceeds the margin.
    tmp = y * tf.square(d)
    #tmp = tf.multiply(y, tf.square(d))
    tmp2 = (1 - y) * tf.square(tf.maximum((1 - d), 0))
    return tf.reduce_sum(tmp + tmp2) / batch_size / 2


def compute_accuracy(prediction, labels):
    # Mean label over pairs whose predicted distance is below 0.5, the same
    # accuracy proxy used by the Keras mnist_siamese example.
    return labels[prediction.ravel() < 0.5].mean()
    #return tf.reduce_mean(labels[prediction.ravel() < 0.5])


def next_batch(s, e, inputs, labels):
    input1 = inputs[s:e, 0]
    input2 = inputs[s:e, 1]
    y = np.reshape(labels[s:e], (len(range(s, e)), 1))
    return input1, input2, y


# the data, shuffled and split between train and test sets
X_train = mnist.train._images
y_train = mnist.train._labels
X_test = mnist.test._images
y_test = mnist.test._labels
batch_size = 128

# Learning-rate decay schedule (defined but not wired into the optimizer below):
#global_step = tf.Variable(0, trainable=False)
#starter_learning_rate = 0.001
#learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 10, 0.1, staircase=True)

# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)

images_L = tf.placeholder(tf.float32, shape=([None, 784]), name='L')
images_R = tf.placeholder(tf.float32, shape=([None, 784]), name='R')
labels = tf.placeholder(tf.float32, shape=([None, 1]), name='gt')
dropout_f = tf.placeholder(tf.float32)

# Two branches share weights through variable-scope reuse.
with tf.variable_scope("siamese") as scope:
    model1 = build_model_mlp(images_L, dropout_f)
    scope.reuse_variables()
    model2 = build_model_mlp(images_R, dropout_f)

# Euclidean distance between the two embeddings.
distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(model1, model2)), 1, keep_dims=True))
loss = contrastive_loss(labels, distance)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
#optimizer = tf.train.RMSPropOptimizer(0.0001, momentum=0.9, epsilon=1e-6).minimize(loss)

# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Training cycle
    for epoch in range(30):
        avg_loss = 0.
        avg_acc = 0.
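        # Note: total_batch is computed from the number of training images
        # (60000), not from the number of generated pairs (roughly 108000),
        # so each epoch only visits the first ~60000 pairs.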
        total_batch = int(X_train.shape[0] / batch_size)
        start_time = time.time()
        # Loop over all batches
        for i in range(total_batch):
            s = i * batch_size
            e = (i + 1) * batch_size
            # Fit training using batch data
            input1, input2, y = next_batch(s, e, tr_pairs, tr_y)
            _, loss_value, predict = sess.run(
                [optimizer, loss, distance],
                feed_dict={images_L: input1, images_R: input2, labels: y, dropout_f: 0.9})
            #feature1 = model1.eval(feed_dict={images_L: input1, dropout_f: 0.9})  # unused
            #feature2 = model2.eval(feed_dict={images_R: input2, dropout_f: 0.9})  # unused
            tr_acc = compute_accuracy(predict, y)
            if math.isnan(tr_acc) and epoch != 0:
                print('tr_acc %0.2f' % tr_acc)
                pdb.set_trace()
            avg_loss += loss_value
            avg_acc += tr_acc * 100
            #print('epoch %d loss %0.2f' % (epoch, avg_loss / total_batch))
        duration = time.time() - start_time
        print('epoch %d time: %f loss %0.5f acc %0.2f' % (epoch, duration, avg_loss / total_batch, avg_acc / total_batch))

    # Evaluate on the full training set (dropout disabled).
    y = np.reshape(tr_y, (tr_y.shape[0], 1))
    predict = distance.eval(feed_dict={images_L: tr_pairs[:, 0], images_R: tr_pairs[:, 1], dropout_f: 1.0})
    tr_acc = compute_accuracy(predict, y)
    print('Accuracy training set %0.2f' % (100 * tr_acc))

    # Test model
    y = np.reshape(te_y, (te_y.shape[0], 1))
    predict = distance.eval(feed_dict={images_L: te_pairs[:, 0], images_R: te_pairs[:, 1], dropout_f: 1.0})
    te_acc = compute_accuracy(predict, y)
    print('Accuracy test set %0.2f' % (100 * te_acc))
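    # A minimal usage sketch: either branch embeds images on its own, since
    # model1 depends only on the images_L and dropout_f placeholders.
    embeddings = model1.eval(feed_dict={images_L: X_test[:10], dropout_f: 1.0})
    print('embedding shape:', embeddings.shape)  # -> (10, 128)
--------------------------------------------------------------------------------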