├── README.md
├── input_data.py
└── main.py

/README.md:
--------------------------------------------------------------------------------
# Tensorflow-siamese
* A simple Siamese MLP network implemented in TensorFlow
* Training and test accuracy reach 96.41% and 95.74% after 30 epochs
* About 1.5 seconds per epoch on a Titan Black X
* The MLP network is based on the Keras mnist_siamese example
* Run `python main.py` to train and evaluate
--------------------------------------------------------------------------------
/input_data.py:
--------------------------------------------------------------------------------
"""Functions for downloading and reading MNIST data."""
from __future__ import print_function

import gzip
import os

import numpy

try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve  # Python 2

SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/'


def maybe_download(filename, work_directory):
    """Download the data from Yann's website, unless it's already here."""
    if not os.path.exists(work_directory):
        os.mkdir(work_directory)
    filepath = os.path.join(work_directory, filename)
    if not os.path.exists(filepath):
        filepath, _ = urlretrieve(SOURCE_URL + filename, filepath)
        statinfo = os.stat(filepath)
        print('Successfully downloaded', filename, statinfo.st_size, 'bytes.')
    return filepath


def _read32(bytestream):
    # MNIST headers are big-endian 32-bit unsigned integers.
    dt = numpy.dtype(numpy.uint32).newbyteorder('>')
    return numpy.frombuffer(bytestream.read(4), dtype=dt)[0]


def extract_images(filename):
    """Extract the images into a 4D uint8 numpy array [index, y, x, depth]."""
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2051:
            raise ValueError(
                'Invalid magic number %d in MNIST image file: %s' %
                (magic, filename))
        num_images = _read32(bytestream)
        rows = _read32(bytestream)
        cols = _read32(bytestream)
        buf = bytestream.read(rows * cols * num_images)
        data = numpy.frombuffer(buf, dtype=numpy.uint8)
        data = data.reshape(num_images, rows, cols, 1)
        return data


def dense_to_one_hot(labels_dense, num_classes=10):
    """Convert class labels from scalars to one-hot vectors."""
    num_labels = labels_dense.shape[0]
    index_offset = numpy.arange(num_labels) * num_classes
    labels_one_hot = numpy.zeros((num_labels, num_classes))
    labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1
    return labels_one_hot


def extract_labels(filename, one_hot=False):
    """Extract the labels into a 1D uint8 numpy array [index]."""
    print('Extracting', filename)
    with gzip.open(filename) as bytestream:
        magic = _read32(bytestream)
        if magic != 2049:
            raise ValueError(
                'Invalid magic number %d in MNIST label file: %s' %
                (magic, filename))
        num_items = _read32(bytestream)
        buf = bytestream.read(num_items)
        labels = numpy.frombuffer(buf, dtype=numpy.uint8)
        if one_hot:
            return dense_to_one_hot(labels)
        return labels


class DataSet(object):

    def __init__(self, images, labels, fake_data=False):
        if fake_data:
            self._num_examples = 10000
        else:
            assert images.shape[0] == labels.shape[0], (
                "images.shape: %s labels.shape: %s" % (images.shape,
                                                       labels.shape))
            self._num_examples = images.shape[0]
            # Convert shape from [num examples, rows, columns, depth]
            # to [num examples, rows*columns] (assuming depth == 1).
            assert images.shape[3] == 1
            images = images.reshape(images.shape[0],
                                    images.shape[1] * images.shape[2])
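            # Each 28x28 image is now a flat 784-dimensional row vector,
            # matching the 784-unit input layer of the MLP in main.py.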
            # Convert from [0, 255] -> [0.0, 1.0].
            images = images.astype(numpy.float32)
            images = numpy.multiply(images, 1.0 / 255.0)
        self._images = images
        self._labels = labels
        self._epochs_completed = 0
        self._index_in_epoch = 0

    @property
    def images(self):
        return self._images

    @property
    def labels(self):
        return self._labels

    @property
    def num_examples(self):
        return self._num_examples

    @property
    def epochs_completed(self):
        return self._epochs_completed

    def next_batch(self, batch_size, fake_data=False):
        """Return the next `batch_size` examples from this data set."""
        if fake_data:
            fake_image = [1.0 for _ in range(784)]
            fake_label = 0
            return [fake_image for _ in range(batch_size)], [
                fake_label for _ in range(batch_size)]
        start = self._index_in_epoch
        self._index_in_epoch += batch_size
        if self._index_in_epoch > self._num_examples:
            # Finished epoch
            self._epochs_completed += 1
            # Shuffle the data
            perm = numpy.arange(self._num_examples)
            numpy.random.shuffle(perm)
            self._images = self._images[perm]
            self._labels = self._labels[perm]
            # Start next epoch
            start = 0
            self._index_in_epoch = batch_size
            assert batch_size <= self._num_examples
        end = self._index_in_epoch
        return self._images[start:end], self._labels[start:end]


def read_data_sets(train_dir, fake_data=False, one_hot=False):

    class DataSets(object):
        pass

    data_sets = DataSets()
    if fake_data:
        data_sets.train = DataSet([], [], fake_data=True)
        data_sets.validation = DataSet([], [], fake_data=True)
        data_sets.test = DataSet([], [], fake_data=True)
        return data_sets

    TRAIN_IMAGES = 'train-images-idx3-ubyte.gz'
    TRAIN_LABELS = 'train-labels-idx1-ubyte.gz'
    TEST_IMAGES = 't10k-images-idx3-ubyte.gz'
    TEST_LABELS = 't10k-labels-idx1-ubyte.gz'
    VALIDATION_SIZE = 5000

    local_file = maybe_download(TRAIN_IMAGES, train_dir)
    train_images = extract_images(local_file)
    local_file = maybe_download(TRAIN_LABELS, train_dir)
    train_labels = extract_labels(local_file, one_hot=one_hot)
    local_file = maybe_download(TEST_IMAGES, train_dir)
    test_images = extract_images(local_file)
    local_file = maybe_download(TEST_LABELS, train_dir)
    test_labels = extract_labels(local_file, one_hot=one_hot)

    # The validation split is unused here; uncomment to carve one out.
    #validation_images = train_images[:VALIDATION_SIZE]
    #validation_labels = train_labels[:VALIDATION_SIZE]
    #train_images = train_images[VALIDATION_SIZE:]
    #train_labels = train_labels[VALIDATION_SIZE:]

    data_sets.train = DataSet(train_images, train_labels)
    #data_sets.validation = DataSet(validation_images, validation_labels)
    data_sets.test = DataSet(test_images, test_labels)
    return data_sets
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
import math
import pdb
import random
import time

import numpy as np
import tensorflow as tf

import input_data

mnist = input_data.read_data_sets("/tmp/data", one_hot=False)


def create_pairs(x, digit_indices):
    '''Positive and negative pair creation.
    Alternates between positive and negative pairs.
    '''
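    # For every digit class d, consecutive samples (i, i+1) form a positive
    # pair; sample i is also paired with sample i of a randomly chosen other
    # class, so positive and negative pairs stay balanced 1:1.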
    pairs = []
    labels = []
    n = min([len(digit_indices[d]) for d in range(10)]) - 1
    for d in range(10):
        for i in range(n):
            z1, z2 = digit_indices[d][i], digit_indices[d][i + 1]
            pairs += [[x[z1], x[z2]]]
            inc = random.randrange(1, 10)
            dn = (d + inc) % 10
            z1, z2 = digit_indices[d][i], digit_indices[dn][i]
            pairs += [[x[z1], x[z2]]]
            labels += [1, 0]
    return np.array(pairs), np.array(labels)


def mlp(input_, input_dim, output_dim, name="mlp"):
    with tf.variable_scope(name):
        w = tf.get_variable('w', [input_dim, output_dim], tf.float32,
                            tf.random_normal_initializer(mean=0.001, stddev=0.02))
        return tf.nn.relu(tf.matmul(input_, w))


def build_model_mlp(X_, _dropout):
    model = mlpnet(X_, _dropout)
    return model


def mlpnet(image, _dropout):
    # Three 128-unit ReLU layers with dropout; the output is the embedding.
    l1 = mlp(image, 784, 128, name='l1')
    l1 = tf.nn.dropout(l1, _dropout)
    l2 = mlp(l1, 128, 128, name='l2')
    l2 = tf.nn.dropout(l2, _dropout)
    l3 = mlp(l2, 128, 128, name='l3')
    return l3


def contrastive_loss(y, d):
    # Contrastive loss with margin 1:
    #   L = (1/2N) * sum_i [ y_i * d_i^2 + (1 - y_i) * max(1 - d_i, 0)^2 ]
    # Similar pairs (y=1) are pulled together; dissimilar pairs (y=0) are
    # pushed apart until their distance exceeds the margin.
    tmp = y * tf.square(d)
    #tmp = tf.multiply(y, tf.square(d))
    tmp2 = (1 - y) * tf.square(tf.maximum((1 - d), 0))
    return tf.reduce_sum(tmp + tmp2) / batch_size / 2


def compute_accuracy(prediction, labels):
    # Mean label over pairs whose predicted distance is below 0.5, the same
    # accuracy proxy used by the Keras mnist_siamese example.
    return labels[prediction.ravel() < 0.5].mean()
    #return tf.reduce_mean(labels[prediction.ravel() < 0.5])


def next_batch(s, e, inputs, labels):
    input1 = inputs[s:e, 0]
    input2 = inputs[s:e, 1]
    y = np.reshape(labels[s:e], (len(range(s, e)), 1))
    return input1, input2, y


# the data, shuffled and split between train and test sets
X_train = mnist.train._images
y_train = mnist.train._labels
X_test = mnist.test._images
y_test = mnist.test._labels
batch_size = 128

# Learning-rate decay schedule (defined but not wired into the optimizer below):
#global_step = tf.Variable(0, trainable=False)
#starter_learning_rate = 0.001
#learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step, 10, 0.1, staircase=True)

# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)

images_L = tf.placeholder(tf.float32, shape=([None, 784]), name='L')
images_R = tf.placeholder(tf.float32, shape=([None, 784]), name='R')
labels = tf.placeholder(tf.float32, shape=([None, 1]), name='gt')
dropout_f = tf.placeholder(tf.float32)

# Two branches share weights through variable-scope reuse.
with tf.variable_scope("siamese") as scope:
    model1 = build_model_mlp(images_L, dropout_f)
    scope.reuse_variables()
    model2 = build_model_mlp(images_R, dropout_f)

# Euclidean distance between the two embeddings.
distance = tf.sqrt(tf.reduce_sum(tf.square(tf.subtract(model1, model2)), 1, keep_dims=True))
loss = contrastive_loss(labels, distance)
optimizer = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss)
#optimizer = tf.train.RMSPropOptimizer(0.0001, momentum=0.9, epsilon=1e-6).minimize(loss)

# Launch the graph
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # Training cycle
    for epoch in range(30):
        avg_loss = 0.
        avg_acc = 0.
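        # Note: total_batch is computed from the number of training images
        # (60000), not from the number of generated pairs (roughly 108000),
        # so each epoch only visits the first ~60000 pairs.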
        total_batch = int(X_train.shape[0] / batch_size)
        start_time = time.time()
        # Loop over all batches
        for i in range(total_batch):
            s = i * batch_size
            e = (i + 1) * batch_size
            # Fit training using batch data
            input1, input2, y = next_batch(s, e, tr_pairs, tr_y)
            _, loss_value, predict = sess.run(
                [optimizer, loss, distance],
                feed_dict={images_L: input1, images_R: input2, labels: y, dropout_f: 0.9})
            #feature1 = model1.eval(feed_dict={images_L: input1, dropout_f: 0.9})  # unused
            #feature2 = model2.eval(feed_dict={images_R: input2, dropout_f: 0.9})  # unused
            tr_acc = compute_accuracy(predict, y)
            if math.isnan(tr_acc) and epoch != 0:
                print('tr_acc %0.2f' % tr_acc)
                pdb.set_trace()
            avg_loss += loss_value
            avg_acc += tr_acc * 100
            #print('epoch %d loss %0.2f' % (epoch, avg_loss / total_batch))
        duration = time.time() - start_time
        print('epoch %d time: %f loss %0.5f acc %0.2f' % (epoch, duration, avg_loss / total_batch, avg_acc / total_batch))

    # Evaluate on the full training set (dropout disabled).
    y = np.reshape(tr_y, (tr_y.shape[0], 1))
    predict = distance.eval(feed_dict={images_L: tr_pairs[:, 0], images_R: tr_pairs[:, 1], dropout_f: 1.0})
    tr_acc = compute_accuracy(predict, y)
    print('Accuracy training set %0.2f' % (100 * tr_acc))

    # Test model
    y = np.reshape(te_y, (te_y.shape[0], 1))
    predict = distance.eval(feed_dict={images_L: te_pairs[:, 0], images_R: te_pairs[:, 1], dropout_f: 1.0})
    te_acc = compute_accuracy(predict, y)
    print('Accuracy test set %0.2f' % (100 * te_acc))
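    # A minimal usage sketch: either branch embeds images on its own, since
    # model1 depends only on the images_L and dropout_f placeholders.
    embeddings = model1.eval(feed_dict={images_L: X_test[:10], dropout_f: 1.0})
    print('embedding shape:', embeddings.shape)  # -> (10, 128)
--------------------------------------------------------------------------------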