"""
Deep Convolutional Autoencoder with TensorFlow

Arash Saber Tehrani - May 2017

"""
# ---------------------------------
# import required packages
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
# ---------------------------------
mnist = input_data.read_data_sets("/tmp/data/", one_hot=True)

n_classes = 10
batch_size = 100

# tf Graph Input
# mnist data image of shape 28*28=784
x = tf.placeholder(tf.float32, [None, 784], name='InputData')
# 0-9 digits recognition => 10 classes
y = tf.placeholder(tf.float32, [None, 10], name='LabelData')

# Directory the TensorBoard event files are written to
logs_path = "./logs/"
# ---------------------------------
"""
We start by creating the layers with name scopes so that the graph in
the tensorboard looks meaningful
"""
# ---------------------------------
def conv2d(input, name, kshape, strides=[1, 1, 1, 1]):
    """Convolution + bias + ReLU layer with 'SAME' padding.

    Args:
        input: 4-D tensor passed to ``tf.nn.conv2d``.
        name: name scope of the layer; also used to build the variable
            names ``'w_' + name`` and ``'b_' + name``.
        kshape: filter shape handed to ``tf.nn.conv2d``; ``kshape[3]`` is
            used as the size of the bias vector.
        strides: strides handed to ``tf.nn.conv2d``.

    Returns:
        The activated output tensor.
    """
    # NOTE: tf.get_variable is scoped by tf.variable_scope, not by
    # tf.name_scope, so the two variables keep the plain names built below.
    with tf.name_scope(name):
        W = tf.get_variable(name='w_' + name,
                            shape=kshape,
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name,
                            shape=[kshape[3]],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        out = tf.nn.conv2d(input, W, strides=strides, padding='SAME')
        out = tf.nn.bias_add(out, b)
        out = tf.nn.relu(out)
        return out
# ---------------------------------
def deconv2d(input, name, kshape, n_outputs, strides=[1, 1]):
    """Transposed convolution layer with ReLU activation and 'SAME' padding.

    Args:
        input: tensor passed to ``tf.contrib.layers.conv2d_transpose``.
        name: name scope of the layer.
        kshape: kernel size, forwarded as ``kernel_size``.
        n_outputs: number of output filters, forwarded as ``num_outputs``.
        strides: stride, forwarded as ``stride``.

    Returns:
        The activated output tensor.
    """
    with tf.name_scope(name):
        out = tf.contrib.layers.conv2d_transpose(input,
                                                 num_outputs=n_outputs,
                                                 kernel_size=kshape,
                                                 stride=strides,
                                                 padding='SAME',
                                                 weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False),
                                                 biases_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                                 activation_fn=tf.nn.relu)
        return out
# ---------------------------------
def maxpool2d(x, name, kshape=[1, 2, 2, 1], strides=[1, 2, 2, 1]):
    """Max-pooling layer with 'SAME' padding.

    Args:
        x: tensor passed to ``tf.nn.max_pool``.
        name: name scope of the layer.
        kshape: size of the pooling window, forwarded as ``ksize``.
        strides: strides of the pooling window.

    Returns:
        The pooled tensor.
    """
    with tf.name_scope(name):
        out = tf.nn.max_pool(x,
                             ksize=kshape,  # size of window
                             strides=strides,
                             padding='SAME')
        return out
# ---------------------------------
def upsample(input, name, factor=[2, 2]):
    """Bilinear up-sampling of dimensions 1 and 2 of ``input``.

    Args:
        input: tensor whose static dimensions 1 and 2 are scaled.
        name: name scope of the layer.
        factor: multipliers applied to dimensions 1 and 2 respectively.

    Returns:
        The resized tensor.
    """
    # The target size is computed from the static shape, so dimensions 1 and
    # 2 of `input` must be known when the graph is built.
    size = [int(input.shape[1] * factor[0]), int(input.shape[2] * factor[1])]
    with tf.name_scope(name):
        out = tf.image.resize_bilinear(input, size=size, align_corners=None, name=None)
        return out
# ---------------------------------
def fullyConnected(input, name, output_size):
    """Fully connected layer with ReLU activation.

    Every dimension of ``input`` except the first is flattened before the
    matrix multiplication.

    Args:
        input: tensor whose non-leading dimensions are statically known.
        name: name scope of the layer; also used to build the variable
            names ``'w_' + name`` and ``'b_' + name``.
        output_size: number of output units.

    Returns:
        Tensor of shape ``[-1, output_size]``.
    """
    with tf.name_scope(name):
        input_size = input.shape[1:]
        input_size = int(np.prod(input_size))
        W = tf.get_variable(name='w_' + name,
                            shape=[input_size, output_size],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name,
                            shape=[output_size],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        input = tf.reshape(input, [-1, input_size])
        out = tf.nn.relu(tf.add(tf.matmul(input, W), b))
        return out
# ---------------------------------
def dropout(input, name, keep_rate):
    """Dropout layer.

    Args:
        input: tensor passed to ``tf.nn.dropout``.
        name: name scope of the layer.
        keep_rate: keep probability forwarded to ``tf.nn.dropout``; either a
            Python float or a scalar tensor (so it can be fed at run time).

    Returns:
        The tensor returned by ``tf.nn.dropout``.
    """
    with tf.name_scope(name):
        out = tf.nn.dropout(input, keep_rate)
        return out
# ---------------------------------
# Let us now design the autoencoder
def ConvAutoEncoder(x, name, keep_rate=0.75):
    """Build the convolutional autoencoder graph and its reconstruction cost.

    We want to get dimensionality reduction of 784 to 196
    Layers:
        input --> 28, 28 (784)
        conv1 --> kernel size: (5,5), n_filters: 25
        pool1 --> 14, 14, 25
        dropout1 --> keep_rate
        reshape --> 14*14*25
        FC1 --> 14*14*25, 14*14*5
        dropout2 --> keep_rate
        FC2 --> 14*14*5, 196 --> output is the encoder vars
        FC3 --> 196, 14*14*5
        dropout3 --> keep_rate
        FC4 --> 14*14*5, 14*14*25
        dropout4 --> keep_rate
        reshape --> 14, 14, 25
        deconv1 --> kernel size: (5,5), n_filters: 25
        upsample1 --> 28, 28, 25
        FullyConnected (outputlayer) --> 28*28*25, 28*28

    Args:
        x: input tensor; it is reshaped to ``[-1, 28, 28, 1]`` and is also
            the reconstruction target of the cost.
        name: name scope wrapping the whole network.
        keep_rate: keep probability shared by the four dropout layers.
            Defaults to 0.75; pass a scalar tensor to be able to switch
            dropout off (1.0) when evaluating.

    Returns:
        Tuple ``(output, cost)``: the reconstruction and the mean squared
        error between the reconstruction and ``x``.
    """
    with tf.name_scope(name):
        images = tf.reshape(x, shape=[-1, 28, 28, 1])

        # coding part
        c1 = conv2d(images, name='c1', kshape=[5, 5, 1, 25])
        p1 = maxpool2d(c1, name='p1')
        do1 = dropout(p1, name='do1', keep_rate=keep_rate)
        do1 = tf.reshape(do1, shape=[-1, 14*14*25])
        fc1 = fullyConnected(do1, name='fc1', output_size=14*14*5)
        do2 = dropout(fc1, name='do2', keep_rate=keep_rate)
        fc2 = fullyConnected(do2, name='fc2', output_size=14*14)
        # Decoding part
        fc3 = fullyConnected(fc2, name='fc3', output_size=14 * 14 * 5)
        do3 = dropout(fc3, name='do3', keep_rate=keep_rate)
        fc4 = fullyConnected(do3, name='fc4', output_size=14 * 14 * 25)
        # This scope used to be named 'do3' as well (copy-paste slip), which
        # gave two different layers the same requested scope name.
        do4 = dropout(fc4, name='do4', keep_rate=keep_rate)
        do4 = tf.reshape(do4, shape=[-1, 14, 14, 25])
        dc1 = deconv2d(do4, name='dc1', kshape=[5, 5], n_outputs=25)
        up1 = upsample(dc1, name='up1', factor=[2, 2])
        output = fullyConnected(up1, name='output', output_size=28*28)
        with tf.name_scope('cost'):
            cost = tf.reduce_mean(tf.square(tf.subtract(output, x)))
        return output, cost
# ---------------------------------
def train_network(x):
    """Train the autoencoder on MNIST and print the cost on the test set.

    Writes one "cost" summary per training batch to ``logs_path`` and prints
    the average training cost after every epoch.

    Args:
        x: input placeholder fed with the image batches.
    """
    train_keep_rate = 0.75
    # Defaults to 1.0 (dropout disabled) so that any evaluation which does
    # not feed it, such as the test cost below, is deterministic.
    keep_prob = tf.placeholder_with_default(1.0, shape=[], name='KeepProb')

    _, cost = ConvAutoEncoder(x, 'ConvAutoEnc', keep_rate=keep_prob)
    with tf.name_scope('opt'):
        optimizer = tf.train.AdamOptimizer().minimize(cost)

    # Create a summary to monitor cost tensor
    tf.summary.scalar("cost", cost)

    # Merge all summaries into a single op
    merged_summary_op = tf.summary.merge_all()

    n_epochs = 5
    n_batches = mnist.train.num_examples // batch_size
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # create log writer object
        writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
        try:
            for epoch in range(n_epochs):
                avg_cost = 0
                # Loop over all batches
                for i in range(n_batches):
                    # The labels are not needed to train an autoencoder.
                    batch_x, _ = mnist.train.next_batch(batch_size)
                    # Run optimization op (backprop) and cost op (to get loss value)
                    _, c, summary = sess.run(
                        [optimizer, cost, merged_summary_op],
                        feed_dict={x: batch_x, keep_prob: train_keep_rate})
                    # Compute average loss
                    avg_cost += c / n_batches
                    # write log
                    writer.add_summary(summary, epoch * n_batches + i)

                # Display logs per epoch step
                print('Epoch', epoch+1, ' / ', n_epochs, 'cost:', avg_cost)
        finally:
            # Flush pending events to disk even if training is interrupted.
            writer.close()
        print('Optimization Finished')
        print('Cost:', cost.eval({x: mnist.test.images}))


train_network(x)
5 | 6 | The layers are as follows: 7 | 8 | Encoder part: 9 | - input layer 10 | - convolution 11 | - maxpool 12 | - drop out 13 | - fully connected 14 | - drop out 15 | - fully connected 16 | 17 | Decoder part: 18 | - fully connected 19 | - drop out 20 | - fully connected 21 | - drop out 22 | - deconvolution 23 | - upsample 24 | - fully connected 25 | 26 | We test the autoencoder on the MNIST database, and reduce the dimension of the inputs from 28*28 = 784 to 14*14 = 196 at the encoding layer. 27 | -------------------------------------------------------------------------------- /graph-run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arashsaber/Deep-Convolutional-AutoEncoder/bc697e81fbf810d0c704b472f49d72222dd1641a/graph-run1.png -------------------------------------------------------------------------------- /graph-run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/arashsaber/Deep-Convolutional-AutoEncoder/bc697e81fbf810d0c704b472f49d72222dd1641a/graph-run2.png --------------------------------------------------------------------------------