├── README.md ├── all_conv_autoencoder.py ├── input_data.py ├── mnist.py └── typical_images ├── 40_steps_in.png └── lots_o_steps.png /README.md: -------------------------------------------------------------------------------- 1 | # All-Convnet-Autoencoder-Example 2 | Just a simple usage example of the `conv2d_transpose` function in TensorFlow. It's run on MNIST. I was having a little trouble understanding the transpose conv stuff, so I thought I would upload and share my simple example. 3 | 4 | # Why All Convnet Autoencoder 5 | All-convnet autoencoders can be pretty powerful. They allow you to use modern network architectures on image autoencoder problems. I haven't seen too many papers on them, but I haven't been looking very hard. The best source I have seen explaining the convolutional transpose is [here](http://arxiv.org/pdf/1511.06434v2.pdf). 6 | 7 | # How well it does 8 | It does OK. Having it reduce to a whopping 245 dimensions (MNIST is 784) works well. I don't have a GPU right now, so I can't test it out too well. It was getting stuck in local optima like crazy, so I put dropout on the first layer at 20%. I have trained it to do this! 
# README.md (continued)
#
# before
# ![alt text](https://github.com/loliverhennigh/All-Convnet-Autoencoder-Example/blob/master/typical_images/40_steps_in.png)
# after
# ![alt text](https://github.com/loliverhennigh/All-Convnet-Autoencoder-Example/blob/master/typical_images/lots_o_steps.png)
#
# --------------------------------------------------------------------------------
# /all_conv_autoencoder.py:
# --------------------------------------------------------------------------------
"""All-convolutional autoencoder trained on MNIST.

The encoder uses only convolutions (stride-2 convolutions take the place of
pooling) to squeeze a 28x28x1 image down to a 7x7x5 = 245 value code.  The
decoder mirrors the encoder with `tf.nn.conv2d_transpose`.  Every 20 steps
the script prints the reconstruction loss on the current batch and writes
one reconstructed digit to `new_run_1.png`.
"""

import input_data
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)


def weight_variable(shape):
    """Return a tf.Variable of `shape` initialised from a truncated normal (stddev 0.1)."""
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    """Return a tf.Variable of `shape` filled with the constant 0.1."""
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


def conv2d(x, W):
    """Convolve `x` with filter `W` using stride 1 and SAME padding."""
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def pool_2x2(x, W):
    """Convolve `x` with filter `W` using stride 2 and SAME padding.

    Despite the name this is a learned convolution, not a pooling op; the
    stride of 2 is what performs the down-sampling.
    """
    return tf.nn.conv2d(x, W, strides=[1, 2, 2, 1], padding='SAME')


sess = tf.InteractiveSession()

# ins and outs
x = tf.placeholder(tf.float32, [None, 784])
keep_prob = tf.placeholder("float")  # dropout on the input acts as a regularizer
x_norm = tf.nn.dropout(x, keep_prob)
y_ = tf.placeholder(tf.float32, [None, 784])
x_image = tf.reshape(x_norm, [-1, 28, 28, 1])
# Need the (dynamic) batch size for the output shapes of the transpose layers.
batch_size = tf.shape(x)[0]

# Define all the weights for the encoder part
W_conv1 = weight_variable([5, 5, 1, 32])
b_conv1 = bias_variable([32])

W_pool1 = weight_variable([2, 2, 32, 32])
b_pool1 = bias_variable([32])

W_conv2 = weight_variable([5, 5, 32, 64])
b_conv2 = bias_variable([64])

W_pool2 = weight_variable([2, 2, 64, 64])
b_pool2 = bias_variable([64])

W_conv3 = weight_variable([1, 1, 64, 64])
b_conv3 = bias_variable([64])

W_conv4 = weight_variable([1, 1, 64, 5])
b_conv4 = bias_variable([5])

# Calc all layers
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
h_pool1 = tf.nn.relu(pool_2x2(h_conv1, W_pool1) + b_pool1)
h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
h_pool2 = tf.nn.relu(pool_2x2(h_conv2, W_pool2) + b_pool2)
h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
h_conv4 = tf.nn.relu(conv2d(h_conv3, W_conv4) + b_conv4)

######################################################
## Now we have the minimal layer as a 7x7x5 = 245   ##
######################################################

# Define all the weights for the decoder part and the deconv shapes.
# That's the tricky bit, because you have to start thinking about the
# height and width of every output explicitly.  Each decoder filter has the
# same shape as its encoder twin: conv2d_transpose reads the last two filter
# dimensions as [output_channels, in_channels], i.e. reversed w.r.t. conv2d.
W_conv5 = weight_variable([1, 1, 64, 5])
b_conv5 = bias_variable([64])
deconv_shape_conv5 = tf.pack([batch_size, 7, 7, 64])

W_pool3 = weight_variable([2, 2, 64, 64])
b_pool3 = bias_variable([64])
deconv_shape_pool3 = tf.pack([batch_size, 14, 14, 64])

W_conv6 = weight_variable([5, 5, 32, 64])
b_conv6 = bias_variable([32])
deconv_shape_conv6 = tf.pack([batch_size, 14, 14, 32])

W_pool4 = weight_variable([2, 2, 32, 32])
b_pool4 = bias_variable([32])
deconv_shape_pool4 = tf.pack([batch_size, 28, 28, 32])

W_conv7 = weight_variable([5, 5, 1, 32])
b_conv7 = bias_variable([1])
deconv_shape_conv7 = tf.pack([batch_size, 28, 28, 1])

# Now the conv2d_transpose part. Hopefully just looking at the encoder part
# and the decoder part side by side will make it clear how it works.
h_conv5 = tf.nn.relu(tf.nn.conv2d_transpose(
    h_conv4, W_conv5, output_shape=deconv_shape_conv5,
    strides=[1, 1, 1, 1], padding='SAME') + b_conv5)
h_pool3 = tf.nn.relu(tf.nn.conv2d_transpose(
    h_conv5, W_pool3, output_shape=deconv_shape_pool3,
    strides=[1, 2, 2, 1], padding='SAME') + b_pool3)
h_conv6 = tf.nn.relu(tf.nn.conv2d_transpose(
    h_pool3, W_conv6, output_shape=deconv_shape_conv6,
    strides=[1, 1, 1, 1], padding='SAME') + b_conv6)
h_pool4 = tf.nn.relu(tf.nn.conv2d_transpose(
    h_conv6, W_pool4, output_shape=deconv_shape_pool4,
    strides=[1, 2, 2, 1], padding='SAME') + b_pool4)
h_conv7 = tf.nn.relu(tf.nn.conv2d_transpose(
    h_pool4, W_conv7, output_shape=deconv_shape_conv7,
    strides=[1, 1, 1, 1], padding='SAME') + b_conv7)

y_conv = tf.reshape(h_conv7, [-1, 784])

# Reconstruction loss: half the summed squared pixel error over the batch.
error = tf.nn.l2_loss(y_ - y_conv)
train_step = tf.train.AdamOptimizer(1e-4).minimize(error)  # learning rate smaller than normal
# Kept under its historical name, but this is the loss above (lower is
# better), not a classification accuracy.
accuracy = error

sess.run(tf.initialize_all_variables())
for i in range(20000):
    batch = mnist.train.next_batch(50)
    images = batch[0]  # the autoencoder target is the input itself
    if i % 20 == 0:
        # One forward pass yields both the loss and the reconstructions.
        train_loss, new_im = sess.run(
            [accuracy, y_conv],
            feed_dict={x: images, y_: images, keep_prob: 1.0})
        print("step %d, training loss %g" % (i, train_loss))
        print("Saving test image to new_run_1.png")
        # Clear the figure first; otherwise every report stacks another
        # image on the same axes and memory grows for the whole run.
        plt.clf()
        plt.imshow(new_im[1].reshape((28, 28)))
        plt.savefig('new_run_1.png')
        print("Saved")
    train_step.run(feed_dict={x: images, y_: images, keep_prob: 0.8})

# --------------------------------------------------------------------------------
# /input_data.py:
# --------------------------------------------------------------------------------
# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
14 | # ============================================================================== 15 | 16 | """Functions for downloading and reading MNIST data.""" 17 | from __future__ import absolute_import 18 | from __future__ import division 19 | from __future__ import print_function 20 | 21 | import gzip 22 | import os 23 | import tempfile 24 | 25 | import numpy 26 | from six.moves import urllib 27 | from six.moves import xrange # pylint: disable=redefined-builtin 28 | import tensorflow as tf 29 | 30 | SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' 31 | 32 | 33 | def maybe_download(filename, work_directory): 34 | """Download the data from Yann's website, unless it's already here.""" 35 | if not tf.gfile.Exists(work_directory): 36 | tf.gfile.MakeDirs(work_directory) 37 | filepath = os.path.join(work_directory, filename) 38 | if not tf.gfile.Exists(filepath): 39 | with tempfile.NamedTemporaryFile() as tmpfile: 40 | temp_file_name = tmpfile.name 41 | urllib.request.urlretrieve(SOURCE_URL + filename, temp_file_name) 42 | tf.gfile.Copy(temp_file_name, filepath) 43 | with tf.gfile.GFile(filepath) as f: 44 | size = f.Size() 45 | print('Successfully downloaded', filename, size, 'bytes.') 46 | return filepath 47 | 48 | 49 | def _read32(bytestream): 50 | dt = numpy.dtype(numpy.uint32).newbyteorder('>') 51 | return numpy.frombuffer(bytestream.read(4), dtype=dt)[0] 52 | 53 | 54 | def extract_images(filename): 55 | """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" 56 | print('Extracting', filename) 57 | with tf.gfile.Open(filename, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream: 58 | magic = _read32(bytestream) 59 | if magic != 2051: 60 | raise ValueError( 61 | 'Invalid magic number %d in MNIST image file: %s' % 62 | (magic, filename)) 63 | num_images = _read32(bytestream) 64 | rows = _read32(bytestream) 65 | cols = _read32(bytestream) 66 | buf = bytestream.read(rows * cols * num_images) 67 | data = numpy.frombuffer(buf, dtype=numpy.uint8) 68 | data = 
data.reshape(num_images, rows, cols, 1) 69 | return data 70 | 71 | 72 | def dense_to_one_hot(labels_dense, num_classes): 73 | """Convert class labels from scalars to one-hot vectors.""" 74 | num_labels = labels_dense.shape[0] 75 | index_offset = numpy.arange(num_labels) * num_classes 76 | labels_one_hot = numpy.zeros((num_labels, num_classes)) 77 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 78 | return labels_one_hot 79 | 80 | 81 | def extract_labels(filename, one_hot=False, num_classes=10): 82 | """Extract the labels into a 1D uint8 numpy array [index].""" 83 | print('Extracting', filename) 84 | with tf.gfile.Open(filename, 'rb') as f, gzip.GzipFile(fileobj=f) as bytestream: 85 | magic = _read32(bytestream) 86 | if magic != 2049: 87 | raise ValueError( 88 | 'Invalid magic number %d in MNIST label file: %s' % 89 | (magic, filename)) 90 | num_items = _read32(bytestream) 91 | buf = bytestream.read(num_items) 92 | labels = numpy.frombuffer(buf, dtype=numpy.uint8) 93 | if one_hot: 94 | return dense_to_one_hot(labels, num_classes) 95 | return labels 96 | 97 | 98 | class DataSet(object): 99 | 100 | def __init__(self, images, labels, fake_data=False, one_hot=False, 101 | dtype=tf.float32): 102 | """Construct a DataSet. 103 | 104 | one_hot arg is used only if fake_data is true. `dtype` can be either 105 | `uint8` to leave the input as `[0, 255]`, or `float32` to rescale into 106 | `[0, 1]`. 
107 | """ 108 | dtype = tf.as_dtype(dtype).base_dtype 109 | if dtype not in (tf.uint8, tf.float32): 110 | raise TypeError('Invalid image dtype %r, expected uint8 or float32' % 111 | dtype) 112 | if fake_data: 113 | self._num_examples = 10000 114 | self.one_hot = one_hot 115 | else: 116 | assert images.shape[0] == labels.shape[0], ( 117 | 'images.shape: %s labels.shape: %s' % (images.shape, 118 | labels.shape)) 119 | self._num_examples = images.shape[0] 120 | 121 | # Convert shape from [num examples, rows, columns, depth] 122 | # to [num examples, rows*columns] (assuming depth == 1) 123 | assert images.shape[3] == 1 124 | images = images.reshape(images.shape[0], 125 | images.shape[1] * images.shape[2]) 126 | if dtype == tf.float32: 127 | # Convert from [0, 255] -> [0.0, 1.0]. 128 | images = images.astype(numpy.float32) 129 | images = numpy.multiply(images, 1.0 / 255.0) 130 | self._images = images 131 | self._labels = labels 132 | self._epochs_completed = 0 133 | self._index_in_epoch = 0 134 | 135 | @property 136 | def images(self): 137 | return self._images 138 | 139 | @property 140 | def labels(self): 141 | return self._labels 142 | 143 | @property 144 | def num_examples(self): 145 | return self._num_examples 146 | 147 | @property 148 | def epochs_completed(self): 149 | return self._epochs_completed 150 | 151 | def next_batch(self, batch_size, fake_data=False): 152 | """Return the next `batch_size` examples from this data set.""" 153 | if fake_data: 154 | fake_image = [1] * 784 155 | if self.one_hot: 156 | fake_label = [1] + [0] * 9 157 | else: 158 | fake_label = 0 159 | return [fake_image for _ in xrange(batch_size)], [ 160 | fake_label for _ in xrange(batch_size)] 161 | start = self._index_in_epoch 162 | self._index_in_epoch += batch_size 163 | if self._index_in_epoch > self._num_examples: 164 | # Finished epoch 165 | self._epochs_completed += 1 166 | # Shuffle the data 167 | perm = numpy.arange(self._num_examples) 168 | numpy.random.shuffle(perm) 169 | 
self._images = self._images[perm] 170 | self._labels = self._labels[perm] 171 | # Start next epoch 172 | start = 0 173 | self._index_in_epoch = batch_size 174 | assert batch_size <= self._num_examples 175 | end = self._index_in_epoch 176 | return self._images[start:end], self._labels[start:end] 177 | 178 | 179 | def read_data_sets(train_dir, fake_data=False, one_hot=False, dtype=tf.float32): 180 | class DataSets(object): 181 | pass 182 | data_sets = DataSets() 183 | 184 | if fake_data: 185 | def fake(): 186 | return DataSet([], [], fake_data=True, one_hot=one_hot, dtype=dtype) 187 | data_sets.train = fake() 188 | data_sets.validation = fake() 189 | data_sets.test = fake() 190 | return data_sets 191 | 192 | TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' 193 | TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' 194 | TEST_IMAGES = 't10k-images-idx3-ubyte.gz' 195 | TEST_LABELS = 't10k-labels-idx1-ubyte.gz' 196 | VALIDATION_SIZE = 5000 197 | 198 | local_file = maybe_download(TRAIN_IMAGES, train_dir) 199 | train_images = extract_images(local_file) 200 | 201 | local_file = maybe_download(TRAIN_LABELS, train_dir) 202 | train_labels = extract_labels(local_file, one_hot=one_hot) 203 | 204 | local_file = maybe_download(TEST_IMAGES, train_dir) 205 | test_images = extract_images(local_file) 206 | 207 | local_file = maybe_download(TEST_LABELS, train_dir) 208 | test_labels = extract_labels(local_file, one_hot=one_hot) 209 | 210 | validation_images = train_images[:VALIDATION_SIZE] 211 | validation_labels = train_labels[:VALIDATION_SIZE] 212 | train_images = train_images[VALIDATION_SIZE:] 213 | train_labels = train_labels[VALIDATION_SIZE:] 214 | 215 | data_sets.train = DataSet(train_images, train_labels, dtype=dtype) 216 | data_sets.validation = DataSet(validation_images, validation_labels, 217 | dtype=dtype) 218 | data_sets.test = DataSet(test_images, test_labels, dtype=dtype) 219 | 220 | return data_sets 221 | -------------------------------------------------------------------------------- 
/mnist.py: -------------------------------------------------------------------------------- 1 | # Copyright 2015 Google Inc. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Builds the MNIST network. 17 | 18 | Implements the inference/loss/training pattern for model building. 19 | 20 | 1. inference() - Builds the model as far as is required for running the network 21 | forward to make predictions. 22 | 2. loss() - Adds to the inference model the layers required to generate loss. 23 | 3. training() - Adds to the loss model the Ops required to generate and 24 | apply gradients. 25 | 26 | This file is used by the various "fully_connected_*.py" files and not meant to 27 | be run. 28 | """ 29 | from __future__ import absolute_import 30 | from __future__ import division 31 | from __future__ import print_function 32 | 33 | import math 34 | 35 | import tensorflow as tf 36 | 37 | # The MNIST dataset has 10 classes, representing the digits 0 through 9. 38 | NUM_CLASSES = 10 39 | 40 | # The MNIST images are always 28x28 pixels. 41 | IMAGE_SIZE = 28 42 | IMAGE_PIXELS = IMAGE_SIZE * IMAGE_SIZE 43 | 44 | 45 | def inference(images, hidden1_units, hidden2_units): 46 | """Build the MNIST model up to where it may be used for inference. 47 | 48 | Args: 49 | images: Images placeholder, from inputs(). 
50 | hidden1_units: Size of the first hidden layer. 51 | hidden2_units: Size of the second hidden layer. 52 | 53 | Returns: 54 | softmax_linear: Output tensor with the computed logits. 55 | """ 56 | # Hidden 1 57 | with tf.name_scope('hidden1'): 58 | weights = tf.Variable( 59 | tf.truncated_normal([IMAGE_PIXELS, hidden1_units], 60 | stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))), 61 | name='weights') 62 | biases = tf.Variable(tf.zeros([hidden1_units]), 63 | name='biases') 64 | hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases) 65 | # Hidden 2 66 | with tf.name_scope('hidden2'): 67 | weights = tf.Variable( 68 | tf.truncated_normal([hidden1_units, hidden2_units], 69 | stddev=1.0 / math.sqrt(float(hidden1_units))), 70 | name='weights') 71 | biases = tf.Variable(tf.zeros([hidden2_units]), 72 | name='biases') 73 | hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases) 74 | # Linear 75 | with tf.name_scope('softmax_linear'): 76 | weights = tf.Variable( 77 | tf.truncated_normal([hidden2_units, NUM_CLASSES], 78 | stddev=1.0 / math.sqrt(float(hidden2_units))), 79 | name='weights') 80 | biases = tf.Variable(tf.zeros([NUM_CLASSES]), 81 | name='biases') 82 | logits = tf.matmul(hidden2, weights) + biases 83 | return logits 84 | 85 | 86 | def loss(logits, labels): 87 | """Calculates the loss from the logits and the labels. 88 | 89 | Args: 90 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 91 | labels: Labels tensor, int32 - [batch_size]. 92 | 93 | Returns: 94 | loss: Loss tensor of type float. 95 | """ 96 | labels = tf.to_int64(labels) 97 | cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( 98 | logits, labels, name='xentropy') 99 | loss = tf.reduce_mean(cross_entropy, name='xentropy_mean') 100 | return loss 101 | 102 | 103 | def training(loss, learning_rate): 104 | """Sets up the training Ops. 105 | 106 | Creates a summarizer to track the loss over time in TensorBoard. 
107 | 108 | Creates an optimizer and applies the gradients to all trainable variables. 109 | 110 | The Op returned by this function is what must be passed to the 111 | `sess.run()` call to cause the model to train. 112 | 113 | Args: 114 | loss: Loss tensor, from loss(). 115 | learning_rate: The learning rate to use for gradient descent. 116 | 117 | Returns: 118 | train_op: The Op for training. 119 | """ 120 | # Add a scalar summary for the snapshot loss. 121 | tf.scalar_summary(loss.op.name, loss) 122 | # Create the gradient descent optimizer with the given learning rate. 123 | optimizer = tf.train.GradientDescentOptimizer(learning_rate) 124 | # Create a variable to track the global step. 125 | global_step = tf.Variable(0, name='global_step', trainable=False) 126 | # Use the optimizer to apply the gradients that minimize the loss 127 | # (and also increment the global step counter) as a single training step. 128 | train_op = optimizer.minimize(loss, global_step=global_step) 129 | return train_op 130 | 131 | 132 | def evaluation(logits, labels): 133 | """Evaluate the quality of the logits at predicting the label. 134 | 135 | Args: 136 | logits: Logits tensor, float - [batch_size, NUM_CLASSES]. 137 | labels: Labels tensor, int32 - [batch_size], with values in the 138 | range [0, NUM_CLASSES). 139 | 140 | Returns: 141 | A scalar int32 tensor with the number of examples (out of batch_size) 142 | that were predicted correctly. 143 | """ 144 | # For a classifier model, we can use the in_top_k Op. 145 | # It returns a bool tensor with shape [batch_size] that is true for 146 | # the examples where the label is in the top k (here k=1) 147 | # of all logits for that example. 148 | correct = tf.nn.in_top_k(logits, labels, 1) 149 | # Return the number of true entries. 
150 | return tf.reduce_sum(tf.cast(correct, tf.int32)) 151 | -------------------------------------------------------------------------------- /typical_images/40_steps_in.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loliverhennigh/All-Convnet-Autoencoder-Example/eb512de70122ce0b3e4df242edc6d7eb0f16eec8/typical_images/40_steps_in.png -------------------------------------------------------------------------------- /typical_images/lots_o_steps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/loliverhennigh/All-Convnet-Autoencoder-Example/eb512de70122ce0b3e4df242edc6d7eb0f16eec8/typical_images/lots_o_steps.png --------------------------------------------------------------------------------