├── ConvAutoencoder.py
├── README.md
├── celebF.tar.gz
└── sample_output
    ├── A0.png
    ├── A1.png
    ├── B0.png
    ├── B1.png
    ├── C0.png
    ├── C1.png
    ├── D0.png
    ├── D1.png
    └── cost.png

/ConvAutoencoder.py:
--------------------------------------------------------------------------------
"""
TF Convolutional Autoencoder

Arash Saber Tehrani - May 2017
Reference: https://github.com/arashsaber/Deep-Convolutional-AutoEncoder

Modified David Yu - July 2018
Reference: https://github.com/MrDavidYu/TF_Convolutional_Autoencoder
Add-ons:
1. Allows for custom .jpg input
2. Checkpoint save/restore
3. TensorBoard logs for input/output images
4. Input autorescaling
5. ReLU activation replaced by LeakyReLU

"""
import os
import re
import scipy.misc  # N.B. scipy.misc.imread/imresize require SciPy < 1.2 (with Pillow installed)
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from glob import glob

# Some important consts
num_examples = 669
batch_size = 30
n_epochs = 1000
save_steps = 500  # Number of training batches between checkpoint saves

checkpoint_dir = "./ckpt/"
model_name = "ConvAutoEnc.model"
logs_dir = "./logs/run1/"

# Fetch input data (faces/trees/imgs)
data_dir = "./data/celebG/"
data_path = os.path.join(data_dir, '*.jpg')
data = glob(data_path)

if len(data) == 0:
    raise Exception("[!] No data found in '" + data_path + "'")


'''
Some util functions from https://github.com/carpedm20/DCGAN-tensorflow
'''

def path_to_img(path, grayscale=False):
    if grayscale:
        return scipy.misc.imread(path, flatten=True).astype(np.float)
    else:
        return scipy.misc.imread(path).astype(np.float)

def center_crop(x, crop_h, crop_w,
                resize_h=64, resize_w=64):
    if crop_w is None:
        crop_w = crop_h
    h, w = x.shape[:2]
    j = int(round((h - crop_h)/2.))
    i = int(round((w - crop_w)/2.))
    return scipy.misc.imresize(
        x[j:j+crop_h, i:i+crop_w], [resize_h, resize_w])

def transform(image, input_height, input_width,
              resize_height=48, resize_width=48, crop=True):
    if crop:
        cropped_image = center_crop(
            image, input_height, input_width,
            resize_height, resize_width)
    else:
        cropped_image = scipy.misc.imresize(image, [resize_height, resize_width])
    return np.array(cropped_image)/127.5 - 1.

def autoresize(image_path, input_height, input_width,
               resize_height=48, resize_width=48,
               crop=True, grayscale=False):
    image = path_to_img(image_path, grayscale)
    return transform(image, input_height, input_width,
                     resize_height, resize_width, crop)

np.random.shuffle(data)
imread_img = path_to_img(data[0])  # test-read an image

if len(imread_img.shape) >= 3:  # non-grayscale image, i.e. it has a channel dimension
    c_dim = path_to_img(data[0]).shape[-1]
else:
    c_dim = 1

is_grayscale = (c_dim == 1)

'''
tf Graph Input
'''
x = tf.placeholder(tf.float32, [None, 48, 48, 3], name='InputData')

if __debug__:
    print("Reading input from: " + data_dir)
    print("Input image shape: " + str(imread_img.shape))
    print("Assigning input tensor of shape: " + str(x.shape))
    print("Writing checkpoints to: " + checkpoint_dir)
    print("Writing TensorBoard logs to: " + logs_dir)

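# N.B. transform() above rescales raw pixel intensities from [0, 255] to
# [-1, 1] via x/127.5 - 1. A quick NumPy check of this mapping (illustrative
# sketch only, not executed by this script):
#
#   >>> np.array([0., 127.5, 255.]) / 127.5 - 1.
#   array([-1.,  0.,  1.])
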
# strides = [Batch, Height, Width, Channels] in the default NHWC data_format.
# Batch and Channels must always be set to 1. If Channels were set to 3, we
# would increment the index for the color channel by 3 every time we convolve
# the filter, so only one of the channels would be used and the other two
# skipped. If we change the Batch number, then some images in the batch are
# skipped.
#
# To calculate the size of the output of a CONV layer:
# OutWidth = (InWidth - FilterWidth + 2*Padding)/Stride + 1
def conv2d(input, name, kshape, strides=[1, 1, 1, 1]):
    with tf.variable_scope(name):
        W = tf.get_variable(name='w_' + name,
                            shape=kshape,
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name,
                            shape=[kshape[3]],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        out = tf.nn.conv2d(input, W, strides=strides, padding='SAME')
        out = tf.nn.bias_add(out, b)
        out = tf.nn.leaky_relu(out)
        return out


# N.B. this uses tf.contrib.layers.conv2d_transpose; do not confuse it with
# tf.layers.conv2d_transpose
def deconv2d(input, name, kshape, n_outputs, strides=[1, 1]):
    with tf.variable_scope(name):
        out = tf.contrib.layers.conv2d_transpose(input,
                                                 num_outputs=n_outputs,
                                                 kernel_size=kshape,
                                                 stride=strides,
                                                 padding='SAME',
                                                 weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False),
                                                 biases_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                                 activation_fn=tf.nn.leaky_relu)
        return out

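# Worked example of the CONV size formula above: layer c1 below convolves a
# 48x48 input with a 7x7 filter at stride 1 and 'SAME' padding, i.e.
# Padding = (FilterWidth - 1)/2 = 3, so
#   OutWidth = (48 - 7 + 2*3)/1 + 1 = 48
# In other words, 'SAME' padding with stride 1 preserves the spatial size.
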
# Input to maxpool:  [BatchSize, Width1, Height1, Channels]
# Output of maxpool: [BatchSize, Width2, Height2, Channels]
#
# To calculate the size of the output of a maxpool layer:
# OutWidth = (InWidth - FilterWidth)/Stride + 1
#
# The kernel kshape will typically be [1,2,2,1] for a general
# RGB image input of [batch_size,48,48,3].
# kshape is 1 for batch and channels because we don't want to take
# the maximum over multiple examples or over multiple channels.
def maxpool2d(x, name, kshape=[1, 2, 2, 1], strides=[1, 2, 2, 1]):
    with tf.variable_scope(name):
        out = tf.nn.max_pool(x,
                             ksize=kshape,  # size of window
                             strides=strides,
                             padding='SAME')
        return out


def upsample(input, name, factor=[2, 2]):
    size = [int(input.shape[1] * factor[0]), int(input.shape[2] * factor[1])]
    with tf.variable_scope(name):
        out = tf.image.resize_bilinear(input, size=size, align_corners=None, name=None)
        return out


def fullyConnected(input, name, output_size):
    with tf.variable_scope(name):
        input_size = input.shape[1:]
        input_size = int(np.prod(input_size))  # total number of cells in one input image
        W = tf.get_variable(name='w_' + name,
                            shape=[input_size, output_size],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name,
                            shape=[output_size],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        input = tf.reshape(input, [-1, input_size])
        out = tf.nn.leaky_relu(tf.add(tf.matmul(input, W), b))
        return out


def dropout(input, name, keep_rate):
    with tf.variable_scope(name):
        out = tf.nn.dropout(input, keep_rate)
        return out

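# A quick way to check the layer arithmetic used in ConvAutoEncoder below is
# to probe tensor shapes while building the graph, e.g. (illustrative sketch
# only, not executed by this script):
#
#   probe = conv2d(tf.zeros([1, 48, 48, 3]), name='probe', kshape=[7, 7, 3, 15])
#   print(probe.shape)                             # (1, 48, 48, 15)
#   print(maxpool2d(probe, name='probe_p').shape)  # (1, 24, 24, 15)
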
def ConvAutoEncoder(x, name, reuse=False):
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()

        input = tf.reshape(x, shape=[-1, 48, 48, 3])

        # kshape = [k_h, k_w, in_channels, out_channels]
        c1 = conv2d(input, name='c1', kshape=[7, 7, 3, 15])          # Input: [48,48,3];  Output: [48,48,15]
        p1 = maxpool2d(c1, name='p1')                                # Input: [48,48,15]; Output: [24,24,15]
        do1 = dropout(p1, name='do1', keep_rate=0.75)
        c2 = conv2d(do1, name='c2', kshape=[5, 5, 15, 25])           # Input: [24,24,15]; Output: [24,24,25]
        p2 = maxpool2d(c2, name='p2')                                # Input: [24,24,25]; Output: [12,12,25]
        p2 = tf.reshape(p2, shape=[-1, 12*12*25])                    # Input: [12,12,25]; Output: [12*12*25]
        fc1 = fullyConnected(p2, name='fc1', output_size=12*12*5)    # Input: [12*12*25]; Output: [12*12*5]
        do2 = dropout(fc1, name='do2', keep_rate=0.75)
        fc2 = fullyConnected(do2, name='fc2', output_size=12*12*3)   # Input: [12*12*5];  Output: [12*12*3]
        do3 = dropout(fc2, name='do3', keep_rate=0.75)
        fc3 = fullyConnected(do3, name='fc3', output_size=64)        # Input: [12*12*3];  Output: [64] --> bottleneck layer
        # Decoding part
        fc4 = fullyConnected(fc3, name='fc4', output_size=12*12*3)   # Input: [64];       Output: [12*12*3]
        do4 = dropout(fc4, name='do4', keep_rate=0.75)
        fc5 = fullyConnected(do4, name='fc5', output_size=12*12*5)   # Input: [12*12*3];  Output: [12*12*5]
        do5 = dropout(fc5, name='do5', keep_rate=0.75)
        fc6 = fullyConnected(do5, name='fc6', output_size=21*21*25)  # Input: [12*12*5];  Output: [21*21*25]
        do6 = dropout(fc6, name='do6', keep_rate=0.75)
        do6 = tf.reshape(do6, shape=[-1, 21, 21, 25])                # Input: [21*21*25]; Output: [21,21,25]
        dc1 = deconv2d(do6, name='dc1', kshape=[5, 5], n_outputs=15) # Input: [21,21,25]; Output: [21,21,15]
        up1 = upsample(dc1, name='up1', factor=[2, 2])               # Input: [21,21,15]; Output: [42,42,15]
        dc2 = deconv2d(up1, name='dc2', kshape=[7, 7], n_outputs=3)  # Input: [42,42,15]; Output: [42,42,3]
        up2 = upsample(dc2, name='up2', factor=[2, 2])               # Input: [42,42,3];  Output: [84,84,3]
        output = fullyConnected(up2, name='output', output_size=48*48*3)  # Input: [84,84,3] flattened; Output: [48*48*3]

        with tf.variable_scope('cost'):
            # N.B. reduce_mean with no axis argument averages over every
            # element, i.e. all pixels of all images in the batch
            cost = tf.reduce_mean(tf.square(tf.subtract(output, tf.reshape(x, shape=[-1, 48*48*3]))))
        return x, tf.reshape(output, shape=[-1, 48, 48, 3]), cost  # returns input, output and cost


def train_network(x):

    with tf.Session() as sess:

        _, _, cost = ConvAutoEncoder(x, 'ConvAutoEnc')
        with tf.variable_scope('opt'):
            optimizer = tf.train.AdamOptimizer().minimize(cost)

        # Create summaries to monitor the cost tensor and the input/output images
        tf.summary.scalar("cost", cost)
        tf.summary.image("face_input", ConvAutoEncoder(x, 'ConvAutoEnc', reuse=True)[0], max_outputs=4)
        tf.summary.image("face_output", ConvAutoEncoder(x, 'ConvAutoEnc', reuse=True)[1], max_outputs=4)
        merged_summary_op = tf.summary.merge_all()  # Merge all summaries into a single op

        sess.run(tf.global_variables_initializer())  # initialize all variables (works around the "memory allocation exceeded 10%" warning)

        # Model saver
        saver = tf.train.Saver()

        counter = 0  # Used for checkpointing
        success, restored_counter = restore(saver, sess)
        if success:
            counter = restored_counter
            print(">>> Restore successful")
        else:
            print(">>> No restore checkpoints detected")

        # Create log writer object
        writer = tf.summary.FileWriter(logs_dir, graph=tf.get_default_graph())

        for epoch in range(n_epochs):
            avg_cost = 0
            n_batches = int(num_examples / batch_size)
            # Loop over all batches
            for i in range(n_batches):
                counter += 1
                print("epoch " + str(epoch) + " batch " + str(i))

                batch_files = data[i*batch_size:(i+1)*batch_size]  # get the current batch of files

                batch = [autoresize(batch_file,
                                    input_height=48,
                                    input_width=48,
                                    resize_height=48,
                                    resize_width=48,
                                    crop=True,
                                    grayscale=False) for batch_file in batch_files]

                batch_images = np.array(batch).astype(np.float32)

                # Run the optimizer and fetch the current cost
                _, c, summary = sess.run([optimizer, cost, merged_summary_op], feed_dict={x: batch_images})

                # Compute average loss
                avg_cost += c / n_batches

                writer.add_summary(summary, epoch * n_batches + i)

                if counter % save_steps == 0:
                    save(saver, counter, sess)

            # Display logs per epoch step
            print('Epoch', epoch + 1, '/', n_epochs, 'cost:', avg_cost)

        print('>>> Optimization Finished')


# Create checkpoint
def save(saver, step, session):
    print(">>> Saving to checkpoint, step: " + str(step))
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    saver.save(session,
               os.path.join(checkpoint_dir, model_name),
               global_step=step)


# Restore from checkpoint
def restore(saver, session):
    print(">>> Restoring from checkpoints...")
    checkpoint_state = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint_state and checkpoint_state.model_checkpoint_path:
        checkpoint_name = os.path.basename(checkpoint_state.model_checkpoint_path)
        saver.restore(session, os.path.join(checkpoint_dir, checkpoint_name))
        # Recover the global step from the checkpoint filename by taking the
        # last run of digits, e.g. "ConvAutoEnc.model-1500" -> 1500
        counter = int(next(re.finditer(r"(\d+)(?!.*\d)", checkpoint_name)).group(0))
        print(">>> Found restore checkpoint {}".format(checkpoint_name))
        return True, counter
    else:
        return False, 0

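# Usage (assuming the directory layout described in the README):
#   $ python ConvAutoencoder.py           # train; checkpoints are written to ./ckpt/
#   $ tensorboard --logdir=./logs/run1/   # inspect the cost curve and image summaries
# Re-running the script restores the latest checkpoint automatically, if one exists.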
train_network(x)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# TF_Convolutional_Autoencoder
#### _Convolutional autoencoder for encoding/decoding RGB images in TensorFlow with high compression_

This is a sample template adapted from Arash Saber Tehrani's Deep-Convolutional-AutoEncoder tutorial https://github.com/arashsaber/Deep-Convolutional-AutoEncoder for encoding/decoding 3-channel images. The template has been fully commented. I have tested this implementation on rescaled samples from the CelebA dataset from CUHK http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html and it produces reasonably decent results after a short period of training. The compression ratio of this implementation is 108: for an input tensor of shape [-1, 48, 48, 3], the bottleneck layer is reduced to a tensor of shape [-1, 64].
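The ratio follows directly from the tensor sizes, e.g. in a Python shell:

```
>>> 48 * 48 * 3   # number of values in one input image
6912
>>> 6912 / 64     # input size / bottleneck size
108.0
```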
**Add-on features:**
* Takes 3-channel images as input instead of MNIST
* Training now performs checkpoint saves and restores
* Both inputs to the encoder and outputs from the decoder are viewable in TensorBoard
* Input autorescaling
* ReLU activation replaced by LeakyReLU to resolve the dying-ReLU problem

**Caveats:**
* It is highly recommended to perform training on a GPU (training 20,000 steps took ~40 min on a Tesla K80 for celebG).
* The input size can be increased, but during testing OOM errors occurred on the K80 for an input size of 84x84. This will be fixed in a later update. For now, if you get any OOM errors in tensor allocation, try reducing the input size.
* Sample output is currently visibly undersaturated owing to noise introduced by dropout and high model bias. These problems should go away with further training.

## Outputs
N.B. The input images are 48x48, hence the blurriness. Additionally, these outputs are from setting n_epochs to 1000, which could be increased for even better results (note the cost function trend).

Inputs:

<img src="./sample_output/A0.png"> <img src="./sample_output/B0.png"> <img src="./sample_output/C0.png"> <img src="./sample_output/D0.png">

Outputs:

<img src="./sample_output/A1.png"> <img src="./sample_output/B1.png"> <img src="./sample_output/C1.png"> <img src="./sample_output/D1.png">

## How to run
1. Make sure to create the directory `./logs/run1/` to save TensorBoard output. For pushing multiple runs to TensorBoard, simply save additional logs as `./logs/run2/`, `./logs/run3/`, etc.

2. Unzip `./celebF.tar.gz` and save the jpegs in `./data/celebG/`

3. Either use the provided image set or your own. If using your own dataset, I recommend ImageMagick for resizing: https://www.imagemagick.org/script/download.php

4. If using ImageMagick, start Bash in `./data/<your_dataset>/`:
```
for file in $PWD/*.jpg
do
convert $file -resize 42x42 $file
done
```

5. In the root dir, run `python ConvAutoencoder.py`

## Debug
Here is a list of common problems:
1. The error (cost) is very high (in the thousands or millions): check that the input images are fetched properly when transforming batch_files to batch_images, etc. An error this high is typical of the large natural differences in MSE between input and output images, and is not caused by a large number of model parameters.
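A quick sanity check (a sketch reusing the variable names from `ConvAutoencoder.py`) is to print the shape and value range of a batch inside the training loop:

```
# right after batch_images is built:
print(batch_images.shape)                      # expect (30, 48, 48, 3)
print(batch_images.min(), batch_images.max())  # expect values within [-1.0, 1.0]
```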
## Additional References
See https://github.com/carpedm20/DCGAN-tensorflow/blob/master/utils.py for several dynamic image-resize functions that I have incorporated into this implementation.

--------------------------------------------------------------------------------
/celebF.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/celebF.tar.gz

--------------------------------------------------------------------------------
/sample_output/A0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/A0.png

--------------------------------------------------------------------------------
/sample_output/A1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/A1.png

--------------------------------------------------------------------------------
/sample_output/B0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/B0.png

--------------------------------------------------------------------------------
/sample_output/B1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/B1.png

--------------------------------------------------------------------------------
/sample_output/C0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/C0.png

--------------------------------------------------------------------------------
/sample_output/C1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/C1.png

--------------------------------------------------------------------------------
/sample_output/D0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/D0.png

--------------------------------------------------------------------------------
/sample_output/D1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/D1.png

--------------------------------------------------------------------------------
/sample_output/cost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/cost.png

--------------------------------------------------------------------------------