├── ConvAutoencoder.py
├── README.md
├── celebF.tar.gz
└── sample_output
├── A0.png
├── A1.png
├── B0.png
├── B1.png
├── C0.png
├── C1.png
├── D0.png
├── D1.png
└── cost.png
/ConvAutoencoder.py:
--------------------------------------------------------------------------------
1 | """
2 | TF Convolutional Autoencoder
3 |
4 | Arash Saber Tehrani - May 2017
5 | Reference: https://github.com/arashsaber/Deep-Convolutional-AutoEncoder
6 |
7 | Modified by David Yu - July 2018
8 | Reference: https://github.com/MrDavidYu/TF_Convolutional_Autoencoder
9 | Add-ons:
10 | 1. Allows for custom .jpg input
11 | 2. Checkpoint save/restore
12 | 3. TensorBoard logs for input/output images
13 | 4. Input autorescaling
14 | 5. ReLU activation replaced by LeakyReLU
15 |
16 | """
17 | import os
18 | import re
19 | import scipy.misc  # N.B. imread/imresize were removed from newer SciPy; this script expects an older SciPy (with Pillow installed)
20 | import numpy as np
21 | import matplotlib.pyplot as plt
22 | import tensorflow as tf
23 | from glob import glob
24 |
25 | # Some important consts
26 | num_examples = 669
27 | batch_size = 30
28 | n_epochs = 1000
29 | save_steps = 500 # Number of training batches between checkpoint saves
30 |
31 | checkpoint_dir = "./ckpt/"
32 | model_name = "ConvAutoEnc.model"
33 | logs_dir = "./logs/run1/"
34 |
35 | # Fetch input data (faces/trees/imgs)
36 | data_dir = "./data/celebG/"
37 | data_path = os.path.join(data_dir, '*.jpg')
38 | data = glob(data_path)
39 |
40 | if len(data) == 0:
41 |     raise Exception("[!] No data found in '" + data_path + "'")
42 |
43 |
44 | '''
45 | Some util functions from https://github.com/carpedm20/DCGAN-tensorflow
46 | '''
47 |
48 | def path_to_img(path, grayscale=False):
49 |   if grayscale:
50 |     return scipy.misc.imread(path, flatten=True).astype(np.float)
51 |   else:
52 |     return scipy.misc.imread(path).astype(np.float)
53 |
54 | def center_crop(x, crop_h, crop_w,
55 | resize_h=64, resize_w=64):
56 | if crop_w is None:
57 | crop_w = crop_h
58 | h, w = x.shape[:2]
59 | j = int(round((h - crop_h)/2.))
60 | i = int(round((w - crop_w)/2.))
61 | return scipy.misc.imresize(
62 | x[j:j+crop_h, i:i+crop_w], [resize_h, resize_w])
63 |
64 | def transform(image, input_height, input_width,
65 | resize_height=48, resize_width=48, crop=True):
66 | if crop:
67 | cropped_image = center_crop(
68 | image, input_height, input_width,
69 | resize_height, resize_width)
70 | else:
71 | cropped_image = scipy.misc.imresize(image, [resize_height, resize_width])
72 | return np.array(cropped_image)/127.5 - 1.
73 |
74 | def autoresize(image_path, input_height, input_width,
75 | resize_height=48, resize_width=48,
76 | crop=True, grayscale=False):
77 | image = path_to_img(image_path, grayscale)
78 | return transform(image, input_height, input_width,
79 | resize_height, resize_width, crop)
80 |
81 | np.random.shuffle(data)
82 | imread_img = path_to_img(data[0]) # test read an image
83 |
84 | if len(imread_img.shape) >= 3: # non-grayscale images have a channel dimension
85 |   c_dim = imread_img.shape[-1] # reuse the test image instead of re-reading from disk
86 | else:
87 | c_dim = 1
88 |
89 | is_grayscale = (c_dim == 1)
90 |
91 | '''
92 | tf Graph Input
93 | '''
94 | x = tf.placeholder(tf.float32, [None, 48, 48, 3], name='InputData')
95 |
96 | if __debug__:
97 | print("Reading input from:" + data_dir)
98 | print("Input image shape:" + str(imread_img.shape))
99 | print("Assigning input tensor of shape:" + str(x.shape))
100 | print("Writing checkpoints to:" + checkpoint_dir)
101 | print("Writing TensorBoard logs to:" + logs_dir)
102 |
103 |
104 | # strides = [Batch, Height, Width, Channels] in the default NHWC data_format. Batch and
105 | # Channels must always be set to 1. If the channel stride were 3, the filter would advance
106 | # the color-channel index by 3 every time it convolves, so only one of the three channels
107 | # would be used and the other two skipped. Likewise, a Batch stride greater than 1 would
108 | # skip images in the batch.
109 | #
110 | # To calculate the size of the output of CONV layer:
111 | # OutWidth = (InWidth - FilterWidth + 2*Padding)/Stride + 1
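    | #
    | # Worked example for c1 below: a 7x7 filter with stride 1 and 'SAME'
    | # padding (so Padding = (7-1)/2 = 3) on a 48x48 input gives
    | # OutWidth = (48 - 7 + 2*3)/1 + 1 = 48, i.e. the spatial size is preserved.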
112 | def conv2d(input, name, kshape, strides=[1, 1, 1, 1]):
113 | with tf.variable_scope(name):
114 | W = tf.get_variable(name='w_' + name,
115 | shape=kshape,
116 | initializer=tf.contrib.layers.xavier_initializer(uniform=False))
117 | b = tf.get_variable(name='b_' + name,
118 | shape=[kshape[3]],
119 | initializer=tf.contrib.layers.xavier_initializer(uniform=False))
120 | out = tf.nn.conv2d(input,W,strides=strides, padding='SAME')
121 | out = tf.nn.bias_add(out, b)
122 | out = tf.nn.leaky_relu(out)
123 | return out
124 |
125 |
126 | # N.B. this uses tf.contrib.layers.conv2d_transpose, which is not to be
127 | # confused with tf.layers.conv2d_transpose (different argument names).
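    | # With stride [1, 1] and 'SAME' padding, conv2d_transpose preserves the
    | # spatial dimensions, so dc1/dc2 below only change the channel count; the
    | # spatial upsampling is done separately by upsample().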
128 | def deconv2d(input, name, kshape, n_outputs, strides=[1, 1]):
129 | with tf.variable_scope(name):
130 | out = tf.contrib.layers.conv2d_transpose(input,
131 | num_outputs= n_outputs,
132 | kernel_size=kshape,
133 | stride=strides,
134 | padding='SAME',
135 | weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False),
136 | biases_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
137 | activation_fn=tf.nn.leaky_relu)
138 | return out
139 |
140 |
141 | # Input to maxpool: [BatchSize, Width1, Height1, Channels]
142 | # Output of maxpool: [BatchSize, Width2, Height2, Channels]
143 | #
144 | # To calculate the size of the output of maxpool layer:
145 | # OutWidth = (InWidth - FilterWidth)/Stride + 1
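    | # e.g. p1 below: a 2x2 window with stride 2 on a 48x48 input gives
    | # OutWidth = (48 - 2)/2 + 1 = 24.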
146 | #
147 | # The kernel kshape will typically be [1,2,2,1] for a general
148 | # RGB image input of [batch_size,48,48,3]
149 | # kshape is 1 for batch and channels because we don't want to take
150 | # the maximum over multiple examples of channels.
151 | def maxpool2d(x,name,kshape=[1, 2, 2, 1], strides=[1, 2, 2, 1]):
152 | with tf.variable_scope(name):
153 | out = tf.nn.max_pool(x,
154 | ksize=kshape, #size of window
155 | strides=strides,
156 | padding='SAME')
157 | return out
158 |
159 |
160 | def upsample(input, name, factor=[2,2]):
161 | size = [int(input.shape[1] * factor[0]), int(input.shape[2] * factor[1])]
162 | with tf.variable_scope(name):
163 |         out = tf.image.resize_bilinear(input, size=size, align_corners=False)
164 | return out
165 |
166 |
167 | def fullyConnected(input, name, output_size):
168 | with tf.variable_scope(name):
169 | input_size = input.shape[1:]
170 | input_size = int(np.prod(input_size)) # get total num of cells in one input image
171 | W = tf.get_variable(name='w_'+name,
172 | shape=[input_size, output_size],
173 | initializer=tf.contrib.layers.xavier_initializer(uniform=False))
174 | b = tf.get_variable(name='b_'+name,
175 | shape=[output_size],
176 | initializer=tf.contrib.layers.xavier_initializer(uniform=False))
177 | input = tf.reshape(input, [-1, input_size])
178 | out = tf.nn.leaky_relu(tf.add(tf.matmul(input, W), b))
179 | return out
180 |
181 |
182 | def dropout(input, name, keep_rate):
183 | with tf.variable_scope(name):
184 | out = tf.nn.dropout(input, keep_rate)
185 | return out
186 |
187 |
188 | def ConvAutoEncoder(x, name, reuse=False):
189 | with tf.variable_scope(name) as scope:
190 | if reuse:
191 | scope.reuse_variables()
192 |
193 | input = tf.reshape(x, shape=[-1, 48, 48, 3])
194 |
195 |         # kshape = [k_h, k_w, in_channels, out_channels]
196 | c1 = conv2d(input, name='c1', kshape=[7, 7, 3, 15]) # Input: [48,48,3]; Output: [48,48,15]
197 | p1 = maxpool2d(c1, name='p1') # Input: [48,48,15]; Output: [24,24,15]
198 | do1 = dropout(p1, name='do1', keep_rate=0.75)
199 | c2 = conv2d(do1, name='c2', kshape=[5, 5, 15, 25]) # Input: [24,24,15]; Output: [24,24,25]
200 | p2 = maxpool2d(c2, name='p2') # Input: [24,24,25]; Output: [12,12,25]
201 | p2 = tf.reshape(p2, shape=[-1, 12*12*25]) # Input: [12,12,25]; Output: [12*12*25]
202 | fc1 = fullyConnected(p2, name='fc1', output_size=12*12*5) # Input: [12*12*25]; Output: [12*12*5]
203 | do2 = dropout(fc1, name='do2', keep_rate=0.75)
204 | fc2 = fullyConnected(do2, name='fc2', output_size=12*12*3) # Input: [12*12*5]; Output: [12*12*3]
205 | do3 = dropout(fc2, name='do3', keep_rate=0.75)
206 | fc3 = fullyConnected(do3, name='fc3', output_size=64) # Input: [12*12*3]; Output: [64] --> bottleneck layer
207 | # Decoding part
208 | fc4 = fullyConnected(fc3, name='fc4', output_size=12*12*3) # Input: [64]; Output: [12*12*3]
209 | do4 = dropout(fc4, name='do4', keep_rate=0.75)
210 | fc5 = fullyConnected(do4, name='fc5', output_size=12*12*5) # Input: [12*12*3]; Output: [12*12*5]
211 | do5 = dropout(fc5, name='do5', keep_rate=0.75)
212 |         fc6 = fullyConnected(do5, name='fc6', output_size=12*12*25) # Input: [12*12*5]; Output: [12*12*25]
213 |         do6 = dropout(fc6, name='do6', keep_rate=0.75)
214 |         do6 = tf.reshape(do6, shape=[-1, 12, 12, 25]) # Input: [12*12*25]; Output: [12,12,25]
215 | dc1 = deconv2d(do6, name='dc1', kshape=[5, 5],n_outputs=15) # Input: [12,12,25]; Output: [12,12,15]
216 | up1 = upsample(dc1, name='up1', factor=[2, 2]) # Input: [12,12,15]; Output: [24,24,15]
217 | dc2 = deconv2d(up1, name='dc2', kshape=[7, 7],n_outputs=3) # Input: [24,24,15]; Output: [24,24,3]
218 | up2 = upsample(dc2, name='up2', factor=[2, 2]) # Input: [24,24,3]; Output: [48,48,3]
219 |         output = fullyConnected(up2, name='output', output_size=48*48*3) # Input: [48,48,3]; Output: [48*48*3]
220 |
221 | with tf.variable_scope('cost'):
222 |         # N.B. reduce_mean is a batch operation: it averages the squared error across the batch
223 |         cost = tf.reduce_mean(tf.square(tf.subtract(output, tf.reshape(x, shape=[-1, 48*48*3]))))
224 |     return x, tf.reshape(output, shape=[-1, 48, 48, 3]), cost # returns input, output, and cost
225 |
226 |
227 | def train_network(x):
228 |
229 | with tf.Session() as sess:
230 |
231 |         input_img, reconstruction, cost = ConvAutoEncoder(x, 'ConvAutoEnc')
232 |         with tf.variable_scope('opt'):
233 |             optimizer = tf.train.AdamOptimizer().minimize(cost)
234 | 
235 |         # Summaries to monitor the cost tensor and the input/output images;
236 |         tf.summary.scalar("cost", cost)
237 |         tf.summary.image("face_input", input_img, max_outputs=4)
238 |         tf.summary.image("face_output", reconstruction, max_outputs=4)
239 | merged_summary_op = tf.summary.merge_all() # Merge all summaries into a single op
240 |
241 |         sess.run(tf.global_variables_initializer()) # initialize all variables; may log TF's "allocation exceeds 10% of system memory" warning
242 |
243 | # Model saver
244 | saver = tf.train.Saver()
245 |
246 | counter = 0 # Used for checkpointing
247 | success, restored_counter = restore(saver, sess)
248 | if success:
249 | counter = restored_counter
250 | print(">>> Restore successful")
251 | else:
252 | print(">>> No restore checkpoints detected")
253 |
254 | # create log writer object
255 | writer = tf.summary.FileWriter(logs_dir, graph=tf.get_default_graph())
256 |
257 | for epoch in range(n_epochs):
258 | avg_cost = 0
259 | n_batches = int(num_examples / batch_size)
260 | # Loop over all batches
261 | for i in range(n_batches):
262 | counter += 1
263 | print("epoch " + str(epoch) + " batch " + str(i))
264 |
265 | batch_files = data[i*batch_size:(i+1)*batch_size] # get the current batch of files
266 |
267 | batch = [autoresize(batch_file,
268 | input_height=48,
269 | input_width=48,
270 | resize_height=48,
271 | resize_width=48,
272 | crop=True,
273 | grayscale=False) for batch_file in batch_files]
274 |
275 | batch_images = np.array(batch).astype(np.float32)
276 |
277 | # Get cost function from running optimizer
278 | _, c, summary = sess.run([optimizer, cost, merged_summary_op], feed_dict={x: batch_images})
279 |
280 | # Compute average loss
281 | avg_cost += c / n_batches
282 |
283 | writer.add_summary(summary, epoch * n_batches + i)
284 |
285 | if counter % save_steps == 0:
286 | save(saver, counter, sess)
287 |
288 | # Display logs per epoch step
289 | print('Epoch', epoch + 1, ' / ', n_epochs, 'cost:', avg_cost)
290 |
291 | print('>>> Optimization Finished')
292 |
293 |
294 | # Create checkpoint
295 | def save(saver, step, session):
296 | print(">>> Saving to checkpoint, step:" + str(step))
297 | if not os.path.exists(checkpoint_dir):
298 | os.makedirs(checkpoint_dir)
299 |
300 | saver.save(session,
301 | os.path.join(checkpoint_dir, model_name),
302 | global_step=step)
303 |
304 |
305 | # Restore from checkpoint
306 | def restore(saver, session):
307 | print(">>> Restoring from checkpoints...")
308 | checkpoint_state = tf.train.get_checkpoint_state(checkpoint_dir)
309 | if checkpoint_state and checkpoint_state.model_checkpoint_path:
310 | checkpoint_name = os.path.basename(checkpoint_state.model_checkpoint_path)
311 | saver.restore(session, os.path.join(checkpoint_dir, checkpoint_name))
312 |     counter = int(next(re.finditer(r"(\d+)(?!.*\d)", checkpoint_name)).group(0)) # last number in the name is the global step
313 | print(">>> Found restore checkpoint {}".format(checkpoint_name))
314 | return True, counter
315 | else:
316 | return False, 0
317 |
318 | train_network(x)
319 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # TF_Convolutional_Autoencoder
2 | #### _Convolutional autoencoder for encoding/decoding RGB images in TensorFlow with high compression_
3 |
4 | This is a sample template adapted from Arash Saber Tehrani's Deep-Convolutional-AutoEncoder tutorial (https://github.com/arashsaber/Deep-Convolutional-AutoEncoder) for encoding/decoding 3-channel images. The template is fully commented. I have tested this implementation on rescaled samples from the CelebA dataset from CUHK (http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html), and it produces reasonably decent results after a short period of training. The compression ratio of this implementation is 108: an input tensor of shape [-1, 48, 48, 3] is reduced to a bottleneck tensor of shape [-1, 64].
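   | 
   | The ratio follows directly from the tensor sizes:
   | 
   | ```
   | # Compression ratio = values per input image / values in the bottleneck
   | input_size = 48 * 48 * 3     # 6912
   | bottleneck_size = 64
   | print(input_size / bottleneck_size)   # 108.0
   | ```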
5 |
6 | **Add on features:**
7 | * Takes 3-channel images as input instead of MNIST
8 | * Training now performs checkpoint saves and restores
9 | * Both inputs to the encoder and outputs from the decoder are available for viewing in TensorBoard (launch command shown after this list)
10 | * Input autorescaling
11 | * ReLU activation replaced by LeakyReLU to mitigate the dying-ReLU problem
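   | 
   | To view the logged summaries, launch TensorBoard (standard TensorBoard CLI) and point it at the log directory:
   | 
   | ```
   | tensorboard --logdir=./logs/
   | ```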
12 |
13 | **Caveats:**
14 | * It is highly recommended to perform training on a GPU (it took ~40 min to train 20,000 steps on a Tesla K80 for celebG).
15 | * The input size can be increased, but during testing OOM errors occurred on the K80 at an input size of 84x84. This will be fixed in a later update. For now, if you get OOM errors during tensor allocation, try reducing the input size (see also the sketch after this list).
16 | * Sample output is currently visibly undersaturated owing to noise introduced by dropout and high model bias. These problems should diminish with further training.
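   | 
   | A possible OOM mitigation (a sketch using TF1's standard `allow_growth` option; not something ConvAutoencoder.py currently does) is to let the GPU allocator grow on demand instead of reserving all memory up front, in addition to lowering `batch_size`:
   | 
   | ```
   | import tensorflow as tf
   | 
   | config = tf.ConfigProto()
   | config.gpu_options.allow_growth = True   # allocate GPU memory as needed
   | with tf.Session(config=config) as sess:
   |     sess.run(tf.global_variables_initializer())
   | ```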
17 |
18 | ## Outputs
19 | N.B. The input images are 48x48, hence the blurriness. Additionally, these outputs are from setting n_epochs to 1000; increasing it could yield even better results (note the trend in the cost function).
20 |
21 | Inputs:
22 |
23 |
24 |
25 |
26 |
27 | Outputs:
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 | ## How to run
36 | 1. Make sure to create directory `./logs/run1/` to save TensorBoard output. For pushing multiple runs to TensorBoard, simply save additional logs as `./logs/run2/`, `./logs/run3/` etc.
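   | 
   | For example:
   | ```
   | mkdir -p ./logs/run1/
   | ```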
37 |
38 | 2. Unzip `./celebF.tar.gz` and save the jpegs in `./data/celebG/`, the directory the script reads from. A sketch, assuming the archive unpacks to loose .jpg files:
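   | 
   | ```
   | mkdir -p ./data/celebG/
   | tar -xzf celebF.tar.gz -C ./data/celebG/
   | ```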
39 |
40 | 3. Either use provided image set or your own. If using your own dataset, I recommend ImageMagick for resizing: https://www.imagemagick.org/script/download.php
41 |
42 | 4. If using ImageMagick, start Bash in `./data/celebG/`:
43 | ```
44 | for file in $PWD/*.jpg
45 | do
46 |   convert "$file" -resize 48x48 "$file"
47 | done
48 | ```
49 |
50 | 5. In root dir, `python ConvAutoencoder.py`
51 |
52 | ## Debug
53 | Here is a list of common problems:
54 | 1. The error (cost) is very high (in the thousands or millions): check that the input images are fetched and rescaled properly when transforming batch_files to batch_images. An error this high is typical of a very large raw MSE between unnormalized input/output pixel values and is not caused by a large number of model parameters. A quick sanity check is shown below.
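   | 
   | A hypothetical check (not part of ConvAutoencoder.py), placed right after `batch_images` is built in the training loop, to confirm that `transform()` scaled the batch to roughly [-1, 1]:
   | 
   | ```
   | batch_images = np.array(batch).astype(np.float32)
   | print("dtype:", batch_images.dtype)
   | print("min:", batch_images.min(), "max:", batch_images.max())   # expect ~-1.0 and ~1.0
   | ```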
55 |
56 | ## Additional References
57 | See https://github.com/carpedm20/DCGAN-tensorflow/blob/master/utils.py for several dynamic image-resize functions that I have incorporated into this implementation.
58 |
--------------------------------------------------------------------------------
/celebF.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/celebF.tar.gz
--------------------------------------------------------------------------------
/sample_output/A0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/A0.png
--------------------------------------------------------------------------------
/sample_output/A1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/A1.png
--------------------------------------------------------------------------------
/sample_output/B0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/B0.png
--------------------------------------------------------------------------------
/sample_output/B1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/B1.png
--------------------------------------------------------------------------------
/sample_output/C0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/C0.png
--------------------------------------------------------------------------------
/sample_output/C1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/C1.png
--------------------------------------------------------------------------------
/sample_output/D0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/D0.png
--------------------------------------------------------------------------------
/sample_output/D1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/D1.png
--------------------------------------------------------------------------------
/sample_output/cost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/cost.png
--------------------------------------------------------------------------------