├── ConvAutoencoder.py
├── README.md
├── celebF.tar.gz
└── sample_output
    ├── A0.png
    ├── A1.png
    ├── B0.png
    ├── B1.png
    ├── C0.png
    ├── C1.png
    ├── D0.png
    ├── D1.png
    └── cost.png

/ConvAutoencoder.py:
--------------------------------------------------------------------------------
"""
TF Convolutional Autoencoder

Arash Saber Tehrani - May 2017
Reference: https://github.com/arashsaber/Deep-Convolutional-AutoEncoder

Modified David Yu - July 2018
Reference: https://github.com/MrDavidYu/TF_Convolutional_Autoencoder
Add-ons:
1. Allows for custom .jpg input
2. Checkpoint save/restore
3. TensorBoard logs for input/output images
4. Input autorescaling
5. ReLU activation replaced by LeakyReLU

"""
import os
import re
import scipy.misc  # N.B. scipy.misc.imread/imresize require SciPy < 1.2 (with Pillow installed)
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from glob import glob

# Some important consts
num_examples = 669
batch_size = 30
n_epochs = 1000
save_steps = 500  # Number of training batches between checkpoint saves

checkpoint_dir = "./ckpt/"
model_name = "ConvAutoEnc.model"
logs_dir = "./logs/run1/"

# Fetch input data (faces/trees/imgs)
data_dir = "./data/celebG/"
data_path = os.path.join(data_dir, '*.jpg')
data = glob(data_path)

if len(data) == 0:
    raise Exception("[!] No data found in '" + data_path + "'")


'''
Some util functions from https://github.com/carpedm20/DCGAN-tensorflow
'''

def path_to_img(path, grayscale=False):
    if grayscale:
        return scipy.misc.imread(path, flatten=True).astype(np.float)
    else:
        return scipy.misc.imread(path).astype(np.float)

def center_crop(x, crop_h, crop_w,
                resize_h=64, resize_w=64):
    if crop_w is None:
        crop_w = crop_h
    h, w = x.shape[:2]
    j = int(round((h - crop_h)/2.))
    i = int(round((w - crop_w)/2.))
    return scipy.misc.imresize(
        x[j:j+crop_h, i:i+crop_w], [resize_h, resize_w])

def transform(image, input_height, input_width,
              resize_height=48, resize_width=48, crop=True):
    if crop:
        cropped_image = center_crop(
            image, input_height, input_width,
            resize_height, resize_width)
    else:
        cropped_image = scipy.misc.imresize(image, [resize_height, resize_width])
    return np.array(cropped_image)/127.5 - 1.

def autoresize(image_path, input_height, input_width,
               resize_height=48, resize_width=48,
               crop=True, grayscale=False):
    image = path_to_img(image_path, grayscale)
    return transform(image, input_height, input_width,
                     resize_height, resize_width, crop)

np.random.shuffle(data)
imread_img = path_to_img(data[0])  # test-read an image

if len(imread_img.shape) >= 3:  # non-grayscale image, i.e. it has a channel dimension
    c_dim = path_to_img(data[0]).shape[-1]
else:
    c_dim = 1

is_grayscale = (c_dim == 1)

'''
tf Graph Input
'''
x = tf.placeholder(tf.float32, [None, 48, 48, 3], name='InputData')

if __debug__:
    print("Reading input from: " + data_dir)
    print("Input image shape: " + str(imread_img.shape))
    print("Assigning input tensor of shape: " + str(x.shape))
    print("Writing checkpoints to: " + checkpoint_dir)
    print("Writing TensorBoard logs to: " + logs_dir)

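# N.B. transform() above rescales raw pixel intensities from [0, 255] to
# [-1, 1] via x/127.5 - 1. A quick NumPy check of this mapping (illustrative
# sketch only, not executed by this script):
#
#   >>> np.array([0., 127.5, 255.]) / 127.5 - 1.
#   array([-1.,  0.,  1.])
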
# strides = [Batch, Height, Width, Channels] in the default NHWC data_format.
# Batch and Channels must always be set to 1. If Channels were set to 3, we
# would increment the index for the color channel by 3 every time we convolve
# the filter, so only one of the channels would be used and the other two
# skipped. If we change the Batch number, then some images in the batch are
# skipped.
#
# To calculate the size of the output of a CONV layer:
# OutWidth = (InWidth - FilterWidth + 2*Padding)/Stride + 1
def conv2d(input, name, kshape, strides=[1, 1, 1, 1]):
    with tf.variable_scope(name):
        W = tf.get_variable(name='w_' + name,
                            shape=kshape,
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name,
                            shape=[kshape[3]],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        out = tf.nn.conv2d(input, W, strides=strides, padding='SAME')
        out = tf.nn.bias_add(out, b)
        out = tf.nn.leaky_relu(out)
        return out


# N.B. this uses tf.contrib.layers.conv2d_transpose; do not confuse it with
# tf.layers.conv2d_transpose
def deconv2d(input, name, kshape, n_outputs, strides=[1, 1]):
    with tf.variable_scope(name):
        out = tf.contrib.layers.conv2d_transpose(input,
                                                 num_outputs=n_outputs,
                                                 kernel_size=kshape,
                                                 stride=strides,
                                                 padding='SAME',
                                                 weights_initializer=tf.contrib.layers.xavier_initializer_conv2d(uniform=False),
                                                 biases_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                                 activation_fn=tf.nn.leaky_relu)
        return out

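# Worked example of the CONV size formula above: layer c1 below convolves a
# 48x48 input with a 7x7 filter at stride 1 and 'SAME' padding, i.e.
# Padding = (FilterWidth - 1)/2 = 3, so
#   OutWidth = (48 - 7 + 2*3)/1 + 1 = 48
# In other words, 'SAME' padding with stride 1 preserves the spatial size.
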
# Input to maxpool:  [BatchSize, Width1, Height1, Channels]
# Output of maxpool: [BatchSize, Width2, Height2, Channels]
#
# To calculate the size of the output of a maxpool layer:
# OutWidth = (InWidth - FilterWidth)/Stride + 1
#
# The kernel kshape will typically be [1,2,2,1] for a general
# RGB image input of [batch_size,48,48,3].
# kshape is 1 for batch and channels because we don't want to take
# the maximum over multiple examples or over multiple channels.
def maxpool2d(x, name, kshape=[1, 2, 2, 1], strides=[1, 2, 2, 1]):
    with tf.variable_scope(name):
        out = tf.nn.max_pool(x,
                             ksize=kshape,  # size of window
                             strides=strides,
                             padding='SAME')
        return out


def upsample(input, name, factor=[2, 2]):
    size = [int(input.shape[1] * factor[0]), int(input.shape[2] * factor[1])]
    with tf.variable_scope(name):
        out = tf.image.resize_bilinear(input, size=size, align_corners=None, name=None)
        return out


def fullyConnected(input, name, output_size):
    with tf.variable_scope(name):
        input_size = input.shape[1:]
        input_size = int(np.prod(input_size))  # total number of cells in one input image
        W = tf.get_variable(name='w_' + name,
                            shape=[input_size, output_size],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        b = tf.get_variable(name='b_' + name,
                            shape=[output_size],
                            initializer=tf.contrib.layers.xavier_initializer(uniform=False))
        input = tf.reshape(input, [-1, input_size])
        out = tf.nn.leaky_relu(tf.add(tf.matmul(input, W), b))
        return out


def dropout(input, name, keep_rate):
    with tf.variable_scope(name):
        out = tf.nn.dropout(input, keep_rate)
        return out

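# A quick way to check the layer arithmetic used in ConvAutoEncoder below is
# to probe tensor shapes while building the graph, e.g. (illustrative sketch
# only, not executed by this script):
#
#   probe = conv2d(tf.zeros([1, 48, 48, 3]), name='probe', kshape=[7, 7, 3, 15])
#   print(probe.shape)                             # (1, 48, 48, 15)
#   print(maxpool2d(probe, name='probe_p').shape)  # (1, 24, 24, 15)
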
def ConvAutoEncoder(x, name, reuse=False):
    with tf.variable_scope(name) as scope:
        if reuse:
            scope.reuse_variables()

        input = tf.reshape(x, shape=[-1, 48, 48, 3])

        # kshape = [k_h, k_w, in_channels, out_channels]
        c1 = conv2d(input, name='c1', kshape=[7, 7, 3, 15])          # Input: [48,48,3];  Output: [48,48,15]
        p1 = maxpool2d(c1, name='p1')                                # Input: [48,48,15]; Output: [24,24,15]
        do1 = dropout(p1, name='do1', keep_rate=0.75)
        c2 = conv2d(do1, name='c2', kshape=[5, 5, 15, 25])           # Input: [24,24,15]; Output: [24,24,25]
        p2 = maxpool2d(c2, name='p2')                                # Input: [24,24,25]; Output: [12,12,25]
        p2 = tf.reshape(p2, shape=[-1, 12*12*25])                    # Input: [12,12,25]; Output: [12*12*25]
        fc1 = fullyConnected(p2, name='fc1', output_size=12*12*5)    # Input: [12*12*25]; Output: [12*12*5]
        do2 = dropout(fc1, name='do2', keep_rate=0.75)
        fc2 = fullyConnected(do2, name='fc2', output_size=12*12*3)   # Input: [12*12*5];  Output: [12*12*3]
        do3 = dropout(fc2, name='do3', keep_rate=0.75)
        fc3 = fullyConnected(do3, name='fc3', output_size=64)        # Input: [12*12*3];  Output: [64] --> bottleneck layer
        # Decoding part
        fc4 = fullyConnected(fc3, name='fc4', output_size=12*12*3)   # Input: [64];       Output: [12*12*3]
        do4 = dropout(fc4, name='do4', keep_rate=0.75)
        fc5 = fullyConnected(do4, name='fc5', output_size=12*12*5)   # Input: [12*12*3];  Output: [12*12*5]
        do5 = dropout(fc5, name='do5', keep_rate=0.75)
        fc6 = fullyConnected(do5, name='fc6', output_size=21*21*25)  # Input: [12*12*5];  Output: [21*21*25]
        do6 = dropout(fc6, name='do6', keep_rate=0.75)
        do6 = tf.reshape(do6, shape=[-1, 21, 21, 25])                # Input: [21*21*25]; Output: [21,21,25]
        dc1 = deconv2d(do6, name='dc1', kshape=[5, 5], n_outputs=15) # Input: [21,21,25]; Output: [21,21,15]
        up1 = upsample(dc1, name='up1', factor=[2, 2])               # Input: [21,21,15]; Output: [42,42,15]
        dc2 = deconv2d(up1, name='dc2', kshape=[7, 7], n_outputs=3)  # Input: [42,42,15]; Output: [42,42,3]
        up2 = upsample(dc2, name='up2', factor=[2, 2])               # Input: [42,42,3];  Output: [84,84,3]
        output = fullyConnected(up2, name='output', output_size=48*48*3)  # Input: [84,84,3] flattened; Output: [48*48*3]

        with tf.variable_scope('cost'):
            # N.B. reduce_mean with no axis argument averages over every
            # element, i.e. all pixels of all images in the batch
            cost = tf.reduce_mean(tf.square(tf.subtract(output, tf.reshape(x, shape=[-1, 48*48*3]))))
        return x, tf.reshape(output, shape=[-1, 48, 48, 3]), cost  # returns input, output and cost


def train_network(x):

    with tf.Session() as sess:

        _, _, cost = ConvAutoEncoder(x, 'ConvAutoEnc')
        with tf.variable_scope('opt'):
            optimizer = tf.train.AdamOptimizer().minimize(cost)

        # Create summaries to monitor the cost tensor and the input/output images
        tf.summary.scalar("cost", cost)
        tf.summary.image("face_input", ConvAutoEncoder(x, 'ConvAutoEnc', reuse=True)[0], max_outputs=4)
        tf.summary.image("face_output", ConvAutoEncoder(x, 'ConvAutoEnc', reuse=True)[1], max_outputs=4)
        merged_summary_op = tf.summary.merge_all()  # Merge all summaries into a single op

        sess.run(tf.global_variables_initializer())  # initialize all variables (works around the "memory allocation exceeded 10%" warning)

        # Model saver
        saver = tf.train.Saver()

        counter = 0  # Used for checkpointing
        success, restored_counter = restore(saver, sess)
        if success:
            counter = restored_counter
            print(">>> Restore successful")
        else:
            print(">>> No restore checkpoints detected")

        # Create log writer object
        writer = tf.summary.FileWriter(logs_dir, graph=tf.get_default_graph())

        for epoch in range(n_epochs):
            avg_cost = 0
            n_batches = int(num_examples / batch_size)
            # Loop over all batches
            for i in range(n_batches):
                counter += 1
                print("epoch " + str(epoch) + " batch " + str(i))

                batch_files = data[i*batch_size:(i+1)*batch_size]  # get the current batch of files

                batch = [autoresize(batch_file,
                                    input_height=48,
                                    input_width=48,
                                    resize_height=48,
                                    resize_width=48,
                                    crop=True,
                                    grayscale=False) for batch_file in batch_files]

                batch_images = np.array(batch).astype(np.float32)

                # Run the optimizer and fetch the current cost
                _, c, summary = sess.run([optimizer, cost, merged_summary_op], feed_dict={x: batch_images})

                # Compute average loss
                avg_cost += c / n_batches

                writer.add_summary(summary, epoch * n_batches + i)

                if counter % save_steps == 0:
                    save(saver, counter, sess)

            # Display logs per epoch step
            print('Epoch', epoch + 1, '/', n_epochs, 'cost:', avg_cost)

        print('>>> Optimization Finished')


# Create checkpoint
def save(saver, step, session):
    print(">>> Saving to checkpoint, step: " + str(step))
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    saver.save(session,
               os.path.join(checkpoint_dir, model_name),
               global_step=step)


# Restore from checkpoint
def restore(saver, session):
    print(">>> Restoring from checkpoints...")
    checkpoint_state = tf.train.get_checkpoint_state(checkpoint_dir)
    if checkpoint_state and checkpoint_state.model_checkpoint_path:
        checkpoint_name = os.path.basename(checkpoint_state.model_checkpoint_path)
        saver.restore(session, os.path.join(checkpoint_dir, checkpoint_name))
        # Recover the global step from the checkpoint filename by taking the
        # last run of digits, e.g. "ConvAutoEnc.model-1500" -> 1500
        counter = int(next(re.finditer(r"(\d+)(?!.*\d)", checkpoint_name)).group(0))
        print(">>> Found restore checkpoint {}".format(checkpoint_name))
        return True, counter
    else:
        return False, 0

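# Usage (assuming the directory layout described in the README):
#   $ python ConvAutoencoder.py           # train; checkpoints are written to ./ckpt/
#   $ tensorboard --logdir=./logs/run1/   # inspect the cost curve and image summaries
# Re-running the script restores the latest checkpoint automatically, if one exists.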
train_network(x)

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# TF_Convolutional_Autoencoder
#### _Convolutional autoencoder for encoding/decoding RGB images in TensorFlow with high compression_

This is a sample template adapted from Arash Saber Tehrani's Deep-Convolutional-AutoEncoder tutorial https://github.com/arashsaber/Deep-Convolutional-AutoEncoder for encoding/decoding 3-channel images. The template has been fully commented. I have tested this implementation on rescaled samples from the CelebA dataset from CUHK http://mmlab.ie.cuhk.edu.hk/projects/CelebA.html and it produces reasonably decent results after a short period of training. The compression ratio of this implementation is 108: for an input tensor of shape [-1, 48, 48, 3], the bottleneck layer is reduced to a tensor of shape [-1, 64].
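The ratio follows directly from the tensor sizes, e.g. in a Python shell:

```
>>> 48 * 48 * 3   # number of values in one input image
6912
>>> 6912 / 64     # input size / bottleneck size
108.0
```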
**Add-on features:**
* Takes 3-channel images as input instead of MNIST
* Training now performs checkpoint saves and restores
* Both inputs to the encoder and outputs from the decoder are viewable in TensorBoard
* Input autorescaling
* ReLU activation replaced by LeakyReLU to resolve the dying-ReLU problem

**Caveats:**
* It is highly recommended to perform training on a GPU (training 20,000 steps took ~40 min on a Tesla K80 for celebG).
* The input size can be increased, but during testing OOM errors occurred on the K80 for an input size of 84x84. This will be fixed in a later update. For now, if you get any OOM errors in tensor allocation, try reducing the input size.
* Sample output is currently visibly undersaturated owing to noise introduced by dropout and high model bias. These problems should go away with further training.

## Outputs
N.B. The input images are 48x48, hence the blurriness. Additionally, these outputs are from setting n_epochs to 1000, which could be increased for even better results (note the cost function trend).

Inputs:

<img src="./sample_output/A0.png"> <img src="./sample_output/B0.png"> <img src="./sample_output/C0.png"> <img src="./sample_output/D0.png">

Outputs:

<img src="./sample_output/A1.png"> <img src="./sample_output/B1.png"> <img src="./sample_output/C1.png"> <img src="./sample_output/D1.png">

## How to run
1. Make sure to create the directory `./logs/run1/` to save TensorBoard output. For pushing multiple runs to TensorBoard, simply save additional logs as `./logs/run2/`, `./logs/run3/`, etc.

2. Unzip `./celebF.tar.gz` and save the jpegs in `./data/celebG/`

3. Either use the provided image set or your own. If using your own dataset, I recommend ImageMagick for resizing: https://www.imagemagick.org/script/download.php

4. If using ImageMagick, start Bash in `./data/<your_dataset>/`:
```
for file in $PWD/*.jpg
do
convert $file -resize 42x42 $file
done
```

5. In the root dir, run `python ConvAutoencoder.py`

## Debug
Here is a list of common problems:
1. The error (cost) is very high (in the thousands or millions): check that the input images are fetched properly when transforming batch_files to batch_images, etc. An error this high is typical of the large natural differences in MSE between input and output images, and is not caused by a large number of model parameters.
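A quick sanity check (a sketch reusing the variable names from `ConvAutoencoder.py`) is to print the shape and value range of a batch inside the training loop:

```
# right after batch_images is built:
print(batch_images.shape)                      # expect (30, 48, 48, 3)
print(batch_images.min(), batch_images.max())  # expect values within [-1.0, 1.0]
```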
## Additional References
See https://github.com/carpedm20/DCGAN-tensorflow/blob/master/utils.py for several dynamic image-resize functions that I have incorporated into this implementation.

--------------------------------------------------------------------------------
/celebF.tar.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/celebF.tar.gz

--------------------------------------------------------------------------------
/sample_output/A0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/A0.png

--------------------------------------------------------------------------------
/sample_output/A1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/A1.png

--------------------------------------------------------------------------------
/sample_output/B0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/B0.png

--------------------------------------------------------------------------------
/sample_output/B1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/B1.png

--------------------------------------------------------------------------------
/sample_output/C0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/C0.png

--------------------------------------------------------------------------------
/sample_output/C1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/C1.png

--------------------------------------------------------------------------------
/sample_output/D0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/D0.png

--------------------------------------------------------------------------------
/sample_output/D1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/D1.png

--------------------------------------------------------------------------------
/sample_output/cost.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MrDavidYu/TF_Convolutional_Autoencoder/cb2e2fb8bf3f67a7106ac654e3a717e6e0ebbd77/sample_output/cost.png

--------------------------------------------------------------------------------