├── .DS_Store ├── .gitignore └── HW3 ├── .DS_Store ├── readme.md └── src ├── .DS_Store ├── __init__.py ├── __pycache__ ├── data.cpython-36.pyc └── net.cpython-36.pyc ├── data.py ├── net.py ├── test.py └── train.py /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CodePlay2016/GAN_learn/fcb27919893508b3a3c421addc236a4294804a89/.DS_Store -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | HW3/data 2 | HW3/model 3 | HW3/tensorboard 4 | .vscode/settings.json 5 | -------------------------------------------------------------------------------- /HW3/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CodePlay2016/GAN_learn/fcb27919893508b3a3c421addc236a4294804a89/HW3/.DS_Store -------------------------------------------------------------------------------- /HW3/readme.md: -------------------------------------------------------------------------------- 1 | ## 作业3-利用GAN生成二次元头像 2 | * [作业的ppt链接](https://docs.google.com/presentation/d/1UdLXHcu-pvvYkNvZIWT7tFbuGO2HzHuAZhcA0Xdrtd8/edit#slide=id.g395c9f0b29_15_0) 3 | * [数据集地址: Anime Dataset](https://drive.google.com/drive/folders/1mCsY5LEsgCnc0Txv0rpAUhKVPWVkbw5I?usp=sharing) 4 | * [数据集地址: Extra data](https://drive.google.com/file/d/1tpW7ZVNosXsIAWu8-f5EpwtF3ls3pb79/view) 5 | * 跑data.py生成tfrecord,需要改路径 6 | * 跑train.py进行训练,需要修改路径 7 | 8 | * 一些经验和记录在[我的博客](https://www.jianshu.com/p/d32134293fff) -------------------------------------------------------------------------------- /HW3/src/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CodePlay2016/GAN_learn/fcb27919893508b3a3c421addc236a4294804a89/HW3/src/.DS_Store 
# --------------------------------------------------------------------------------
# /HW3/src/__init__.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/CodePlay2016/GAN_learn/fcb27919893508b3a3c421addc236a4294804a89/HW3/src/__init__.py
# --------------------------------------------------------------------------------
# /HW3/src/__pycache__/data.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/CodePlay2016/GAN_learn/fcb27919893508b3a3c421addc236a4294804a89/HW3/src/__pycache__/data.cpython-36.pyc
# --------------------------------------------------------------------------------
# /HW3/src/__pycache__/net.cpython-36.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/CodePlay2016/GAN_learn/fcb27919893508b3a3c421addc236a4294804a89/HW3/src/__pycache__/net.cpython-36.pyc
# --------------------------------------------------------------------------------
# /HW3/src/data.py:
# --------------------------------------------------------------------------------
import tensorflow as tf
from PIL import Image
import os, pdb

HEIGHT = 64
WIDTH = 64
CHANNEL = 3


def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature``."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))


def generate_tfrecord(source_path):
    """Write ``<source_path>/train.tfrecords`` from the images in ``<source_path>/faces``.

    Every readable image is converted to RGB, resized to 64x64 and stored as
    raw bytes under the feature key ``"image_raw"``.

    Background: https://blog.csdn.net/sinat_34474705/article/details/78966064

    Arguments:
        source_path -- directory that contains the ``faces`` sub-directory; the
                       record file is created directly inside it.
    """
    img_dir = os.path.join(source_path, 'faces')
    writer = tf.python_io.TFRecordWriter(os.path.join(source_path, "train.tfrecords"))
    try:
        for imageName in os.listdir(img_dir):
            img_file = os.path.join(img_dir, imageName)
            try:
                with Image.open(img_file) as image:
                    # preprocess() reshapes every record to [64, 64, 3], so the
                    # stored bytes must always be 3-channel RGB.
                    image = image.convert('RGB').resize((64, 64), Image.BILINEAR)
            except OSError:
                # Directory entries such as .DS_Store or sub-folders are not
                # images; skip them instead of aborting the whole export.
                print('skip unreadable file', img_file)
                continue
            image_raw = image.tobytes()  # convert image to binary format
            example = tf.train.Example(features=tf.train.Features(feature={
                "image_raw": _bytes_feature(image_raw),
            }))
            writer.write(example.SerializeToString())
    finally:
        # Close (and flush) the record file even when an image fails mid-way.
        writer.close()


def readRecord(recordName):
    """
    read TFRecord data (images).

    Arguments:
        recordName -- the TFRecord file to be read.
    return: a single preprocessed image tensor (see ``preprocess``) that is
            produced by a queue runner reading ``recordName``.
    """
    filenameQueue = tf.train.string_input_producer([recordName])
    reader = tf.TFRecordReader()
    _, serializedExample = reader.read(filenameQueue)
    features = tf.parse_single_example(serializedExample, features={
        "image_raw": tf.FixedLenFeature([], tf.string)
    })

    image = features["image_raw"]
    image = tf.decode_raw(image, tf.uint8)
    image = preprocess(image)
    return image


def preprocess(image):
    """Augment one decoded image and scale it to [-1, 1].

    Arguments:
        image -- flat uint8 tensor holding 64*64*3 values, as written by
                 ``generate_tfrecord``.
    return: float32 tensor of shape [HEIGHT, WIDTH, 3].
    """
    # Records are always stored as 64x64x3 (see generate_tfrecord).
    image = tf.reshape(image, [64, 64, 3])
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_brightness(image, max_delta=0.1)
    image = tf.image.random_contrast(image, lower=0.9, upper=1.1)
    print('image_record shape before process', image.get_shape().as_list())
    size = [HEIGHT, WIDTH]
    image = tf.image.resize_images(image, size)
    print('image_record shape after process', image.get_shape().as_list())
    # image.set_shape([HEIGHT,WIDTH,CHANNEL])
    image = (tf.cast(image, tf.float32) * (1. / 255) - 0.5) * 2  # scale image to [-1,1]
    return image


def get_batch_image(data, batch_size, shuffle=True):
    '''
    Group single examples into batches through a TensorFlow input queue.

    input:
        data: list of datas, such as [image, label]
        batch_size: number of examples per batch
        shuffle: use ``tf.train.shuffle_batch`` when True, ``tf.train.batch``
                 otherwise
    '''
    if shuffle:
        return tf.train.shuffle_batch(data, batch_size=batch_size,
                                      capacity=2 * batch_size,
                                      min_after_dequeue=batch_size)
    return tf.train.batch(data, batch_size, capacity=2 * batch_size)


def get_batch_noise(dimension, batch_size):
    """Return a [batch_size, dimension] tensor of standard-normal noise."""
    noise = tf.random_normal([batch_size, dimension], mean=0.0, stddev=1.0)
    return noise


if __name__ == '__main__':
    # Manual smoke test: build the record file and the input pipeline once.
    img_path = '/Users/hufangquan/code/GAN_learn/HW3/data/'
    generate_tfrecord(img_path)
    image = readRecord(os.path.join(img_path, 'train.tfrecords'))
    batch = get_batch_image([image], 10)
    sess = tf.Session()

    print('')

# --------------------------------------------------------------------------------
# /HW3/src/net.py:
# --------------------------------------------------------------------------------
import tensorflow as tf


def discriminator(inpt, gn_stddev, training=True):
    '''
    Score a batch of images with the critic network (variable scope 'dis').

    input_arguments:
        inpt: the inpt image tensor, [batch, width, length, channels]
        gn_stddev: a scalar, the stddev for the gaussian noise added to the input image,
        training: passed to every batch-normalization layer
    return: raw (un-squashed) scores of shape [batch, 1]
    '''
    with tf.variable_scope('dis', reuse=tf.AUTO_REUSE):
        channels = 128
        inpt = inpt + tf.random_normal(shape=tf.shape(inpt), mean=0.0, stddev=gn_stddev, dtype=tf.float32)
        out = tf.layers.conv2d(inpt, filters=channels, kernel_size=5, strides=1, padding='SAME')
        out = tf.layers.batch_normalization(out, epsilon=1e-5, training=training)
        out = tf.nn.leaky_relu(out)

        out = tf.layers.conv2d(out, filters=channels*2, kernel_size=5, strides=2, padding='SAME')
        out = tf.layers.batch_normalization(out, epsilon=1e-5, training=training)
        out = tf.nn.leaky_relu(out)

        out = tf.layers.conv2d(out, filters=channels*4, kernel_size=5, strides=2, padding='SAME')
        out = tf.layers.batch_normalization(out, epsilon=1e-5, training=training)
        out = tf.nn.leaky_relu(out)

        out = tf.layers.conv2d(out, filters=channels*8, kernel_size=5, strides=2, padding='SAME')
        out = tf.layers.batch_normalization(out, epsilon=1e-5, training=training)
        out = tf.nn.leaky_relu(out)

        out = tf.layers.flatten(out)
        out = tf.layers.dense(out, 1)
        # out = tf.nn.sigmoid(out)
        return out


def generator(inpt, training=True):
    """Map a batch of noise vectors to images in [-1, 1] (variable scope 'gen').

    Arguments:
        inpt -- noise tensor, [batch, noise_size]
        training -- passed to every batch-normalization layer
    return: tanh-activated image tensor, [batch, 64, 64, 3]
    """
    channels = 128
    with tf.variable_scope('gen', reuse=tf.AUTO_REUSE):
        out = tf.layers.dense(inpt, 8*channels*8*8)
        out = tf.layers.batch_normalization(out, epsilon=1e-5, training=training)
        out = tf.nn.relu(out)
        out = tf.reshape(out, [-1, 8, 8, channels*8])  # (8,8,1024)

        # out = tf.layers.conv2d(out, channels*8, 5, padding='SAME')
        # out = tf.nn.leaky_relu(out)
        # out = tf.layers.batch_normalization(out,epsilon=1e-5,training=training)

        out = tf.layers.conv2d_transpose(out, channels*4, 4, 2, padding='SAME')
        out = tf.layers.batch_normalization(out, epsilon=1e-5, training=training)
        out = tf.nn.leaky_relu(out)  # (16,16,512)

        out = tf.layers.conv2d_transpose(out, channels*2, 4, 2, padding='SAME')
        out = tf.layers.batch_normalization(out, epsilon=1e-5, training=training)
        out = tf.nn.leaky_relu(out)  # (32,32,256)

        out = tf.layers.conv2d_transpose(out, channels, 4, 2, padding='SAME')
        out = tf.layers.batch_normalization(out, epsilon=1e-5, training=training)
        out = tf.nn.leaky_relu(out)  # (64,64,128)

        # out = tf.layers.conv2d(out, channels, 4, padding='SAME')
        # out = tf.layers.batch_normalization(out,epsilon=1e-5,training=training)
        # out = tf.nn.leaky_relu(out)
        out = tf.layers.conv2d(out, 3, 4, padding='SAME')
        out = tf.nn.tanh(out)
        return out


def loss_fn_d(real_scores, fake_scores):
    '''
    Critic loss: mean(fake_scores) - mean(real_scores).

    Reference: https://www.cnblogs.com/sandy-t/p/7076401.html
    '''
    # d_loss = -tf.reduce_mean(tf.log(real_scores)) -tf.reduce_mean(tf.log(1-fake_scores))
    # d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=real_scores, labels=tf.ones_like(real_scores)))
    # d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_scores, labels=tf.zeros_like(fake_scores)))
    d_loss_real = tf.reduce_mean(tf.scalar_mul(-1, real_scores))
    d_loss_fake = tf.reduce_mean(fake_scores)
    d_loss = d_loss_real + d_loss_fake
    return d_loss


def loss_fn_g(fake_scores):
    """Generator loss: -mean(fake_scores)."""
    # g_loss = -tf.reduce_mean(tf.log(fake_scores))
    # g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=fake_scores, labels=tf.ones_like(fake_scores)))
    g_loss = tf.reduce_mean(tf.scalar_mul(-1, fake_scores))
    return g_loss


# --------------------------------------------------------------------------------
# /HW3/src/test.py:
# --------------------------------------------------------------------------------
def printM(mat, direction, low, high, left, right):
    """Print the window of ``mat`` in clockwise spiral order, one element per line.

    The window covers rows ``high..low`` (``high`` is the top row index,
    ``low`` the bottom one) and columns ``left..right``, all inclusive.

    Arguments:
        mat -- list of rows
        direction -- side to start with: [0,1] top row (left to right),
                     [-1,0] right column (top to bottom), [0,-1] bottom row
                     (right to left), [1,0] left column (bottom to top).
                     Any other value prints nothing.
        low, high, left, right -- inclusive window bounds as described above
    """
    # Direction codes in the order the spiral visits them.
    order = [[0, 1], [-1, 0], [0, -1], [1, 0]]
    if direction not in order:
        return
    step = order.index(direction)
    # Stop as soon as the window is empty. The previous test
    # (high >= low and left >= right) both re-printed elements of single-row
    # matrices and dropped the last element of square ones.
    while high <= low and left <= right:
        if step == 0:
            for ii in range(left, right + 1):
                print(mat[high][ii])
            high += 1
        elif step == 1:
            for ii in range(high, low + 1):
                print(mat[ii][right])
            right -= 1
        elif step == 2:
            for ii in range(right, left - 1, -1):
                print(mat[low][ii])
            low -= 1
        else:
            for ii in range(low, high - 1, -1):
                print(mat[ii][left])
            left += 1
        step = (step + 1) % 4


mat = [[1, 2, 3, 4],
       [5, 6, 7, 8],
       [13, 14, 15, 16]]
printM(mat, [0, 1], 2, 0, 0, 3)
print('')
# --------------------------------------------------------------------------------
# /HW3/src/train.py:
# --------------------------------------------------------------------------------
# Training script: builds the generator/critic graph from net.py, feeds it from
# the TFRecord pipeline in data.py and optimises the Wasserstein loss with a
# gradient penalty. Summaries go to ../tensorboard/<timestamp>/ and checkpoints
# to ../model/<timestamp>[_from_<previous run>]/.
import tensorflow as tf
import data, net
import datetime, pdb

d_pretrain_iter = 0   # critic-only warm-up iterations before the main loop
max_iter = 100000     # number of main-loop iterations
d_k_step, g_k_step = 5, 1   # critic / generator updates per iteration
lr_d, lr_g = 5e-5, 5e-5
show_interval = 100 // ((d_k_step + g_k_step) // 2)
save_interval = 200
batch_size = 64
noise_size = 100
switch_threshold=1
real_score_threshold=0.95
top_k = 10            # number of images written to the image summaries
clip_value = [-0.01,0.01]

tf.reset_default_graph()
image_record = data.readRecord('../data/train_clean.tfrecords')
train_from_checkpoint = True
checkpoint_dir = "../model/20190110-090118_ft_from_20190109-090711/"
# Stddev of the gaussian noise added to the critic input; one entry is used for
# `scheme_step` iterations, the last entry is kept afterwards.
stddev_scheme = [0] if train_from_checkpoint else [1e-3]#[ii*1e-5 for ii in range(50,0,-1)]+[0] #[0.01,0.009,...,0.001]
scheme_step = 2000


## define input
real_image = tf.placeholder(tf.float32, (batch_size,64,64,3))
inptG = tf.placeholder(tf.float32, (batch_size, noise_size))
gn_stddev = tf.placeholder(tf.float32, [])
training = tf.placeholder(tf.bool, [])
fake_image = net.generator(inptG, training)

real_image_batch = data.get_batch_image([image_record], batch_size)
noise_batch = data.get_batch_noise(noise_size, batch_size)
noise_batch_show = data.get_batch_noise(noise_size, top_k)

real_scores = net.discriminator(real_image, gn_stddev, training)
fake_scores = net.discriminator(fake_image, gn_stddev, training)
# topk_scores, topk_index = tf.nn.top_k(tf.reshape(fake_scores,[-1,]),top_k)
m_real_score = tf.reduce_mean(real_scores)
m_fake_score = tf.reduce_mean(fake_scores)

# define losses
d_loss = net.loss_fn_d(real_scores, fake_scores)
g_loss = net.loss_fn_g(fake_scores)

# WGAN-GP gradient penalty
# https://github.com/changwoolee/WGAN-GP-tensorflow/blob/master/model.py
epsilon = tf.random_uniform(
    shape=[batch_size, 1, 1, 1],
    minval=0.,
    maxval=1.)
X_hat = real_image + epsilon * (fake_image - real_image)
# NOTE(review): this call relies on discriminator's default training=True
# instead of the `training` placeholder -- confirm that is intended.
D_X_hat = net.discriminator(X_hat, gn_stddev=gn_stddev)
grad_D_X_hat = tf.gradients(D_X_hat, [X_hat])[0]
# The penalty needs one gradient norm per interpolated sample, so reduce over
# height, width and channels only; reducing over the batch axis as well would
# collapse the whole batch into a single norm.
slopes = tf.sqrt(tf.reduce_sum(tf.square(grad_D_X_hat), axis=[1, 2, 3]))
gradient_penalty = tf.reduce_mean((slopes - 1.) ** 2)
d_loss = d_loss + 10.0 * gradient_penalty

tvars = tf.trainable_variables()
d_vars = [var for var in tvars if 'dis' in var.name]
g_vars = [var for var in tvars if 'gen' in var.name]

# print([v.name for v in d_vars])
# print([v.name for v in g_vars])
# set trainer to train G and D seperately
# UPDATE_OPS is collected *before* the extra "show" generator below is built,
# so the train ops do not depend on the inptG_show placeholder.
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    d_trainer = tf.train.AdamOptimizer(lr_d).minimize(d_loss, var_list=d_vars)
    # d_trainer = tf.train.RMSPropOptimizer(lr_d).minimize(d_loss, var_list=d_vars)
    g_trainer = tf.train.AdamOptimizer(lr_g).minimize(g_loss, var_list=g_vars)
    # g_trainer = tf.train.RMSPropOptimizer(lr_g).minimize(g_loss, var_list=g_vars)
# clip_d_op = [var.assign(tf.clip_by_value(var, clip_value[0],clip_value[1])) for var in d_vars]

inptG_show = tf.placeholder(tf.float32, (top_k, noise_size))
fake_image_show = net.generator(inptG_show, training)
# add summaries
tf.summary.scalar("Discriminator_loss", d_loss)
tf.summary.scalar("Generator_loss", g_loss)
tf.summary.scalar("Gradient_penalty", gradient_penalty)
tf.summary.image('Generated_images', fake_image_show, top_k)
tf.summary.image('original_images', real_image, top_k)
time_info = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
logdir = "../tensorboard/" + time_info + "/"
if train_from_checkpoint:
    pre_data = checkpoint_dir.split('/')[-2].split('_from_')[0]
    fine_tune_msg = "_from_" + pre_data
else:
    fine_tune_msg = ""
model_path = "../model/" + time_info + fine_tune_msg + "/"
merged = tf.summary.merge_all()
ginit = tf.global_variables_initializer()
# Was a second global initializer; local variables need their own initializer.
linit = tf.local_variables_initializer()

saver = tf.train.Saver()
with tf.Session() as sess:
    writer = tf.summary.FileWriter(logdir, sess.graph)
    if train_from_checkpoint:
        checkpoint_path = tf.train.latest_checkpoint(checkpoint_dir)
        if checkpoint_path is None:
            # latest_checkpoint returns None when the directory holds no
            # checkpoint; fail with a clear message instead of inside restore().
            raise ValueError("no checkpoint found in " + checkpoint_dir)
        saver.restore(sess, checkpoint_path)
        sess.run(linit)
    else:
        sess.run([ginit, linit])
    coord = tf.train.Coordinator()
    thread = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        print('start')
        for ii in range(d_pretrain_iter):
            rib = sess.run(real_image_batch)
            finp = sess.run(noise_batch)
            # Every placeholder the critic graph reads has to be fed here too.
            _,dLoss = sess.run([d_trainer,d_loss],
                feed_dict={real_image:rib, inptG:finp,
                           gn_stddev:stddev_scheme[0], training:True})
            if ii % 50 == 0:
                print("dLoss:", dLoss)
                nb_show = sess.run(noise_batch_show)
                summary = sess.run(merged,{real_image:rib, inptG:finp, inptG_show:nb_show,
                                           gn_stddev:0, training:False})
                writer.add_summary(summary,ii)
        #pdb.set_trace()
        ii = 0
        #TODO
        # 1. add checkpoint saving
        # 2. change training scheme (1 vs 1) cause G loss gets constantly big
        while ii < max_iter:
            nb_show = sess.run(noise_batch_show)
            scheme_index = ii//scheme_step if ii < len(stddev_scheme)*scheme_step else -1
            for jj in range(d_k_step):
                rib = sess.run(real_image_batch)
                nb= sess.run(noise_batch)
                sess.run(d_trainer, feed_dict={real_image:rib, inptG:nb,
                            gn_stddev:stddev_scheme[scheme_index], training:True})
            for kk in range(g_k_step):
                rib = sess.run(real_image_batch)
                nb= sess.run(noise_batch)
                sess.run(g_trainer, feed_dict={real_image:rib, inptG:nb,
                            gn_stddev:stddev_scheme[scheme_index], training:True})
            if ii % show_interval == 0:
                real_score,fake_score,dLoss,gLoss = sess.run([m_real_score,m_fake_score,d_loss,g_loss],
                    feed_dict={real_image:rib, inptG:nb, gn_stddev:stddev_scheme[scheme_index], training:True})
                print('step ',ii,',dLoss is ',dLoss,',gLoss is ',gLoss,'real_score and fake score',real_score,fake_score)
                summary = sess.run(merged,{real_image:rib, inptG:nb, inptG_show:nb_show, gn_stddev:0, training:False})
                writer.add_summary(summary,ii)

            if ii % save_interval == 0:
                saver.save(sess=sess, save_path=model_path+'model.ckpt')
            ii += 1
        # Keep the weights of the final iterations as well.
        saver.save(sess=sess, save_path=model_path+'model.ckpt')
    finally:
        # Always stop the input-queue threads and flush the summaries, also
        # when training is interrupted or fails.
        coord.request_stop()
        coord.join(thread)
        writer.close()

# --------------------------------------------------------------------------------