├── Chapter02
│   ├── README.md
│   ├── __init__.py
│   ├── requirements.txt
│   └── run.py
├── Chapter03
│   ├── README.md
│   ├── __init__.py
│   ├── requirements.txt
│   └── run.py
├── Chapter04
│   ├── README.md
│   ├── __init__.py
│   ├── requirements.txt
│   └── run.py
├── Chapter05
│   ├── README.md
│   ├── __init__.py
│   ├── requirements.txt
│   └── run.py
├── Chapter06
│   ├── README.md
│   ├── __init__.py
│   ├── requirements.txt
│   ├── stage1.py
│   └── stage2.py
├── Chapter07
│   ├── README.md
│   ├── __init__.py
│   ├── requirements.txt
│   └── run.py
├── Chapter08
│   ├── README.md
│   ├── __init__.py
│   ├── requirements.txt
│   └── run.py
├── LICENSE
└── README.md
/Chapter02/README.md: -------------------------------------------------------------------------------- 1 | ## 3D-GAN - Generating Shapes Using GANs 2 | 3 | Python 3.6 4 | 5 | Steps to set up the project: 6 | 1. Create a python3 virtual environment and activate it 7 | 2. Install dependencies using "pip install -r requirements.txt" 8 | 3. Create essential folders like 1. logs 2. results 3. data 9 | 4. Download dataset to data directory 10 | 5. Train the model by executing "python3 run.py" 11 | -------------------------------------------------------------------------------- /Chapter02/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-Adversarial-Networks-Projects/317e7682acfeb5563f70c020a09b1b2e4c6595bb/Chapter02/__init__.py -------------------------------------------------------------------------------- /Chapter02/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.6.1 2 | astor==0.7.1 3 | certifi==2018.11.29 4 | chardet==3.0.4 5 | cloudpickle==0.6.1 6 | cycler==0.10.0 7 | dask==1.0.0 8 | decorator==4.3.0 9 | gast==0.2.0 10 | grpcio==1.17.1 11 | h5py==2.9.0 12 | idna==2.8 13 | Keras==2.2.4 14 | Keras-Applications==1.0.6 15 | Keras-Preprocessing==1.0.5 16 | kiwisolver==1.0.1 17 | Markdown==3.0.1 18 | matplotlib==3.0.2 19 | networkx==2.2 20 | numpy==1.15.4 21 | Pillow==5.3.0 22 | protobuf==3.6.1 23 | pydot==1.4.1 24 | pyparsing==2.3.0 25 | python-dateutil==2.7.5 26 | PyWavelets==1.0.1 27 | PyYAML==3.13 28 | pyzmq==17.1.2 29 | requests==2.21.0 30 | scikit-image==0.14.1 31 | scipy==1.2.0 32 | six==1.12.0 33 | stl==0.0.3 34 | tensorboard==1.12.1 35 | tensorflow==1.12.0 36 | termcolor==1.1.0 37 | toolz==0.9.0 38 | torchfile==0.1.0 39 | tornado==5.1.1 40 | trimesh==2.35.48 41 | urllib3==1.24.1 42 | visdom==0.1.8.5 43 | websocket-client==0.54.0 44 | Werkzeug==0.14.1 45 | -------------------------------------------------------------------------------- /Chapter02/run.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import time 4 | 5 | import numpy as np 6 | import scipy.io as io 7 | import scipy.ndimage as nd 8 | import tensorflow as tf 9 | from keras import Sequential 10 | from keras.callbacks import TensorBoard 11 | from keras.layers import Input 12 | from keras.layers.advanced_activations import LeakyReLU 13 | from keras.layers.convolutional import Conv3D, Deconv3D 14 | from keras.layers.core import Activation 15 | from keras.layers.normalization import BatchNormalization 16 | from keras.models import Model 17 | from keras.optimizers import Adam 18 | from mpl_toolkits.mplot3d import Axes3D 19 | import matplotlib 20 | import matplotlib.pyplot as plt 21 | 22 | 23 | def build_generator(): 24 | """ 25 | Create a Generator Model with hyperparameters values defined as follows 26 | """ 27 | z_size = 200 28 | 
gen_filters = [512, 256, 128, 64, 1] 29 | gen_kernel_sizes = [4, 4, 4, 4, 4] 30 | gen_strides = [1, 2, 2, 2, 2] 31 | gen_input_shape = (1, 1, 1, z_size) 32 | gen_activations = ['relu', 'relu', 'relu', 'relu', 'sigmoid'] 33 | gen_convolutional_blocks = 5 34 | 35 | input_layer = Input(shape=gen_input_shape) 36 | 37 | # First 3D transpose convolution(or 3D deconvolution) block 38 | a = Deconv3D(filters=gen_filters[0], 39 | kernel_size=gen_kernel_sizes[0], 40 | strides=gen_strides[0])(input_layer) 41 | a = BatchNormalization()(a, training=True) 42 | a = Activation(activation='relu')(a) 43 | 44 | # Next 4 3D transpose convolution(or 3D deconvolution) blocks 45 | for i in range(gen_convolutional_blocks - 1): 46 | a = Deconv3D(filters=gen_filters[i + 1], 47 | kernel_size=gen_kernel_sizes[i + 1], 48 | strides=gen_strides[i + 1], padding='same')(a) 49 | a = BatchNormalization()(a, training=True) 50 | a = Activation(activation=gen_activations[i + 1])(a) 51 | 52 | gen_model = Model(inputs=[input_layer], outputs=[a]) 53 | return gen_model 54 | 55 | 56 | def build_discriminator(): 57 | """ 58 | Create a Discriminator Model using hyperparameters values defined as follows 59 | """ 60 | 61 | dis_input_shape = (64, 64, 64, 1) 62 | dis_filters = [64, 128, 256, 512, 1] 63 | dis_kernel_sizes = [4, 4, 4, 4, 4] 64 | dis_strides = [2, 2, 2, 2, 1] 65 | dis_paddings = ['same', 'same', 'same', 'same', 'valid'] 66 | dis_alphas = [0.2, 0.2, 0.2, 0.2, 0.2] 67 | dis_activations = ['leaky_relu', 'leaky_relu', 'leaky_relu', 68 | 'leaky_relu', 'sigmoid'] 69 | dis_convolutional_blocks = 5 70 | 71 | dis_input_layer = Input(shape=dis_input_shape) 72 | 73 | # The first 3D Convolutional block 74 | a = Conv3D(filters=dis_filters[0], 75 | kernel_size=dis_kernel_sizes[0], 76 | strides=dis_strides[0], 77 | padding=dis_paddings[0])(dis_input_layer) 78 | # a = BatchNormalization()(a, training=True) 79 | a = LeakyReLU(dis_alphas[0])(a) 80 | 81 | # Next 4 3D Convolutional Blocks 82 | for i in range(dis_convolutional_blocks - 1): 83 | a = Conv3D(filters=dis_filters[i + 1], 84 | kernel_size=dis_kernel_sizes[i + 1], 85 | strides=dis_strides[i + 1], 86 | padding=dis_paddings[i + 1])(a) 87 | a = BatchNormalization()(a, training=True) 88 | if dis_activations[i + 1] == 'leaky_relu': 89 | a = LeakyReLU(dis_alphas[i + 1])(a) 90 | elif dis_activations[i + 1] == 'sigmoid': 91 | a = Activation(activation='sigmoid')(a) 92 | 93 | dis_model = Model(inputs=[dis_input_layer], outputs=[a]) 94 | return dis_model 95 | 96 | 97 | def write_log(callback, name, value, batch_no): 98 | summary = tf.Summary() 99 | summary_value = summary.value.add() 100 | summary_value.simple_value = value 101 | summary_value.tag = name 102 | callback.writer.add_summary(summary, batch_no) 103 | callback.writer.flush() 104 | 105 | 106 | """ 107 | Load datasets 108 | """ 109 | 110 | 111 | def get3DImages(data_dir): 112 | all_files = np.random.choice(glob.glob(data_dir), size=10) 113 | # all_files = glob.glob(data_dir) 114 | all_volumes = np.asarray([getVoxelsFromMat(f) for f in all_files], dtype=np.bool) 115 | return all_volumes 116 | 117 | 118 | def getVoxelsFromMat(path, cube_len=64): 119 | voxels = io.loadmat(path)['instance'] 120 | voxels = np.pad(voxels, (1, 1), 'constant', constant_values=(0, 0)) 121 | if cube_len != 32 and cube_len == 64: 122 | voxels = nd.zoom(voxels, (2, 2, 2), mode='constant', order=0) 123 | return voxels 124 | 125 | 126 | def saveFromVoxels(voxels, path): 127 | z, x, y = voxels.nonzero() 128 | fig = plt.figure() 129 | ax = fig.add_subplot(111, 
projection='3d') 130 | ax.scatter(x, y, -z, zdir='z', c='red') 131 | plt.savefig(path) 132 | 133 | 134 | def plotAndSaveVoxel(file_path, voxel): 135 | """ 136 | Plot a voxel 137 | """ 138 | fig = plt.figure() 139 | ax = fig.gca(projection='3d') 140 | ax.set_aspect('equal') 141 | ax.voxels(voxel, edgecolor="red") 142 | # plt.show() 143 | plt.savefig(file_path) 144 | 145 | 146 | if __name__ == '__main__': 147 | """ 148 | Specify Hyperparameters 149 | """ 150 | object_name = "chair" 151 | data_dir = "data/3DShapeNets/volumetric_data/" \ 152 | "{}/30/train/*.mat".format(object_name) 153 | gen_learning_rate = 0.0025 154 | dis_learning_rate = 10e-5 155 | beta = 0.5 156 | batch_size = 1 157 | z_size = 200 158 | epochs = 10 159 | MODE = "train" 160 | 161 | """ 162 | Create models 163 | """ 164 | gen_optimizer = Adam(lr=gen_learning_rate, beta_1=beta) 165 | dis_optimizer = Adam(lr=dis_learning_rate, beta_1=beta) 166 | 167 | discriminator = build_discriminator() 168 | discriminator.compile(loss='binary_crossentropy', optimizer=dis_optimizer) 169 | 170 | generator = build_generator() 171 | generator.compile(loss='binary_crossentropy', optimizer=gen_optimizer) 172 | 173 | discriminator.trainable = False 174 | 175 | input_layer = Input(shape=(1, 1, 1, z_size)) 176 | generated_volumes = generator(input_layer) 177 | validity = discriminator(generated_volumes) 178 | adversarial_model = Model(inputs=[input_layer], outputs=[validity]) 179 | adversarial_model.compile(loss='binary_crossentropy', optimizer=gen_optimizer) 180 | 181 | print("Loading data...") 182 | volumes = get3DImages(data_dir=data_dir) 183 | volumes = volumes[..., np.newaxis].astype(np.float) 184 | print("Data loaded...") 185 | 186 | tensorboard = TensorBoard(log_dir="logs/{}".format(time.time())) 187 | tensorboard.set_model(generator) 188 | tensorboard.set_model(discriminator) 189 | 190 | labels_real = np.reshape(np.ones((batch_size,)), (-1, 1, 1, 1, 1)) 191 | labels_fake = np.reshape(np.zeros((batch_size,)), (-1, 1, 1, 1, 1)) 192 | 193 | if MODE == 'train': 194 | for epoch in range(epochs): 195 | print("Epoch:", epoch) 196 | 197 | gen_losses = [] 198 | dis_losses = [] 199 | 200 | number_of_batches = int(volumes.shape[0] / batch_size) 201 | print("Number of batches:", number_of_batches) 202 | for index in range(number_of_batches): 203 | print("Batch:", index + 1) 204 | 205 | z_sample = np.random.normal(0, 0.33, size=[batch_size, 1, 1, 1, z_size]).astype(np.float32) 206 | volumes_batch = volumes[index * batch_size:(index + 1) * batch_size, :, :, :] 207 | 208 | # Next, generate volumes using the generate network 209 | gen_volumes = generator.predict_on_batch(z_sample) 210 | 211 | """ 212 | Train the discriminator network 213 | """ 214 | discriminator.trainable = True 215 | if index % 2 == 0: 216 | loss_real = discriminator.train_on_batch(volumes_batch, labels_real) 217 | loss_fake = discriminator.train_on_batch(gen_volumes, labels_fake) 218 | 219 | d_loss = 0.5 * np.add(loss_real, loss_fake) 220 | print("d_loss:{}".format(d_loss)) 221 | 222 | else: 223 | d_loss = 0.0 224 | 225 | discriminator.trainable = False 226 | """ 227 | Train the generator network 228 | """ 229 | z = np.random.normal(0, 0.33, size=[batch_size, 1, 1, 1, z_size]).astype(np.float32) 230 | g_loss = adversarial_model.train_on_batch(z, labels_real) 231 | print("g_loss:{}".format(g_loss)) 232 | 233 | gen_losses.append(g_loss) 234 | dis_losses.append(d_loss) 235 | 236 | # Every 10th mini-batch, generate volumes and save them 237 | if index % 10 == 0: 238 | z_sample2 = 
np.random.normal(0, 0.33, size=[batch_size, 1, 1, 1, z_size]).astype(np.float32) 239 | generated_volumes = generator.predict(z_sample2, verbose=3) 240 | for i, generated_volume in enumerate(generated_volumes[:5]): 241 | voxels = np.squeeze(generated_volume) 242 | voxels[voxels < 0.5] = 0. 243 | voxels[voxels >= 0.5] = 1. 244 | saveFromVoxels(voxels, "results/img_{}_{}_{}".format(epoch, index, i)) 245 | 246 | # Write losses to Tensorboard 247 | write_log(tensorboard, 'g_loss', np.mean(gen_losses), epoch) 248 | write_log(tensorboard, 'd_loss', np.mean(dis_losses), epoch) 249 | 250 | """ 251 | Save models 252 | """ 253 | generator.save_weights(os.path.join("models", "generator_weights.h5")) 254 | discriminator.save_weights(os.path.join("models", "discriminator_weights.h5")) 255 | 256 | if MODE == 'predict': 257 | # Create models 258 | generator = build_generator() 259 | discriminator = build_discriminator() 260 | 261 | # Load model weights 262 | generator.load_weights(os.path.join("models", "generator_weights.h5"), True) 263 | discriminator.load_weights(os.path.join("models", "discriminator_weights.h5"), True) 264 | 265 | # Generate 3D models 266 | z_sample = np.random.normal(0, 1, size=[batch_size, 1, 1, 1, z_size]).astype(np.float32) 267 | generated_volumes = generator.predict(z_sample, verbose=3) 268 | 269 | for i, generated_volume in enumerate(generated_volumes[:2]): 270 | voxels = np.squeeze(generated_volume) 271 | voxels[voxels < 0.5] = 0. 272 | voxels[voxels >= 0.5] = 1. 273 | saveFromVoxels(voxels, "results/gen_{}".format(i)) 274 | -------------------------------------------------------------------------------- /Chapter03/README.md: -------------------------------------------------------------------------------- 1 | ## Face Aging Using Conditional GAN 2 | 3 | Python 3.6 4 | 5 | Steps to set up the project: 6 | 1. Create a python3 virtual environment and activate it 7 | 2. Install dependencies using "pip install -r requirements.txt" 8 | 3. Create essential folders like 1. logs 2. results 3. data 9 | 4. Download dataset to data directory 10 | 5. 
Train the model by executing "python3 run.py" 11 | -------------------------------------------------------------------------------- /Chapter03/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-Adversarial-Networks-Projects/317e7682acfeb5563f70c020a09b1b2e4c6595bb/Chapter03/__init__.py -------------------------------------------------------------------------------- /Chapter03/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.5.0 2 | astor==0.7.1 3 | backcall==0.1.0 4 | bleach==3.1.1 5 | cloudpickle==0.6.1 6 | cycler==0.10.0 7 | dask==1.0.0 8 | decorator==4.3.0 9 | defusedxml==0.5.0 10 | entrypoints==0.2.3 11 | gast==0.2.0 12 | grpcio==1.15.0 13 | h5py==2.8.0 14 | ipykernel==5.1.0 15 | ipython==7.2.0 16 | ipython-genutils==0.2.0 17 | ipywidgets==7.4.2 18 | jedi==0.13.2 19 | Jinja2==2.10 20 | jsonschema==2.6.0 21 | jupyter==1.0.0 22 | jupyter-client==5.2.4 23 | jupyter-console==6.0.0 24 | jupyter-core==4.4.0 25 | Keras==2.2.3 26 | Keras-Applications==1.0.6 27 | Keras-Preprocessing==1.0.5 28 | keras-vis==0.4.1 29 | kiwisolver==1.0.1 30 | Markdown==3.0.1 31 | MarkupSafe==1.1.0 32 | matplotlib==3.0.2 33 | mistune==0.8.4 34 | nbconvert==5.4.0 35 | nbformat==4.4.0 36 | networkx==2.2 37 | notebook==5.7.4 38 | numpy==1.15.2 39 | pandocfilters==1.4.2 40 | parso==0.3.1 41 | pexpect==4.6.0 42 | pickleshare==0.7.5 43 | Pillow==5.3.0 44 | prometheus-client==0.5.0 45 | prompt-toolkit==2.0.7 46 | protobuf==3.6.1 47 | ptyprocess==0.6.0 48 | Pygments==2.3.1 49 | pyparsing==2.3.0 50 | python-dateutil==2.7.5 51 | PyWavelets==1.0.1 52 | PyYAML==3.13 53 | pyzmq==17.1.2 54 | qtconsole==4.4.3 55 | scikit-image==0.14.1 56 | scipy==1.1.0 57 | Send2Trash==1.5.0 58 | six==1.11.0 59 | tensorboard==1.11.0 60 | tensorflow==1.11.0 61 | termcolor==1.1.0 62 | terminado==0.8.1 63 | testpath==0.4.2 64 | toolz==0.9.0 65 | tornado==5.1.1 66 | traitlets==4.3.2 67 | wcwidth==0.1.7 68 | webencodings==0.5.1 69 | Werkzeug==0.15.3 70 | widgetsnbextension==3.4.2 71 | -------------------------------------------------------------------------------- /Chapter03/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os 3 | import time 4 | from datetime import datetime 5 | 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import tensorflow as tf 9 | from keras import Input, Model 10 | from keras.applications import InceptionResNetV2 11 | from keras.callbacks import TensorBoard 12 | from keras.layers import Conv2D, Flatten, Dense, BatchNormalization, Reshape, concatenate, LeakyReLU, Lambda, \ 13 | K, Activation, UpSampling2D, Dropout 14 | from keras.optimizers import Adam 15 | from keras.utils import to_categorical 16 | from keras_preprocessing import image 17 | from scipy.io import loadmat 18 | 19 | 20 | def build_encoder(): 21 | """ 22 | Encoder Network 23 | """ 24 | input_layer = Input(shape=(64, 64, 3)) 25 | 26 | # 1st Convolutional Block 27 | enc = Conv2D(filters=32, kernel_size=5, strides=2, padding='same')(input_layer) 28 | # enc = BatchNormalization()(enc) 29 | enc = LeakyReLU(alpha=0.2)(enc) 30 | 31 | # 2nd Convolutional Block 32 | enc = Conv2D(filters=64, kernel_size=5, strides=2, padding='same')(enc) 33 | enc = BatchNormalization()(enc) 34 | enc = LeakyReLU(alpha=0.2)(enc) 35 | 36 | # 3rd Convolutional Block 37 | enc = Conv2D(filters=128, kernel_size=5, strides=2, padding='same')(enc) 38 | enc 
= BatchNormalization()(enc) 39 | enc = LeakyReLU(alpha=0.2)(enc) 40 | 41 | # 4th Convolutional Block 42 | enc = Conv2D(filters=256, kernel_size=5, strides=2, padding='same')(enc) 43 | enc = BatchNormalization()(enc) 44 | enc = LeakyReLU(alpha=0.2)(enc) 45 | 46 | # Flatten layer 47 | enc = Flatten()(enc) 48 | 49 | # 1st Fully Connected Layer 50 | enc = Dense(4096)(enc) 51 | enc = BatchNormalization()(enc) 52 | enc = LeakyReLU(alpha=0.2)(enc) 53 | 54 | # Second Fully Connected Layer 55 | enc = Dense(100)(enc) 56 | 57 | # Create a model 58 | model = Model(inputs=[input_layer], outputs=[enc]) 59 | return model 60 | 61 | 62 | def build_generator(): 63 | """ 64 | Create a Generator Model with hyperparameters values defined as follows 65 | """ 66 | latent_dims = 100 67 | num_classes = 6 68 | 69 | input_z_noise = Input(shape=(latent_dims,)) 70 | input_label = Input(shape=(num_classes,)) 71 | 72 | x = concatenate([input_z_noise, input_label]) 73 | 74 | x = Dense(2048, input_dim=latent_dims + num_classes)(x) 75 | x = LeakyReLU(alpha=0.2)(x) 76 | x = Dropout(0.2)(x) 77 | 78 | x = Dense(256 * 8 * 8)(x) 79 | x = BatchNormalization()(x) 80 | x = LeakyReLU(alpha=0.2)(x) 81 | x = Dropout(0.2)(x) 82 | 83 | x = Reshape((8, 8, 256))(x) 84 | 85 | x = UpSampling2D(size=(2, 2))(x) 86 | x = Conv2D(filters=128, kernel_size=5, padding='same')(x) 87 | x = BatchNormalization(momentum=0.8)(x) 88 | x = LeakyReLU(alpha=0.2)(x) 89 | 90 | x = UpSampling2D(size=(2, 2))(x) 91 | x = Conv2D(filters=64, kernel_size=5, padding='same')(x) 92 | x = BatchNormalization(momentum=0.8)(x) 93 | x = LeakyReLU(alpha=0.2)(x) 94 | 95 | x = UpSampling2D(size=(2, 2))(x) 96 | x = Conv2D(filters=3, kernel_size=5, padding='same')(x) 97 | x = Activation('tanh')(x) 98 | 99 | model = Model(inputs=[input_z_noise, input_label], outputs=[x]) 100 | return model 101 | 102 | 103 | def expand_label_input(x): 104 | x = K.expand_dims(x, axis=1) 105 | x = K.expand_dims(x, axis=1) 106 | x = K.tile(x, [1, 32, 32, 1]) 107 | return x 108 | 109 | 110 | def build_discriminator(): 111 | """ 112 | Create a Discriminator Model with hyperparameters values defined as follows 113 | """ 114 | input_shape = (64, 64, 3) 115 | label_shape = (6,) 116 | image_input = Input(shape=input_shape) 117 | label_input = Input(shape=label_shape) 118 | 119 | x = Conv2D(64, kernel_size=3, strides=2, padding='same')(image_input) 120 | x = LeakyReLU(alpha=0.2)(x) 121 | 122 | label_input1 = Lambda(expand_label_input)(label_input) 123 | x = concatenate([x, label_input1], axis=3) 124 | 125 | x = Conv2D(128, kernel_size=3, strides=2, padding='same')(x) 126 | x = BatchNormalization()(x) 127 | x = LeakyReLU(alpha=0.2)(x) 128 | 129 | x = Conv2D(256, kernel_size=3, strides=2, padding='same')(x) 130 | x = BatchNormalization()(x) 131 | x = LeakyReLU(alpha=0.2)(x) 132 | 133 | x = Conv2D(512, kernel_size=3, strides=2, padding='same')(x) 134 | x = BatchNormalization()(x) 135 | x = LeakyReLU(alpha=0.2)(x) 136 | 137 | x = Flatten()(x) 138 | x = Dense(1, activation='sigmoid')(x) 139 | 140 | model = Model(inputs=[image_input, label_input], outputs=[x]) 141 | return model 142 | 143 | 144 | def build_fr_combined_network(encoder, generator, fr_model): 145 | input_image = Input(shape=(64, 64, 3)) 146 | input_label = Input(shape=(6,)) 147 | 148 | latent0 = encoder(input_image) 149 | 150 | gen_images = generator([latent0, input_label]) 151 | 152 | fr_model.trainable = False 153 | 154 | resized_images = Lambda(lambda x: K.resize_images(gen_images, height_factor=2, width_factor=2, 155 | 
data_format='channels_last'))(gen_images) 156 | embeddings = fr_model(resized_images) 157 | 158 | model = Model(inputs=[input_image, input_label], outputs=[embeddings]) 159 | return model 160 | 161 | 162 | def build_fr_model(input_shape): 163 | resent_model = InceptionResNetV2(include_top=False, weights='imagenet', input_shape=input_shape, pooling='avg') 164 | image_input = resent_model.input 165 | x = resent_model.layers[-1].output 166 | out = Dense(128)(x) 167 | embedder_model = Model(inputs=[image_input], outputs=[out]) 168 | 169 | input_layer = Input(shape=input_shape) 170 | 171 | x = embedder_model(input_layer) 172 | output = Lambda(lambda x: K.l2_normalize(x, axis=-1))(x) 173 | 174 | model = Model(inputs=[input_layer], outputs=[output]) 175 | return model 176 | 177 | 178 | def build_image_resizer(): 179 | input_layer = Input(shape=(64, 64, 3)) 180 | 181 | resized_images = Lambda(lambda x: K.resize_images(x, height_factor=3, width_factor=3, 182 | data_format='channels_last'))(input_layer) 183 | 184 | model = Model(inputs=[input_layer], outputs=[resized_images]) 185 | return model 186 | 187 | 188 | def calculate_age(taken, dob): 189 | birth = datetime.fromordinal(max(int(dob) - 366, 1)) 190 | 191 | if birth.month < 7: 192 | return taken - birth.year 193 | else: 194 | return taken - birth.year - 1 195 | 196 | 197 | def load_data(wiki_dir, dataset='wiki'): 198 | # Load the wiki.mat file 199 | meta = loadmat(os.path.join(wiki_dir, "{}.mat".format(dataset))) 200 | 201 | # Load the list of all files 202 | full_path = meta[dataset][0, 0]["full_path"][0] 203 | 204 | # List of Matlab serial date numbers 205 | dob = meta[dataset][0, 0]["dob"][0] 206 | 207 | # List of years when photo was taken 208 | photo_taken = meta[dataset][0, 0]["photo_taken"][0] # year 209 | 210 | # Calculate age for all dobs 211 | age = [calculate_age(photo_taken[i], dob[i]) for i in range(len(dob))] 212 | 213 | # Create a list of tuples containing a pair of an image path and age 214 | images = [] 215 | age_list = [] 216 | for index, image_path in enumerate(full_path): 217 | images.append(image_path[0]) 218 | age_list.append(age[index]) 219 | 220 | # Return a list of all images and respective age 221 | return images, age_list 222 | 223 | 224 | def age_to_category(age_list): 225 | age_list1 = [] 226 | 227 | for age in age_list: 228 | if 0 < age <= 18: 229 | age_category = 0 230 | elif 18 < age <= 29: 231 | age_category = 1 232 | elif 29 < age <= 39: 233 | age_category = 2 234 | elif 39 < age <= 49: 235 | age_category = 3 236 | elif 49 < age <= 59: 237 | age_category = 4 238 | elif age >= 60: 239 | age_category = 5 240 | 241 | age_list1.append(age_category) 242 | 243 | return age_list1 244 | 245 | 246 | def load_images(data_dir, image_paths, image_shape): 247 | images = None 248 | 249 | for i, image_path in enumerate(image_paths): 250 | print() 251 | try: 252 | # Load image 253 | loaded_image = image.load_img(os.path.join(data_dir, image_path), target_size=image_shape) 254 | 255 | # Convert PIL image to numpy ndarray 256 | loaded_image = image.img_to_array(loaded_image) 257 | 258 | # Add another dimension (Add batch dimension) 259 | loaded_image = np.expand_dims(loaded_image, axis=0) 260 | 261 | # Concatenate all images into one tensor 262 | if images is None: 263 | images = loaded_image 264 | else: 265 | images = np.concatenate([images, loaded_image], axis=0) 266 | except Exception as e: 267 | print("Error:", i, e) 268 | 269 | return images 270 | 271 | 272 | def euclidean_distance_loss(y_true, y_pred): 273 | """ 274 | 
Euclidean distance loss 275 | https://en.wikipedia.org/wiki/Euclidean_distance 276 | :param y_true: TensorFlow/Theano tensor 277 | :param y_pred: TensorFlow/Theano tensor of the same shape as y_true 278 | :return: float 279 | """ 280 | return K.sqrt(K.sum(K.square(y_pred - y_true), axis=-1)) 281 | 282 | 283 | def write_log(callback, name, value, batch_no): 284 | summary = tf.Summary() 285 | summary_value = summary.value.add() 286 | summary_value.simple_value = value 287 | summary_value.tag = name 288 | callback.writer.add_summary(summary, batch_no) 289 | callback.writer.flush() 290 | 291 | 292 | def save_rgb_img(img, path): 293 | """ 294 | Save an rgb image 295 | """ 296 | fig = plt.figure() 297 | ax = fig.add_subplot(1, 1, 1) 298 | ax.imshow(img) 299 | ax.axis("off") 300 | ax.set_title("Image") 301 | 302 | plt.savefig(path) 303 | plt.close() 304 | 305 | 306 | if __name__ == '__main__': 307 | # Define hyperparameters 308 | data_dir = "data" 309 | wiki_dir = os.path.join(data_dir, "wiki_crop1") 310 | epochs = 500 311 | batch_size = 2 312 | image_shape = (64, 64, 3) 313 | z_shape = 100 314 | TRAIN_GAN = True 315 | TRAIN_ENCODER = False 316 | TRAIN_GAN_WITH_FR = False 317 | fr_image_shape = (192, 192, 3) 318 | 319 | # Define optimizers 320 | dis_optimizer = Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=10e-8) 321 | gen_optimizer = Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=10e-8) 322 | adversarial_optimizer = Adam(lr=0.0002, beta_1=0.5, beta_2=0.999, epsilon=10e-8) 323 | 324 | """ 325 | Build and compile networks 326 | """ 327 | # Build and compile the discriminator network 328 | discriminator = build_discriminator() 329 | discriminator.compile(loss=['binary_crossentropy'], optimizer=dis_optimizer) 330 | 331 | # Build and compile the generator network 332 | generator = build_generator() 333 | generator.compile(loss=['binary_crossentropy'], optimizer=gen_optimizer) 334 | 335 | # Build and compile the adversarial model 336 | discriminator.trainable = False 337 | input_z_noise = Input(shape=(100,)) 338 | input_label = Input(shape=(6,)) 339 | recons_images = generator([input_z_noise, input_label]) 340 | valid = discriminator([recons_images, input_label]) 341 | adversarial_model = Model(inputs=[input_z_noise, input_label], outputs=[valid]) 342 | adversarial_model.compile(loss=['binary_crossentropy'], optimizer=gen_optimizer) 343 | 344 | tensorboard = TensorBoard(log_dir="logs/{}".format(time.time())) 345 | tensorboard.set_model(generator) 346 | tensorboard.set_model(discriminator) 347 | 348 | """ 349 | Load the dataset 350 | """ 351 | images, age_list = load_data(wiki_dir=wiki_dir, dataset="wiki") 352 | age_cat = age_to_category(age_list) 353 | final_age_cat = np.reshape(np.array(age_cat), [len(age_cat), 1]) 354 | classes = len(set(age_cat)) 355 | y = to_categorical(final_age_cat, num_classes=len(set(age_cat))) 356 | 357 | loaded_images = load_images(wiki_dir, images, (image_shape[0], image_shape[1])) 358 | 359 | # Implement label smoothing 360 | real_labels = np.ones((batch_size, 1), dtype=np.float32) * 0.9 361 | fake_labels = np.zeros((batch_size, 1), dtype=np.float32) * 0.1 362 | 363 | """ 364 | Train the generator and the discriminator network 365 | """ 366 | if TRAIN_GAN: 367 | for epoch in range(epochs): 368 | print("Epoch:{}".format(epoch)) 369 | 370 | gen_losses = [] 371 | dis_losses = [] 372 | 373 | number_of_batches = int(len(loaded_images) / batch_size) 374 | print("Number of batches:", number_of_batches) 375 | for index in range(number_of_batches): 376 | 
print("Batch:{}".format(index + 1)) 377 | 378 | images_batch = loaded_images[index * batch_size:(index + 1) * batch_size] 379 | images_batch = images_batch / 127.5 - 1.0 380 | images_batch = images_batch.astype(np.float32) 381 | 382 | y_batch = y[index * batch_size:(index + 1) * batch_size] 383 | z_noise = np.random.normal(0, 1, size=(batch_size, z_shape)) 384 | 385 | """ 386 | Train the discriminator network 387 | """ 388 | 389 | # Generate fake images 390 | initial_recon_images = generator.predict_on_batch([z_noise, y_batch]) 391 | 392 | d_loss_real = discriminator.train_on_batch([images_batch, y_batch], real_labels) 393 | d_loss_fake = discriminator.train_on_batch([initial_recon_images, y_batch], fake_labels) 394 | 395 | d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) 396 | print("d_loss:{}".format(d_loss)) 397 | 398 | """ 399 | Train the generator network 400 | """ 401 | 402 | z_noise2 = np.random.normal(0, 1, size=(batch_size, z_shape)) 403 | random_labels = np.random.randint(0, 6, batch_size).reshape(-1, 1) 404 | random_labels = to_categorical(random_labels, 6) 405 | 406 | g_loss = adversarial_model.train_on_batch([z_noise2, random_labels], [1] * batch_size) 407 | 408 | print("g_loss:{}".format(g_loss)) 409 | 410 | gen_losses.append(g_loss) 411 | dis_losses.append(d_loss) 412 | 413 | # Write losses to Tensorboard 414 | write_log(tensorboard, 'g_loss', np.mean(gen_losses), epoch) 415 | write_log(tensorboard, 'd_loss', np.mean(dis_losses), epoch) 416 | 417 | """ 418 | Generate images after every 10th epoch 419 | """ 420 | if epoch % 10 == 0: 421 | images_batch = loaded_images[0:batch_size] 422 | images_batch = images_batch / 127.5 - 1.0 423 | images_batch = images_batch.astype(np.float32) 424 | 425 | y_batch = y[0:batch_size] 426 | z_noise = np.random.normal(0, 1, size=(batch_size, z_shape)) 427 | 428 | gen_images = generator.predict_on_batch([z_noise, y_batch]) 429 | 430 | for i, img in enumerate(gen_images[:5]): 431 | save_rgb_img(img, path="results/img_{}_{}.png".format(epoch, i)) 432 | 433 | # Save networks 434 | try: 435 | generator.save_weights("generator.h5") 436 | discriminator.save_weights("discriminator.h5") 437 | except Exception as e: 438 | print("Error:", e) 439 | 440 | """ 441 | Train encoder 442 | """ 443 | 444 | if TRAIN_ENCODER: 445 | # Build and compile encoder 446 | encoder = build_encoder() 447 | encoder.compile(loss=euclidean_distance_loss, optimizer='adam') 448 | 449 | # Load the generator network's weights 450 | try: 451 | generator.load_weights("generator.h5") 452 | except Exception as e: 453 | print("Error:", e) 454 | 455 | z_i = np.random.normal(0, 1, size=(5000, z_shape)) 456 | 457 | y = np.random.randint(low=0, high=6, size=(5000,), dtype=np.int64) 458 | num_classes = len(set(y)) 459 | y = np.reshape(np.array(y), [len(y), 1]) 460 | y = to_categorical(y, num_classes=num_classes) 461 | 462 | for epoch in range(epochs): 463 | print("Epoch:", epoch) 464 | 465 | encoder_losses = [] 466 | 467 | number_of_batches = int(z_i.shape[0] / batch_size) 468 | print("Number of batches:", number_of_batches) 469 | for index in range(number_of_batches): 470 | print("Batch:", index + 1) 471 | 472 | z_batch = z_i[index * batch_size:(index + 1) * batch_size] 473 | y_batch = y[index * batch_size:(index + 1) * batch_size] 474 | 475 | generated_images = generator.predict_on_batch([z_batch, y_batch]) 476 | 477 | # Train the encoder model 478 | encoder_loss = encoder.train_on_batch(generated_images, z_batch) 479 | print("Encoder loss:", encoder_loss) 480 | 481 | 
encoder_losses.append(encoder_loss) 482 | 483 | # Write the encoder loss to Tensorboard 484 | write_log(tensorboard, "encoder_loss", np.mean(encoder_losses), epoch) 485 | 486 | # Save the encoder model 487 | encoder.save_weights("encoder.h5") 488 | 489 | """ 490 | Optimize the encoder and the generator network 491 | """ 492 | if TRAIN_GAN_WITH_FR: 493 | 494 | # Load the encoder network 495 | encoder = build_encoder() 496 | encoder.load_weights("encoder.h5") 497 | 498 | # Load the generator network 499 | generator.load_weights("generator.h5") 500 | 501 | image_resizer = build_image_resizer() 502 | image_resizer.compile(loss=['binary_crossentropy'], optimizer='adam') 503 | 504 | # Face recognition model 505 | fr_model = build_fr_model(input_shape=fr_image_shape) 506 | fr_model.compile(loss=['binary_crossentropy'], optimizer="adam") 507 | 508 | # Make the face recognition network as non-trainable 509 | fr_model.trainable = False 510 | 511 | # Input layers 512 | input_image = Input(shape=(64, 64, 3)) 513 | input_label = Input(shape=(6,)) 514 | 515 | # Use the encoder and the generator network 516 | latent0 = encoder(input_image) 517 | gen_images = generator([latent0, input_label]) 518 | 519 | # Resize images to the desired shape 520 | resized_images = Lambda(lambda x: K.resize_images(gen_images, height_factor=3, width_factor=3, 521 | data_format='channels_last'))(gen_images) 522 | embeddings = fr_model(resized_images) 523 | 524 | # Create a Keras model and specify the inputs and outputs for the network 525 | fr_adversarial_model = Model(inputs=[input_image, input_label], outputs=[embeddings]) 526 | 527 | # Compile the model 528 | fr_adversarial_model.compile(loss=euclidean_distance_loss, optimizer=adversarial_optimizer) 529 | 530 | for epoch in range(epochs): 531 | print("Epoch:", epoch) 532 | 533 | reconstruction_losses = [] 534 | 535 | number_of_batches = int(len(loaded_images) / batch_size) 536 | print("Number of batches:", number_of_batches) 537 | for index in range(number_of_batches): 538 | print("Batch:", index + 1) 539 | 540 | images_batch = loaded_images[index * batch_size:(index + 1) * batch_size] 541 | images_batch = images_batch / 127.5 - 1.0 542 | images_batch = images_batch.astype(np.float32) 543 | 544 | y_batch = y[index * batch_size:(index + 1) * batch_size] 545 | 546 | images_batch_resized = image_resizer.predict_on_batch(images_batch) 547 | 548 | real_embeddings = fr_model.predict_on_batch(images_batch_resized) 549 | 550 | reconstruction_loss = fr_adversarial_model.train_on_batch([images_batch, y_batch], real_embeddings) 551 | 552 | print("Reconstruction loss:", reconstruction_loss) 553 | 554 | reconstruction_losses.append(reconstruction_loss) 555 | 556 | # Write the reconstruction loss to Tensorboard 557 | write_log(tensorboard, "reconstruction_loss", np.mean(reconstruction_losses), epoch) 558 | 559 | """ 560 | Generate images 561 | """ 562 | if epoch % 10 == 0: 563 | images_batch = loaded_images[0:batch_size] 564 | images_batch = images_batch / 127.5 - 1.0 565 | images_batch = images_batch.astype(np.float32) 566 | 567 | y_batch = y[0:batch_size] 568 | z_noise = np.random.normal(0, 1, size=(batch_size, z_shape)) 569 | 570 | gen_images = generator.predict_on_batch([z_noise, y_batch]) 571 | 572 | for i, img in enumerate(gen_images[:5]): 573 | save_rgb_img(img, path="results/img_opt_{}_{}.png".format(epoch, i)) 574 | 575 | # Save improved weights for both of the networks 576 | generator.save_weights("generator_optimized.h5") 577 | encoder.save_weights("encoder_optimized.h5") 578 
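# A minimal inference sketch for this chapter, assuming the weights saved by the
# save_weights() calls above; `face_batch` is a hypothetical (N, 64, 64, 3) array
# normalized to [-1, 1] like the training images. Face aging then reduces to
# encode -> condition -> decode:
#
#     encoder = build_encoder()
#     encoder.load_weights("encoder_optimized.h5")
#     generator = build_generator()
#     generator.load_weights("generator_optimized.h5")
#
#     z = encoder.predict(face_batch)
#     target_age = to_categorical([5] * len(face_batch), num_classes=6)  # class 5: age 60+
#     aged_faces = generator.predict([z, target_age])  # tanh outputs in [-1, 1]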
| -------------------------------------------------------------------------------- /Chapter04/README.md: -------------------------------------------------------------------------------- 1 | ## Generating Anime Characters using DCGAN 2 | 3 | Python 3.6 4 | 5 | Steps to set up the project: 6 | 1. Create a python3 virtual environment and activate it 7 | 2. Install dependencies using "pip install -r requirements.txt" 8 | 3. Create essential folders 1. logs 2. results 3. data 9 | 4. Download and format the dataset 10 | 5. Train the model by executing python3 run.py 11 | -------------------------------------------------------------------------------- /Chapter04/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-Adversarial-Networks-Projects/317e7682acfeb5563f70c020a09b1b2e4c6595bb/Chapter04/__init__.py -------------------------------------------------------------------------------- /Chapter04/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.5.0 2 | animeface==1.1.0 3 | astor==0.7.1 4 | certifi==2018.10.15 5 | chardet==3.0.4 6 | cloudpickle==0.6.1 7 | cycler==0.10.0 8 | dask==0.20.2 9 | decorator==4.3.0 10 | gallery-dl==1.5.3 11 | gast==0.2.0 12 | grpcio==1.15.0 13 | h5py==2.8.0 14 | idna==2.7 15 | Keras==2.2.4 16 | Keras-Applications==1.0.6 17 | Keras-Preprocessing==1.0.5 18 | kiwisolver==1.0.1 19 | Markdown==3.0.1 20 | matplotlib==3.0.0 21 | networkx==2.2 22 | numpy==1.15.2 23 | Pillow==5.3.0 24 | protobuf==3.6.1 25 | pydot==1.2.4 26 | pyparsing==2.2.2 27 | python-dateutil==2.7.3 28 | PyWavelets==1.0.1 29 | PyYAML==3.13 30 | requests==2.20.0 31 | scikit-image==0.14.1 32 | scipy==1.1.0 33 | six==1.11.0 34 | tensorboard==1.11.0 35 | tensorflow==1.11.0 36 | termcolor==1.1.0 37 | toolz==0.9.0 38 | urllib3==1.24 39 | Werkzeug==0.14.1 40 | -------------------------------------------------------------------------------- /Chapter04/run.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import io 3 | import math 4 | import time 5 | 6 | import keras.backend as K 7 | import matplotlib.gridspec as gridspec 8 | import matplotlib.pyplot as plt 9 | import numpy as np 10 | import tensorflow as tf 11 | from PIL import Image 12 | from keras import Sequential, Input, Model 13 | from keras.applications.inception_resnet_v2 import InceptionResNetV2, preprocess_input 14 | from keras.callbacks import TensorBoard 15 | from keras.layers import Conv2D 16 | from keras.layers import Dense 17 | from keras.layers import ReLU 18 | from keras.layers import Reshape 19 | from keras.layers.advanced_activations import LeakyReLU 20 | from keras.layers.convolutional import UpSampling2D 21 | from keras.layers.core import Activation 22 | from keras.layers.core import Flatten 23 | from keras.layers.normalization import BatchNormalization 24 | from keras.layers.pooling import MaxPooling2D 25 | from keras.optimizers import Adam, SGD 26 | from keras.preprocessing import image 27 | from scipy.misc import imread, imsave 28 | from scipy.stats import entropy 29 | 30 | K.set_image_dim_ordering('tf') 31 | 32 | np.random.seed(1337) 33 | 34 | 35 | def build_generator(): 36 | gen_model = Sequential() 37 | 38 | gen_model.add(Dense(input_dim=100, output_dim=2048)) 39 | gen_model.add(ReLU()) 40 | 41 | gen_model.add(Dense(256 * 8 * 8)) 42 | gen_model.add(BatchNormalization()) 43 | gen_model.add(ReLU()) 44 | gen_model.add(Reshape((8, 
8, 256), input_shape=(256 * 8 * 8,))) 45 | gen_model.add(UpSampling2D(size=(2, 2))) 46 | 47 | gen_model.add(Conv2D(128, (5, 5), padding='same')) 48 | gen_model.add(ReLU()) 49 | 50 | gen_model.add(UpSampling2D(size=(2, 2))) 51 | 52 | gen_model.add(Conv2D(64, (5, 5), padding='same')) 53 | gen_model.add(ReLU()) 54 | 55 | gen_model.add(UpSampling2D(size=(2, 2))) 56 | 57 | gen_model.add(Conv2D(3, (5, 5), padding='same')) 58 | gen_model.add(Activation('tanh')) 59 | return gen_model 60 | 61 | 62 | def build_discriminator(): 63 | dis_model = Sequential() 64 | dis_model.add( 65 | Conv2D(128, (5, 5), 66 | padding='same', 67 | input_shape=(64, 64, 3)) 68 | ) 69 | dis_model.add(LeakyReLU(alpha=0.2)) 70 | dis_model.add(MaxPooling2D(pool_size=(2, 2))) 71 | 72 | dis_model.add(Conv2D(256, (3, 3))) 73 | dis_model.add(LeakyReLU(alpha=0.2)) 74 | dis_model.add(MaxPooling2D(pool_size=(2, 2))) 75 | 76 | dis_model.add(Conv2D(512, (3, 3))) 77 | dis_model.add(LeakyReLU(alpha=0.2)) 78 | dis_model.add(MaxPooling2D(pool_size=(2, 2))) 79 | 80 | dis_model.add(Flatten()) 81 | dis_model.add(Dense(1024)) 82 | dis_model.add(LeakyReLU(alpha=0.2)) 83 | 84 | dis_model.add(Dense(1)) 85 | dis_model.add(Activation('sigmoid')) 86 | 87 | return dis_model 88 | 89 | 90 | def build_adversarial_model(gen_model, dis_model): 91 | model = Sequential() 92 | model.add(gen_model) 93 | dis_model.trainable = False 94 | model.add(dis_model) 95 | return model 96 | 97 | 98 | def write_log(callback, name, loss, batch_no): 99 | """ 100 | Write training summary to TensorBoard 101 | """ 102 | # for name, value in zip(names, logs): 103 | summary = tf.Summary() 104 | summary_value = summary.value.add() 105 | summary_value.simple_value = loss 106 | summary_value.tag = name 107 | callback.writer.add_summary(summary, batch_no) 108 | callback.writer.flush() 109 | 110 | 111 | def calculate_inception_score(images_path, batch_size=1, splits=10): 112 | # Create an instance of InceptionV3 113 | model = InceptionResNetV2() 114 | 115 | images = None 116 | for image_ in glob.glob(images_path): 117 | # Load image 118 | loaded_image = image.load_img(image_, target_size=(299, 299)) 119 | 120 | # Convert PIL image to numpy ndarray 121 | loaded_image = image.img_to_array(loaded_image) 122 | 123 | # Another another dimension (Add batch dimension) 124 | loaded_image = np.expand_dims(loaded_image, axis=0) 125 | 126 | # Concatenate all images into one tensor 127 | if images is None: 128 | images = loaded_image 129 | else: 130 | images = np.concatenate([images, loaded_image], axis=0) 131 | 132 | # Calculate number of batches 133 | num_batches = (images.shape[0] + batch_size - 1) // batch_size 134 | 135 | probs = None 136 | 137 | # Use InceptionV3 to calculate probabilities 138 | for i in range(num_batches): 139 | image_batch = images[i * batch_size:(i + 1) * batch_size, :, :, :] 140 | prob = model.predict(preprocess_input(image_batch)) 141 | 142 | if probs is None: 143 | probs = prob 144 | else: 145 | probs = np.concatenate([prob, probs], axis=0) 146 | 147 | # Calculate Inception scores 148 | divs = [] 149 | split_size = probs.shape[0] // splits 150 | 151 | for i in range(splits): 152 | prob_batch = probs[(i * split_size):((i + 1) * split_size), :] 153 | p_y = np.expand_dims(np.mean(prob_batch, 0), 0) 154 | div = prob_batch * (np.log(prob_batch / p_y)) 155 | div = np.mean(np.sum(div, 1)) 156 | divs.append(np.exp(div)) 157 | 158 | return np.mean(divs), np.std(divs) 159 | 160 | 161 | def calculate_mode_score(gen_images_path, real_images_path, batch_size=32, splits=10): 162 | 
# Create an instance of InceptionV3 163 | model = InceptionResNetV2() 164 | 165 | # Load real images 166 | real_images = None 167 | for image_ in glob.glob(real_images_path): 168 | # Load image 169 | loaded_image = image.load_img(image_, target_size=(299, 299)) 170 | 171 | # Convert PIL image to numpy ndarray 172 | loaded_image = image.img_to_array(loaded_image) 173 | 174 | # Another another dimension (Add batch dimension) 175 | loaded_image = np.expand_dims(loaded_image, axis=0) 176 | 177 | # Concatenate all images into one tensor 178 | if real_images is None: 179 | real_images = loaded_image 180 | else: 181 | real_images = np.concatenate([real_images, loaded_image], axis=0) 182 | 183 | # Load generated images 184 | gen_images = None 185 | for image_ in glob.glob(gen_images_path): 186 | # Load image 187 | loaded_image = image.load_img(image_, target_size=(299, 299)) 188 | 189 | # Convert PIL image to numpy ndarray 190 | loaded_image = image.img_to_array(loaded_image) 191 | 192 | # Another another dimension (Add batch dimension) 193 | loaded_image = np.expand_dims(loaded_image, axis=0) 194 | 195 | # Concatenate all images into one tensor 196 | if gen_images is None: 197 | gen_images = loaded_image 198 | else: 199 | gen_images = np.concatenate([gen_images, loaded_image], axis=0) 200 | 201 | # Calculate number of batches for generated images 202 | gen_num_batches = (gen_images.shape[0] + batch_size - 1) // batch_size 203 | gen_images_probs = None 204 | # Use InceptionV3 to calculate probabilities of generated images 205 | for i in range(gen_num_batches): 206 | image_batch = gen_images[i * batch_size:(i + 1) * batch_size, :, :, :] 207 | prob = model.predict(preprocess_input(image_batch)) 208 | 209 | if gen_images_probs is None: 210 | gen_images_probs = prob 211 | else: 212 | gen_images_probs = np.concatenate([prob, gen_images_probs], axis=0) 213 | 214 | # Calculate number of batches for real images 215 | real_num_batches = (real_images.shape[0] + batch_size - 1) // batch_size 216 | real_images_probs = None 217 | # Use InceptionV3 to calculate probabilities of real images 218 | for i in range(real_num_batches): 219 | image_batch = real_images[i * batch_size:(i + 1) * batch_size, :, :, :] 220 | prob = model.predict(preprocess_input(image_batch)) 221 | 222 | if real_images_probs is None: 223 | real_images_probs = prob 224 | else: 225 | real_images_probs = np.concatenate([prob, real_images_probs], axis=0) 226 | 227 | # KL-Divergence: compute kl-divergence and mean of it 228 | num_gen_images = len(gen_images) 229 | split_scores = [] 230 | 231 | for j in range(splits): 232 | gen_part = gen_images_probs[j * (num_gen_images // splits): (j + 1) * (num_gen_images // splits), :] 233 | real_part = real_images_probs[j * (num_gen_images // splits): (j + 1) * (num_gen_images // splits), :] 234 | gen_py = np.mean(gen_part, axis=0) 235 | real_py = np.mean(real_part, axis=0) 236 | scores = [] 237 | for i in range(gen_part.shape[0]): 238 | scores.append(entropy(gen_part[i, :], gen_py)) 239 | 240 | split_scores.append(np.exp(np.mean(scores) - entropy(gen_py, real_py))) 241 | 242 | final_mean = np.mean(split_scores) 243 | final_std = np.std(split_scores) 244 | 245 | return final_mean, final_std 246 | 247 | 248 | def denormalize(img): 249 | img = (img + 1) * 127.5 250 | return img.astype(np.uint8) 251 | 252 | 253 | def normalize(img): 254 | return (img - 127.5) / 127.5 255 | 256 | 257 | def visualize_rgb(img): 258 | """ 259 | Visualize a rgb image 260 | :param img: RGB image 261 | """ 262 | fig = plt.figure() 263 
| ax = fig.add_subplot(1, 1, 1) 264 | ax.imshow(img) 265 | ax.axis("off") 266 | ax.set_title("Image") 267 | plt.show() 268 | 269 | 270 | def save_rgb_img(img, path): 271 | """ 272 | Save a rgb image 273 | """ 274 | fig = plt.figure() 275 | ax = fig.add_subplot(1, 1, 1) 276 | ax.imshow(img) 277 | ax.axis("off") 278 | ax.set_title("RGB Image") 279 | 280 | plt.savefig(path) 281 | plt.close() 282 | 283 | 284 | def train(): 285 | start_time = time.time() 286 | dataset_dir = "data/*.*" 287 | batch_size = 128 288 | z_shape = 100 289 | epochs = 10000 290 | dis_learning_rate = 0.005 291 | gen_learning_rate = 0.005 292 | dis_momentum = 0.5 293 | gen_momentum = 0.5 294 | dis_nesterov = True 295 | gen_nesterov = True 296 | 297 | dis_optimizer = SGD(lr=dis_learning_rate, momentum=dis_momentum, nesterov=dis_nesterov) 298 | gen_optimizer = SGD(lr=gen_learning_rate, momentum=gen_momentum, nesterov=gen_nesterov) 299 | 300 | # Load images 301 | all_images = [] 302 | for index, filename in enumerate(glob.glob(dataset_dir)): 303 | all_images.append(imread(filename, flatten=False, mode='RGB')) 304 | 305 | X = np.array(all_images) 306 | X = (X - 127.5) / 127.5 307 | X = X.astype(np.float32) 308 | 309 | dis_model = build_discriminator() 310 | dis_model.compile(loss='binary_crossentropy', optimizer=dis_optimizer) 311 | 312 | gen_model = build_generator() 313 | gen_model.compile(loss='mse', optimizer=gen_optimizer) 314 | 315 | adversarial_model = build_adversarial_model(gen_model, dis_model) 316 | adversarial_model.compile(loss='binary_crossentropy', optimizer=gen_optimizer) 317 | 318 | tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()), write_images=True, write_grads=True, write_graph=True) 319 | tensorboard.set_model(gen_model) 320 | tensorboard.set_model(dis_model) 321 | 322 | for epoch in range(epochs): 323 | print("--------------------------") 324 | print("Epoch:{}".format(epoch)) 325 | 326 | dis_losses = [] 327 | gen_losses = [] 328 | 329 | num_batches = int(X.shape[0] / batch_size) 330 | 331 | print("Number of batches:{}".format(num_batches)) 332 | for index in range(num_batches): 333 | print("Batch:{}".format(index)) 334 | 335 | z_noise = np.random.normal(0, 1, size=(batch_size, z_shape)) 336 | # z_noise = np.random.uniform(-1, 1, size=(batch_size, 100)) 337 | 338 | generated_images = gen_model.predict_on_batch(z_noise) 339 | 340 | # visualize_rgb(generated_images[0]) 341 | 342 | """ 343 | Train the discriminator model 344 | """ 345 | 346 | dis_model.trainable = True 347 | 348 | image_batch = X[index * batch_size:(index + 1) * batch_size] 349 | 350 | y_real = np.ones((batch_size, )) * 0.9 351 | y_fake = np.zeros((batch_size, )) * 0.1 352 | 353 | dis_loss_real = dis_model.train_on_batch(image_batch, y_real) 354 | dis_loss_fake = dis_model.train_on_batch(generated_images, y_fake) 355 | 356 | d_loss = (dis_loss_real+dis_loss_fake)/2 357 | print("d_loss:", d_loss) 358 | 359 | dis_model.trainable = False 360 | 361 | """ 362 | Train the generator model(adversarial model) 363 | """ 364 | z_noise = np.random.normal(0, 1, size=(batch_size, z_shape)) 365 | # z_noise = np.random.uniform(-1, 1, size=(batch_size, 100)) 366 | 367 | g_loss = adversarial_model.train_on_batch(z_noise, y_real) 368 | print("g_loss:", g_loss) 369 | 370 | dis_losses.append(d_loss) 371 | gen_losses.append(g_loss) 372 | 373 | """ 374 | Sample some images and save them 375 | """ 376 | if epoch % 100 == 0: 377 | z_noise = np.random.normal(0, 1, size=(batch_size, z_shape)) 378 | gen_images1 = gen_model.predict_on_batch(z_noise) 379 | 
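# Note on the sampling loop below: the generator ends in tanh, so samples lie in
# [-1, 1], while save_rgb_img() passes the array straight to imshow(), which clips
# float input to [0, 1]. The denormalize() helper defined above maps samples back
# to uint8 in [0, 255] before saving, and indexing the file name keeps the two
# saved samples from overwriting each other.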
380 | for i, img in enumerate(gen_images1[:2]): 381 | save_rgb_img(denormalize(img), "results/one_{}_{}.png".format(epoch, i)) 382 | 383 | print("Epoch:{}, dis_loss:{}".format(epoch, np.mean(dis_losses))) 384 | print("Epoch:{}, gen_loss: {}".format(epoch, np.mean(gen_losses))) 385 | 386 | """ 387 | Save losses to Tensorboard after each epoch 388 | """ 389 | write_log(tensorboard, 'discriminator_loss', np.mean(dis_losses), epoch) 390 | write_log(tensorboard, 'generator_loss', np.mean(gen_losses), epoch) 391 | 392 | """ 393 | Save models 394 | """ 395 | gen_model.save("generator_model.h5") 396 | dis_model.save("discriminator_model.h5") 397 | 398 | print("Time:", (time.time() - start_time)) 399 | 400 | 401 | if __name__ == '__main__': 402 | train() 403 | -------------------------------------------------------------------------------- /Chapter05/README.md: -------------------------------------------------------------------------------- 1 | ## Using SRGANs to Generate Photo-Realistic Images 2 | 3 | Keras implementation of "Photo-Realistic Single Image Super-Resolution Using a Generative Adversarial Network" 4 | 5 | Python 3.6 6 | 7 | Steps to set up the project: 8 | 1. Create a python3 virtual environment and activate it 9 | 2. Install dependencies using "pip install -r requirements.txt" 10 | 3. Create essential folders like 1. logs 2. results 3. data 11 | 4. Download 'img_align_celeba.zip' from "https://drive.google.com/drive/folders/0B7EVK8r0v71pTUZsaXdaSnZBZzg" 12 | 5. Train the model by executing "python3 run.py" 13 | -------------------------------------------------------------------------------- /Chapter05/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-Adversarial-Networks-Projects/317e7682acfeb5563f70c020a09b1b2e4c6595bb/Chapter05/__init__.py -------------------------------------------------------------------------------- /Chapter05/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.6.1 2 | astor==0.7.1 3 | cloudpickle==0.6.1 4 | cycler==0.10.0 5 | dask==1.0.0 6 | decorator==4.3.0 7 | gast==0.2.0 8 | grpcio==1.17.1 9 | h5py==2.9.0 10 | imageio==2.4.1 11 | imgaug==0.2.7 12 | Keras==2.2.4 13 | Keras-Applications==1.0.6 14 | keras-contrib==2.0.8 15 | Keras-Preprocessing==1.0.5 16 | kiwisolver==1.0.1 17 | Markdown==3.0.1 18 | matplotlib==3.0.2 19 | networkx==2.2 20 | numpy==1.15.4 21 | opencv-python==3.4.5.20 22 | Pillow==5.4.0 23 | protobuf==3.6.1 24 | pyparsing==2.3.0 25 | python-dateutil==2.7.5 26 | PyWavelets==1.0.1 27 | PyYAML==3.13 28 | scikit-image==0.14.1 29 | scipy==1.2.0 30 | Shapely==1.6.4.post2 31 | six==1.12.0 32 | tensorboard==1.12.2 33 | tensorflow==1.12.0 34 | termcolor==1.1.0 35 | toolz==0.9.0 36 | Werkzeug==0.14.1 37 | -------------------------------------------------------------------------------- /Chapter05/run.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import time 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import tensorflow as tf 7 | from keras import Input 8 | from keras.applications import VGG19 9 | from keras.callbacks import TensorBoard 10 | from keras.layers import BatchNormalization, Activation, LeakyReLU, Add, Dense 11 | from keras.layers.convolutional import Conv2D, UpSampling2D 12 | from keras.models import Model 13 | from keras.optimizers import Adam 14 | from scipy.misc import imread, imresize 15 | 16 | 17 | def residual_block(x): 18 | """ 19 | 
Residual block 20 | """ 21 | filters = [64, 64] 22 | kernel_size = 3 23 | strides = 1 24 | padding = "same" 25 | momentum = 0.8 26 | activation = "relu" 27 | 28 | res = Conv2D(filters=filters[0], kernel_size=kernel_size, strides=strides, padding=padding)(x) 29 | res = Activation(activation=activation)(res) 30 | res = BatchNormalization(momentum=momentum)(res) 31 | 32 | res = Conv2D(filters=filters[1], kernel_size=kernel_size, strides=strides, padding=padding)(res) 33 | res = BatchNormalization(momentum=momentum)(res) 34 | 35 | # Add res and x 36 | res = Add()([res, x]) 37 | return res 38 | 39 | 40 | def build_generator(): 41 | """ 42 | Create a generator network using the hyperparameter values defined below 43 | :return: 44 | """ 45 | residual_blocks = 16 46 | momentum = 0.8 47 | input_shape = (64, 64, 3) 48 | 49 | # Input Layer of the generator network 50 | input_layer = Input(shape=input_shape) 51 | 52 | # Add the pre-residual block 53 | gen1 = Conv2D(filters=64, kernel_size=9, strides=1, padding='same', activation='relu')(input_layer) 54 | 55 | # Add 16 residual blocks 56 | res = residual_block(gen1) 57 | for i in range(residual_blocks - 1): 58 | res = residual_block(res) 59 | 60 | # Add the post-residual block 61 | gen2 = Conv2D(filters=64, kernel_size=3, strides=1, padding='same')(res) 62 | gen2 = BatchNormalization(momentum=momentum)(gen2) 63 | 64 | # Take the sum of the output from the pre-residual block(gen1) and the post-residual block(gen2) 65 | gen3 = Add()([gen2, gen1]) 66 | 67 | # Add an upsampling block 68 | gen4 = UpSampling2D(size=2)(gen3) 69 | gen4 = Conv2D(filters=256, kernel_size=3, strides=1, padding='same')(gen4) 70 | gen4 = Activation('relu')(gen4) 71 | 72 | # Add another upsampling block 73 | gen5 = UpSampling2D(size=2)(gen4) 74 | gen5 = Conv2D(filters=256, kernel_size=3, strides=1, padding='same')(gen5) 75 | gen5 = Activation('relu')(gen5) 76 | 77 | # Output convolution layer 78 | gen6 = Conv2D(filters=3, kernel_size=9, strides=1, padding='same')(gen5) 79 | output = Activation('tanh')(gen6) 80 | 81 | # Keras model 82 | model = Model(inputs=[input_layer], outputs=[output], name='generator') 83 | return model 84 | 85 | 86 | def build_discriminator(): 87 | """ 88 | Create a discriminator network using the hyperparameter values defined below 89 | :return: 90 | """ 91 | leakyrelu_alpha = 0.2 92 | momentum = 0.8 93 | input_shape = (256, 256, 3) 94 | 95 | input_layer = Input(shape=input_shape) 96 | 97 | # Add the first convolution block 98 | dis1 = Conv2D(filters=64, kernel_size=3, strides=1, padding='same')(input_layer) 99 | dis1 = LeakyReLU(alpha=leakyrelu_alpha)(dis1) 100 | 101 | # Add the 2nd convolution block 102 | dis2 = Conv2D(filters=64, kernel_size=3, strides=2, padding='same')(dis1) 103 | dis2 = LeakyReLU(alpha=leakyrelu_alpha)(dis2) 104 | dis2 = BatchNormalization(momentum=momentum)(dis2) 105 | 106 | # Add the third convolution block 107 | dis3 = Conv2D(filters=128, kernel_size=3, strides=1, padding='same')(dis2) 108 | dis3 = LeakyReLU(alpha=leakyrelu_alpha)(dis3) 109 | dis3 = BatchNormalization(momentum=momentum)(dis3) 110 | 111 | # Add the fourth convolution block 112 | dis4 = Conv2D(filters=128, kernel_size=3, strides=2, padding='same')(dis3) 113 | dis4 = LeakyReLU(alpha=leakyrelu_alpha)(dis4) 114 | dis4 = BatchNormalization(momentum=0.8)(dis4) 115 | 116 | # Add the fifth convolution block 117 | dis5 = Conv2D(256, kernel_size=3, strides=1, padding='same')(dis4) 118 | dis5 = LeakyReLU(alpha=leakyrelu_alpha)(dis5) 119 | dis5 = 
BatchNormalization(momentum=momentum)(dis5) 120 | 121 | # Add the sixth convolution block 122 | dis6 = Conv2D(filters=256, kernel_size=3, strides=2, padding='same')(dis5) 123 | dis6 = LeakyReLU(alpha=leakyrelu_alpha)(dis6) 124 | dis6 = BatchNormalization(momentum=momentum)(dis6) 125 | 126 | # Add the seventh convolution block 127 | dis7 = Conv2D(filters=512, kernel_size=3, strides=1, padding='same')(dis6) 128 | dis7 = LeakyReLU(alpha=leakyrelu_alpha)(dis7) 129 | dis7 = BatchNormalization(momentum=momentum)(dis7) 130 | 131 | # Add the eight convolution block 132 | dis8 = Conv2D(filters=512, kernel_size=3, strides=2, padding='same')(dis7) 133 | dis8 = LeakyReLU(alpha=leakyrelu_alpha)(dis8) 134 | dis8 = BatchNormalization(momentum=momentum)(dis8) 135 | 136 | # Add a dense layer 137 | dis9 = Dense(units=1024)(dis8) 138 | dis9 = LeakyReLU(alpha=0.2)(dis9) 139 | 140 | # Last dense layer - for classification 141 | output = Dense(units=1, activation='sigmoid')(dis9) 142 | 143 | model = Model(inputs=[input_layer], outputs=[output], name='discriminator') 144 | return model 145 | 146 | 147 | def build_vgg(): 148 | """ 149 | Build VGG network to extract image features 150 | """ 151 | input_shape = (256, 256, 3) 152 | 153 | # Load a pre-trained VGG19 model trained on 'Imagenet' dataset 154 | vgg = VGG19(weights="imagenet") 155 | vgg.outputs = [vgg.layers[9].output] 156 | 157 | input_layer = Input(shape=input_shape) 158 | 159 | # Extract features 160 | features = vgg(input_layer) 161 | 162 | # Create a Keras model 163 | model = Model(inputs=[input_layer], outputs=[features]) 164 | return model 165 | 166 | 167 | def sample_images(data_dir, batch_size, high_resolution_shape, low_resolution_shape): 168 | # Make a list of all images inside the data directory 169 | all_images = glob.glob(data_dir) 170 | 171 | # Choose a random batch of images 172 | images_batch = np.random.choice(all_images, size=batch_size) 173 | 174 | low_resolution_images = [] 175 | high_resolution_images = [] 176 | 177 | for img in images_batch: 178 | # Get an ndarray of the current image 179 | img1 = imread(img, mode='RGB') 180 | img1 = img1.astype(np.float32) 181 | 182 | # Resize the image 183 | img1_high_resolution = imresize(img1, high_resolution_shape) 184 | img1_low_resolution = imresize(img1, low_resolution_shape) 185 | 186 | # Do a random horizontal flip 187 | if np.random.random() < 0.5: 188 | img1_high_resolution = np.fliplr(img1_high_resolution) 189 | img1_low_resolution = np.fliplr(img1_low_resolution) 190 | 191 | high_resolution_images.append(img1_high_resolution) 192 | low_resolution_images.append(img1_low_resolution) 193 | 194 | # Convert the lists to Numpy NDArrays 195 | return np.array(high_resolution_images), np.array(low_resolution_images) 196 | 197 | 198 | def save_images(low_resolution_image, original_image, generated_image, path): 199 | """ 200 | Save low-resolution, high-resolution(original) and 201 | generated high-resolution images in a single image 202 | """ 203 | fig = plt.figure() 204 | ax = fig.add_subplot(1, 3, 1) 205 | ax.imshow(low_resolution_image) 206 | ax.axis("off") 207 | ax.set_title("Low-resolution") 208 | 209 | ax = fig.add_subplot(1, 3, 2) 210 | ax.imshow(original_image) 211 | ax.axis("off") 212 | ax.set_title("Original") 213 | 214 | ax = fig.add_subplot(1, 3, 3) 215 | ax.imshow(generated_image) 216 | ax.axis("off") 217 | ax.set_title("Generated") 218 | 219 | plt.savefig(path) 220 | 221 | 222 | def write_log(callback, name, value, batch_no): 223 | """ 224 | Write scalars to Tensorboard 225 | """ 
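# This helper emits a raw TF 1.x scalar summary so that losses computed in a
# manual train_on_batch() loop still reach TensorBoard. For reference only (an
# assumption, not used by this code base), a rough TensorFlow 2.x equivalent:
#
#     writer = tf.summary.create_file_writer("logs")
#     with writer.as_default():
#         tf.summary.scalar(name, value, step=batch_no)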
226 |     summary = tf.Summary()
227 |     summary_value = summary.value.add()
228 |     summary_value.simple_value = value
229 |     summary_value.tag = name
230 |     callback.writer.add_summary(summary, batch_no)
231 |     callback.writer.flush()
232 | 
233 | 
234 | if __name__ == '__main__':
235 |     data_dir = "data/img_align_celeba/*.*"
236 |     epochs = 30000
237 |     batch_size = 1
238 |     mode = 'predict'  # set to 'train' to train the networks; 'predict' runs inference with saved weights
239 | 
240 |     # Shape of low-resolution and high-resolution images
241 |     low_resolution_shape = (64, 64, 3)
242 |     high_resolution_shape = (256, 256, 3)
243 | 
244 |     # Common optimizer for all networks
245 |     common_optimizer = Adam(0.0002, 0.5)
246 | 
247 |     if mode == 'train':
248 |         # Build and compile VGG19 network to extract features
249 |         vgg = build_vgg()
250 |         vgg.trainable = False
251 |         vgg.compile(loss='mse', optimizer=common_optimizer, metrics=['accuracy'])
252 | 
253 |         # Build and compile the discriminator network
254 |         discriminator = build_discriminator()
255 |         discriminator.compile(loss='mse', optimizer=common_optimizer, metrics=['accuracy'])
256 | 
257 |         # Build the generator network
258 |         generator = build_generator()
259 | 
260 |         """
261 |         Build and compile the adversarial model
262 |         """
263 | 
264 |         # Input layers for high-resolution and low-resolution images
265 |         input_high_resolution = Input(shape=high_resolution_shape)
266 |         input_low_resolution = Input(shape=low_resolution_shape)
267 | 
268 |         # Generate high-resolution images from low-resolution images
269 |         generated_high_resolution_images = generator(input_low_resolution)
270 | 
271 |         # Extract feature maps of the generated images
272 |         features = vgg(generated_high_resolution_images)
273 | 
274 |         # Make the discriminator network non-trainable
275 |         discriminator.trainable = False
276 | 
277 |         # Get the probability of generated high-resolution images
278 |         probs = discriminator(generated_high_resolution_images)
279 | 
280 |         # Create and compile an adversarial model
281 |         adversarial_model = Model([input_low_resolution, input_high_resolution], [probs, features])
282 |         adversarial_model.compile(loss=['binary_crossentropy', 'mse'], loss_weights=[1e-3, 1], optimizer=common_optimizer)
283 | 
284 |         # Add Tensorboard
285 |         tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()))
286 |         tensorboard.set_model(generator)
287 |         tensorboard.set_model(discriminator)
288 | 
289 |         for epoch in range(epochs):
290 |             print("Epoch:{}".format(epoch))
291 | 
292 |             """
293 |             Train the discriminator network
294 |             """
295 | 
296 |             # Sample a batch of images
297 |             high_resolution_images, low_resolution_images = sample_images(data_dir=data_dir, batch_size=batch_size,
298 |                                                                           low_resolution_shape=low_resolution_shape,
299 |                                                                           high_resolution_shape=high_resolution_shape)
300 |             # Normalize images
301 |             high_resolution_images = high_resolution_images / 127.5 - 1.
302 |             low_resolution_images = low_resolution_images / 127.5 - 1.
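            # Dividing by 127.5 and subtracting 1 maps pixel values from [0, 255] to
            # [-1, 1], the output range of the generator's final tanh activation.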
303 | 304 | # Generate high-resolution images from low-resolution images 305 | generated_high_resolution_images = generator.predict(low_resolution_images) 306 | 307 | # Generate batch of real and fake labels 308 | real_labels = np.ones((batch_size, 16, 16, 1)) 309 | fake_labels = np.zeros((batch_size, 16, 16, 1)) 310 | 311 | # Train the discriminator network on real and fake images 312 | d_loss_real = discriminator.train_on_batch(high_resolution_images, real_labels) 313 | d_loss_fake = discriminator.train_on_batch(generated_high_resolution_images, fake_labels) 314 | 315 | # Calculate total discriminator loss 316 | d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) 317 | print("d_loss:", d_loss) 318 | 319 | """ 320 | Train the generator network 321 | """ 322 | 323 | # Sample a batch of images 324 | high_resolution_images, low_resolution_images = sample_images(data_dir=data_dir, batch_size=batch_size, 325 | low_resolution_shape=low_resolution_shape, 326 | high_resolution_shape=high_resolution_shape) 327 | # Normalize images 328 | high_resolution_images = high_resolution_images / 127.5 - 1. 329 | low_resolution_images = low_resolution_images / 127.5 - 1. 330 | 331 | # Extract feature maps for real high-resolution images 332 | image_features = vgg.predict(high_resolution_images) 333 | 334 | # Train the generator network 335 | g_loss = adversarial_model.train_on_batch([low_resolution_images, high_resolution_images], 336 | [real_labels, image_features]) 337 | 338 | print("g_loss:", g_loss) 339 | 340 | # Write the losses to Tensorboard 341 | write_log(tensorboard, 'g_loss', g_loss[0], epoch) 342 | write_log(tensorboard, 'd_loss', d_loss[0], epoch) 343 | 344 | # Sample and save images after every 100 epochs 345 | if epoch % 100 == 0: 346 | high_resolution_images, low_resolution_images = sample_images(data_dir=data_dir, batch_size=batch_size, 347 | low_resolution_shape=low_resolution_shape, 348 | high_resolution_shape=high_resolution_shape) 349 | # Normalize images 350 | high_resolution_images = high_resolution_images / 127.5 - 1. 351 | low_resolution_images = low_resolution_images / 127.5 - 1. 352 | 353 | generated_images = generator.predict_on_batch(low_resolution_images) 354 | 355 | for index, img in enumerate(generated_images): 356 | save_images(low_resolution_images[index], high_resolution_images[index], img, 357 | path="results/img_{}_{}".format(epoch, index)) 358 | 359 | # Save models 360 | generator.save_weights("generator.h5") 361 | discriminator.save_weights("discriminator.h5") 362 | 363 | if mode == 'predict': 364 | # Build and compile the discriminator network 365 | discriminator = build_discriminator() 366 | 367 | # Build the generator network 368 | generator = build_generator() 369 | 370 | # Load models 371 | generator.load_weights("generator.h5") 372 | discriminator.load_weights("discriminator.h5") 373 | 374 | # Get 10 random images 375 | high_resolution_images, low_resolution_images = sample_images(data_dir=data_dir, batch_size=10, 376 | low_resolution_shape=low_resolution_shape, 377 | high_resolution_shape=high_resolution_shape) 378 | # Normalize images 379 | high_resolution_images = high_resolution_images / 127.5 - 1. 380 | low_resolution_images = low_resolution_images / 127.5 - 1. 
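        # The images here are still in [-1, 1]; plt.imshow clips negative values, so an
        # optional (img + 1) / 2 rescale before plotting would give truer previews.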
381 | 
382 |         # Generate high-resolution images from low-resolution images
383 |         generated_images = generator.predict_on_batch(low_resolution_images)
384 | 
385 |         # Save images
386 |         for index, img in enumerate(generated_images):
387 |             save_images(low_resolution_images[index], high_resolution_images[index], img,
388 |                         path="results/gen_{}".format(index))
389 | 
--------------------------------------------------------------------------------
/Chapter06/README.md:
--------------------------------------------------------------------------------
1 | ## StackGAN - Text to Photo-Realistic Image Synthesis
2 | 
3 | Python 3.6
4 | 
5 | Steps to set up the project:
6 | 1. Create a python3 virtual environment and activate it
7 | 2. Install dependencies using "pip install -r requirements.txt"
8 | 3. Create essential folders like 1. logs 2. results 3. results2 4. data
9 | 4. Download the dataset to data directory
10 | 5. Train the Stage-I model by executing "python3 stage1.py", then train the Stage-II model by executing "python3 stage2.py"
11 | 
--------------------------------------------------------------------------------
/Chapter06/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Generative-Adversarial-Networks-Projects/317e7682acfeb5563f70c020a09b1b2e4c6595bb/Chapter06/__init__.py
--------------------------------------------------------------------------------
/Chapter06/requirements.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/PacktPublishing/Generative-Adversarial-Networks-Projects/317e7682acfeb5563f70c020a09b1b2e4c6595bb/Chapter06/requirements.txt
--------------------------------------------------------------------------------
/Chapter06/stage1.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pickle
3 | import random
4 | import time
5 | 
6 | import PIL
7 | import numpy as np
8 | import pandas as pd
9 | import tensorflow as tf
10 | from PIL import Image
11 | from keras import Input, Model
12 | from keras import backend as K
13 | from keras.callbacks import TensorBoard
14 | from keras.layers import Dense, LeakyReLU, BatchNormalization, ReLU, Reshape, UpSampling2D, Conv2D, Activation, \
15 |     concatenate, Flatten, Lambda, Concatenate
16 | from keras.optimizers import Adam
17 | from matplotlib import pyplot as plt
18 | 
19 | 
20 | def load_class_ids(class_info_file_path):
21 |     """
22 |     Load class ids from class_info.pickle file
23 |     """
24 |     with open(class_info_file_path, 'rb') as f:
25 |         class_ids = pickle.load(f, encoding='latin1')
26 |     return class_ids
27 | 
28 | 
29 | def load_embeddings(embeddings_file_path):
30 |     """
31 |     Load embeddings
32 |     """
33 |     with open(embeddings_file_path, 'rb') as f:
34 |         embeddings = pickle.load(f, encoding='latin1')
35 |     embeddings = np.array(embeddings)
36 |     print('embeddings: ', embeddings.shape)
37 |     return embeddings
38 | 
39 | 
40 | def load_filenames(filenames_file_path):
41 |     """
42 |     Load filenames.pickle file and return a list of all file names
43 |     """
44 |     with open(filenames_file_path, 'rb') as f:
45 |         filenames = pickle.load(f, encoding='latin1')
46 |     return filenames
47 | 
48 | 
49 | def load_bounding_boxes(dataset_dir):
50 |     """
51 |     Load bounding boxes and return a dictionary of file names and corresponding bounding boxes
52 |     """
53 |     # Paths
54 |     bounding_boxes_path = os.path.join(dataset_dir, 'bounding_boxes.txt')
55 |     file_paths_path = os.path.join(dataset_dir, 'images.txt')
56 | 
57 |     # Read bounding_boxes.txt and 
images.txt file 58 | df_bounding_boxes = pd.read_csv(bounding_boxes_path, 59 | delim_whitespace=True, header=None).astype(int) 60 | df_file_names = pd.read_csv(file_paths_path, delim_whitespace=True, header=None) 61 | 62 | # Create a list of file names 63 | file_names = df_file_names[1].tolist() 64 | 65 | # Create a dictionary of file_names and bounding boxes 66 | filename_boundingbox_dict = {img_file[:-4]: [] for img_file in file_names[:2]} 67 | 68 | # Assign a bounding box to the corresponding image 69 | for i in range(0, len(file_names)): 70 | # Get the bounding box 71 | bounding_box = df_bounding_boxes.iloc[i][1:].tolist() 72 | key = file_names[i][:-4] 73 | filename_boundingbox_dict[key] = bounding_box 74 | 75 | return filename_boundingbox_dict 76 | 77 | 78 | def get_img(img_path, bbox, image_size): 79 | """ 80 | Load and resize image 81 | """ 82 | img = Image.open(img_path).convert('RGB') 83 | width, height = img.size 84 | if bbox is not None: 85 | R = int(np.maximum(bbox[2], bbox[3]) * 0.75) 86 | center_x = int((2 * bbox[0] + bbox[2]) / 2) 87 | center_y = int((2 * bbox[1] + bbox[3]) / 2) 88 | y1 = np.maximum(0, center_y - R) 89 | y2 = np.minimum(height, center_y + R) 90 | x1 = np.maximum(0, center_x - R) 91 | x2 = np.minimum(width, center_x + R) 92 | img = img.crop([x1, y1, x2, y2]) 93 | img = img.resize(image_size, PIL.Image.BILINEAR) 94 | return img 95 | 96 | 97 | def load_dataset(filenames_file_path, class_info_file_path, cub_dataset_dir, embeddings_file_path, image_size): 98 | """ 99 | Load dataset 100 | """ 101 | filenames = load_filenames(filenames_file_path) 102 | class_ids = load_class_ids(class_info_file_path) 103 | bounding_boxes = load_bounding_boxes(cub_dataset_dir) 104 | all_embeddings = load_embeddings(embeddings_file_path) 105 | 106 | X, y, embeddings = [], [], [] 107 | 108 | print("Embeddings shape:", all_embeddings.shape) 109 | 110 | for index, filename in enumerate(filenames): 111 | bounding_box = bounding_boxes[filename] 112 | 113 | try: 114 | # Load images 115 | img_name = '{}/images/{}.jpg'.format(cub_dataset_dir, filename) 116 | img = get_img(img_name, bounding_box, image_size) 117 | 118 | all_embeddings1 = all_embeddings[index, :, :] 119 | 120 | embedding_ix = random.randint(0, all_embeddings1.shape[0] - 1) 121 | embedding = all_embeddings1[embedding_ix, :] 122 | 123 | X.append(np.array(img)) 124 | y.append(class_ids[index]) 125 | embeddings.append(embedding) 126 | except Exception as e: 127 | print(e) 128 | 129 | X = np.array(X) 130 | y = np.array(y) 131 | embeddings = np.array(embeddings) 132 | return X, y, embeddings 133 | 134 | 135 | def generate_c(x): 136 | mean = x[:, :128] 137 | log_sigma = x[:, 128:] 138 | stddev = K.exp(log_sigma) 139 | epsilon = K.random_normal(shape=K.constant((mean.shape[1],), dtype='int32')) 140 | c = stddev * epsilon + mean 141 | return c 142 | 143 | 144 | def build_ca_model(): 145 | """ 146 | Get conditioning augmentation model. 
147 | Takes an embedding of shape (1024,) and returns a tensor of shape (256,) 148 | """ 149 | input_layer = Input(shape=(1024,)) 150 | x = Dense(256)(input_layer) 151 | x = LeakyReLU(alpha=0.2)(x) 152 | model = Model(inputs=[input_layer], outputs=[x]) 153 | return model 154 | 155 | 156 | def build_embedding_compressor_model(): 157 | """ 158 | Build embedding compressor model 159 | """ 160 | input_layer = Input(shape=(1024,)) 161 | x = Dense(128)(input_layer) 162 | x = ReLU()(x) 163 | 164 | model = Model(inputs=[input_layer], outputs=[x]) 165 | return model 166 | 167 | 168 | def build_stage1_generator(): 169 | """ 170 | Builds a generator model used in Stage-I 171 | """ 172 | input_layer = Input(shape=(1024,)) 173 | x = Dense(256)(input_layer) 174 | mean_logsigma = LeakyReLU(alpha=0.2)(x) 175 | 176 | c = Lambda(generate_c)(mean_logsigma) 177 | 178 | input_layer2 = Input(shape=(100,)) 179 | 180 | gen_input = Concatenate(axis=1)([c, input_layer2]) 181 | 182 | x = Dense(128 * 8 * 4 * 4, use_bias=False)(gen_input) 183 | x = ReLU()(x) 184 | 185 | x = Reshape((4, 4, 128 * 8), input_shape=(128 * 8 * 4 * 4,))(x) 186 | 187 | x = UpSampling2D(size=(2, 2))(x) 188 | x = Conv2D(512, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 189 | x = BatchNormalization()(x) 190 | x = ReLU()(x) 191 | 192 | x = UpSampling2D(size=(2, 2))(x) 193 | x = Conv2D(256, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 194 | x = BatchNormalization()(x) 195 | x = ReLU()(x) 196 | 197 | x = UpSampling2D(size=(2, 2))(x) 198 | x = Conv2D(128, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 199 | x = BatchNormalization()(x) 200 | x = ReLU()(x) 201 | 202 | x = UpSampling2D(size=(2, 2))(x) 203 | x = Conv2D(64, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 204 | x = BatchNormalization()(x) 205 | x = ReLU()(x) 206 | 207 | x = Conv2D(3, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 208 | x = Activation(activation='tanh')(x) 209 | 210 | stage1_gen = Model(inputs=[input_layer, input_layer2], outputs=[x, mean_logsigma]) 211 | return stage1_gen 212 | 213 | 214 | def build_stage1_discriminator(): 215 | """ 216 | Create a model which takes two inputs 217 | 1. One from above network 218 | 2. One from the embedding layer 219 | 3. 
Concatenate along the axis dimension and feed it to the last module which produces final logits
220 |     """
221 |     input_layer = Input(shape=(64, 64, 3))
222 | 
223 |     x = Conv2D(64, (4, 4),
224 |                padding='same', strides=2,
225 |                input_shape=(64, 64, 3), use_bias=False)(input_layer)
226 |     x = LeakyReLU(alpha=0.2)(x)
227 | 
228 |     x = Conv2D(128, (4, 4), padding='same', strides=2, use_bias=False)(x)
229 |     x = BatchNormalization()(x)
230 |     x = LeakyReLU(alpha=0.2)(x)
231 | 
232 |     x = Conv2D(256, (4, 4), padding='same', strides=2, use_bias=False)(x)
233 |     x = BatchNormalization()(x)
234 |     x = LeakyReLU(alpha=0.2)(x)
235 | 
236 |     x = Conv2D(512, (4, 4), padding='same', strides=2, use_bias=False)(x)
237 |     x = BatchNormalization()(x)
238 |     x = LeakyReLU(alpha=0.2)(x)
239 | 
240 |     input_layer2 = Input(shape=(4, 4, 128))
241 | 
242 |     merged_input = concatenate([x, input_layer2])
243 | 
244 |     x2 = Conv2D(64 * 8, kernel_size=1,
245 |                 padding="same", strides=1)(merged_input)
246 |     x2 = BatchNormalization()(x2)
247 |     x2 = LeakyReLU(alpha=0.2)(x2)
248 |     x2 = Flatten()(x2)
249 |     x2 = Dense(1)(x2)
250 |     x2 = Activation('sigmoid')(x2)
251 | 
252 |     stage1_dis = Model(inputs=[input_layer, input_layer2], outputs=[x2])
253 |     return stage1_dis
254 | 
255 | 
256 | def build_adversarial_model(gen_model, dis_model):
257 |     input_layer = Input(shape=(1024,))
258 |     input_layer2 = Input(shape=(100,))
259 |     input_layer3 = Input(shape=(4, 4, 128))
260 | 
261 |     x, mean_logsigma = gen_model([input_layer, input_layer2])
262 | 
263 |     dis_model.trainable = False
264 |     valid = dis_model([x, input_layer3])
265 | 
266 |     model = Model(inputs=[input_layer, input_layer2, input_layer3], outputs=[valid, mean_logsigma])
267 |     return model
268 | 
269 | 
270 | def KL_loss(y_true, y_pred):
271 |     mean = y_pred[:, :128]
272 |     logsigma = y_pred[:, 128:]  # the second half of the prediction holds log(sigma)
273 |     loss = -logsigma + .5 * (-1 + K.exp(2. * logsigma) + K.square(mean))
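    # Closed-form KL divergence KL(N(mean, sigma^2) || N(0, I)) for the conditioning
    # augmentation network: -log(sigma) + (sigma^2 + mean^2 - 1) / 2, element-wise.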
274 |     loss = K.mean(loss)
275 |     return loss
276 | 
277 | 
278 | def custom_generator_loss(y_true, y_pred):
279 |     # Calculate binary cross entropy loss
280 |     return K.binary_crossentropy(y_true, y_pred)
281 | 
282 | 
283 | def save_rgb_img(img, path):
284 |     """
285 |     Save an RGB image
286 |     """
287 |     fig = plt.figure()
288 |     ax = fig.add_subplot(1, 1, 1)
289 |     ax.imshow(img)
290 |     ax.axis("off")
291 |     ax.set_title("Image")
292 | 
293 |     plt.savefig(path)
294 |     plt.close()
295 | 
296 | 
297 | def write_log(callback, name, loss, batch_no):
298 |     """
299 |     Write training summary to TensorBoard
300 |     """
301 |     summary = tf.Summary()
302 |     summary_value = summary.value.add()
303 |     summary_value.simple_value = loss
304 |     summary_value.tag = name
305 |     callback.writer.add_summary(summary, batch_no)
306 |     callback.writer.flush()
307 | 
308 | 
309 | if __name__ == '__main__':
310 |     data_dir = "data/birds/"
311 |     train_dir = data_dir + "/train"
312 |     test_dir = data_dir + "/test"
313 |     image_size = 64
314 |     batch_size = 64
315 |     z_dim = 100
316 |     stage1_generator_lr = 0.0002
317 |     stage1_discriminator_lr = 0.0002
318 |     stage1_lr_decay_step = 600
319 |     epochs = 1000
320 |     condition_dim = 128
321 | 
322 |     embeddings_file_path_train = train_dir + "/char-CNN-RNN-embeddings.pickle"
323 |     embeddings_file_path_test = test_dir + "/char-CNN-RNN-embeddings.pickle"
324 | 
325 |     filenames_file_path_train = train_dir + "/filenames.pickle"
326 |     filenames_file_path_test = test_dir + "/filenames.pickle"
327 | 
328 |     class_info_file_path_train = train_dir + "/class_info.pickle"
329 |     class_info_file_path_test = test_dir + "/class_info.pickle"
330 | 
331 |     cub_dataset_dir = data_dir + "/CUB_200_2011"
332 | 
333 |     # Define optimizers
334 |     dis_optimizer = Adam(lr=stage1_discriminator_lr, beta_1=0.5, beta_2=0.999)
335 |     gen_optimizer = Adam(lr=stage1_generator_lr, beta_1=0.5, beta_2=0.999)
336 | 
337 |     """
338 |     Load datasets
339 |     """
340 |     X_train, y_train, embeddings_train = load_dataset(filenames_file_path=filenames_file_path_train,
341 |                                                       class_info_file_path=class_info_file_path_train,
342 |                                                       cub_dataset_dir=cub_dataset_dir,
343 |                                                       embeddings_file_path=embeddings_file_path_train,
344 |                                                       image_size=(64, 64))
345 | 
346 |     X_test, y_test, embeddings_test = load_dataset(filenames_file_path=filenames_file_path_test,
347 |                                                    class_info_file_path=class_info_file_path_test,
348 |                                                    cub_dataset_dir=cub_dataset_dir,
349 |                                                    embeddings_file_path=embeddings_file_path_test,
350 |                                                    image_size=(64, 64))
351 | 
352 |     """
353 |     Build and compile networks
354 |     """
355 |     ca_model = build_ca_model()
356 |     ca_model.compile(loss="binary_crossentropy", optimizer="adam")
357 | 
358 |     stage1_dis = build_stage1_discriminator()
359 |     stage1_dis.compile(loss='binary_crossentropy', optimizer=dis_optimizer)
360 | 
361 |     stage1_gen = build_stage1_generator()
362 |     stage1_gen.compile(loss="mse", optimizer=gen_optimizer)
363 | 
364 |     embedding_compressor_model = build_embedding_compressor_model()
365 |     embedding_compressor_model.compile(loss="binary_crossentropy", optimizer="adam")
366 | 
367 |     adversarial_model = build_adversarial_model(gen_model=stage1_gen, dis_model=stage1_dis)
368 |     adversarial_model.compile(loss=['binary_crossentropy', KL_loss], loss_weights=[1, 2.0],
369 |                               optimizer=gen_optimizer, metrics=None)
370 | 
371 |     tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()))
372 |     tensorboard.set_model(stage1_gen)
373 |     tensorboard.set_model(stage1_dis)
374 |     tensorboard.set_model(ca_model)
375 |     tensorboard.set_model(embedding_compressor_model)
376 | 
377 |     # Generate arrays of real and fake labels; real labels are smoothed to 0.9
378 |     # (one-sided label smoothing), fake labels are kept at 0
379 |     real_labels = np.ones((batch_size, 1), dtype=float) * 0.9
380 |     fake_labels = np.zeros((batch_size, 1), dtype=float)
381 | 
382 |     for epoch in range(epochs):
383 |         print("========================================")
384 |         print("Epoch is:", epoch)
385 |         print("Number of batches", int(X_train.shape[0] / batch_size))
386 | 
387 |         gen_losses = []
388 |         dis_losses = []
389 | 
390 |         # Load data and train model
391 |         number_of_batches = int(X_train.shape[0] / batch_size)
392 |         for index in range(number_of_batches):
393 |             print("Batch:{}".format(index + 1))
394 | 
395 |             """
396 |             Train the discriminator network
397 |             """
398 |             # Sample a batch of data
399 |             z_noise = np.random.normal(0, 1, size=(batch_size, z_dim))
400 |             image_batch = X_train[index * batch_size:(index + 1) * batch_size]
401 |             embedding_batch = embeddings_train[index * batch_size:(index + 1) * batch_size]
402 |             image_batch = (image_batch - 127.5) / 127.5
403 | 
404 |             # Generate fake images
405 |             fake_images, _ = stage1_gen.predict([embedding_batch, z_noise], verbose=3)
406 | 
407 |             # Generate compressed embeddings
408 |             compressed_embedding = embedding_compressor_model.predict_on_batch(embedding_batch)
409 |             compressed_embedding = np.reshape(compressed_embedding, (-1, 1, 1, condition_dim))
410 |             compressed_embedding = np.tile(compressed_embedding, (1, 4, 4, 1))
411 | 
412 |             dis_loss_real = stage1_dis.train_on_batch([image_batch, compressed_embedding],
413 |                                                       np.reshape(real_labels, (batch_size, 1)))
414 |             dis_loss_fake = stage1_dis.train_on_batch([fake_images, compressed_embedding],
415 |                                                       np.reshape(fake_labels, (batch_size, 1)))
416 |             dis_loss_wrong = stage1_dis.train_on_batch([image_batch[:(batch_size - 1)], compressed_embedding[1:]],
417 |                                                        np.reshape(fake_labels[1:], (batch_size - 1, 1)))
418 | 
419 |             d_loss = 0.5 * np.add(dis_loss_real, 0.5 * np.add(dis_loss_wrong, dis_loss_fake))
420 | 
421 |             print("d_loss_real:{}".format(dis_loss_real))
422 |             print("d_loss_fake:{}".format(dis_loss_fake))
423 |             print("d_loss_wrong:{}".format(dis_loss_wrong))
424 |             print("d_loss:{}".format(d_loss))
425 | 
426 |             """
427 |             Train the generator network
428 |             """
429 |             g_loss = adversarial_model.train_on_batch([embedding_batch, z_noise, compressed_embedding], [np.ones((batch_size, 1)) * 0.9, np.ones((batch_size, 256)) * 0.9])
430 |             print("g_loss:{}".format(g_loss))
431 | 
432 |             dis_losses.append(d_loss)
433 |             gen_losses.append(g_loss)
434 | 
435 |         """
436 |         Save losses to Tensorboard after each epoch
437 |         """
438 |         write_log(tensorboard, 'discriminator_loss', np.mean(dis_losses), epoch)
439 |         write_log(tensorboard, 'generator_loss', np.mean([g[0] for g in gen_losses]), epoch)
440 | 
441 |         # Generate and save images after every 2nd epoch
442 |         if epoch % 2 == 0:
443 |             # z_noise2 = np.random.uniform(-1, 1, size=(batch_size, z_dim))
444 |             z_noise2 = np.random.normal(0, 1, size=(batch_size, z_dim))
445 |             embedding_batch = embeddings_test[0:batch_size]
446 |             fake_images, _ = stage1_gen.predict_on_batch([embedding_batch, z_noise2])
447 | 
448 |             # Save images
449 |             for i, img in enumerate(fake_images[:10]):
450 |                 save_rgb_img(img, "results/gen_{}_{}.png".format(epoch, i))
451 | 
452 |     # Save models
453 |     stage1_gen.save_weights("stage1_gen.h5")
454 |     stage1_dis.save_weights("stage1_dis.h5")
455 | 
--------------------------------------------------------------------------------
/Chapter06/stage2.py:
-------------------------------------------------------------------------------- 1 | import os 2 | import pickle 3 | import random 4 | import time 5 | 6 | import PIL 7 | import numpy as np 8 | import pandas as pd 9 | import tensorflow as tf 10 | from PIL import Image 11 | from keras import Input, Model 12 | from keras import backend as K 13 | from keras.callbacks import TensorBoard 14 | from keras.layers import Dense, LeakyReLU, BatchNormalization, ReLU, Reshape, UpSampling2D, Conv2D, Activation, \ 15 | concatenate, Flatten, Lambda, Concatenate, ZeroPadding2D 16 | from keras.layers import add 17 | from keras.optimizers import Adam 18 | from matplotlib import pyplot as plt 19 | 20 | 21 | def build_ca_model(): 22 | """ 23 | Get conditioning augmentation model. 24 | Takes an embedding of shape (1024,) and returns a tensor of shape (256,) 25 | """ 26 | input_layer = Input(shape=(1024,)) 27 | x = Dense(256)(input_layer) 28 | x = LeakyReLU(alpha=0.2)(x) 29 | model = Model(inputs=[input_layer], outputs=[x]) 30 | return model 31 | 32 | 33 | def build_embedding_compressor_model(): 34 | """ 35 | Build embedding compressor model 36 | """ 37 | input_layer = Input(shape=(1024,)) 38 | x = Dense(128)(input_layer) 39 | x = ReLU()(x) 40 | model = Model(inputs=[input_layer], outputs=[x]) 41 | return model 42 | 43 | 44 | def generate_c(x): 45 | mean = x[:, :128] 46 | log_sigma = x[:, 128:] 47 | 48 | stddev = K.exp(log_sigma) 49 | epsilon = K.random_normal(shape=K.constant((mean.shape[1],), dtype='int32')) 50 | c = stddev * epsilon + mean 51 | 52 | return c 53 | 54 | 55 | def build_stage1_generator(): 56 | """ 57 | Builds a generator model used in Stage-I 58 | """ 59 | input_layer = Input(shape=(1024,)) 60 | x = Dense(256)(input_layer) 61 | mean_logsigma = LeakyReLU(alpha=0.2)(x) 62 | 63 | c = Lambda(generate_c)(mean_logsigma) 64 | 65 | input_layer2 = Input(shape=(100,)) 66 | 67 | gen_input = Concatenate(axis=1)([c, input_layer2]) 68 | 69 | x = Dense(128 * 8 * 4 * 4, use_bias=False)(gen_input) 70 | x = ReLU()(x) 71 | 72 | x = Reshape((4, 4, 128 * 8), input_shape=(128 * 8 * 4 * 4,))(x) 73 | 74 | x = UpSampling2D(size=(2, 2))(x) 75 | x = Conv2D(512, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 76 | x = BatchNormalization()(x) 77 | x = ReLU()(x) 78 | 79 | x = UpSampling2D(size=(2, 2))(x) 80 | x = Conv2D(256, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 81 | x = BatchNormalization()(x) 82 | x = ReLU()(x) 83 | 84 | x = UpSampling2D(size=(2, 2))(x) 85 | x = Conv2D(128, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 86 | x = BatchNormalization()(x) 87 | x = ReLU()(x) 88 | 89 | x = UpSampling2D(size=(2, 2))(x) 90 | x = Conv2D(64, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 91 | x = BatchNormalization()(x) 92 | x = ReLU()(x) 93 | 94 | x = Conv2D(3, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 95 | x = Activation(activation='tanh')(x) 96 | 97 | stage1_gen = Model(inputs=[input_layer, input_layer2], outputs=[x, mean_logsigma]) 98 | return stage1_gen 99 | 100 | 101 | def residual_block(input): 102 | """ 103 | Residual block in the generator network 104 | """ 105 | x = Conv2D(128 * 4, kernel_size=(3, 3), padding='same', strides=1)(input) 106 | x = BatchNormalization()(x) 107 | x = ReLU()(x) 108 | 109 | x = Conv2D(128 * 4, kernel_size=(3, 3), strides=1, padding='same')(x) 110 | x = BatchNormalization()(x) 111 | 112 | x = add([x, input]) 113 | x = ReLU()(x) 114 | 115 | return x 116 | 117 | 118 | def joint_block(inputs): 119 | c = inputs[0] 120 | 
x = inputs[1] 121 | 122 | c = K.expand_dims(c, axis=1) 123 | c = K.expand_dims(c, axis=1) 124 | c = K.tile(c, [1, 16, 16, 1]) 125 | return K.concatenate([c, x], axis=3) 126 | 127 | 128 | def build_stage2_generator(): 129 | """ 130 | Create Stage-II generator containing the CA Augmentation Network, 131 | the image encoder and the generator network 132 | """ 133 | 134 | # 1. CA Augmentation Network 135 | input_layer = Input(shape=(1024,)) 136 | input_lr_images = Input(shape=(64, 64, 3)) 137 | 138 | ca = Dense(256)(input_layer) 139 | mean_logsigma = LeakyReLU(alpha=0.2)(ca) 140 | c = Lambda(generate_c)(mean_logsigma) 141 | 142 | # 2. Image Encoder 143 | x = ZeroPadding2D(padding=(1, 1))(input_lr_images) 144 | x = Conv2D(128, kernel_size=(3, 3), strides=1, use_bias=False)(x) 145 | x = ReLU()(x) 146 | 147 | x = ZeroPadding2D(padding=(1, 1))(x) 148 | x = Conv2D(256, kernel_size=(4, 4), strides=2, use_bias=False)(x) 149 | x = BatchNormalization()(x) 150 | x = ReLU()(x) 151 | 152 | x = ZeroPadding2D(padding=(1, 1))(x) 153 | x = Conv2D(512, kernel_size=(4, 4), strides=2, use_bias=False)(x) 154 | x = BatchNormalization()(x) 155 | x = ReLU()(x) 156 | 157 | # 3. Joint 158 | c_code = Lambda(joint_block)([c, x]) 159 | 160 | x = ZeroPadding2D(padding=(1, 1))(c_code) 161 | x = Conv2D(512, kernel_size=(3, 3), strides=1, use_bias=False)(x) 162 | x = BatchNormalization()(x) 163 | x = ReLU()(x) 164 | 165 | # 4. Residual blocks 166 | x = residual_block(x) 167 | x = residual_block(x) 168 | x = residual_block(x) 169 | x = residual_block(x) 170 | 171 | # 5. Upsampling blocks 172 | x = UpSampling2D(size=(2, 2))(x) 173 | x = Conv2D(512, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 174 | x = BatchNormalization()(x) 175 | x = ReLU()(x) 176 | 177 | x = UpSampling2D(size=(2, 2))(x) 178 | x = Conv2D(256, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 179 | x = BatchNormalization()(x) 180 | x = ReLU()(x) 181 | 182 | x = UpSampling2D(size=(2, 2))(x) 183 | x = Conv2D(128, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 184 | x = BatchNormalization()(x) 185 | x = ReLU()(x) 186 | 187 | x = UpSampling2D(size=(2, 2))(x) 188 | x = Conv2D(64, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 189 | x = BatchNormalization()(x) 190 | x = ReLU()(x) 191 | 192 | x = Conv2D(3, kernel_size=3, padding="same", strides=1, use_bias=False)(x) 193 | x = Activation('tanh')(x) 194 | 195 | model = Model(inputs=[input_layer, input_lr_images], outputs=[x, mean_logsigma]) 196 | return model 197 | 198 | 199 | def build_stage2_discriminator(): 200 | """ 201 | Create Stage-II discriminator network 202 | """ 203 | input_layer = Input(shape=(256, 256, 3)) 204 | 205 | x = Conv2D(64, (4, 4), padding='same', strides=2, input_shape=(256, 256, 3), use_bias=False)(input_layer) 206 | x = LeakyReLU(alpha=0.2)(x) 207 | 208 | x = Conv2D(128, (4, 4), padding='same', strides=2, use_bias=False)(x) 209 | x = BatchNormalization()(x) 210 | x = LeakyReLU(alpha=0.2)(x) 211 | 212 | x = Conv2D(256, (4, 4), padding='same', strides=2, use_bias=False)(x) 213 | x = BatchNormalization()(x) 214 | x = LeakyReLU(alpha=0.2)(x) 215 | 216 | x = Conv2D(512, (4, 4), padding='same', strides=2, use_bias=False)(x) 217 | x = BatchNormalization()(x) 218 | x = LeakyReLU(alpha=0.2)(x) 219 | 220 | x = Conv2D(1024, (4, 4), padding='same', strides=2, use_bias=False)(x) 221 | x = BatchNormalization()(x) 222 | x = LeakyReLU(alpha=0.2)(x) 223 | 224 | x = Conv2D(2048, (4, 4), padding='same', strides=2, use_bias=False)(x) 225 | x = 
BatchNormalization()(x) 226 | x = LeakyReLU(alpha=0.2)(x) 227 | 228 | x = Conv2D(1024, (1, 1), padding='same', strides=1, use_bias=False)(x) 229 | x = BatchNormalization()(x) 230 | x = LeakyReLU(alpha=0.2)(x) 231 | 232 | x = Conv2D(512, (1, 1), padding='same', strides=1, use_bias=False)(x) 233 | x = BatchNormalization()(x) 234 | 235 | x2 = Conv2D(128, (1, 1), padding='same', strides=1, use_bias=False)(x) 236 | x2 = BatchNormalization()(x2) 237 | x2 = LeakyReLU(alpha=0.2)(x2) 238 | 239 | x2 = Conv2D(128, (3, 3), padding='same', strides=1, use_bias=False)(x2) 240 | x2 = BatchNormalization()(x2) 241 | x2 = LeakyReLU(alpha=0.2)(x2) 242 | 243 | x2 = Conv2D(512, (3, 3), padding='same', strides=1, use_bias=False)(x2) 244 | x2 = BatchNormalization()(x2) 245 | 246 | added_x = add([x, x2]) 247 | added_x = LeakyReLU(alpha=0.2)(added_x) 248 | 249 | input_layer2 = Input(shape=(4, 4, 128)) 250 | 251 | merged_input = concatenate([added_x, input_layer2]) 252 | 253 | x3 = Conv2D(64 * 8, kernel_size=1, padding="same", strides=1)(merged_input) 254 | x3 = BatchNormalization()(x3) 255 | x3 = LeakyReLU(alpha=0.2)(x3) 256 | x3 = Flatten()(x3) 257 | x3 = Dense(1)(x3) 258 | x3 = Activation('sigmoid')(x3) 259 | 260 | stage2_dis = Model(inputs=[input_layer, input_layer2], outputs=[x3]) 261 | return stage2_dis 262 | 263 | 264 | def build_adversarial_model(gen_model2, dis_model, gen_model1): 265 | """ 266 | Create adversarial model 267 | """ 268 | embeddings_input_layer = Input(shape=(1024, )) 269 | noise_input_layer = Input(shape=(100, )) 270 | compressed_embedding_input_layer = Input(shape=(4, 4, 128)) 271 | 272 | gen_model1.trainable = False 273 | dis_model.trainable = False 274 | 275 | lr_images, mean_logsigma1 = gen_model1([embeddings_input_layer, noise_input_layer]) 276 | hr_images, mean_logsigma2 = gen_model2([embeddings_input_layer, lr_images]) 277 | valid = dis_model([hr_images, compressed_embedding_input_layer]) 278 | 279 | model = Model(inputs=[embeddings_input_layer, noise_input_layer, compressed_embedding_input_layer], outputs=[valid, mean_logsigma2]) 280 | return model 281 | 282 | 283 | """ 284 | Dataset loading related methods 285 | """ 286 | 287 | 288 | def load_class_ids(class_info_file_path): 289 | """ 290 | Load class ids from class_info.pickle file 291 | """ 292 | with open(class_info_file_path, 'rb') as f: 293 | class_ids = pickle.load(f, encoding='latin1') 294 | return class_ids 295 | 296 | 297 | def load_embeddings(embeddings_file_path): 298 | """ 299 | Function to load embeddings 300 | """ 301 | with open(embeddings_file_path, 'rb') as f: 302 | embeddings = pickle.load(f, encoding='latin1') 303 | embeddings = np.array(embeddings) 304 | print('embeddings: ', embeddings.shape) 305 | return embeddings 306 | 307 | 308 | def load_filenames(filenames_file_path): 309 | """ 310 | Load filenames.pickle file and return a list of all file names 311 | """ 312 | with open(filenames_file_path, 'rb') as f: 313 | filenames = pickle.load(f, encoding='latin1') 314 | return filenames 315 | 316 | 317 | def load_bounding_boxes(dataset_dir): 318 | """ 319 | Load bounding boxes and return a dictionary of file names and corresponding bounding boxes 320 | """ 321 | # Paths 322 | bounding_boxes_path = os.path.join(dataset_dir, 'bounding_boxes.txt') 323 | file_paths_path = os.path.join(dataset_dir, 'images.txt') 324 | 325 | # Read bounding_boxes.txt and images.txt file 326 | df_bounding_boxes = pd.read_csv(bounding_boxes_path, 327 | delim_whitespace=True, header=None).astype(int) 328 | df_file_names = 
pd.read_csv(file_paths_path, delim_whitespace=True, header=None)
329 | 
330 |     # Create a list of file names
331 |     file_names = df_file_names[1].tolist()
332 | 
333 |     # Create a dictionary of file_names and bounding boxes
334 |     filename_boundingbox_dict = {img_file[:-4]: [] for img_file in file_names[:2]}
335 | 
336 |     # Assign a bounding box to the corresponding image
337 |     for i in range(0, len(file_names)):
338 |         # Get the bounding box
339 |         bounding_box = df_bounding_boxes.iloc[i][1:].tolist()
340 |         key = file_names[i][:-4]
341 |         filename_boundingbox_dict[key] = bounding_box
342 | 
343 |     return filename_boundingbox_dict
344 | 
345 | 
346 | def get_img(img_path, bbox, image_size):
347 |     """
348 |     Load and resize images
349 |     """
350 |     img = Image.open(img_path).convert('RGB')
351 |     width, height = img.size
352 |     if bbox is not None:
353 |         R = int(np.maximum(bbox[2], bbox[3]) * 0.75)
354 |         center_x = int((2 * bbox[0] + bbox[2]) / 2)
355 |         center_y = int((2 * bbox[1] + bbox[3]) / 2)
356 |         y1 = np.maximum(0, center_y - R)
357 |         y2 = np.minimum(height, center_y + R)
358 |         x1 = np.maximum(0, center_x - R)
359 |         x2 = np.minimum(width, center_x + R)
360 |         img = img.crop([x1, y1, x2, y2])
361 |     img = img.resize(image_size, PIL.Image.BILINEAR)
362 |     return img
363 | 
364 | 
365 | def load_dataset(filenames_file_path, class_info_file_path, cub_dataset_dir, embeddings_file_path, image_size):
366 |     filenames = load_filenames(filenames_file_path)
367 |     class_ids = load_class_ids(class_info_file_path)
368 |     bounding_boxes = load_bounding_boxes(cub_dataset_dir)
369 |     all_embeddings = load_embeddings(embeddings_file_path)
370 | 
371 |     X, y, embeddings = [], [], []
372 | 
373 |     print("All embeddings shape:", all_embeddings.shape)
374 | 
375 |     for index, filename in enumerate(filenames):
376 |         bounding_box = bounding_boxes[filename]
377 | 
378 |         try:
379 |             # Load images
380 |             img_name = '{}/images/{}.jpg'.format(cub_dataset_dir, filename)
381 |             img = get_img(img_name, bounding_box, image_size)
382 | 
383 |             all_embeddings1 = all_embeddings[index, :, :]
384 | 
385 |             embedding_ix = random.randint(0, all_embeddings1.shape[0] - 1)
386 |             embedding = all_embeddings1[embedding_ix, :]
387 | 
388 |             X.append(np.array(img))
389 |             y.append(class_ids[index])
390 |             embeddings.append(embedding)
391 |         except Exception as e:
392 |             print(e)
393 | 
394 |     X = np.array(X)
395 |     y = np.array(y)
396 |     embeddings = np.array(embeddings)
397 | 
398 |     return X, y, embeddings
399 | 
400 | 
401 | """
402 | Loss functions
403 | """
404 | 
405 | 
406 | def KL_loss(y_true, y_pred):
407 |     mean = y_pred[:, :128]
408 |     logsigma = y_pred[:, 128:]  # the second half of the prediction holds log(sigma)
409 |     loss = -logsigma + .5 * (-1 + K.exp(2. 
* logsigma) + K.square(mean)) 410 | loss = K.mean(loss) 411 | return loss 412 | 413 | 414 | def custom_generator_loss(y_true, y_pred): 415 | # Calculate binary cross entropy loss 416 | return K.binary_crossentropy(y_true, y_pred) 417 | 418 | 419 | def write_log(callback, name, loss, batch_no): 420 | """ 421 | Write training summary to TensorBoard 422 | """ 423 | summary = tf.Summary() 424 | summary_value = summary.value.add() 425 | summary_value.simple_value = loss 426 | summary_value.tag = name 427 | callback.writer.add_summary(summary, batch_no) 428 | callback.writer.flush() 429 | 430 | 431 | def save_rgb_img(img, path): 432 | """ 433 | Save an rgb image 434 | """ 435 | fig = plt.figure() 436 | ax = fig.add_subplot(1, 1, 1) 437 | ax.imshow(img) 438 | ax.axis("off") 439 | ax.set_title("Image") 440 | 441 | plt.savefig(path) 442 | plt.close() 443 | 444 | 445 | if __name__ == '__main__': 446 | data_dir = "data/birds/" 447 | train_dir = data_dir + "/train" 448 | test_dir = data_dir + "/test" 449 | hr_image_size = (256, 256) 450 | lr_image_size = (64, 64) 451 | batch_size = 64 452 | z_dim = 100 453 | stage1_generator_lr = 0.0002 454 | stage1_discriminator_lr = 0.0002 455 | stage1_lr_decay_step = 600 456 | epochs = 1000 457 | condition_dim = 128 458 | 459 | embeddings_file_path_train = train_dir + "/char-CNN-RNN-embeddings.pickle" 460 | embeddings_file_path_test = test_dir + "/char-CNN-RNN-embeddings.pickle" 461 | 462 | filenames_file_path_train = train_dir + "/filenames.pickle" 463 | filenames_file_path_test = test_dir + "/filenames.pickle" 464 | 465 | class_info_file_path_train = train_dir + "/class_info.pickle" 466 | class_info_file_path_test = test_dir + "/class_info.pickle" 467 | 468 | cub_dataset_dir = data_dir + "/CUB_200_2011" 469 | 470 | # Define optimizers 471 | dis_optimizer = Adam(lr=stage1_discriminator_lr, beta_1=0.5, beta_2=0.999) 472 | gen_optimizer = Adam(lr=stage1_generator_lr, beta_1=0.5, beta_2=0.999) 473 | 474 | """ 475 | Load datasets 476 | """ 477 | X_hr_train, y_hr_train, embeddings_train = load_dataset(filenames_file_path=filenames_file_path_train, 478 | class_info_file_path=class_info_file_path_train, 479 | cub_dataset_dir=cub_dataset_dir, 480 | embeddings_file_path=embeddings_file_path_train, 481 | image_size=(256, 256)) 482 | 483 | X_hr_test, y_hr_test, embeddings_test = load_dataset(filenames_file_path=filenames_file_path_test, 484 | class_info_file_path=class_info_file_path_test, 485 | cub_dataset_dir=cub_dataset_dir, 486 | embeddings_file_path=embeddings_file_path_test, 487 | image_size=(256, 256)) 488 | 489 | X_lr_train, y_lr_train, _ = load_dataset(filenames_file_path=filenames_file_path_train, 490 | class_info_file_path=class_info_file_path_train, 491 | cub_dataset_dir=cub_dataset_dir, 492 | embeddings_file_path=embeddings_file_path_train, 493 | image_size=(64, 64)) 494 | 495 | X_lr_test, y_lr_test, _ = load_dataset(filenames_file_path=filenames_file_path_test, 496 | class_info_file_path=class_info_file_path_test, 497 | cub_dataset_dir=cub_dataset_dir, 498 | embeddings_file_path=embeddings_file_path_test, 499 | image_size=(64, 64)) 500 | 501 | """ 502 | Build and compile models 503 | """ 504 | stage2_dis = build_stage2_discriminator() 505 | stage2_dis.compile(loss='binary_crossentropy', optimizer=dis_optimizer) 506 | 507 | stage1_gen = build_stage1_generator() 508 | stage1_gen.compile(loss="binary_crossentropy", optimizer=gen_optimizer) 509 | 510 | stage1_gen.load_weights("stage1_gen.h5") 511 | 512 | stage2_gen = build_stage2_generator() 513 | 
stage2_gen.compile(loss="binary_crossentropy", optimizer=gen_optimizer)
514 | 
515 |     embedding_compressor_model = build_embedding_compressor_model()
516 |     embedding_compressor_model.compile(loss='binary_crossentropy', optimizer='adam')
517 | 
518 |     adversarial_model = build_adversarial_model(stage2_gen, stage2_dis, stage1_gen)
519 |     adversarial_model.compile(loss=['binary_crossentropy', KL_loss], loss_weights=[1.0, 2.0],
520 |                               optimizer=gen_optimizer, metrics=None)
521 | 
522 |     tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()))
523 |     tensorboard.set_model(stage2_gen)
524 |     tensorboard.set_model(stage2_dis)
525 | 
526 |     # Generate arrays of real and fake labels; real labels are smoothed to 0.9
527 |     # (one-sided label smoothing), fake labels are kept at 0
528 |     real_labels = np.ones((batch_size, 1), dtype=float) * 0.9
529 |     fake_labels = np.zeros((batch_size, 1), dtype=float)
530 | 
531 |     for epoch in range(epochs):
532 |         print("========================================")
533 |         print("Epoch is:", epoch)
534 | 
535 |         gen_losses = []
536 |         dis_losses = []
537 | 
538 |         # Load data and train model
539 |         number_of_batches = int(X_hr_train.shape[0] / batch_size)
540 |         print("Number of batches:{}".format(number_of_batches))
541 |         for index in range(number_of_batches):
542 |             print("Batch:{}".format(index))
543 | 
544 |             # Create a noise vector
545 |             z_noise = np.random.normal(0, 1, size=(batch_size, z_dim))
546 |             X_hr_train_batch = X_hr_train[index * batch_size:(index + 1) * batch_size]
547 |             embedding_batch = embeddings_train[index * batch_size:(index + 1) * batch_size]
548 |             X_hr_train_batch = (X_hr_train_batch - 127.5) / 127.5
549 | 
550 |             # Generate fake images
551 |             lr_fake_images, _ = stage1_gen.predict([embedding_batch, z_noise], verbose=3)
552 |             hr_fake_images, _ = stage2_gen.predict([embedding_batch, lr_fake_images], verbose=3)
553 | 
554 |             """
555 |             4. Generate compressed embeddings
556 |             """
557 |             compressed_embedding = embedding_compressor_model.predict_on_batch(embedding_batch)
558 |             compressed_embedding = np.reshape(compressed_embedding, (-1, 1, 1, condition_dim))
559 |             compressed_embedding = np.tile(compressed_embedding, (1, 4, 4, 1))
560 | 
561 |             """
562 |             5. Train the discriminator model
563 |             """
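            # The matching-aware discriminator is trained on three kinds of pairs:
            # (real image, matching embedding) labeled real, (fake image, matching
            # embedding) labeled fake, and (real image, mismatched embedding) also
            # labeled fake, so it learns image-text consistency as well as realism.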
564 |             dis_loss_real = stage2_dis.train_on_batch([X_hr_train_batch, compressed_embedding],
565 |                                                       np.reshape(real_labels, (batch_size, 1)))
566 |             dis_loss_fake = stage2_dis.train_on_batch([hr_fake_images, compressed_embedding],
567 |                                                       np.reshape(fake_labels, (batch_size, 1)))
568 |             dis_loss_wrong = stage2_dis.train_on_batch([X_hr_train_batch[:(batch_size - 1)], compressed_embedding[1:]],
569 |                                                        np.reshape(fake_labels[1:], (batch_size - 1, 1)))
570 |             d_loss = 0.5 * np.add(dis_loss_real, 0.5 * np.add(dis_loss_wrong, dis_loss_fake))
571 |             print("d_loss:{}".format(d_loss))
572 | 
573 |             """
574 |             Train the adversarial model
575 |             """
576 |             g_loss = adversarial_model.train_on_batch([embedding_batch, z_noise, compressed_embedding],
577 |                                                       [np.ones((batch_size, 1)) * 0.9, np.ones((batch_size, 256)) * 0.9])
578 | 
579 |             print("g_loss:{}".format(g_loss))
580 | 
581 |             dis_losses.append(d_loss)
582 |             gen_losses.append(g_loss)
583 | 
584 |         """
585 |         Save losses to Tensorboard after each epoch
586 |         """
587 |         write_log(tensorboard, 'discriminator_loss', np.mean(dis_losses), epoch)
588 |         write_log(tensorboard, 'generator_loss', np.mean([g[0] for g in gen_losses]), epoch)
589 | 
590 |         # Generate and save images after every 2nd epoch
591 |         if epoch % 2 == 0:
592 |             # z_noise2 = np.random.uniform(-1, 1, size=(batch_size, z_dim))
593 |             z_noise2 = np.random.normal(0, 1, size=(batch_size, z_dim))
594 |             embedding_batch = embeddings_test[0:batch_size]
595 | 
596 |             lr_fake_images, _ = stage1_gen.predict([embedding_batch, z_noise2], verbose=3)
597 |             hr_fake_images, _ = stage2_gen.predict([embedding_batch, lr_fake_images], verbose=3)
598 | 
599 |             # Save images
600 |             for i, img in enumerate(hr_fake_images[:10]):
601 |                 save_rgb_img(img, "results2/gen_{}_{}.png".format(epoch, i))
602 | 
603 |     # Saving the models
604 |     stage2_gen.save_weights("stage2_gen.h5")
605 |     stage2_dis.save_weights("stage2_dis.h5")
606 | 
--------------------------------------------------------------------------------
/Chapter07/README.md:
--------------------------------------------------------------------------------
1 | ## CycleGAN - Turn Paintings into Photos
2 | 
3 | Python 3.6
4 | 
5 | Steps to set up the project:
6 | 1. Create a python3 virtual environment and activate it
7 | 2. Install dependencies using "pip install -r requirements.txt"
8 | 3. Create essential folders like 1. logs 2. results 3. data
9 | 4. Download the dataset to data directory
10 | 5. 
Train the model by executing "python3 run.py" 11 | -------------------------------------------------------------------------------- /Chapter07/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-Adversarial-Networks-Projects/317e7682acfeb5563f70c020a09b1b2e4c6595bb/Chapter07/__init__.py -------------------------------------------------------------------------------- /Chapter07/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.6.1 2 | astor==0.7.1 3 | backcall==0.1.0 4 | cycler==0.10.0 5 | decorator==4.3.0 6 | gast==0.2.1.post0 7 | grpcio==1.17.1 8 | h5py==2.9.0 9 | ipython==7.2.0 10 | ipython-genutils==0.2.0 11 | jedi==0.13.2 12 | Keras==2.2.4 13 | Keras-Applications==1.0.6 14 | keras-contrib==2.0.8 15 | Keras-Preprocessing==1.0.5 16 | kiwisolver==1.0.1 17 | Markdown==3.0.1 18 | matplotlib==3.0.2 19 | numpy==1.15.4 20 | parso==0.3.1 21 | pexpect==4.6.0 22 | pickleshare==0.7.5 23 | Pillow==5.4.1 24 | prompt-toolkit==2.0.7 25 | protobuf==3.6.1 26 | ptyprocess==0.6.0 27 | Pygments==2.3.1 28 | pyparsing==2.3.0 29 | python-dateutil==2.7.5 30 | PyYAML==3.13 31 | scipy==1.2.0 32 | six==1.12.0 33 | tensorboard==1.12.2 34 | tensorflow==1.12.0 35 | termcolor==1.1.0 36 | traitlets==4.3.2 37 | wcwidth==0.1.7 38 | Werkzeug==0.14.1 39 | -------------------------------------------------------------------------------- /Chapter07/run.py: -------------------------------------------------------------------------------- 1 | import time 2 | from glob import glob 3 | 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import tensorflow as tf 7 | from keras import Input, Model 8 | from keras.callbacks import TensorBoard 9 | from keras.layers import Conv2D, BatchNormalization, Activation, Add, Conv2DTranspose, \ 10 | ZeroPadding2D, LeakyReLU 11 | from keras.optimizers import Adam 12 | from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization 13 | from imageio import imread 14 | from skimage.transform import resize 15 | 16 | 17 | def residual_block(x): 18 | """ 19 | Residual block 20 | """ 21 | res = Conv2D(filters=128, kernel_size=3, strides=1, padding="same")(x) 22 | res = BatchNormalization(axis=3, momentum=0.9, epsilon=1e-5)(res) 23 | res = Activation('relu')(res) 24 | 25 | res = Conv2D(filters=128, kernel_size=3, strides=1, padding="same")(res) 26 | res = BatchNormalization(axis=3, momentum=0.9, epsilon=1e-5)(res) 27 | 28 | return Add()([res, x]) 29 | 30 | 31 | def build_generator(): 32 | """ 33 | Create a generator network using the hyperparameter values defined below 34 | """ 35 | input_shape = (128, 128, 3) 36 | residual_blocks = 6 37 | input_layer = Input(shape=input_shape) 38 | 39 | # First Convolution block 40 | x = Conv2D(filters=32, kernel_size=7, strides=1, padding="same")(input_layer) 41 | x = InstanceNormalization(axis=1)(x) 42 | x = Activation("relu")(x) 43 | 44 | # 2nd Convolution block 45 | x = Conv2D(filters=64, kernel_size=3, strides=2, padding="same")(x) 46 | x = InstanceNormalization(axis=1)(x) 47 | x = Activation("relu")(x) 48 | 49 | # 3rd Convolution block 50 | x = Conv2D(filters=128, kernel_size=3, strides=2, padding="same")(x) 51 | x = InstanceNormalization(axis=1)(x) 52 | x = Activation("relu")(x) 53 | 54 | # Residual blocks 55 | for _ in range(residual_blocks): 56 | x = residual_block(x) 57 | 58 | # Upsampling blocks 59 | 60 | # 1st Upsampling block 61 | x = 
Conv2DTranspose(filters=64, kernel_size=3, strides=2, padding='same', use_bias=False)(x)
62 |     x = InstanceNormalization(axis=1)(x)
63 |     x = Activation("relu")(x)
64 | 
65 |     # 2nd Upsampling block
66 |     x = Conv2DTranspose(filters=32, kernel_size=3, strides=2, padding='same', use_bias=False)(x)
67 |     x = InstanceNormalization(axis=1)(x)
68 |     x = Activation("relu")(x)
69 | 
70 |     # Last Convolution layer
71 |     x = Conv2D(filters=3, kernel_size=7, strides=1, padding="same")(x)
72 |     output = Activation('tanh')(x)
73 | 
74 |     model = Model(inputs=[input_layer], outputs=[output])
75 |     return model
76 | 
77 | 
78 | def build_discriminator():
79 |     """
80 |     Create a discriminator network using the hyperparameter values defined below
81 |     """
82 |     input_shape = (128, 128, 3)
83 |     hidden_layers = 3
84 | 
85 |     input_layer = Input(shape=input_shape)
86 | 
87 |     x = ZeroPadding2D(padding=(1, 1))(input_layer)
88 | 
89 |     # 1st Convolutional block
90 |     x = Conv2D(filters=64, kernel_size=4, strides=2, padding="valid")(x)
91 |     x = LeakyReLU(alpha=0.2)(x)
92 | 
93 |     x = ZeroPadding2D(padding=(1, 1))(x)
94 | 
95 |     # 3 Hidden Convolution blocks
96 |     for i in range(1, hidden_layers + 1):
97 |         x = Conv2D(filters=2 ** i * 64, kernel_size=4, strides=2, padding="valid")(x)
98 |         x = InstanceNormalization(axis=1)(x)
99 |         x = LeakyReLU(alpha=0.2)(x)
100 | 
101 |         x = ZeroPadding2D(padding=(1, 1))(x)
102 | 
103 |     # Last Convolution layer
104 |     output = Conv2D(filters=1, kernel_size=4, strides=1, activation="sigmoid")(x)
105 | 
106 |     model = Model(inputs=[input_layer], outputs=[output])
107 |     return model
108 | 
109 | 
110 | def load_images(data_dir):
111 |     imagesA = glob(data_dir + '/testA/*.*')
112 |     imagesB = glob(data_dir + '/testB/*.*')
113 | 
114 |     allImagesA = []
115 |     allImagesB = []
116 | 
117 |     for index, filename in enumerate(imagesA):
118 |         imgA = imread(filename, pilmode='RGB').astype(np.float32)  # cast to float so resize keeps the [0, 255] range
119 |         imgB = imread(imagesB[index], pilmode='RGB').astype(np.float32)  # (skimage rescales uint8 input to [0, 1])
120 | 
121 |         imgA = resize(imgA, (128, 128))
122 |         imgB = resize(imgB, (128, 128))
123 | 
124 |         if np.random.random() > 0.5:
125 |             imgA = np.fliplr(imgA)
126 |             imgB = np.fliplr(imgB)
127 | 
128 |         allImagesA.append(imgA)
129 |         allImagesB.append(imgB)
130 | 
131 |     # Normalize images
132 |     allImagesA = np.array(allImagesA) / 127.5 - 1.
133 |     allImagesB = np.array(allImagesB) / 127.5 - 1.
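    # Both arrays now lie in [-1, 1], matching the tanh output range of the generators.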
134 | 
135 |     return allImagesA, allImagesB
136 | 
137 | 
138 | def load_test_batch(data_dir, batch_size):
139 |     imagesA = glob(data_dir + '/testA/*.*')
140 |     imagesB = glob(data_dir + '/testB/*.*')
141 | 
142 |     imagesA = np.random.choice(imagesA, batch_size)
143 |     imagesB = np.random.choice(imagesB, batch_size)
144 | 
145 |     allA = []
146 |     allB = []
147 | 
148 |     for i in range(len(imagesA)):
149 |         # Load images and resize images
150 |         imgA = resize(imread(imagesA[i], pilmode='RGB').astype(np.float32), (128, 128))
151 |         imgB = resize(imread(imagesB[i], pilmode='RGB').astype(np.float32), (128, 128))
152 | 
153 |         allA.append(imgA)
154 |         allB.append(imgB)
155 | 
156 |     return np.array(allA) / 127.5 - 1.0, np.array(allB) / 127.5 - 1.0
157 | 
158 | 
159 | def save_images(originalA, generatedB, recosntructedA, originalB, generatedA, reconstructedB, path):
160 |     """
161 |     Save images
162 |     """
163 |     fig = plt.figure()
164 |     ax = fig.add_subplot(2, 3, 1)
165 |     ax.imshow(originalA)
166 |     ax.axis("off")
167 |     ax.set_title("Original")
168 | 
169 |     ax = fig.add_subplot(2, 3, 2)
170 |     ax.imshow(generatedB)
171 |     ax.axis("off")
172 |     ax.set_title("Generated")
173 | 
174 |     ax = fig.add_subplot(2, 3, 3)
175 |     ax.imshow(recosntructedA)
176 |     ax.axis("off")
177 |     ax.set_title("Reconstructed")
178 | 
179 |     ax = fig.add_subplot(2, 3, 4)
180 |     ax.imshow(originalB)
181 |     ax.axis("off")
182 |     ax.set_title("Original")
183 | 
184 |     ax = fig.add_subplot(2, 3, 5)
185 |     ax.imshow(generatedA)
186 |     ax.axis("off")
187 |     ax.set_title("Generated")
188 | 
189 |     ax = fig.add_subplot(2, 3, 6)
190 |     ax.imshow(reconstructedB)
191 |     ax.axis("off")
192 |     ax.set_title("Reconstructed")
193 | 
194 |     plt.savefig(path)
195 | 
196 | 
197 | def write_log(callback, name, loss, batch_no):
198 |     """
199 |     Write training summary to TensorBoard
200 |     """
201 |     summary = tf.Summary()
202 |     summary_value = summary.value.add()
203 |     summary_value.simple_value = loss
204 |     summary_value.tag = name
205 |     callback.writer.add_summary(summary, batch_no)
206 |     callback.writer.flush()
207 | 
208 | 
209 | if __name__ == '__main__':
210 |     data_dir = "data/monet2photo/"
211 |     batch_size = 1
212 |     epochs = 500
213 |     mode = 'train'
214 | 
215 |     if mode == 'train':
216 |         """
217 |         Load dataset
218 |         """
219 |         imagesA, imagesB = load_images(data_dir=data_dir)
220 | 
221 |         # Define the common optimizer
222 |         common_optimizer = Adam(0.002, 0.5)
223 | 
224 |         # Build and compile the discriminator networks
225 |         discriminatorA = build_discriminator()
226 |         discriminatorB = build_discriminator()
227 | 
228 |         discriminatorA.compile(loss='mse', optimizer=common_optimizer, metrics=['accuracy'])
229 |         discriminatorB.compile(loss='mse', optimizer=common_optimizer, metrics=['accuracy'])
230 | 
231 |         # Build generator networks
232 |         generatorAToB = build_generator()
233 |         generatorBToA = build_generator()
234 | 
235 |         """
236 |         Create an adversarial network
237 |         """
238 |         inputA = Input(shape=(128, 128, 3))
239 |         inputB = Input(shape=(128, 128, 3))
240 | 
241 |         # Generated images using both of the generator networks
242 |         generatedB = generatorAToB(inputA)
243 |         generatedA = generatorBToA(inputB)
244 | 
245 |         # Reconstruct images back to original images
246 |         reconstructedA = generatorBToA(generatedB)
247 |         reconstructedB = generatorAToB(generatedA)
248 | 
249 |         generatedAId = generatorBToA(inputA)
250 |         generatedBId = generatorAToB(inputB)
251 | 
252 |         # Make both of the discriminator networks non-trainable
253 |         discriminatorA.trainable = False
254 |         discriminatorB.trainable = False
255 | 
256 |         probsA = discriminatorA(generatedA)
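        # probsA/probsB are patch-level outputs of the frozen discriminators; they feed
        # the two adversarial (MSE) terms, while the reconstructed and identity images
        # feed the cycle-consistency and identity (MAE) terms, with the cycle terms
        # weighted 10x in the compile call below.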
257 |         probsB = discriminatorB(generatedB)
258 | 
259 |         adversarial_model = Model(inputs=[inputA, inputB],
260 |                                   outputs=[probsA, probsB, reconstructedA, reconstructedB,
261 |                                            generatedAId, generatedBId])
262 |         adversarial_model.compile(loss=['mse', 'mse', 'mae', 'mae', 'mae', 'mae'],
263 |                                   loss_weights=[1, 1, 10.0, 10.0, 1.0, 1.0],
264 |                                   optimizer=common_optimizer)
265 | 
266 |         tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()), write_images=True, write_grads=True,
267 |                                   write_graph=True)
268 |         tensorboard.set_model(generatorAToB)
269 |         tensorboard.set_model(generatorBToA)
270 |         tensorboard.set_model(discriminatorA)
271 |         tensorboard.set_model(discriminatorB)
272 | 
273 |         real_labels = np.ones((batch_size, 7, 7, 1))
274 |         fake_labels = np.zeros((batch_size, 7, 7, 1))
275 | 
276 |         for epoch in range(epochs):
277 |             print("Epoch:{}".format(epoch))
278 | 
279 |             dis_losses = []
280 |             gen_losses = []
281 | 
282 |             num_batches = int(min(imagesA.shape[0], imagesB.shape[0]) / batch_size)
283 |             print("Number of batches:{}".format(num_batches))
284 | 
285 |             for index in range(num_batches):
286 |                 print("Batch:{}".format(index))
287 | 
288 |                 # Sample images
289 |                 batchA = imagesA[index * batch_size:(index + 1) * batch_size]
290 |                 batchB = imagesB[index * batch_size:(index + 1) * batch_size]
291 | 
292 |                 # Translate images to opposite domain
293 |                 generatedB = generatorAToB.predict(batchA)
294 |                 generatedA = generatorBToA.predict(batchB)
295 | 
296 |                 # Train the discriminator A on real and fake images
297 |                 dALoss1 = discriminatorA.train_on_batch(batchA, real_labels)
298 |                 dALoss2 = discriminatorA.train_on_batch(generatedA, fake_labels)
299 | 
300 |                 # Train the discriminator B on real and fake images
301 |                 dBLoss1 = discriminatorB.train_on_batch(batchB, real_labels)
302 |                 dBLoss2 = discriminatorB.train_on_batch(generatedB, fake_labels)
303 | 
304 |                 # Calculate the total discriminator loss
305 |                 d_loss = 0.5 * np.add(0.5 * np.add(dALoss1, dALoss2), 0.5 * np.add(dBLoss1, dBLoss2))
306 | 
307 |                 print("d_loss:{}".format(d_loss))
308 | 
309 |                 """
310 |                 Train the generator networks
311 |                 """
312 |                 g_loss = adversarial_model.train_on_batch([batchA, batchB],
313 |                                                           [real_labels, real_labels, batchA, batchB, batchA, batchB])
314 | 
315 |                 print("g_loss:{}".format(g_loss))
316 | 
317 |                 dis_losses.append(d_loss)
318 |                 gen_losses.append(g_loss)
319 | 
320 |             """
321 |             Save losses to Tensorboard after each epoch
322 |             """
323 |             write_log(tensorboard, 'discriminator_loss', np.mean([d[0] for d in dis_losses]), epoch)
324 |             write_log(tensorboard, 'generator_loss', np.mean([g[0] for g in gen_losses]), epoch)
325 | 
326 |             # Sample and save images after every 10 epochs
327 |             if epoch % 10 == 0:
328 |                 # Get a batch of test data
329 |                 batchA, batchB = load_test_batch(data_dir=data_dir, batch_size=2)
330 | 
331 |                 # Generate images
332 |                 generatedB = generatorAToB.predict(batchA)
333 |                 generatedA = generatorBToA.predict(batchB)
334 | 
335 |                 # Get reconstructed images
336 |                 reconsA = generatorBToA.predict(generatedB)
337 |                 reconsB = generatorAToB.predict(generatedA)
338 | 
339 |                 # Save original, generated and reconstructed images
340 |                 for i in range(len(generatedA)):
341 |                     save_images(originalA=batchA[i], generatedB=generatedB[i], recosntructedA=reconsA[i],
342 |                                 originalB=batchB[i], generatedA=generatedA[i], reconstructedB=reconsB[i],
343 |                                 path="results/gen_{}_{}".format(epoch, i))
344 | 
345 |         # Save models
346 |         generatorAToB.save_weights("generatorAToB.h5")
347 |         generatorBToA.save_weights("generatorBToA.h5")
348 | 
266 | tensorboard = TensorBoard(log_dir="logs/{}".format(time.time()), write_images=True, write_grads=True, 267 | write_graph=True) 268 | tensorboard.set_model(generatorAToB) 269 | tensorboard.set_model(generatorBToA) 270 | tensorboard.set_model(discriminatorA) 271 | tensorboard.set_model(discriminatorB) 272 | # Label tensors shaped to match the discriminator's 7x7 patch output 273 | real_labels = np.ones((batch_size, 7, 7, 1)) 274 | fake_labels = np.zeros((batch_size, 7, 7, 1)) 275 | 276 | for epoch in range(epochs): 277 | print("Epoch:{}".format(epoch)) 278 | 279 | dis_losses = [] 280 | gen_losses = [] 281 | 282 | num_batches = int(min(imagesA.shape[0], imagesB.shape[0]) / batch_size) 283 | print("Number of batches:{}".format(num_batches)) 284 | 285 | for index in range(num_batches): 286 | print("Batch:{}".format(index)) 287 | 288 | # Sample images 289 | batchA = imagesA[index * batch_size:(index + 1) * batch_size] 290 | batchB = imagesB[index * batch_size:(index + 1) * batch_size] 291 | 292 | # Translate images to the opposite domain 293 | generatedB = generatorAToB.predict(batchA) 294 | generatedA = generatorBToA.predict(batchB) 295 | 296 | # Train discriminator A on real and fake images 297 | dALoss1 = discriminatorA.train_on_batch(batchA, real_labels) 298 | dALoss2 = discriminatorA.train_on_batch(generatedA, fake_labels) 299 | 300 | # Train discriminator B on real and fake images 301 | dBLoss1 = discriminatorB.train_on_batch(batchB, real_labels) 302 | dBLoss2 = discriminatorB.train_on_batch(generatedB, fake_labels) 303 | 304 | # Calculate the total discriminator loss 305 | d_loss = 0.5 * np.add(0.5 * np.add(dALoss1, dALoss2), 0.5 * np.add(dBLoss1, dBLoss2)) 306 | 307 | print("d_loss:{}".format(d_loss)) 308 | 309 | """ 310 | Train the generator networks 311 | """ 312 | g_loss = adversarial_model.train_on_batch([batchA, batchB], 313 | [real_labels, real_labels, batchA, batchB, batchA, batchB]) 314 | 315 | print("g_loss:{}".format(g_loss)) 316 | 317 | dis_losses.append(d_loss) 318 | gen_losses.append(g_loss) 319 | 320 | """ 321 | Save losses to TensorBoard after each epoch 322 | """ 323 | write_log(tensorboard, 'discriminator_loss', np.mean(dis_losses), epoch) 324 | write_log(tensorboard, 'generator_loss', np.mean(gen_losses), epoch) 325 | 326 | # Sample and save images every 10 epochs 327 | if epoch % 10 == 0: 328 | # Get a batch of test data 329 | batchA, batchB = load_test_batch(data_dir=data_dir, batch_size=2) 330 | 331 | # Generate images 332 | generatedB = generatorAToB.predict(batchA) 333 | generatedA = generatorBToA.predict(batchB) 334 | 335 | # Get reconstructed images 336 | reconsA = generatorBToA.predict(generatedB) 337 | reconsB = generatorAToB.predict(generatedA) 338 | 339 | # Save original, generated and reconstructed images 340 | for i in range(len(generatedA)): 341 | save_images(originalA=batchA[i], generatedB=generatedB[i], reconstructedA=reconsA[i], 342 | originalB=batchB[i], generatedA=generatedA[i], reconstructedB=reconsB[i], 343 | path="results/gen_{}_{}.png".format(epoch, i)) 344 | 345 | # Save models 346 | generatorAToB.save_weights("generatorAToB.h5") 347 | generatorBToA.save_weights("generatorBToA.h5") 348 | discriminatorA.save_weights("discriminatorA.h5") 349 | discriminatorB.save_weights("discriminatorB.h5") 350 | 351 | elif mode == 'predict': 352 | # Build generator networks 353 | generatorAToB = build_generator() 354 | generatorBToA = build_generator() 355 | 356 | generatorAToB.load_weights("generatorAToB.h5") 357 | generatorBToA.load_weights("generatorBToA.h5") 358 | 359 | # Get a batch of test data 360 | batchA, batchB = load_test_batch(data_dir=data_dir, batch_size=2) 361 | 362 | # Generate and save images 363 | generatedB = generatorAToB.predict(batchA) 364 | generatedA = generatorBToA.predict(batchB) 365 | 366 | reconsA = generatorBToA.predict(generatedB) 367 | reconsB = generatorAToB.predict(generatedA) 368 | 369 | for i in range(len(generatedA)): 370 | save_images(originalA=batchA[i], generatedB=generatedB[i], reconstructedA=reconsA[i], 371 | originalB=batchB[i], generatedA=generatedA[i], reconstructedB=reconsB[i], 372 | path="results/test_{}.png".format(i)) 373 | -------------------------------------------------------------------------------- /Chapter08/README.md: -------------------------------------------------------------------------------- 1 | ## Conditional GAN - Image-to-Image Translation Using Conditional Adversarial Networks 2 | 3 | Python 3.6 4 | 5 | Steps to set up the project: 6 | 1. Create a python3 virtual environment and activate it 7 | 2. Install dependencies using "pip install -r requirements.txt" 8 | 3. Create the essential folders: logs, results, and data 9 | 4. Download the dataset to the data directory 10 | 5. Train the model by executing "python3 run.py" 11 | -------------------------------------------------------------------------------- /Chapter08/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-Adversarial-Networks-Projects/317e7682acfeb5563f70c020a09b1b2e4c6595bb/Chapter08/__init__.py -------------------------------------------------------------------------------- /Chapter08/requirements.txt: -------------------------------------------------------------------------------- 1 | absl-py==0.4.1 2 | astor==0.7.1 3 | cycler==0.10.0 4 | gast==0.2.0 5 | graphviz==0.9 6 | grpcio==1.15.0 7 | h5py==2.8.0 8 | Keras==2.2.2 9 | Keras-Applications==1.0.4 10 | Keras-Preprocessing==1.0.2 11 | kiwisolver==1.0.1 12 | Markdown==2.6.11 13 | matplotlib==3.0.0 14 | numpy==1.14.5 15 | opencv-python==3.4.3.18 16 | pkg-resources==0.0.0 17 | protobuf==3.6.1 18 | pydot==1.2.4 19 | pyparsing==2.2.1 20 | python-dateutil==2.7.3 21 | PyYAML==3.13 22 | scipy==1.1.0 23 | six==1.11.0 24 | tensorboard==1.10.0 25 | tensorflow==1.10.1 26 | termcolor==1.1.0 27 | Werkzeug==0.15.3 28 | -------------------------------------------------------------------------------- /Chapter08/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | 4 | import h5py 5 | import keras.backend as K 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | from cv2 import imwrite 9 | from keras import Input, Model 10 | from keras.callbacks import TensorBoard 11 | from keras.layers import Convolution2D, LeakyReLU, BatchNormalization, UpSampling2D, Dropout, Activation, Flatten, \ 12 | Dense, Lambda, Reshape, concatenate 13 | from keras.optimizers import Adam 14 | import tensorflow as tf 15 | 16 | 17 | def build_unet_generator(): 18 | """ 19 | Create the U-Net Generator using the hyperparameter values defined below 20 | """ 21 | kernel_size = 4 22 | strides = 2 23 | 
leakyrelu_alpha = 0.2 24 | upsampling_size = 2 25 | dropout = 0.5 26 | output_channels = 1 27 | input_shape = (256, 256, 1) 28 | 29 | input_layer = Input(shape=input_shape) 30 | 31 | # Encoder Network 32 | 33 | # 1st Convolutional block in the encoder network 34 | encoder1 = Convolution2D(filters=64, kernel_size=kernel_size, padding='same', 35 | strides=strides)(input_layer) 36 | encoder1 = LeakyReLU(alpha=leakyrelu_alpha)(encoder1) 37 | 38 | # 2nd Convolutional block in the encoder network 39 | encoder2 = Convolution2D(filters=128, kernel_size=kernel_size, padding='same', 40 | strides=strides)(encoder1) 41 | encoder2 = BatchNormalization()(encoder2) 42 | encoder2 = LeakyReLU(alpha=leakyrelu_alpha)(encoder2) 43 | 44 | # 3rd Convolutional block in the encoder network 45 | encoder3 = Convolution2D(filters=256, kernel_size=kernel_size, padding='same', 46 | strides=strides)(encoder2) 47 | encoder3 = BatchNormalization()(encoder3) 48 | encoder3 = LeakyReLU(alpha=leakyrelu_alpha)(encoder3) 49 | 50 | # 4th Convolutional block in the encoder network 51 | encoder4 = Convolution2D(filters=512, kernel_size=kernel_size, padding='same', 52 | strides=strides)(encoder3) 53 | encoder4 = BatchNormalization()(encoder4) 54 | encoder4 = LeakyReLU(alpha=leakyrelu_alpha)(encoder4) 55 | 56 | # 5th Convolutional block in the encoder network 57 | encoder5 = Convolution2D(filters=512, kernel_size=kernel_size, padding='same', 58 | strides=strides)(encoder4) 59 | encoder5 = BatchNormalization()(encoder5) 60 | encoder5 = LeakyReLU(alpha=leakyrelu_alpha)(encoder5) 61 | 62 | # 6th Convolutional block in the encoder network 63 | encoder6 = Convolution2D(filters=512, kernel_size=kernel_size, padding='same', 64 | strides=strides)(encoder5) 65 | encoder6 = BatchNormalization()(encoder6) 66 | encoder6 = LeakyReLU(alpha=leakyrelu_alpha)(encoder6) 67 | 68 | # 7th Convolutional block in the encoder network 69 | encoder7 = Convolution2D(filters=512, kernel_size=kernel_size, padding='same', 70 | strides=strides)(encoder6) 71 | encoder7 = BatchNormalization()(encoder7) 72 | encoder7 = LeakyReLU(alpha=leakyrelu_alpha)(encoder7) 73 | 74 | # 8th Convolutional block in the encoder network 75 | encoder8 = Convolution2D(filters=512, kernel_size=kernel_size, padding='same', 76 | strides=strides)(encoder7) 77 | encoder8 = BatchNormalization()(encoder8) 78 | encoder8 = LeakyReLU(alpha=leakyrelu_alpha)(encoder8) 79 | 80 | # Decoder Network 81 | 82 | # 1st Upsampling Convolutional Block in the decoder network 83 | decoder1 = UpSampling2D(size=upsampling_size)(encoder8) 84 | decoder1 = Convolution2D(filters=512, kernel_size=kernel_size, padding='same')(decoder1) 85 | decoder1 = BatchNormalization()(decoder1) 86 | decoder1 = Dropout(dropout)(decoder1) 87 | decoder1 = concatenate([decoder1, encoder7], axis=3) 88 | decoder1 = Activation('relu')(decoder1) 89 | 90 | # 2nd Upsampling Convolutional block in the decoder network 91 | decoder2 = UpSampling2D(size=upsampling_size)(decoder1) 92 | decoder2 = Convolution2D(filters=1024, kernel_size=kernel_size, padding='same')(decoder2) 93 | decoder2 = BatchNormalization()(decoder2) 94 | decoder2 = Dropout(dropout)(decoder2) 95 | decoder2 = concatenate([decoder2, encoder6]) 96 | decoder2 = Activation('relu')(decoder2) 97 | 98 | # 3rd Upsampling Convolutional block in the decoder network 99 | decoder3 = UpSampling2D(size=upsampling_size)(decoder2) 100 | decoder3 = Convolution2D(filters=1024, kernel_size=kernel_size, padding='same')(decoder3) 101 | decoder3 = BatchNormalization()(decoder3) 102 | decoder3 = 
Dropout(dropout)(decoder3) 103 | decoder3 = concatenate([decoder3, encoder5]) 104 | decoder3 = Activation('relu')(decoder3) 105 | 106 | # 4th Upsampling Convolutional block in the decoder network 107 | decoder4 = UpSampling2D(size=upsampling_size)(decoder3) 108 | decoder4 = Convolution2D(filters=1024, kernel_size=kernel_size, padding='same')(decoder4) 109 | decoder4 = BatchNormalization()(decoder4) 110 | decoder4 = concatenate([decoder4, encoder4]) 111 | decoder4 = Activation('relu')(decoder4) 112 | 113 | # 5th Upsampling Convolutional block in the decoder network 114 | decoder5 = UpSampling2D(size=upsampling_size)(decoder4) 115 | decoder5 = Convolution2D(filters=1024, kernel_size=kernel_size, padding='same')(decoder5) 116 | decoder5 = BatchNormalization()(decoder5) 117 | decoder5 = concatenate([decoder5, encoder3]) 118 | decoder5 = Activation('relu')(decoder5) 119 | 120 | # 6th Upsampling Convolutional block in the decoder network 121 | decoder6 = UpSampling2D(size=upsampling_size)(decoder5) 122 | decoder6 = Convolution2D(filters=512, kernel_size=kernel_size, padding='same')(decoder6) 123 | decoder6 = BatchNormalization()(decoder6) 124 | decoder6 = concatenate([decoder6, encoder2]) 125 | decoder6 = Activation('relu')(decoder6) 126 | 127 | # 7th Upsampling Convolutional block in the decoder network 128 | decoder7 = UpSampling2D(size=upsampling_size)(decoder6) 129 | decoder7 = Convolution2D(filters=256, kernel_size=kernel_size, padding='same')(decoder7) 130 | decoder7 = BatchNormalization()(decoder7) 131 | decoder7 = concatenate([decoder7, encoder1]) 132 | decoder7 = Activation('relu')(decoder7) 133 | 134 | # Last Convolutional layer 135 | decoder8 = UpSampling2D(size=upsampling_size)(decoder7) 136 | decoder8 = Convolution2D(filters=output_channels, kernel_size=kernel_size, padding='same')(decoder8) 137 | decoder8 = Activation('tanh')(decoder8) 138 | 139 | model = Model(inputs=[input_layer], outputs=[decoder8]) 140 | return model 141 | 142 | 143 | def build_patchgan_discriminator(): 144 | """ 145 | Create the PatchGAN discriminator using the hyperparameter values defined below 146 | """ 147 | kernel_size = 4 148 | strides = 2 149 | leakyrelu_alpha = 0.2 150 | padding = 'same' 151 | num_filters_start = 64 # Number of filters to start with 152 | num_kernels = 100 153 | kernel_dim = 5 154 | patchgan_output_dim = (256, 256, 1) 155 | patchgan_patch_dim = (256, 256, 1) 156 | number_patches = int( 157 | (patchgan_output_dim[0] / patchgan_patch_dim[0]) * (patchgan_output_dim[1] / patchgan_patch_dim[1])) 158 | 159 | input_layer = Input(shape=patchgan_patch_dim) 160 | 161 | des = Convolution2D(filters=64, kernel_size=kernel_size, padding=padding, strides=strides)(input_layer) 162 | des = LeakyReLU(alpha=leakyrelu_alpha)(des) 163 | 164 | # Calculate the number of convolutional layers 165 | total_conv_layers = int(np.floor(np.log(patchgan_output_dim[1]) / np.log(2))) 166 | list_filters = [num_filters_start * min(total_conv_layers, (2 ** i)) for i in range(total_conv_layers)] 167 | 168 | # Next 7 Convolutional blocks 169 | for filters in list_filters[1:]: 170 | des = Convolution2D(filters=filters, kernel_size=kernel_size, padding=padding, strides=strides)(des) 171 | des = BatchNormalization()(des) 172 | des = LeakyReLU(alpha=leakyrelu_alpha)(des) 173 | 174 | # Add a flatten layer 175 | flatten_layer = Flatten()(des) 176 | 177 | # Add the final dense layer 178 | dense_layer = Dense(units=2, activation='softmax')(flatten_layer) 179 | 180 | # Create the PatchGAN model 181 | model_patch_gan = 
Model(inputs=[input_layer], outputs=[dense_layer, flatten_layer]) 182 | 183 | # Create a list of input layers equal to the number of patches 184 | list_input_layers = [Input(shape=patchgan_patch_dim) for _ in range(number_patches)] 185 | 186 | # Pass the patches through the PatchGAN network 187 | output1 = [model_patch_gan(patch)[0] for patch in list_input_layers] 188 | output2 = [model_patch_gan(patch)[1] for patch in list_input_layers] 189 | 190 | # In case of multiple patches, concatenate outputs to calculate perceptual loss 191 | if len(output1) > 1: 192 | output1 = concatenate(output1) 193 | else: 194 | output1 = output1[0] 195 | 196 | # In case of multiple patches, merge output2 as well 197 | if len(output2) > 1: 198 | output2 = concatenate(output2) 199 | else: 200 | output2 = output2[0] 201 | 202 | # Add a dense layer 203 | dense_layer2 = Dense(num_kernels * kernel_dim, use_bias=False, activation=None) 204 | 205 | # Add a lambda layer implementing a form of minibatch discrimination (compares feature vectors across the batch) 206 | custom_loss_layer = Lambda(lambda x: K.sum( 207 | K.exp(-K.sum(K.abs(K.expand_dims(x, 3) - K.expand_dims(K.permute_dimensions(x, pattern=(1, 2, 0)), 0)), 2)), 2)) 208 | 209 | # Pass the output2 tensor through dense_layer2 210 | output2 = dense_layer2(output2) 211 | 212 | # Reshape the output2 tensor 213 | output2 = Reshape((num_kernels, kernel_dim))(output2) 214 | 215 | # Pass the output2 tensor through the custom_loss_layer 216 | output2 = custom_loss_layer(output2) 217 | 218 | # Finally, concatenate output1 and output2 219 | output1 = concatenate([output1, output2]) 220 | final_output = Dense(2, activation="softmax")(output1) 221 | 222 | # Create the discriminator model 223 | discriminator = Model(inputs=list_input_layers, outputs=[final_output]) 224 | return discriminator 225 | 226 | 227 | def build_adversarial_model(generator, discriminator): 228 | """ 229 | Create an adversarial model 230 | """ 231 | input_image_dim = (256, 256, 1) 232 | patch_dim = (256, 256) 233 | 234 | # Create an input layer 235 | input_layer = Input(shape=input_image_dim) 236 | 237 | # Use the generator network to generate images 238 | generated_images = generator(input_layer) 239 | 240 | # Extract patches from the generated images 241 | img_height, img_width = input_image_dim[:2] 242 | patch_height, patch_width = patch_dim 243 | 244 | row_idx_list = [(i * patch_height, (i + 1) * patch_height) for i in range(int(img_height / patch_height))] 245 | column_idx_list = [(i * patch_width, (i + 1) * patch_width) for i in range(int(img_width / patch_width))] 246 | 247 | generated_patches_list = [] 248 | for row_idx in row_idx_list: 249 | for column_idx in column_idx_list: 250 | generated_patches_list.append(Lambda(lambda z, r=row_idx, c=column_idx: z[:, r[0]:r[1], c[0]:c[1], :],  # bind the loop indices as defaults so each Lambda slices its own patch; rows index the height axis 251 | output_shape=input_image_dim)(generated_images)) 252 | 253 | discriminator.trainable = False 254 | 255 | # Pass the generated patches through the discriminator network 256 | dis_output = discriminator(generated_patches_list) 257 | 258 | # Create a model 259 | model = Model(inputs=[input_layer], outputs=[generated_images, dis_output]) 260 | return model 261 | 262 | 263 | """ 264 | Data preprocessing methods 265 | """ 266 | 267 | 268 | def generate_and_extract_patches(images, facades, generator_model, batch_counter, patch_dim): 269 | # Alternate between generated and real images: even batch counters train the discriminator on fakes, odd ones on real images 270 | if batch_counter % 2 == 0: 271 | # Generate fake images 272 | output_images = generator_model.predict(facades) 273 | 274 | # Create a batch of ground truth labels 275 | labels = 
np.zeros((output_images.shape[0], 2), dtype=np.uint8) 276 | labels[:, 0] = 1  # [1, 0] marks generated (fake) images 277 | 278 | else: 279 | # Take real images 280 | output_images = images 281 | 282 | # Create a batch of ground truth labels 283 | labels = np.zeros((output_images.shape[0], 2), dtype=np.uint8) 284 | labels[:, 1] = 1  # [0, 1] marks real images 285 | 286 | patches = [] 287 | for y in range(0, output_images.shape[1], patch_dim[0]):  # step over the height axis (shape[0] is the batch axis) 288 | for x in range(0, output_images.shape[2], patch_dim[1]):  # step over the width axis 289 | image_patches = output_images[:, y: y + patch_dim[0], x: x + patch_dim[1], :] 290 | patches.append(np.asarray(image_patches, dtype=np.float32)) 291 | 292 | return patches, labels 293 | 
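# Note: with patch_dim equal to the full 256x256 image, the loops above yield exactly one patch, matching number_patches = 1 computed in build_patchgan_discriminator; a smaller patch_dim would produce one list entry per discriminator input.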
294 | 295 | def save_images(real_images, real_sketches, generated_images, num_epoch, dataset_name, limit): 296 | real_sketches = real_sketches * 255.0 297 | real_images = real_images * 255.0 298 | generated_images = generated_images * 255.0 299 | 300 | # Save only a limited number of images 301 | real_sketches = real_sketches[:limit] 302 | generated_images = generated_images[:limit] 303 | real_images = real_images[:limit] 304 | 305 | # Create a stack of images (for 4D arrays, np.hstack concatenates along the height axis) 306 | X = np.hstack((real_sketches, generated_images, real_images)) 307 | 308 | # Save the stack of images 309 | imwrite('results/X_full_{}_{}.png'.format(dataset_name, num_epoch), X[0]) 310 | 311 | 312 | def load_dataset(data_dir, data_type, img_width, img_height): 313 | data_dir_path = os.path.join(data_dir, data_type) 314 | 315 | # Get all .h5 files containing training images (expected layout: <data_dir>/<data_type>/images/*.h5 and <data_dir>/<data_type>/facades/*.h5, each holding a 'data' array) 316 | facade_photos_h5 = [f for f in os.listdir(os.path.join(data_dir_path, 'images')) if '.h5' in f] 317 | facade_labels_h5 = [f for f in os.listdir(os.path.join(data_dir_path, 'facades')) if '.h5' in f] 318 | 319 | final_facade_photos = None 320 | final_facade_labels = None 321 | 322 | for index in range(len(facade_photos_h5)): 323 | facade_photos_path = data_dir_path + '/images/' + facade_photos_h5[index] 324 | facade_labels_path = data_dir_path + '/facades/' + facade_labels_h5[index] 325 | 326 | facade_photos = h5py.File(facade_photos_path, 'r') 327 | facade_labels = h5py.File(facade_labels_path, 'r') 328 | 329 | # Reshape and normalize images to [0, 1] 330 | num_photos = facade_photos['data'].shape[0] 331 | num_labels = facade_labels['data'].shape[0] 332 | 333 | all_facades_photos = np.array(facade_photos['data'], dtype=np.float32) 334 | all_facades_photos = all_facades_photos.reshape((num_photos, img_width, img_height, 1)) / 255.0 335 | 336 | all_facades_labels = np.array(facade_labels['data'], dtype=np.float32) 337 | all_facades_labels = all_facades_labels.reshape((num_labels, img_width, img_height, 1)) / 255.0 338 | 339 | if final_facade_photos is not None and final_facade_labels is not None: 340 | final_facade_photos = np.concatenate([final_facade_photos, all_facades_photos], axis=0) 341 | final_facade_labels = np.concatenate([final_facade_labels, all_facades_labels], axis=0) 342 | else: 343 | final_facade_photos = all_facades_photos 344 | final_facade_labels = all_facades_labels 345 | 346 | return final_facade_photos, final_facade_labels 347 | 348 | 349 | def visualize_rgb(img): 350 | """ 351 | Visualize an RGB image 352 | :param img: RGB image 353 | """ 354 | fig = plt.figure() 355 | ax = fig.add_subplot(1, 1, 1) 356 | ax.imshow(img) 357 | ax.axis("off") 358 | ax.set_title("Image") 359 | plt.show() 360 | 361 | 362 | def visualize_bw_image(img): 363 | """ 364 | Visualize a black and white image 365 | """ 366 | fig = plt.figure() 367 | ax = fig.add_subplot(1, 1, 1) 368 | ax.imshow(img, cmap='gray', interpolation='nearest') 369 | ax.axis("off") 370 | ax.set_title("Image") 371 | plt.show() 372 | 373 | 374 | def save_bw_image(img, path): 375 | """ 376 | Save a black and white image 377 | """ 378 | fig = plt.figure() 379 | ax = fig.add_subplot(1, 1, 1) 380 | ax.imshow(img, cmap='gray', interpolation='nearest') 381 | ax.axis("off") 382 | ax.set_title("Image") 383 | plt.savefig(path) 384 | 385 | 386 | def write_log(callback, name, loss, batch_no): 387 | """ 388 | Write training summary to TensorBoard 389 | """ 390 | 391 | summary = tf.Summary() 392 | summary_value = summary.value.add() 393 | summary_value.simple_value = loss 394 | summary_value.tag = name 395 | callback.writer.add_summary(summary, batch_no) 396 | callback.writer.flush() 397 | 398 | 399 | if __name__ == '__main__': 400 | epochs = 500 401 | num_images_per_epoch = 400 402 | batch_size = 1 403 | img_width = 256 404 | img_height = 256 405 | num_channels = 1 406 | input_img_dim = (256, 256, 1) 407 | patch_dim = (256, 256) 408 | dataset_dir = "data/facades_bw/" 409 | 410 | common_optimizer = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08) 411 | 412 | """ 413 | Build and compile networks 414 | """ 415 | # Build and compile the discriminator network 416 | patchgan_discriminator = build_patchgan_discriminator() 417 | patchgan_discriminator.compile(loss='binary_crossentropy', optimizer=common_optimizer) 418 | 419 | # Build and compile the generator network 420 | unet_generator = build_unet_generator() 421 | unet_generator.compile(loss='mae', optimizer=common_optimizer) 422 | 423 | # Build and compile the adversarial model 424 | adversarial_model = build_adversarial_model(unet_generator, patchgan_discriminator) 425 | adversarial_model.compile(loss=['mae', 'binary_crossentropy'], loss_weights=[1E2, 1], optimizer=common_optimizer) 426 | 
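# As in pix2pix, the generator's updates combine an L1 (MAE) reconstruction loss with the adversarial loss; loss_weights=[1E2, 1] weights the L1 term 100x more heavily.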
427 | """ 428 | Load the training, testing and validation datasets 429 | """ 430 | training_facade_photos, training_facade_labels = load_dataset(data_dir=dataset_dir, data_type='training', 431 | img_width=img_width, img_height=img_height) 432 | 433 | test_facade_photos, test_facade_labels = load_dataset(data_dir=dataset_dir, data_type='testing', 434 | img_width=img_width, img_height=img_height) 435 | 436 | validation_facade_photos, validation_facade_labels = load_dataset(data_dir=dataset_dir, data_type='validation', 437 | img_width=img_width, img_height=img_height) 438 | 439 | tensorboard = TensorBoard(log_dir="logs/{}".format(time.time())) 440 | tensorboard.set_model(unet_generator) 441 | tensorboard.set_model(patchgan_discriminator) 442 | 443 | print('Starting the training...') 444 | for epoch in range(0, epochs): 445 | print('Epoch {}'.format(epoch)) 446 | 447 | dis_losses = [] 448 | gen_losses = [] 449 | 450 | batch_counter = 1 451 | start = time.time() 452 | 453 | num_batches = int(training_facade_photos.shape[0] / batch_size) 454 | 455 | # Train the networks for the computed number of batches 456 | for index in range(num_batches): 457 | print("Batch:{}".format(index)) 458 | 459 | # Sample a batch of training and validation images 460 | train_facades_batch = training_facade_labels[index * batch_size:(index + 1) * batch_size] 461 | train_images_batch = training_facade_photos[index * batch_size:(index + 1) * batch_size] 462 | 463 | val_facades_batch = validation_facade_labels[index * batch_size:(index + 1) * batch_size]  # unused in the loop; validation data is re-sampled below every 10th epoch 464 | val_images_batch = validation_facade_photos[index * batch_size:(index + 1) * batch_size] 465 | 466 | patches, labels = generate_and_extract_patches(train_images_batch, train_facades_batch, unet_generator, 467 | batch_counter, patch_dim) 468 | 469 | """ 470 | Train the discriminator model 471 | """ 472 | d_loss = patchgan_discriminator.train_on_batch(patches, labels) 473 | 474 | labels = np.zeros((train_images_batch.shape[0], 2), dtype=np.uint8) 475 | labels[:, 1] = 1  # mark as real so the generator is pushed to fool the discriminator 476 | 477 | """ 478 | Train the adversarial model 479 | """ 480 | g_loss = adversarial_model.train_on_batch(train_facades_batch, [train_images_batch, labels]) 481 | 482 | # Increase the batch counter 483 | batch_counter += 1 484 | 485 | print("Discriminator loss:", d_loss) 486 | print("Generator loss:", g_loss) 487 | 488 | gen_losses.append(g_loss[1]) 489 | dis_losses.append(d_loss) 490 | 491 | """ 492 | Save losses to TensorBoard after each epoch 493 | """ 494 | write_log(tensorboard, 'discriminator_loss', np.mean(dis_losses), epoch) 495 | write_log(tensorboard, 'generator_loss', np.mean(gen_losses), epoch) 496 | 497 | # Every 10th epoch, generate and save images for visualization 498 | if epoch % 10 == 0: 499 | # Sample a batch of validation data 500 | val_facades_batch = validation_facade_labels[0:5] 501 | val_images_batch = validation_facade_photos[0:5] 502 | 503 | # Generate images 504 | validation_generated_images = unet_generator.predict(val_facades_batch) 505 | 506 | # Save images 507 | save_images(val_images_batch, val_facades_batch, validation_generated_images, epoch, 'validation', limit=5) 508 | 509 | # Save models 510 | unet_generator.save_weights("generator.h5") 511 | patchgan_discriminator.save_weights("discriminator.h5") 512 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | # Generative-Adversarial-Networks-Projects 5 | Generative Adversarial Networks Projects, published by Packt 6 | 7 | 8 | 9 | 10 | This is the code repository for [Generative-Adversarial-Networks-Projects](https://www.packtpub.com/big-data-and-business-intelligence/generative-adversarial-networks-projects?utm_source=github&utm_medium=repository&utm_campaign=9781789136678), published by Packt. 11 | 12 | **Build next-generation generative models using TensorFlow and Keras** 13 | 14 | ## What is this book about? 15 | Generative Adversarial Networks (GANs) have the potential to build next-generation models, as they can mimic any distribution of data. Major research and development work is being undertaken in this field, as it is one of the most rapidly growing areas of machine learning. This book will test unsupervised techniques for training neural networks as you build seven end-to-end projects in the GAN domain. 16 | 17 | This book covers the following exciting features: 18 | * Train a network on the 3D ShapeNet dataset to generate realistic shapes 19 | * Generate anime characters using the Keras implementation of DCGAN 20 | * Implement an SRGAN network to generate high-resolution images 21 | * Train Age-cGAN on Wiki-Cropped images to improve face verification 22 | * Use conditional GANs for image-to-image translation 23 | 24 | If you feel this book is for you, get your [copy](https://www.amazon.com/dp/10DigitISBN) today! 25 | 26 | 27 | 28 | 29 | 30 | ## Instructions and Navigations 31 | All of the code is organized into folders. For example, Chapter02. 32 | 33 | The code will look like the following: 34 | ``` 35 | import scipy.io as io 36 | voxels = io.loadmat("path to .mat file")['instance'] 37 | ``` 38 | 39 | **Following is what you need for this book:** 40 | If you’re a data scientist, machine learning developer, deep learning practitioner, or AI enthusiast looking for a project guide to test your knowledge and expertise in building real-world GAN models, this book is for you. 41 | 42 | With the following software and hardware list you can run all the code files present in the book (Chapters 1-9). 43 | 44 | ### Software and Hardware List 45 | 46 | | Chapter | Software required | OS required | 47 | | -------- | ------------------------------------| -----------------------------------| 48 | | 1 | Python 3.5 | Windows, Mac OS X, and Linux (Any) | 49 | | 2 | AWS | Windows, Mac OS X, and Linux (Any) | 50 | | 3 | GPU | Windows, Mac OS X, and Linux (Any) | 51 | 52 | 53 | 54 | ### Related products 55 | * Generative Adversarial Networks Cookbook [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/generative-adversarial-networks-cookbook?utm_source=github&utm_medium=repository&utm_campaign=9781789139907) [[Amazon]](https://www.amazon.com/dp/1789139902) 56 | 57 | * Python Deep Learning - Second Edition [[Packt]](https://www.packtpub.com/big-data-and-business-intelligence/python-deep-learning-second-edition?utm_source=github&utm_medium=repository&utm_campaign=9781789348460) [[Amazon]](https://www.amazon.com/dp/1789348463) 58 | 59 | ## Get to Know the Author(s) 60 | **Kailash Ahirwar** 61 | Kailash Ahirwar is a machine learning and deep learning enthusiast.
He has worked in many areas of Artificial Intelligence (AI), ranging from natural language processing and computer vision to generative modeling using GANs. He is a co-founder and CTO of Mate Labs. He uses GANs to build different models, such as turning paintings into photos and controlling deep image synthesis with texture patches. He is super optimistic about AGI and believes that AI is going to be the workhorse of human evolution. 62 | 63 | 64 | 65 | ### Suggestions and Feedback 66 | [Click here](https://docs.google.com/forms/d/e/1FAIpQLSdy7dATC6QmEL81FIUuymZ0Wy9vH1jHkvpY57OiMeKGqib_Ow/viewform) if you have any feedback or suggestions. 67 | ### Download a free PDF 68 | 69 | If you have already purchased a print or Kindle version of this book, you can get a DRM-free PDF version at no cost.
Simply click on the link to claim your free PDF.
70 | https://packt.link/free-ebook/9781789136678
--------------------------------------------------------------------------------