├── README.md ├── image_generator.py ├── resnet.py ├── da_resnet18_mnist.py ├── da_lenet5_mnist.py └── da_resnetpa_mnist.py /README.md: -------------------------------------------------------------------------------- 1 | # keras-bda 2 | Train the three sub-networks (classifier, generator, and discriminator) from the paper "A Bayesian Data Augmentation Approach for Learning Deep Models" (NIPS 2017). 3 | The baseline classifiers are LeNet5, ResNet18, and a pre-activation ResNet (ResNetPA), each trained on the MNIST dataset. 4 | The code was adapted from: https://github.com/lukedeo/keras-acgan 5 | 6 | 7 | -------------------------------------------------------------------------------- /image_generator.py: -------------------------------------------------------------------------------- 1 | from da_lenet5_mnist import build_generator, build_discriminator 2 | from glob import glob 3 | import numpy as np 4 | import matplotlib.pyplot as plt 5 | 6 | latent_size = 100 7 | generator = build_generator(latent_size) 8 | 9 | # load the weights from the last epoch 10 | generator.load_weights(sorted(glob('params_generator*'))[-1]) 11 | 12 | np.random.seed(31337) 13 | 14 | noise = np.random.normal(loc=0.0, scale=1, size=(100, latent_size)) 15 | 16 | sampled_labels = np.array([ 17 | [i] * 10 for i in range(10) 18 | ]).reshape(-1, 1) 19 | 20 | # get a batch to display 21 | generated_images = generator.predict( 22 | [noise, sampled_labels], verbose=0) 23 | 24 | # arrange them into a grid 25 | img = (np.concatenate([r.reshape(-1, 28) 26 | for r in np.split(generated_images, 10) 27 | ], axis=-1) * 127.5 + 127.5).astype(np.uint8) 28 | 29 | plt.imshow(img, cmap='gray') 30 | _ = plt.axis('off') 31 | 32 | plt.show() 33 | -------------------------------------------------------------------------------- /resnet.py: -------------------------------------------------------------------------------- 1 | import six 2 | from keras.models import Model 3 | from keras.layers import ( 4 | Input, 5 | Activation, 6 | merge, 7 | Dense, 8 | Flatten 9 | ) 10 | from keras.layers.convolutional import ( 11 | Convolution2D, 12 | MaxPooling2D, 13 | AveragePooling2D 14 | ) 15 | from keras.layers.normalization import BatchNormalization 16 | from keras.regularizers import l2 17 | from keras import backend as K 18 | 19 | 20 | def _bn_relu(input): 21 | """Helper to build a BN -> relu block 22 | """ 23 | norm = BatchNormalization(mode=0, axis=CHANNEL_AXIS)(input) 24 | return Activation("relu")(norm) 25 | 26 | 27 | def _conv_bn_relu(**conv_params): 28 | """Helper to build a conv -> BN -> relu block 29 | """ 30 | nb_filter = conv_params["nb_filter"] 31 | nb_row = conv_params["nb_row"] 32 | nb_col = conv_params["nb_col"] 33 | subsample = conv_params.setdefault("subsample", (1, 1)) 34 | init = conv_params.setdefault("init", "he_normal") 35 | border_mode = conv_params.setdefault("border_mode", "same") 36 | W_regularizer = conv_params.setdefault("W_regularizer", l2(1.e-4)) 37 | 38 | def f(input): 39 | conv = Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col, subsample=subsample, 40 | init=init, border_mode=border_mode, W_regularizer=W_regularizer)(input) 41 | return _bn_relu(conv) 42 | 43 | return f 44 | 45 | 46 | def _bn_relu_conv(**conv_params): 47 | """Helper to build a BN -> relu -> conv block.
48 | This is an improved scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf 49 | """ 50 | nb_filter = conv_params["nb_filter"] 51 | nb_row = conv_params["nb_row"] 52 | nb_col = conv_params["nb_col"] 53 | subsample = conv_params.setdefault("subsample", (1,1)) 54 | init = conv_params.setdefault("init", "he_normal") 55 | border_mode = conv_params.setdefault("border_mode", "same") 56 | W_regularizer = conv_params.setdefault("W_regularizer", l2(1.e-4)) 57 | 58 | def f(input): 59 | activation = _bn_relu(input) 60 | return Convolution2D(nb_filter=nb_filter, nb_row=nb_row, nb_col=nb_col, subsample=subsample, 61 | init=init, border_mode=border_mode, W_regularizer=W_regularizer)(activation) 62 | 63 | return f 64 | 65 | 66 | def _shortcut(input, residual): 67 | """Adds a shortcut between input and residual block and merges them with "sum" 68 | """ 69 | # Expand channels of shortcut to match residual. 70 | # Stride appropriately to match residual (width, height) 71 | # Should be int if network architecture is correctly configured. 72 | stride_width = input._keras_shape[ROW_AXIS] // residual._keras_shape[ROW_AXIS] 73 | stride_height = input._keras_shape[COL_AXIS] // residual._keras_shape[COL_AXIS] 74 | equal_channels = residual._keras_shape[CHANNEL_AXIS] == input._keras_shape[CHANNEL_AXIS] 75 | 76 | shortcut = input 77 | # 1 X 1 conv if shape is different. Else identity. 78 | if stride_width > 1 or stride_height > 1 or not equal_channels: 79 | shortcut = Convolution2D(nb_filter=residual._keras_shape[CHANNEL_AXIS], 80 | nb_row=1, nb_col=1, 81 | subsample=(stride_width, stride_height), 82 | init="he_normal", border_mode="valid", 83 | W_regularizer=l2(0.0001))(input) 84 | 85 | return merge([shortcut, residual], mode="sum") 86 | 87 | 88 | def _residual_block(block_function, nb_filter, repetitions, is_first_layer=False): 89 | """Builds a residual block with repeating bottleneck blocks. 90 | """ 91 | def f(input): 92 | for i in range(repetitions): 93 | init_subsample = (1, 1) 94 | if i == 0 and not is_first_layer: 95 | init_subsample = (2, 2) 96 | input = block_function( 97 | nb_filter=nb_filter, 98 | init_subsample=init_subsample, 99 | is_first_block_of_first_layer=(is_first_layer and i == 0) 100 | )(input) 101 | return input 102 | 103 | return f 104 | 105 | 106 | def basic_block(nb_filter, init_subsample=(1, 1), is_first_block_of_first_layer=False): 107 | """Basic 3 X 3 convolution blocks for use on resnets with layers <= 34. 108 | Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf 109 | """ 110 | def f(input): 111 | 112 | if is_first_block_of_first_layer: 113 | # don't repeat bn->relu since we just did bn->relu->maxpool 114 | conv1 = Convolution2D(nb_filter=nb_filter, 115 | nb_row=3, nb_col=3, 116 | subsample=init_subsample, 117 | init="he_normal", border_mode="same", 118 | W_regularizer=l2(0.0001))(input) 119 | else: 120 | conv1 = _bn_relu_conv(nb_filter=nb_filter, nb_row=3, nb_col=3, subsample=init_subsample)(input) 121 | 122 | residual = _bn_relu_conv(nb_filter=nb_filter, nb_row=3, nb_col=3)(conv1) 123 | return _shortcut(input, residual) 124 | 125 | return f 126 | 127 | 128 | def bottleneck(nb_filter, init_subsample=(1, 1), is_first_block_of_first_layer=False): 129 | """Bottleneck architecture for > 34 layer resnet. 
130 | Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf 131 | 132 | Returns: 133 | A final conv layer of nb_filter * 4 134 | """ 135 | def f(input): 136 | 137 | if is_first_block_of_first_layer: 138 | # don't repeat bn->relu since we just did bn->relu->maxpool 139 | conv_1_1 = Convolution2D(nb_filter=nb_filter, 140 | nb_row=1, nb_col=1, 141 | subsample=init_subsample, 142 | init="he_normal", border_mode="same", 143 | W_regularizer=l2(0.0001))(input) 144 | else: 145 | conv_1_1 = _bn_relu_conv(nb_filter=nb_filter, nb_row=1, nb_col=1, subsample=init_subsample)(input) 146 | 147 | conv_3_3 = _bn_relu_conv(nb_filter=nb_filter, nb_row=3, nb_col=3)(conv_1_1) 148 | residual = _bn_relu_conv(nb_filter=nb_filter * 4, nb_row=1, nb_col=1)(conv_3_3) 149 | return _shortcut(input, residual) 150 | 151 | return f 152 | 153 | 154 | def _handle_dim_ordering(): 155 | global ROW_AXIS 156 | global COL_AXIS 157 | global CHANNEL_AXIS 158 | if K.image_dim_ordering() == 'tf': 159 | ROW_AXIS = 1 160 | COL_AXIS = 2 161 | CHANNEL_AXIS = 3 162 | else: 163 | CHANNEL_AXIS = 1 164 | ROW_AXIS = 2 165 | COL_AXIS = 3 166 | 167 | 168 | def _get_block(identifier): 169 | if isinstance(identifier, six.string_types): 170 | res = globals().get(identifier) 171 | if not res: 172 | raise ValueError('Invalid {}'.format(identifier)) 173 | return res 174 | return identifier 175 | 176 | 177 | class ResnetBuilder(object): 178 | @staticmethod 179 | def build(input_shape, num_outputs, block_fn, repetitions): 180 | """Builds a custom ResNet like architecture. 181 | 182 | Args: 183 | input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols) 184 | num_outputs: The number of outputs at final softmax layer 185 | block_fn: The block function to use. This is either `basic_block` or `bottleneck`. 186 | The original paper used basic_block for layers < 50 187 | repetitions: Number of repetitions of various block units. 188 | At each block unit, the number of filters are doubled and the input size is halved 189 | 190 | Returns: 191 | The keras `Model`. 192 | """ 193 | _handle_dim_ordering() 194 | if len(input_shape) != 3: 195 | raise Exception("Input shape should be a tuple (nb_channels, nb_rows, nb_cols)") 196 | 197 | # Permute dimension order if necessary 198 | if K.image_dim_ordering() == 'tf': 199 | input_shape = (input_shape[1], input_shape[2], input_shape[0]) 200 | 201 | # Load function from str if needed. 
202 | block_fn = _get_block(block_fn) 203 | 204 | input = Input(shape=input_shape) 205 | conv1 = _conv_bn_relu(nb_filter=64, nb_row=7, nb_col=7, subsample=(2, 2))(input) 206 | pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), border_mode="same")(conv1) 207 | 208 | block = pool1 209 | nb_filter = 64 210 | for i, r in enumerate(repetitions): 211 | block = _residual_block(block_fn, nb_filter=nb_filter, repetitions=r, is_first_layer=(i == 0))(block) 212 | nb_filter *= 2 213 | 214 | # Last activation 215 | block = _bn_relu(block) 216 | 217 | # the final BN -> relu has already been applied above, so no second pair is needed 218 | block_output = block 219 | 220 | # Classifier block 221 | pool2 = AveragePooling2D(pool_size=(block._keras_shape[ROW_AXIS], 222 | block._keras_shape[COL_AXIS]), 223 | strides=(1, 1))(block_output) 224 | flatten1 = Flatten()(pool2) 225 | dense = Dense(output_dim=num_outputs, init="he_normal", activation="softmax")(flatten1) 226 | 227 | model = Model(input=input, output=dense) 228 | return model 229 | 230 | @staticmethod 231 | def build_resnet_18(input_shape, num_outputs): 232 | return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2]) 233 | 234 | @staticmethod 235 | def build_resnet_34(input_shape, num_outputs): 236 | return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3]) 237 | 238 | @staticmethod 239 | def build_resnet_50(input_shape, num_outputs): 240 | return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3]) 241 | 242 | @staticmethod 243 | def build_resnet_101(input_shape, num_outputs): 244 | return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3]) 245 | 246 | @staticmethod 247 | def build_resnet_152(input_shape, num_outputs): 248 | return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3]) 249 | -------------------------------------------------------------------------------- /da_resnet18_mnist.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | 4 | from collections import defaultdict 5 | import cPickle as pickle 6 | from PIL import Image 7 | 8 | from six.moves import range 9 | import sys 10 | sys.setrecursionlimit(2**25) 11 | 12 | import keras.backend as K 13 | from keras.datasets import mnist 14 | from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout 15 | from keras.layers.core import Activation 16 | from keras.layers.advanced_activations import LeakyReLU 17 | from keras.layers.convolutional import UpSampling2D, Convolution2D, MaxPooling2D 18 | from keras.models import Sequential, Model 19 | from keras.optimizers import Adam 20 | from keras.utils.generic_utils import Progbar 21 | import numpy as np 22 | import resnet 23 | 24 | 25 | # from pyimagesearch.cnn.networks import LeNet 26 | from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adamax, Nadam 27 | 28 | from keras.utils import np_utils 29 | 30 | np.random.seed(31337) 31 | 32 | K.set_image_dim_ordering('th') 33 | 34 | 35 | def build_generator(latent_size): 36 | # we will map a pair of (z, L), where z is a latent vector and L is a 37 | # label drawn from P_c, to image space (..., 1, 28, 28) 38 | cnn = Sequential() 39 | 40 | cnn.add(Dense(1024, input_dim=latent_size, activation='relu')) 41 | cnn.add(Dense(128 * 7 * 7, activation='relu')) 42 | cnn.add(Reshape((128, 7, 7))) 43 | 44 | # upsample to (..., 14, 14) 45 | cnn.add(UpSampling2D(size=(2, 2))) 46 |
cnn.add(Convolution2D(256, 5, 5, border_mode='same', 47 | activation='relu', init='glorot_normal')) 48 | 49 | # upsample to (..., 28, 28) 50 | cnn.add(UpSampling2D(size=(2, 2))) 51 | cnn.add(Convolution2D(128, 5, 5, border_mode='same', 52 | activation='relu', init='glorot_normal')) 53 | 54 | # take a channel axis reduction 55 | cnn.add(Convolution2D(1, 2, 2, border_mode='same', 56 | activation='tanh', init='glorot_normal')) 57 | 58 | # this is the z space commonly refered to in GAN papers 59 | latent = Input(shape=(latent_size, )) 60 | 61 | # this will be our label 62 | image_class = Input(shape=(1,), dtype='int32') 63 | 64 | # 10 classes in MNIST 65 | cls = Flatten()(Embedding(10, latent_size, 66 | init='glorot_normal')(image_class)) 67 | 68 | # hadamard product between z-space and a class conditional embedding 69 | h = merge([latent, cls], mode='mul') 70 | 71 | fake_image = cnn(h) 72 | 73 | return Model(input=[latent, image_class], output=fake_image) 74 | 75 | 76 | def build_discriminator(): 77 | # build a relatively standard conv net, with LeakyReLUs as suggested in 78 | # the reference paper 79 | cnn = Sequential() 80 | 81 | cnn.add(Convolution2D(32, 3, 3, border_mode='same', subsample=(2, 2), 82 | input_shape=(1, 28, 28))) 83 | cnn.add(LeakyReLU()) 84 | cnn.add(Dropout(0.3)) 85 | 86 | cnn.add(Convolution2D(64, 3, 3, border_mode='same', subsample=(1, 1))) 87 | cnn.add(LeakyReLU()) 88 | cnn.add(Dropout(0.3)) 89 | 90 | cnn.add(Convolution2D(128, 3, 3, border_mode='same', subsample=(2, 2))) 91 | cnn.add(LeakyReLU()) 92 | cnn.add(Dropout(0.3)) 93 | 94 | cnn.add(Convolution2D(256, 3, 3, border_mode='same', subsample=(1, 1))) 95 | cnn.add(LeakyReLU()) 96 | cnn.add(Dropout(0.3)) 97 | 98 | cnn.add(Flatten()) 99 | 100 | image = Input(shape=(1, 28, 28)) 101 | 102 | features = cnn(image) 103 | 104 | # output (name=generation) is whether or not the discriminator 105 | # thinks the image that is being shown is fake. 
106 | fake = Dense(1, activation='sigmoid', name='generation')(features) 107 | 108 | return Model(input=image, output=fake) 109 | 110 | 111 | def build_resnet(): # source: https://github.com/raghakot/keras-resnet 112 | # Model 113 | 114 | model = resnet.ResnetBuilder.build_resnet_18((1, 28, 28), 10) 115 | # model = resnet.ResnetBuilder.build_resnet_34((1, 28, 28), 10) 116 | # model = resnet.ResnetBuilder.build_resnet_50((1, 28, 28), 10) 117 | # model = resnet.ResnetBuilder.build_resnet_101((1, 28, 28), 10) 118 | # model = resnet.ResnetBuilder.build_resnet_152((1, 28, 28), 10) 119 | 120 | image = Input(shape=(1, 28, 28)) 121 | 122 | aux = model(image) 123 | 124 | return Model(input=image, output=aux) 125 | # return model 126 | 127 | if __name__ == '__main__': 128 | 129 | # batch and latent size taken from the paper 130 | nb_epochs = 100 131 | batch_size = 100 132 | latent_size = 100 133 | nb_classes = 10 134 | # Adam parameters suggested in https://arxiv.org/abs/1511.06434 135 | adam_lr = 0.0002 136 | adam_beta_1 = 0.5 137 | 138 | # build the discriminator 139 | discriminator = build_discriminator() 140 | opt = SGD(lr=0.01) 141 | discriminator.compile( 142 | optimizer=opt, 143 | loss= 'binary_crossentropy') 144 | 145 | # build the classifier 146 | resnet = build_resnet() 147 | resnet.compile(loss="categorical_crossentropy", optimizer='adadelta', metrics=["accuracy"]) 148 | 149 | # build the generator 150 | generator = build_generator(latent_size) 151 | generator.compile(optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), 152 | loss='binary_crossentropy') 153 | 154 | latent = Input(shape=(latent_size, )) 155 | image_class = Input(shape=(1,), dtype='int32') 156 | 157 | # get a fake image 158 | fake_img = generator([latent, image_class]) 159 | 160 | # we only want to be able to train generation for the combined model 161 | discriminator.trainable = False 162 | resnet.trainable = False 163 | 164 | fake = discriminator(fake_img) 165 | aux = resnet(fake_img) 166 | 167 | combined = Model(input=[latent, image_class], output=[fake, aux]) 168 | 169 | combined.compile( 170 | optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), 171 | loss=['binary_crossentropy', 'categorical_crossentropy'] 172 | ) 173 | 174 | # get our mnist data, and force it to be of shape (..., 1, 28, 28) with 175 | # range [-1, 1] 176 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 177 | X_train = (X_train.astype(np.float32) - 127.5) / 127.5 178 | X_train = np.expand_dims(X_train, axis=1) 179 | 180 | X_test = (X_test.astype(np.float32) - 127.5) / 127.5 181 | X_test = np.expand_dims(X_test, axis=1) 182 | 183 | Y_train = np_utils.to_categorical(y_train, nb_classes) 184 | Y_test = np_utils.to_categorical(y_test, nb_classes) 185 | nb_train, nb_test = X_train.shape[0], X_test.shape[0] 186 | 187 | train_history = defaultdict(list) 188 | test_history = defaultdict(list) 189 | 190 | # fo = open("accuracy_save.txt", "wb") 191 | for epoch in range(nb_epochs): 192 | print('Epoch {} of {}'.format(epoch + 1, nb_epochs)) 193 | 194 | nb_batches = int(X_train.shape[0] / batch_size) 195 | progress_bar = Progbar(target=nb_batches) 196 | 197 | epoch_gen_loss = [] 198 | epoch_disc_loss = [] 199 | epoch_resnet_loss =[] 200 | 201 | for index in range(nb_batches): 202 | progress_bar.update(index) 203 | # generate a new batch of noise 204 | noise = np.random.normal(loc=0.0, scale=1, size=(batch_size, latent_size)) 205 | # noise = np.random.uniform(-1, 1, (batch_size, latent_size)) 206 | # get a batch of real images 207 | image_batch = X_train[index * 
batch_size:(index + 1) * batch_size] 208 | label_batch = y_train[index * batch_size:(index + 1) * batch_size] 209 | 210 | # sample some labels from p_c 211 | sampled_labels = np.random.randint(0, 10, batch_size) 212 | 213 | # generate a batch of fake images, using the generated labels as a 214 | # conditioner. We reshape the sampled labels to be 215 | # (batch_size, 1) so that we can feed them into the embedding 216 | # layer as a length one sequence 217 | generated_images = generator.predict( 218 | [noise, sampled_labels.reshape((-1, 1))], verbose=0) 219 | 220 | X = np.concatenate((image_batch, generated_images)) 221 | y = np.array([1] * batch_size + [0] *batch_size) 222 | aux_y = np.concatenate((label_batch, sampled_labels), axis=0) 223 | aux_y = np_utils.to_categorical(aux_y, 10) 224 | 225 | # see if the discriminator can figure itself out... 226 | epoch_disc_loss.append(discriminator.train_on_batch(X, y)) 227 | # 228 | epoch_resnet_loss.append(resnet.train_on_batch(X, aux_y)) 229 | 230 | # make new noise. we generate 2 * batch size here such that we have 231 | # the generator optimize over an identical number of images as the 232 | # discriminator 233 | noise = np.random.normal(loc=0.0, scale=1, size=(2 * batch_size, latent_size)) 234 | # noise = np.random.uniform(-1, 1, (2 * batch_size, latent_size)) 235 | sampled_labels = np.random.randint(0, 10, 2 * batch_size).reshape(-1, 1) 236 | aux_sampled_labels = np_utils.to_categorical(sampled_labels, 10) 237 | 238 | # we want to train the generator to trick the discriminator 239 | # For the generator, we want all the {fake, not-fake} labels to say 240 | # not-fake 241 | trick = np.ones(2 * batch_size) 242 | 243 | epoch_gen_loss.append(combined.train_on_batch( 244 | [noise, sampled_labels], [trick, aux_sampled_labels])) 245 | 246 | print('\nTesting for epoch {}:'.format(epoch + 1)) 247 | 248 | # evaluate the testing loss here 249 | 250 | # generate a new batch of noise 251 | noise = np.random.normal(loc=0.0, scale=1, size=(nb_test, latent_size)) 252 | # noise = np.random.uniform(-1, 1, (nb_test, latent_size)) 253 | 254 | # sample some labels from p_c and generate images from them 255 | sampled_labels = np.random.randint(0, 10, nb_test) 256 | generated_images = generator.predict( 257 | [noise, sampled_labels.reshape((-1, 1))], verbose=False) 258 | 259 | X = np.concatenate((X_test, generated_images)) 260 | y = np.array([1] * nb_test + [0] * nb_test) 261 | aux_y = np.concatenate((y_test, sampled_labels), axis=0) 262 | aux_y = np_utils.to_categorical(aux_y, 10) 263 | 264 | # see if the discriminator can figure itself out... 
265 | discriminator_test_loss = discriminator.evaluate(X, y, verbose=False) 266 | 267 | discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0) 268 | 269 | resnet_test_loss = resnet.evaluate(X, aux_y, verbose=False) 270 | 271 | resnet_train_loss = np.mean(np.array(epoch_resnet_loss), axis=0) 272 | 273 | # # evaluate the test classification accuracy 274 | # 275 | # (loss, accuracy) = resnet.evaluate(X_test, Y_test, batch_size=batch_size, verbose=0) 276 | # 277 | # # show the accuracy on the testing set 278 | # print("\n[INFO] accuracy: {:.2f}%".format(accuracy * 100)) 279 | # 280 | # fo.write('Test accuracy at the ' + str(epoch+1) + '-th iteration is: ' + str(accuracy) + '\n') 281 | 282 | 283 | # make new noise 284 | noise = np.random.normal(loc=0.0, scale=1, size=(2 * nb_test, latent_size)) 285 | # noise = np.random.uniform(-1, 1, (2 * nb_test, latent_size)) 286 | sampled_labels = np.random.randint(0, 10, 2 * nb_test).reshape(-1, 1) 287 | aux_sampled_labels = np_utils.to_categorical(sampled_labels, 10) 288 | 289 | trick = np.ones(2 * nb_test) 290 | 291 | generator_test_loss = combined.evaluate( 292 | [noise, sampled_labels], 293 | [trick, aux_sampled_labels], verbose=False) 294 | 295 | generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0) 296 | 297 | # generate an epoch report on performance 298 | train_history['generator'].append(generator_train_loss) 299 | train_history['discriminator'].append(discriminator_train_loss) 300 | train_history['resnet'].append(resnet_train_loss) 301 | 302 | test_history['generator'].append(generator_test_loss) 303 | test_history['discriminator'].append(discriminator_test_loss) 304 | test_history['resnet'].append(resnet_test_loss) 305 | 306 | 307 | # save weights every epoch 308 | generator.save_weights( 309 | 'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True) 310 | discriminator.save_weights( 311 | 'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True) 312 | resnet.save_weights( 313 | 'params_resnet_epoch_{0:03d}.hdf5'.format(epoch), True) 314 | 315 | 316 | pickle.dump({'train': train_history, 'test': test_history}, 317 | open('acgan-history.pkl', 'wb')) 318 | 319 | # evaluate the test classification accuracy 320 | (loss, accuracy) = resnet.evaluate(X_test, Y_test, 321 | batch_size=batch_size, verbose=0) 322 | 323 | # show the accuracy on the testing set 324 | print("\n [INFO] Test accuracy: {:.2f}%".format(accuracy * 100)) 325 | 326 | 327 | # fo.close() 328 | 329 | -------------------------------------------------------------------------------- /da_lenet5_mnist.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | from collections import defaultdict 4 | import cPickle as pickle 5 | from PIL import Image 6 | 7 | from six.moves import range 8 | 9 | import keras.backend as K 10 | from keras.datasets import mnist 11 | from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout 12 | from keras.layers.core import Activation 13 | from keras.layers.advanced_activations import LeakyReLU 14 | from keras.layers.convolutional import UpSampling2D, Convolution2D, MaxPooling2D 15 | from keras.models import Sequential, Model 16 | from keras.optimizers import Adam 17 | from keras.utils.generic_utils import Progbar 18 | import numpy as np 19 | import matplotlib.pyplot as plt 20 | 21 | # from pyimagesearch.cnn.networks import LeNet 22 | from keras.optimizers import SGD 23 | from keras.utils import np_utils 24 | 25 | 
np.random.seed(1337) 26 | 27 | K.set_image_dim_ordering('th') 28 | 29 | def build_generator(latent_size): 30 | # we will map a pair of (z, L), where z is a latent vector and L is a 31 | # label drawn from P_c, to image space (..., 1, 28, 28) 32 | cnn = Sequential() 33 | 34 | cnn.add(Dense(1024, input_dim=latent_size, activation='relu')) 35 | cnn.add(Dense(128 * 7 * 7, activation='relu')) 36 | cnn.add(Reshape((128, 7, 7))) 37 | 38 | # upsample to (..., 14, 14) 39 | cnn.add(UpSampling2D(size=(2, 2))) 40 | cnn.add(Convolution2D(256, 5, 5, border_mode='same', 41 | activation='relu', init='glorot_normal')) 42 | 43 | # upsample to (..., 28, 28) 44 | cnn.add(UpSampling2D(size=(2, 2))) 45 | cnn.add(Convolution2D(128, 5, 5, border_mode='same', 46 | activation='relu', init='glorot_normal')) 47 | 48 | # take a channel axis reduction 49 | cnn.add(Convolution2D(1, 2, 2, border_mode='same', 50 | activation='tanh', init='glorot_normal')) 51 | 52 | # this is the z space commonly refered to in GAN papers 53 | latent = Input(shape=(latent_size, )) 54 | 55 | # this will be our label 56 | image_class = Input(shape=(1,), dtype='int32') 57 | 58 | # 10 classes in MNIST 59 | cls = Flatten()(Embedding(10, latent_size, 60 | init='glorot_normal')(image_class)) 61 | 62 | # hadamard product between z-space and a class conditional embedding 63 | h = merge([latent, cls], mode='mul') 64 | 65 | fake_image = cnn(h) 66 | 67 | return Model(input=[latent, image_class], output=fake_image) 68 | 69 | 70 | def build_discriminator(): 71 | # build a relatively standard conv net, with LeakyReLUs as suggested in 72 | # the reference paper 73 | cnn = Sequential() 74 | 75 | cnn.add(Convolution2D(32, 3, 3, border_mode='same', subsample=(2, 2), 76 | input_shape=(1, 28, 28))) 77 | cnn.add(LeakyReLU()) 78 | cnn.add(Dropout(0.3)) 79 | 80 | cnn.add(Convolution2D(64, 3, 3, border_mode='same', subsample=(1, 1))) 81 | cnn.add(LeakyReLU()) 82 | cnn.add(Dropout(0.3)) 83 | 84 | cnn.add(Convolution2D(128, 3, 3, border_mode='same', subsample=(2, 2))) 85 | cnn.add(LeakyReLU()) 86 | cnn.add(Dropout(0.3)) 87 | 88 | cnn.add(Convolution2D(256, 3, 3, border_mode='same', subsample=(1, 1))) 89 | cnn.add(LeakyReLU()) 90 | cnn.add(Dropout(0.3)) 91 | 92 | cnn.add(Flatten()) 93 | 94 | image = Input(shape=(1, 28, 28)) 95 | 96 | features = cnn(image) 97 | 98 | # thinks the image that is being shown is fake 99 | fake = Dense(1, activation='sigmoid', name='generation')(features) 100 | 101 | return Model(input=image, output=fake) 102 | 103 | 104 | def build_lenet(): 105 | 106 | # initialize the model 107 | model = Sequential() 108 | 109 | # first set of CONV => RELU => POOL 110 | model.add(Convolution2D(20, 5, 5, border_mode="same", 111 | input_shape=(1, 28, 28))) 112 | model.add(Activation("relu")) 113 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) 114 | 115 | # second set of CONV => RELU => POOL 116 | model.add(Convolution2D(50, 5, 5, border_mode="same")) 117 | model.add(Activation("relu")) 118 | model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) 119 | 120 | # set of FC => RELU layers 121 | model.add(Flatten()) 122 | model.add(Dense(500)) 123 | model.add(Activation("relu")) 124 | 125 | image = Input(shape=(1, 28, 28)) 126 | 127 | features = model(image) 128 | 129 | # (name=auxiliary) is the class that the discriminator thinks the image 130 | # belongs to. 
131 | aux = Dense(10, activation='softmax', name='auxiliary')(features) 132 | 133 | return Model(input=image, output=aux) 134 | 135 | 136 | if __name__ == '__main__': 137 | 138 | # batch and latent size taken from the paper 139 | nb_epochs = 100 140 | batch_size = 100 141 | latent_size = 100 142 | nb_classes = 10 143 | 144 | # Adam parameters suggested in https://arxiv.org/abs/1511.06434 145 | adam_lr = 0.0002 146 | adam_beta_1 = 0.5 147 | adam_beta_2 = 0.5 148 | 149 | # build the discriminator 150 | discriminator = build_discriminator() 151 | opt = SGD(lr=0.01) 152 | discriminator.compile( 153 | optimizer=opt, 154 | loss='binary_crossentropy') 155 | 156 | # build the classifier 157 | lenet = build_lenet() 158 | 159 | lenet.compile(loss="categorical_crossentropy", optimizer='adadelta', 160 | metrics=["accuracy"]) 161 | 162 | # build the generator 163 | generator = build_generator(latent_size) 164 | generator.compile(optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), 165 | loss='binary_crossentropy') 166 | 167 | latent = Input(shape=(latent_size, )) 168 | image_class = Input(shape=(1,), dtype='int32') 169 | 170 | # get a fake image 171 | fake_img = generator([latent, image_class]) 172 | 173 | # we only want to be able to train generation for the combined model 174 | discriminator.trainable = False 175 | lenet.trainable = False 176 | 177 | fake = discriminator(fake_img) 178 | aux = lenet(fake_img) 179 | 180 | combined = Model(input=[latent, image_class], output=[fake, aux]) 181 | 182 | combined.compile( 183 | optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), 184 | loss=['binary_crossentropy', 'categorical_crossentropy'] 185 | ) 186 | 187 | # get our mnist data, and force it to be of shape (..., 1, 28, 28) with 188 | # range [-1, 1] 189 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 190 | 191 | X_train = (X_train.astype(np.float32) - 127.5) / 127.5 192 | X_train = np.expand_dims(X_train, axis=1) 193 | 194 | X_test = (X_test.astype(np.float32) - 127.5) / 127.5 195 | X_test = np.expand_dims(X_test, axis=1) 196 | 197 | nb_train, nb_test = X_train.shape[0], X_test.shape[0] 198 | 199 | Y_train = np_utils.to_categorical(y_train, nb_classes) 200 | Y_test = np_utils.to_categorical(y_test, nb_classes) 201 | 202 | train_history = defaultdict(list) 203 | test_history = defaultdict(list) 204 | 205 | # fo = open("accuracy_save.txt", "wb") 206 | 207 | for epoch in range(nb_epochs): 208 | print('Epoch {} of {}'.format(epoch + 1, nb_epochs)) 209 | 210 | nb_batches = int(X_train.shape[0] / batch_size) 211 | progress_bar = Progbar(target=nb_batches) 212 | 213 | epoch_gen_loss = [] 214 | epoch_disc_loss = [] 215 | epoch_lenet_loss =[] 216 | 217 | for index in range(nb_batches): 218 | progress_bar.update(index) 219 | # generate a new batch of noise 220 | noise = np.random.normal(loc=0.0, scale=1, size=(batch_size, latent_size)) 221 | 222 | # get a batch of real images 223 | image_batch = X_train[index * batch_size:(index + 1) * batch_size] 224 | label_batch = y_train[index * batch_size:(index + 1) * batch_size] 225 | 226 | # sample some labels from p_c 227 | sampled_labels = np.random.randint(0, 10, batch_size) 228 | 229 | # generate a batch of fake images, using the generated labels as a 230 | # conditioner. 
We reshape the sampled labels to be 231 | # (batch_size, 1) so that we can feed them into the embedding 232 | # layer as a length one sequence 233 | generated_images = generator.predict( 234 | [noise, sampled_labels.reshape((-1, 1))], verbose=0) 235 | 236 | X = np.concatenate((image_batch, generated_images)) 237 | y = np.array([1] * batch_size + [0] * batch_size) 238 | aux_y = np.concatenate((label_batch, sampled_labels), axis=0) 239 | aux_y = np_utils.to_categorical(aux_y, 10) 240 | 241 | # see if the discriminator and the classifier can figure itself out... 242 | # discriminator 243 | epoch_disc_loss.append(discriminator.train_on_batch(X, y)) 244 | # and classifier 245 | epoch_lenet_loss.append(lenet.train_on_batch(X, aux_y)) 246 | 247 | # make new noise. we generate 2 * batch size here such that we have 248 | # the generator optimize over an identical number of images as the 249 | # discriminator 250 | noise = np.random.normal(loc=0.0, scale=1, size=(2 * batch_size, latent_size)) 251 | sampled_labels = np.random.randint(0, 10, 2 * batch_size).reshape(-1, 1) 252 | aux_sampled_labels = np_utils.to_categorical(sampled_labels, 10) 253 | 254 | # we want to train the generator to trick the discriminator 255 | # For the generator, we want all the {fake, not-fake} labels to say 256 | # not-fake 257 | trick = np.ones(2 * batch_size) 258 | 259 | epoch_gen_loss.append(combined.train_on_batch( 260 | [noise, sampled_labels], [trick, aux_sampled_labels])) 261 | 262 | print('\nTesting for epoch {}:'.format(epoch + 1)) 263 | 264 | # evaluate the testing loss here 265 | 266 | # generate a new batch of noise 267 | noise = np.random.normal(loc=0.0, scale=1, size=(nb_test, latent_size)) 268 | 269 | # sample some labels from p_c and generate images from them 270 | sampled_labels = np.random.randint(0, 10, nb_test) 271 | generated_images = generator.predict( 272 | [noise, sampled_labels.reshape((-1, 1))], verbose=False) 273 | 274 | X = np.concatenate((X_test, generated_images)) 275 | y = np.array([1] * nb_test + [0] * nb_test) 276 | aux_y = np.concatenate((y_test, sampled_labels), axis=0) 277 | aux_y = np_utils.to_categorical(aux_y, 10) 278 | 279 | # see if the discriminator can figure itself out... 
280 | discriminator_test_loss = discriminator.evaluate(X, y, verbose=False) 281 | 282 | discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0) 283 | 284 | lenet_test_loss = lenet.evaluate(X, aux_y, verbose=False) 285 | 286 | lenet_train_loss = np.mean(np.array(epoch_lenet_loss), axis=0) 287 | 288 | # # evaluate the test classification accuracy 289 | # 290 | # (loss, accuracy) = lenet.evaluate(X_test, Y_test, batch_size=batch_size, verbose=0) 291 | # 292 | # # show the accuracy on the testing set 293 | # print("\n [INFO] accuracy: {:.2f}%".format(accuracy * 100)) 294 | # 295 | # fo.write('Test accuracy at the ' + str(epoch+1) + '-th iteration is: ' + str(accuracy) + '\n') 296 | 297 | # make new noise 298 | noise = np.random.normal(loc=0.0, scale=1, size=(2 * nb_test, latent_size)) 299 | sampled_labels = np.random.randint(0, 10, 2 * nb_test).reshape(-1, 1) 300 | aux_sampled_labels = np_utils.to_categorical(sampled_labels, 10) 301 | 302 | trick = np.ones(2 * nb_test) 303 | 304 | generator_test_loss = combined.evaluate( 305 | [noise, sampled_labels], 306 | [trick, aux_sampled_labels], verbose=False) 307 | 308 | generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0) 309 | 310 | # generate an epoch report on performance 311 | train_history['generator'].append(generator_train_loss) 312 | train_history['discriminator'].append(discriminator_train_loss) 313 | train_history['lenet'].append(lenet_train_loss) 314 | 315 | test_history['generator'].append(generator_test_loss) 316 | test_history['discriminator'].append(discriminator_test_loss) 317 | test_history['lenet'].append(lenet_test_loss) 318 | 319 | 320 | # save weights every epoch 321 | generator.save_weights( 322 | 'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True) 323 | discriminator.save_weights( 324 | 'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True) 325 | lenet.save_weights( 326 | 'params_lenet_epoch_{0:03d}.hdf5'.format(epoch), True) 327 | 328 | 329 | pickle.dump({'train': train_history, 'test': test_history}, 330 | open('acgan-history.pkl', 'wb')) 331 | 332 | # evaluate the test classification accuracy 333 | 334 | (loss, accuracy) = lenet.evaluate(X_test, Y_test, 335 | batch_size=batch_size, verbose=0) 336 | 337 | # show the accuracy on the testing set 338 | print("\n [INFO] Test accuracy: {:.2f}%".format(accuracy * 100)) 339 | 340 | # fo.close() 341 | 342 | -------------------------------------------------------------------------------- /da_resnetpa_mnist.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import print_function 4 | 5 | from collections import defaultdict 6 | import cPickle as pickle 7 | from PIL import Image 8 | 9 | from six.moves import range 10 | 11 | import keras.backend as K 12 | from keras.datasets import mnist 13 | from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout 14 | from keras.layers.core import Activation 15 | from keras.layers.advanced_activations import LeakyReLU 16 | from keras.layers.convolutional import UpSampling2D, Convolution2D, MaxPooling2D 17 | from keras.models import Sequential, Model 18 | from keras.optimizers import Adam, SGD, Adagrad, Adadelta, Adamax, Nadam 19 | from keras.utils.generic_utils import Progbar 20 | import numpy as np 21 | 22 | from keras.datasets import mnist 23 | from keras.preprocessing.image import ImageDataGenerator 24 | from keras.utils import np_utils 25 | from keras.callbacks import 
ReduceLROnPlateau, CSVLogger, EarlyStopping 26 | 27 | from keras.models import Model 28 | from keras.layers import Input, merge 29 | from keras.layers import Dense, Activation, Flatten, BatchNormalization 30 | from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D 31 | from keras.regularizers import l2 32 | 33 | import sys 34 | sys.setrecursionlimit(2 ** 25) 35 | 36 | from keras.optimizers import SGD 37 | from keras.utils import np_utils 38 | 39 | np.random.seed(1337) 40 | 41 | K.set_image_dim_ordering('th') 42 | 43 | 44 | def build_generator(latent_size): 45 | # we will map a pair of (z, L), where z is a latent vector and L is a 46 | # label drawn from P_c, to image space (..., 1, 28, 28) 47 | cnn = Sequential() 48 | 49 | cnn.add(Dense(1024, input_dim=latent_size, activation='relu')) 50 | cnn.add(Dense(128 * 7 * 7, activation='relu')) 51 | cnn.add(Reshape((128, 7, 7))) 52 | 53 | # upsample to (..., 14, 14) 54 | cnn.add(UpSampling2D(size=(2, 2))) 55 | cnn.add(Convolution2D(256, 5, 5, border_mode='same', 56 | activation='relu', init='glorot_normal')) 57 | 58 | # upsample to (..., 28, 28) 59 | cnn.add(UpSampling2D(size=(2, 2))) 60 | cnn.add(Convolution2D(128, 5, 5, border_mode='same', 61 | activation='relu', init='glorot_normal')) 62 | 63 | # take a channel axis reduction 64 | cnn.add(Convolution2D(1, 2, 2, border_mode='same', 65 | activation='tanh', init='glorot_normal')) 66 | 67 | # this is the z space commonly refered to in GAN papers 68 | latent = Input(shape=(latent_size, )) 69 | 70 | # this will be our label 71 | image_class = Input(shape=(1,), dtype='int32') 72 | 73 | # 10 classes in MNIST 74 | cls = Flatten()(Embedding(10, latent_size, 75 | init='glorot_normal')(image_class)) 76 | 77 | # hadamard product between z-space and a class conditional embedding 78 | h = merge([latent, cls], mode='mul') 79 | 80 | fake_image = cnn(h) 81 | 82 | return Model(input=[latent, image_class], output=fake_image) 83 | 84 | 85 | def build_discriminator(): 86 | # build a relatively standard conv net, with LeakyReLUs as suggested in 87 | # the reference paper 88 | cnn = Sequential() 89 | 90 | cnn.add(Convolution2D(32, 3, 3, border_mode='same', subsample=(2, 2), 91 | input_shape=(1, 28, 28))) 92 | cnn.add(LeakyReLU()) 93 | cnn.add(Dropout(0.3)) 94 | 95 | cnn.add(Convolution2D(64, 3, 3, border_mode='same', subsample=(1, 1))) 96 | cnn.add(LeakyReLU()) 97 | cnn.add(Dropout(0.3)) 98 | 99 | cnn.add(Convolution2D(128, 3, 3, border_mode='same', subsample=(2, 2))) 100 | cnn.add(LeakyReLU()) 101 | cnn.add(Dropout(0.3)) 102 | 103 | cnn.add(Convolution2D(256, 3, 3, border_mode='same', subsample=(1, 1))) 104 | cnn.add(LeakyReLU()) 105 | cnn.add(Dropout(0.3)) 106 | 107 | cnn.add(Flatten()) 108 | 109 | image = Input(shape=(1, 28, 28)) 110 | 111 | features = cnn(image) 112 | 113 | # output (name=generation) is whether or not the discriminator 114 | # thinks the image that is being shown is fake. 
115 | fake = Dense(1, activation='sigmoid', name='generation')(features) 116 | 117 | return Model(input=image, output=fake) 118 | 119 | 120 | def rnpa_bottleneck_layer(input_tensor, nb_filters, filter_sz, stage, 121 | init='glorot_normal', reg=0.0, use_shortcuts=True): 122 | nb_in_filters, nb_bottleneck_filters = nb_filters 123 | 124 | bn_name = 'bn' + str(stage) 125 | conv_name = 'conv' + str(stage) 126 | relu_name = 'relu' + str(stage) 127 | merge_name = '+' + str(stage) 128 | 129 | # batchnorm-relu-conv, from nb_in_filters to nb_bottleneck_filters via 1x1 conv 130 | if stage > 1: # first activation is just after conv1 131 | x = BatchNormalization(axis=1, name=bn_name + 'a')(input_tensor) 132 | x = Activation('relu', name=relu_name + 'a')(x) 133 | else: 134 | x = input_tensor 135 | 136 | x = Convolution2D( 137 | nb_bottleneck_filters, 1, 1, 138 | init=init, 139 | W_regularizer=l2(reg), 140 | bias=False, 141 | name=conv_name + 'a' 142 | )(x) 143 | 144 | # batchnorm-relu-conv, from nb_bottleneck_filters to nb_bottleneck_filters via FxF conv 145 | x = BatchNormalization(axis=1, name=bn_name + 'b')(x) 146 | x = Activation('relu', name=relu_name + 'b')(x) 147 | x = Convolution2D( 148 | nb_bottleneck_filters, filter_sz, filter_sz, 149 | border_mode='same', 150 | init=init, 151 | W_regularizer=l2(reg), 152 | bias=False, 153 | name=conv_name + 'b' 154 | )(x) 155 | 156 | # batchnorm-relu-conv, from nb_bottleneck_filters back to nb_in_filters via 1x1 conv 157 | x = BatchNormalization(axis=1, name=bn_name + 'c')(x) 158 | x = Activation('relu', name=relu_name + 'c')(x) 159 | x = Convolution2D(nb_in_filters, 1, 1, 160 | init=init, W_regularizer=l2(reg), 161 | name=conv_name + 'c' 162 | )(x) 163 | 164 | # merge 165 | if use_shortcuts: 166 | x = merge([x, input_tensor], mode='sum', name=merge_name) 167 | 168 | return x 169 | 170 | 171 | def ResNetPreAct(input_shape=(1, 28, 28), nb_classes=10, 172 | layer1_params=(5, 64, 2), 173 | res_layer_params=(3, 16, 3), 174 | final_layer_params=None, 175 | init='glorot_normal', reg=0.0, use_shortcuts=True 176 | ): 177 | """ 178 | Source: https://gist.github.com/JefferyRPrice/c1ecc3d67068c8d9b3120475baba1d7e 179 | 180 | Return a new residual network using full pre-activation, based on the work in 181 | "Identity Mappings in Deep Residual Networks" by He et al., 182 | http://arxiv.org/abs/1603.05027 183 | 184 | The following network definition achieves 92.0% accuracy on the CIFAR-10 test set using 185 | the `adam` optimizer, 100 epochs, and a learning rate schedule of 1.e-3 / 1.e-4 / 1.e-5 with 186 | transitions at 50 and 75 epochs: 187 | ResNetPreAct(layer1_params=(3,128,2), res_layer_params=(3,32,25), reg=reg) 188 | 189 | Max pooling is removed in favour of a strided first convolutional layer, motivated by 190 | "Striving for Simplicity: The All Convolutional Net" by Springenberg et al. 191 | (https://arxiv.org/abs/1412.6806) and by my own experiments, where I observed about a 0.5% 192 | improvement from replacing the max pool operations in the VGG-like cifar10_cnn.py example 193 | in the Keras distribution.
194 | 195 | Parameters 196 | ---------- 197 | input_dim : tuple of (C, H, W) 198 | 199 | nb_classes: number of scores to produce from final affine layer (input to softmax) 200 | 201 | layer1_params: tuple of (filter size, num filters, stride for conv) 202 | 203 | res_layer_params: tuple of (filter size, num res layer filters, num res stages) 204 | 205 | final_layer_params: None or tuple of (filter size, num filters, stride for conv) 206 | 207 | init: type of weight initialization to use 208 | 209 | reg: L2 weight regularization (or weight decay) 210 | 211 | use_shortcuts: to evaluate difference between residual and non-residual network 212 | """ 213 | 214 | sz_L1_filters, nb_L1_filters, stride_L1 = layer1_params 215 | sz_res_filters, nb_res_filters, nb_res_stages = res_layer_params 216 | 217 | use_final_conv = (final_layer_params is not None) 218 | if use_final_conv: 219 | sz_fin_filters, nb_fin_filters, stride_fin = final_layer_params 220 | sz_pool_fin = input_shape[1] / (stride_L1 * stride_fin) 221 | else: 222 | sz_pool_fin = input_shape[1] / (stride_L1) 223 | 224 | img_input = Input(shape=input_shape, name='cifar') 225 | 226 | x = Convolution2D( 227 | nb_L1_filters, sz_L1_filters, sz_L1_filters, 228 | border_mode='same', 229 | subsample=(stride_L1, stride_L1), 230 | init=init, 231 | W_regularizer=l2(reg), 232 | bias=False, 233 | name='conv0' 234 | )(img_input) 235 | x = BatchNormalization(axis=1, name='bn0')(x) 236 | x = Activation('relu', name='relu0')(x) 237 | 238 | for stage in range(1, nb_res_stages + 1): 239 | x = rnpa_bottleneck_layer( 240 | x, 241 | (nb_L1_filters, nb_res_filters), 242 | sz_res_filters, 243 | stage, 244 | init=init, 245 | reg=reg, 246 | use_shortcuts=use_shortcuts 247 | ) 248 | 249 | x = BatchNormalization(axis=1, name='bnF')(x) 250 | x = Activation('relu', name='reluF')(x) 251 | 252 | if use_final_conv: 253 | x = Convolution2D( 254 | nb_fin_filters, sz_fin_filters, sz_fin_filters, 255 | border_mode='same', 256 | subsample=(stride_fin, stride_fin), 257 | init=init, 258 | W_regularizer=l2(reg), 259 | name='convF' 260 | )(x) 261 | 262 | x = AveragePooling2D((sz_pool_fin, sz_pool_fin), name='avg_pool')(x) 263 | 264 | x = Flatten(name='flat')(x) 265 | x = Dense(nb_classes, activation='softmax', name='fc10')(x) 266 | 267 | return Model(img_input, x, name='rnpa') 268 | 269 | if __name__ == '__main__': 270 | 271 | # batch and latent size taken from the paper 272 | nb_epochs = 100 273 | batch_size = 100 274 | latent_size = 100 275 | nb_classes = 10 276 | # Adam parameters suggested in https://arxiv.org/abs/1511.06434 277 | adam_lr = 0.0002 278 | adam_beta_1 = 0.5 279 | 280 | # build the discriminator 281 | discriminator = build_discriminator() 282 | opt = SGD(lr=0.01) 283 | discriminator.compile( 284 | optimizer=opt, 285 | loss='binary_crossentropy') 286 | 287 | # build the classifier 288 | resnet = ResNetPreAct(layer1_params=(3, 128, 2), res_layer_params=(3, 32, 25), reg=0.0) 289 | 290 | resnet.compile(loss="categorical_crossentropy", optimizer='adadelta', metrics=["accuracy"]) 291 | 292 | # build the generator 293 | generator = build_generator(latent_size) 294 | generator.compile(optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), 295 | loss='binary_crossentropy') 296 | 297 | latent = Input(shape=(latent_size, )) 298 | image_class = Input(shape=(1,), dtype='int32') 299 | 300 | # get a fake image 301 | fake_img = generator([latent, image_class]) 302 | 303 | # we only want to be able to train generation for the combined model 304 | discriminator.trainable = False 305 | 
resnet.trainable = False 306 | 307 | fake = discriminator(fake_img) 308 | aux = resnet(fake_img) 309 | 310 | combined = Model(input=[latent, image_class], output=[fake, aux]) 311 | 312 | combined.compile( 313 | optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), 314 | loss=['binary_crossentropy', 'categorical_crossentropy'] 315 | ) 316 | 317 | # get our mnist data, and force it to be of shape (..., 1, 28, 28) with 318 | # range [-1, 1] 319 | 320 | # The data, shuffled and split between train and test sets: 321 | (X_train, y_train), (X_test, y_test) = mnist.load_data() 322 | 323 | X_train = (X_train.astype(np.float32) - 127.5) / 127.5 324 | X_train = np.expand_dims(X_train, axis=1) 325 | 326 | X_test = (X_test.astype(np.float32) - 127.5) / 127.5 327 | X_test = np.expand_dims(X_test, axis=1) 328 | 329 | Y_train = np_utils.to_categorical(y_train, nb_classes) 330 | Y_test = np_utils.to_categorical(y_test, nb_classes) 331 | nb_train, nb_test = X_train.shape[0], X_test.shape[0] 332 | 333 | train_history = defaultdict(list) 334 | test_history = defaultdict(list) 335 | 336 | # fo = open("accuracy_save.txt", "wb") 337 | 338 | for epoch in range(nb_epochs): 339 | print('Epoch {} of {}'.format(epoch + 1, nb_epochs)) 340 | 341 | nb_batches = int(X_train.shape[0] / batch_size) 342 | progress_bar = Progbar(target=nb_batches) 343 | 344 | epoch_gen_loss = [] 345 | epoch_disc_loss = [] 346 | epoch_resnet_loss =[] 347 | 348 | for index in range(nb_batches): 349 | progress_bar.update(index) 350 | 351 | # index = 1 352 | # generate a new batch of noise 353 | noise = np.random.normal(loc=0.0, scale=1, size=(batch_size, latent_size)) 354 | 355 | # get a batch of real images 356 | image_batch = X_train[index * batch_size:(index + 1) * batch_size] 357 | label_batch = y_train[index * batch_size:(index + 1) * batch_size] 358 | 359 | # sample some labels from p_c 360 | sampled_labels = np.random.randint(0, 10, batch_size) 361 | 362 | # generate a batch of fake images, using the generated labels as a 363 | # conditioner. We reshape the sampled labels to be 364 | # (batch_size, 1) so that we can feed them into the embedding 365 | # layer as a length one sequence 366 | generated_images = generator.predict( 367 | [noise, sampled_labels.reshape((-1, 1))], verbose=0) 368 | 369 | # print(label_batch.shape) 370 | # print(sampled_labels.shape) 371 | 372 | X = np.concatenate((image_batch, generated_images)) 373 | y = np.array([1] * batch_size + [0] * batch_size) 374 | aux_y = np.concatenate((label_batch, sampled_labels), axis=0) 375 | aux_y = np_utils.to_categorical(aux_y, 10) 376 | 377 | # see if the discriminator can figure itself out... 378 | epoch_disc_loss.append(discriminator.train_on_batch(X, y)) 379 | # 380 | epoch_resnet_loss.append(resnet.train_on_batch(X, aux_y)) 381 | 382 | # make new noise. 
we generate 2 * batch size here such that we have 383 | # the generator optimize over an identical number of images as the 384 | # discriminator 385 | noise = np.random.normal(loc=0.0, scale=1, size=(2 * batch_size, latent_size)) 386 | sampled_labels = np.random.randint(0, 10, 2 * batch_size).reshape(-1, 1) 387 | aux_sampled_labels = np_utils.to_categorical(sampled_labels, 10) 388 | 389 | # we want to train the generator to trick the discriminator 390 | # For the generator, we want all the {fake, not-fake} labels to say 391 | # not-fake 392 | trick = np.ones(2 * batch_size) 393 | 394 | epoch_gen_loss.append(combined.train_on_batch( 395 | [noise, sampled_labels], [trick, aux_sampled_labels])) 396 | 397 | print('\nTesting for epoch {}:'.format(epoch + 1)) 398 | 399 | # evaluate the testing loss here 400 | 401 | # generate a new batch of noise 402 | noise = np.random.normal(loc=0.0, scale=1, size=(nb_test, latent_size)) 403 | 404 | # sample some labels from p_c and generate images from them 405 | sampled_labels = np.random.randint(0, 10, nb_test) 406 | generated_images = generator.predict( 407 | [noise, sampled_labels.reshape((-1, 1))], verbose=False) 408 | 409 | X = np.concatenate((X_test, generated_images)) 410 | y = np.array([1] * nb_test + [0] * nb_test) 411 | 412 | aux_y = np.concatenate((y_test, sampled_labels), axis=0) 413 | aux_y = np_utils.to_categorical(aux_y, 10) 414 | 415 | # see if the discriminator can figure itself out... 416 | discriminator_test_loss = discriminator.evaluate(X, y, verbose=False) 417 | 418 | discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0) 419 | 420 | resnet_test_loss = resnet.evaluate(X, aux_y, verbose=False) 421 | 422 | resnet_train_loss = np.mean(np.array(epoch_resnet_loss), axis=0) 423 | 424 | # # evaluate the test classification accuracy 425 | # 426 | # (loss, accuracy) = resnet.evaluate(X_test, Y_test, verbose=0) 427 | # 428 | # # show the accuracy on the testing set 429 | # print("[INFO] accuracy: {:.2f}%".format(accuracy * 100)) 430 | # 431 | # fo.write('Test accuracy at the ' + str(epoch+1) + '-th iteration is: ' + str(accuracy) + '\n') 432 | 433 | 434 | # make new noise 435 | noise = np.random.normal(loc=0.0, scale=1, size=(2 * nb_test, latent_size)) 436 | sampled_labels = np.random.randint(0, 10, 2 * nb_test).reshape(-1, 1) 437 | aux_sampled_labels = np_utils.to_categorical(sampled_labels, 10) 438 | 439 | trick = np.ones(2 * nb_test) 440 | 441 | generator_test_loss = combined.evaluate( 442 | [noise, sampled_labels], 443 | [trick, aux_sampled_labels], verbose=False) 444 | 445 | generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0) 446 | 447 | # generate an epoch report on performance 448 | train_history['generator'].append(generator_train_loss) 449 | train_history['discriminator'].append(discriminator_train_loss) 450 | train_history['resnet'].append(resnet_train_loss) 451 | 452 | test_history['generator'].append(generator_test_loss) 453 | test_history['discriminator'].append(discriminator_test_loss) 454 | test_history['resnet'].append(resnet_test_loss) 455 | 456 | # save weights every epoch 457 | generator.save_weights( 458 | 'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True) 459 | # discriminator.save_weights( 460 | # 'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True) 461 | resnet.save_weights( 462 | 'params_resnet_epoch_{0:03d}.hdf5'.format(epoch), True) 463 | 464 | 465 | 466 | pickle.dump({'train': train_history, 'test': test_history}, 467 | open('acgan-history.pkl', 'wb')) 468 | 469 | # evaluate 
the test classification accuracy 470 | (loss, accuracy) = resnet.evaluate(X_test, Y_test, 471 | batch_size=batch_size, verbose=0) 472 | 473 | # show the accuracy on the testing set 474 | print("\n [INFO] Test accuracy: {:.2f}%".format(accuracy * 100)) 475 | 476 | # fo.close() 477 | 478 | 479 | --------------------------------------------------------------------------------
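
All three training scripts share the same per-batch update: the discriminator and the classifier are each trained on the union of a real batch and a conditionally generated batch, and the generator is then updated through the combined model while both other networks are frozen. Below is a minimal sketch distilling that step, assuming the Keras 1.x models built above; `train_step` and `classifier` are illustrative names, not identifiers from the repository.

import numpy as np
from keras.utils import np_utils

def train_step(generator, discriminator, classifier, combined,
               image_batch, label_batch, batch_size=100, latent_size=100):
    # 1) generate fake images conditioned on labels sampled from p_c
    noise = np.random.normal(loc=0.0, scale=1, size=(batch_size, latent_size))
    sampled_labels = np.random.randint(0, 10, batch_size)
    generated = generator.predict([noise, sampled_labels.reshape((-1, 1))], verbose=0)

    # 2) train the discriminator (real vs. fake) and the classifier (class labels)
    #    on the union of real and generated images
    X = np.concatenate((image_batch, generated))
    y = np.array([1] * batch_size + [0] * batch_size)
    aux_y = np_utils.to_categorical(np.concatenate((label_batch, sampled_labels)), 10)
    disc_loss = discriminator.train_on_batch(X, y)
    clf_loss = classifier.train_on_batch(X, aux_y)

    # 3) train the generator through the combined model (discriminator and
    #    classifier frozen), labelling every generated image as "not fake"
    noise = np.random.normal(loc=0.0, scale=1, size=(2 * batch_size, latent_size))
    sampled_labels = np.random.randint(0, 10, 2 * batch_size).reshape(-1, 1)
    gen_loss = combined.train_on_batch(
        [noise, sampled_labels],
        [np.ones(2 * batch_size), np_utils.to_categorical(sampled_labels, 10)])
    return disc_loss, clf_loss, gen_loss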
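
Because every script saves weights at the end of each epoch, a trained classifier can also be reloaded and evaluated without rerunning the GAN loop. A sketch for the LeNet5 variant, assuming the params_lenet_epoch_*.hdf5 checkpoints written by da_lenet5_mnist.py are present in the working directory:

from glob import glob
import numpy as np
from keras.datasets import mnist
from keras.utils import np_utils
from da_lenet5_mnist import build_lenet

# rebuild the classifier and load the most recent checkpoint
lenet = build_lenet()
lenet.compile(loss='categorical_crossentropy', optimizer='adadelta', metrics=['accuracy'])
lenet.load_weights(sorted(glob('params_lenet_epoch_*.hdf5'))[-1])

# same preprocessing as the training scripts: shape (1, 28, 28), range [-1, 1]
(_, _), (X_test, y_test) = mnist.load_data()
X_test = np.expand_dims((X_test.astype(np.float32) - 127.5) / 127.5, axis=1)
Y_test = np_utils.to_categorical(y_test, 10)

(loss, accuracy) = lenet.evaluate(X_test, Y_test, batch_size=100, verbose=0)
print('[INFO] Test accuracy: {:.2f}%'.format(accuracy * 100))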