├── README.md
├── utils.py
└── demo_mnist.py

/README.md:
--------------------------------------------------------------------------------
1 | # BGADL
2 | 
3 | Pytorch implementation of the paper Bayesian Generative Active Deep Learning, based on a VAE-ACGAN model.
4 | 
5 | Requirements: Python 2.7 & Pytorch 0.3
6 | 
7 | To execute the code:
8 | 
9 |     $ python demo_mnist.py VAR_RATIOS
10 | 
11 | The command-line argument selects the acquisition function: VAR_RATIOS, BALD, MAX_ENTROPY, MEAN_STD, or RANDOM.
12 | 
--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
1 | import os, gzip, torch
2 | import torch.nn as nn
3 | import numpy as np
4 | import scipy.misc
5 | import imageio
6 | import matplotlib.pyplot as plt
7 | from torchvision import datasets, transforms
8 | import torch.nn.functional as F
9 | from torch.autograd import Variable
10 | 
11 | 
12 | def print_network(net):
13 |     num_params = 0
14 |     for param in net.parameters():
15 |         num_params += param.numel()
16 |     print(net)
17 |     print('Total number of parameters: %d' % num_params)
18 | 
19 | def save_images(images, size, image_path):
20 |     return imsave(images, size, image_path)
21 | 
22 | def imsave(images, size, path):
23 |     image = np.squeeze(merge(images, size))
24 |     return scipy.misc.imsave(path, image)
25 | 
26 | def merge(images, size):
27 |     h, w = images.shape[1], images.shape[2]
28 |     if (images.shape[3] in (3,4)):
29 |         c = images.shape[3]
30 |         img = np.zeros((h * size[0], w * size[1], c))
31 |         for idx, image in enumerate(images):
32 |             i = idx % size[1]
33 |             j = idx // size[1]
34 |             img[j * h:j * h + h, i * w:i * w + w, :] = image
35 |         return img
36 |     elif images.shape[3]==1:
37 |         img = np.zeros((h * size[0], w * size[1]))
38 |         for idx, image in enumerate(images):
39 |             i = idx % size[1]
40 |             j = idx // size[1]
41 |             img[j * h:j * h + h, i * w:i * w + w] = image[:,:,0]
42 |         return img
43 |     else:
44 |         raise ValueError('in merge(images,size) images parameter must have dimensions: HxW or HxWx3 or HxWx4')
45 | 
46 | 
47 | def generate_animation(path, num):
48 |     images = []
49 |     for e in range(num):
50 |         img_name = path + '_epoch%03d' % (e+1) + '.png'
51 |         images.append(imageio.imread(img_name))
52 |     imageio.mimsave(path + '_generate_animation.gif', images, fps=5)
53 | 
54 | 
55 | def loss_plot(hist, path = 'Train_hist.png', model_name = ''):
56 |     x = range(len(hist['D_loss']))
57 | 
58 |     y1 = hist['D_loss']
59 |     y2 = hist['G_loss']
60 |     y3 = hist['C_loss']
61 |     y4 = hist['E_loss']
62 | 
63 |     plt.plot(x, y1, label='D_loss')
64 |     plt.plot(x, y2, label='G_loss')
65 |     plt.plot(x, y3, label='C_loss')
66 |     plt.plot(x, y4, label='E_loss')
67 | 
68 |     plt.xlabel('Iter')
69 |     plt.ylabel('Loss')
70 | 
71 |     plt.legend(loc=4)
72 |     plt.grid(True)
73 |     plt.tight_layout()
74 | 
75 |     path = os.path.join(path, model_name + '_loss.png')
76 | 
77 |     plt.savefig(path)
78 | 
79 |     plt.close()
80 | 
81 | def initialize_weights(net):
82 |     for m in net.modules():
83 |         if isinstance(m, nn.Conv2d):
84 |             m.weight.data.normal_(0, 0.02)
85 |             m.bias.data.zero_()
86 |         elif isinstance(m, nn.ConvTranspose2d):
87 |             m.weight.data.normal_(0, 0.02)
88 |             m.bias.data.zero_()
89 |         elif isinstance(m, nn.Linear):
90 |             m.weight.data.normal_(0, 0.02)
91 |             m.bias.data.zero_()
--------------------------------------------------------------------------------
/demo_mnist.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # Pytorch implementation of the paper Bayesian Generative Active Deep Learning based on a VAE-ACGAN model.
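# Overview of the pipeline implemented in this script (summarised from the code below):
#   1. A small balanced labelled set (10 images per class, 100 in total) is drawn from MNIST;
#      the remaining 45,000 training images form the unlabelled pool (prepare_data).
#   2. A LeNet classifier is trained on the initial labelled set for 50 epochs (train/test).
#   3. For each of 100 acquisition iterations (acquire_points), a random pool subset of 2,000
#      images is scored with an MC-dropout acquisition function (20 stochastic forward passes),
#      the 100 highest-scoring images are moved into the labelled set, and the VAE-ACGAN
#      (encoder E, generator/decoder G, discriminator D, classifier C) is retrained for 50 epochs,
#      with C also trained on generated and reconstructed samples (VAEACGAN.train).
#   4. The test accuracy reached after each acquisition iteration is collected and saved to
#      test_acc_VAEACGAN_MNIST<ARG>.npy, where <ARG> is the acquisition function name.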
3 | # We thank for the help from 4 | # https://github.com/znxlwm/pytorch-generative-model-collections 5 | # and 6 | # https://github.com/fahadm/Bayesian-Active-Learning-Pytorch 7 | 8 | # Requirements: Python 2.7 & Pytorch 0.3 9 | 10 | # To execute the code: 11 | # python demo_mnist.py VAR_RATIOS 12 | 13 | 14 | from __future__ import print_function 15 | import sys 16 | import argparse, os 17 | import numpy as np 18 | import random 19 | import torch 20 | import torch.nn as nn 21 | import torch.nn.functional as F 22 | import torch.optim as optim 23 | from torchvision import datasets, transforms, models 24 | from scipy.misc import imread 25 | 26 | from scipy import linalg 27 | from torch.autograd import Variable 28 | from torch.nn.functional import adaptive_avg_pool2d 29 | 30 | import torch.utils.data as data_utils 31 | from scipy.stats import mode 32 | import utils, time, os, pickle 33 | from torch.optim import lr_scheduler 34 | 35 | cuda = True 36 | 37 | batch_size = 100 38 | input_dim, input_height, input_width = 1, 28, 28 39 | nb_classes = 10 40 | 41 | nb_filters = 32 42 | nb_pool = 2 43 | nb_conv = 4 44 | 45 | lr = 0.01 46 | momentum = 0.9 47 | log_interval = 100 48 | epochs = 50 49 | 50 | kwargs = {'num_workers': 1, 'pin_memory': True} if cuda else {} 51 | 52 | # Load training data (60,000 samples) 53 | train_loader_all = torch.utils.data.DataLoader( 54 | datasets.MNIST('../data', train=True, download=True, 55 | transform=transforms.Compose([ 56 | transforms.ToTensor(), 57 | transforms.Normalize((0.1307,), (0.3081,)) 58 | ])), 59 | batch_size=batch_size, shuffle=True, **kwargs) 60 | 61 | # Load test data (10,000 samples) 62 | test_loader = torch.utils.data.DataLoader( 63 | datasets.MNIST('../data', train=False, transform=transforms.Compose([ 64 | transforms.ToTensor(), 65 | transforms.Normalize((0.1307,), (0.3081,)) 66 | ])), 67 | batch_size=batch_size, shuffle=True, **kwargs) 68 | 69 | 70 | # The whole training data set is split into original and pool data sets 71 | 72 | def prepare_data(): 73 | train_data_all = train_loader_all.dataset.train_data 74 | train_target_all = train_loader_all.dataset.train_labels 75 | shuffler_idx = torch.randperm(train_target_all.size(0)) 76 | train_data_all = train_data_all[shuffler_idx] 77 | train_target_all = train_target_all[shuffler_idx] 78 | 79 | test_data = test_loader.dataset.test_data 80 | test_target = test_loader.dataset.test_labels 81 | 82 | 83 | train_data = [] 84 | train_target = [] 85 | 86 | train_data_pool = train_data_all[15000:60000, :, :] 87 | train_target_pool = train_target_all[15000:60000] 88 | 89 | # train_data_all = train_data_all[0:10000,:,:] 90 | # train_target_all = train_target_all[0:10000] 91 | 92 | train_data_pool.unsqueeze_(1) 93 | train_data_all.unsqueeze_(1) 94 | test_data.unsqueeze_(1) 95 | 96 | train_data_pool = train_data_pool.float() 97 | 98 | train_data_all = train_data_all.float() 99 | 100 | test_data = test_data.float() 101 | 102 | for i in range(0, 10): 103 | arr = np.array(np.where(train_target_all.numpy() == i)) 104 | idx = np.random.permutation(arr) 105 | data_i = train_data_all.numpy()[idx[0][0:10], :, :, :] # pick the first 10 elements of the shuffled idx array 106 | target_i = train_target_all.numpy()[idx[0][0:10]] 107 | train_data.append(data_i) 108 | train_target.append(target_i) 109 | 110 | train_data = np.concatenate(train_data, axis=0).astype("float32") 111 | train_target = np.concatenate(train_target, axis=0) 112 | 113 | return torch.from_numpy(train_data / 255).float(), torch.from_numpy(train_target), \ 
114 | train_data_pool / 255, train_target_pool, \ 115 | test_data / 255, test_target 116 | 117 | train_data, train_target, pool_data, pool_target, test_data, test_target = prepare_data() 118 | 119 | 120 | train_loader = None 121 | 122 | 123 | # Initialize the training data 124 | 125 | def initialize_train_set(): 126 | # Training Data set 127 | global train_loader 128 | global train_data 129 | train = data_utils.TensorDataset(train_data, train_target) 130 | train_loader = data_utils.DataLoader(train, batch_size=batch_size, shuffle=True) 131 | 132 | initialize_train_set() 133 | 134 | 135 | # Build a classifier 136 | 137 | class Net_Correct(nn.Module): 138 | def __init__(self, input_shape=(input_dim, input_width, input_height)): 139 | super(Net_Correct, self).__init__() 140 | 141 | self.conv = nn.Sequential( 142 | nn.Conv2d(input_dim, nb_filters, kernel_size=nb_conv), 143 | nn.ReLU(), 144 | nn.Conv2d(nb_filters, nb_filters, kernel_size=nb_conv), 145 | nn.ReLU(), 146 | nn.MaxPool2d(nb_pool), 147 | nn.Dropout2d(0.25), 148 | ) 149 | 150 | input_size = self._get_conv_output_size(input_shape) 151 | 152 | self.dense = nn.Sequential(nn.Linear(input_size, 128)) 153 | 154 | self.fc = nn.Sequential( 155 | nn.ReLU(), 156 | nn.Dropout(0.5), 157 | nn.Linear(128, nb_classes) 158 | ) 159 | 160 | def _get_conv_output_size(self, shape): 161 | bs = batch_size 162 | input = Variable(torch.rand(bs, *shape)) 163 | output_feat = self.conv(input) 164 | n_size = output_feat.data.view(bs, -1).size(1) 165 | return n_size 166 | 167 | def forward(self, x): 168 | x = self.conv(x) 169 | x = x.view(x.size(0), -1) 170 | x = self.fc(self.dense(x)) 171 | return x 172 | 173 | 174 | class lenet(nn.Module): 175 | def __init__(self): 176 | super(lenet, self).__init__() 177 | self.input_height = input_height 178 | self.input_width = input_width 179 | self.input_dim = input_dim 180 | self.class_num = 10 181 | 182 | self.conv1 = nn.Conv2d(self.input_dim, 6, (5, 5), padding=2) 183 | self.conv2 = nn.Conv2d(6, 16, (5, 5)) 184 | self.fc1 = nn.Linear(16 * 5 * 5, 120) 185 | self.fc2 = nn.Linear(120, 84) 186 | self.fc3 = nn.Linear(84, self.class_num) 187 | 188 | def forward(self, x): 189 | x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2)) 190 | x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2)) 191 | x = x.view(-1, self.num_flat_features(x)) 192 | x = F.relu(self.fc1(x)) 193 | x = F.relu(self.fc2(x)) 194 | x = self.fc3(x) 195 | return x 196 | 197 | def num_flat_features(self, x): 198 | size = x.size()[1:] 199 | num_features = 1 200 | for s in size: 201 | num_features *= s 202 | return num_features 203 | 204 | 205 | model = None 206 | optimizer = None 207 | model_scheduler = None 208 | 209 | 210 | def train(epoch): 211 | model.train() 212 | loss = None 213 | for batch_idx, (data, target) in enumerate(train_loader): 214 | if cuda: 215 | data, target = data.cuda(), target.cuda() 216 | data, target = Variable(data), Variable(target) 217 | 218 | optimizer.zero_grad() 219 | output = model(data) 220 | criterion = nn.CrossEntropyLoss() 221 | 222 | loss = criterion(output, target) 223 | loss.backward() 224 | optimizer.step() 225 | 226 | if epoch or epochs: 227 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 228 | epoch, batch_idx * len(data), len(train_loader.dataset), 229 | 100. 
* batch_idx / len(train_loader), loss.item())) 230 | 231 | return loss.item() 232 | 233 | 234 | def evaluate(input_data, stochastic=False, predict_classes=False): 235 | 236 | if stochastic: 237 | model.train() # we use dropout at test time 238 | else: 239 | model.eval() 240 | 241 | predictions = [] 242 | test_loss = 0 243 | correct = 0 244 | for data, target in input_data: 245 | if cuda: 246 | data, target = data.cuda(), target.cuda() 247 | data, target = Variable(data, volatile=True), Variable(target) 248 | 249 | output = model(data) 250 | 251 | softmaxed = F.softmax(output.cpu()) 252 | 253 | if predict_classes: 254 | predictions.extend(np.argmax(softmaxed.data.numpy(), axis=-1)) 255 | else: 256 | predictions.extend(softmaxed.data.numpy()) 257 | criterion = nn.CrossEntropyLoss() 258 | 259 | loss = criterion(output, target) 260 | 261 | test_loss += loss.item() 262 | pred = output.data.max(1)[1] # get the index of the max log-probability 263 | pred = pred.eq(target.data).cpu().data.float() 264 | correct += pred.sum() 265 | return test_loss, correct, predictions 266 | 267 | 268 | best_acc = 0 269 | 270 | def test(epoch): 271 | global train_loader 272 | global train_data 273 | global best_acc 274 | test = data_utils.TensorDataset(test_data, test_target) 275 | test_loader = data_utils.DataLoader(test, batch_size=batch_size, shuffle=True) 276 | 277 | test_loss = 0 278 | correct = 0 279 | 280 | test_loss, correct, _ = evaluate(test_loader, stochastic=False) 281 | 282 | test_loss /= len(test_loader) # loss function already averages over batch size 283 | test_acc = 100. * correct / len(test_loader.dataset) 284 | 285 | if epoch or epochs: 286 | print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format( 287 | test_loss, correct, len(test_loader.dataset), test_acc)) 288 | 289 | if test_acc > best_acc: 290 | print('Saving...') 291 | state = { 292 | 'net': model.state_dict(), 293 | 'acc': test_acc, 294 | 'epoch': epoch, 295 | } 296 | best_acc = test_acc 297 | 298 | return test_loss, best_acc 299 | 300 | 301 | def getAcquisitionFunction(name): 302 | if name == "BALD": 303 | return bald_acquisition 304 | elif name == "VAR_RATIOS": 305 | return variation_ratios_acquisition 306 | elif name == "MAX_ENTROPY": 307 | return max_entroy_acquisition 308 | elif name == "MEAN_STD": 309 | return mean_std_acquisition 310 | else: 311 | print ("ACQUSITION FUNCTION NOT IMPLEMENTED") 312 | sys.exit(-1) 313 | 314 | 315 | def acquire_points(argument, random_sample=False): 316 | global train_data 317 | global train_target 318 | global model 319 | global optimizer 320 | global model_scheduler 321 | 322 | acquisition_iterations = 100 323 | dropout_iterations = 20 # [50, 100, 500, 1000] 324 | Queries = 100 325 | nb_samples = 100 326 | pool_all = np.zeros(shape=(1)) 327 | 328 | if argument == "RANDOM": 329 | random_sample = True 330 | else: 331 | acquisition_function = getAcquisitionFunction(argument) 332 | 333 | test_acc_hist = [] 334 | 335 | for i in range(acquisition_iterations): 336 | pool_subset = 2000 337 | if random_sample: 338 | pool_subset = Queries 339 | print('---------------------------------') 340 | print ("Acquisition Iteration " + str(i)) 341 | pool_subset_dropout = torch.from_numpy(np.asarray(random.sample(range(0, pool_data.size(0)), pool_subset))) 342 | pool_data_dropout = pool_data[pool_subset_dropout] 343 | pool_target_dropout = pool_target[pool_subset_dropout] 344 | if random_sample is True: 345 | pool_index = np.array(range(0, Queries)) 346 | 347 | else: 348 | points_of_interest = 
acquisition_function(dropout_iterations, pool_data_dropout, pool_target_dropout) 349 | pool_index = points_of_interest.argsort()[-Queries:][::-1] 350 | 351 | pool_index = torch.from_numpy(np.flip(pool_index, axis=0).copy()) 352 | 353 | pool_all = np.append(pool_all, pool_index) 354 | 355 | pooled_data = pool_data_dropout[pool_index] # LongTensor 356 | pooled_target = pool_target_dropout[pool_index] # LongTensor 357 | 358 | train_data = torch.cat((train_data, pooled_data), 0) 359 | train_target = torch.cat((train_target, pooled_target), 0) 360 | 361 | #remove from pool set 362 | remove_pooled_points(pool_subset, pool_data_dropout, pool_target_dropout, pool_index) 363 | 364 | # Train the ACGAN here 365 | 366 | gan = VAEACGAN() 367 | test_acc = gan.train() 368 | test_acc_hist.append(test_acc) 369 | # gan.visualize_results(epochs) 370 | 371 | np.save("./test_acc_VAEACGAN_MNIST" + argument + ".npy", np.asarray(test_acc_hist)) 372 | 373 | 374 | def bald_acquisition(dropout_iterations, pool_data_dropout, pool_target_dropout): 375 | print ("BALD ACQUISITION FUNCTION") 376 | score_all = np.zeros(shape=(pool_data_dropout.size(0), nb_classes)) 377 | all_entropy = np.zeros(shape=pool_data_dropout.size(0)) 378 | 379 | # Validation Dataset 380 | pool = data_utils.TensorDataset(pool_data_dropout, pool_target_dropout) 381 | pool_loader = data_utils.DataLoader(pool, batch_size=batch_size, shuffle=True) 382 | start_time = time.time() 383 | for d in range(dropout_iterations): 384 | _, _, scores = evaluate(pool_loader, stochastic=True) 385 | 386 | scores = np.array(scores) 387 | #predictions = np.expand_dims(predictions, axis=1) 388 | score_all = score_all + scores 389 | 390 | log_score = np.log2(scores) 391 | entropy = - np.multiply(scores, log_score) 392 | entropy_per_dropout = np.sum(entropy, axis =1) 393 | all_entropy = all_entropy + entropy_per_dropout 394 | 395 | print("Dropout Iterations took --- %s seconds ---" % (time.time() - start_time)) 396 | # print (All_Dropout_Classes) 397 | avg_pi = np.divide(score_all, dropout_iterations) 398 | log_avg_pi = np.log2(avg_pi) 399 | entropy_avg_pi = - np.multiply(avg_pi, log_avg_pi) 400 | entropy_average_pi = np.sum(entropy_avg_pi, axis=1) 401 | 402 | g_x = entropy_average_pi 403 | average_entropy = np.divide(all_entropy, dropout_iterations) 404 | f_x = average_entropy 405 | 406 | u_x = g_x - f_x 407 | 408 | 409 | # THIS FINDS THE MINIMUM INDEX 410 | # a_1d = U_X.flatten() 411 | # x_pool_index = a_1d.argsort()[-Queries:] 412 | 413 | points_of_interest = u_x.flatten() 414 | return points_of_interest 415 | 416 | 417 | def max_entroy_acquisition(dropout_iterations, pool_data_dropout, pool_target_dropout): 418 | print("MAX ENTROPY FUNCTION") 419 | score_All = np.zeros(shape=(pool_data_dropout.size(0), nb_classes)) 420 | 421 | # Validation Dataset 422 | pool = data_utils.TensorDataset(pool_data_dropout, pool_target_dropout) 423 | pool_loader = data_utils.DataLoader(pool, batch_size=batch_size, shuffle=True) 424 | start_time = time.time() 425 | for d in range(dropout_iterations): 426 | _, _, predictions = evaluate(pool_loader, stochastic=True) 427 | 428 | predictions = np.array(predictions) 429 | #predictions = np.expand_dims(predictions, axis=1) 430 | score_All = score_All + predictions 431 | print("Dropout Iterations took --- %s seconds ---" % (time.time() - start_time)) 432 | # print (All_Dropout_Classes) 433 | Avg_Pi = np.divide(score_All, dropout_iterations) 434 | Log_Avg_Pi = np.log2(Avg_Pi) 435 | Entropy_Avg_Pi = - np.multiply(Avg_Pi, Log_Avg_Pi) 436 | 
Entropy_Average_Pi = np.sum(Entropy_Avg_Pi, axis=1) 437 | 438 | U_X = Entropy_Average_Pi 439 | 440 | points_of_interest = U_X.flatten() 441 | return points_of_interest 442 | 443 | 444 | def mean_std_acquisition(dropout_iterations, pool_data_dropout, pool_target_dropout): 445 | print("MEAN STD ACQUISITION FUNCTION") 446 | all_dropout_scores = np.zeros(shape=(pool_data_dropout.size(0), 1)) 447 | # Validation Dataset 448 | pool = data_utils.TensorDataset(pool_data_dropout, pool_target_dropout) 449 | pool_loader = data_utils.DataLoader(pool, batch_size=batch_size, shuffle=True) 450 | start_time = time.time() 451 | for d in range(dropout_iterations): 452 | _, _, scores = evaluate(pool_loader, stochastic=True) 453 | 454 | scores = np.array(scores) 455 | all_dropout_scores = np.append(all_dropout_scores, scores, axis=1) 456 | print("Dropout Iterations took --- %s seconds ---" % (time.time() - start_time)) 457 | std_devs= np.zeros(shape = (pool_data_dropout.size(0), nb_classes)) 458 | sigma = np.zeros(shape = (pool_data_dropout.size(0))) 459 | for t in range(pool_data_dropout.size(0)): 460 | for r in range( nb_classes ): 461 | L = np.array([0]) 462 | for k in range(r + 1, all_dropout_scores.shape[1], 10): 463 | L = np.append(L, all_dropout_scores[t, k]) 464 | 465 | L_std = np.std(L[1:]) 466 | std_devs[t, r] = L_std 467 | E = std_devs[t, :] 468 | sigma[t] = sum(E)/nb_classes 469 | 470 | 471 | points_of_interest = sigma.flatten() 472 | return points_of_interest 473 | 474 | 475 | def variation_ratios_acquisition(dropout_iterations, pool_data_dropout, pool_target_dropout): 476 | # print("VARIATIONAL RATIOS ACQUSITION FUNCTION") 477 | All_Dropout_Classes = np.zeros(shape=(pool_data_dropout.size(0), 1)) 478 | # Validation Dataset 479 | pool = data_utils.TensorDataset(pool_data_dropout, pool_target_dropout) 480 | pool_loader = data_utils.DataLoader(pool, batch_size=batch_size, shuffle=True) 481 | start_time = time.time() 482 | for d in range(dropout_iterations): 483 | _, _, predictions = evaluate(pool_loader, stochastic=True, predict_classes=True) 484 | 485 | predictions = np.array(predictions) 486 | predictions = np.expand_dims(predictions, axis=1) 487 | All_Dropout_Classes = np.append(All_Dropout_Classes, predictions, axis=1) 488 | # print("Dropout Iterations took --- %s seconds ---" % (time.time() - start_time)) 489 | # print (All_Dropout_Classes) 490 | Variation = np.zeros(shape=(pool_data_dropout.size(0))) 491 | for t in range(pool_data_dropout.size(0)): 492 | L = np.array([0]) 493 | for d_iter in range(dropout_iterations): 494 | L = np.append(L, All_Dropout_Classes[t, d_iter + 1]) 495 | Predicted_Class, Mode = mode(L[1:]) 496 | v = np.array([1 - Mode / float(dropout_iterations)]) 497 | Variation[t] = v 498 | points_of_interest = Variation.flatten() 499 | return points_of_interest 500 | 501 | 502 | def remove_pooled_points(pool_subset, pool_data_dropout, pool_target_dropout, pool_index): 503 | global pool_data 504 | global pool_target 505 | np_data = pool_data.numpy() 506 | np_target = pool_target.numpy() 507 | pool_data_dropout = pool_data_dropout.numpy() 508 | pool_target_dropout = pool_target_dropout.numpy() 509 | np_index = pool_index.numpy() 510 | np.delete(np_data, pool_subset, axis=0) 511 | np.delete(np_target, pool_subset, axis=0) 512 | 513 | np.delete(pool_data_dropout, np_index, axis=0) 514 | np.delete(pool_target_dropout, np_index, axis=0) 515 | 516 | np_data = np.concatenate((np_data, pool_data_dropout), axis=0) 517 | np_target = np.concatenate((np_target, pool_target_dropout), axis=0) 
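    # NOTE: np.delete returns a new array and does not modify its argument in place, so the four
    # np.delete(...) calls above are effectively no-ops: nothing is removed from np_data/np_target
    # or from the dropout subset before it is concatenated back, and the pool therefore grows
    # rather than shrinks. A fix would assign the results, e.g.
    # np_data = np.delete(np_data, indices, axis=0), with the caller also passing the pool indices
    # of the sampled subset (pool_subset_dropout) so that the correct rows can be dropped.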
518 | 519 | pool_data = torch.from_numpy(np_data) 520 | pool_target = torch.from_numpy(np_target) 521 | 522 | 523 | 524 | # Build an encoder 525 | 526 | class encoder(nn.Module): 527 | def __init__(self): 528 | super(encoder, self).__init__() 529 | self.output_dim = 100 530 | 531 | self.input_height = 28 532 | self.input_width = 28 533 | self.input_dim = 1 534 | 535 | self.conv = nn.Sequential( 536 | nn.Conv2d(self.input_dim, 64, 4, stride=2, padding=1), # 64 x 14 x 14 537 | nn.BatchNorm2d(64), 538 | nn.ReLU(), 539 | 540 | nn.MaxPool2d(2, stride=2), # 64 x 7 x 7 541 | 542 | nn.Conv2d(64, 128, 4, stride=2, padding=1), # 128 x 3 x 3 543 | nn.BatchNorm2d(128), 544 | nn.ReLU(), 545 | 546 | nn.Conv2d(128, 1, 4, stride=2, padding=1), # 1 x 1 x 1 547 | nn.ReLU(), 548 | 549 | ) 550 | 551 | self.fc1 = nn.Sequential( 552 | nn.Linear(1, 1024), 553 | nn.ReLU(), 554 | ) 555 | 556 | self.fc21 = nn.Sequential( 557 | nn.Linear(1024, self.output_dim), 558 | nn.Sigmoid(), 559 | ) 560 | 561 | self.fc22 = nn.Sequential( 562 | nn.Linear(1024, self.output_dim), 563 | nn.Sigmoid(), 564 | ) 565 | 566 | utils.initialize_weights(self) 567 | 568 | def forward(self, x): 569 | x = self.conv(x) 570 | x = self.fc1(x) 571 | mu = self.fc21(x) 572 | log_var = self.fc22(x) 573 | return mu, log_var 574 | 575 | 576 | def reparameterize(mu, log_var): 577 | std = log_var.mul(0.5).exp_() 578 | eps = Variable(std.data.new(std.size()).normal_()) 579 | 580 | return eps.mul(std).add_(mu) 581 | 582 | # # Alternative 583 | # std = torch.exp(0.5 * log_var) 584 | # eps = torch.randn_like(std) 585 | # return eps.mul(std).add_(mu) 586 | 587 | 588 | def latent_loss(mu, log_var): 589 | 590 | std = log_var.mul(0.5).exp_() 591 | mean_sq = mu * mu 592 | stddev_sq = std * std 593 | return 0.5 * torch.mean(mean_sq + stddev_sq - torch.log(stddev_sq) - 1) 594 | 595 | # # Alternative 596 | # return -0.5 * torch.sum(1 + log_var - mu.pow(2) - log_var.exp()) 597 | 598 | 599 | # Build a generator/decoder 600 | 601 | class generator(nn.Module): 602 | # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) 603 | # Architecture : FC1024_BR-FC7x7x128_BR-(64)4dc2s_BR-(1)4dc2s_S 604 | def __init__(self): 605 | super(generator, self).__init__() 606 | 607 | self.input_height = 28 608 | self.input_width = 28 609 | self.input_dim = 100 + 10 610 | self.output_dim = 1 611 | 612 | self.fc = nn.Sequential( 613 | nn.Linear(self.input_dim, 1024), 614 | nn.BatchNorm1d(1024), 615 | nn.ReLU(), 616 | nn.Linear(1024, 128 * (self.input_height // 4) * (self.input_width // 4)), 617 | nn.BatchNorm1d(128 * (self.input_height // 4) * (self.input_width // 4)), 618 | nn.ReLU(), 619 | ) 620 | self.deconv = nn.Sequential( 621 | nn.ConvTranspose2d(128, 64, 4, 2, 1), 622 | nn.BatchNorm2d(64), 623 | nn.ReLU(), 624 | nn.ConvTranspose2d(64, self.output_dim, 4, 2, 1), 625 | nn.Sigmoid(), 626 | ) 627 | utils.initialize_weights(self) 628 | 629 | def forward(self, input, label): 630 | x = torch.cat([input, label], 1) 631 | x = self.fc(x) 632 | x = x.view(-1, 128, (self.input_height // 4), (self.input_width // 4)) 633 | x = self.deconv(x) 634 | 635 | return x 636 | 637 | 638 | # Build a discriminator 639 | 640 | class discriminator(nn.Module): 641 | # Network Architecture is exactly same as in infoGAN (https://arxiv.org/abs/1606.03657) 642 | # Architecture : (64)4c2s-(128)4c2s_BL-FC1024_BL-FC1_S 643 | def __init__(self): 644 | super(discriminator, self).__init__() 645 | self.input_height = 28 646 | self.input_width = 28 647 | self.input_dim = 1 648 | 
self.output_dim = 1 649 | 650 | self.conv = nn.Sequential( 651 | nn.Conv2d(self.input_dim, 64, 4, 2, 1), 652 | nn.LeakyReLU(0.2), 653 | nn.Conv2d(64, 128, 4, 2, 1), 654 | nn.BatchNorm2d(128), 655 | nn.LeakyReLU(0.2), 656 | ) 657 | self.fc1 = nn.Sequential( 658 | nn.Linear(128 * (self.input_height // 4) * (self.input_width // 4), 1024), 659 | nn.BatchNorm1d(1024), 660 | nn.LeakyReLU(0.2), 661 | ) 662 | self.dc = nn.Sequential( 663 | nn.Linear(1024, self.output_dim), 664 | nn.Sigmoid(), 665 | ) 666 | utils.initialize_weights(self) 667 | 668 | def forward(self, input): 669 | x = self.conv(input) 670 | x = x.view(-1, 128 * (self.input_height // 4) * (self.input_width // 4)) 671 | x = self.fc1(x) 672 | d = self.dc(x) 673 | 674 | return d 675 | 676 | 677 | # Initialize the model 678 | 679 | def init_model(): 680 | global model 681 | global optimizer 682 | global model_scheduler 683 | 684 | # model = Net_Correct() 685 | model = lenet() 686 | 687 | if cuda: 688 | model.cuda() 689 | 690 | decay = 3.5 / train_data.size(0) 691 | 692 | # optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5, weight_decay=decay) 693 | 694 | # optimizer = optim.Adam(model.parameters(), lr=0.01, betas=(0.5, 0.999), weight_decay=decay) 695 | 696 | optimizer = optim.Adadelta(model.parameters(), lr=1.0, rho=0.9, eps=1e-6, weight_decay=decay) 697 | model_scheduler = lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5, last_epoch=-1) 698 | 699 | 700 | # VAE-ACGAN training 701 | 702 | class VAEACGAN(object): 703 | def __init__(self): 704 | # parameters 705 | self.epoch = 50 706 | self.sample_num = 100 707 | self.batch_size = 100 708 | self.save_dir = 'models' 709 | self.result_dir = 'results' 710 | self.log_dir = 'logs' 711 | self.gpu_mode = True 712 | self.model_name = 'VAEACGAN' 713 | 714 | # networks init 715 | init_model() # Classifier 716 | self.E = encoder() # Encoder 717 | self.G = generator() # Generator/Decoder 718 | self.D = discriminator() # Discriminator 719 | 720 | self.E_optimizer = optim.Adadelta(self.E.parameters(), lr=1.0, rho=0.9, eps=1e-6) 721 | self.G_optimizer = optim.Adadelta(self.G.parameters(), lr=1.0, rho=0.9, eps=1e-6) 722 | self.D_optimizer = optim.Adadelta(self.D.parameters(), lr=1.0, rho=0.9, eps=1e-6) 723 | 724 | self.E_scheduler = lr_scheduler.StepLR(self.E_optimizer, step_size=8, gamma=0.5, last_epoch=-1) 725 | self.G_scheduler = lr_scheduler.StepLR(self.G_optimizer, step_size=8, gamma=0.5, last_epoch=-1) 726 | self.D_scheduler = lr_scheduler.StepLR(self.D_optimizer, step_size=8, gamma=0.5, last_epoch=-1) 727 | 728 | 729 | if self.gpu_mode: 730 | self.G.cuda() 731 | self.D.cuda() 732 | self.E.cuda() 733 | self.BCE_loss = nn.BCELoss().cuda() 734 | # self.BCE_loss = nn.BCELoss().cuda() 735 | self.CE_loss = nn.CrossEntropyLoss().cuda() 736 | self.MSE_loss = nn.MSELoss().cuda() 737 | else: 738 | self.BCE_loss = nn.BCELoss() 739 | self.CE_loss = nn.CrossEntropyLoss() 740 | self.MSE_loss = nn.MSELoss() 741 | 742 | self.data_X = train_data 743 | 744 | print(self.data_X.size()) 745 | 746 | y_train = np.asarray(train_target).astype(np.int) 747 | y_train_vec = np.zeros((len(y_train), 10), dtype=np.float) 748 | for i, label in enumerate(y_train): 749 | y_train_vec[i, y_train[i]] = 1 750 | 751 | self.data_Y = torch.from_numpy(y_train_vec).type(torch.FloatTensor) 752 | 753 | self.y_train = y_train 754 | self.X_test = test_data 755 | 756 | y_test = np.asarray(test_target).astype(np.int) 757 | y_test_vec = np.zeros((len(y_test), 10), dtype=np.float) 758 | for i, label in enumerate(y_test): 759 | 
y_test_vec[i, y_test[i]] = 1 760 | 761 | self.y_test_vec = torch.from_numpy(y_test_vec).type(torch.FloatTensor) 762 | 763 | self.z_dim = 100 764 | self.y_dim = 10 765 | 766 | # fixed noise & condition 767 | self.sample_z_ = torch.zeros((self.sample_num, self.z_dim)) 768 | for i in range(10): 769 | self.sample_z_[i*self.y_dim] = torch.rand(1, self.z_dim) 770 | for j in range(1, self.y_dim): 771 | self.sample_z_[i*self.y_dim + j] = self.sample_z_[i*self.y_dim] 772 | 773 | temp = torch.zeros((10, 1)) 774 | for i in range(self.y_dim): 775 | temp[i, 0] = i 776 | 777 | temp_y = torch.zeros((self.sample_num, 1)) 778 | for i in range(10): 779 | temp_y[i*self.y_dim: (i+1)*self.y_dim] = temp 780 | 781 | self.sample_y_ = torch.zeros((self.sample_num, self.y_dim)) 782 | self.sample_y_.scatter_(1, temp_y.type(torch.LongTensor), 1) 783 | if self.gpu_mode: 784 | self.sample_z_, self.sample_y_ = Variable(self.sample_z_.cuda(), volatile=True), Variable(self.sample_y_.cuda(), volatile=True) 785 | else: 786 | self.sample_z_, self.sample_y_ = Variable(self.sample_z_, volatile=True), Variable(self.sample_y_, volatile=True) 787 | 788 | def train(self): 789 | self.train_hist = {} 790 | self.train_hist['E_loss'] = [] 791 | self.train_hist['D_loss'] = [] 792 | self.train_hist['G_loss'] = [] 793 | self.train_hist['C_loss'] = [] 794 | self.train_hist['per_epoch_time'] = [] 795 | self.train_hist['total_time'] = [] 796 | 797 | if self.gpu_mode: 798 | self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1).cuda()), Variable(torch.zeros(self.batch_size, 1).cuda()) 799 | else: 800 | self.y_real_, self.y_fake_ = Variable(torch.ones(self.batch_size, 1)), Variable(torch.zeros(self.batch_size, 1)) 801 | 802 | self.E.train() 803 | self.D.train() 804 | model.train() 805 | print('training start!!') 806 | start_time = time.time() 807 | 808 | for epoch in range(self.epoch): 809 | self.G.train() 810 | model_scheduler.step() 811 | self.E_scheduler.step() 812 | self.G_scheduler.step() 813 | self.D_scheduler.step() 814 | epoch_start_time = time.time() 815 | for iter in range(len(self.data_X) // self.batch_size): 816 | x_ = self.data_X[iter*self.batch_size:(iter+1)*self.batch_size] 817 | y_vec_ = self.data_Y[iter*self.batch_size:(iter+1)*self.batch_size] 818 | # z_ = torch.Tensor(self.batch_size, self.z_dim).normal_(0, 1) 819 | z_ = torch.randn((self.batch_size, self.z_dim)) 820 | if self.gpu_mode: 821 | x_, y_vec_, z_ = Variable(x_.cuda()), Variable(y_vec_.cuda()), Variable(z_.cuda()) 822 | else: 823 | x_, y_vec_, z_ = Variable(x_), Variable(y_vec_), Variable(z_) 824 | 825 | 826 | # Fix G, update E network 827 | 828 | self.E_optimizer.zero_grad() 829 | 830 | mu, log_var = self.E(x_) 831 | noise = reparameterize(mu, log_var) 832 | noise = noise.view(self.batch_size, 100) 833 | output = self.G(noise, y_vec_) 834 | 835 | # Compute the decoder loss that will be added to network E 836 | ll = latent_loss(mu, log_var) 837 | E_loss = self.MSE_loss(output, x_) # / self.batch_size 838 | E_loss += ll 839 | 840 | self.train_hist['E_loss'].append(E_loss.item()) 841 | 842 | E_loss.backward(retain_graph=True) 843 | self.E_optimizer.step() 844 | 845 | # Fix E, D, C, update G network 846 | self.G_optimizer.zero_grad() 847 | 848 | # Compute the GAN loss that will be added to the Generator G 849 | G_ = self.G(z_, y_vec_) 850 | 851 | D_fake = self.D(G_) 852 | C_fake = model(G_) 853 | 854 | G_loss= self.BCE_loss(D_fake, self.y_real_) 855 | C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1]) 856 | 857 | G_loss += C_fake_loss 858 | 859 
| # Compute the decoder loss that will be added to the Generator G 860 | 861 | mu, log_var = self.E(x_) 862 | noise = reparameterize(mu, log_var) 863 | noise = noise.view(self.batch_size, 100) 864 | G_dec = self.G(noise, y_vec_) 865 | 866 | 867 | G_dec_loss = self.MSE_loss(G_dec, x_) # / self.batch_size 868 | # G_dec_loss = F.binary_cross_entropy(G_dec, x_, size_average=False) / self.batch_size 869 | 870 | G_loss += 0.75*G_dec_loss 871 | 872 | self.train_hist['G_loss'].append(G_loss.item()) 873 | 874 | G_loss.backward(retain_graph=True) 875 | self.G_optimizer.step() 876 | 877 | # Fix G, update D, C network 878 | 879 | self.D_optimizer.zero_grad() 880 | optimizer.zero_grad() 881 | 882 | D_real = self.D(x_) 883 | C_real = model(x_) 884 | 885 | D_real_loss = self.BCE_loss(D_real, self.y_real_) 886 | C_real_loss = self.CE_loss(C_real, torch.max(y_vec_, 1)[1]) 887 | 888 | G_ = self.G(z_, y_vec_) 889 | 890 | D_fake = self.D(G_) 891 | C_fake = model(G_) 892 | D_fake_loss = self.BCE_loss(D_fake, self.y_fake_) 893 | C_fake_loss = self.CE_loss(C_fake, torch.max(y_vec_, 1)[1]) 894 | 895 | 896 | mu, log_var = self.E(x_) 897 | noise = reparameterize(mu, log_var) 898 | noise = noise.view(self.batch_size, 100) 899 | # output = self.G(noise, y_vec_) 900 | 901 | G_dec = self.G(noise, y_vec_) 902 | 903 | D_dec = self.D(G_dec) 904 | C_dec = model(G_dec) 905 | D_dec_loss = self.BCE_loss(D_dec, self.y_fake_) 906 | C_dec_loss = self.CE_loss(C_dec, torch.max(y_vec_, 1)[1]) 907 | 908 | 909 | D_loss = D_real_loss + D_fake_loss + D_dec_loss 910 | C_loss = C_real_loss + C_fake_loss + C_dec_loss 911 | 912 | self.train_hist['D_loss'].append(D_loss.item()) 913 | self.train_hist['C_loss'].append(C_loss.item()) 914 | 915 | D_loss.backward(retain_graph=True) 916 | self.D_optimizer.step() 917 | 918 | C_loss.backward(retain_graph=True) 919 | optimizer.step() 920 | 921 | 922 | if ((iter + 1) % 100) == 0: 923 | print("Epoch: [%2d] [%4d/%4d] D_loss: %.8f, G_loss: %.8f, C_loss: %.8f, E_loss: %.8f" % 924 | ((epoch + 1), (iter + 1), len(self.data_X) // self.batch_size, D_loss.item(), G_loss.item() 925 | , C_loss.item(), E_loss.item())) 926 | 927 | self.train_hist['per_epoch_time'].append(time.time() - epoch_start_time) 928 | self.visualize_results((epoch+1)) 929 | 930 | if epoch==self.epoch-1: 931 | model.eval() 932 | _, test_acc_ = test(epoch) 933 | 934 | print("Training finish!... 
save training results") 935 | 936 | self.save() 937 | utils.generate_animation(self.result_dir + '/' + self.model_name + '/' + self.model_name, 938 | self.epoch) 939 | utils.loss_plot(self.train_hist, os.path.join(self.save_dir, self.model_name), self.model_name) 940 | 941 | return test_acc_ 942 | 943 | def visualize_results(self, epoch, fix=True): 944 | self.G.eval() 945 | 946 | if not os.path.exists(self.result_dir + '/' + self.model_name): 947 | os.makedirs(self.result_dir + '/' + self.model_name) 948 | 949 | image_frame_dim = int(np.floor(np.sqrt(self.sample_num))) 950 | 951 | if fix: 952 | """ fixed noise """ 953 | samples = self.G(self.sample_z_, self.sample_y_) 954 | else: 955 | """ random noise """ 956 | temp = torch.LongTensor(self.batch_size, 1).random_() % 10 957 | sample_y_ = torch.FloatTensor(self.batch_size, 10) 958 | sample_y_.zero_() 959 | sample_y_.scatter_(1, temp, 1) 960 | if self.gpu_mode: 961 | sample_z_, sample_y_ = Variable(torch.randn((self.batch_size, self.z_dim)).cuda(), volatile=True), \ 962 | Variable(sample_y_.cuda(), volatile=True) 963 | else: 964 | sample_z_, sample_y_ = Variable(torch.randn((self.batch_size, self.z_dim)), volatile=True), \ 965 | Variable(sample_y_, volatile=True) 966 | 967 | samples = self.G(sample_z_, sample_y_) 968 | 969 | if self.gpu_mode: 970 | samples = samples.cpu().data.numpy().transpose(0, 2, 3, 1) 971 | else: 972 | samples = samples.data.numpy().transpose(0, 2, 3, 1) 973 | 974 | utils.save_images(samples[:image_frame_dim * image_frame_dim, :, :, :], [image_frame_dim, image_frame_dim], 975 | self.result_dir + '/' + self.model_name + '/' + self.model_name + '_epoch%03d' % epoch + '.png') 976 | 977 | def save(self): 978 | save_dir = os.path.join(self.save_dir, self.model_name) 979 | 980 | if not os.path.exists(save_dir): 981 | os.makedirs(save_dir) 982 | 983 | torch.save(self.G.state_dict(), os.path.join(save_dir, self.model_name + '_G.pkl')) 984 | torch.save(self.D.state_dict(), os.path.join(save_dir, self.model_name + '_D.pkl')) 985 | torch.save(model.state_dict(), os.path.join(save_dir, self.model_name + '_C.pkl')) 986 | torch.save(self.E.state_dict(), os.path.join(save_dir, self.model_name + '_E.pkl')) 987 | 988 | with open(os.path.join(save_dir, self.model_name + '_history.pkl'), 'wb') as f: 989 | pickle.dump(self.train_hist, f) 990 | 991 | def load(self): 992 | save_dir = os.path.join(self.save_dir, self.model_name) 993 | 994 | self.G.load_state_dict(torch.load(os.path.join(save_dir, self.model_name + '_G.pkl'))) 995 | self.D.load_state_dict(torch.load(os.path.join(save_dir, self.model_name + '_D.pkl'))) 996 | model.load_state_dict(torch.load(os.path.join(save_dir, self.model_name + '_C.pkl'))) 997 | self.E.load_state_dict(torch.load(os.path.join(save_dir, self.model_name + '_E.pkl'))) 998 | 999 | 1000 | def main(argv): 1001 | start_time = time.time() 1002 | print (str(argv[0])) 1003 | 1004 | initialize_train_set() 1005 | 1006 | init_model() 1007 | print ("Training without acquisition") 1008 | for epoch in range(1, epochs + 1): 1009 | train(epoch) 1010 | test(epoch) 1011 | 1012 | print ("acquiring points") 1013 | acquire_points(str(argv[0])) 1014 | 1015 | print ("Training again") 1016 | gan = VAEACGAN() 1017 | _, test_acc = gan.train() 1018 | 1019 | print("--- %s seconds ---" % (time.time() - start_time)) 1020 | 1021 | 1022 | if __name__ == '__main__': 1023 | main(sys.argv[1:]) 1024 | 1025 | --------------------------------------------------------------------------------
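For reference, a minimal sketch for inspecting the accuracy history that acquire_points saves to the working directory, assuming a completed run with the VAR_RATIOS argument (so the file is named test_acc_VAEACGAN_MNISTVAR_RATIOS.npy; the output file name of the sketch itself is arbitrary):

import numpy as np
import matplotlib.pyplot as plt

# Assumes a completed run with the VAR_RATIOS acquisition function.
acc = np.load('test_acc_VAEACGAN_MNISTVAR_RATIOS.npy')

# One test-accuracy value (in percent) is recorded per acquisition iteration,
# i.e. per batch of 100 newly labelled images.
plt.plot(np.arange(1, len(acc) + 1), acc, marker='o')
plt.xlabel('Acquisition iteration')
plt.ylabel('Test accuracy (%)')
plt.grid(True)
plt.savefig('test_acc_curve.png')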