├── M1+M2 ├── analogy.py ├── model.py ├── test.py ├── train.py └── visualize.py ├── M1 ├── model.py ├── train.py └── visualize.py ├── M2 ├── analogy.py ├── model.py ├── test.py ├── train.py └── visualize.py ├── README.md ├── args.py ├── mnist_tools.py ├── util.py ├── vae_m1.py └── vae_m2.py /M1+M2/analogy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time, pylab 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | import matplotlib.patches as mpatches 6 | sys.path.append(os.path.split(os.getcwd())[0]) 7 | import util 8 | from args import args 9 | from model import conf1, vae1, conf2, vae2 10 | from vae_m1 import GaussianM1VAE 11 | 12 | try: 13 | os.mkdir(args.vis_dir) 14 | except: 15 | pass 16 | 17 | dist = "bernoulli" 18 | if isinstance(vae1, GaussianM1VAE): 19 | dist = "gaussian" 20 | dataset = util.load_images(args.test_image_dir, dist=dist) 21 | 22 | n_analogies = 10 23 | n_image_channels = 1 24 | image_width = 28 25 | image_height = 28 26 | x = util.sample_x_variable(n_analogies, conf1.ndim_x, dataset, gpu_enabled=conf1.gpu_enabled) 27 | z1 = vae1.encoder(x, test=True) 28 | y = vae2.sample_x_y(z1, test=True) 29 | z2 = vae2.encode_xy_z(z1, y, test=True) 30 | 31 | fig = pylab.gcf() 32 | fig.set_size_inches(16.0, 16.0) 33 | pylab.clf() 34 | if n_image_channels == 1: 35 | pylab.gray() 36 | xp = np 37 | if conf1.gpu_enabled: 38 | x.to_cpu() 39 | xp = cuda.cupy 40 | for m in xrange(n_analogies): 41 | pylab.subplot(n_analogies, conf2.ndim_y + 2, m * 12 + 1) 42 | if n_image_channels == 1: 43 | pylab.imshow(x.data[m].reshape((image_width, image_height)), interpolation="none") 44 | elif n_image_channels == 3: 45 | pylab.imshow(x.data[m].reshape((n_image_channels, image_width, image_height)), interpolation="none") 46 | pylab.axis("off") 47 | analogy_y = xp.identity(conf2.ndim_y, dtype=xp.float32) 48 | analogy_y = Variable(analogy_y) 49 | for m in xrange(n_analogies): 50 | base_z2 = xp.empty((conf2.ndim_y, z2.data.shape[1]), dtype=xp.float32) 51 | for n in xrange(conf2.ndim_y): 52 | base_z2[n] = z2.data[m] 53 | base_z2 = Variable(base_z2) 54 | _z1 = vae2.decode_zy_x(base_z2, analogy_y, test=True, apply_f=True) 55 | _x = vae1.decoder(_z1, test=True, apply_f=True) 56 | if conf1.gpu_enabled: 57 | _x.to_cpu() 58 | for n in xrange(conf2.ndim_y): 59 | pylab.subplot(n_analogies, conf2.ndim_y + 2, m * 12 + 3 + n) 60 | if n_image_channels == 1: 61 | pylab.imshow(_x.data[n].reshape((image_width, image_height)), interpolation="none") 62 | elif n_image_channels == 3: 63 | pylab.imshow(_x.data[n].reshape((n_image_channels, image_width, image_height)), interpolation="none") 64 | pylab.axis("off") 65 | 66 | pylab.savefig("{:s}/analogy.png".format(args.vis_dir)) 67 | 68 | -------------------------------------------------------------------------------- /M1+M2/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from args import args 3 | from vae_m1 import BernoulliM1VAE, GaussianM1VAE, Conf as Conf1 4 | from vae_m2 import BernoulliM2VAE, GaussianM2VAE, Conf as Conf2 5 | 6 | # M1 7 | conf1 = Conf1() 8 | conf1.gpu_enabled = True if args.gpu_enabled == 1 else False 9 | conf1.ndim_x = 28 * 28 10 | conf1.ndim_z = 50 11 | conf1.encoder_apply_dropout = False 12 | conf1.decoder_apply_dropout = False 13 | conf1.encoder_apply_batchnorm = True 14 | conf1.decoder_apply_batchnorm = True 15 | conf1.encoder_apply_batchnorm_to_input = True 16 | conf1.decoder_apply_batchnorm_to_input = True 17 | conf1.gradient_clipping = 1.0 18 | conf1.encoder_hidden_units = [600, 600] 19 | conf1.decoder_hidden_units = [600, 600] 20 | vae1 = BernoulliM1VAE(conf1, name="m1") 21 | vae1.load(args.model_dir) 22 | 23 | # M2 24 | conf2 = Conf2() 25 | conf2.gpu_enabled = True if args.gpu_enabled == 1 else False 26 | conf2.ndim_x = 50 27 | conf2.ndim_z = 50 28 | conf2.encoder_xy_z_hidden_units = [500] 29 | conf2.encoder_x_y_hidden_units = [500] 30 | conf2.decoder_hidden_units = [500] 31 | conf2.encoder_xy_z_apply_dropout = False 32 | conf2.encoder_x_y_apply_dropout = False 33 | conf2.decoder_apply_dropout = False 34 | conf2.encoder_xy_z_apply_batchnorm = True 35 | conf2.encoder_x_y_apply_batchnorm = True 36 | conf2.decoder_apply_batchnorm = True 37 | conf2.encoder_xy_z_apply_batchnorm_to_input = True 38 | conf2.encoder_x_y_apply_batchnorm_to_input = True 39 | conf2.decoder_apply_batchnorm_to_input = True 40 | conf2.gradient_clipping = 5.0 41 | vae2 = GaussianM2VAE(conf2, name="m2") 42 | vae2.load(args.model_dir) -------------------------------------------------------------------------------- /M1+M2/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | sys.path.append(os.path.split(os.getcwd())[0]) 6 | import util 7 | from args import args 8 | from model import conf1, vae1, conf2, vae2 9 | from vae_m1 import GaussianM1VAE 10 | 11 | dist = "bernoulli" 12 | if isinstance(vae1, GaussianM1VAE): 13 | dist = "gaussian" 14 | dataset, labels = util.load_labeled_images(args.test_image_dir, dist=dist) 15 | num_data = len(dataset) 16 | 17 | x_labeled, _, label_ids = util.sample_x_and_label_variables(num_data, conf1.ndim_x, conf2.ndim_y, dataset, labels, gpu_enabled=False) 18 | if conf1.gpu_enabled: 19 | x_labeled.to_gpu() 20 | z_labeled = vae1.encoder(x_labeled, test=True) 21 | prediction = vae2.sample_x_label(z_labeled, test=True, argmax=True) 22 | 23 | correct = 0 24 | for i in xrange(num_data): 25 | if prediction[i] == label_ids.data[i]: 26 | correct += 1 27 | 28 | print "test:: classification accuracy: {:.3f}".format(correct / float(num_data)) 29 | 30 | -------------------------------------------------------------------------------- /M1+M2/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | import pandas as pd 6 | sys.path.append(os.path.split(os.getcwd())[0]) 7 | import util 8 | from args import args 9 | from model import conf1, vae1, conf2, vae2 10 | from vae_m1 import GaussianM1VAE 11 | 12 | dist = "bernoulli" 13 | if isinstance(vae1, GaussianM1VAE): 14 | dist = "gaussian" 15 | dataset, labels = util.load_labeled_images(args.train_image_dir, dist=dist) 16 | 17 | max_epoch = 1000 18 | vae1_num_trains_per_epoch = 5000 19 | vae2_num_trains_per_epoch = 5000 20 | batchsize = 100 21 | 22 | # Create labeled/unlabeled split in training set 23 | num_types_of_label = 10 24 | num_labeled_data = args.num_labeled_data 25 | if num_labeled_data < batchsize: 26 | batchsize = num_labeled_data 27 | num_validation_data = 10000 28 | labeled_dataset, labels, unlabeled_dataset, validation_dataset, validation_labels = util.create_semisupervised(dataset, labels, num_validation_data, num_labeled_data, num_types_of_label) 29 | print "labels:", labels 30 | # alpha = 0.1 * len(dataset) 31 | # alpha = 0.1 * len(dataset) / len(labeled_dataset) 32 | alpha = 1 33 | print "alpha:", alpha 34 | print "dataset:: labeled: {:d} unlabeled: {:d} validation: {:d}".format(len(labeled_dataset), len(unlabeled_dataset), len(validation_dataset)) 35 | 36 | # Export result to csv 37 | csv_epoch = [] 38 | 39 | total_time = 0 40 | for epoch in xrange(max_epoch): 41 | # Train M1 42 | # sum_loss = 0 43 | # epoch_time = time.time() 44 | # for t in xrange(vae1_num_trains_per_epoch): 45 | # x = util.sample_x_variable(batchsize, conf1.ndim_x, dataset, gpu_enabled=conf1.gpu_enabled) 46 | 47 | # # train 48 | # loss = vae1.train(x, L=1) 49 | 50 | # sum_loss += loss 51 | # if t % 10 == 0: 52 | # sys.stdout.write("\rTraining M1 in progress...(%d / %d)" % (t, vae1_num_trains_per_epoch)) 53 | # sys.stdout.flush() 54 | # epoch_time = time.time() - epoch_time 55 | # total_time += epoch_time 56 | # sys.stdout.write("\r") 57 | # print "[M1] epoch:", epoch, "loss: {:.3f}".format(sum_loss / vae1_num_trains_per_epoch), "time: {:d} min".format(int(epoch_time / 60)), "total: {:d} min".format(int(total_time / 60)) 58 | # sys.stdout.flush() 59 | # vae1.save(args.model_dir) 60 | 61 | # Train M2 62 | sum_loss_labeled = 0 63 | sum_loss_unlabeled = 0 64 | sum_loss_classifier = 0 65 | epoch_time = time.time() 66 | for t in xrange(vae2_num_trains_per_epoch): 67 | x_labeled, y_labeled, label_ids = util.sample_x_and_label_variables(batchsize, conf1.ndim_x, conf2.ndim_y, labeled_dataset, labels, gpu_enabled=conf2.gpu_enabled) 68 | x_unlabeled = util.sample_x_variable(batchsize, conf1.ndim_x, unlabeled_dataset, gpu_enabled=conf2.gpu_enabled) 69 | z_labeled = Variable(vae1.encoder(x_labeled, test=True, apply_f=True).data) 70 | z_unlabeled = Variable(vae1.encoder(x_unlabeled, test=True, apply_f=True).data) 71 | 72 | # train 73 | # loss_labeled, loss_unlabeled, loss_classifier = vae2.train_jointly(z_labeled, y_labeled, label_ids, z_unlabeled, alpha=alpha, test=False) 74 | 75 | # train 76 | loss_labeled, loss_unlabeled = vae2.train(z_labeled, y_labeled, label_ids, z_unlabeled) 77 | loss_classifier = vae2.train_classification(z_labeled, label_ids, alpha=alpha) 78 | 79 | sum_loss_labeled += loss_labeled 80 | sum_loss_unlabeled += loss_unlabeled 81 | sum_loss_classifier += loss_classifier 82 | if t % 10 == 0: 83 | sys.stdout.write("\rTraining M2 in progress...({:d} / {:d})".format(t, vae2_num_trains_per_epoch)) 84 | sys.stdout.flush() 85 | epoch_time = time.time() - epoch_time 86 | total_time += epoch_time 87 | sys.stdout.write("\r") 88 | print "[M2] epoch:", epoch, "loss::", "labeled: {:.3f}".format(sum_loss_labeled / vae2_num_trains_per_epoch), "unlabeled: {:.3f}".format(sum_loss_unlabeled / vae2_num_trains_per_epoch), "classifier: {:.3f}".format(sum_loss_classifier / vae2_num_trains_per_epoch), "time: {:d} min".format(int(epoch_time / 60)), "total: {:d} min".format(int(total_time / 60)) 89 | sys.stdout.flush() 90 | vae2.save(args.model_dir) 91 | 92 | # validation 93 | x_labeled, _, label_ids = util.sample_x_and_label_variables(num_validation_data, conf1.ndim_x, conf2.ndim_y, validation_dataset, validation_labels, gpu_enabled=False) 94 | if conf1.gpu_enabled: 95 | x_labeled.to_gpu() 96 | z_labeled = vae1.encoder(x_labeled, test=True) 97 | prediction = vae2.sample_x_label(z_labeled, test=True, argmax=True) 98 | correct = 0 99 | for i in xrange(num_validation_data): 100 | if prediction[i] == label_ids.data[i]: 101 | correct += 1 102 | print "validation:: classification accuracy: {:f}".format(correct / float(num_validation_data)) 103 | 104 | # Export to csv 105 | csv_epoch.append([epoch, int(total_time / 60), correct / float(num_validation_data)]) 106 | data = pd.DataFrame(csv_epoch) 107 | data.columns = ["epoch", "min", "accuracy"] 108 | data.to_csv("{:s}/epoch.csv".format(args.model_dir)) 109 | 110 | -------------------------------------------------------------------------------- /M1+M2/visualize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time, pylab 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | import matplotlib.patches as mpatches 6 | sys.path.append(os.path.split(os.getcwd())[0]) 7 | import util 8 | from args import args 9 | from model import conf1, vae1, conf2, vae2 10 | from vae_m1 import GaussianM1VAE 11 | 12 | try: 13 | os.mkdir(args.vis_dir) 14 | except: 15 | pass 16 | 17 | dist = "bernoulli" 18 | if isinstance(vae1, GaussianM1VAE): 19 | dist = "gaussian" 20 | dataset, labels = util.load_labeled_images(args.test_image_dir, dist=dist) 21 | 22 | num_plot = 10000 23 | x = util.sample_x_variable(num_plot, conf1.ndim_x, dataset, gpu_enabled=conf1.gpu_enabled) 24 | z1 = vae1.encoder(x, test=True) 25 | y = vae2.sample_x_y(z1, test=True) 26 | z2 = vae2.encode_xy_z(z1, y, test=True) 27 | 28 | _z1 = vae2.decode_zy_x(z2, y, test=True, apply_f=True) 29 | _x = vae1.decoder(_z1, test=True) 30 | if conf1.gpu_enabled: 31 | z2.to_cpu() 32 | _x.to_cpu() 33 | _x = _x.data 34 | 35 | util.visualize_x(_x, dir=args.vis_dir) 36 | util.visualize_z(z2.data, dir=args.vis_dir) 37 | util.visualize_labeled_z(z2.data, labels, dir=args.vis_dir) -------------------------------------------------------------------------------- /M1/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from args import args 3 | from vae_m1 import BernoulliM1VAE, GaussianM1VAE, Conf 4 | 5 | conf = Conf() 6 | conf.gpu_enabled = True if args.gpu_enabled == 1 else False 7 | conf.ndim_z = 2 8 | conf.encoder_apply_dropout = False 9 | conf.decoder_apply_dropout = False 10 | conf.encoder_apply_batchnorm = True 11 | conf.decoder_apply_batchnorm = True 12 | conf.encoder_apply_batchnorm_to_input = True 13 | conf.decoder_apply_batchnorm_to_input = True 14 | conf.encoder_units = [600, 600] 15 | conf.decoder_units = [600, 600] 16 | vae = BernoulliM1VAE(conf, name="m1") 17 | vae.load(args.model_dir) -------------------------------------------------------------------------------- /M1/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | sys.path.append(os.path.split(os.getcwd())[0]) 6 | import util 7 | from args import args 8 | from model import conf, vae 9 | from vae_m1 import GaussianM1VAE 10 | 11 | dist = "bernoulli" 12 | if isinstance(vae, GaussianM1VAE): 13 | dist = "gaussian" 14 | dataset = util.load_images(args.train_image_dir, dist=dist) 15 | 16 | max_epoch = 1000 17 | num_trains_per_epoch = 2000 18 | batchsize = 100 19 | total_time = 0 20 | 21 | for epoch in xrange(max_epoch): 22 | sum_loss = 0 23 | epoch_time = time.time() 24 | for t in xrange(num_trains_per_epoch): 25 | x = util.sample_x_variable(batchsize, conf.ndim_x, dataset, gpu_enabled=conf.gpu_enabled) 26 | 27 | # train 28 | loss = vae.train(x, L=1) 29 | 30 | sum_loss += loss 31 | if t % 10 == 0: 32 | sys.stdout.write("\rTraining M1 in progress...(%d / %d)" % (t, num_trains_per_epoch)) 33 | sys.stdout.flush() 34 | epoch_time = time.time() - epoch_time 35 | total_time += epoch_time 36 | sys.stdout.write("\r") 37 | print "epoch:", epoch, "loss: {:.3f}".format(sum_loss / num_trains_per_epoch), "time: {:d} min".format(int(epoch_time / 60)), "total: {:d} min".format(int(total_time / 60)) 38 | sys.stdout.flush() 39 | vae.save(args.model_dir) 40 | 41 | -------------------------------------------------------------------------------- /M1/visualize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time, pylab 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | import matplotlib.patches as mpatches 6 | sys.path.append(os.path.split(os.getcwd())[0]) 7 | import util 8 | from args import args 9 | from model import conf, vae 10 | from vae_m1 import GaussianM1VAE 11 | from chainer import functions as F 12 | from PIL import Image 13 | 14 | try: 15 | os.mkdir(args.vis_dir) 16 | except: 17 | pass 18 | 19 | dist = "bernoulli" 20 | if isinstance(vae, GaussianM1VAE): 21 | dist = "gaussian" 22 | dataset, labels = util.load_labeled_images(args.test_image_dir, dist=dist) 23 | 24 | num_images = 5000 25 | x, y_labeled, label_ids = util.sample_x_and_label_variables(num_images, conf.ndim_x, 10, dataset, labels, gpu_enabled=False) 26 | if conf.gpu_enabled: 27 | x.to_gpu() 28 | z = vae.encoder(x, test=True) 29 | _x = vae.decoder(z, True, True) 30 | if conf.gpu_enabled: 31 | z.to_cpu() 32 | _x.to_cpu() 33 | util.visualize_x(_x.data, dir=args.vis_dir) 34 | print "visualizing x" 35 | util.visualize_z(z.data, dir=args.vis_dir) 36 | print "visualizing z" 37 | util.visualize_labeled_z(z.data, label_ids.data, dir=args.vis_dir) 38 | print "visualizing labeled z" -------------------------------------------------------------------------------- /M2/analogy.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time, pylab 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | import matplotlib.patches as mpatches 6 | sys.path.append(os.path.split(os.getcwd())[0]) 7 | import util 8 | from args import args 9 | from model import conf, vae 10 | from vae_m2 import GaussianM2VAE 11 | 12 | try: 13 | os.mkdir(args.vis_dir) 14 | except: 15 | pass 16 | 17 | dist = "bernoulli" 18 | if isinstance(vae, GaussianM2VAE): 19 | dist = "gaussian" 20 | dataset = util.load_images(args.test_image_dir, dist=dist) 21 | 22 | n_analogies = 10 23 | n_image_channels = 1 24 | image_width = 28 25 | image_height = 28 26 | x = util.sample_x_variable(10, conf.ndim_x, dataset, gpu_enabled=conf.gpu_enabled) 27 | y = vae.sample_x_y(x, test=True) 28 | z = vae.encode_xy_z(x, y, test=True) 29 | 30 | fig = pylab.gcf() 31 | fig.set_size_inches(16.0, 16.0) 32 | pylab.clf() 33 | if n_image_channels == 1: 34 | pylab.gray() 35 | xp = np 36 | if conf.gpu_enabled: 37 | x.to_cpu() 38 | xp = cuda.cupy 39 | for m in xrange(n_analogies): 40 | pylab.subplot(n_analogies, conf.ndim_y + 2, m * 12 + 1) 41 | if n_image_channels == 1: 42 | pylab.imshow(x.data[m].reshape((image_width, image_height)), interpolation="none") 43 | elif n_image_channels == 3: 44 | pylab.imshow(x.data[m].reshape((n_image_channels, image_width, image_height)), interpolation="none") 45 | pylab.axis("off") 46 | all_y = xp.identity(conf.ndim_y, dtype=xp.float32) 47 | all_y = Variable(all_y) 48 | for m in xrange(n_analogies): 49 | base_z = xp.empty((conf.ndim_y, z.data.shape[1]), dtype=xp.float32) 50 | for n in xrange(conf.ndim_y): 51 | base_z[n] = z.data[m] 52 | base_z = Variable(base_z) 53 | _x = vae.decode_zy_x(base_z, all_y, test=True, apply_f=True) 54 | if conf.gpu_enabled: 55 | _x.to_cpu() 56 | for n in xrange(conf.ndim_y): 57 | pylab.subplot(n_analogies, conf.ndim_y + 2, m * 12 + 3 + n) 58 | if n_image_channels == 1: 59 | pylab.imshow(_x.data[n].reshape((image_width, image_height)), interpolation="none") 60 | elif n_image_channels == 3: 61 | pylab.imshow(_x.data[n].reshape((n_image_channels, image_width, image_height)), interpolation="none") 62 | pylab.axis("off") 63 | 64 | pylab.savefig("{:s}/analogy.png".format(args.vis_dir)) 65 | 66 | -------------------------------------------------------------------------------- /M2/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from args import args 3 | from vae_m2 import BernoulliM2VAE, GaussianM2VAE, Conf 4 | 5 | conf = Conf() 6 | conf.gpu_enabled = True if args.gpu_enabled == 1 else False 7 | conf.ndim_z = 50 8 | conf.encoder_xy_z_apply_dropout = False 9 | conf.encoder_x_y_apply_dropout = False 10 | conf.decoder_apply_dropout = False 11 | conf.encoder_xy_z_apply_batchnorm_to_input = True 12 | conf.encoder_x_y_apply_batchnorm_to_input = True 13 | conf.decoder_apply_batchnorm_to_input = True 14 | conf.encoder_xy_z_apply_batchnorm = True 15 | conf.encoder_x_y_apply_batchnorm = True 16 | conf.decoder_apply_batchnorm = True 17 | conf.encoder_xy_z_hidden_units = [500] 18 | conf.encoder_x_y_hidden_units = [500] 19 | conf.decoder_hidden_units = [500] 20 | conf.batchnorm_before_activation = True if args.batchnorm_before_activation == 1 else False 21 | 22 | if args.vae_type == "gaussian": 23 | vae = GaussianM2VAE(conf, name="m2") 24 | elif args.vae_type == "bernoulli": 25 | vae = BernoulliM2VAE(conf, name="m2") 26 | else: 27 | raise Exception() 28 | 29 | vae.load(args.model_dir) -------------------------------------------------------------------------------- /M2/test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | sys.path.append(os.path.split(os.getcwd())[0]) 6 | import util 7 | from args import args 8 | from model import conf, vae 9 | from vae_m2 import GaussianM2VAE 10 | 11 | dist = "bernoulli" 12 | if isinstance(vae, GaussianM2VAE): 13 | dist = "gaussian" 14 | dataset, labels = util.load_labeled_images(args.test_image_dir, dist=dist) 15 | num_data = len(dataset) 16 | 17 | x_labeled, _, label_ids = util.sample_x_and_label_variables(num_data, conf.ndim_x, conf.ndim_y, dataset, labels, gpu_enabled=False) 18 | if conf.gpu_enabled: 19 | x_labeled.to_gpu() 20 | prediction = vae.sample_x_label(x_labeled, test=True, argmax=True) 21 | correct = 0 22 | for i in xrange(num_data): 23 | if prediction[i] == label_ids.data[i]: 24 | correct += 1 25 | print "test:: classification accuracy: {:f}".format(correct / float(num_data)) -------------------------------------------------------------------------------- /M2/train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | import pandas as pd 6 | sys.path.append(os.path.split(os.getcwd())[0]) 7 | import util 8 | from args import args 9 | from model import conf, vae 10 | from vae_m2 import GaussianM2VAE 11 | 12 | dist = "bernoulli" 13 | if isinstance(vae, GaussianM2VAE): 14 | dist = "gaussian" 15 | dataset, labels = util.load_labeled_images(args.train_image_dir, dist=dist) 16 | 17 | max_epoch = 1000 18 | num_trains_per_epoch = 2000 19 | batchsize_l = 100 20 | batchsize_u = 100 21 | 22 | # Create labeled/unlabeled split in training set 23 | num_types_of_label = 10 24 | num_labeled_data = args.num_labeled_data 25 | num_validation_data = 10000 26 | labeled_dataset, labels, unlabeled_dataset, validation_dataset, validation_labels = util.create_semisupervised(dataset, labels, num_validation_data, num_labeled_data, num_types_of_label) 27 | print "labels:", labels 28 | alpha = 0.1 * len(dataset) / len(labeled_dataset) 29 | alpha = 1.0 30 | print "alpha:", alpha 31 | print "dataset:: labeled: {:d} unlabeled: {:d} validation: {:d}".format(len(labeled_dataset), len(unlabeled_dataset), len(validation_dataset)) 32 | 33 | if num_labeled_data < batchsize_l: 34 | batchsize_l = num_labeled_data 35 | 36 | if len(unlabeled_dataset) < batchsize_u: 37 | batchsize_u = len(unlabeled_dataset) 38 | 39 | # from PIL import Image 40 | # for i in xrange(len(labeled_dataset)): 41 | # image = Image.fromarray(np.uint8(labeled_dataset[i].reshape(28, 28) * 255)) 42 | # image.save("labeled_images/{:d}.bmp".format(i)) 43 | 44 | # Export result to csv 45 | csv_epoch = [] 46 | 47 | total_time = 0 48 | for epoch in xrange(max_epoch): 49 | sum_loss_labeled = 0 50 | sum_loss_unlabeled = 0 51 | sum_loss_classifier = 0 52 | epoch_time = time.time() 53 | for t in xrange(num_trains_per_epoch): 54 | x_labeled, y_labeled, label_ids = util.sample_x_and_label_variables(batchsize_l, conf.ndim_x, conf.ndim_y, labeled_dataset, labels, gpu_enabled=conf.gpu_enabled) 55 | x_unlabeled = util.sample_x_variable(batchsize_u, conf.ndim_x, unlabeled_dataset, gpu_enabled=conf.gpu_enabled) 56 | 57 | # train 58 | loss_labeled, loss_unlabeled = vae.train(x_labeled, y_labeled, label_ids, x_unlabeled) 59 | loss_classifier = vae.train_classification(x_labeled, label_ids, alpha=alpha) 60 | 61 | sum_loss_labeled += loss_labeled 62 | sum_loss_unlabeled += loss_unlabeled 63 | sum_loss_classifier += loss_classifier 64 | if t % 10 == 0: 65 | sys.stdout.write("\rTraining in progress...({:d} / {:d})".format(t, num_trains_per_epoch)) 66 | sys.stdout.flush() 67 | epoch_time = time.time() - epoch_time 68 | total_time += epoch_time 69 | sys.stdout.write("\r") 70 | print "epoch: {:d} loss:: labeled: {:.3f} unlabeled: {:.3f} classifier: {:.3f} time: {:d} min total: {:d} min".format(epoch + 1, sum_loss_labeled / num_trains_per_epoch, sum_loss_unlabeled / num_trains_per_epoch, sum_loss_classifier / num_trains_per_epoch, int(epoch_time / 60), int(total_time / 60)) 71 | sys.stdout.flush() 72 | vae.save(args.model_dir) 73 | 74 | # validation 75 | x_labeled, _, label_ids = util.sample_x_and_label_variables(num_validation_data, conf.ndim_x, conf.ndim_y, validation_dataset, validation_labels, gpu_enabled=False) 76 | if conf.gpu_enabled: 77 | x_labeled.to_gpu() 78 | prediction = vae.sample_x_label(x_labeled, test=True, argmax=True) 79 | correct = 0 80 | for i in xrange(num_validation_data): 81 | if prediction[i] == label_ids.data[i]: 82 | correct += 1 83 | print "validation:: classification accuracy: {:f}".format(correct / float(num_validation_data)) 84 | 85 | # Export to csv 86 | csv_epoch.append([epoch, int(total_time / 60), correct / float(num_validation_data)]) 87 | data = pd.DataFrame(csv_epoch) 88 | data.columns = ["epoch", "min", "accuracy"] 89 | data.to_csv("{:s}/epoch.csv".format(args.model_dir)) 90 | 91 | -------------------------------------------------------------------------------- /M2/visualize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, sys, time, pylab 3 | import numpy as np 4 | from chainer import cuda, Variable 5 | import matplotlib.patches as mpatches 6 | sys.path.append(os.path.split(os.getcwd())[0]) 7 | import util 8 | from args import args 9 | from model import conf, vae 10 | from vae_m2 import GaussianM2VAE 11 | 12 | try: 13 | os.mkdir(args.vis_dir) 14 | except: 15 | pass 16 | dist = "bernoulli" 17 | if isinstance(vae, GaussianM2VAE): 18 | dist = "gaussian" 19 | dataset, labels = util.load_labeled_images(args.test_image_dir, dist=dist) 20 | 21 | def forward_one_step(num_images): 22 | x, y_labeled, label_ids = util.sample_x_and_label_variables(num_images, conf.ndim_x, conf.ndim_y, dataset, labels, gpu_enabled=False) 23 | x.to_gpu() 24 | y = vae.sample_x_y(x, test=True) 25 | z = vae.encoder_xy_z(x, y, test=True) 26 | _x = vae.decode_zy_x(z, y, test=True) 27 | if conf.gpu_enabled: 28 | z.to_cpu() 29 | _x.to_cpu() 30 | _x = _x.data 31 | return z, _x, label_ids 32 | 33 | z, _x, _ = forward_one_step(100) 34 | util.visualize_x(_x, dir=args.vis_dir) 35 | 36 | z, _x, label_ids = forward_one_step(5000) 37 | util.visualize_z(z.data, dir=args.vis_dir) 38 | util.visualize_labeled_z(z.data, label_ids.data, dir=args.vis_dir) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Semi-Supervised Learning with Deep Generative Models 2 | 3 | Chainer implementation of Variational AutoEncoder(VAE) model M1, M2, M1+M2 4 | 5 | [この記事](http://musyoku.github.io/2016/07/02/semi-supervised-learning-with-deep-generative-models/)で実装したコードです。 6 | 7 | ### Requirements 8 | 9 | - Chainer 1.8+ 10 | - sklearn 11 | 12 | To visualize results, you need 13 | 14 | - matplotlib.patches 15 | - PIL 16 | - pandas 17 | 18 | #### Download MNIST 19 | 20 | run `mnist-tools.py` to download and extract MNIST. 21 | 22 | #### How to label my own dataset? 23 | 24 | You can provide label information by filename. 25 | 26 | format: 27 | 28 | `{label_id}_{unique_filename}.{extension}` 29 | 30 | regex: 31 | 32 | `([0-9]+)_.+\.(bmp|png|jpg)` 33 | 34 | e.g. MNIST 35 | 36 | ![labeling](http://musyoku.github.io/images/post/2016-07-02/labeling.png) 37 | 38 | 39 | ## M1 40 | 41 | #### Parameters 42 | 43 | | params | value | 44 | |:-----------|------------:| 45 | | OS | Windows 7 | 46 | | GPU | GeForce GTX 970M | 47 | | ndim_z | 2 | 48 | | encoder_apply_dropout | False | 49 | | decoder_apply_dropout | False | 50 | | encoder_apply_batchnorm | True | 51 | | decoder_apply_batchnorm | True | 52 | | encoder_apply_batchnorm_to_input | True | 53 | | decoder_apply_batchnorm_to_input | True | 54 | | encoder_units | [600, 600] | 55 | | decoder_units | [600, 600] | 56 | | gradient_clipping | 1.0 | 57 | | learning_rate | 0.0003 | 58 | | gradient_momentum | 0.9 | 59 | | gradient_clipping | 1.0 | 60 | | nonlinear | softplus| 61 | 62 | #### Result 63 | 64 | ##### Latent space 65 | 66 | ![M1](http://musyoku.github.io/images/post/2016-07-02/m1_latent_space.png) 67 | 68 | ## M2 69 | 70 | ##### Parameters 71 | 72 | | params | value | 73 | |:-----------|------------:| 74 | | OS | Windows 7 | 75 | | GPU | GeForce GTX 970M | 76 | | ndim_z | 50 | 77 | | encoder_xy_z_apply_dropout | False | 78 | | encoder_x_y_apply_dropout | False | 79 | | decoder_apply_dropout | False | 80 | | encoder_xy_z_apply_batchnorm_to_input | True | 81 | | encoder_x_y_apply_batchnorm_to_input | True | 82 | | decoder_apply_batchnorm_to_input | True | 83 | | encoder_xy_z_apply_batchnorm | True | 84 | | encoder_x_y_apply_batchnorm | True | 85 | | decoder_apply_batchnorm | True | 86 | | encoder_xy_z_hidden_units | [500] | 87 | | encoder_x_y_hidden_units | [500] | 88 | | decoder_hidden_units | [500] | 89 | | batchnorm_before_activation | True | 90 | | gradient_clipping | 5.0 | 91 | | learning_rate | 0.0003 | 92 | | gradient_momentum | 0.9 | 93 | | gradient_clipping | 1.0 | 94 | | nonlinear | softplus| 95 | 96 | #### Result 97 | 98 | ##### Classification 99 | 100 | ###### Training details 101 | 102 | | data | # | 103 | |:-----------|------------:| 104 | | labeled | 100 | 105 | | unlabeled | 49900 | 106 | | validation | 10000 | 107 | | test | 10000 | 108 | 109 | | * | # | 110 | |:-----------|------------:| 111 | | epochs | 490 | 112 | | minutes | 1412 | 113 | | weight updates per epoch | 2000 | 114 | 115 | ###### Validation accuracy: 116 | 117 | ![M2](http://musyoku.github.io/images/post/2016-07-02/m2_validation_accuracy.png) 118 | 119 | ###### Test accuracy: **0.9018** 120 | 121 | ##### Analogies 122 | 123 | run `analogy.py` after training 124 | 125 | Model was trained with... 126 | 127 | | data | # | 128 | |:-----------|------------:| 129 | | labeled | 100 | 130 | | unlabeled | 49900 | 131 | 132 | ![M2](http://musyoku.github.io/images/post/2016-07-02/m2_analogy_100.png) 133 | 134 | | data | # | 135 | |:-----------|------------:| 136 | | labeled | 10000 | 137 | | unlabeled | 40000 | 138 | 139 | ![M2](http://musyoku.github.io/images/post/2016-07-02/m2_analogy_10000.png) 140 | 141 | | data | # | 142 | |:-----------|------------:| 143 | | labeled | 50000 | 144 | | unlabeled | 0 | 145 | 146 | ![M2](http://musyoku.github.io/images/post/2016-07-02/m2_analogy_50000.png) 147 | 148 | 149 | ## M1+M2 150 | 151 | ##### Parameters 152 | 153 | ##### M1 154 | 155 | | params | value | 156 | |:-----------|------------:| 157 | | OS | Windows 7 | 158 | | GPU | GeForce GTX 970M | 159 | | ndim_z | 2 | 160 | | encoder_apply_dropout | False | 161 | | decoder_apply_dropout | False | 162 | | encoder_apply_batchnorm | True | 163 | | decoder_apply_batchnorm | True | 164 | | encoder_apply_batchnorm_to_input | True | 165 | | decoder_apply_batchnorm_to_input | True | 166 | | encoder_units | [600, 600] | 167 | | decoder_units | [600, 600] | 168 | | gradient_clipping | 1.0 | 169 | | learning_rate | 0.0003 | 170 | | gradient_momentum | 0.9 | 171 | | gradient_clipping | 1.0 | 172 | | nonlinear | softplus| 173 | 174 | We trained M1 for 500 epochs before starting training of M2. 175 | 176 | | * | # | 177 | |:-----------|------------:| 178 | | epochs | 500 | 179 | | minutes | 860 | 180 | | weight updates per epoch | 2000 | 181 | 182 | ##### M2 183 | 184 | | params | value | 185 | |:-----------|------------:| 186 | | OS | Windows 7 | 187 | | GPU | GeForce GTX 970M | 188 | | ndim_z | 50 | 189 | | encoder_xy_z_apply_dropout | False | 190 | | encoder_x_y_apply_dropout | False | 191 | | decoder_apply_dropout | False | 192 | | encoder_xy_z_apply_batchnorm_to_input | True | 193 | | encoder_x_y_apply_batchnorm_to_input | True | 194 | | decoder_apply_batchnorm_to_input | True | 195 | | encoder_xy_z_apply_batchnorm | True | 196 | | encoder_x_y_apply_batchnorm | True | 197 | | decoder_apply_batchnorm | True | 198 | | encoder_xy_z_hidden_units | [500] | 199 | | encoder_x_y_hidden_units | [500] | 200 | | decoder_hidden_units | [500] | 201 | | batchnorm_before_activation | True | 202 | | gradient_clipping | 5.0 | 203 | | learning_rate | 0.0003 | 204 | | gradient_momentum | 0.9 | 205 | | gradient_clipping | 1.0 | 206 | | nonlinear | softplus| 207 | 208 | #### Result 209 | 210 | ##### Classification 211 | 212 | ###### Training details 213 | 214 | | data | # | 215 | |:-----------|------------:| 216 | | labeled | 100 | 217 | | unlabeled | 49900 | 218 | | validation | 10000 | 219 | | test | 10000 | 220 | 221 | | * | # | 222 | |:-----------|------------:| 223 | | epochs | 600 | 224 | | minutes | 4920 | 225 | | weight updates per epoch | 5000 | 226 | 227 | ###### Validation accuracy: 228 | 229 | ![M1+M2](http://musyoku.github.io/images/post/2016-07-02/m1+m2_validation_accuracy.png) 230 | 231 | ###### Test accuracy 232 | 233 | seed1: **0.954** 234 | 235 | seed2: **0.951** 236 | 237 | -------------------------------------------------------------------------------- /args.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import argparse 3 | 4 | # Arguments 5 | parser = argparse.ArgumentParser() 6 | parser.add_argument("--gpu_enabled", type=int, default=1) 7 | parser.add_argument("--train_image_dir", type=str, default="../train_images") 8 | parser.add_argument("--test_image_dir", type=str, default="../test_images") 9 | parser.add_argument("--model_dir", type=str, default="model") 10 | parser.add_argument("--vis_dir", type=str, default="visualization") 11 | parser.add_argument("--vae_type", type=str, default="bernoulli") 12 | parser.add_argument("--num_labeled_data", type=int, default=100) 13 | parser.add_argument("--batchnorm_before_activation", type=int, default=1) 14 | args = parser.parse_args() -------------------------------------------------------------------------------- /mnist_tools.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import gzip, os, six 3 | from six.moves.urllib import request 4 | from PIL import Image 5 | import numpy as np 6 | 7 | parent = "http://yann.lecun.com/exdb/mnist" 8 | train_images_filename = "train-images-idx3-ubyte.gz" 9 | train_labels_filename = "train-labels-idx1-ubyte.gz" 10 | test_images_filename = "t10k-images-idx3-ubyte.gz" 11 | test_labels_filename = "t10k-labels-idx1-ubyte.gz" 12 | n_train = 60000 13 | n_test = 10000 14 | dim = 28 * 28 15 | 16 | train_dir = "train_images" 17 | test_dir = "test_images" 18 | 19 | try: 20 | os.mkdir(train_dir) 21 | os.mkdir(test_dir) 22 | except: 23 | pass 24 | 25 | def load_mnist(data_filename, label_filename, num): 26 | data = np.zeros(num * dim, dtype=np.uint8).reshape((num, dim)) 27 | label = np.zeros(num, dtype=np.uint8).reshape((num, )) 28 | 29 | with gzip.open(data_filename, "rb") as f_images, gzip.open(label_filename, "rb") as f_labels: 30 | f_images.read(16) 31 | f_labels.read(8) 32 | for i in six.moves.range(num): 33 | label[i] = ord(f_labels.read(1)) 34 | for j in six.moves.range(dim): 35 | data[i, j] = ord(f_images.read(1)) 36 | 37 | return data, label 38 | 39 | def download_mnist_data(): 40 | print("Downloading {}...".format(train_images_filename)) 41 | request.urlretrieve("{}/{}".format(parent, train_images_filename), train_images_filename) 42 | print("Downloading {}...".format(train_labels_filename)) 43 | request.urlretrieve("{}/{}".format(parent, train_labels_filename), train_labels_filename) 44 | print("Downloading {}...".format(test_images_filename)) 45 | request.urlretrieve("{}/{}".format(parent, test_images_filename), test_images_filename) 46 | print("Downloading {}...".format(test_labels_filename)) 47 | request.urlretrieve("{}/{}".format(parent, test_labels_filename), test_labels_filename) 48 | print("Done") 49 | 50 | def extract_mnist_data(): 51 | if not os.path.exists(train_images_filename): 52 | download_mnist_data() 53 | print("Extracting training data...") 54 | data_train, label_train = load_mnist(train_images_filename, train_labels_filename, n_train) 55 | print("Extracting test data...") 56 | data_test, label_test = load_mnist(test_images_filename, test_labels_filename, n_test) 57 | print("Done") 58 | return data_train, label_train, data_test, label_test 59 | 60 | data_train, label_train, data_test, label_test = extract_mnist_data() 61 | print "Saving training images..." 62 | for i in xrange(data_train.shape[0]): 63 | image = Image.fromarray(data_train[i].reshape(28, 28)) 64 | image.save("{}/{}_{}.bmp".format(train_dir, label_train[i], i)) 65 | print "Saving test images..." 66 | for i in xrange(data_test.shape[0]): 67 | image = Image.fromarray(data_test[i].reshape(28, 28)) 68 | image.save("{}/{}_{}.bmp".format(test_dir, label_test[i], i)) 69 | print("Done") 70 | 71 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os, re, math, pylab, sys 3 | from math import * 4 | import numpy as np 5 | from StringIO import StringIO 6 | from PIL import Image 7 | from chainer import cuda, Variable, function 8 | from chainer.utils import type_check 9 | from sklearn import preprocessing 10 | import matplotlib.patches as mpatches 11 | 12 | def load_images(image_dir, convert_to_grayscale=True, dist="bernoulli"): 13 | dataset = [] 14 | fs = os.listdir(image_dir) 15 | print "loading", len(fs), "images..." 16 | for fn in fs: 17 | f = open("%s/%s" % (image_dir, fn), "rb") 18 | if convert_to_grayscale: 19 | img = np.asarray(Image.open(StringIO(f.read())).convert("L"), dtype=np.float32) / 255.0 20 | else: 21 | img = np.asarray(Image.open(StringIO(f.read())).convert("RGB"), dtype=np.float32).transpose(2, 0, 1) / 255.0 22 | if dist == "bernoulli": 23 | # Sampling 24 | img = preprocessing.binarize(img, threshold=0.5) 25 | pass 26 | elif dist == "gaussian": 27 | pass 28 | else: 29 | raise Exception() 30 | dataset.append(img) 31 | f.close() 32 | return dataset 33 | 34 | def load_labeled_images(image_dir, convert_to_grayscale=True, dist="bernoulli"): 35 | dataset = [] 36 | labels = [] 37 | fs = os.listdir(image_dir) 38 | i = 0 39 | for fn in fs: 40 | m = re.match("([0-9]+)_.+", fn) 41 | label = int(m.group(1)) 42 | f = open("%s/%s" % (image_dir, fn), "rb") 43 | if convert_to_grayscale: 44 | img = np.asarray(Image.open(StringIO(f.read())).convert("L"), dtype=np.float32) / 255.0 45 | else: 46 | img = np.asarray(Image.open(StringIO(f.read())).convert("RGB"), dtype=np.float32).transpose(2, 0, 1) / 255.0 47 | if dist == "bernoulli": 48 | # Sampling 49 | img = preprocessing.binarize(img, threshold=0.5) 50 | pass 51 | elif dist == "gaussian": 52 | pass 53 | else: 54 | raise Exception() 55 | dataset.append(img) 56 | labels.append(label) 57 | f.close() 58 | i += 1 59 | if i % 100 == 0: 60 | sys.stdout.write("\rloading images...({:d} / {:d})".format(i, len(fs))) 61 | sys.stdout.flush() 62 | sys.stdout.write("\n") 63 | return dataset, labels 64 | 65 | def create_semisupervised(dataset, labels, num_validation_data=10000, num_labeled_data=100, num_types_of_label=10): 66 | if len(dataset) < num_validation_data + num_labeled_data: 67 | raise Exception("len(dataset) < num_validation_data + num_labeled_data") 68 | training_labeled_x = [] 69 | training_unlabeled_x = [] 70 | validation_x = [] 71 | validation_labels = [] 72 | training_labels = [] 73 | indices_for_label = {} 74 | num_data_per_label = int(num_labeled_data / num_types_of_label) 75 | num_unlabeled_data = len(dataset) - num_validation_data - num_labeled_data 76 | 77 | indices = np.arange(len(dataset)) 78 | np.random.shuffle(indices) 79 | 80 | def check(index): 81 | label = labels[index] 82 | if label not in indices_for_label: 83 | indices_for_label[label] = [] 84 | return True 85 | if len(indices_for_label[label]) < num_data_per_label: 86 | for i in indices_for_label[label]: 87 | if i == index: 88 | return False 89 | return True 90 | return False 91 | 92 | for n in xrange(len(dataset)): 93 | index = indices[n] 94 | if check(index): 95 | indices_for_label[labels[index]].append(index) 96 | training_labeled_x.append(dataset[index]) 97 | training_labels.append(labels[index]) 98 | else: 99 | if len(training_unlabeled_x) < num_unlabeled_data: 100 | training_unlabeled_x.append(dataset[index]) 101 | else: 102 | validation_x.append(dataset[index]) 103 | validation_labels.append(labels[index]) 104 | 105 | return training_labeled_x, training_labels, training_unlabeled_x, validation_x, validation_labels 106 | 107 | def sample_x_variable(batchsize, ndim_x, dataset, gpu_enabled=True): 108 | x_batch = np.zeros((batchsize, ndim_x), dtype=np.float32) 109 | indices = np.random.choice(np.arange(len(dataset), dtype=np.int32), size=batchsize, replace=False) 110 | for j in range(batchsize): 111 | data_index = indices[j] 112 | img = dataset[data_index] 113 | x_batch[j] = img.reshape((ndim_x,)) 114 | x_batch = Variable(x_batch) 115 | if gpu_enabled: 116 | x_batch.to_gpu() 117 | return x_batch 118 | 119 | def sample_x_and_label_variables(batchsize, ndim_x, ndim_y, dataset, labels, gpu_enabled=True): 120 | x_batch = np.zeros((batchsize, ndim_x), dtype=np.float32) 121 | # one-hot 122 | y_batch = np.zeros((batchsize, ndim_y), dtype=np.float32) 123 | # label id 124 | label_batch = np.zeros((batchsize,), dtype=np.int32) 125 | indices = np.random.choice(np.arange(len(dataset), dtype=np.int32), size=batchsize, replace=False) 126 | for j in range(batchsize): 127 | data_index = indices[j] 128 | img = dataset[data_index] 129 | x_batch[j] = img.reshape((ndim_x,)) 130 | y_batch[j, labels[data_index]] = 1 131 | label_batch[j] = labels[data_index] 132 | x_batch = Variable(x_batch) 133 | y_batch = Variable(y_batch) 134 | label_batch = Variable(label_batch) 135 | if gpu_enabled: 136 | x_batch.to_gpu() 137 | y_batch.to_gpu() 138 | label_batch.to_gpu() 139 | return x_batch, y_batch, label_batch 140 | 141 | def visualize_x(reconstructed_x_batch, image_width=28, image_height=28, image_channel=1, dir=None): 142 | if dir is None: 143 | raise Exception() 144 | try: 145 | os.mkdir(dir) 146 | except: 147 | pass 148 | fig = pylab.gcf() 149 | fig.set_size_inches(16.0, 16.0) 150 | pylab.clf() 151 | if image_channel == 1: 152 | pylab.gray() 153 | for m in range(100): 154 | pylab.subplot(10, 10, m + 1) 155 | if image_channel == 1: 156 | pylab.imshow(reconstructed_x_batch[m].reshape((image_width, image_height)), interpolation="none") 157 | elif image_channel == 3: 158 | pylab.imshow(reconstructed_x_batch[m].reshape((image_channel, image_width, image_height)), interpolation="none") 159 | pylab.axis("off") 160 | pylab.savefig("%s/reconstructed_x.png" % dir) 161 | 162 | def visualize_z(z_batch, dir=None): 163 | if dir is None: 164 | raise Exception() 165 | try: 166 | os.mkdir(dir) 167 | except: 168 | pass 169 | fig = pylab.gcf() 170 | fig.set_size_inches(20.0, 16.0) 171 | pylab.clf() 172 | for n in xrange(z_batch.shape[0]): 173 | result = pylab.scatter(z_batch[n, 0], z_batch[n, 1], s=40, marker="o", edgecolors='none') 174 | pylab.xlabel("z1") 175 | pylab.ylabel("z2") 176 | pylab.savefig("%s/latent_code.png" % dir) 177 | 178 | def visualize_labeled_z(z_batch, label_batch, dir=None): 179 | fig = pylab.gcf() 180 | fig.set_size_inches(20.0, 16.0) 181 | pylab.clf() 182 | colors = ["#2103c8", "#0e960e", "#e40402","#05aaa8","#ac02ab","#aba808","#151515","#94a169", "#bec9cd", "#6a6551"] 183 | for n in xrange(z_batch.shape[0]): 184 | result = pylab.scatter(z_batch[n, 0], z_batch[n, 1], c=colors[label_batch[n]], s=40, marker="o", edgecolors='none') 185 | 186 | classes = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] 187 | recs = [] 188 | for i in range(0, len(colors)): 189 | recs.append(mpatches.Rectangle((0, 0), 1, 1, fc=colors[i])) 190 | 191 | ax = pylab.subplot(111) 192 | box = ax.get_position() 193 | ax.set_position([box.x0, box.y0, box.width * 0.8, box.height]) 194 | ax.legend(recs, classes, loc="center left", bbox_to_anchor=(1.1, 0.5)) 195 | pylab.xticks(pylab.arange(-4, 5)) 196 | pylab.yticks(pylab.arange(-4, 5)) 197 | pylab.xlabel("z1") 198 | pylab.ylabel("z2") 199 | pylab.savefig("%s/labeled_z.png" % dir) 200 | -------------------------------------------------------------------------------- /vae_m1.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import math 3 | import numpy as np 4 | import chainer, os, collections, six 5 | from chainer import cuda, Variable, optimizers, serializers, optimizer 6 | from chainer import functions as F 7 | from chainer import links as L 8 | 9 | activations = { 10 | "sigmoid": F.sigmoid, 11 | "tanh": F.tanh, 12 | "softplus": F.softplus, 13 | "relu": F.relu, 14 | "leaky_relu": F.leaky_relu, 15 | "elu": F.elu 16 | } 17 | 18 | class Conf(): 19 | def __init__(self): 20 | self.image_width = 28 21 | self.image_height = 28 22 | self.ndim_x = 28 * 28 23 | self.ndim_z = 100 24 | self.batchnorm_before_activation = True 25 | 26 | # gaussianmarg | gaussian 27 | # We recommend you to use "gaussianmarg" when decoder is gaussian. 28 | self.type_pz = "gaussianmarg" 29 | self.type_qz = "gaussianmarg" 30 | 31 | # e.g. 32 | # ndim_x (input) -> 2000 -> 1000 -> 100 (output) 33 | # encoder_hidden_units = [2000, 1000] 34 | self.encoder_hidden_units = [600, 600] 35 | self.encoder_activation_function = "softplus" 36 | self.encoder_apply_dropout = True 37 | self.encoder_apply_batchnorm = True 38 | self.encoder_apply_batchnorm_to_input = True 39 | 40 | self.decoder_hidden_units = [600, 600] 41 | self.decoder_activation_function = "softplus" 42 | self.decoder_apply_dropout = True 43 | self.decoder_apply_batchnorm = True 44 | self.decoder_apply_batchnorm_to_input = True 45 | 46 | self.gpu_enabled = True 47 | self.learning_rate = 0.0003 48 | self.gradient_momentum = 0.9 49 | self.gradient_clipping = 1.0 50 | 51 | def check(self): 52 | pass 53 | 54 | def sum_sqnorm(arr): 55 | sq_sum = collections.defaultdict(float) 56 | for x in arr: 57 | with cuda.get_device(x) as dev: 58 | x = x.ravel() 59 | s = x.dot(x) 60 | sq_sum[int(dev)] += s 61 | return sum([float(i) for i in six.itervalues(sq_sum)]) 62 | 63 | class GradientClipping(object): 64 | name = "GradientClipping" 65 | 66 | def __init__(self, threshold): 67 | self.threshold = threshold 68 | 69 | def __call__(self, opt): 70 | norm = np.sqrt(sum_sqnorm([p.grad for p in opt.target.params()])) 71 | if norm < 1: 72 | return 73 | rate = self.threshold / norm 74 | if rate < 1: 75 | for param in opt.target.params(): 76 | grad = param.grad 77 | with cuda.get_device(grad): 78 | grad = cuda.cupy.clip(grad, -self.threshold, self.threshold) 79 | 80 | class VAE(): 81 | # name is used for the filename when you save the model 82 | def __init__(self, conf, name="vae"): 83 | conf.check() 84 | self.encoder, self.decoder = self.build(conf) 85 | self.name = name 86 | 87 | self.optimizer_encoder = optimizers.Adam(alpha=conf.learning_rate, beta1=conf.gradient_momentum) 88 | self.optimizer_encoder.setup(self.encoder) 89 | # self.optimizer_encoder.add_hook(optimizer.WeightDecay(0.001)) 90 | self.optimizer_encoder.add_hook(GradientClipping(conf.gradient_clipping)) 91 | 92 | self.optimizer_decoder = optimizers.Adam(alpha=conf.learning_rate, beta1=conf.gradient_momentum) 93 | self.optimizer_decoder.setup(self.decoder) 94 | # self.optimizer_decoder.add_hook(optimizer.WeightDecay(0.001)) 95 | self.optimizer_decoder.add_hook(GradientClipping(conf.gradient_clipping)) 96 | 97 | self.type_pz = conf.type_pz 98 | self.type_qz = conf.type_qz 99 | 100 | def build(self, conf): 101 | raise Exception() 102 | 103 | def train(self, x, L=1, test=False): 104 | raise Exception() 105 | 106 | @property 107 | def xp(self): 108 | return self.encoder.xp 109 | 110 | @property 111 | def gpu(self): 112 | if cuda.available is False: 113 | return False 114 | return True if self.xp is cuda.cupy else False 115 | 116 | def zero_grads(self): 117 | self.optimizer_encoder.zero_grads() 118 | self.optimizer_decoder.zero_grads() 119 | 120 | def update(self): 121 | self.optimizer_encoder.update() 122 | self.optimizer_decoder.update() 123 | 124 | def bernoulli_nll_keepbatch(self, x, y): 125 | nll = F.softplus(y) - x * y 126 | return F.sum(nll, axis=1) 127 | 128 | def gaussian_nll_keepbatch(self, x, mean, ln_var): 129 | x_prec = F.exp(-ln_var) 130 | x_diff = x - mean 131 | x_power = x_diff ** 2 * x_prec * 0.5 132 | return F.sum((math.log(2.0 * math.pi) + ln_var) * 0.5 + x_power, axis=1) 133 | 134 | def gaussian_kl_divergence_keepbatch(self, mean, ln_var): 135 | var = F.exp(ln_var) 136 | kld = F.sum(mean ** 2 + var - ln_var - 1, axis=1) * 0.5 137 | return kld 138 | 139 | def log_px_z(self, x, z, test=False): 140 | if isinstance(self.decoder, BernoulliDecoder): 141 | # do not apply F.sigmoid to the output of the decoder 142 | raw_output = self.decoder(z, test=test, apply_f=False) 143 | negative_log_likelihood = self.bernoulli_nll_keepbatch(x, raw_output) 144 | log_px_z = -negative_log_likelihood 145 | else: 146 | x_mean, x_ln_var = self.decoder(z, test=test, apply_f=False) 147 | negative_log_likelihood = self.gaussian_nll_keepbatch(x, x_mean, x_ln_var) 148 | log_px_z = -negative_log_likelihood 149 | return log_px_z 150 | 151 | # this will not be used for bernoulli decoder 152 | def log_pz(self, z, mean, ln_var): 153 | if self.type_pz == "gaussianmarg": 154 | # \int q(z)logp(z)dz = -(J/2)*log2pi - (1/2)*sum_{j=1}^{J} (mu^2 + var) 155 | # See Appendix B [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114) 156 | # See https://github.com/dpkingma/nips14-ssl/blob/master/anglepy/models/VAE_YZ_X.py line 106 157 | log_pz = -0.5 * (math.log(2.0 * math.pi) + mean * mean + F.exp(ln_var)) 158 | elif self.type_pz == "gaussian": 159 | log_pz = -0.5 * math.log(2.0 * math.pi) - 0.5 * z ** 2 160 | return F.sum(log_pz, axis=1) 161 | 162 | # this will not be used for bernoulli decoder 163 | def log_qz_x(self, z, mean, ln_var): 164 | if self.type_qz == "gaussianmarg": 165 | # \int q(z)logq(z)dz = -(J/2)*log2pi - (1/2)*sum_{j=1}^{J} (1 + logvar) 166 | # See Appendix B [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114) 167 | # See https://github.com/dpkingma/nips14-ssl/blob/master/anglepy/models/VAE_YZ_X.py line 118 168 | log_qz_x = -0.5 * F.sum((math.log(2.0 * math.pi) + 1 + ln_var), axis=1) 169 | elif self.type_qz == "gaussian": 170 | log_qz_x = -self.gaussian_nll_keepbatch(z, mean, ln_var) 171 | return log_qz_x 172 | 173 | def load(self, dir=None): 174 | if dir is None: 175 | raise Exception() 176 | for attr in vars(self): 177 | prop = getattr(self, attr) 178 | if isinstance(prop, chainer.Chain) or isinstance(prop, chainer.optimizer.GradientMethod): 179 | filename = dir + "/%s_%s.hdf5" % (self.name, attr) 180 | if os.path.isfile(filename): 181 | serializers.load_hdf5(filename, prop) 182 | else: 183 | print filename, "missing." 184 | print "model loaded." 185 | 186 | def save(self, dir=None): 187 | if dir is None: 188 | raise Exception() 189 | try: 190 | os.mkdir(dir) 191 | except: 192 | pass 193 | for attr in vars(self): 194 | prop = getattr(self, attr) 195 | if isinstance(prop, chainer.Chain) or isinstance(prop, chainer.optimizer.GradientMethod): 196 | serializers.save_hdf5(dir + "/%s_%s.hdf5" % (self.name, attr), prop) 197 | print "model saved." 198 | 199 | class GaussianM1VAE(VAE): 200 | 201 | def build(self, conf): 202 | wscale = 0.1 203 | encoder_attributes = {} 204 | encoder_units = [(conf.ndim_x, conf.encoder_hidden_units[0])] 205 | encoder_units += zip(conf.encoder_hidden_units[:-1], conf.encoder_hidden_units[1:]) 206 | for i, (n_in, n_out) in enumerate(encoder_units): 207 | encoder_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 208 | if conf.batchnorm_before_activation: 209 | encoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 210 | else: 211 | encoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 212 | encoder_attributes["layer_mean"] = L.Linear(conf.encoder_hidden_units[-1], conf.ndim_z, wscale=wscale) 213 | encoder_attributes["layer_var"] = L.Linear(conf.encoder_hidden_units[-1], conf.ndim_z, wscale=wscale) 214 | encoder = Encoder(**encoder_attributes) 215 | encoder.n_layers = len(encoder_units) 216 | encoder.activation_function = conf.encoder_activation_function 217 | encoder.apply_dropout = conf.encoder_apply_dropout 218 | encoder.apply_batchnorm = conf.encoder_apply_batchnorm 219 | encoder.apply_batchnorm_to_input = conf.encoder_apply_batchnorm_to_input 220 | encoder.batchnorm_before_activation = conf.batchnorm_before_activation 221 | 222 | decoder_attributes = {} 223 | decoder_units = [(conf.ndim_z, conf.decoder_hidden_units[0])] 224 | decoder_units += zip(conf.decoder_hidden_units[:-1], conf.decoder_hidden_units[1:]) 225 | for i, (n_in, n_out) in enumerate(decoder_units): 226 | decoder_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 227 | if conf.batchnorm_before_activation: 228 | decoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 229 | else: 230 | decoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 231 | decoder_attributes["layer_mean"] = L.Linear(conf.decoder_hidden_units[-1], conf.ndim_x, wscale=wscale) 232 | decoder_attributes["layer_var"] = L.Linear(conf.decoder_hidden_units[-1], conf.ndim_x, wscale=wscale) 233 | decoder = GaussianDecoder(**decoder_attributes) 234 | decoder.n_layers = len(decoder_units) 235 | decoder.activation_function = conf.decoder_activation_function 236 | decoder.apply_dropout = conf.decoder_apply_dropout 237 | decoder.apply_batchnorm = conf.decoder_apply_batchnorm 238 | decoder.apply_batchnorm_to_input = conf.decoder_apply_batchnorm_to_input 239 | decoder.batchnorm_before_activation = conf.batchnorm_before_activation 240 | 241 | if conf.gpu_enabled: 242 | encoder.to_gpu() 243 | decoder.to_gpu() 244 | return encoder, decoder 245 | 246 | def train(self, x, L=1, test=False): 247 | batchsize = x.data.shape[0] 248 | z_mean, z_ln_var = self.encoder(x, test=test, apply_f=False) 249 | loss = 0 250 | for l in xrange(L): 251 | # Sample z 252 | z = F.gaussian(z_mean, z_ln_var) 253 | 254 | # Compute lower bound 255 | log_px_z = self.log_px_z(x, z, test=test) 256 | log_pz = self.log_pz(z, z_mean, z_ln_var) 257 | log_qz_x = self.log_qz_x(z, z_mean, z_ln_var) 258 | lower_bound = log_px_z + log_pz - log_qz_x 259 | 260 | loss += -lower_bound 261 | 262 | loss = F.sum(loss) / L / batchsize 263 | 264 | self.zero_grads() 265 | loss.backward() 266 | self.update() 267 | 268 | if self.gpu: 269 | loss.to_cpu() 270 | return loss.data 271 | 272 | class BernoulliM1VAE(VAE): 273 | 274 | def build(self, conf): 275 | wscale = 0.1 276 | encoder_attributes = {} 277 | encoder_units = [(conf.ndim_x, conf.encoder_hidden_units[0])] 278 | encoder_units += zip(conf.encoder_hidden_units[:-1], conf.encoder_hidden_units[1:]) 279 | for i, (n_in, n_out) in enumerate(encoder_units): 280 | encoder_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 281 | if conf.batchnorm_before_activation: 282 | encoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 283 | else: 284 | encoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 285 | encoder_attributes["layer_mean"] = L.Linear(conf.encoder_hidden_units[-1], conf.ndim_z, wscale=wscale) 286 | encoder_attributes["layer_var"] = L.Linear(conf.encoder_hidden_units[-1], conf.ndim_z, wscale=wscale) 287 | encoder = Encoder(**encoder_attributes) 288 | encoder.n_layers = len(encoder_units) 289 | encoder.activation_function = conf.encoder_activation_function 290 | encoder.apply_dropout = conf.encoder_apply_dropout 291 | encoder.apply_batchnorm = conf.encoder_apply_batchnorm 292 | encoder.apply_batchnorm_to_input = conf.encoder_apply_batchnorm_to_input 293 | encoder.batchnorm_before_activation = conf.batchnorm_before_activation 294 | 295 | decoder_attributes = {} 296 | decoder_units = [(conf.ndim_z, conf.decoder_hidden_units[0])] 297 | decoder_units += zip(conf.decoder_hidden_units[:-1], conf.decoder_hidden_units[1:]) 298 | decoder_units += [(conf.decoder_hidden_units[-1], conf.ndim_x)] 299 | for i, (n_in, n_out) in enumerate(decoder_units): 300 | decoder_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 301 | if conf.batchnorm_before_activation: 302 | decoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 303 | else: 304 | decoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 305 | decoder = BernoulliDecoder(**decoder_attributes) 306 | decoder.n_layers = len(decoder_units) 307 | decoder.activation_function = conf.decoder_activation_function 308 | decoder.apply_dropout = conf.decoder_apply_dropout 309 | decoder.apply_batchnorm = conf.decoder_apply_batchnorm 310 | decoder.apply_batchnorm_to_input = conf.decoder_apply_batchnorm_to_input 311 | decoder.batchnorm_before_activation = conf.batchnorm_before_activation 312 | 313 | if conf.gpu_enabled: 314 | encoder.to_gpu() 315 | decoder.to_gpu() 316 | return encoder, decoder 317 | 318 | def train(self, x, L=1, test=False): 319 | batchsize = x.data.shape[0] 320 | z_mean, z_ln_var = self.encoder(x, test=test, apply_f=False) 321 | loss = 0 322 | for l in xrange(L): 323 | # Sample z 324 | z = F.gaussian(z_mean, z_ln_var) 325 | # Decode 326 | x_expectation = self.decoder(z, test=test, apply_f=False) 327 | # E_q(z|x)[log(p(x|z))] 328 | loss += self.bernoulli_nll_keepbatch(x, x_expectation) 329 | if L > 1: 330 | loss /= L 331 | # KL divergence 332 | loss += self.gaussian_kl_divergence_keepbatch(z_mean, z_ln_var) 333 | loss = F.sum(loss) / batchsize 334 | 335 | self.zero_grads() 336 | loss.backward() 337 | self.update() 338 | 339 | if self.gpu: 340 | loss.to_cpu() 341 | return loss.data 342 | 343 | class Encoder(chainer.Chain): 344 | def __init__(self, **layers): 345 | super(Encoder, self).__init__(**layers) 346 | self.activation_function = "softplus" 347 | self.apply_batchnorm_to_input = True 348 | self.apply_batchnorm = True 349 | self.apply_dropout = True 350 | self.batchnorm_before_activation = True 351 | 352 | @property 353 | def xp(self): 354 | return np if self._cpu else cuda.cupy 355 | 356 | def forward_one_step(self, x, test=False, apply_f=True): 357 | f = activations[self.activation_function] 358 | 359 | chain = [x] 360 | 361 | # Hidden 362 | for i in range(self.n_layers): 363 | u = chain[-1] 364 | if self.batchnorm_before_activation: 365 | u = getattr(self, "layer_%i" % i)(u) 366 | if i == 0: 367 | if self.apply_batchnorm_to_input: 368 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 369 | else: 370 | if self.apply_batchnorm: 371 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 372 | if self.batchnorm_before_activation == False: 373 | u = getattr(self, "layer_%i" % i)(u) 374 | output = f(u) 375 | if self.apply_dropout: 376 | output = F.dropout(output, train=not test) 377 | chain.append(output) 378 | 379 | u = chain[-1] 380 | mean = self.layer_mean(u) 381 | 382 | # log(sigma^2) 383 | u = chain[-1] 384 | ln_var = self.layer_var(u) 385 | 386 | return mean, ln_var 387 | 388 | def __call__(self, x, test=False, apply_f=True): 389 | mean, ln_var = self.forward_one_step(x, test=test, apply_f=apply_f) 390 | if apply_f: 391 | return F.gaussian(mean, ln_var) 392 | return mean, ln_var 393 | 394 | # Network structure is same as the Encoder 395 | class GaussianDecoder(Encoder): 396 | 397 | def __call__(self, x, test=False, apply_f=False): 398 | mean, ln_var = self.forward_one_step(x, test=test, apply_f=False) 399 | if apply_f: 400 | return F.gaussian(mean, ln_var) 401 | return mean, ln_var 402 | 403 | class BernoulliDecoder(chainer.Chain): 404 | def __init__(self, **layers): 405 | super(BernoulliDecoder, self).__init__(**layers) 406 | self.activation_function = "softplus" 407 | self.apply_batchnorm_to_input = True 408 | self.apply_batchnorm = True 409 | self.apply_dropout = True 410 | self.batchnorm_before_activation = True 411 | 412 | @property 413 | def xp(self): 414 | return np if self._cpu else cuda.cupy 415 | 416 | def forward_one_step(self, x, test=False): 417 | f = activations[self.activation_function] 418 | chain = [x] 419 | 420 | # Hidden 421 | for i in range(self.n_layers): 422 | u = chain[-1] 423 | if self.batchnorm_before_activation: 424 | u = getattr(self, "layer_%i" % i)(u) 425 | if i == 0: 426 | if self.apply_batchnorm_to_input: 427 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 428 | elif i == self.n_layers - 1: 429 | if self.apply_batchnorm_to_input and self.batchnorm_before_activation == False: 430 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 431 | else: 432 | if self.apply_batchnorm: 433 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 434 | if self.batchnorm_before_activation == False: 435 | u = getattr(self, "layer_%i" % i)(u) 436 | if i == self.n_layers - 1: 437 | output = u 438 | else: 439 | output = f(u) 440 | if self.apply_dropout: 441 | output = F.dropout(output, train=not test) 442 | chain.append(output) 443 | 444 | return chain[-1] 445 | 446 | def __call__(self, x, test=False, apply_f=False): 447 | output = self.forward_one_step(x, test=test) 448 | if apply_f: 449 | return F.sigmoid(output) 450 | return output -------------------------------------------------------------------------------- /vae_m2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import math 3 | import numpy as np 4 | import chainer, os, collections, six 5 | from chainer import cuda, Variable, optimizers, serializers, function, optimizer 6 | from chainer.utils import type_check 7 | from chainer import functions as F 8 | from chainer import links as L 9 | 10 | activations = { 11 | "sigmoid": F.sigmoid, 12 | "tanh": F.tanh, 13 | "softplus": F.softplus, 14 | "relu": F.relu, 15 | "leaky_relu": F.leaky_relu, 16 | "elu": F.elu 17 | } 18 | 19 | class Conf(): 20 | def __init__(self): 21 | self.image_width = 28 22 | self.image_height = 28 23 | self.ndim_x = 28 * 28 24 | self.ndim_y = 10 25 | self.ndim_z = 50 26 | 27 | # True : y = f(BN(Wx + b)) 28 | # False: y = f(W*BN(x) + b) 29 | self.batchnorm_before_activation = True 30 | 31 | # gaussianmarg | gaussian 32 | self.type_pz = "gaussianmarg" 33 | self.type_qz = "gaussianmarg" 34 | 35 | self.encoder_xy_z_hidden_units = [500] 36 | self.encoder_xy_z_activation_function = "softplus" 37 | self.encoder_xy_z_apply_dropout = False 38 | self.encoder_xy_z_apply_batchnorm = True 39 | self.encoder_xy_z_apply_batchnorm_to_input = True 40 | 41 | self.encoder_x_y_hidden_units = [500] 42 | self.encoder_x_y_activation_function = "softplus" 43 | self.encoder_x_y_apply_dropout = False 44 | self.encoder_x_y_apply_batchnorm = True 45 | self.encoder_x_y_apply_batchnorm_to_input = True 46 | 47 | self.decoder_hidden_units = [500] 48 | self.decoder_activation_function = "softplus" 49 | self.decoder_apply_dropout = False 50 | self.decoder_apply_batchnorm = True 51 | self.decoder_apply_batchnorm_to_input = True 52 | 53 | self.gpu_enabled = True 54 | self.learning_rate = 0.0003 55 | self.gradient_momentum = 0.9 56 | self.gradient_clipping = 5.0 57 | 58 | def check(self): 59 | pass 60 | 61 | def sum_sqnorm(arr): 62 | sq_sum = collections.defaultdict(float) 63 | for x in arr: 64 | with cuda.get_device(x) as dev: 65 | x = x.ravel() 66 | s = x.dot(x) 67 | sq_sum[int(dev)] += s 68 | return sum([float(i) for i in six.itervalues(sq_sum)]) 69 | 70 | class GradientClipping(object): 71 | name = "GradientClipping" 72 | 73 | def __init__(self, threshold): 74 | self.threshold = threshold 75 | 76 | def __call__(self, opt): 77 | norm = np.sqrt(sum_sqnorm([p.grad for p in opt.target.params()])) 78 | if norm == 0: 79 | return 80 | rate = self.threshold / norm 81 | if rate < 1: 82 | for param in opt.target.params(): 83 | grad = param.grad 84 | with cuda.get_device(grad): 85 | grad *= rate 86 | 87 | class VAE(): 88 | # name is used for the filename when you save the model 89 | def __init__(self, conf, name="vae"): 90 | conf.check() 91 | self.encoder_xy_z, self.encoder_x_y, self.decoder = self.build(conf) 92 | self.name = name 93 | 94 | self.optimizer_encoder_xy_z = optimizers.Adam(alpha=conf.learning_rate, beta1=conf.gradient_momentum) 95 | self.optimizer_encoder_xy_z.setup(self.encoder_xy_z) 96 | # self.optimizer_encoder_xy_z.add_hook(optimizer.WeightDecay(0.00001)) 97 | self.optimizer_encoder_xy_z.add_hook(GradientClipping(conf.gradient_clipping)) 98 | 99 | self.optimizer_encoder_x_y = optimizers.Adam(alpha=conf.learning_rate, beta1=conf.gradient_momentum) 100 | self.optimizer_encoder_x_y.setup(self.encoder_x_y) 101 | # self.optimizer_encoder_x_y.add_hook(optimizer.WeightDecay(0.00001)) 102 | self.optimizer_encoder_x_y.add_hook(GradientClipping(conf.gradient_clipping)) 103 | 104 | self.optimizer_decoder = optimizers.Adam(alpha=conf.learning_rate, beta1=conf.gradient_momentum) 105 | self.optimizer_decoder.setup(self.decoder) 106 | # self.optimizer_decoder.add_hook(optimizer.WeightDecay(0.00001)) 107 | self.optimizer_decoder.add_hook(GradientClipping(conf.gradient_clipping)) 108 | 109 | self.type_pz = conf.type_pz 110 | self.type_qz = conf.type_qz 111 | 112 | def build(self, conf): 113 | raise Exception() 114 | 115 | def train(self, x, L=1, test=False): 116 | raise Exception() 117 | 118 | @property 119 | def xp(self): 120 | return self.encoder_xy_z.xp 121 | 122 | @property 123 | def gpu(self): 124 | if cuda.available is False: 125 | return False 126 | return True if self.xp is cuda.cupy else False 127 | 128 | def zero_grads(self): 129 | self.optimizer_encoder_xy_z.zero_grads() 130 | self.optimizer_encoder_x_y.zero_grads() 131 | self.optimizer_decoder.zero_grads() 132 | 133 | def update(self): 134 | self.optimizer_encoder_xy_z.update() 135 | self.optimizer_encoder_x_y.update() 136 | self.optimizer_decoder.update() 137 | 138 | def update_classifier(self): 139 | self.optimizer_encoder_x_y.update() 140 | 141 | def encode_x_z(self, x, test=False): 142 | y = self.sample_x_y(x, argmax=False, test=test) 143 | z = self.encoder_xy_z(x, y, test=test) 144 | return z 145 | 146 | def encode_xy_z(self, x, y, test=False): 147 | z = self.encoder_xy_z(x, y, test=test) 148 | return z 149 | 150 | def decode_zy_x(self, z, y, test=False, apply_f=True): 151 | x = self.decoder(z, y, test=test, apply_f=apply_f) 152 | return x 153 | 154 | def sample_x_y(self, x, argmax=False, test=False): 155 | batchsize = x.data.shape[0] 156 | y_distribution = self.encoder_x_y(x, test=test, softmax=True).data 157 | n_labels = y_distribution.shape[1] 158 | if self.gpu: 159 | y_distribution = cuda.to_cpu(y_distribution) 160 | sampled_y = np.zeros((batchsize, n_labels), dtype=np.float32) 161 | if argmax: 162 | args = np.argmax(y_distribution, axis=1) 163 | for b in xrange(batchsize): 164 | sampled_y[b, args[b]] = 1 165 | else: 166 | for b in xrange(batchsize): 167 | label_id = np.random.choice(np.arange(n_labels), p=y_distribution[b]) 168 | sampled_y[b, label_id] = 1 169 | sampled_y = Variable(sampled_y) 170 | if self.gpu: 171 | sampled_y.to_gpu() 172 | return sampled_y 173 | 174 | def sample_x_label(self, x, argmax=True, test=False): 175 | batchsize = x.data.shape[0] 176 | y_distribution = self.encoder_x_y(x, test=test, softmax=True).data 177 | n_labels = y_distribution.shape[1] 178 | if self.gpu: 179 | y_distribution = cuda.to_cpu(y_distribution) 180 | if argmax: 181 | sampled_label = np.argmax(y_distribution, axis=1) 182 | else: 183 | sampled_label = np.zeros((batchsize,), dtype=np.int32) 184 | labels = np.arange(n_labels) 185 | for b in xrange(batchsize): 186 | label_id = np.random.choice(labels, p=y_distribution[b]) 187 | sampled_label[b] = 1 188 | return sampled_label 189 | 190 | def bernoulli_nll_keepbatch(self, x, y): 191 | nll = F.softplus(y) - x * y 192 | return F.sum(nll, axis=1) 193 | 194 | def gaussian_nll_keepbatch(self, x, mean, ln_var, clip=True): 195 | if clip: 196 | clip_min = math.log(0.001) 197 | clip_max = math.log(10) 198 | ln_var = F.clip(ln_var, clip_min, clip_max) 199 | x_prec = F.exp(-ln_var) 200 | x_diff = x - mean 201 | x_power = (x_diff * x_diff) * x_prec * 0.5 202 | return F.sum((math.log(2.0 * math.pi) + ln_var) * 0.5 + x_power, axis=1) 203 | 204 | def gaussian_kl_divergence_keepbatch(self, mean, ln_var): 205 | var = F.exp(ln_var) 206 | kld = F.sum(mean * mean + var - ln_var - 1, axis=1) * 0.5 207 | return kld 208 | 209 | def log_px_zy(self, x, z, y, test=False): 210 | if isinstance(self.decoder, BernoulliDecoder): 211 | # do not apply F.sigmoid to the output of the decoder 212 | raw_output = self.decoder(z, y, test=test, apply_f=False) 213 | negative_log_likelihood = self.bernoulli_nll_keepbatch(x, raw_output) 214 | log_px_zy = -negative_log_likelihood 215 | else: 216 | x_mean, x_ln_var = self.decoder(z, y, test=test, apply_f=False) 217 | negative_log_likelihood = self.gaussian_nll_keepbatch(x, x_mean, x_ln_var) 218 | log_px_zy = -negative_log_likelihood 219 | return log_px_zy 220 | 221 | def log_py(self, y, test=False): 222 | xp = self.xp 223 | num_types_of_label = y.data.shape[1] 224 | # prior p(y) expecting that all classes are evenly distributed 225 | constant = math.log(1.0 / num_types_of_label) 226 | log_py = xp.full((y.data.shape[0],), constant, xp.float32) 227 | return Variable(log_py) 228 | 229 | # this will not be used 230 | def log_pz(self, z, mean, ln_var, test=False): 231 | if self.type_pz == "gaussianmarg": 232 | # \int q(z)logp(z)dz = -(J/2)*log2pi - (1/2)*sum_{j=1}^{J} (mu^2 + var) 233 | # See Appendix B [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114) 234 | log_pz = -0.5 * (math.log(2.0 * math.pi) + mean * mean + F.exp(ln_var)) 235 | elif self.type_pz == "gaussian": 236 | log_pz = -0.5 * math.log(2.0 * math.pi) - 0.5 * z ** 2 237 | return F.sum(log_pz, axis=1) 238 | 239 | # this will not be used 240 | def log_qz_xy(self, z, mean, ln_var, test=False): 241 | if self.type_qz == "gaussianmarg": 242 | # \int q(z)logq(z)dz = -(J/2)*log2pi - (1/2)*sum_{j=1}^{J} (1 + logvar) 243 | # See Appendix B [Auto-Encoding Variational Bayes](http://arxiv.org/abs/1312.6114) 244 | log_qz_xy = -0.5 * F.sum((math.log(2.0 * math.pi) + 1 + ln_var), axis=1) 245 | elif self.type_qz == "gaussian": 246 | log_qz_xy = -self.gaussian_nll_keepbatch(z, mean, ln_var) 247 | return log_qz_xy 248 | 249 | def train(self, labeled_x, labeled_y, label_ids, unlabeled_x, test=False): 250 | loss, loss_labeled, loss_unlabeled = self.compute_lower_bound_loss(labeled_x, labeled_y, label_ids, unlabeled_x, test=test) 251 | self.zero_grads() 252 | loss.backward() 253 | self.update() 254 | 255 | if self.gpu: 256 | loss_labeled.to_cpu() 257 | if loss_unlabeled is not None: 258 | loss_unlabeled.to_cpu() 259 | 260 | if loss_unlabeled is None: 261 | return loss_labeled.data, 0 262 | 263 | return loss_labeled.data, loss_unlabeled.data 264 | 265 | # Extended objective eq.9 266 | def train_classification(self, labeled_x, label_ids, alpha=1.0, test=False): 267 | loss = alpha * self.compute_classification_loss(labeled_x, label_ids, test=test) 268 | self.zero_grads() 269 | loss.backward() 270 | self.update_classifier() 271 | if self.gpu: 272 | loss.to_cpu() 273 | return loss.data 274 | 275 | def train_jointly(self, labeled_x, labeled_y, label_ids, unlabeled_x, alpha=1.0, test=False): 276 | loss_lower_bound, loss_lb_labled, loss_lb_unlabled = self.compute_lower_bound_loss(labeled_x, labeled_y, label_ids, unlabeled_x, test=test) 277 | loss_classification = alpha * self.compute_classification_loss(labeled_x, label_ids, test=test) 278 | loss = loss_lower_bound + loss_classification 279 | self.zero_grads() 280 | loss.backward() 281 | self.update() 282 | if self.gpu: 283 | loss_lb_labled.to_cpu() 284 | if loss_lb_unlabled is not None: 285 | loss_lb_unlabled.to_cpu() 286 | loss_classification.to_cpu() 287 | 288 | if loss_lb_unlabled is None: 289 | return loss_lb_labled.data, 0, loss_classification.data 290 | 291 | return loss_lb_labled.data, loss_lb_unlabled.data, loss_classification.data 292 | 293 | def compute_lower_bound_loss(self, labeled_x, labeled_y, label_ids, unlabeled_x, test=False): 294 | 295 | def lower_bound(log_px_zy, log_py, log_pz, log_qz_xy): 296 | lb = log_px_zy + log_py + log_pz - log_qz_xy 297 | return lb 298 | 299 | # _l: labeled 300 | # _u: unlabeled 301 | batchsize_l = labeled_x.data.shape[0] 302 | batchsize_u = unlabeled_x.data.shape[0] 303 | num_types_of_label = labeled_y.data.shape[1] 304 | xp = self.xp 305 | 306 | ### Lower bound for labeled data ### 307 | # Compute eq.6 -L(x,y) 308 | z_mean_l, z_ln_var_l = self.encoder_xy_z(labeled_x, labeled_y, test=test, apply_f=False) 309 | z_l = F.gaussian(z_mean_l, z_ln_var_l) 310 | log_px_zy_l = self.log_px_zy(labeled_x, z_l, labeled_y, test=test) 311 | log_py_l = self.log_py(labeled_y, test=test) 312 | if False: 313 | log_pz_l = self.log_pz(z_l, z_mean_l, z_ln_var_l, test=test) 314 | log_qz_xy_l = self.log_qz_xy(z_l, z_mean_l, z_ln_var_l, test=test) 315 | lower_bound_l = lower_bound(log_px_zy_l, log_py_l, log_pz_l, log_qz_xy_l) 316 | else: 317 | lower_bound_l = log_px_zy_l + log_py_l - self.gaussian_kl_divergence_keepbatch(z_mean_l, z_ln_var_l) 318 | 319 | if batchsize_u > 0: 320 | ### Lower bound for unlabeled data ### 321 | # To marginalize y, we repeat unlabeled x, and construct a target (batchsize_u * num_types_of_label) x num_types_of_label 322 | # Example of n-dimensional x and target matrix for a 3 class problem and batch_size=2. 323 | # unlabeled_x_ext y_ext 324 | # [[x0[0], x0[1], ..., x0[n]] [[1, 0, 0] 325 | # [x1[0], x1[1], ..., x1[n]] [1, 0, 0] 326 | # [x0[0], x0[1], ..., x0[n]] [0, 1, 0] 327 | # [x1[0], x1[1], ..., x1[n]] [0, 1, 0] 328 | # [x0[0], x0[1], ..., x0[n]] [0, 0, 1] 329 | # [x1[0], x1[1], ..., x1[n]]] [0, 0, 1]] 330 | 331 | unlabeled_x_ext = xp.zeros((batchsize_u * num_types_of_label, unlabeled_x.data.shape[1]), dtype=xp.float32) 332 | y_ext = xp.zeros((batchsize_u * num_types_of_label, num_types_of_label), dtype=xp.float32) 333 | for n in xrange(num_types_of_label): 334 | y_ext[n * batchsize_u:(n + 1) * batchsize_u,n] = 1 335 | unlabeled_x_ext[n * batchsize_u:(n + 1) * batchsize_u] = unlabeled_x.data 336 | y_ext = Variable(y_ext) 337 | unlabeled_x_ext = Variable(unlabeled_x_ext) 338 | 339 | # Compute eq.6 -L(x,y) for unlabeled data 340 | z_mean_u_ext, z_mean_ln_var_u_ext = self.encoder_xy_z(unlabeled_x_ext, y_ext, test=test, apply_f=False) 341 | z_u_ext = F.gaussian(z_mean_u_ext, z_mean_ln_var_u_ext) 342 | log_px_zy_u = self.log_px_zy(unlabeled_x_ext, z_u_ext, y_ext, test=test) 343 | log_py_u = self.log_py(y_ext, test=test) 344 | if False: 345 | log_pz_u = self.log_pz(z_u_ext, z_mean_u_ext, z_mean_ln_var_u_ext, test=test) 346 | log_qz_xy_u = self.log_qz_xy(z_u_ext, z_mean_u_ext, z_mean_ln_var_u_ext, test=test) 347 | lower_bound_u = lower_bound(log_px_zy_u, log_py_u, log_pz_u, log_qz_xy_u) 348 | else: 349 | lower_bound_u = log_px_zy_u + log_py_u - self.gaussian_kl_divergence_keepbatch(z_mean_u_ext, z_mean_ln_var_u_ext) 350 | 351 | # Compute eq.7 sum_y{q(y|x){-L(x,y) + H(q(y|x))}} 352 | # Let LB(xn, y) be the lower bound for an input image xn and a label y (y = 0, 1, ..., 9). 353 | # Let bs be the batchsize. 354 | # 355 | # lower_bound_u is a vector and it looks like... 356 | # [LB(x0,0), LB(x1,0), ..., LB(x_bs,0), LB(x0,1), LB(x1,1), ..., LB(x_bs,1), ..., LB(x0,9), LB(x1,9), ..., LB(x_bs,9)] 357 | # 358 | # After reshaping. (axis 1 corresponds to label, axis 2 corresponds to batch) 359 | # [[LB(x0,0), LB(x1,0), ..., LB(x_bs,0)], 360 | # [LB(x0,1), LB(x1,1), ..., LB(x_bs,1)], 361 | # . 362 | # . 363 | # . 364 | # [LB(x0,9), LB(x1,9), ..., LB(x_bs,9)]] 365 | # 366 | # After transposing. (axis 1 corresponds to batch) 367 | # [[LB(x0,0), LB(x0,1), ..., LB(x0,9)], 368 | # [LB(x1,0), LB(x1,1), ..., LB(x1,9)], 369 | # . 370 | # . 371 | # . 372 | # [LB(x_bs,0), LB(x_bs,1), ..., LB(x_bs,9)]] 373 | lower_bound_u = F.transpose(F.reshape(lower_bound_u, (num_types_of_label, batchsize_u))) 374 | 375 | y_distribution = self.encoder_x_y(unlabeled_x, test=test, softmax=True) 376 | lower_bound_u = y_distribution * (lower_bound_u - F.log(y_distribution + 1e-6)) 377 | 378 | loss_labeled = -F.sum(lower_bound_l) / batchsize_l 379 | loss_unlabeled = -F.sum(lower_bound_u) / batchsize_u 380 | loss = loss_labeled + loss_unlabeled 381 | else: 382 | loss_unlabeled = None 383 | loss_labeled = -F.sum(lower_bound_l) / batchsize_l 384 | loss = loss_labeled 385 | 386 | return loss, loss_labeled, loss_unlabeled 387 | 388 | # Extended objective eq.9 389 | def compute_classification_loss(self, labeled_x, label_ids, test=False): 390 | y_distribution = self.encoder_x_y(labeled_x, softmax=False, test=test) 391 | batchsize = labeled_x.data.shape[0] 392 | num_types_of_label = y_distribution.data.shape[1] 393 | 394 | loss = F.softmax_cross_entropy(y_distribution, label_ids) 395 | return loss 396 | 397 | def load(self, dir=None): 398 | if dir is None: 399 | raise Exception() 400 | for attr in vars(self): 401 | prop = getattr(self, attr) 402 | if isinstance(prop, chainer.Chain) or isinstance(prop, chainer.optimizer.GradientMethod): 403 | filename = dir + "/%s_%s.hdf5" % (self.name, attr) 404 | if os.path.isfile(filename): 405 | print "loading", filename 406 | serializers.load_hdf5(filename, prop) 407 | else: 408 | print filename, "missing." 409 | print "model loaded." 410 | 411 | def save(self, dir=None): 412 | if dir is None: 413 | raise Exception() 414 | try: 415 | os.mkdir(dir) 416 | except: 417 | pass 418 | for attr in vars(self): 419 | prop = getattr(self, attr) 420 | if isinstance(prop, chainer.Chain) or isinstance(prop, chainer.optimizer.GradientMethod): 421 | serializers.save_hdf5(dir + "/%s_%s.hdf5" % (self.name, attr), prop) 422 | print "model saved." 423 | 424 | class GaussianM2VAE(VAE): 425 | 426 | def build(self, conf): 427 | wscale = 0.1 428 | encoder_xy_z_attributes = {} 429 | encoder_xy_z_units = zip(conf.encoder_xy_z_hidden_units[:-1], conf.encoder_xy_z_hidden_units[1:]) 430 | for i, (n_in, n_out) in enumerate(encoder_xy_z_units): 431 | encoder_xy_z_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 432 | if conf.batchnorm_before_activation: 433 | encoder_xy_z_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 434 | else: 435 | encoder_xy_z_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 436 | encoder_xy_z_attributes["layer_merge_x"] = L.Linear(conf.ndim_x, conf.encoder_xy_z_hidden_units[0], wscale=wscale) 437 | encoder_xy_z_attributes["layer_merge_y"] = L.Linear(conf.ndim_y, conf.encoder_xy_z_hidden_units[0], wscale=wscale) 438 | encoder_xy_z_attributes["batchnorm_merge"] = L.BatchNormalization(conf.encoder_xy_z_hidden_units[0]) 439 | encoder_xy_z_attributes["layer_output_mean"] = L.Linear(conf.encoder_xy_z_hidden_units[-1], conf.ndim_z, wscale=wscale) 440 | encoder_xy_z_attributes["layer_output_var"] = L.Linear(conf.encoder_xy_z_hidden_units[-1], conf.ndim_z, wscale=wscale) 441 | encoder_xy_z = GaussianEncoder(**encoder_xy_z_attributes) 442 | encoder_xy_z.n_layers = len(encoder_xy_z_units) 443 | encoder_xy_z.activation_function = conf.encoder_xy_z_activation_function 444 | encoder_xy_z.apply_dropout = conf.encoder_xy_z_apply_dropout 445 | encoder_xy_z.apply_batchnorm = conf.encoder_xy_z_apply_batchnorm 446 | encoder_xy_z.apply_batchnorm_to_input = conf.encoder_xy_z_apply_batchnorm_to_input 447 | encoder_xy_z.batchnorm_before_activation = conf.batchnorm_before_activation 448 | 449 | encoder_x_y_attributes = {} 450 | encoder_x_y_units = [(conf.ndim_x, conf.encoder_x_y_hidden_units[0])] 451 | encoder_x_y_units += zip(conf.encoder_x_y_hidden_units[:-1], conf.encoder_x_y_hidden_units[1:]) 452 | encoder_x_y_units += [(conf.encoder_x_y_hidden_units[-1], conf.ndim_y)] 453 | for i, (n_in, n_out) in enumerate(encoder_x_y_units): 454 | encoder_x_y_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 455 | if conf.batchnorm_before_activation: 456 | encoder_x_y_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 457 | else: 458 | encoder_x_y_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 459 | encoder_x_y = SoftmaxEncoder(**encoder_x_y_attributes) 460 | encoder_x_y.n_layers = len(encoder_x_y_units) 461 | encoder_x_y.activation_function = conf.encoder_x_y_activation_function 462 | encoder_x_y.apply_dropout = conf.encoder_x_y_apply_dropout 463 | encoder_x_y.apply_batchnorm = conf.encoder_x_y_apply_batchnorm 464 | encoder_x_y.apply_batchnorm_to_input = conf.encoder_x_y_apply_batchnorm_to_input 465 | encoder_x_y.batchnorm_before_activation = conf.batchnorm_before_activation 466 | 467 | decoder_attributes = {} 468 | decoder_units = zip(conf.decoder_hidden_units[:-1], conf.decoder_hidden_units[1:]) 469 | for i, (n_in, n_out) in enumerate(decoder_units): 470 | decoder_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 471 | if conf.batchnorm_before_activation: 472 | decoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 473 | else: 474 | decoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 475 | 476 | decoder_attributes["layer_merge_x"] = L.Linear(conf.ndim_z, conf.decoder_hidden_units[0], wscale=wscale) 477 | decoder_attributes["layer_merge_y"] = L.Linear(conf.ndim_y, conf.decoder_hidden_units[0], wscale=wscale) 478 | decoder_attributes["batchnorm_merge"] = L.BatchNormalization(conf.decoder_hidden_units[0]) 479 | decoder_attributes["layer_output_mean"] = L.Linear(conf.decoder_hidden_units[-1], conf.ndim_x, wscale=wscale) 480 | decoder_attributes["layer_output_var"] = L.Linear(conf.decoder_hidden_units[-1], conf.ndim_x, wscale=wscale) 481 | decoder = GaussianDecoder(**decoder_attributes) 482 | decoder.n_layers = len(decoder_units) 483 | decoder.activation_function = conf.decoder_activation_function 484 | decoder.apply_dropout = conf.decoder_apply_dropout 485 | decoder.apply_batchnorm = conf.decoder_apply_batchnorm 486 | decoder.apply_batchnorm_to_input = conf.decoder_apply_batchnorm_to_input 487 | decoder.batchnorm_before_activation = conf.batchnorm_before_activation 488 | 489 | if conf.gpu_enabled: 490 | encoder_xy_z.to_gpu() 491 | encoder_x_y.to_gpu() 492 | decoder.to_gpu() 493 | return encoder_xy_z, encoder_x_y, decoder 494 | 495 | class BernoulliM2VAE(VAE): 496 | 497 | def build(self, conf): 498 | wscale = 0.1 499 | encoder_xy_z_attributes = {} 500 | encoder_xy_z_units = zip(conf.encoder_xy_z_hidden_units[:-1], conf.encoder_xy_z_hidden_units[1:]) 501 | for i, (n_in, n_out) in enumerate(encoder_xy_z_units): 502 | encoder_xy_z_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 503 | if conf.batchnorm_before_activation: 504 | encoder_xy_z_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 505 | else: 506 | encoder_xy_z_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 507 | encoder_xy_z_attributes["layer_merge_x"] = L.Linear(conf.ndim_x, conf.encoder_xy_z_hidden_units[0], wscale=wscale) 508 | encoder_xy_z_attributes["layer_merge_y"] = L.Linear(conf.ndim_y, conf.encoder_xy_z_hidden_units[0], wscale=wscale) 509 | encoder_xy_z_attributes["batchnorm_merge"] = L.BatchNormalization(conf.encoder_xy_z_hidden_units[0]) 510 | encoder_xy_z_attributes["layer_output_mean"] = L.Linear(conf.encoder_xy_z_hidden_units[-1], conf.ndim_z, wscale=wscale) 511 | encoder_xy_z_attributes["layer_output_var"] = L.Linear(conf.encoder_xy_z_hidden_units[-1], conf.ndim_z, wscale=wscale) 512 | encoder_xy_z = GaussianEncoder(**encoder_xy_z_attributes) 513 | encoder_xy_z.n_layers = len(encoder_xy_z_units) 514 | encoder_xy_z.activation_function = conf.encoder_xy_z_activation_function 515 | encoder_xy_z.apply_dropout = conf.encoder_xy_z_apply_dropout 516 | encoder_xy_z.apply_batchnorm = conf.encoder_xy_z_apply_batchnorm 517 | encoder_xy_z.apply_batchnorm_to_input = conf.encoder_xy_z_apply_batchnorm_to_input 518 | encoder_xy_z.batchnorm_before_activation = conf.batchnorm_before_activation 519 | 520 | encoder_x_y_attributes = {} 521 | encoder_x_y_units = [(conf.ndim_x, conf.encoder_x_y_hidden_units[0])] 522 | encoder_x_y_units += zip(conf.encoder_x_y_hidden_units[:-1], conf.encoder_x_y_hidden_units[1:]) 523 | encoder_x_y_units += [(conf.encoder_x_y_hidden_units[-1], conf.ndim_y)] 524 | for i, (n_in, n_out) in enumerate(encoder_x_y_units): 525 | encoder_x_y_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 526 | if conf.batchnorm_before_activation: 527 | encoder_x_y_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 528 | else: 529 | encoder_x_y_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 530 | encoder_x_y = SoftmaxEncoder(**encoder_x_y_attributes) 531 | encoder_x_y.n_layers = len(encoder_x_y_units) 532 | encoder_x_y.activation_function = conf.encoder_x_y_activation_function 533 | encoder_x_y.apply_dropout = conf.encoder_x_y_apply_dropout 534 | encoder_x_y.apply_batchnorm = conf.encoder_x_y_apply_batchnorm 535 | encoder_x_y.apply_batchnorm_to_input = conf.encoder_x_y_apply_batchnorm_to_input 536 | encoder_x_y.batchnorm_before_activation = conf.batchnorm_before_activation 537 | 538 | decoder_attributes = {} 539 | decoder_units = zip(conf.decoder_hidden_units[:-1], conf.decoder_hidden_units[1:]) 540 | decoder_units += [(conf.decoder_hidden_units[-1], conf.ndim_x)] 541 | for i, (n_in, n_out) in enumerate(decoder_units): 542 | decoder_attributes["layer_%i" % i] = L.Linear(n_in, n_out, wscale=wscale) 543 | if conf.batchnorm_before_activation: 544 | decoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_out) 545 | else: 546 | decoder_attributes["batchnorm_%i" % i] = L.BatchNormalization(n_in) 547 | decoder_attributes["layer_merge_z"] = L.Linear(conf.ndim_z, conf.decoder_hidden_units[0], wscale=wscale) 548 | decoder_attributes["layer_merge_y"] = L.Linear(conf.ndim_y, conf.decoder_hidden_units[0], wscale=wscale) 549 | decoder_attributes["batchnorm_merge"] = L.BatchNormalization(conf.decoder_hidden_units[0]) 550 | decoder = BernoulliDecoder(**decoder_attributes) 551 | decoder.n_layers = len(decoder_units) 552 | decoder.activation_function = conf.decoder_activation_function 553 | decoder.apply_dropout = conf.decoder_apply_dropout 554 | decoder.apply_batchnorm = conf.decoder_apply_batchnorm 555 | decoder.apply_batchnorm_to_input = conf.decoder_apply_batchnorm_to_input 556 | decoder.batchnorm_before_activation = conf.batchnorm_before_activation 557 | 558 | if conf.gpu_enabled: 559 | encoder_xy_z.to_gpu() 560 | encoder_x_y.to_gpu() 561 | decoder.to_gpu() 562 | return encoder_xy_z, encoder_x_y, decoder 563 | 564 | class SoftmaxEncoder(chainer.Chain): 565 | def __init__(self, **layers): 566 | super(SoftmaxEncoder, self).__init__(**layers) 567 | self.activation_function = "softplus" 568 | self.apply_batchnorm_to_input = True 569 | self.apply_batchnorm = True 570 | self.apply_dropout = False 571 | self.batchnorm_before_activation = True 572 | 573 | @property 574 | def xp(self): 575 | return np if self._cpu else cuda.cupy 576 | 577 | def forward_one_step(self, x, test): 578 | f = activations[self.activation_function] 579 | chain = [x] 580 | 581 | for i in range(self.n_layers): 582 | u = chain[-1] 583 | if self.batchnorm_before_activation: 584 | u = getattr(self, "layer_%i" % i)(u) 585 | if i == 0: 586 | if self.apply_batchnorm_to_input: 587 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 588 | elif i == self.n_layers - 1: 589 | if self.apply_batchnorm and self.batchnorm_before_activation == False: 590 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 591 | else: 592 | if self.apply_batchnorm: 593 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 594 | if self.batchnorm_before_activation == False: 595 | u = getattr(self, "layer_%i" % i)(u) 596 | if i == self.n_layers - 1: 597 | output = u 598 | else: 599 | output = f(u) 600 | if self.apply_dropout: 601 | output = F.dropout(output, train=not test) 602 | chain.append(output) 603 | 604 | return chain[-1] 605 | 606 | def __call__(self, x, test=False, softmax=True): 607 | output = self.forward_one_step(x, test=test) 608 | if softmax: 609 | return F.softmax(output) 610 | return output 611 | 612 | class GaussianEncoder(chainer.Chain): 613 | def __init__(self, **layers): 614 | super(GaussianEncoder, self).__init__(**layers) 615 | self.activation_function = "softplus" 616 | self.apply_batchnorm_to_input = True 617 | self.apply_batchnorm = True 618 | self.apply_dropout = False 619 | self.batchnorm_before_activation = True 620 | 621 | @property 622 | def xp(self): 623 | return np if self._cpu else cuda.cupy 624 | 625 | def forward_one_step(self, x, y, test=False, apply_f=True): 626 | f = activations[self.activation_function] 627 | 628 | if self.apply_batchnorm_to_input: 629 | if self.batchnorm_before_activation: 630 | merged_input = f(self.batchnorm_merge(self.layer_merge_x(x) + self.layer_merge_y(y), test=test)) 631 | else: 632 | merged_input = f(self.layer_merge_x(self.batchnorm_merge(x, test=test)) + self.layer_merge_y(y)) 633 | else: 634 | merged_input = f(self.layer_merge_x(x) + self.layer_merge_y(y)) 635 | 636 | chain = [merged_input] 637 | 638 | # Hidden 639 | for i in range(self.n_layers): 640 | u = chain[-1] 641 | if self.batchnorm_before_activation: 642 | u = getattr(self, "layer_%i" % i)(u) 643 | if self.apply_batchnorm: 644 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 645 | if self.batchnorm_before_activation == False: 646 | u = getattr(self, "layer_%i" % i)(u) 647 | output = f(u) 648 | if self.apply_dropout: 649 | output = F.dropout(output, train=not test) 650 | chain.append(output) 651 | 652 | u = chain[-1] 653 | mean = self.layer_output_mean(u) 654 | 655 | # log(sd^2) 656 | u = chain[-1] 657 | ln_var = self.layer_output_var(u) 658 | 659 | return mean, ln_var 660 | 661 | def __call__(self, x, y, test=False, apply_f=True): 662 | mean, ln_var = self.forward_one_step(x, y, test=test, apply_f=apply_f) 663 | if apply_f: 664 | return F.gaussian(mean, ln_var) 665 | return mean, ln_var 666 | 667 | # Network structure is same as the GaussianEncoder 668 | class GaussianDecoder(GaussianEncoder): 669 | 670 | def __call__(self, z, y, test=False, apply_f=False): 671 | mean, ln_var = self.forward_one_step(z, y, test=test, apply_f=False) 672 | if apply_f: 673 | return F.gaussian(mean, ln_var) 674 | return mean, ln_var 675 | 676 | class BernoulliDecoder(SoftmaxEncoder): 677 | 678 | def forward_one_step(self, z, y, test): 679 | f = activations[self.activation_function] 680 | 681 | if self.apply_batchnorm_to_input: 682 | if self.batchnorm_before_activation: 683 | merged_input = f(self.batchnorm_merge(self.layer_merge_z(z) + self.layer_merge_y(y), test=test)) 684 | else: 685 | merged_input = f(self.layer_merge_z(self.batchnorm_merge(z, test=test)) + self.layer_merge_y(y)) 686 | else: 687 | merged_input = f(self.layer_merge_z(z) + self.layer_merge_y(y)) 688 | 689 | chain = [merged_input] 690 | 691 | # Hidden 692 | for i in range(self.n_layers): 693 | u = chain[-1] 694 | if self.batchnorm_before_activation: 695 | u = getattr(self, "layer_%i" % i)(u) 696 | if i == self.n_layers - 1: 697 | if self.apply_batchnorm and self.batchnorm_before_activation == False: 698 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 699 | else: 700 | if self.apply_batchnorm: 701 | u = getattr(self, "batchnorm_%d" % i)(u, test=test) 702 | if self.batchnorm_before_activation == False: 703 | u = getattr(self, "layer_%i" % i)(u) 704 | if i == self.n_layers - 1: 705 | output = u 706 | else: 707 | output = f(u) 708 | if self.apply_dropout: 709 | output = F.dropout(output, train=not test) 710 | chain.append(output) 711 | 712 | return chain[-1] 713 | 714 | def __call__(self, z, y, test=False, apply_f=False): 715 | output = self.forward_one_step(z, y, test=test) 716 | if apply_f: 717 | return F.sigmoid(output) 718 | return output --------------------------------------------------------------------------------