├── .idea
│   ├── deployment.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── other.xml
│   ├── remote-mappings.xml
│   ├── vcs.xml
│   └── webServers.xml
├── README.md
├── VAE_CNN_BCEloss.py
├── VAE_CNN_Gaussianloss.py
├── VAE_celeba.py
├── VAE_facebook.py
├── VAE_fb_modified.py
├── main.py
├── main_new.py
├── new1.py
├── new2.py
└── simple_main.py

/README.md:
--------------------------------------------------------------------------------
1 | # Variational-Autoencoder
2 | Contains code to train a variational autoencoder on the MNIST dataset using PyTorch.
3 | 
4 | L = No.
of monte carlo samples for gradient calculation 5 | 6 | Gaussian loss is given by 7 | 8 | ![\Large \frac{1}{N}\sum_{i=1}^{N}\left[\frac{1}{L}\sum_{l=1}^{L}\left\{ \frac{1}{2}\sum_{j=1}^{784}\log(\sigma_{ij}^{(l)})^2 + \frac{1}{2}\sum_{j=1}^{784}\left(\frac{x_{ij}-\mu_{ij}^{(l)}} {\sigma_{ij}^{(l)}}\right)^2 \right\} \right ] - \frac{1}{N}\sum_{i=1}^{N}\left[ \sum_{j=1}^{J}\frac{1}{2}\left(1+\log(\sigma_j^{\prime(i)})^2-(\mu_j^{\prime(i)})^2 -(\sigma_j^{\prime(i)})^2\right )\right ]](https://latex.codecogs.com/svg.latex?%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%5Cfrac%7B1%7D%7BL%7D%5Csum_%7Bl%3D1%7D%5E%7BL%7D%5Cleft%5C%7B%20%5Cfrac%7B1%7D%7B2%7D%5Csum_%7Bj%3D1%7D%5E%7B784%7D%5Clog%28%5Csigma_%7Bij%7D%5E%7B%28l%29%7D%29%5E2%20+%20%5Cfrac%7B1%7D%7B2%7D%5Csum_%7Bj%3D1%7D%5E%7B784%7D%5Cleft%28%5Cfrac%7Bx_%7Bij%7D-%5Cmu_%7Bij%7D%5E%7B%28l%29%7D%7D%20%7B%5Csigma_%7Bij%7D%5E%7B%28l%29%7D%7D%5Cright%29%5E2%20%5Cright%5C%7D%20%5Cright%20%5D%20-%20%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%20%5Csum_%7Bj%3D1%7D%5E%7BJ%7D%5Cfrac%7B1%7D%7B2%7D%5Cleft%281+%5Clog%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2-%28%5Cmu_j%5E%7B%5Cprime%28i%29%7D%29%5E2%20-%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2%5Cright%20%29%5Cright%20%5D) 9 | 10 | 11 | BCE loss is given by 12 | 13 | ![\Large \frac{1}{N}\sum_{i=1}^{N}\left[\frac{1}{L}\sum_{l=1}^{L}\left\{x_{ij}\log p_{ij}^{(l)} + (1-x_{ij})\log(1-\log p_{ij}^{(l)}) \right\} \right ] - \frac{1}{N}\sum_{i=1}^{N}\left[ \sum_{j=1}^{J}\frac{1}{2}\left(1+\log(\sigma_j^{\prime(i)})^2-(\mu_j^{\prime(i)})^2 -(\sigma_j^{\prime(i)})^2\right )\right ]](https://latex.codecogs.com/svg.latex?%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%5Cfrac%7B1%7D%7BL%7D%5Csum_%7Bl%3D1%7D%5E%7BL%7D%5Cleft%5C%7Bx_%7Bij%7D%5Clog%20p_%7Bij%7D%5E%7B%28l%29%7D%20+%20%281-x_%7Bij%7D%29%5Clog%281-%5Clog%20p_%7Bij%7D%5E%7B%28l%29%7D%29%20%5Cright%5C%7D%20%5Cright%20%5D%20-%20%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%20%5Csum_%7Bj%3D1%7D%5E%7BJ%7D%5Cfrac%7B1%7D%7B2%7D%5Cleft%281+%5Clog%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2-%28%5Cmu_j%5E%7B%5Cprime%28i%29%7D%29%5E2%20-%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2%5Cright%20%29%5Cright%20%5D) 14 | -------------------------------------------------------------------------------- /VAE_CNN_BCEloss.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | from torch.autograd import Variable 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | 10 | os.environ['CUDA_VISIBLE_DEVICES'] = '3' 11 | # changed configuration to this instead of argparse for easier interaction 12 | CUDA = True 13 | SEED = 1 14 | BATCH_SIZE = 128 15 | LOG_INTERVAL = 10 16 | EPOCHS = 10 17 | no_of_sample = 10 18 | 19 | # connections through the autoencoder bottleneck 20 | # in the pytorch VAE example, this is 20 21 | ZDIMS = 20 22 | 23 | torch.manual_seed(SEED) 24 | if CUDA: 25 | torch.cuda.manual_seed(SEED) 26 | 27 | # DataLoader instances will load tensors directly into GPU memory 28 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {} 29 | 30 | # Download or load downloaded MNIST dataset 31 | # shuffle data at every epoch 32 | train_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=True, download=True,transform=transforms.ToTensor()), 33 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 
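# each batch yielded by this loader is a (data, label) tuple: data is a
# [BATCH_SIZE, 1, 28, 28] tensor scaled to [0, 1] by ToTensor(); the VAE
# below never uses the labels.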
34 | 35 | # Same for test data 36 | test_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=False, transform=transforms.ToTensor()), 37 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 38 | 39 | 40 | class VAE(nn.Module): 41 | def __init__(self): 42 | super(VAE, self).__init__() 43 | 44 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15), 45 | stride=2) # This padding keeps the size of the image same, i.e. same padding 46 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2) 47 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 48 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS) 49 | 50 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 51 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS) 52 | self.relu = nn.ReLU() 53 | 54 | # For decoder 55 | 56 | # For mu 57 | self.fc1 = nn.Linear(in_features=20, out_features=1024) 58 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128) 59 | self.conv_t1 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 60 | self.conv_t2 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 61 | 62 | 63 | 64 | def encode(self, x: Variable) -> (Variable, Variable): 65 | 66 | x = x.view(-1, 1, 28, 28) 67 | x = F.elu(self.conv1(x)) 68 | x = F.elu(self.conv2(x)) 69 | x = x.view(-1, 128 * 28 * 28) 70 | 71 | mu_z = F.elu(self.fc11(x)) 72 | mu_z = self.fc12(mu_z) 73 | 74 | logvar_z = F.elu(self.fc21(x)) 75 | logvar_z = self.fc22(logvar_z) 76 | 77 | return mu_z, logvar_z 78 | 79 | 80 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable: 81 | 82 | 83 | if self.training: 84 | # multiply log variance with 0.5, then in-place exponent 85 | # yielding the standard deviation 86 | 87 | sample_z = [] 88 | for _ in range(no_of_sample): 89 | std = logvar.mul(0.5).exp_() # type: Variable 90 | eps = Variable(std.data.new(std.size()).normal_()) 91 | sample_z.append(eps.mul(std).add_(mu)) 92 | 93 | return sample_z 94 | 95 | else: 96 | # During inference, we simply spit out the mean of the 97 | # learned distribution for the current input. We could 98 | # use a random sample from the distribution, but mu of 99 | # course has the highest probability. 100 | return mu 101 | 102 | def decode(self, z: Variable) -> Variable: 103 | 104 | x = F.elu(self.fc1(z)) 105 | x = F.elu(self.fc2(x)) 106 | x = x.view(-1, 128, 7, 7) 107 | x = F.relu(self.conv_t1(x)) 108 | x = F.sigmoid(self.conv_t2(x)) 109 | 110 | return x.view(-1, 784) 111 | 112 | 113 | def forward(self, x: Variable) -> (Variable, Variable, Variable): 114 | mu, logvar = self.encode(x.view(-1, 784)) 115 | z = self.reparameterize(mu, logvar) 116 | if self.training: 117 | return [self.decode(z) for z in z], mu, logvar 118 | else: 119 | return self.decode(z), mu, logvar 120 | # return self.decode(z), mu, logvar 121 | 122 | def loss_function(self, recon_x, x, mu, logvar) -> Variable: 123 | # how well do input x and output recon_x agree? 
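# during training, recon_x is a list of `no_of_sample` reconstructions, one
# per latent draw z^(l); averaging the BCE over that list is the (1/L) * sum
# over l Monte Carlo estimate of the reconstruction term in the README objective.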
124 | 125 | if self.training: 126 | BCE = 0 127 | for recon_x_one in recon_x: 128 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784)) 129 | BCE /= len(recon_x) 130 | else: 131 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784)) 132 | 133 | # KLD is Kullback–Leibler divergence -- how much does one learned 134 | # distribution deviate from another, in this specific case the 135 | # learned distribution from the unit Gaussian 136 | 137 | # see Appendix B from VAE paper: 138 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 139 | # https://arxiv.org/abs/1312.6114 140 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 141 | # note the negative D_{KL} in appendix B of the paper 142 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 143 | # Normalise by same number of elements as in reconstruction 144 | KLD /= BATCH_SIZE * 784 145 | 146 | 147 | return BCE + KLD 148 | 149 | 150 | model = VAE() 151 | if CUDA: 152 | model.cuda() 153 | 154 | 155 | 156 | 157 | 158 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 159 | 160 | 161 | def train(epoch): 162 | # toggle model to train mode 163 | model.train() 164 | train_loss = 0 165 | # in the case of MNIST, len(train_loader.dataset) is 60000 166 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 167 | for batch_idx, (data, _) in enumerate(train_loader): 168 | data = Variable(data) 169 | if CUDA: 170 | data = data.cuda() 171 | optimizer.zero_grad() 172 | 173 | # push whole batch of data through VAE.forward() to get recon_loss 174 | recon_batch, mu, logvar = model(data) 175 | # calculate scalar loss 176 | loss = model.loss_function(recon_batch, data, mu, logvar) 177 | # calculate the gradient of the loss w.r.t. the graph leaves 178 | # i.e. input variables -- by the power of pytorch! 179 | loss.backward() 180 | train_loss += loss.data[0] 181 | optimizer.step() 182 | if batch_idx % LOG_INTERVAL == 0: 183 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), len(train_loader.dataset), 184 | 100. * batch_idx / len(train_loader), 185 | loss.data[0] / len(data))) 186 | 187 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset))) 188 | 189 | 190 | def test(epoch): 191 | model.eval() 192 | test_loss = 0 193 | 194 | # each data is of BATCH_SIZE (default 128) samples 195 | for i, (data, _) in enumerate(test_loader): 196 | if CUDA: 197 | # make sure this lives on the GPU 198 | data = data.cuda() 199 | 200 | # we're only going to infer, so no autograd at all required: volatile=True 201 | data = Variable(data, volatile=True) 202 | recon_batch, mu, logvar = model(data) 203 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0] 204 | if i == 0: 205 | n = min(data.size(0), 8) 206 | # for the first 128 batch of the epoch, show the first 8 input digits 207 | # with right below them the reconstructed output digits 208 | comparison = torch.cat([data[:n], 209 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 210 | save_image(comparison.data.cpu(), 211 | './mnist/reconstruction_' + str(epoch) + '.png', nrow=n) 212 | 213 | test_loss /= len(test_loader.dataset) 214 | print('====> Test set loss: {:.4f}'.format(test_loss)) 215 | 216 | 217 | if __name__ == "__main__": 218 | for epoch in range(1, EPOCHS + 1): 219 | train(epoch) 220 | test(epoch) 221 | 222 | # 64 sets of random ZDIMS-float vectors, i.e. 
64 locations / MNIST 223 | # digits in latent space 224 | sample = Variable(torch.randn(64, ZDIMS)) 225 | if CUDA: 226 | sample = sample.cuda() 227 | sample = model.decode(sample).cpu() 228 | 229 | # save out as an 8x8 matrix of MNIST digits 230 | # this will give you a visual idea of how well latent space can generate things 231 | # that look like digits 232 | save_image(sample.data.view(64, 1, 28, 28),'./mnist/reconstruction' + str(epoch) + '.png') -------------------------------------------------------------------------------- /VAE_CNN_Gaussianloss.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | import torch.nn.init as init 6 | from torch.autograd import Variable 7 | from torch.nn import functional as F 8 | from torchvision import datasets, transforms 9 | from torchvision.utils import save_image 10 | 11 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 12 | # changed configuration to this instead of argparse for easier interaction 13 | CUDA = True 14 | SEED = 1 15 | BATCH_SIZE = 128 16 | LOG_INTERVAL = 10 17 | EPOCHS = 50 18 | no_of_sample = 10 19 | 20 | # connections through the autoencoder bottleneck 21 | ZDIMS = 20 22 | 23 | 24 | 25 | torch.manual_seed(SEED) 26 | if CUDA: 27 | torch.cuda.manual_seed(SEED) 28 | 29 | # DataLoader instances will load tensors directly into GPU memory 30 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {} 31 | 32 | # Download or load downloaded MNIST dataset 33 | # shuffle data at every epoch 34 | train_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=True, download=True,transform=transforms.ToTensor()), 35 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 36 | 37 | # Same for test data 38 | test_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=False, transform=transforms.ToTensor()), 39 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 40 | 41 | 42 | class VAE(nn.Module): 43 | def __init__(self): 44 | super(VAE, self).__init__() 45 | 46 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15), 47 | stride=2) # This padding keeps the size of the image same, i.e. 
same padding 48 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2) 49 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 50 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS) 51 | 52 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 53 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS) 54 | self.relu = nn.ReLU() 55 | 56 | # For decoder 57 | 58 | # For mu 59 | self.fc1 = nn.Linear(in_features=20, out_features=1024) 60 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128) 61 | self.conv_t11 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 62 | self.conv_t12 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 63 | 64 | self.conv_t21 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 65 | self.conv_t22 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 66 | 67 | #Parameter initialization 68 | # for m in self.modules(): 69 | # 70 | # if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 71 | # #init.xavier_normal(m.weight.data, gain=nn.init.calculate_gain('relu')) 72 | # init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu')) 73 | # #init.kaiming_uniform(m.weight.data) 74 | # init.constant(m.bias, .1) 75 | # 76 | # elif isinstance(m, nn.BatchNorm2d): 77 | # m.weight.data.fill_(1) 78 | # m.bias.data.zero_() 79 | 80 | def encode(self, x: Variable) -> (Variable, Variable): 81 | 82 | x = x.view(-1, 1, 28, 28) 83 | x = F.elu(self.conv1(x)) 84 | x = F.elu(self.conv2(x)) 85 | x = x.view(-1, 128 * 28 * 28) 86 | 87 | mu_z = F.elu(self.fc11(x)) 88 | mu_z = self.fc12(mu_z) 89 | 90 | logvar_z = F.elu(self.fc21(x)) 91 | logvar_z = self.fc22(logvar_z) 92 | 93 | return mu_z, logvar_z 94 | 95 | def reparameterize(self, mu: Variable, logvar: Variable) -> list: 96 | """THE REPARAMETERIZATION IDEA: 97 | 98 | For each training sample (we get 128 batched at a time) 99 | 100 | - take the current learned mu, stddev for each of the ZDIMS 101 | dimensions and draw a random sample from that distribution 102 | - the whole network is trained so that these randomly drawn 103 | samples decode to output that looks like the input 104 | - which will mean that the std, mu will be learned 105 | *distributions* that correctly encode the inputs 106 | - due to the additional KLD term (see loss_function() below) 107 | the distribution will tend to unit Gaussians 108 | 109 | Parameters 110 | ---------- 111 | mu : [128, ZDIMS] mean matrix 112 | logvar : [128, ZDIMS] variance matrix 113 | 114 | Returns 115 | ------- 116 | 117 | During training random sample from the learned ZDIMS-dimensional 118 | normal distribution; during inference its mean. 
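        A minimal sketch of the trick in isolation (illustrative only; the
        names and shapes are assumed, not a call into this class):

            std = logvar.mul(0.5).exp()   # sigma = exp(log(sigma^2) / 2)
            eps = Variable(std.data.new(std.size()).normal_())  # eps ~ N(0, I)
            z = eps.mul(std).add(mu)      # z ~ N(mu, sigma^2), differentiable in mu and std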
119 | 
120 |         """
121 | 
122 |         if self.training:
123 |             # multiply log variance with 0.5, then in-place exponent
124 |             # yielding the standard deviation
125 | 
126 |             sample_z = []
127 |             for _ in range(no_of_sample):
128 |                 std = logvar.mul(0.5).exp_()  # type: Variable
129 |                 # - std.data is the [128,ZDIMS] tensor that is wrapped by std
130 |                 # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
131 |                 #   and stddev 1 normal distribution that is 128 samples
132 |                 #   of random ZDIMS-float vectors
133 |                 eps = Variable(std.data.new(std.size()).normal_())
134 |                 # - sample from a normal distribution with standard
135 |                 #   deviation = std and mean = mu by multiplying mean 0
136 |                 #   stddev 1 sample with desired std and mu, see
137 |                 #   https://stats.stackexchange.com/a/16338
138 |                 # - so we have 128 sets (the batch) of random ZDIMS-float
139 |                 #   vectors sampled from normal distribution with learned
140 |                 #   std and mu for the current input
141 |                 sample_z.append(eps.mul(std).add_(mu))
142 | 
143 |             return sample_z
144 | 
145 |         else:
146 |             # During inference, we simply spit out the mean of the
147 |             # learned distribution for the current input. We could
148 |             # use a random sample from the distribution, but mu of
149 |             # course has the highest probability.
150 |             return mu
151 | 
152 |     def decode(self, z: Variable) -> (Variable, Variable):
153 | 
154 |         x = F.elu(self.fc1(z))
155 |         x = F.elu(self.fc2(x))
156 |         x = x.view(-1, 128, 7, 7)
157 |         mu_x = F.relu(self.conv_t11(x))
158 |         mu_x = F.sigmoid(self.conv_t12(mu_x))
159 | 
160 |         logvar_x = F.relu(self.conv_t21(x))  # second transposed-conv branch parameterises the log-variance
161 |         logvar_x = F.sigmoid(self.conv_t22(logvar_x))
162 | 
163 |         return mu_x.view(-1, 784), logvar_x.view(-1, 784)
164 | 
165 |     def forward(self, x: Variable) -> (Variable, Variable, Variable):
166 |         mu, logvar = self.encode(x.view(-1, 784))
167 |         z = self.reparameterize(mu, logvar)
168 |         if self.training:
169 |             return [self.decode(z) for z in z], mu, logvar
170 |         else:
171 |             return self.decode(z), mu, logvar
172 | 
173 |     def loss_function(self, recon_x, x, mu, logvar) -> Variable:
174 |         # how well do input x and output recon_x agree?
175 | 
176 |         if self.training:
177 |             GLL = 0
178 |             x = x.view(-1, 784)
179 |             for recon_x_one in recon_x:
180 |                 mu_x, logvar_x = recon_x_one
181 |                 part1 = torch.sum(logvar_x) / BATCH_SIZE
182 |                 sigma = logvar_x.mul(0.5).exp_()
183 |                 part2 = torch.sum(((x - mu_x) / sigma) ** 2) / BATCH_SIZE
184 |                 GLL += .5 * (part1 + part2)
185 | 
186 |             GLL /= len(recon_x)
187 |         else:
188 |             x = x.view(-1, 784)
189 |             mu_x, logvar_x = recon_x
190 |             part1 = torch.sum(logvar_x) / BATCH_SIZE
191 |             sigma = logvar_x.mul(0.5).exp_()
192 |             part2 = torch.sum(((x - mu_x) / sigma) ** 2) / BATCH_SIZE
193 |             GLL = .5 * (part1 + part2)
194 | 
195 | 
196 | 
197 |         # see Appendix B from VAE paper:
198 |         # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
199 |         # https://arxiv.org/abs/1312.6114
200 |         # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
201 |         # note the negative D_{KL} in appendix B of the paper
202 |         KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
203 |         # Normalise by same number of elements as in reconstruction
204 |         KLD /= BATCH_SIZE
205 | 
206 | 
207 |         return GLL + KLD
208 | 
209 | 
210 | model = VAE()
211 | if CUDA:
212 |     model.cuda()
213 | 
214 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam!
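# lr=1e-3 below is simply Adam's common starting point; the remaining Adam
# defaults (betas=(0.9, 0.999), eps=1e-8) are left untouched.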
215 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 216 | 217 | 218 | def train(epoch): 219 | # toggle model to train mode 220 | model.train() 221 | train_loss = 0 222 | # in the case of MNIST, len(train_loader.dataset) is 60000 223 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 224 | for batch_idx, (data, _) in enumerate(train_loader): 225 | data = Variable(data) 226 | if CUDA: 227 | data = data.cuda() 228 | optimizer.zero_grad() 229 | 230 | # push whole batch of data through VAE.forward() to get recon_loss 231 | recon_batch, mu, logvar = model(data) 232 | # calculate scalar loss 233 | loss = model.loss_function(recon_batch, data, mu, logvar) 234 | # calculate the gradient of the loss w.r.t. the graph leaves 235 | # i.e. input variables -- by the power of pytorch! 236 | loss.backward() 237 | train_loss += loss.data[0] 238 | optimizer.step() 239 | if batch_idx % LOG_INTERVAL == 0: 240 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 241 | epoch, batch_idx * len(data), len(train_loader.dataset), 242 | 100. * batch_idx / len(train_loader), 243 | loss.data[0] / len(data))) 244 | 245 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset))) 246 | 247 | 248 | def test(epoch): 249 | # toggle model to test / inference mode 250 | model.eval() 251 | test_loss = 0 252 | 253 | # each data is of BATCH_SIZE (default 128) samples 254 | for i, (data, _) in enumerate(test_loader): 255 | if CUDA: 256 | # make sure this lives on the GPU 257 | data = data.cuda() 258 | 259 | # we're only going to infer, so no autograd at all required: volatile=True 260 | data = Variable(data, volatile=True) 261 | recon_batch, mu, logvar = model(data) 262 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0] 263 | 264 | mu_batch, _ = recon_batch 265 | if i == 0: 266 | n = min(data.size(0), 8) 267 | # for the first 128 batch of the epoch, show the first 8 input digits 268 | # with right below them the reconstructed output digits 269 | comparison = torch.cat([data[:n], 270 | mu_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 271 | save_image(comparison.data.cpu(), 272 | '/home/atin/data/new/results_gaussian/reconstruction_' + str(epoch) + '.png', nrow=n) 273 | 274 | test_loss /= len(test_loader.dataset) 275 | print('====> Test set loss: {:.4f}'.format(test_loss)) 276 | 277 | 278 | if __name__ == "__main__": 279 | 280 | for epoch in range(1, EPOCHS + 1): 281 | train(epoch) 282 | test(epoch) 283 | 284 | # 64 sets of random ZDIMS-float vectors, i.e. 
64 locations / MNIST 285 | # digits in latent space 286 | sample = Variable(torch.randn(64, ZDIMS)) 287 | if CUDA: 288 | sample = sample.cuda() 289 | mu_sample, sigma_sample = model.decode(sample) 290 | # sample = model.decode(sample).cpu() 291 | 292 | # save out as an 8x8 matrix of MNIST digits 293 | # this will give you a visual idea of how well latent space can generate things 294 | # that look like digits 295 | save_image(mu_sample.cpu().data.view(64, 1, 28, 28),'./mnist/reconstruction' + str(epoch) + '.png') -------------------------------------------------------------------------------- /VAE_celeba.py: -------------------------------------------------------------------------------- 1 | #Create a folder called celeba in home dir where reconstructed images will be stored 2 | #Considered only 100000 images for training 3 | 4 | import os 5 | import torch 6 | import torch.utils.data 7 | from torch import nn, optim 8 | from torch.autograd import Variable 9 | from torch.nn import functional as F 10 | from torchvision import datasets, transforms 11 | from torchvision.utils import save_image 12 | from torch.utils.data import Dataset, DataLoader 13 | from PIL import Image 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | from scipy.misc import imresize 17 | 18 | import sys 19 | import warnings 20 | if not sys.warnoptions: 21 | warnings.simplefilter("ignore") 22 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 23 | no_of_sample = 10 24 | CUDA = True 25 | BATCH_SIZE = 32 26 | LOG_INTERVAL = 5 27 | 28 | 29 | class CelebaDataset(Dataset): 30 | 31 | def __init__(self, root_dir, im_name_list, resize_dim, transform=None): 32 | self.root_dir = root_dir 33 | self.im_list = im_name_list 34 | self.resize_dim = resize_dim 35 | self.transform = transform 36 | 37 | def __len__(self): 38 | return len(self.im_list) 39 | 40 | def __getitem__(self, idx): 41 | im = Image.open(os.path.join(self.root_dir, self.im_list[idx])) 42 | im = np.array(im) 43 | im = imresize(im, self.resize_dim, interp='nearest') 44 | im = im / 255 45 | 46 | if self.transform: 47 | im = self.transform(im) 48 | 49 | return im 50 | 51 | class ToTensor(object): 52 | """Convert ndarrays in sample to Tensors. 
numpy image: H x W x C, torch image: C X H X W 53 | """ 54 | 55 | def __call__(self, image, invert_arrays=True): 56 | 57 | if invert_arrays: 58 | image = image.transpose((2, 0, 1)) 59 | 60 | return torch.from_numpy(image) 61 | 62 | 63 | class Conv_Block(nn.Module): 64 | def __init__(self, in_channels, out_channels, kernel_size, padding, stride, pool_kernel_size=(2, 2)): 65 | super(Conv_Block, self).__init__() 66 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size, padding, stride) 67 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size, padding, stride) 68 | self.pool = nn.MaxPool2d(pool_kernel_size) 69 | 70 | def forward(self, x): 71 | x = F.elu(self.conv1(x)) 72 | x = F.elu(self.conv2(x)) 73 | x = self.pool(x) 74 | 75 | return x 76 | 77 | 78 | class VAE(nn.Module): 79 | def __init__(self): 80 | super(VAE, self).__init__() 81 | # Encoder 82 | self.block1 = Conv_Block(3, 64, (3, 3), 1, 1) # 64 83 | self.block2 = Conv_Block(64, 128, (3, 3), 1, 1) # 32 84 | self.block3 = Conv_Block(128, 256, (3, 3), 1, 1) # 16 85 | self.block4 = Conv_Block(256, 32, (3, 3), 1, 1) # 8 86 | 87 | # Decoder 88 | self.fct_decode = nn.Sequential( 89 | nn.Conv2d(16, 64, (3, 3), padding=1), 90 | nn.ELU(), 91 | nn.Upsample(scale_factor=2, mode='nearest'), # 16 92 | nn.Conv2d(64, 64, (3, 3), padding=1), 93 | nn.ELU(), 94 | nn.Upsample(scale_factor=2, mode='nearest'), # 32 95 | nn.Conv2d(64, 64, (3, 3), padding=1), 96 | nn.ELU(), 97 | nn.Upsample(scale_factor=2, mode='nearest'), # 64 98 | nn.Conv2d(64, 16, (3, 3), padding=1), 99 | nn.ELU(), 100 | nn.Upsample(scale_factor=2, mode='nearest'), # 128 101 | ) 102 | 103 | self.final_decod_mean = nn.Conv2d(16, 3, (3, 3), padding=1) 104 | 105 | def encode(self, x): 106 | '''return mu_z and logvar_z''' 107 | 108 | x = F.elu(self.block1(x)) 109 | x = F.elu(self.block2(x)) 110 | x = F.elu(self.block3(x)) 111 | x = F.elu(self.block4(x)) 112 | 113 | return x[:, :16, :, :], x[:, 16:, :, :] # output shape - batch_size x 16 x 8 x 8 114 | 115 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable: 116 | 117 | if self.training: 118 | # multiply log variance with 0.5, then in-place exponent 119 | # yielding the standard deviation 120 | 121 | sample_z = [] 122 | for _ in range(no_of_sample): 123 | std = logvar.mul(0.5).exp_() # type: Variable 124 | eps = Variable(std.data.new(std.size()).normal_()) 125 | sample_z.append(eps.mul(std).add_(mu)) 126 | 127 | return sample_z 128 | 129 | else: 130 | return mu 131 | 132 | def decode(self, z): 133 | 134 | z = self.fct_decode(z) 135 | z = self.final_decod_mean(z) 136 | z = F.sigmoid(z) 137 | 138 | return z.view(-1, 3 * 128 * 128) 139 | 140 | def forward(self, x): 141 | mu, logvar = self.encode(x) 142 | z = self.reparameterize(mu, logvar) 143 | if self.training: 144 | return [self.decode(z) for z in z], mu, logvar 145 | else: 146 | return self.decode(z), mu, logvar 147 | 148 | def loss_function(self, recon_x, x, mu, logvar) -> Variable: 149 | # how well do input x and output recon_x agree? 
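# F.binary_cross_entropy here runs with its old default size_average=True,
# i.e. it returns a mean over every element; the KLD below is divided by the
# matching BATCH_SIZE * 3 * 128 * 128 element count so both terms share a scale.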
150 | 151 | if self.training: 152 | BCE = 0 153 | for recon_x_one in recon_x: 154 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 3 * 128 * 128)) 155 | BCE /= len(recon_x) 156 | else: 157 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 3 * 128 * 128)) 158 | 159 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 160 | KLD /= BATCH_SIZE * 3 * 128 * 128 161 | 162 | return BCE + KLD 163 | 164 | 165 | def train(epoch, model, optimizer, train_loader): 166 | # toggle model to train mode 167 | model.train() 168 | train_loss = 0 169 | # in the case of MNIST, len(train_loader.dataset) is 60000 170 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 171 | for batch_idx, data in enumerate(train_loader): 172 | data = Variable(data.type(torch.FloatTensor)) 173 | if CUDA: 174 | data = data.cuda() 175 | optimizer.zero_grad() 176 | 177 | # push whole batch of data through VAE.forward() to get recon_loss 178 | recon_batch, mu, logvar = model(data) 179 | # calculate scalar loss 180 | loss = model.loss_function(recon_batch, data, mu, logvar) 181 | # calculate the gradient of the loss w.r.t. the graph leaves 182 | # i.e. input variables -- by the power of pytorch! 183 | loss.backward() 184 | train_loss += loss.data[0] 185 | optimizer.step() 186 | if batch_idx % LOG_INTERVAL == 0: 187 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), 188 | len(train_loader.dataset), 189 | 100. * batch_idx / len(train_loader), 190 | loss.data[0] / len(data))) 191 | 192 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset))) 193 | 194 | 195 | def test(epoch, model, test_loader): 196 | model.eval() 197 | test_loss = 0 198 | 199 | # each data is of BATCH_SIZE (default 128) samples 200 | for i, data in enumerate(test_loader): 201 | data = Variable(data.type(torch.FloatTensor), volatile=True) 202 | if CUDA: 203 | # make sure this lives on the GPU 204 | data = data.cuda() 205 | 206 | # we're only going to infer, so no autograd at all required: volatile=True 207 | 208 | recon_batch, mu, logvar = model(data) 209 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0] 210 | if i == 0: 211 | n = min(data.size(0), 8) 212 | # for the first 128 batch of the epoch, show the first 8 input digits 213 | # with right below them the reconstructed output digits 214 | comparison = torch.cat([data[:n], 215 | recon_batch.view(BATCH_SIZE, 3, 128, 128)[:n]]) 216 | save_image(comparison.data.cpu(), 217 | './celeba/reconstruction_' + str(epoch) + '.png', nrow=n) 218 | 219 | # break #To save time 220 | 221 | test_loss /= len(test_loader.dataset) 222 | print('====> Test set loss: {:.4f}'.format(test_loss)) 223 | 224 | if __name__ == "__main__": 225 | 226 | root_dir = "/home/atin/DeployedProjects/TestProject/img_align_celeba" 227 | image_files = os.listdir(root_dir) 228 | train_dataset = CelebaDataset(root_dir, image_files[:100000], (128, 128), transforms.Compose([ToTensor()])) 229 | train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=10, shuffle=True) 230 | 231 | #Take only 1000 images in test 232 | test_dataset = CelebaDataset(root_dir, image_files[100000:101000], (128, 128), transforms.Compose([ToTensor()])) 233 | test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=10, shuffle=True) 234 | 235 | EPOCHS = 10 236 | model = VAE() 237 | if CUDA: model.cuda() 238 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 239 | 240 | for epoch in range(1, EPOCHS + 1): 241 
| train(epoch, model, optimizer, train_loader) 242 | test(epoch, model, test_loader) 243 | 244 | # 64 sets of random ZDIMS-float vectors, i.e. 64 locations / MNIST 245 | # digits in latent space 246 | sample = Variable(torch.randn(64, 16, 8, 8)) 247 | if CUDA: 248 | sample = sample.cuda() 249 | sample = model.decode(sample).cpu() 250 | 251 | # save out as an 8x8 matrix of MNIST digits 252 | # this will give you a visual idea of how well latent space can generate things 253 | # that look like digits 254 | save_image(sample.data.view(64, 3, 128, 128), './celeba/reconstruction' + str(epoch) + '.png') 255 | 256 | 257 | 258 | 259 | 260 | 261 | -------------------------------------------------------------------------------- /VAE_facebook.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | from torch.autograd import Variable 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | 10 | # changed configuration to this instead of argparse for easier interaction 11 | CUDA = False 12 | SEED = 1 13 | BATCH_SIZE = 128 14 | LOG_INTERVAL = 10 15 | EPOCHS = 10 16 | 17 | # connections through the autoencoder bottleneck 18 | # in the pytorch VAE example, this is 20 19 | ZDIMS = 20 20 | 21 | # I do this so that the MNIST dataset is downloaded where I want it 22 | #os.chdir("/home/atin/") 23 | 24 | torch.manual_seed(SEED) 25 | if CUDA: 26 | torch.cuda.manual_seed(SEED) 27 | 28 | # DataLoader instances will load tensors directly into GPU memory 29 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {} 30 | 31 | # Download or load downloaded MNIST dataset 32 | # shuffle data at every epoch 33 | train_loader = torch.utils.data.DataLoader( 34 | datasets.MNIST('../data', train=True, download=True, 35 | transform=transforms.ToTensor()), 36 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 37 | 38 | # Same for test data 39 | test_loader = torch.utils.data.DataLoader( 40 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()), 41 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 42 | 43 | 44 | class VAE(nn.Module): 45 | def __init__(self): 46 | super(VAE, self).__init__() 47 | 48 | # ENCODER 49 | # 28 x 28 pixels = 784 input pixels, 400 outputs 50 | self.fc1 = nn.Linear(784, 400) 51 | # rectified linear unit layer from 400 to 400 52 | # max(0, x) 53 | self.relu = nn.ReLU() 54 | self.fc21 = nn.Linear(400, ZDIMS) # mu layer 55 | self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer 56 | # this last layer bottlenecks through ZDIMS connections 57 | 58 | # DECODER 59 | # from bottleneck to hidden 400 60 | self.fc3 = nn.Linear(ZDIMS, 400) 61 | # from hidden 400 to 784 outputs 62 | self.fc4 = nn.Linear(400, 784) 63 | self.sigmoid = nn.Sigmoid() 64 | 65 | def encode(self, x: Variable) -> (Variable, Variable): 66 | """Input vector x -> fully connected 1 -> ReLU -> (fully connected 67 | 21, fully connected 22) 68 | 69 | Parameters 70 | ---------- 71 | x : [128, 784] matrix; 128 digits of 28x28 pixels each 72 | 73 | Returns 74 | ------- 75 | 76 | (mu, logvar) : ZDIMS mean units one for each latent dimension, ZDIMS 77 | variance units one for each latent dimension 78 | 79 | """ 80 | 81 | # h1 is [128, 400] 82 | h1 = self.relu(self.fc1(x)) # type: Variable 83 | return self.fc21(h1), self.fc22(h1) 84 | 85 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable: 86 | """THE 
REPARAMETERIZATION IDEA: 87 | 88 | For each training sample (we get 128 batched at a time) 89 | 90 | - take the current learned mu, stddev for each of the ZDIMS 91 | dimensions and draw a random sample from that distribution 92 | - the whole network is trained so that these randomly drawn 93 | samples decode to output that looks like the input 94 | - which will mean that the std, mu will be learned 95 | *distributions* that correctly encode the inputs 96 | - due to the additional KLD term (see loss_function() below) 97 | the distribution will tend to unit Gaussians 98 | 99 | Parameters 100 | ---------- 101 | mu : [128, ZDIMS] mean matrix 102 | logvar : [128, ZDIMS] variance matrix 103 | 104 | Returns 105 | ------- 106 | 107 | During training random sample from the learned ZDIMS-dimensional 108 | normal distribution; during inference its mean. 109 | 110 | """ 111 | 112 | if self.training: 113 | # multiply log variance with 0.5, then in-place exponent 114 | # yielding the standard deviation 115 | std = logvar.mul(0.5).exp_() # type: Variable 116 | # - std.data is the [128,ZDIMS] tensor that is wrapped by std 117 | # - so eps is [128,ZDIMS] with all elements drawn from a mean 0 118 | # and stddev 1 normal distribution that is 128 samples 119 | # of random ZDIMS-float vectors 120 | eps = Variable(std.data.new(std.size()).normal_()) 121 | # - sample from a normal distribution with standard 122 | # deviation = std and mean = mu by multiplying mean 0 123 | # stddev 1 sample with desired std and mu, see 124 | # https://stats.stackexchange.com/a/16338 125 | # - so we have 128 sets (the batch) of random ZDIMS-float 126 | # vectors sampled from normal distribution with learned 127 | # std and mu for the current input 128 | return eps.mul(std).add_(mu) 129 | 130 | else: 131 | # During inference, we simply spit out the mean of the 132 | # learned distribution for the current input. We could 133 | # use a random sample from the distribution, but mu of 134 | # course has the highest probability. 135 | return mu 136 | 137 | def decode(self, z: Variable) -> Variable: 138 | h3 = self.relu(self.fc3(z)) 139 | return self.sigmoid(self.fc4(h3)) 140 | 141 | def forward(self, x: Variable) -> (Variable, Variable, Variable): 142 | mu, logvar = self.encode(x.view(-1, 784)) 143 | z = self.reparameterize(mu, logvar) 144 | return self.decode(z), mu, logvar 145 | 146 | 147 | model = VAE() 148 | if CUDA: 149 | model.cuda() 150 | 151 | 152 | def loss_function(recon_x, x, mu, logvar) -> Variable: 153 | # how well do input x and output recon_x agree? 154 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784)) 155 | 156 | # KLD is Kullback–Leibler divergence -- how much does one learned 157 | # distribution deviate from another, in this specific case the 158 | # learned distribution from the unit Gaussian 159 | 160 | # see Appendix B from VAE paper: 161 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 162 | # https://arxiv.org/abs/1312.6114 163 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 164 | # note the negative D_{KL} in appendix B of the paper 165 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 166 | # Normalise by same number of elements as in reconstruction 167 | KLD /= BATCH_SIZE * 784 168 | 169 | # BCE tries to make our reconstruction as accurate as possible 170 | # KLD tries to push the distributions as close as possible to unit Gaussian 171 | return BCE + KLD 172 | 173 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam! 
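# worked check for loss_function above (illustrative): a unit with mu=0 and
# logvar=0 (sigma=1) contributes -0.5 * (1 + 0 - 0 - 1) = 0 to KLD, i.e. no
# penalty once the posterior already matches the unit-Gaussian prior.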
174 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 175 | 176 | 177 | def train(epoch): 178 | # toggle model to train mode 179 | model.train() 180 | train_loss = 0 181 | # in the case of MNIST, len(train_loader.dataset) is 60000 182 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 183 | for batch_idx, (data, _) in enumerate(train_loader): 184 | data = Variable(data) 185 | if CUDA: 186 | data = data.cuda() 187 | optimizer.zero_grad() 188 | 189 | # push whole batch of data through VAE.forward() to get recon_loss 190 | recon_batch, mu, logvar = model(data) 191 | # calculate scalar loss 192 | loss = loss_function(recon_batch, data, mu, logvar) 193 | # calculate the gradient of the loss w.r.t. the graph leaves 194 | # i.e. input variables -- by the power of pytorch! 195 | loss.backward() 196 | train_loss += loss.data[0] 197 | optimizer.step() 198 | if batch_idx % LOG_INTERVAL == 0: 199 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 200 | epoch, batch_idx * len(data), len(train_loader.dataset), 201 | 100. * batch_idx / len(train_loader), 202 | loss.data[0] / len(data))) 203 | 204 | print('====> Epoch: {} Average loss: {:.4f}'.format( 205 | epoch, train_loss / len(train_loader.dataset))) 206 | 207 | 208 | def test(epoch): 209 | # toggle model to test / inference mode 210 | model.eval() 211 | test_loss = 0 212 | 213 | # each data is of BATCH_SIZE (default 128) samples 214 | for i, (data, _) in enumerate(test_loader): 215 | if CUDA: 216 | # make sure this lives on the GPU 217 | data = data.cuda() 218 | 219 | # we're only going to infer, so no autograd at all required: volatile=True 220 | data = Variable(data, volatile=True) 221 | recon_batch, mu, logvar = model(data) 222 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 223 | if i == 0: 224 | n = min(data.size(0), 8) 225 | # for the first 128 batch of the epoch, show the first 8 input digits 226 | # with right below them the reconstructed output digits 227 | comparison = torch.cat([data[:n], 228 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 229 | save_image(comparison.data.cpu(), 230 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 231 | 232 | test_loss /= len(test_loader.dataset) 233 | print('====> Test set loss: {:.4f}'.format(test_loss)) 234 | 235 | 236 | 237 | 238 | if __name__ =="__main__": 239 | 240 | for epoch in range(1, EPOCHS + 1): 241 | train(epoch) 242 | test(epoch) 243 | 244 | # 64 sets of random ZDIMS-float vectors, i.e. 
64 locations / MNIST 245 | # digits in latent space 246 | sample = Variable(torch.randn(64, ZDIMS)) 247 | if CUDA: 248 | sample = sample.cuda() 249 | sample = model.decode(sample).cpu() 250 | 251 | # save out as an 8x8 matrix of MNIST digits 252 | # this will give you a visual idea of how well latent space can generate things 253 | # that look like digits 254 | save_image(sample.data.view(64, 1, 28, 28), 255 | 'results/sample_' + str(epoch) + '.png') 256 | 257 | 258 | # from __future__ import print_function 259 | # import argparse 260 | # import torch 261 | # import torch.utils.data 262 | # from torch import nn, optim 263 | # from torch.autograd import Variable 264 | # from torch.nn import functional as F 265 | # from torchvision import datasets, transforms 266 | # from torchvision.utils import save_image 267 | # 268 | # 269 | # parser = argparse.ArgumentParser(description='VAE MNIST Example') 270 | # parser.add_argument('--batch-size', type=int, default=128, metavar='N', 271 | # help='input batch size for training (default: 128)') 272 | # parser.add_argument('--epochs', type=int, default=10, metavar='N', 273 | # help='number of epochs to train (default: 10)') 274 | # parser.add_argument('--no-cuda', action='store_true', default=False, 275 | # help='enables CUDA training') 276 | # parser.add_argument('--seed', type=int, default=1, metavar='S', 277 | # help='random seed (default: 1)') 278 | # parser.add_argument('--log-interval', type=int, default=10, metavar='N', 279 | # help='how many batches to wait before logging training status') 280 | # args = parser.parse_args() 281 | # args.cuda = not args.no_cuda and torch.cuda.is_available() 282 | # 283 | # 284 | # torch.manual_seed(args.seed) 285 | # if args.cuda: 286 | # torch.cuda.manual_seed(args.seed) 287 | # 288 | # 289 | # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 290 | # train_loader = torch.utils.data.DataLoader( 291 | # datasets.MNIST('../data', train=True, download=True, 292 | # transform=transforms.ToTensor()), 293 | # batch_size=args.batch_size, shuffle=True, **kwargs) 294 | # test_loader = torch.utils.data.DataLoader( 295 | # datasets.MNIST('../data', train=False, transform=transforms.ToTensor()), 296 | # batch_size=args.batch_size, shuffle=True, **kwargs) 297 | # 298 | # 299 | # class VAE(nn.Module): 300 | # def __init__(self): 301 | # super(VAE, self).__init__() 302 | # 303 | # self.fc1 = nn.Linear(784, 400) 304 | # self.fc21 = nn.Linear(400, 20) 305 | # self.fc22 = nn.Linear(400, 20) 306 | # 307 | # 308 | # self.fc3 = nn.Linear(20, 400) 309 | # self.fc4 = nn.Linear(400, 784) 310 | # 311 | # self.relu = nn.ReLU() 312 | # self.sigmoid = nn.Sigmoid() 313 | # 314 | # def encode(self, x): 315 | # h1 = self.relu(self.fc1(x)) 316 | # return self.fc21(h1), self.fc22(h1) 317 | # 318 | # def reparameterize(self, mu, logvar): 319 | # if self.training: 320 | # std = logvar.mul(0.5).exp_() 321 | # eps = Variable(std.data.new(std.size()).normal_()) 322 | # return eps.mul(std).add_(mu) 323 | # else: 324 | # return mu 325 | # 326 | # def decode(self, z): 327 | # h3 = self.relu(self.fc3(z)) 328 | # return self.sigmoid(self.fc4(h3)) 329 | # 330 | # def forward(self, x): 331 | # mu, logvar = self.encode(x.view(-1, 784)) 332 | # z = self.reparameterize(mu, logvar) 333 | # return self.decode(z), mu, logvar 334 | # 335 | # 336 | # model = VAE() 337 | # if args.cuda: 338 | # model.cuda() 339 | # optimizer = optim.Adam(model.parameters(), lr=1e-3) 340 | # 341 | # # Reconstruction + KL divergence losses summed over all elements 
and batch 342 | # def loss_function(recon_x, x, mu, logvar): 343 | # BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), size_average=False) 344 | # 345 | # # see Appendix B from VAE paper: 346 | # # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 347 | # # https://arxiv.org/abs/1312.6114 348 | # # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 349 | # KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 350 | # 351 | # return BCE + KLD 352 | # 353 | # 354 | # def train(epoch): 355 | # model.train() 356 | # train_loss = 0 357 | # for batch_idx, (data, _) in enumerate(train_loader): 358 | # data = Variable(data) 359 | # if args.cuda: 360 | # data = data.cuda() 361 | # optimizer.zero_grad() 362 | # recon_batch, mu, logvar = model(data) 363 | # loss = loss_function(recon_batch, data, mu, logvar) 364 | # loss.backward() 365 | # train_loss += loss.data[0] 366 | # optimizer.step() 367 | # if batch_idx % args.log_interval == 0: 368 | # print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 369 | # epoch, batch_idx * len(data), len(train_loader.dataset), 370 | # 100. * batch_idx / len(train_loader), 371 | # loss.data[0] / len(data))) 372 | # 373 | # print('====> Epoch: {} Average loss: {:.4f}'.format( 374 | # epoch, train_loss / len(train_loader.dataset))) 375 | # 376 | # 377 | # def test(epoch): 378 | # model.eval() 379 | # test_loss = 0 380 | # for i, (data, _) in enumerate(test_loader): 381 | # if args.cuda: 382 | # data = data.cuda() 383 | # data = Variable(data, volatile=True) 384 | # recon_batch, mu, logvar = model(data) 385 | # test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 386 | # if i == 0: 387 | # n = min(data.size(0), 8) 388 | # comparison = torch.cat([data[:n], 389 | # recon_batch.view(args.batch_size, 1, 28, 28)[:n]]) 390 | # save_image(comparison.data.cpu(), 391 | # 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 392 | # 393 | # test_loss /= len(test_loader.dataset) 394 | # print('====> Test set loss: {:.4f}'.format(test_loss)) 395 | # 396 | # 397 | # for epoch in range(1, args.epochs + 1): 398 | # train(epoch) 399 | # test(epoch) 400 | # sample = Variable(torch.randn(64, 20)) 401 | # if args.cuda: 402 | # sample = sample.cuda() 403 | # sample = model.decode(sample).cpu() 404 | # save_image(sample.data.view(64, 1, 28, 28), 405 | # 'results/sample_' + str(epoch) + '.png') -------------------------------------------------------------------------------- /VAE_fb_modified.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | from torch.nn import functional as F 6 | from torchvision import datasets, transforms 7 | import numpy as np 8 | from torchvision.utils import save_image 9 | import os 10 | 11 | os.environ['CUDA_VISIBLE_DEVICES'] = "2" 12 | 13 | CUDA = False 14 | batch_size = 16 15 | z_dim = 20 16 | no_of_sample = 1000 17 | 18 | 19 | # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 20 | 21 | class VAE(nn.Module): 22 | def __init__(self): 23 | super(VAE, self).__init__() 24 | 25 | # ENCODER 26 | # 28 x 28 pixels = 784 input pixels, 400 outputs 27 | self.fc1 = nn.Linear(784, 400) 28 | # rectified linear unit layer from 400 to 400 29 | # max(0, x) 30 | self.relu = nn.ReLU() 31 | self.fc21 = nn.Linear(400, z_dim) # mu layer 32 | self.fc22 = nn.Linear(400, z_dim) # logvariance layer 33 | # this last layer bottlenecks through ZDIMS connections 34 | 35 | # 
DECODER 36 | # from bottleneck to hidden 400 37 | self.fc3 = nn.Linear(z_dim, 400) 38 | # from hidden 400 to 784 outputs 39 | self.fc4 = nn.Linear(400, 784) 40 | self.sigmoid = nn.Sigmoid() 41 | 42 | def encode(self, x): 43 | ''' 44 | :param x: here x is an image, can be any tensor 45 | :return: 2 tensors of size [N,z_dim=20] where first one is mu and second one is logvar 46 | ''' 47 | 48 | h1 = self.relu(self.fc1(x)) # type: Variable 49 | return self.fc21(h1), self.fc22(h1) 50 | 51 | def reparametrized_sample(self, parameter_z, no_of_sample): 52 | ''' 53 | 54 | :param z: 55 | :param no_of_sample: no of monte carlo sample 56 | :return: torch of size [N,no_of_sample,z_dim=20] 57 | ''' 58 | if CUDA: 59 | standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim).cuda()) 60 | else: 61 | standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim)) 62 | 63 | mu_z, logvar_z = parameter_z 64 | mu_z = mu_z.unsqueeze(1) 65 | sigma = logvar_z.mul(.5).exp() 66 | # sigma =.5*logvar_z.exp() 67 | 68 | sigma = sigma.unsqueeze(1) 69 | final_sample = mu_z + sigma * standard_normal_sample 70 | 71 | return final_sample 72 | 73 | def decode(self, z): 74 | h3 = self.relu(self.fc3(z)) 75 | return self.sigmoid(self.fc4(h3)) 76 | 77 | # x = F.elu(self.fc1(z)) 78 | # x = F.elu(self.fc2(x)) 79 | # x = x.view(-1,128,7,7) 80 | # x = F.relu(self.conv_t1(x)) 81 | # x = F.sigmoid(self.conv_t2(x)) 82 | 83 | # return x 84 | # mu_x = x.view(-1,28*28) 85 | # 86 | # logvar_x = F.elu(self.fc3(z)) 87 | # logvar_x = F.softmax(self.fc4(logvar_x)) 88 | # 89 | # return mu_x, logvar_x 90 | 91 | def log_density(self): 92 | pass 93 | 94 | def forward(self, x): 95 | ''' 96 | 97 | :param x: input image 98 | :return: array of length = batch size, each element is a tuple of 2 elemets of size [no_of_sample=1000,28*28 (for MNIST)], corresponding to mu and logvar 99 | ''' 100 | parameter_z = self.encode(x) 101 | sample_z = self.reparametrized_sample(parameter_z, no_of_sample) 102 | x = [self.decode(obs) for obs in sample_z] 103 | 104 | return parameter_z, x 105 | 106 | 107 | def loss_VAE(train_x, paramter_z, predicted_x): 108 | mu_z, logvar_z = paramter_z 109 | # Kullback Liebler Divergence 110 | negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1) # mu_z.size()=[batch_size, 28*28] 111 | # negative_KLD /=784 112 | 113 | # nll 114 | train_x_flattened = train_x.view(-1, 28 * 28) 115 | if CUDA: 116 | nll = Variable(torch.FloatTensor(batch_size).zero_().cuda()) 117 | else: 118 | nll = Variable(torch.FloatTensor(batch_size).zero_()) 119 | 120 | i = 0 121 | for x in train_x_flattened: 122 | predicted = predicted_x[i] 123 | predicted = predicted.view(-1, 784) 124 | 125 | sum = 0 126 | for pred in predicted: 127 | sum += F.binary_cross_entropy(pred, x, size_average=False) 128 | 129 | nll[i] = sum / no_of_sample # Monte carlo step 130 | i += 1 131 | 132 | final_loss = -negative_KLD + nll 133 | final_loss = torch.mean(final_loss) 134 | 135 | return final_loss 136 | 137 | 138 | def train(epoch, model, trainloader, optimizer): 139 | model.train() 140 | 141 | train_loss = 0 142 | count = 0 143 | for batch_id, data in enumerate(train_loader): 144 | 145 | train_x, _ = data 146 | count += train_x.size(0) 147 | 148 | if CUDA: 149 | train_x = Variable(train_x.type(torch.FloatTensor).cuda()) 150 | else: 151 | train_x = Variable(train_x.type(torch.FloatTensor)) 152 | 153 | train_x = train_x.view(-1, 784) 154 | paramter_z, predicted_x = model(train_x) 155 | 156 | loss = loss_VAE(train_x, 
paramter_z, predicted_x)
157 |         train_loss += loss.data[0]
158 |         optimizer.zero_grad()  # reset gradients so they don't accumulate across batches
159 |         loss.backward()
160 |         optimizer.step()
161 | 
162 |         if batch_id % 50 == 0:
163 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
164 |                 epoch, batch_id * len(train_x), len(train_loader.dataset), 100. * batch_id / len(train_loader),
165 |                 loss.data[0]))
166 | 
167 |     train_loss /= count
168 |     print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
169 | 
170 | 
171 | if __name__ == "__main__":
172 |     train_loader = torch.utils.data.DataLoader(
173 |         datasets.MNIST('../data', train=True, download=True,
174 |                        transform=transforms.ToTensor()),
175 |         batch_size=batch_size, shuffle=True)
176 |     test_loader = torch.utils.data.DataLoader(
177 |         datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
178 |         batch_size=batch_size, shuffle=True)
179 | 
180 |     model = VAE()
181 |     model_parameters = filter(lambda p: p.requires_grad, model.parameters())
182 |     nb_params = sum([np.prod(p.size()) for p in model_parameters])
183 |     print("no. of trainable parameters is: {}".format(nb_params))
184 |     #model.cuda()
185 | 
186 | 
187 |     optimizer = optim.Adam(model.parameters(), lr=.001)
188 | 
189 |     nb_epoch = 2
190 |     for epoch in range(1, nb_epoch + 1):
191 |         train(epoch, model, train_loader, optimizer)
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from matplotlib import pyplot as plt
4 | import math, os
5 | 
6 | 
7 | import argparse
8 | import torch
9 | import torch.utils.data
10 | from torch import nn, optim
11 | from torch.autograd import Variable
12 | from torch.nn import functional as F
13 | from torchvision import datasets, transforms
14 | from torchvision.utils import save_image
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description='VAE MNIST Example')
18 | parser.add_argument('--batch-size', type=int, default=128, metavar='N',
19 |                     help='input batch size for training (default: 128)')
20 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
21 |                     help='number of epochs to train (default: 10)')
22 | parser.add_argument('--no-cuda', action='store_true', default=False,
23 |                     help='enables CUDA training')
24 | parser.add_argument('--seed', type=int, default=1, metavar='S',
25 |                     help='random seed (default: 1)')
26 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
27 |                     help='how many batches to wait before logging training status')
28 | args = parser.parse_args()
29 | args.cuda = not args.no_cuda and torch.cuda.is_available()
30 | 
31 | 
32 | torch.manual_seed(args.seed)
33 | if args.cuda:
34 |     torch.cuda.manual_seed(args.seed)
35 | 
36 | 
37 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
38 | 
39 | def compute_kernel(x, y):
40 |     x_size = tf.shape(x)[0]
41 |     y_size = tf.shape(y)[0]
42 |     dim = tf.shape(x)[1]
43 |     tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
44 |     tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
45 |     return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))
46 | 
47 | 
48 | class VAE(nn.Module):
49 |     def __init__(self):
50 |         super(VAE, self).__init__()
51 | 
52 |         self.fc1 = nn.Linear(784, 400)
53 |         self.fc21 = nn.Linear(400, 20)
54 |         self.fc22 = nn.Linear(400, 20)
55 |         self.fc3 = nn.Linear(20, 400)
56 |         self.fc4 = 
nn.Linear(400, 784) 57 | 58 | self.relu = nn.ReLU() 59 | self.sigmoid = nn.Sigmoid() 60 | 61 | def encode(self, x): 62 | h1 = self.relu(self.fc1(x)) 63 | return self.fc21(h1), self.fc22(h1) 64 | 65 | def reparameterize(self, mu, logvar): 66 | if self.training: 67 | std = logvar.mul(0.5).exp_() 68 | eps = Variable(std.data.new(std.size()).normal_()) 69 | return eps.mul(std).add_(mu) 70 | else: 71 | return mu 72 | 73 | def decode(self, z): 74 | h3 = self.relu(self.fc3(z)) 75 | return self.sigmoid(self.fc4(h3)) 76 | 77 | def forward(self, x): 78 | mu, logvar = self.encode(x.view(-1, 784)) 79 | z = self.reparameterize(mu, logvar) 80 | return self.decode(z), mu, logvar 81 | 82 | 83 | def loss_function(recon_x, x, mu, logvar): 84 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), size_average=False) 85 | 86 | # see Appendix B from VAE paper: 87 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 88 | # https://arxiv.org/abs/1312.6114 89 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 90 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 91 | 92 | return BCE + KLD 93 | 94 | 95 | def train(epoch): 96 | model.train() 97 | train_loss = 0 98 | for batch_idx, (data, _) in enumerate(train_loader): 99 | data = Variable(data) 100 | if args.cuda: 101 | data = data.cuda() 102 | optimizer.zero_grad() 103 | recon_batch, mu, logvar = model(data) 104 | loss = loss_function(recon_batch, data, mu, logvar) 105 | loss.backward() 106 | train_loss += loss.data[0] 107 | optimizer.step() 108 | if batch_idx % args.log_interval == 0: 109 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 110 | epoch, batch_idx * len(data), len(train_loader.dataset), 111 | 100. * batch_idx / len(train_loader), 112 | loss.data[0] / len(data))) 113 | 114 | print('====> Epoch: {} Average loss: {:.4f}'.format( 115 | epoch, train_loss / len(train_loader.dataset))) 116 | 117 | 118 | def test(epoch): 119 | model.eval() 120 | test_loss = 0 121 | for i, (data, _) in enumerate(test_loader): 122 | if args.cuda: 123 | data = data.cuda() 124 | data = Variable(data, volatile=True) 125 | recon_batch, mu, logvar = model(data) 126 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 127 | if i == 0: 128 | n = min(data.size(0), 8) 129 | comparison = torch.cat([data[:n], 130 | recon_batch.view(args.batch_size, 1, 28, 28)[:n]]) 131 | save_image(comparison.data.cpu(), 132 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 133 | 134 | test_loss /= len(test_loader.dataset) 135 | print('====> Test set loss: {:.4f}'.format(test_loss)) 136 | 137 | 138 | 139 | def lrelu(x, rate=0.1): 140 | return tf.maximum(tf.minimum(x * rate, 0), x) 141 | 142 | def conv2d_lrelu(inputs, num_outputs, kernel_size, stride): 143 | conv = tf.contrib.layers.convolution2d(inputs, num_outputs, kernel_size, stride, 144 | weights_initializer=tf.contrib.layers.xavier_initializer(), 145 | activation_fn=tf.identity) 146 | conv = lrelu(conv) 147 | return conv 148 | 149 | def conv2d_t_relu(inputs, num_outputs, kernel_size, stride): 150 | conv = tf.contrib.layers.convolution2d_transpose(inputs, num_outputs, kernel_size, stride, 151 | weights_initializer=tf.contrib.layers.xavier_initializer(), 152 | activation_fn=tf.identity) 153 | conv = tf.nn.relu(conv) 154 | return conv 155 | 156 | def fc_lrelu(inputs, num_outputs): 157 | fc = tf.contrib.layers.fully_connected(inputs, num_outputs, 158 | weights_initializer=tf.contrib.layers.xavier_initializer(), 159 | activation_fn=tf.identity) 160 | fc = lrelu(fc) 161 | return fc 162 | 
163 | def fc_relu(inputs, num_outputs):
164 |     fc = tf.contrib.layers.fully_connected(inputs, num_outputs,
165 |                                            weights_initializer=tf.contrib.layers.xavier_initializer(),
166 |                                            activation_fn=tf.identity)
167 |     fc = tf.nn.relu(fc)
168 |     return fc
169 | 
170 | 
171 | def encoder(x, z_dim):
172 |     with tf.variable_scope('encoder'):
173 |         conv1 = conv2d_lrelu(x, num_outputs=64, kernel_size=4, stride=2)
174 |         conv2 = conv2d_lrelu(conv1, 128, 4, 2)
175 |         conv2 = tf.reshape(conv2, [-1, np.prod(conv2.get_shape().as_list()[1:])])
176 |         fc1 = fc_lrelu(conv2, 1024)
177 |         return tf.contrib.layers.fully_connected(fc1, z_dim, activation_fn=tf.identity)
178 | 
179 | def decoder(z, reuse=False):
180 |     with tf.variable_scope('decoder') as vs:
181 |         if reuse:
182 |             vs.reuse_variables()
183 |         fc1 = fc_relu(z, 1024)
184 |         fc2 = fc_relu(fc1, 7*7*128)
185 |         fc2 = tf.reshape(fc2, tf.stack([tf.shape(fc2)[0], 7, 7, 128]))
186 |         conv1 = conv2d_t_relu(fc2, 64, 4, 2)
187 |         output = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
188 |         return output
189 | 
190 | 
191 | 
192 | 
193 | 
194 | if __name__ == "__main__":
195 |     train_loader = torch.utils.data.DataLoader(
196 |         datasets.MNIST('../data', train=True, download=True,
197 |                        transform=transforms.ToTensor()),
198 |         batch_size=BATCH_SIZE, shuffle=True, **kwargs)
199 |     test_loader = torch.utils.data.DataLoader(
200 |         datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
201 |         batch_size=BATCH_SIZE, shuffle=True, **kwargs)
202 | 
203 |     model = VAE()
204 |     if CUDA:
205 |         model.cuda()
206 |     optimizer = optim.Adam(model.parameters(), lr=1e-3)
207 |     if not os.path.exists('results'): os.makedirs('results')  # train()/test() save images here
208 |     for epoch in range(1, EPOCHS + 1):
209 |         train(epoch)
210 |         test(epoch)
211 |         sample = Variable(torch.randn(64, 20))
212 |         if CUDA:
213 |             sample = sample.cuda()
214 |         sample = model.decode(sample).cpu()
215 |         save_image(sample.data.view(64, 1, 28, 28),
216 |                    'results/sample_' + str(epoch) + '.png')
217 | 
--------------------------------------------------------------------------------
/main_new.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | from torch import nn, optim
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torchvision import datasets, transforms
7 | import numpy as np
8 | from torchvision.utils import save_image
9 | 
10 | batch_size = 16
11 | z_dim = 20
12 | no_of_sample = 1000
13 | #kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
14 | 
15 | class VAE(nn.Module):
16 |     def __init__(self):
17 |         super(VAE, self).__init__()
18 |         self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4,4), padding=(15,15), stride=2)  # this padding keeps the size of the image the same ('SAME' padding)
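        # Added check (not in the original file): with a 28x28 input, kernel 4,
        # stride 2 and padding 15, the output side is
        #   floor((28 + 2*15 - 4) / 2) + 1 = floor(54 / 2) + 1 = 28,
        # so conv1 and conv2 both preserve the 28x28 spatial size, which is why
        # encode() below can flatten to 128 * 28 * 28.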
19 |         self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4,4), padding=(15,15), stride=2)
20 |         self.fc11 = nn.Linear(in_features=128*28*28, out_features=1024)
21 |         self.fc12 = nn.Linear(in_features=1024, out_features=z_dim)
22 | 
23 |         self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
24 |         self.fc22 = nn.Linear(in_features=1024, out_features=z_dim)
25 | 
26 |         # For decoder
27 | 
28 |         # For mu
29 |         self.fc1 = nn.Linear(in_features=20, out_features=1024)
30 |         self.fc2 = nn.Linear(in_features=1024, out_features=7*7*128)
31 |         self.conv_t1 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2)
32 |         self.conv_t2 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2)
33 | 
34 |         # for logvar
35 |         self.fc3 = nn.Linear(in_features=20, out_features=400)
36 |         self.fc4 = nn.Linear(in_features=400, out_features=784)
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 |     def encode(self, x):
45 |         '''
46 |         :param x: an input image batch [N, 1, 28, 28]
47 |         :return: 2 tensors of size [N, z_dim=20] where the first is mu and the second is logvar
48 |         '''
49 | 
50 |         x = F.elu(self.conv1(x))
51 |         x = F.elu(self.conv2(x))
52 |         x = x.view(-1, 128*28*28)
53 | 
54 |         mu_z = F.elu(self.fc11(x))
55 |         #mu_z = F.softmax(self.fc12(mu_z))
56 |         mu_z = self.fc12(mu_z)
57 | 
58 |         logvar_z = F.elu(self.fc21(x))
59 |         #logvar_z = F.softmax(self.fc22(logvar_z))
60 |         logvar_z = self.fc22(logvar_z)
61 | 
62 |         return mu_z, logvar_z
63 | 
64 |     def reparametrized_sample(self, parameter_z, no_of_sample):
65 |         '''
66 | 
67 |         :param parameter_z: tuple (mu_z, logvar_z) from encode()
68 |         :param no_of_sample: no of monte carlo samples
69 |         :return: tensor of size [N, no_of_sample, z_dim=20]
70 |         '''
71 |         standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim).cuda())
72 |         mu_z, logvar_z = parameter_z
73 |         mu_z = mu_z.unsqueeze(1)
74 |         sigma = (.5 * logvar_z).exp()  # std = exp(logvar / 2)
75 |         sigma = sigma.unsqueeze(1)
76 |         final_sample = mu_z + sigma*standard_normal_sample
77 | 
78 |         return final_sample
79 | 
80 |     def decode(self, z):
81 | 
82 |         x = F.elu(self.fc1(z))
83 |         x = F.elu(self.fc2(x))
84 |         x = x.view(-1, 128, 7, 7)
85 |         x = F.relu(self.conv_t1(x))
86 |         x = F.sigmoid(self.conv_t2(x))  # sigmoid, not softmax: softmax over the single channel would output all ones
87 |         mu_x = x.view(-1, 28*28)
88 | 
89 |         logvar_x = F.elu(self.fc3(z))
90 |         logvar_x = F.softmax(self.fc4(logvar_x))
91 | 
92 |         return mu_x, logvar_x
93 | 
94 |     def log_density(self):
95 |         pass
96 | 
97 |     def forward(self, x):
98 |         '''
99 | 
100 |         :param x: input image batch
101 |         :return: array of length = batch size; each element is a tuple of 2 elements of size [no_of_sample=1000, 28*28 (for MNIST)], corresponding to mu and logvar
102 |         '''
103 |         parameter_z = self.encode(x)
104 |         sample_z = self.reparametrized_sample(parameter_z, no_of_sample)
105 |         parameter_x = [self.decode(obs) for obs in sample_z]
106 | 
107 |         return parameter_z, parameter_x
108 | 
109 | 
110 | def loss_VAE(train_x, parameter_x, parameter_z):
111 | 
112 |     mu_z, logvar_z = parameter_z
113 |     # Kullback–Leibler divergence
114 |     negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1)  # mu_z.size() = [batch_size, z_dim]
115 | 
116 |     # negative log-likelihood
117 |     train_x_flattened = train_x.view(-1, 28*28)
118 |     i = 0
119 |     nll = Variable(torch.FloatTensor(batch_size).zero_().cuda())
120 |     for param in parameter_x:
121 |         mu_x, logvar_x = param
122 |         x = train_x_flattened[i]
123 | 
124 |         log_likelihood_for_one_z = 0.5*torch.sum(logvar_x, 1) + torch.sum(((x - mu_x).pow(2))/(2*logvar_x.exp()), 1)  # -log p(x^(i)|z^(i,l)) up to a constant; 0.5 on the log-variance term per the README loss
125 |         nll_one_sample = torch.mean(log_likelihood_for_one_z)  # Monte Carlo average step to calculate the expectation
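        # Added note (not in the original file): for image i this is the Monte
        # Carlo estimate of -log p(x_i | z) from the README's Gaussian loss,
        #   (1/L) * sum_l [ 0.5*sum_j log sigma_ij^2 + 0.5*sum_j ((x_ij - mu_ij)/sigma_ij)^2 ],
        # with additive constants dropped.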
126 |         nll[i] = nll_one_sample
127 |         i += 1
128 | 
129 |     final_loss = nll - negative_KLD  # loss = NLL + KLD; negative_KLD holds -KLD, so it enters with a minus sign
130 |     final_loss = torch.mean(final_loss)
131 | 
132 |     return final_loss
133 | 
134 | 
135 | def train(epoch, model, trainloader, optimizer):
136 |     model.train()
137 | 
138 |     train_loss = 0
139 |     count = 0
140 |     for batch_id, data in enumerate(trainloader):
141 | 
142 |         train_x, _ = data
143 |         count += train_x.size(0)
144 |         train_x = Variable(train_x.type(torch.FloatTensor).cuda())
145 |         parameter_z, parameter_x = model(train_x)
146 | 
147 |         optimizer.zero_grad()  # gradients would otherwise accumulate across batches
148 |         loss = loss_VAE(train_x, parameter_x, parameter_z)
149 |         train_loss += loss.data[0]
150 | 
151 |         loss.backward()
152 |         optimizer.step()
153 | 
154 |         if batch_id % 50 == 0:
155 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
156 |                 epoch, batch_id * len(train_x), len(trainloader.dataset), 100. * batch_id / len(trainloader), loss.data[0]))
157 | 
158 |     train_loss /= count
159 |     print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
160 | 
161 | 
162 | 
163 | 
164 | if __name__ == "__main__":
165 |     train_loader = torch.utils.data.DataLoader(
166 |         datasets.MNIST('../data', train=True, download=True,
167 |                        transform=transforms.ToTensor()),
168 |         batch_size=batch_size, shuffle=True)
169 |     test_loader = torch.utils.data.DataLoader(
170 |         datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
171 |         batch_size=batch_size, shuffle=True)
172 | 
173 |     model = VAE()
174 |     model_parameters = filter(lambda p: p.requires_grad, model.parameters())
175 |     nb_params = sum([np.prod(p.size()) for p in model_parameters])
176 |     print("no. of trainable parameters is: {}".format(nb_params))
177 |     model.cuda()
178 | 
179 | 
180 |     optimizer = optim.Adam(model.parameters(), lr=.001)
181 | 
182 |     nb_epoch = 2
183 |     for epoch in range(1, nb_epoch + 1):
184 |         train(epoch, model, train_loader, optimizer)
185 | 
186 | # A second, standalone VAE variant follows; it is defined after the training run above and is never instantiated.
187 | class VAE(nn.Module):
188 |     def __init__(self):
189 |         super(VAE, self).__init__()
190 |         self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15),
191 |                                stride=2)  # This padding keeps the size of the image the same, i.e.
same padding 192 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2) 193 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 194 | self.fc12 = nn.Linear(in_features=1024, out_features=z_dim) 195 | 196 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 197 | self.fc22 = nn.Linear(in_features=1024, out_features=z_dim) 198 | 199 | # For decoder 200 | 201 | # For mu 202 | self.fc1 = nn.Linear(in_features=20, out_features=1024) 203 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128) 204 | self.conv_t1 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 205 | self.conv_t2 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 206 | 207 | # for logvar 208 | self.fc3 = nn.Linear(in_features=20, out_features=400) 209 | self.fc4 = nn.Linear(in_features=400, out_features=784) 210 | 211 | def encode(self, x): 212 | ''' 213 | :param x: here x is an image, can be any tensor 214 | :return: 2 tensors of size [N,z_dim=20] where first one is mu and second one is logvar 215 | ''' 216 | 217 | x = F.elu(self.conv1(x)) 218 | x = F.elu(self.conv2(x)) 219 | x = x.view(-1, 128 * 28 * 28) 220 | 221 | mu_z = F.elu(self.fc11(x)) 222 | # mu_z = F.softmax(self.fc12(mu_z)) 223 | mu_z = self.fc12(mu_z) 224 | 225 | logvar_z = F.elu(self.fc21(x)) 226 | # logvar_z = F.softmax(self.fc22(logvar_z)) 227 | logvar_z = self.fc22(logvar_z) 228 | 229 | return mu_z, logvar_z 230 | 231 | def reparametrized_sample(self, parameter_z, no_of_sample): 232 | ''' 233 | 234 | :param z: 235 | :param no_of_sample: no of monte carlo sample 236 | :return: torch of size [N,no_of_sample,z_dim=20] 237 | ''' 238 | standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim)) 239 | mu_z, logvar_z = parameter_z 240 | mu_z = mu_z.unsqueeze(1) 241 | sigma = .5 * logvar_z.exp() 242 | sigma = sigma.unsqueeze(1) 243 | final_sample = mu_z + sigma * standard_normal_sample 244 | 245 | return final_sample 246 | 247 | def decode(self, z): 248 | x = F.elu(self.fc1(z)) 249 | x = F.elu(self.fc2(x)) 250 | x = x.view(-1, 128, 7, 7) 251 | x = F.relu(self.conv_t1(x)) 252 | x = F.softmax(self.conv_t2(x)) 253 | 254 | return x 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | -------------------------------------------------------------------------------- /new1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | from torch.autograd import Variable 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | os.environ['CUDA_VISIBLE_DEVICES']='0' 10 | # changed configuration to this instead of argparse for easier interaction 11 | CUDA = True 12 | SEED = 1 13 | BATCH_SIZE = 128 14 | LOG_INTERVAL = 10 15 | EPOCHS = 10 16 | no_of_sample = 10 17 | 18 | # connections through the autoencoder bottleneck 19 | # in the pytorch VAE example, this is 20 20 | ZDIMS = 20 21 | 22 | # I do this so that the MNIST dataset is downloaded where I want it 23 | #os.chdir("/home/cpbotha/Downloads/pytorch-vae") 24 | 25 | torch.manual_seed(SEED) 26 | if CUDA: 27 | torch.cuda.manual_seed(SEED) 28 | 29 | # DataLoader instances will load tensors directly into GPU memory 30 | kwargs = 
{'num_workers': 1, 'pin_memory': True} if CUDA else {}
31 | 
32 | # Download or load downloaded MNIST dataset
33 | # shuffle data at every epoch
34 | train_loader = torch.utils.data.DataLoader(
35 |     datasets.MNIST('/home/atin/data/', train=True, download=True,
36 |                    transform=transforms.ToTensor()),
37 |     batch_size=BATCH_SIZE, shuffle=True, **kwargs)
38 | 
39 | # Same for test data
40 | test_loader = torch.utils.data.DataLoader(
41 |     datasets.MNIST('/home/atin/data/', train=False, transform=transforms.ToTensor()),
42 |     batch_size=BATCH_SIZE, shuffle=True, **kwargs)
43 | 
44 | 
45 | class VAE(nn.Module):
46 |     def __init__(self):
47 |         super(VAE, self).__init__()
48 | 
49 |         self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15),
50 |                                stride=2)  # This padding keeps the size of the image the same, i.e. 'SAME' padding
51 |         self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2)
52 |         self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
53 |         self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS)
54 | 
55 |         self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
56 |         self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS)
57 | 
58 |         # ReLU is needed by decode() below; the original file only defined it inside the commented-out block further down
59 |         self.relu = nn.ReLU()
60 | 
61 | 
62 | 
63 | 
64 | 
65 |         # # ENCODER
66 |         # # 28 x 28 pixels = 784 input pixels, 400 outputs
67 |         # self.fc1 = nn.Linear(784, 400)
68 |         # # rectified linear unit layer from 400 to 400
69 |         # # max(0, x)
70 |         # self.relu = nn.ReLU()
71 |         # self.fc21 = nn.Linear(400, ZDIMS) # mu layer
72 |         # self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer
73 |         # this last layer bottlenecks through ZDIMS connections
74 | 
75 |         # DECODER
76 |         # from bottleneck to hidden 400
77 |         self.fc3 = nn.Linear(ZDIMS, 400)
78 |         # from hidden 400 to 784 outputs
79 |         self.fc4 = nn.Linear(400, 784)
80 |         self.sigmoid = nn.Sigmoid()
81 | 
82 |     def encode(self, x: Variable) -> (Variable, Variable):
83 |         x = x.view(-1, 1, 28, 28)  # forward() passes flattened rows; restore NCHW for the convs
84 |         x = F.elu(self.conv1(x))
85 |         x = F.elu(self.conv2(x))
86 |         x = x.view(-1, 128 * 28 * 28)
87 | 
88 |         mu_z = F.elu(self.fc11(x))
89 |         # mu_z = F.softmax(self.fc12(mu_z))
90 |         mu_z = self.fc12(mu_z)
91 | 
92 |         logvar_z = F.elu(self.fc21(x))
93 |         # logvar_z = F.softmax(self.fc22(logvar_z))
94 |         logvar_z = self.fc22(logvar_z)
95 | 
96 |         return mu_z, logvar_z
97 | 
98 | 
99 | 
100 | 
101 |         # --- leftover documentation and code from the earlier fully-connected encoder (unreachable after the return above) ---
102 |         """Input vector x -> fully connected 1 -> ReLU -> (fully connected
103 |         21, fully connected 22)
104 | 
105 |         Parameters
106 |         ----------
107 |         x : [128, 784] matrix; 128 digits of 28x28 pixels each
108 | 
109 |         Returns
110 |         -------
111 | 
112 |         (mu, logvar) : ZDIMS mean units one for each latent dimension, ZDIMS
113 |         variance units one for each latent dimension
114 | 
115 |         """
116 | 
117 |         # h1 is [128, 400]
118 |         # h1 = self.relu(self.fc1(x))  # type: Variable  (self.fc1 no longer exists)
119 |         # return self.fc21(h1), self.fc22(h1)
120 | 
121 |     def reparameterize(self, mu: Variable, logvar: Variable) -> Variable:
122 |         """THE REPARAMETERIZATION IDEA:
123 | 
124 |         For each training sample (we get 128 batched at a time)
125 | 
126 |         - take the current learned mu, stddev for each of the ZDIMS
127 |         dimensions and draw a random sample from that distribution
128 |         - the whole network is trained so that these randomly drawn
129 |         samples decode to output that looks like the input
130 |         - which will mean that the std, mu will be learned
131 |         *distributions* that correctly encode the inputs
132 |         - due to the additional KLD term (see loss_function() below)
133 |         the distribution will tend to unit Gaussians
134 | 
135 |         Parameters
136 |         ----------
137 | mu : [128, ZDIMS] mean matrix 138 | logvar : [128, ZDIMS] variance matrix 139 | 140 | Returns 141 | ------- 142 | 143 | During training random sample from the learned ZDIMS-dimensional 144 | normal distribution; during inference its mean. 145 | 146 | """ 147 | 148 | if self.training: 149 | # multiply log variance with 0.5, then in-place exponent 150 | # yielding the standard deviation 151 | 152 | sample_z = [] 153 | for _ in range(no_of_sample): 154 | std = logvar.mul(0.5).exp_() # type: Variable 155 | # - std.data is the [128,ZDIMS] tensor that is wrapped by std 156 | # - so eps is [128,ZDIMS] with all elements drawn from a mean 0 157 | # and stddev 1 normal distribution that is 128 samples 158 | # of random ZDIMS-float vectors 159 | eps = Variable(std.data.new(std.size()).normal_()) 160 | # - sample from a normal distribution with standard 161 | # deviation = std and mean = mu by multiplying mean 0 162 | # stddev 1 sample with desired std and mu, see 163 | # https://stats.stackexchange.com/a/16338 164 | # - so we have 128 sets (the batch) of random ZDIMS-float 165 | # vectors sampled from normal distribution with learned 166 | # std and mu for the current input 167 | sample_z.append(eps.mul(std).add_(mu)) 168 | 169 | return sample_z 170 | 171 | else: 172 | # During inference, we simply spit out the mean of the 173 | # learned distribution for the current input. We could 174 | # use a random sample from the distribution, but mu of 175 | # course has the highest probability. 176 | return mu 177 | 178 | def decode(self, z: Variable) -> Variable: 179 | h3 = self.relu(self.fc3(z)) 180 | return self.sigmoid(self.fc4(h3)) 181 | 182 | def forward(self, x: Variable) -> (Variable, Variable, Variable): 183 | mu, logvar = self.encode(x.view(-1, 784)) 184 | z = self.reparameterize(mu, logvar) 185 | return [self.decode(z) for z in z], mu, logvar 186 | #return self.decode(z), mu, logvar 187 | 188 | 189 | model = VAE() 190 | if CUDA: 191 | model.cuda() 192 | 193 | 194 | def loss_function(recon_x, x, mu, logvar) -> Variable: 195 | # how well do input x and output recon_x agree? 196 | 197 | BCE = 0 198 | for recon_x_one in recon_x: 199 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784)) 200 | 201 | BCE /=len(recon_x) 202 | 203 | 204 | 205 | # KLD is Kullback–Leibler divergence -- how much does one learned 206 | # distribution deviate from another, in this specific case the 207 | # learned distribution from the unit Gaussian 208 | 209 | # see Appendix B from VAE paper: 210 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 211 | # https://arxiv.org/abs/1312.6114 212 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 213 | # note the negative D_{KL} in appendix B of the paper 214 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 215 | # Normalise by same number of elements as in reconstruction 216 | KLD /= BATCH_SIZE * 784 217 | 218 | # BCE tries to make our reconstruction as accurate as possible 219 | # KLD tries to push the distributions as close as possible to unit Gaussian 220 | return BCE + KLD 221 | 222 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam! 
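# --- Added sketch (not in the original file): loss_function() above averages the
# per-draw BCE with a Python loop; an equivalent vectorised form, assuming
# recon_x is a list of [batch, 784] tensors, would be:
#
#     recon_stack = torch.stack(recon_x)               # [no_of_sample, batch, 784]
#     target = x.view(-1, 784).expand_as(recon_stack)
#     BCE = F.binary_cross_entropy(recon_stack, target)
#
# binary_cross_entropy with the default size_average=True already divides by the
# total element count, which matches dividing the looped sum by len(recon_x).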
223 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 224 | 225 | 226 | def train(epoch): 227 | # toggle model to train mode 228 | model.train() 229 | train_loss = 0 230 | # in the case of MNIST, len(train_loader.dataset) is 60000 231 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 232 | for batch_idx, (data, _) in enumerate(train_loader): 233 | data = Variable(data) 234 | if CUDA: 235 | data = data.cuda() 236 | optimizer.zero_grad() 237 | 238 | # push whole batch of data through VAE.forward() to get recon_loss 239 | recon_batch, mu, logvar = model(data) 240 | # calculate scalar loss 241 | loss = loss_function(recon_batch, data, mu, logvar) 242 | # calculate the gradient of the loss w.r.t. the graph leaves 243 | # i.e. input variables -- by the power of pytorch! 244 | loss.backward() 245 | train_loss += loss.data[0] 246 | optimizer.step() 247 | if batch_idx % LOG_INTERVAL == 0: 248 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 249 | epoch, batch_idx * len(data), len(train_loader.dataset), 250 | 100. * batch_idx / len(train_loader), 251 | loss.data[0] / len(data))) 252 | 253 | print('====> Epoch: {} Average loss: {:.4f}'.format( 254 | epoch, train_loss / len(train_loader.dataset))) 255 | 256 | 257 | def test(epoch): 258 | # toggle model to test / inference mode 259 | model.eval() 260 | test_loss = 0 261 | 262 | # each data is of BATCH_SIZE (default 128) samples 263 | for i, (data, _) in enumerate(test_loader): 264 | if CUDA: 265 | # make sure this lives on the GPU 266 | data = data.cuda() 267 | 268 | # we're only going to infer, so no autograd at all required: volatile=True 269 | data = Variable(data, volatile=True) 270 | recon_batch, mu, logvar = model(data) 271 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 272 | if i == 0: 273 | n = min(data.size(0), 8) 274 | # for the first 128 batch of the epoch, show the first 8 input digits 275 | # with right below them the reconstructed output digits 276 | comparison = torch.cat([data[:n], 277 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 278 | save_image(comparison.data.cpu(), 279 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 280 | 281 | test_loss /= len(test_loader.dataset) 282 | print('====> Test set loss: {:.4f}'.format(test_loss)) -------------------------------------------------------------------------------- /new2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | from torch.autograd import Variable 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | 10 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 11 | # changed configuration to this instead of argparse for easier interaction 12 | CUDA = True 13 | SEED = 1 14 | BATCH_SIZE = 128 15 | LOG_INTERVAL = 10 16 | EPOCHS = 10 17 | no_of_sample = 10 18 | 19 | # connections through the autoencoder bottleneck 20 | # in the pytorch VAE example, this is 20 21 | ZDIMS = 20 22 | 23 | # I do this so that the MNIST dataset is downloaded where I want it 24 | # os.chdir("/home/cpbotha/Downloads/pytorch-vae") 25 | 26 | torch.manual_seed(SEED) 27 | if CUDA: 28 | torch.cuda.manual_seed(SEED) 29 | 30 | # DataLoader instances will load tensors directly into GPU memory 31 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {} 32 | 33 | # Download or load downloaded MNIST dataset 34 | # shuffle data at every epoch 
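# Added note (not in the original file): pin_memory=True page-locks the host
# tensors so the .cuda() copies in train()/test() below can use faster DMA
# transfers, and num_workers=1 loads batches in one background worker process.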
35 | train_loader = torch.utils.data.DataLoader( 36 | datasets.MNIST('/home/atin/data/', train=True, download=True, 37 | transform=transforms.ToTensor()), 38 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 39 | 40 | # Same for test data 41 | test_loader = torch.utils.data.DataLoader( 42 | datasets.MNIST('/home/atin/data/', train=False, transform=transforms.ToTensor()), 43 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 44 | 45 | 46 | class VAE(nn.Module): 47 | def __init__(self): 48 | super(VAE, self).__init__() 49 | 50 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15), 51 | stride=2) # This padding keeps the size of the image same, i.e. same padding 52 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2) 53 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 54 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS) 55 | 56 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 57 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS) 58 | self.relu = nn.ReLU() 59 | 60 | # For decoder 61 | 62 | # For mu 63 | self.fc1 = nn.Linear(in_features=20, out_features=1024) 64 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128) 65 | self.conv_t11 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 66 | self.conv_t12 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 67 | 68 | self.conv_t21 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 69 | self.conv_t22 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 70 | 71 | 72 | 73 | # #for logvar 74 | # self.fc3 = nn.Linear(in_features=20, out_features=400) 75 | # self.fc4 = nn.Linear(in_features=400, out_features=784) 76 | 77 | # # ENCODER 78 | # # 28 x 28 pixels = 784 input pixels, 400 outputs 79 | # self.fc1 = nn.Linear(784, 400) 80 | # # rectified linear unit layer from 400 to 400 81 | # # max(0, x) 82 | # self.relu = nn.ReLU() 83 | # self.fc21 = nn.Linear(400, ZDIMS) # mu layer 84 | # self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer 85 | # this last layer bottlenecks through ZDIMS connections 86 | 87 | # # DECODER 88 | # # from bottleneck to hidden 400 89 | # self.fc3 = nn.Linear(ZDIMS, 400) 90 | # # from hidden 400 to 784 outputs 91 | # self.fc4 = nn.Linear(400, 784) 92 | # self.sigmoid = nn.Sigmoid() 93 | 94 | def encode(self, x: Variable) -> (Variable, Variable): 95 | 96 | x = x.view(-1, 1, 28, 28) 97 | x = F.elu(self.conv1(x)) 98 | x = F.elu(self.conv2(x)) 99 | x = x.view(-1, 128 * 28 * 28) 100 | 101 | mu_z = F.elu(self.fc11(x)) 102 | # mu_z = F.softmax(self.fc12(mu_z)) 103 | mu_z = self.fc12(mu_z) 104 | 105 | logvar_z = F.elu(self.fc21(x)) 106 | # logvar_z = F.softmax(self.fc22(logvar_z)) 107 | logvar_z = self.fc22(logvar_z) 108 | 109 | return mu_z, logvar_z 110 | 111 | def reparameterize(self, mu: Variable, logvar: Variable) -> list: 112 | """THE REPARAMETERIZATION IDEA: 113 | 114 | For each training sample (we get 128 batched at a time) 115 | 116 | - take the current learned mu, stddev for each of the ZDIMS 117 | dimensions and draw a random sample from that distribution 118 | - the whole network is trained so that these randomly drawn 119 | samples decode to output that looks like the input 120 | - which will mean that the std, mu will be learned 121 | *distributions* that correctly encode the inputs 122 | - due to the 
additional KLD term (see loss_function() below)
123 |         the distribution will tend to unit Gaussians
124 | 
125 |         Parameters
126 |         ----------
127 |         mu : [128, ZDIMS] mean matrix
128 |         logvar : [128, ZDIMS] variance matrix
129 | 
130 |         Returns
131 |         -------
132 | 
133 |         During training random sample from the learned ZDIMS-dimensional
134 |         normal distribution; during inference its mean.
135 | 
136 |         """
137 | 
138 |         if self.training:
139 |             # multiply log variance with 0.5, then in-place exponent
140 |             # yielding the standard deviation
141 | 
142 |             sample_z = []
143 |             for _ in range(no_of_sample):
144 |                 std = logvar.mul(0.5).exp_()  # type: Variable
145 |                 # - std.data is the [128,ZDIMS] tensor that is wrapped by std
146 |                 # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
147 |                 #   and stddev 1 normal distribution that is 128 samples
148 |                 #   of random ZDIMS-float vectors
149 |                 eps = Variable(std.data.new(std.size()).normal_())
150 |                 # - sample from a normal distribution with standard
151 |                 #   deviation = std and mean = mu by multiplying mean 0
152 |                 #   stddev 1 sample with desired std and mu, see
153 |                 #   https://stats.stackexchange.com/a/16338
154 |                 # - so we have 128 sets (the batch) of random ZDIMS-float
155 |                 #   vectors sampled from normal distribution with learned
156 |                 #   std and mu for the current input
157 |                 sample_z.append(eps.mul(std).add_(mu))
158 | 
159 |             return sample_z
160 | 
161 |         else:
162 |             # During inference, we simply spit out the mean of the
163 |             # learned distribution for the current input. We could
164 |             # use a random sample from the distribution, but mu of
165 |             # course has the highest probability.
166 |             return mu
167 | 
168 |     def decode(self, z: Variable) -> (Variable, Variable):
169 | 
170 |         x = F.elu(self.fc1(z))
171 |         x = F.elu(self.fc2(x))
172 |         x = x.view(-1, 128, 7, 7)
173 |         mu_x = F.relu(self.conv_t11(x))
174 |         mu_x = F.sigmoid(self.conv_t12(mu_x))
175 |         # the log-variance gets its own transposed-conv branch (conv_t21/22)
176 |         logvar_x = F.relu(self.conv_t21(x))
177 |         logvar_x = F.sigmoid(self.conv_t22(logvar_x))  # sigmoid bounds the log-variance to (0,1)
178 | 
179 |         return mu_x.view(-1, 784), logvar_x.view(-1, 784)
180 | 
181 | 
182 | 
183 |     def forward(self, x: Variable) -> (Variable, Variable, Variable):
184 |         mu, logvar = self.encode(x.view(-1, 784))
185 |         z = self.reparameterize(mu, logvar)
186 |         return [self.decode(zi) for zi in z], mu, logvar  # one (mu_x, logvar_x) pair per Monte Carlo draw
187 |         # return self.decode(z), mu, logvar
188 | 
189 | 
190 | model = VAE()
191 | if CUDA:
192 |     model.cuda()
193 | 
194 | 
195 | def loss_function(recon_x, x, mu, logvar) -> Variable:
196 |     # how well do input x and output recon_x agree?
197 | 
198 |     GLL = 0
199 |     x = x.view(-1, 784)
200 |     for recon_x_one in recon_x:
201 |         mu_x, logvar_x = recon_x_one
202 |         part1 = torch.sum(logvar_x)/BATCH_SIZE
203 |         sigma = logvar_x.mul(0.5).exp_()
204 |         part2 = torch.sum(((x - mu_x)/sigma)**2)/BATCH_SIZE
205 |         GLL += .5*(part1 + part2)
206 | 
207 |     GLL /= len(recon_x)
208 | 
209 |     # BCE = 0
210 |     # for recon_x_one in recon_x:
211 |     #     BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784))
212 |     #
213 |     # BCE /= len(recon_x)
214 | 
215 |     # KLD is Kullback–Leibler divergence -- how much does one learned
216 |     # distribution deviate from another, in this specific case the
217 |     # learned distribution from the unit Gaussian
218 | 
219 |     # see Appendix B from VAE paper:
220 |     # Kingma and Welling. Auto-Encoding Variational Bayes.
ICLR, 2014 221 | # https://arxiv.org/abs/1312.6114 222 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 223 | # note the negative D_{KL} in appendix B of the paper 224 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 225 | # Normalise by same number of elements as in reconstruction 226 | KLD /= BATCH_SIZE 227 | 228 | # BCE tries to make our reconstruction as accurate as possible 229 | # KLD tries to push the distributions as close as possible to unit Gaussian 230 | return GLL + KLD 231 | 232 | 233 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam! 234 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 235 | 236 | 237 | def train(epoch): 238 | # toggle model to train mode 239 | model.train() 240 | train_loss = 0 241 | # in the case of MNIST, len(train_loader.dataset) is 60000 242 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 243 | for batch_idx, (data, _) in enumerate(train_loader): 244 | data = Variable(data) 245 | if CUDA: 246 | data = data.cuda() 247 | optimizer.zero_grad() 248 | 249 | # push whole batch of data through VAE.forward() to get recon_loss 250 | recon_batch, mu, logvar = model(data) 251 | # calculate scalar loss 252 | loss = loss_function(recon_batch, data, mu, logvar) 253 | # calculate the gradient of the loss w.r.t. the graph leaves 254 | # i.e. input variables -- by the power of pytorch! 255 | loss.backward() 256 | train_loss += loss.data[0] 257 | optimizer.step() 258 | if batch_idx % LOG_INTERVAL == 0: 259 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 260 | epoch, batch_idx * len(data), len(train_loader.dataset), 261 | 100. * batch_idx / len(train_loader), 262 | loss.data[0] / len(data))) 263 | 264 | print('====> Epoch: {} Average loss: {:.4f}'.format( 265 | epoch, train_loss / len(train_loader.dataset))) 266 | 267 | 268 | def test(epoch): 269 | # toggle model to test / inference mode 270 | model.eval() 271 | test_loss = 0 272 | 273 | # each data is of BATCH_SIZE (default 128) samples 274 | for i, (data, _) in enumerate(test_loader): 275 | if CUDA: 276 | # make sure this lives on the GPU 277 | data = data.cuda() 278 | 279 | # we're only going to infer, so no autograd at all required: volatile=True 280 | data = Variable(data, volatile=True) 281 | recon_batch, mu, logvar = model(data) 282 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 283 | if i == 0: 284 | n = min(data.size(0), 8) 285 | # for the first 128 batch of the epoch, show the first 8 input digits 286 | # with right below them the reconstructed output digits 287 | comparison = torch.cat([data[:n], 288 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 289 | save_image(comparison.data.cpu(), 290 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 291 | 292 | test_loss /= len(test_loader.dataset) 293 | print('====> Test set loss: {:.4f}'.format(test_loss)) 294 | 295 | 296 | 297 | 298 | 299 | 300 | if __name__ == "__main__": 301 | 302 | for epoch in range(1, EPOCHS + 1): 303 | train(epoch) 304 | # test(epoch) 305 | 306 | # 64 sets of random ZDIMS-float vectors, i.e. 
64 locations / MNIST
307 |     # digits in latent space
308 |     sample = Variable(torch.randn(64, ZDIMS))
309 |     if CUDA:
310 |         sample = sample.cuda()
311 |     sample = model.decode(sample)[0].cpu()  # decode() returns (mu_x, logvar_x); take the mean image
312 | 
313 |     # save out as an 8x8 matrix of MNIST digits
314 |     # this will give you a visual idea of how well latent space can generate things
315 |     # that look like digits
316 |     save_image(sample.data.view(64, 1, 28, 28),
317 |                '/home/atin/data/new/reconstruction' + str(epoch) + '.png')
--------------------------------------------------------------------------------
/simple_main.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | from torch import nn, optim
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torchvision import datasets, transforms
7 | import numpy as np
8 | from torchvision.utils import save_image
9 | 
10 | batch_size = 200
11 | z_dim = 20
12 | no_of_sample = 1000
13 | 
14 | class VAE(nn.Module):
15 |     def __init__(self):
16 |         super(VAE, self).__init__()
17 |         self.fc1 = nn.Linear(784, 400)
18 |         self.fc21 = nn.Linear(400, 20)
19 |         self.fc22 = nn.Linear(400, 20)
20 |         self.fc3 = nn.Linear(20, 400)
21 |         self.fc41 = nn.Linear(400, 784)
22 |         self.fc42 = nn.Linear(400, 784)
23 | 
24 |         self.relu = nn.ReLU()
25 |         self.sigmoid = nn.Sigmoid()
26 | 
27 | 
28 | 
29 |     def encode(self, x):
30 |         '''
31 |         :param x: a batch of flattened images, size [N, 784]
32 |         :return: 2 tensors of size [N, z_dim=20] where the first is mu and the second is logvar
33 |         '''
34 | 
35 |         h1 = self.relu(self.fc1(x))
36 |         return self.fc21(h1), self.fc22(h1)
37 | 
38 | 
39 |     def reparametrized_sample(self, parameter_z, no_of_sample):
40 |         '''
41 | 
42 |         :param parameter_z: tuple (mu_z, logvar_z) from encode()
43 |         :param no_of_sample: no of monte carlo samples
44 |         :return: tensor of size [N, no_of_sample, z_dim=20]
45 |         '''
46 |         standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim).cuda())
47 |         mu_z, logvar_z = parameter_z
48 |         mu_z = mu_z.unsqueeze(1)
49 |         sigma = (.5 * logvar_z).exp()  # std = exp(logvar / 2)
50 |         sigma = sigma.unsqueeze(1)
51 |         final_sample = mu_z + sigma*standard_normal_sample
52 | 
53 |         return final_sample
54 | 
55 |     def decode(self, z):
56 |         h1 = self.relu(self.fc3(z))
57 |         return self.fc41(h1), self.fc42(h1)
58 | 
59 | 
60 |     def log_density(self):
61 |         pass
62 | 
63 |     def forward(self, x):
64 |         '''
65 | 
66 |         :param x: input image batch
67 |         :return: array of length = batch size; each element is a tuple of 2 elements of size [no_of_sample=1000, 28*28 (for MNIST)], corresponding to mu and logvar
68 |         '''
69 | 
70 |         x = x.view(-1, 784)
71 |         parameter_z = self.encode(x)
72 |         sample_z = self.reparametrized_sample(parameter_z, no_of_sample)
73 |         parameter_x = [self.decode(obs) for obs in sample_z]
74 | 
75 |         return parameter_z, parameter_x
76 | 
77 | 
78 | def loss_VAE(train_x, parameter_x, parameter_z):
79 | 
80 |     mu_z, logvar_z = parameter_z
81 |     # Kullback–Leibler divergence
82 |     negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1)  # mu_z.size() = [batch_size, z_dim]
83 | 
84 |     # negative log-likelihood
85 |     train_x_flattened = train_x.view(-1, 28*28)
86 |     i = 0
87 |     nll = Variable(torch.FloatTensor(batch_size).zero_().cuda())
88 |     for param in parameter_x:
89 |         mu_x, logvar_x = param
90 |         x = train_x_flattened[i]
91 | 
92 |         log_likelihood_for_one_z = 0.5*torch.sum(logvar_x, 1) + torch.sum(((x - mu_x).pow(2))/(2*logvar_x.exp()), 1)  # -log p(x^(i)|z^(i,l)) up to a constant; 0.5 on the log-variance term per the README loss
93 |         nll_one_sample = torch.mean(log_likelihood_for_one_z)  # Monte Carlo average step to calculate the expectation
94 |         nll[i] = nll_one_sample
95 |         i += 1
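        # Added note (not in the original file): train_x_flattened[i] is the
        # i-th image as a [784] vector, while mu_x/logvar_x are
        # [no_of_sample, 784]; broadcasting pairs that single image with each
        # of its no_of_sample decoded Gaussians before summing over pixels.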
96 | 
97 |     final_loss = nll - negative_KLD  # loss = NLL + KLD; negative_KLD holds -KLD, so it enters with a minus sign
98 |     final_loss = torch.mean(final_loss)
99 | 
100 |     return final_loss
101 | 
102 | 
103 | def train(epoch, model, trainloader, optimizer):
104 |     model.train()
105 | 
106 |     train_loss = 0
107 |     count = 0
108 |     for batch_id, data in enumerate(trainloader):
109 | 
110 |         train_x, _ = data
111 |         count += train_x.size(0)
112 |         train_x = Variable(train_x.type(torch.FloatTensor).cuda())
113 |         parameter_z, parameter_x = model(train_x)
114 | 
115 |         optimizer.zero_grad()  # gradients would otherwise accumulate across batches
116 |         loss = loss_VAE(train_x, parameter_x, parameter_z)
117 |         train_loss += loss.data[0]
118 | 
119 |         loss.backward()
120 |         optimizer.step()
121 | 
122 |         if batch_id % 50 == 0:
123 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
124 |                 epoch, batch_id * len(train_x), len(trainloader.dataset), 100. * batch_id / len(trainloader), loss.data[0]))
125 | 
126 |     train_loss /= count
127 |     print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
128 | 
129 | 
130 | 
131 | 
132 | if __name__ == "__main__":
133 |     train_loader = torch.utils.data.DataLoader(
134 |         datasets.MNIST('../data', train=True, download=True,
135 |                        transform=transforms.ToTensor()),
136 |         batch_size=batch_size, shuffle=True)
137 |     test_loader = torch.utils.data.DataLoader(
138 |         datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
139 |         batch_size=batch_size, shuffle=True)
140 | 
141 |     model = VAE()
142 |     model_parameters = filter(lambda p: p.requires_grad, model.parameters())
143 |     nb_params = sum([np.prod(p.size()) for p in model_parameters])
144 |     print("no. of trainable parameters is: {}".format(nb_params))
145 |     model.cuda()
146 | 
147 | 
148 |     optimizer = optim.Adam(model.parameters(), lr=.001)
149 | 
150 |     nb_epoch = 2
151 |     for epoch in range(1, nb_epoch + 1):
152 |         train(epoch, model, train_loader, optimizer)
153 | 
--------------------------------------------------------------------------------