├── .idea
│   ├── deployment.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── other.xml
│   ├── remote-mappings.xml
│   ├── vcs.xml
│   └── webServers.xml
├── README.md
├── VAE_CNN_BCEloss.py
├── VAE_CNN_Gaussianloss.py
├── VAE_celeba.py
├── VAE_facebook.py
├── VAE_fb_modified.py
├── main.py
├── main_new.py
├── new1.py
├── new2.py
└── simple_main.py
/README.md:
--------------------------------------------------------------------------------
1 | # Variational-Autoencoder
2 | Contains code to train a variational autoencoder (VAE) on the MNIST dataset using PyTorch.
3 |
4 | L = number of Monte Carlo samples used for the gradient estimate
5 |
6 | The Gaussian loss (reconstruction negative log-likelihood plus KL divergence) is given by
7 |
8 | ![\Large \frac{1}{N}\sum_{i=1}^{N}\left[\frac{1}{L}\sum_{l=1}^{L}\left\{ \frac{1}{2}\sum_{j=1}^{784}\log(\sigma_{ij}^{(l)})^2 + \frac{1}{2}\sum_{j=1}^{784}\left(\frac{x_{ij}-\mu_{ij}^{(l)}} {\sigma_{ij}^{(l)}}\right)^2 \right\} \right ] - \frac{1}{N}\sum_{i=1}^{N}\left[ \sum_{j=1}^{J}\frac{1}{2}\left(1+\log(\sigma_j^{\prime(i)})^2-(\mu_j^{\prime(i)})^2 -(\sigma_j^{\prime(i)})^2\right )\right ]](https://latex.codecogs.com/svg.latex?%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%5Cfrac%7B1%7D%7BL%7D%5Csum_%7Bl%3D1%7D%5E%7BL%7D%5Cleft%5C%7B%20%5Cfrac%7B1%7D%7B2%7D%5Csum_%7Bj%3D1%7D%5E%7B784%7D%5Clog%28%5Csigma_%7Bij%7D%5E%7B%28l%29%7D%29%5E2%20+%20%5Cfrac%7B1%7D%7B2%7D%5Csum_%7Bj%3D1%7D%5E%7B784%7D%5Cleft%28%5Cfrac%7Bx_%7Bij%7D-%5Cmu_%7Bij%7D%5E%7B%28l%29%7D%7D%20%7B%5Csigma_%7Bij%7D%5E%7B%28l%29%7D%7D%5Cright%29%5E2%20%5Cright%5C%7D%20%5Cright%20%5D%20-%20%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%20%5Csum_%7Bj%3D1%7D%5E%7BJ%7D%5Cfrac%7B1%7D%7B2%7D%5Cleft%281+%5Clog%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2-%28%5Cmu_j%5E%7B%5Cprime%28i%29%7D%29%5E2%20-%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2%5Cright%20%29%5Cright%20%5D)
9 |
10 |
11 | The BCE loss (Bernoulli negative log-likelihood plus KL divergence) is given by
12 |
13 | ![\Large -\frac{1}{N}\sum_{i=1}^{N}\left[\frac{1}{L}\sum_{l=1}^{L}\sum_{j=1}^{784}\left\{x_{ij}\log p_{ij}^{(l)} + (1-x_{ij})\log(1-p_{ij}^{(l)}) \right\} \right ] - \frac{1}{N}\sum_{i=1}^{N}\left[ \sum_{j=1}^{J}\frac{1}{2}\left(1+\log(\sigma_j^{\prime(i)})^2-(\mu_j^{\prime(i)})^2 -(\sigma_j^{\prime(i)})^2\right )\right ]](https://latex.codecogs.com/svg.latex?-%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%5Cfrac%7B1%7D%7BL%7D%5Csum_%7Bl%3D1%7D%5E%7BL%7D%5Csum_%7Bj%3D1%7D%5E%7B784%7D%5Cleft%5C%7Bx_%7Bij%7D%5Clog%20p_%7Bij%7D%5E%7B%28l%29%7D%20+%20%281-x_%7Bij%7D%29%5Clog%281-p_%7Bij%7D%5E%7B%28l%29%7D%29%20%5Cright%5C%7D%20%5Cright%20%5D%20-%20%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%20%5Csum_%7Bj%3D1%7D%5E%7BJ%7D%5Cfrac%7B1%7D%7B2%7D%5Cleft%281+%5Clog%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2-%28%5Cmu_j%5E%7B%5Cprime%28i%29%7D%29%5E2%20-%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2%5Cright%20%29%5Cright%20%5D)
14 |
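15 | For reference, here is a minimal PyTorch sketch of how the two objectives map to
16 | code. The shapes and helper names are illustrative (not taken from the scripts in
17 | this repo): `x`, `mu_x`, `logvar_x` and `p_x` are [N, 784] tensors from the
18 | decoder, and `mu_z`, `logvar_z` are [N, J] tensors from the encoder.
19 |
20 | ```python
21 | import torch
22 |
23 | def kld(mu_z, logvar_z):
24 |     # KL(q(z|x) || N(0, I)); see Appendix B of Kingma & Welling (2014)
25 |     return -0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp())
26 |
27 | def gaussian_loss(x, mu_x, logvar_x, mu_z, logvar_z):
28 |     # Gaussian negative log-likelihood (up to an additive constant) plus KL
29 |     nll = 0.5 * torch.sum(logvar_x + (x - mu_x) ** 2 / logvar_x.exp())
30 |     return (nll + kld(mu_z, logvar_z)) / x.size(0)
31 |
32 | def bce_loss(x, p_x, mu_z, logvar_z):
33 |     # Bernoulli negative log-likelihood (binary cross-entropy) plus KL
34 |     nll = -torch.sum(x * p_x.log() + (1 - x) * (1 - p_x).log())
35 |     return (nll + kld(mu_z, logvar_z)) / x.size(0)
36 | ```
37 |
38 | For L > 1, the reconstruction term is additionally averaged over the L decoded
39 | Monte Carlo samples, as the training scripts below do.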
--------------------------------------------------------------------------------
/VAE_CNN_BCEloss.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.utils.data
4 | from torch import nn, optim
5 | from torch.autograd import Variable
6 | from torch.nn import functional as F
7 | from torchvision import datasets, transforms
8 | from torchvision.utils import save_image
9 |
10 | os.environ['CUDA_VISIBLE_DEVICES'] = '3'
11 | # changed configuration to this instead of argparse for easier interaction
12 | CUDA = True
13 | SEED = 1
14 | BATCH_SIZE = 128
15 | LOG_INTERVAL = 10
16 | EPOCHS = 10
17 | no_of_sample = 10
18 |
19 | # connections through the autoencoder bottleneck
20 | # in the pytorch VAE example, this is 20
21 | ZDIMS = 20
22 |
23 | torch.manual_seed(SEED)
24 | if CUDA:
25 | torch.cuda.manual_seed(SEED)
26 |
27 | # pin_memory uses pinned (page-locked) host memory, which speeds up CPU-to-GPU copies
28 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
29 |
30 | # Download or load downloaded MNIST dataset
31 | # shuffle data at every epoch
32 | train_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=True, download=True,transform=transforms.ToTensor()),
33 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
34 |
35 | # Same for test data
36 | test_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=False, transform=transforms.ToTensor()),
37 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
38 |
39 |
40 | class VAE(nn.Module):
41 | def __init__(self):
42 | super(VAE, self).__init__()
43 |
44 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15),
45 |                                stride=2)  # 'same' spatial size: floor((28 + 2*15 - 4)/2) + 1 = 28
46 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2)
47 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
48 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS)
49 |
50 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
51 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS)
52 | self.relu = nn.ReLU()
53 |
54 | # For decoder
55 |
56 |         # single head producing the Bernoulli mean p(x|z)
57 |         self.fc1 = nn.Linear(in_features=ZDIMS, out_features=1024)
58 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128)
59 | self.conv_t1 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2)
60 | self.conv_t2 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2)
61 |
62 |
63 |
64 | def encode(self, x: Variable) -> (Variable, Variable):
65 |
66 | x = x.view(-1, 1, 28, 28)
67 | x = F.elu(self.conv1(x))
68 | x = F.elu(self.conv2(x))
69 | x = x.view(-1, 128 * 28 * 28)
70 |
71 | mu_z = F.elu(self.fc11(x))
72 | mu_z = self.fc12(mu_z)
73 |
74 | logvar_z = F.elu(self.fc21(x))
75 | logvar_z = self.fc22(logvar_z)
76 |
77 | return mu_z, logvar_z
78 |
79 |
80 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable:
81 |
82 |
83 | if self.training:
84 | # multiply log variance with 0.5, then in-place exponent
85 | # yielding the standard deviation
86 |
87 | sample_z = []
88 | for _ in range(no_of_sample):
89 | std = logvar.mul(0.5).exp_() # type: Variable
90 | eps = Variable(std.data.new(std.size()).normal_())
91 | sample_z.append(eps.mul(std).add_(mu))
92 |
93 | return sample_z
94 |
95 | else:
96 | # During inference, we simply spit out the mean of the
97 | # learned distribution for the current input. We could
98 | # use a random sample from the distribution, but mu of
99 | # course has the highest probability.
100 | return mu
101 |
102 | def decode(self, z: Variable) -> Variable:
103 |
104 | x = F.elu(self.fc1(z))
105 | x = F.elu(self.fc2(x))
106 | x = x.view(-1, 128, 7, 7)
107 | x = F.relu(self.conv_t1(x))
108 | x = F.sigmoid(self.conv_t2(x))
109 |
110 | return x.view(-1, 784)
111 |
112 |
113 | def forward(self, x: Variable) -> (Variable, Variable, Variable):
114 | mu, logvar = self.encode(x.view(-1, 784))
115 | z = self.reparameterize(mu, logvar)
116 | if self.training:
117 |             return [self.decode(z_i) for z_i in z], mu, logvar
118 | else:
119 | return self.decode(z), mu, logvar
120 | # return self.decode(z), mu, logvar
121 |
122 | def loss_function(self, recon_x, x, mu, logvar) -> Variable:
123 | # how well do input x and output recon_x agree?
124 |
125 | if self.training:
126 | BCE = 0
127 | for recon_x_one in recon_x:
128 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784))
129 | BCE /= len(recon_x)
130 | else:
131 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784))
132 |
133 | # KLD is Kullback–Leibler divergence -- how much does one learned
134 | # distribution deviate from another, in this specific case the
135 | # learned distribution from the unit Gaussian
136 |
137 | # see Appendix B from VAE paper:
138 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
139 | # https://arxiv.org/abs/1312.6114
140 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
141 | # note the negative D_{KL} in appendix B of the paper
142 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
143 | # Normalise by same number of elements as in reconstruction
144 | KLD /= BATCH_SIZE * 784
145 |
146 |
147 | return BCE + KLD
148 |
149 |
150 | model = VAE()
151 | if CUDA:
152 | model.cuda()
153 |
154 |
155 |
156 |
157 |
158 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
159 |
160 |
161 | def train(epoch):
162 | # toggle model to train mode
163 | model.train()
164 | train_loss = 0
165 | # in the case of MNIST, len(train_loader.dataset) is 60000
166 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28]
167 | for batch_idx, (data, _) in enumerate(train_loader):
168 | data = Variable(data)
169 | if CUDA:
170 | data = data.cuda()
171 | optimizer.zero_grad()
172 |
173 | # push whole batch of data through VAE.forward() to get recon_loss
174 | recon_batch, mu, logvar = model(data)
175 | # calculate scalar loss
176 | loss = model.loss_function(recon_batch, data, mu, logvar)
177 | # calculate the gradient of the loss w.r.t. the graph leaves
178 | # i.e. input variables -- by the power of pytorch!
179 | loss.backward()
180 | train_loss += loss.data[0]
181 | optimizer.step()
182 | if batch_idx % LOG_INTERVAL == 0:
183 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), len(train_loader.dataset),
184 | 100. * batch_idx / len(train_loader),
185 | loss.data[0] / len(data)))
186 |
187 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset)))
188 |
189 |
190 | def test(epoch):
191 | model.eval()
192 | test_loss = 0
193 |
194 | # each data is of BATCH_SIZE (default 128) samples
195 | for i, (data, _) in enumerate(test_loader):
196 | if CUDA:
197 | # make sure this lives on the GPU
198 | data = data.cuda()
199 |
200 | # we're only going to infer, so no autograd at all required: volatile=True
201 | data = Variable(data, volatile=True)
202 | recon_batch, mu, logvar = model(data)
203 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0]
204 | if i == 0:
205 | n = min(data.size(0), 8)
206 |             # for the first batch of the epoch, show the first 8 input digits
207 |             # with the reconstructed output digits right below them
208 | comparison = torch.cat([data[:n],
209 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]])
210 | save_image(comparison.data.cpu(),
211 | './mnist/reconstruction_' + str(epoch) + '.png', nrow=n)
212 |
213 | test_loss /= len(test_loader.dataset)
214 | print('====> Test set loss: {:.4f}'.format(test_loss))
215 |
216 |
217 | if __name__ == "__main__":
218 | for epoch in range(1, EPOCHS + 1):
219 | train(epoch)
220 | test(epoch)
221 |
222 | # 64 sets of random ZDIMS-float vectors, i.e. 64 locations / MNIST
223 | # digits in latent space
224 | sample = Variable(torch.randn(64, ZDIMS))
225 | if CUDA:
226 | sample = sample.cuda()
227 | sample = model.decode(sample).cpu()
228 |
229 | # save out as an 8x8 matrix of MNIST digits
230 | # this will give you a visual idea of how well latent space can generate things
231 | # that look like digits
232 | save_image(sample.data.view(64, 1, 28, 28),'./mnist/reconstruction' + str(epoch) + '.png')
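233 |
234 | # NOTE (added for clarity, not part of the original script): this code targets the
235 | # pre-0.4 PyTorch API. On PyTorch >= 0.4 the same idioms are written as
236 | #   loss.item()                  instead of loss.data[0]
237 | #   with torch.no_grad(): ...    instead of Variable(data, volatile=True)
238 | #   plain tensors                instead of Variable(...)
239 | #   torch.sigmoid(x)             instead of F.sigmoid(x)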
--------------------------------------------------------------------------------
/VAE_CNN_Gaussianloss.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.utils.data
4 | from torch import nn, optim
5 | import torch.nn.init as init
6 | from torch.autograd import Variable
7 | from torch.nn import functional as F
8 | from torchvision import datasets, transforms
9 | from torchvision.utils import save_image
10 |
11 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
12 | # changed configuration to this instead of argparse for easier interaction
13 | CUDA = True
14 | SEED = 1
15 | BATCH_SIZE = 128
16 | LOG_INTERVAL = 10
17 | EPOCHS = 50
18 | no_of_sample = 10
19 |
20 | # connections through the autoencoder bottleneck
21 | ZDIMS = 20
22 |
23 |
24 |
25 | torch.manual_seed(SEED)
26 | if CUDA:
27 | torch.cuda.manual_seed(SEED)
28 |
29 | # pin_memory uses pinned (page-locked) host memory, which speeds up CPU-to-GPU copies
30 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
31 |
32 | # Download or load downloaded MNIST dataset
33 | # shuffle data at every epoch
34 | train_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=True, download=True,transform=transforms.ToTensor()),
35 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
36 |
37 | # Same for test data
38 | test_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=False, transform=transforms.ToTensor()),
39 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
40 |
41 |
42 | class VAE(nn.Module):
43 | def __init__(self):
44 | super(VAE, self).__init__()
45 |
46 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15),
47 |                                stride=2)  # 'same' spatial size: floor((28 + 2*15 - 4)/2) + 1 = 28
48 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2)
49 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
50 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS)
51 |
52 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
53 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS)
54 | self.relu = nn.ReLU()
55 |
56 | # For decoder
57 |
58 |         # Shared trunk (fc1, fc2), then separate heads: conv_t1* for mu_x, conv_t2* for logvar_x
59 |         self.fc1 = nn.Linear(in_features=ZDIMS, out_features=1024)
60 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128)
61 | self.conv_t11 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2)
62 | self.conv_t12 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2)
63 |
64 | self.conv_t21 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2)
65 | self.conv_t22 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2)
66 |
67 | #Parameter initialization
68 | # for m in self.modules():
69 | #
70 | # if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear):
71 | # #init.xavier_normal(m.weight.data, gain=nn.init.calculate_gain('relu'))
72 | # init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu'))
73 | # #init.kaiming_uniform(m.weight.data)
74 | # init.constant(m.bias, .1)
75 | #
76 | # elif isinstance(m, nn.BatchNorm2d):
77 | # m.weight.data.fill_(1)
78 | # m.bias.data.zero_()
79 |
80 | def encode(self, x: Variable) -> (Variable, Variable):
81 |
82 | x = x.view(-1, 1, 28, 28)
83 | x = F.elu(self.conv1(x))
84 | x = F.elu(self.conv2(x))
85 | x = x.view(-1, 128 * 28 * 28)
86 |
87 | mu_z = F.elu(self.fc11(x))
88 | mu_z = self.fc12(mu_z)
89 |
90 | logvar_z = F.elu(self.fc21(x))
91 | logvar_z = self.fc22(logvar_z)
92 |
93 | return mu_z, logvar_z
94 |
95 | def reparameterize(self, mu: Variable, logvar: Variable) -> list:
96 | """THE REPARAMETERIZATION IDEA:
97 |
98 |         For each training sample (we get a batch of 128 at a time)
99 |
100 | - take the current learned mu, stddev for each of the ZDIMS
101 | dimensions and draw a random sample from that distribution
102 | - the whole network is trained so that these randomly drawn
103 | samples decode to output that looks like the input
104 | - which will mean that the std, mu will be learned
105 | *distributions* that correctly encode the inputs
106 | - due to the additional KLD term (see loss_function() below)
107 | the distribution will tend to unit Gaussians
108 |
109 | Parameters
110 | ----------
111 | mu : [128, ZDIMS] mean matrix
112 |         logvar : [128, ZDIMS] log-variance matrix
113 |
114 | Returns
115 | -------
116 |
117 | During training random sample from the learned ZDIMS-dimensional
118 | normal distribution; during inference its mean.
119 |
120 | """
121 |
122 | if self.training:
123 | # multiply log variance with 0.5, then in-place exponent
124 | # yielding the standard deviation
125 |
126 | sample_z = []
127 | for _ in range(no_of_sample):
128 | std = logvar.mul(0.5).exp_() # type: Variable
129 | # - std.data is the [128,ZDIMS] tensor that is wrapped by std
130 | # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
131 | # and stddev 1 normal distribution that is 128 samples
132 | # of random ZDIMS-float vectors
133 | eps = Variable(std.data.new(std.size()).normal_())
134 | # - sample from a normal distribution with standard
135 | # deviation = std and mean = mu by multiplying mean 0
136 | # stddev 1 sample with desired std and mu, see
137 | # https://stats.stackexchange.com/a/16338
138 | # - so we have 128 sets (the batch) of random ZDIMS-float
139 | # vectors sampled from normal distribution with learned
140 | # std and mu for the current input
141 | sample_z.append(eps.mul(std).add_(mu))
142 |
143 | return sample_z
144 |
145 | else:
146 | # During inference, we simply spit out the mean of the
147 | # learned distribution for the current input. We could
148 | # use a random sample from the distribution, but mu of
149 | # course has the highest probability.
150 | return mu
151 |
152 | def decode(self, z: Variable) -> (Variable, Variable):
153 |
154 | x = F.elu(self.fc1(z))
155 | x = F.elu(self.fc2(x))
156 | x = x.view(-1, 128, 7, 7)
157 | mu_x = F.relu(self.conv_t11(x))
158 | mu_x = F.sigmoid(self.conv_t12(mu_x))
159 |
160 |         logvar_x = F.relu(self.conv_t21(x))
161 |         logvar_x = F.sigmoid(self.conv_t22(logvar_x))
162 |
163 | return mu_x.view(-1, 784), logvar_x.view(-1, 784)
164 |
165 | def forward(self, x: Variable) -> (Variable, Variable, Variable):
166 | mu, logvar = self.encode(x.view(-1, 784))
167 | z = self.reparameterize(mu, logvar)
168 | if self.training:
169 |             return [self.decode(z_i) for z_i in z], mu, logvar
170 | else:
171 | return self.decode(z), mu, logvar
172 |
173 | def loss_function(self, recon_x, x, mu, logvar) -> Variable:
174 | # how well do input x and output recon_x agree?
175 |
176 | if self.training:
177 | GLL = 0
178 | x = x.view(-1, 784)
179 | for recon_x_one in recon_x:
180 | mu_x, logvar_x = recon_x_one
181 | part1 = torch.sum(logvar_x) / BATCH_SIZE
182 | sigma = logvar_x.mul(0.5).exp_()
183 | part2 = torch.sum(((x - mu_x) / sigma) ** 2) / BATCH_SIZE
184 | GLL += .5 * (part1 + part2)
185 |
186 | GLL /= len(recon_x)
187 | else:
188 | x = x.view(-1, 784)
189 | mu_x, logvar_x = recon_x
190 | part1 = torch.sum(logvar_x) / BATCH_SIZE
191 | sigma = logvar_x.mul(0.5).exp_()
192 | part2 = torch.sum(((x - mu_x) / sigma) ** 2) / BATCH_SIZE
193 | GLL = .5 * (part1 + part2)
194 |
195 |
196 |
197 | # see Appendix B from VAE paper:
198 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
199 | # https://arxiv.org/abs/1312.6114
200 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
201 | # note the negative D_{KL} in appendix B of the paper
202 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
203 | # Normalise by same number of elements as in reconstruction
204 | KLD /= BATCH_SIZE
205 |
206 |
207 | return GLL + KLD
208 |
209 |
210 | model = VAE()
211 | if CUDA:
212 | model.cuda()
213 |
214 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam!
215 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
216 |
217 |
218 | def train(epoch):
219 | # toggle model to train mode
220 | model.train()
221 | train_loss = 0
222 | # in the case of MNIST, len(train_loader.dataset) is 60000
223 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28]
224 | for batch_idx, (data, _) in enumerate(train_loader):
225 | data = Variable(data)
226 | if CUDA:
227 | data = data.cuda()
228 | optimizer.zero_grad()
229 |
230 | # push whole batch of data through VAE.forward() to get recon_loss
231 | recon_batch, mu, logvar = model(data)
232 | # calculate scalar loss
233 | loss = model.loss_function(recon_batch, data, mu, logvar)
234 | # calculate the gradient of the loss w.r.t. the graph leaves
235 | # i.e. input variables -- by the power of pytorch!
236 | loss.backward()
237 | train_loss += loss.data[0]
238 | optimizer.step()
239 | if batch_idx % LOG_INTERVAL == 0:
240 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
241 | epoch, batch_idx * len(data), len(train_loader.dataset),
242 | 100. * batch_idx / len(train_loader),
243 | loss.data[0] / len(data)))
244 |
245 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset)))
246 |
247 |
248 | def test(epoch):
249 | # toggle model to test / inference mode
250 | model.eval()
251 | test_loss = 0
252 |
253 | # each data is of BATCH_SIZE (default 128) samples
254 | for i, (data, _) in enumerate(test_loader):
255 | if CUDA:
256 | # make sure this lives on the GPU
257 | data = data.cuda()
258 |
259 | # we're only going to infer, so no autograd at all required: volatile=True
260 | data = Variable(data, volatile=True)
261 | recon_batch, mu, logvar = model(data)
262 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0]
263 |
264 | mu_batch, _ = recon_batch
265 | if i == 0:
266 | n = min(data.size(0), 8)
267 |             # for the first batch of the epoch, show the first 8 input digits
268 |             # with the reconstructed output digits right below them
269 | comparison = torch.cat([data[:n],
270 | mu_batch.view(BATCH_SIZE, 1, 28, 28)[:n]])
271 | save_image(comparison.data.cpu(),
272 | '/home/atin/data/new/results_gaussian/reconstruction_' + str(epoch) + '.png', nrow=n)
273 |
274 | test_loss /= len(test_loader.dataset)
275 | print('====> Test set loss: {:.4f}'.format(test_loss))
276 |
277 |
278 | if __name__ == "__main__":
279 |
280 | for epoch in range(1, EPOCHS + 1):
281 | train(epoch)
282 | test(epoch)
283 |
284 | # 64 sets of random ZDIMS-float vectors, i.e. 64 locations / MNIST
285 | # digits in latent space
286 | sample = Variable(torch.randn(64, ZDIMS))
287 | if CUDA:
288 | sample = sample.cuda()
289 | mu_sample, sigma_sample = model.decode(sample)
290 | # sample = model.decode(sample).cpu()
291 |
292 | # save out as an 8x8 matrix of MNIST digits
293 | # this will give you a visual idea of how well latent space can generate things
294 | # that look like digits
295 | save_image(mu_sample.cpu().data.view(64, 1, 28, 28),'./mnist/reconstruction' + str(epoch) + '.png')
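296 |
297 | # NOTE (added for clarity, not part of the original script): the GLL term above
298 | # follows from the Gaussian density,
299 | #   -log N(x; mu, sigma^2) = 0.5*log(sigma^2) + 0.5*((x - mu)/sigma)^2 + 0.5*log(2*pi),
300 | # summed over the 784 pixels and averaged over the batch; the constant
301 | # 0.5*log(2*pi) is dropped because it does not affect the gradients.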
--------------------------------------------------------------------------------
/VAE_celeba.py:
--------------------------------------------------------------------------------
1 | # Create a folder called celeba in the working directory; reconstructed images will be stored there
2 | # Only 100000 images are used for training
3 |
4 | import os
5 | import torch
6 | import torch.utils.data
7 | from torch import nn, optim
8 | from torch.autograd import Variable
9 | from torch.nn import functional as F
10 | from torchvision import datasets, transforms
11 | from torchvision.utils import save_image
12 | from torch.utils.data import Dataset, DataLoader
13 | from PIL import Image
14 | import numpy as np
15 | import matplotlib.pyplot as plt
16 | from scipy.misc import imresize
17 |
18 | import sys
19 | import warnings
20 | if not sys.warnoptions:
21 | warnings.simplefilter("ignore")
22 | os.environ['CUDA_VISIBLE_DEVICES'] = '1'
23 | no_of_sample = 10
24 | CUDA = True
25 | BATCH_SIZE = 32
26 | LOG_INTERVAL = 5
27 |
28 |
29 | class CelebaDataset(Dataset):
30 |
31 | def __init__(self, root_dir, im_name_list, resize_dim, transform=None):
32 | self.root_dir = root_dir
33 | self.im_list = im_name_list
34 | self.resize_dim = resize_dim
35 | self.transform = transform
36 |
37 | def __len__(self):
38 | return len(self.im_list)
39 |
40 | def __getitem__(self, idx):
41 | im = Image.open(os.path.join(self.root_dir, self.im_list[idx]))
42 | im = np.array(im)
43 | im = imresize(im, self.resize_dim, interp='nearest')
44 | im = im / 255
45 |
46 | if self.transform:
47 | im = self.transform(im)
48 |
49 | return im
50 |
51 | class ToTensor(object):
52 | """Convert ndarrays in sample to Tensors. numpy image: H x W x C, torch image: C X H X W
53 | """
54 |
55 | def __call__(self, image, invert_arrays=True):
56 |
57 | if invert_arrays:
58 | image = image.transpose((2, 0, 1))
59 |
60 | return torch.from_numpy(image)
61 |
62 |
63 | class Conv_Block(nn.Module):
64 | def __init__(self, in_channels, out_channels, kernel_size, padding, stride, pool_kernel_size=(2, 2)):
65 | super(Conv_Block, self).__init__()
66 |         self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
67 |         self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size, stride=stride, padding=padding)
68 | self.pool = nn.MaxPool2d(pool_kernel_size)
69 |
70 | def forward(self, x):
71 | x = F.elu(self.conv1(x))
72 | x = F.elu(self.conv2(x))
73 | x = self.pool(x)
74 |
75 | return x
76 |
77 |
78 | class VAE(nn.Module):
79 | def __init__(self):
80 | super(VAE, self).__init__()
81 | # Encoder
82 | self.block1 = Conv_Block(3, 64, (3, 3), 1, 1) # 64
83 | self.block2 = Conv_Block(64, 128, (3, 3), 1, 1) # 32
84 | self.block3 = Conv_Block(128, 256, (3, 3), 1, 1) # 16
85 | self.block4 = Conv_Block(256, 32, (3, 3), 1, 1) # 8
86 |
87 | # Decoder
88 | self.fct_decode = nn.Sequential(
89 | nn.Conv2d(16, 64, (3, 3), padding=1),
90 | nn.ELU(),
91 | nn.Upsample(scale_factor=2, mode='nearest'), # 16
92 | nn.Conv2d(64, 64, (3, 3), padding=1),
93 | nn.ELU(),
94 | nn.Upsample(scale_factor=2, mode='nearest'), # 32
95 | nn.Conv2d(64, 64, (3, 3), padding=1),
96 | nn.ELU(),
97 | nn.Upsample(scale_factor=2, mode='nearest'), # 64
98 | nn.Conv2d(64, 16, (3, 3), padding=1),
99 | nn.ELU(),
100 | nn.Upsample(scale_factor=2, mode='nearest'), # 128
101 | )
102 |
103 | self.final_decod_mean = nn.Conv2d(16, 3, (3, 3), padding=1)
104 |
105 | def encode(self, x):
106 | '''return mu_z and logvar_z'''
107 |
108 | x = F.elu(self.block1(x))
109 | x = F.elu(self.block2(x))
110 | x = F.elu(self.block3(x))
111 | x = F.elu(self.block4(x))
112 |
113 | return x[:, :16, :, :], x[:, 16:, :, :] # output shape - batch_size x 16 x 8 x 8
114 |
115 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable:
116 |
117 | if self.training:
118 | # multiply log variance with 0.5, then in-place exponent
119 | # yielding the standard deviation
120 |
121 | sample_z = []
122 | for _ in range(no_of_sample):
123 | std = logvar.mul(0.5).exp_() # type: Variable
124 | eps = Variable(std.data.new(std.size()).normal_())
125 | sample_z.append(eps.mul(std).add_(mu))
126 |
127 | return sample_z
128 |
129 | else:
130 | return mu
131 |
132 | def decode(self, z):
133 |
134 | z = self.fct_decode(z)
135 | z = self.final_decod_mean(z)
136 | z = F.sigmoid(z)
137 |
138 | return z.view(-1, 3 * 128 * 128)
139 |
140 | def forward(self, x):
141 | mu, logvar = self.encode(x)
142 | z = self.reparameterize(mu, logvar)
143 | if self.training:
144 |             return [self.decode(z_i) for z_i in z], mu, logvar
145 | else:
146 | return self.decode(z), mu, logvar
147 |
148 | def loss_function(self, recon_x, x, mu, logvar) -> Variable:
149 | # how well do input x and output recon_x agree?
150 |
151 | if self.training:
152 | BCE = 0
153 | for recon_x_one in recon_x:
154 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 3 * 128 * 128))
155 | BCE /= len(recon_x)
156 | else:
157 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 3 * 128 * 128))
158 |
159 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
160 | KLD /= BATCH_SIZE * 3 * 128 * 128
161 |
162 | return BCE + KLD
163 |
164 |
165 | def train(epoch, model, optimizer, train_loader):
166 | # toggle model to train mode
167 | model.train()
168 | train_loss = 0
169 |     # here len(train_loader.dataset) is 100000 CelebA images
170 |     # each `data` is of BATCH_SIZE samples and has shape [32, 3, 128, 128]
171 | for batch_idx, data in enumerate(train_loader):
172 | data = Variable(data.type(torch.FloatTensor))
173 | if CUDA:
174 | data = data.cuda()
175 | optimizer.zero_grad()
176 |
177 | # push whole batch of data through VAE.forward() to get recon_loss
178 | recon_batch, mu, logvar = model(data)
179 | # calculate scalar loss
180 | loss = model.loss_function(recon_batch, data, mu, logvar)
181 | # calculate the gradient of the loss w.r.t. the graph leaves
182 | # i.e. input variables -- by the power of pytorch!
183 | loss.backward()
184 | train_loss += loss.data[0]
185 | optimizer.step()
186 | if batch_idx % LOG_INTERVAL == 0:
187 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data),
188 | len(train_loader.dataset),
189 | 100. * batch_idx / len(train_loader),
190 | loss.data[0] / len(data)))
191 |
192 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset)))
193 |
194 |
195 | def test(epoch, model, test_loader):
196 | model.eval()
197 | test_loss = 0
198 |
199 |     # each data is of BATCH_SIZE (32 here) samples
200 | for i, data in enumerate(test_loader):
201 | data = Variable(data.type(torch.FloatTensor), volatile=True)
202 | if CUDA:
203 | # make sure this lives on the GPU
204 | data = data.cuda()
205 |
206 | # we're only going to infer, so no autograd at all required: volatile=True
207 |
208 | recon_batch, mu, logvar = model(data)
209 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0]
210 | if i == 0:
211 | n = min(data.size(0), 8)
212 |             # for the first batch of the epoch, show the first 8 input images
213 |             # with the reconstructed output images right below them
214 | comparison = torch.cat([data[:n],
215 | recon_batch.view(BATCH_SIZE, 3, 128, 128)[:n]])
216 | save_image(comparison.data.cpu(),
217 | './celeba/reconstruction_' + str(epoch) + '.png', nrow=n)
218 |
219 | # break #To save time
220 |
221 | test_loss /= len(test_loader.dataset)
222 | print('====> Test set loss: {:.4f}'.format(test_loss))
223 |
224 | if __name__ == "__main__":
225 |
226 | root_dir = "/home/atin/DeployedProjects/TestProject/img_align_celeba"
227 | image_files = os.listdir(root_dir)
228 | train_dataset = CelebaDataset(root_dir, image_files[:100000], (128, 128), transforms.Compose([ToTensor()]))
229 | train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=10, shuffle=True)
230 |
231 | #Take only 1000 images in test
232 | test_dataset = CelebaDataset(root_dir, image_files[100000:101000], (128, 128), transforms.Compose([ToTensor()]))
233 | test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=10, shuffle=True)
234 |
235 | EPOCHS = 10
236 | model = VAE()
237 | if CUDA: model.cuda()
238 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
239 |
240 | for epoch in range(1, EPOCHS + 1):
241 | train(epoch, model, optimizer, train_loader)
242 | test(epoch, model, test_loader)
243 |
244 |     # 64 random latent codes of shape [16, 8, 8], i.e. 64 locations
245 |     # in latent space
246 | sample = Variable(torch.randn(64, 16, 8, 8))
247 | if CUDA:
248 | sample = sample.cuda()
249 | sample = model.decode(sample).cpu()
250 |
251 |     # save out as an 8x8 grid of faces
252 |     # this will give you a visual idea of how well latent space can generate things
253 |     # that look like faces
254 | save_image(sample.data.view(64, 3, 128, 128), './celeba/reconstruction' + str(epoch) + '.png')
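255 |
256 | # NOTE (added for clarity, not part of the original script): scipy.misc.imresize
257 | # was deprecated in SciPy 1.0 and removed in SciPy 1.3. On newer environments the
258 | # resize in CelebaDataset.__getitem__ can be done with PIL directly, e.g.
259 | #   im = np.array(im_pil.resize(self.resize_dim, Image.NEAREST)) / 255
260 | # (PIL's resize takes (width, height); resize_dim is square here, so the order
261 | # does not matter).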
--------------------------------------------------------------------------------
/VAE_facebook.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.utils.data
4 | from torch import nn, optim
5 | from torch.autograd import Variable
6 | from torch.nn import functional as F
7 | from torchvision import datasets, transforms
8 | from torchvision.utils import save_image
9 |
10 | # changed configuration to this instead of argparse for easier interaction
11 | CUDA = False
12 | SEED = 1
13 | BATCH_SIZE = 128
14 | LOG_INTERVAL = 10
15 | EPOCHS = 10
16 |
17 | # connections through the autoencoder bottleneck
18 | # in the pytorch VAE example, this is 20
19 | ZDIMS = 20
20 |
21 | # I do this so that the MNIST dataset is downloaded where I want it
22 | #os.chdir("/home/atin/")
23 |
24 | torch.manual_seed(SEED)
25 | if CUDA:
26 | torch.cuda.manual_seed(SEED)
27 |
28 | # pin_memory uses pinned (page-locked) host memory, which speeds up CPU-to-GPU copies
29 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
30 |
31 | # Download or load downloaded MNIST dataset
32 | # shuffle data at every epoch
33 | train_loader = torch.utils.data.DataLoader(
34 | datasets.MNIST('../data', train=True, download=True,
35 | transform=transforms.ToTensor()),
36 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
37 |
38 | # Same for test data
39 | test_loader = torch.utils.data.DataLoader(
40 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
41 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
42 |
43 |
44 | class VAE(nn.Module):
45 | def __init__(self):
46 | super(VAE, self).__init__()
47 |
48 | # ENCODER
49 | # 28 x 28 pixels = 784 input pixels, 400 outputs
50 | self.fc1 = nn.Linear(784, 400)
51 | # rectified linear unit layer from 400 to 400
52 | # max(0, x)
53 | self.relu = nn.ReLU()
54 | self.fc21 = nn.Linear(400, ZDIMS) # mu layer
55 | self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer
56 | # this last layer bottlenecks through ZDIMS connections
57 |
58 | # DECODER
59 | # from bottleneck to hidden 400
60 | self.fc3 = nn.Linear(ZDIMS, 400)
61 | # from hidden 400 to 784 outputs
62 | self.fc4 = nn.Linear(400, 784)
63 | self.sigmoid = nn.Sigmoid()
64 |
65 | def encode(self, x: Variable) -> (Variable, Variable):
66 | """Input vector x -> fully connected 1 -> ReLU -> (fully connected
67 | 21, fully connected 22)
68 |
69 | Parameters
70 | ----------
71 | x : [128, 784] matrix; 128 digits of 28x28 pixels each
72 |
73 | Returns
74 | -------
75 |
76 | (mu, logvar) : ZDIMS mean units one for each latent dimension, ZDIMS
77 | variance units one for each latent dimension
78 |
79 | """
80 |
81 | # h1 is [128, 400]
82 | h1 = self.relu(self.fc1(x)) # type: Variable
83 | return self.fc21(h1), self.fc22(h1)
84 |
85 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable:
86 | """THE REPARAMETERIZATION IDEA:
87 |
88 |         For each training sample (we get a batch of 128 at a time)
89 |
90 | - take the current learned mu, stddev for each of the ZDIMS
91 | dimensions and draw a random sample from that distribution
92 | - the whole network is trained so that these randomly drawn
93 | samples decode to output that looks like the input
94 | - which will mean that the std, mu will be learned
95 | *distributions* that correctly encode the inputs
96 | - due to the additional KLD term (see loss_function() below)
97 | the distribution will tend to unit Gaussians
98 |
99 | Parameters
100 | ----------
101 | mu : [128, ZDIMS] mean matrix
102 |         logvar : [128, ZDIMS] log-variance matrix
103 |
104 | Returns
105 | -------
106 |
107 | During training random sample from the learned ZDIMS-dimensional
108 | normal distribution; during inference its mean.
109 |
110 | """
111 |
112 | if self.training:
113 | # multiply log variance with 0.5, then in-place exponent
114 | # yielding the standard deviation
115 | std = logvar.mul(0.5).exp_() # type: Variable
116 | # - std.data is the [128,ZDIMS] tensor that is wrapped by std
117 | # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
118 | # and stddev 1 normal distribution that is 128 samples
119 | # of random ZDIMS-float vectors
120 | eps = Variable(std.data.new(std.size()).normal_())
121 | # - sample from a normal distribution with standard
122 | # deviation = std and mean = mu by multiplying mean 0
123 | # stddev 1 sample with desired std and mu, see
124 | # https://stats.stackexchange.com/a/16338
125 | # - so we have 128 sets (the batch) of random ZDIMS-float
126 | # vectors sampled from normal distribution with learned
127 | # std and mu for the current input
128 | return eps.mul(std).add_(mu)
129 |
130 | else:
131 | # During inference, we simply spit out the mean of the
132 | # learned distribution for the current input. We could
133 | # use a random sample from the distribution, but mu of
134 | # course has the highest probability.
135 | return mu
136 |
137 | def decode(self, z: Variable) -> Variable:
138 | h3 = self.relu(self.fc3(z))
139 | return self.sigmoid(self.fc4(h3))
140 |
141 | def forward(self, x: Variable) -> (Variable, Variable, Variable):
142 | mu, logvar = self.encode(x.view(-1, 784))
143 | z = self.reparameterize(mu, logvar)
144 | return self.decode(z), mu, logvar
145 |
146 |
147 | model = VAE()
148 | if CUDA:
149 | model.cuda()
150 |
151 |
152 | def loss_function(recon_x, x, mu, logvar) -> Variable:
153 | # how well do input x and output recon_x agree?
154 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784))
155 |
156 | # KLD is Kullback–Leibler divergence -- how much does one learned
157 | # distribution deviate from another, in this specific case the
158 | # learned distribution from the unit Gaussian
159 |
160 | # see Appendix B from VAE paper:
161 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
162 | # https://arxiv.org/abs/1312.6114
163 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
164 | # note the negative D_{KL} in appendix B of the paper
165 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
166 | # Normalise by same number of elements as in reconstruction
167 | KLD /= BATCH_SIZE * 784
168 |
169 | # BCE tries to make our reconstruction as accurate as possible
170 | # KLD tries to push the distributions as close as possible to unit Gaussian
171 | return BCE + KLD
172 |
173 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam!
174 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
175 |
176 |
177 | def train(epoch):
178 | # toggle model to train mode
179 | model.train()
180 | train_loss = 0
181 | # in the case of MNIST, len(train_loader.dataset) is 60000
182 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28]
183 | for batch_idx, (data, _) in enumerate(train_loader):
184 | data = Variable(data)
185 | if CUDA:
186 | data = data.cuda()
187 | optimizer.zero_grad()
188 |
189 | # push whole batch of data through VAE.forward() to get recon_loss
190 | recon_batch, mu, logvar = model(data)
191 | # calculate scalar loss
192 | loss = loss_function(recon_batch, data, mu, logvar)
193 | # calculate the gradient of the loss w.r.t. the graph leaves
194 | # i.e. input variables -- by the power of pytorch!
195 | loss.backward()
196 | train_loss += loss.data[0]
197 | optimizer.step()
198 | if batch_idx % LOG_INTERVAL == 0:
199 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
200 | epoch, batch_idx * len(data), len(train_loader.dataset),
201 | 100. * batch_idx / len(train_loader),
202 | loss.data[0] / len(data)))
203 |
204 | print('====> Epoch: {} Average loss: {:.4f}'.format(
205 | epoch, train_loss / len(train_loader.dataset)))
206 |
207 |
208 | def test(epoch):
209 | # toggle model to test / inference mode
210 | model.eval()
211 | test_loss = 0
212 |
213 | # each data is of BATCH_SIZE (default 128) samples
214 | for i, (data, _) in enumerate(test_loader):
215 | if CUDA:
216 | # make sure this lives on the GPU
217 | data = data.cuda()
218 |
219 | # we're only going to infer, so no autograd at all required: volatile=True
220 | data = Variable(data, volatile=True)
221 | recon_batch, mu, logvar = model(data)
222 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0]
223 | if i == 0:
224 | n = min(data.size(0), 8)
225 |             # for the first batch of the epoch, show the first 8 input digits
226 |             # with the reconstructed output digits right below them
227 | comparison = torch.cat([data[:n],
228 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]])
229 | save_image(comparison.data.cpu(),
230 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n)
231 |
232 | test_loss /= len(test_loader.dataset)
233 | print('====> Test set loss: {:.4f}'.format(test_loss))
234 |
235 |
236 |
237 |
238 | if __name__ =="__main__":
239 |
240 | for epoch in range(1, EPOCHS + 1):
241 | train(epoch)
242 | test(epoch)
243 |
244 | # 64 sets of random ZDIMS-float vectors, i.e. 64 locations / MNIST
245 | # digits in latent space
246 | sample = Variable(torch.randn(64, ZDIMS))
247 | if CUDA:
248 | sample = sample.cuda()
249 | sample = model.decode(sample).cpu()
250 |
251 | # save out as an 8x8 matrix of MNIST digits
252 | # this will give you a visual idea of how well latent space can generate things
253 | # that look like digits
254 | save_image(sample.data.view(64, 1, 28, 28),
255 | 'results/sample_' + str(epoch) + '.png')
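256 |
257 | # NOTE (added for clarity, not part of the original script): F.binary_cross_entropy
258 | # with its default size_average=True returns the mean over all BATCH_SIZE * 784
259 | # elements, which is why loss_function divides KLD by BATCH_SIZE * 784: both terms
260 | # end up on a per-element scale. On PyTorch >= 0.4.1 this is spelled reduction='mean'.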
--------------------------------------------------------------------------------
/VAE_fb_modified.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | from torch import nn, optim
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torchvision import datasets, transforms
7 | import numpy as np
8 | from torchvision.utils import save_image
9 | import os
10 |
11 | os.environ['CUDA_VISIBLE_DEVICES'] = "2"
12 |
13 | CUDA = False
14 | batch_size = 16
15 | z_dim = 20
16 | no_of_sample = 1000
17 |
18 |
19 | # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
20 |
21 | class VAE(nn.Module):
22 | def __init__(self):
23 | super(VAE, self).__init__()
24 |
25 | # ENCODER
26 | # 28 x 28 pixels = 784 input pixels, 400 outputs
27 | self.fc1 = nn.Linear(784, 400)
28 | # rectified linear unit layer from 400 to 400
29 | # max(0, x)
30 | self.relu = nn.ReLU()
31 | self.fc21 = nn.Linear(400, z_dim) # mu layer
32 | self.fc22 = nn.Linear(400, z_dim) # logvariance layer
33 | # this last layer bottlenecks through ZDIMS connections
34 |
35 | # DECODER
36 | # from bottleneck to hidden 400
37 | self.fc3 = nn.Linear(z_dim, 400)
38 | # from hidden 400 to 784 outputs
39 | self.fc4 = nn.Linear(400, 784)
40 | self.sigmoid = nn.Sigmoid()
41 |
42 | def encode(self, x):
43 | '''
44 |         :param x: a batch of flattened images, tensor of size [N, 784]
45 |         :return: 2 tensors of size [N, z_dim=20]; the first is mu and the second is logvar
46 | '''
47 |
48 | h1 = self.relu(self.fc1(x)) # type: Variable
49 | return self.fc21(h1), self.fc22(h1)
50 |
51 | def reparametrized_sample(self, parameter_z, no_of_sample):
52 |         '''
53 |
54 |         :param parameter_z: (mu_z, logvar_z) tuple as returned by encode()
55 |         :param no_of_sample: number of Monte Carlo samples
56 |         :return: tensor of size [N, no_of_sample, z_dim=20]
57 |         '''
58 |         mu_z, logvar_z = parameter_z
59 |         # eps ~ N(0, I); sized from mu_z so a smaller final batch also works
60 |         standard_normal_sample = Variable(torch.randn(mu_z.size(0), no_of_sample, z_dim))
61 |         if CUDA:
62 |             standard_normal_sample = standard_normal_sample.cuda()
63 |
64 | mu_z = mu_z.unsqueeze(1)
65 | sigma = logvar_z.mul(.5).exp()
66 | # sigma =.5*logvar_z.exp()
67 |
68 | sigma = sigma.unsqueeze(1)
69 | final_sample = mu_z + sigma * standard_normal_sample
70 |
71 | return final_sample
72 |
73 | def decode(self, z):
74 | h3 = self.relu(self.fc3(z))
75 | return self.sigmoid(self.fc4(h3))
76 |
77 | # x = F.elu(self.fc1(z))
78 | # x = F.elu(self.fc2(x))
79 | # x = x.view(-1,128,7,7)
80 | # x = F.relu(self.conv_t1(x))
81 | # x = F.sigmoid(self.conv_t2(x))
82 |
83 | # return x
84 | # mu_x = x.view(-1,28*28)
85 | #
86 | # logvar_x = F.elu(self.fc3(z))
87 | # logvar_x = F.softmax(self.fc4(logvar_x))
88 | #
89 | # return mu_x, logvar_x
90 |
91 | def log_density(self):
92 | pass
93 |
94 | def forward(self, x):
95 | '''
96 |
97 |         :param x: input image batch, flattened to [N, 784]
98 |         :return: (mu_z, logvar_z), plus a list of length N whose elements are tensors of size [no_of_sample=1000, 28*28 (for MNIST)] of decoded Bernoulli means
99 | '''
100 | parameter_z = self.encode(x)
101 | sample_z = self.reparametrized_sample(parameter_z, no_of_sample)
102 | x = [self.decode(obs) for obs in sample_z]
103 |
104 | return parameter_z, x
105 |
106 |
107 | def loss_VAE(train_x, parameter_z, predicted_x):
108 |     mu_z, logvar_z = parameter_z
109 |     # Kullback-Leibler divergence, summed over z_dim; mu_z.size() = [batch_size, z_dim]
110 |     negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1)
111 | # negative_KLD /=784
112 |
113 | # nll
114 | train_x_flattened = train_x.view(-1, 28 * 28)
115 | if CUDA:
116 | nll = Variable(torch.FloatTensor(batch_size).zero_().cuda())
117 | else:
118 | nll = Variable(torch.FloatTensor(batch_size).zero_())
119 |
120 | i = 0
121 | for x in train_x_flattened:
122 | predicted = predicted_x[i]
123 | predicted = predicted.view(-1, 784)
124 |
125 |         bce_total = 0
126 |         for pred in predicted:
127 |             bce_total += F.binary_cross_entropy(pred, x, size_average=False)
128 |
129 |         nll[i] = bce_total / no_of_sample  # Monte Carlo average
130 | i += 1
131 |
132 | final_loss = -negative_KLD + nll
133 | final_loss = torch.mean(final_loss)
134 |
135 | return final_loss
136 |
137 |
138 | def train(epoch, model, train_loader, optimizer):
139 | model.train()
140 |
141 | train_loss = 0
142 | count = 0
143 | for batch_id, data in enumerate(train_loader):
144 |
145 | train_x, _ = data
146 | count += train_x.size(0)
147 |
148 | if CUDA:
149 | train_x = Variable(train_x.type(torch.FloatTensor).cuda())
150 | else:
151 | train_x = Variable(train_x.type(torch.FloatTensor))
152 |
153 | train_x = train_x.view(-1, 784)
154 |         parameter_z, predicted_x = model(train_x)
155 |
156 |         loss = loss_VAE(train_x, parameter_z, predicted_x)
157 | train_loss += loss.data[0]
158 |         optimizer.zero_grad()
159 | loss.backward()
160 | optimizer.step()
161 |
162 | if batch_id % 50 == 0:
163 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
164 |                 epoch, batch_id * train_x.size(0), len(train_loader.dataset), 100. * batch_id / len(train_loader),
165 | loss.data[0]))
166 |
167 | train_loss /= count
168 | print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
169 |
170 |
171 | if __name__ == "__main__":
172 | train_loader = torch.utils.data.DataLoader(
173 | datasets.MNIST('../data', train=True, download=True,
174 | transform=transforms.ToTensor()),
175 | batch_size=batch_size, shuffle=True)
176 | test_loader = torch.utils.data.DataLoader(
177 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
178 | batch_size=batch_size, shuffle=True)
179 |
180 | model = VAE()
181 | model_parameters = filter(lambda p: p.requires_grad, model.parameters())
182 | nb_params = sum([np.prod(p.size()) for p in model_parameters])
183 |     print("no. of trainable parameters is: {}".format(nb_params))
184 | #model.cuda()
185 |
186 |
187 | optimizer = optim.Adam(model.parameters(), lr=.001)
188 |
189 | nb_epoch = 2
190 | for epoch in range(1, nb_epoch + 1):
191 | train(epoch, model, train_loader, optimizer)
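192 |
193 | # NOTE (added for clarity, not part of the original script): the per-example Python
194 | # loop in loss_VAE can be vectorized. A rough sketch with the same tensors
195 | # (hypothetical, not a drop-in replacement):
196 | #   preds = torch.stack(predicted_x)                      # [N, no_of_sample, 784]
197 | #   x_rep = train_x_flattened.unsqueeze(1).expand_as(preds)
198 | #   bce = -(x_rep * preds.log() + (1 - x_rep) * (1 - preds).log())
199 | #   nll = bce.sum(2).mean(1)   # sum over pixels, Monte Carlo average over samples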
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import math, os
4 | import matplotlib.pyplot as plt
5 |
6 |
7 | import argparse
8 | import torch
9 | import torch.utils.data
10 | from torch import nn, optim
11 | from torch.autograd import Variable
12 | from torch.nn import functional as F
13 | from torchvision import datasets, transforms
14 | from torchvision.utils import save_image
15 |
16 |
17 | parser = argparse.ArgumentParser(description='VAE MNIST Example')
18 | parser.add_argument('--batch-size', type=int, default=128, metavar='N',
19 | help='input batch size for training (default: 128)')
20 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
21 | help='number of epochs to train (default: 10)')
22 | parser.add_argument('--no-cuda', action='store_true', default=False,
23 |                     help='disables CUDA training')
24 | parser.add_argument('--seed', type=int, default=1, metavar='S',
25 | help='random seed (default: 1)')
26 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
27 | help='how many batches to wait before logging training status')
28 | args = parser.parse_args()
29 | args.cuda = not args.no_cuda and torch.cuda.is_available()
30 |
31 |
32 | torch.manual_seed(args.seed)
33 | if args.cuda:
34 | torch.cuda.manual_seed(args.seed)
35 |
36 |
37 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
38 |
39 | def compute_kernel(x, y):
40 | x_size = tf.shape(x)[0]
41 | y_size = tf.shape(y)[0]
42 | dim = tf.shape(x)[1]
43 | tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
44 | tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
45 | return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))
46 |
47 |
48 | class VAE(nn.Module):
49 | def __init__(self):
50 | super(VAE, self).__init__()
51 |
52 | self.fc1 = nn.Linear(784, 400)
53 | self.fc21 = nn.Linear(400, 20)
54 | self.fc22 = nn.Linear(400, 20)
55 | self.fc3 = nn.Linear(20, 400)
56 | self.fc4 = nn.Linear(400, 784)
57 |
58 | self.relu = nn.ReLU()
59 | self.sigmoid = nn.Sigmoid()
60 |
61 | def encode(self, x):
62 | h1 = self.relu(self.fc1(x))
63 | return self.fc21(h1), self.fc22(h1)
64 |
65 | def reparameterize(self, mu, logvar):
66 | if self.training:
67 | std = logvar.mul(0.5).exp_()
68 | eps = Variable(std.data.new(std.size()).normal_())
69 | return eps.mul(std).add_(mu)
70 | else:
71 | return mu
72 |
73 | def decode(self, z):
74 | h3 = self.relu(self.fc3(z))
75 | return self.sigmoid(self.fc4(h3))
76 |
77 | def forward(self, x):
78 | mu, logvar = self.encode(x.view(-1, 784))
79 | z = self.reparameterize(mu, logvar)
80 | return self.decode(z), mu, logvar
81 |
82 |
83 | def loss_function(recon_x, x, mu, logvar):
84 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), size_average=False)
85 |
86 | # see Appendix B from VAE paper:
87 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
88 | # https://arxiv.org/abs/1312.6114
89 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
90 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
91 |
92 | return BCE + KLD
93 |
94 |
95 | def train(epoch):
96 | model.train()
97 | train_loss = 0
98 | for batch_idx, (data, _) in enumerate(train_loader):
99 | data = Variable(data)
100 | if args.cuda:
101 | data = data.cuda()
102 | optimizer.zero_grad()
103 | recon_batch, mu, logvar = model(data)
104 | loss = loss_function(recon_batch, data, mu, logvar)
105 | loss.backward()
106 | train_loss += loss.data[0]
107 | optimizer.step()
108 | if batch_idx % args.log_interval == 0:
109 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
110 | epoch, batch_idx * len(data), len(train_loader.dataset),
111 | 100. * batch_idx / len(train_loader),
112 | loss.data[0] / len(data)))
113 |
114 | print('====> Epoch: {} Average loss: {:.4f}'.format(
115 | epoch, train_loss / len(train_loader.dataset)))
116 |
117 |
118 | def test(epoch):
119 | model.eval()
120 | test_loss = 0
121 | for i, (data, _) in enumerate(test_loader):
122 | if args.cuda:
123 | data = data.cuda()
124 | data = Variable(data, volatile=True)
125 | recon_batch, mu, logvar = model(data)
126 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0]
127 | if i == 0:
128 | n = min(data.size(0), 8)
129 | comparison = torch.cat([data[:n],
130 | recon_batch.view(args.batch_size, 1, 28, 28)[:n]])
131 | save_image(comparison.data.cpu(),
132 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n)
133 |
134 | test_loss /= len(test_loader.dataset)
135 | print('====> Test set loss: {:.4f}'.format(test_loss))
136 |
137 | # The TensorFlow helpers below (lrelu, the conv/fc wrappers, encoder, decoder)
138 | # are not called by the PyTorch training loop above; they are kept for reference.
139 | def lrelu(x, rate=0.1):
140 | return tf.maximum(tf.minimum(x * rate, 0), x)
141 |
142 | def conv2d_lrelu(inputs, num_outputs, kernel_size, stride):
143 | conv = tf.contrib.layers.convolution2d(inputs, num_outputs, kernel_size, stride,
144 | weights_initializer=tf.contrib.layers.xavier_initializer(),
145 | activation_fn=tf.identity)
146 | conv = lrelu(conv)
147 | return conv
148 |
149 | def conv2d_t_relu(inputs, num_outputs, kernel_size, stride):
150 | conv = tf.contrib.layers.convolution2d_transpose(inputs, num_outputs, kernel_size, stride,
151 | weights_initializer=tf.contrib.layers.xavier_initializer(),
152 | activation_fn=tf.identity)
153 | conv = tf.nn.relu(conv)
154 | return conv
155 |
156 | def fc_lrelu(inputs, num_outputs):
157 | fc = tf.contrib.layers.fully_connected(inputs, num_outputs,
158 | weights_initializer=tf.contrib.layers.xavier_initializer(),
159 | activation_fn=tf.identity)
160 | fc = lrelu(fc)
161 | return fc
162 |
163 | def fc_relu(inputs, num_outputs):
164 | fc = tf.contrib.layers.fully_connected(inputs, num_outputs,
165 | weights_initializer=tf.contrib.layers.xavier_initializer(),
166 | activation_fn=tf.identity)
167 | fc = tf.nn.relu(fc)
168 | return fc
169 |
170 |
171 | def encoder(x, z_dim):
172 | with tf.variable_scope('encoder'):
173 |         conv1 = conv2d_lrelu(x, num_outputs=64, kernel_size=4, stride=2)
174 | conv2 = conv2d_lrelu(conv1, 128, 4, 2)
175 | conv2 = tf.reshape(conv2, [-1, np.prod(conv2.get_shape().as_list()[1:])])
176 | fc1 = fc_lrelu(conv2, 1024)
177 | return tf.contrib.layers.fully_connected(fc1, z_dim, activation_fn=tf.identity)
178 |
179 | def decoder(z, reuse=False):
180 | with tf.variable_scope('decoder') as vs:
181 | if reuse:
182 | vs.reuse_variables()
183 | fc1 = fc_relu(z, 1024)
184 | fc2 = fc_relu(fc1, 7*7*128)
185 | fc2 = tf.reshape(fc2, tf.stack([tf.shape(fc2)[0], 7, 7, 128]))
186 | conv1 = conv2d_t_relu(fc2, 64, 4, 2)
187 | output = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
188 | return output
189 |
190 |
191 |
192 |
193 |
194 | if __name__ == "__main__":
195 | train_loader = torch.utils.data.DataLoader(
196 | datasets.MNIST('../data', train=True, download=True,
197 | transform=transforms.ToTensor()),
198 | batch_size=args.batch_size, shuffle=True, **kwargs)
199 | test_loader = torch.utils.data.DataLoader(
200 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
201 | batch_size=args.batch_size, shuffle=True, **kwargs)
202 |
203 | model = VAE()
204 | if args.cuda:
205 | model.cuda()
206 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
207 |
208 | for epoch in range(1, args.epochs + 1):
209 | train(epoch)
210 | test(epoch)
211 | sample = Variable(torch.randn(64, 20))
212 | if args.cuda:
213 | sample = sample.cuda()
214 | sample = model.decode(sample).cpu()
215 | save_image(sample.data.view(64, 1, 28, 28),
216 | 'results/sample_' + str(epoch) + '.png')
217 |
--------------------------------------------------------------------------------
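
Note: compute_kernel() in main.py above is a TensorFlow Gaussian (RBF) kernel that the PyTorch training loop never calls; it is the usual building block of an MMD penalty. A minimal PyTorch sketch of the same kernel and the resulting MMD estimate (compute_kernel_torch and mmd are illustrative names, not part of the repo):

    import torch

    def compute_kernel_torch(x, y):
        # x: [n, d], y: [m, d] -> kernel matrix [n, m]
        n, m, d = x.size(0), y.size(0), x.size(1)
        tiled_x = x.unsqueeze(1).expand(n, m, d)
        tiled_y = y.unsqueeze(0).expand(n, m, d)
        return torch.exp(-((tiled_x - tiled_y) ** 2).mean(2) / d)

    def mmd(x, y):
        # squared maximum mean discrepancy between sample sets x and y
        return (compute_kernel_torch(x, x).mean()
                + compute_kernel_torch(y, y).mean()
                - 2 * compute_kernel_torch(x, y).mean())
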
/main_new.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | from torch import nn, optim
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torchvision import datasets, transforms
7 | import numpy as np
8 | from torchvision.utils import save_image
9 |
10 | batch_size = 16
11 | z_dim = 20
12 | no_of_sample = 1000
13 | #kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
14 |
15 | class VAE(nn.Module):
16 | def __init__(self):
17 | super(VAE, self).__init__()
18 |         self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4,4), padding=(15,15), stride=2)  # output stays 28x28: (28 + 2*15 - 4)//2 + 1 = 28, i.e. "same" padding
19 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4,4), padding=(15,15), stride=2)
20 | self.fc11 = nn.Linear(in_features=128*28*28, out_features=1024)
21 | self.fc12 = nn.Linear(in_features=1024, out_features=z_dim)
22 |
23 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
24 | self.fc22 = nn.Linear(in_features=1024, out_features=z_dim)
25 |
26 | #For decoder
27 |
28 | #For mu
29 | self.fc1 = nn.Linear(in_features=20, out_features=1024)
30 | self.fc2 = nn.Linear(in_features=1024, out_features=7*7*128)
31 | self.conv_t1 = nn.ConvTranspose2d(in_channels=128, out_channels=64,kernel_size=4,padding=1,stride=2)
32 | self.conv_t2 = nn.ConvTranspose2d(in_channels=64, out_channels=1,kernel_size=4,padding=1,stride=2)
33 |
34 | #for logvar
35 | self.fc3 = nn.Linear(in_features=20, out_features=400)
36 | self.fc4 = nn.Linear(in_features=400, out_features=784)
37 |
38 |
44 | def encode(self, x):
45 | '''
46 | :param x: here x is an image, can be any tensor
47 | :return: 2 tensors of size [N,z_dim=20] where first one is mu and second one is logvar
48 | '''
49 |
50 | x = F.elu(self.conv1(x))
51 | x = F.elu(self.conv2(x))
52 | x = x.view(-1,128*28*28)
53 |
54 | mu_z = F.elu(self.fc11(x))
55 | #mu_z = F.softmax(self.fc12(mu_z))
56 | mu_z =self.fc12(mu_z)
57 |
58 | logvar_z = F.elu(self.fc21(x))
59 | #logvar_z = F.softmax(self.fc22(logvar_z))
60 | logvar_z = self.fc22(logvar_z)
61 |
62 | return mu_z, logvar_z
63 |
64 |     def reparametrized_sample(self, parameter_z, no_of_sample):
65 |         '''
66 |         Draw Monte Carlo samples z = mu + sigma * eps via the reparameterization trick.
67 |         :param parameter_z: tuple (mu, logvar), each of size [N, z_dim]
68 |         :param no_of_sample: number of Monte Carlo samples L
69 |         :return: tensor of size [N, no_of_sample, z_dim=20]
70 |         '''
71 |         mu_z, logvar_z = parameter_z
72 |         standard_normal_sample = Variable(torch.randn(mu_z.size(0), no_of_sample, z_dim).cuda())  # use the actual batch size; the last batch may be smaller
73 |         mu_z = mu_z.unsqueeze(1)
74 |         sigma = (0.5 * logvar_z).exp()  # std = exp(logvar/2), not 0.5*exp(logvar)
75 |         sigma = sigma.unsqueeze(1)
76 | final_sample = mu_z+sigma*standard_normal_sample
77 |
78 | return final_sample
79 |
80 | def decode(self,z):
81 |
82 | x = F.elu(self.fc1(z))
83 | x = F.elu(self.fc2(x))
84 | x = x.view(-1,128,7,7)
85 | x = F.relu(self.conv_t1(x))
86 |         x = F.sigmoid(self.conv_t2(x))  # per-pixel means in (0,1); softmax over the conv map was a bug
87 | mu_x = x.view(-1,28*28)
88 |
89 |         logvar_x = F.elu(self.fc3(z))
90 |         logvar_x = self.fc4(logvar_x)  # leave the log-variance unconstrained, as in simple_main.py
91 |
92 | return mu_x, logvar_x
93 |
94 | def log_density(self):
95 | pass
96 |
97 | def forward(self,x):
98 | '''
99 |
100 | :param x: input image
101 |         :return: list of length = batch size; each element is a tuple of 2 tensors of size [no_of_sample=1000, 28*28 (for MNIST)], corresponding to mu and logvar
102 | '''
103 | parameter_z = self.encode(x)
104 | sample_z = self.reparametrized_sample(parameter_z,no_of_sample)
105 | parameter_x = [self.decode(obs) for obs in sample_z]
106 |
107 | return parameter_z, parameter_x
108 |
109 |
110 | def loss_VAE(train_x, parameter_x, parameter_z):
111 |
112 |     mu_z, logvar_z = parameter_z
113 |     #Kullback-Leibler divergence
114 |     negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1)  #mu_z.size()=[batch_size, z_dim]
115 |
116 | #nll
117 | train_x_flattened = train_x.view(-1, 28*28)
118 | i = 0
119 |     nll = Variable(torch.FloatTensor(train_x.size(0)).zero_().cuda())  # one entry per example in the (possibly partial) batch
120 | for param in parameter_x:
121 | mu_x, logvar_x = param
122 | x = train_x_flattened[i]
123 |
124 |         log_likelihood_for_one_z = 0.5*torch.sum(logvar_x,1) + torch.sum(((x-mu_x).pow(2))/(2*logvar_x.exp()),1)  # -log pθ(x^(i)|z^(i,l)) up to a constant: ½Σ log σ² + ½Σ ((x-μ)/σ)², as in the README
125 | nll_one_sample = torch.mean(log_likelihood_for_one_z) #Monte carlo average step to calculate expectation
126 | nll[i] = nll_one_sample
127 | i += 1
128 |
129 |     final_loss = nll - negative_KLD  # loss = reconstruction NLL + KLD (the KLD enters with a plus sign)
130 | final_loss = torch.mean(final_loss)
131 |
132 | return final_loss
133 |
134 |
135 | def train(epoch, model, train_loader, optimizer):
136 | model.train()
137 |
138 | train_loss = 0
139 | count = 0
140 | for batch_id, data in enumerate(train_loader):
141 |
142 | train_x, _ = data
143 | count += train_x.size(0)
144 | train_x = Variable(train_x.type(torch.FloatTensor).cuda())
145 |         parameter_z, parameter_x = model(train_x)
146 |
147 |         optimizer.zero_grad()  # without this, gradients accumulate across batches
148 |         loss = loss_VAE(train_x, parameter_x, parameter_z)
149 |         train_loss += loss.data[0]
150 |
151 |         loss.backward()
152 |         optimizer.step()
153 |
154 |         if batch_id % 50 == 0:
155 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
156 |                 epoch, batch_id * len(train_x), len(train_loader.dataset), 100. * batch_id / len(train_loader), loss.data[0]))
157 |
158 | train_loss /= count
159 | print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
160 |
161 |
162 |
163 |
164 | if __name__ == "__main__":
165 | train_loader = torch.utils.data.DataLoader(
166 | datasets.MNIST('../data', train=True, download=True,
167 | transform=transforms.ToTensor()),
168 | batch_size=batch_size, shuffle=True)
169 | test_loader = torch.utils.data.DataLoader(
170 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
171 | batch_size=batch_size, shuffle=True)
172 |
173 | model = VAE()
174 | model_parameters = filter(lambda p: p.requires_grad, model.parameters())
175 | nb_params = sum([np.prod(p.size()) for p in model_parameters])
176 |     print("no. of trainable parameters is: {}".format(nb_params))
177 | model.cuda()
178 |
179 |
180 | optimizer = optim.Adam(model.parameters(), lr=.001)
181 |
182 | nb_epoch = 2
183 | for epoch in range(1, nb_epoch + 1):
184 | train(epoch, model, train_loader, optimizer)
185 |
186 |
--------------------------------------------------------------------------------
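
Note: loss_VAE() in main_new.py above loops over the batch in Python. A vectorized sketch of the same objective, assuming the decoder outputs are stacked into tensors of shape [N, L, 784] (N = batch size, L = no_of_sample) rather than kept in a list; the function name and shapes here are illustrative:

    import torch

    def loss_VAE_vectorized(x, mu_x, logvar_x, mu_z, logvar_z):
        # x: [N, 784]; mu_x, logvar_x: [N, L, 784]; mu_z, logvar_z: [N, z_dim]
        x = x.unsqueeze(1)  # [N, 1, 784] broadcasts against the L samples
        nll = 0.5 * (logvar_x + (x - mu_x).pow(2) / logvar_x.exp()).sum(2)  # [N, L]
        nll = nll.mean(1)  # Monte Carlo average over L -> [N]
        negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1)  # [N]
        return (nll - negative_KLD).mean()  # loss = NLL + KLD, averaged over the batch
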
/new1.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.utils.data
4 | from torch import nn, optim
5 | from torch.autograd import Variable
6 | from torch.nn import functional as F
7 | from torchvision import datasets, transforms
8 | from torchvision.utils import save_image
9 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
10 | # changed configuration to this instead of argparse for easier interaction
11 | CUDA = True
12 | SEED = 1
13 | BATCH_SIZE = 128
14 | LOG_INTERVAL = 10
15 | EPOCHS = 10
16 | no_of_sample = 10
17 |
18 | # connections through the autoencoder bottleneck
19 | # in the pytorch VAE example, this is 20
20 | ZDIMS = 20
21 |
22 | # I do this so that the MNIST dataset is downloaded where I want it
23 | #os.chdir("/home/cpbotha/Downloads/pytorch-vae")
24 |
25 | torch.manual_seed(SEED)
26 | if CUDA:
27 | torch.cuda.manual_seed(SEED)
28 |
29 | # DataLoader instances will load tensors directly into GPU memory
30 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
31 |
32 | # Download or load downloaded MNIST dataset
33 | # shuffle data at every epoch
34 | train_loader = torch.utils.data.DataLoader(
35 | datasets.MNIST('/home/atin/data/', train=True, download=True,
36 | transform=transforms.ToTensor()),
37 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
38 |
39 | # Same for test data
40 | test_loader = torch.utils.data.DataLoader(
41 | datasets.MNIST('/home/atin/data/', train=False, transform=transforms.ToTensor()),
42 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
43 |
44 |
45 | class VAE(nn.Module):
46 | def __init__(self):
47 | super(VAE, self).__init__()
48 |
49 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15),
50 | stride=2) # This padding keeps the size of the image same, i.e. same padding
51 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2)
52 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
53 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS)
54 |
55 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
56 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS)
57 |
58 |         self.relu = nn.ReLU()  # decode() below uses self.relu; it was never defined
59 |
65 | # # ENCODER
66 | # # 28 x 28 pixels = 784 input pixels, 400 outputs
67 | # self.fc1 = nn.Linear(784, 400)
68 | # # rectified linear unit layer from 400 to 400
69 | # # max(0, x)
70 | # self.relu = nn.ReLU()
71 | # self.fc21 = nn.Linear(400, ZDIMS) # mu layer
72 | # self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer
73 | # this last layer bottlenecks through ZDIMS connections
74 |
75 | # DECODER
76 | # from bottleneck to hidden 400
77 | self.fc3 = nn.Linear(ZDIMS, 400)
78 | # from hidden 400 to 784 outputs
79 | self.fc4 = nn.Linear(400, 784)
80 | self.sigmoid = nn.Sigmoid()
81 |
82 | def encode(self, x: Variable) -> (Variable, Variable):
83 |         x = x.view(-1, 1, 28, 28)  # forward() passes a flat [N, 784] batch; conv1 needs images (cf. new2.py)
84 | x = F.elu(self.conv1(x))
85 | x = F.elu(self.conv2(x))
86 | x = x.view(-1, 128 * 28 * 28)
87 |
88 | mu_z = F.elu(self.fc11(x))
89 | # mu_z = F.softmax(self.fc12(mu_z))
90 | mu_z = self.fc12(mu_z)
91 |
92 | logvar_z = F.elu(self.fc21(x))
93 | # logvar_z = F.softmax(self.fc22(logvar_z))
94 | logvar_z = self.fc22(logvar_z)
95 |
96 | return mu_z, logvar_z
97 |
98 |
99 |     # Leftover from the original fully connected encoder (this class defines
100 |     # no fc1); kept commented out so the class body stays valid Python:
101 |     # h1 = self.relu(self.fc1(x))  # h1 is [128, 400]
102 |     # return self.fc21(h1), self.fc22(h1)
120 |
121 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable:
122 | """THE REPARAMETERIZATION IDEA:
123 |
124 | For each training sample (we get 128 batched at a time)
125 |
126 | - take the current learned mu, stddev for each of the ZDIMS
127 | dimensions and draw a random sample from that distribution
128 | - the whole network is trained so that these randomly drawn
129 | samples decode to output that looks like the input
130 | - which will mean that the std, mu will be learned
131 | *distributions* that correctly encode the inputs
132 | - due to the additional KLD term (see loss_function() below)
133 | the distribution will tend to unit Gaussians
134 |
135 | Parameters
136 | ----------
137 | mu : [128, ZDIMS] mean matrix
138 | logvar : [128, ZDIMS] variance matrix
139 |
140 | Returns
141 | -------
142 |
143 | During training random sample from the learned ZDIMS-dimensional
144 | normal distribution; during inference its mean.
145 |
146 | """
147 |
148 | if self.training:
149 | # multiply log variance with 0.5, then in-place exponent
150 | # yielding the standard deviation
151 |
152 | sample_z = []
153 | for _ in range(no_of_sample):
154 | std = logvar.mul(0.5).exp_() # type: Variable
155 | # - std.data is the [128,ZDIMS] tensor that is wrapped by std
156 | # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
157 | # and stddev 1 normal distribution that is 128 samples
158 | # of random ZDIMS-float vectors
159 | eps = Variable(std.data.new(std.size()).normal_())
160 | # - sample from a normal distribution with standard
161 | # deviation = std and mean = mu by multiplying mean 0
162 | # stddev 1 sample with desired std and mu, see
163 | # https://stats.stackexchange.com/a/16338
164 | # - so we have 128 sets (the batch) of random ZDIMS-float
165 | # vectors sampled from normal distribution with learned
166 | # std and mu for the current input
167 | sample_z.append(eps.mul(std).add_(mu))
168 |
169 | return sample_z
170 |
171 | else:
172 | # During inference, we simply spit out the mean of the
173 | # learned distribution for the current input. We could
174 | # use a random sample from the distribution, but mu of
175 | # course has the highest probability.
176 |             return [mu]  # wrapped in a list so forward() treats eval like the L=1 case
177 |
178 | def decode(self, z: Variable) -> Variable:
179 | h3 = self.relu(self.fc3(z))
180 | return self.sigmoid(self.fc4(h3))
181 |
182 | def forward(self, x: Variable) -> (Variable, Variable, Variable):
183 | mu, logvar = self.encode(x.view(-1, 784))
184 | z = self.reparameterize(mu, logvar)
185 |         return [self.decode(z_i) for z_i in z], mu, logvar
186 | #return self.decode(z), mu, logvar
187 |
188 |
189 | model = VAE()
190 | if CUDA:
191 | model.cuda()
192 |
193 |
194 | def loss_function(recon_x, x, mu, logvar) -> Variable:
195 | # how well do input x and output recon_x agree?
196 |
197 | BCE = 0
198 | for recon_x_one in recon_x:
199 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784))
200 |
201 |     BCE /= len(recon_x)
202 |
203 |
204 |
205 | # KLD is Kullback–Leibler divergence -- how much does one learned
206 | # distribution deviate from another, in this specific case the
207 | # learned distribution from the unit Gaussian
208 |
209 | # see Appendix B from VAE paper:
210 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
211 | # https://arxiv.org/abs/1312.6114
212 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
213 | # note the negative D_{KL} in appendix B of the paper
214 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
215 | # Normalise by same number of elements as in reconstruction
216 | KLD /= BATCH_SIZE * 784
217 |
218 | # BCE tries to make our reconstruction as accurate as possible
219 | # KLD tries to push the distributions as close as possible to unit Gaussian
220 | return BCE + KLD
221 |
222 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam!
223 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
224 |
225 |
226 | def train(epoch):
227 | # toggle model to train mode
228 | model.train()
229 | train_loss = 0
230 | # in the case of MNIST, len(train_loader.dataset) is 60000
231 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28]
232 | for batch_idx, (data, _) in enumerate(train_loader):
233 | data = Variable(data)
234 | if CUDA:
235 | data = data.cuda()
236 | optimizer.zero_grad()
237 |
238 | # push whole batch of data through VAE.forward() to get recon_loss
239 | recon_batch, mu, logvar = model(data)
240 | # calculate scalar loss
241 | loss = loss_function(recon_batch, data, mu, logvar)
242 | # calculate the gradient of the loss w.r.t. the graph leaves
243 | # i.e. input variables -- by the power of pytorch!
244 | loss.backward()
245 | train_loss += loss.data[0]
246 | optimizer.step()
247 | if batch_idx % LOG_INTERVAL == 0:
248 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
249 | epoch, batch_idx * len(data), len(train_loader.dataset),
250 | 100. * batch_idx / len(train_loader),
251 | loss.data[0] / len(data)))
252 |
253 | print('====> Epoch: {} Average loss: {:.4f}'.format(
254 | epoch, train_loss / len(train_loader.dataset)))
255 |
256 |
257 | def test(epoch):
258 | # toggle model to test / inference mode
259 | model.eval()
260 | test_loss = 0
261 |
262 | # each data is of BATCH_SIZE (default 128) samples
263 | for i, (data, _) in enumerate(test_loader):
264 | if CUDA:
265 | # make sure this lives on the GPU
266 | data = data.cuda()
267 |
268 | # we're only going to infer, so no autograd at all required: volatile=True
269 | data = Variable(data, volatile=True)
270 | recon_batch, mu, logvar = model(data)
271 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0]
272 | if i == 0:
273 | n = min(data.size(0), 8)
274 | # for the first 128 batch of the epoch, show the first 8 input digits
275 | # with right below them the reconstructed output digits
276 |             comparison = torch.cat([data[:n],
277 |                                     recon_batch[0].view(BATCH_SIZE, 1, 28, 28)[:n]])  # recon_batch is a list (one entry per MC sample)
278 | save_image(comparison.data.cpu(),
279 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n)
280 |
281 | test_loss /= len(test_loader.dataset)
282 | print('====> Test set loss: {:.4f}'.format(test_loss))
--------------------------------------------------------------------------------
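
Note: new1.py defines train() and test() but, unlike main.py, has no entry point, so running the file only builds the model. A minimal driver sketch in the style of main.py's __main__ block (creating results/ is an assumption carried over from test(), which saves images there):

    import os

    if __name__ == "__main__":
        os.makedirs('results', exist_ok=True)
        for epoch in range(1, EPOCHS + 1):
            train(epoch)
            test(epoch)
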
/new2.py:
--------------------------------------------------------------------------------
1 | import os
2 | import torch
3 | import torch.utils.data
4 | from torch import nn, optim
5 | from torch.autograd import Variable
6 | from torch.nn import functional as F
7 | from torchvision import datasets, transforms
8 | from torchvision.utils import save_image
9 |
10 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
11 | # changed configuration to this instead of argparse for easier interaction
12 | CUDA = True
13 | SEED = 1
14 | BATCH_SIZE = 128
15 | LOG_INTERVAL = 10
16 | EPOCHS = 10
17 | no_of_sample = 10
18 |
19 | # connections through the autoencoder bottleneck
20 | # in the pytorch VAE example, this is 20
21 | ZDIMS = 20
22 |
23 | # I do this so that the MNIST dataset is downloaded where I want it
24 | # os.chdir("/home/cpbotha/Downloads/pytorch-vae")
25 |
26 | torch.manual_seed(SEED)
27 | if CUDA:
28 | torch.cuda.manual_seed(SEED)
29 |
30 | # DataLoader instances will load tensors directly into GPU memory
31 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {}
32 |
33 | # Download or load downloaded MNIST dataset
34 | # shuffle data at every epoch
35 | train_loader = torch.utils.data.DataLoader(
36 | datasets.MNIST('/home/atin/data/', train=True, download=True,
37 | transform=transforms.ToTensor()),
38 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
39 |
40 | # Same for test data
41 | test_loader = torch.utils.data.DataLoader(
42 | datasets.MNIST('/home/atin/data/', train=False, transform=transforms.ToTensor()),
43 | batch_size=BATCH_SIZE, shuffle=True, **kwargs)
44 |
45 |
46 | class VAE(nn.Module):
47 | def __init__(self):
48 | super(VAE, self).__init__()
49 |
50 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15),
51 | stride=2) # This padding keeps the size of the image same, i.e. same padding
52 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2)
53 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
54 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS)
55 |
56 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
57 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS)
58 | self.relu = nn.ReLU()
59 |
60 | # For decoder
61 |
62 | # For mu
63 | self.fc1 = nn.Linear(in_features=20, out_features=1024)
64 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128)
65 | self.conv_t11 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2)
66 | self.conv_t12 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2)
67 |
68 | self.conv_t21 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2)
69 | self.conv_t22 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2)
70 |
71 |
72 |
73 | # #for logvar
74 | # self.fc3 = nn.Linear(in_features=20, out_features=400)
75 | # self.fc4 = nn.Linear(in_features=400, out_features=784)
76 |
77 | # # ENCODER
78 | # # 28 x 28 pixels = 784 input pixels, 400 outputs
79 | # self.fc1 = nn.Linear(784, 400)
80 | # # rectified linear unit layer from 400 to 400
81 | # # max(0, x)
82 | # self.relu = nn.ReLU()
83 | # self.fc21 = nn.Linear(400, ZDIMS) # mu layer
84 | # self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer
85 | # this last layer bottlenecks through ZDIMS connections
86 |
87 | # # DECODER
88 | # # from bottleneck to hidden 400
89 | # self.fc3 = nn.Linear(ZDIMS, 400)
90 | # # from hidden 400 to 784 outputs
91 | # self.fc4 = nn.Linear(400, 784)
92 | # self.sigmoid = nn.Sigmoid()
93 |
94 | def encode(self, x: Variable) -> (Variable, Variable):
95 |
96 | x = x.view(-1, 1, 28, 28)
97 | x = F.elu(self.conv1(x))
98 | x = F.elu(self.conv2(x))
99 | x = x.view(-1, 128 * 28 * 28)
100 |
101 | mu_z = F.elu(self.fc11(x))
102 | # mu_z = F.softmax(self.fc12(mu_z))
103 | mu_z = self.fc12(mu_z)
104 |
105 | logvar_z = F.elu(self.fc21(x))
106 | # logvar_z = F.softmax(self.fc22(logvar_z))
107 | logvar_z = self.fc22(logvar_z)
108 |
109 | return mu_z, logvar_z
110 |
111 | def reparameterize(self, mu: Variable, logvar: Variable) -> list:
112 | """THE REPARAMETERIZATION IDEA:
113 |
114 | For each training sample (we get 128 batched at a time)
115 |
116 | - take the current learned mu, stddev for each of the ZDIMS
117 | dimensions and draw a random sample from that distribution
118 | - the whole network is trained so that these randomly drawn
119 | samples decode to output that looks like the input
120 | - which will mean that the std, mu will be learned
121 | *distributions* that correctly encode the inputs
122 | - due to the additional KLD term (see loss_function() below)
123 | the distribution will tend to unit Gaussians
124 |
125 | Parameters
126 | ----------
127 | mu : [128, ZDIMS] mean matrix
128 | logvar : [128, ZDIMS] variance matrix
129 |
130 | Returns
131 | -------
132 |
133 | During training random sample from the learned ZDIMS-dimensional
134 | normal distribution; during inference its mean.
135 |
136 | """
137 |
138 | if self.training:
139 | # multiply log variance with 0.5, then in-place exponent
140 | # yielding the standard deviation
141 |
142 | sample_z = []
143 | for _ in range(no_of_sample):
144 | std = logvar.mul(0.5).exp_() # type: Variable
145 | # - std.data is the [128,ZDIMS] tensor that is wrapped by std
146 | # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
147 | # and stddev 1 normal distribution that is 128 samples
148 | # of random ZDIMS-float vectors
149 | eps = Variable(std.data.new(std.size()).normal_())
150 | # - sample from a normal distribution with standard
151 | # deviation = std and mean = mu by multiplying mean 0
152 | # stddev 1 sample with desired std and mu, see
153 | # https://stats.stackexchange.com/a/16338
154 | # - so we have 128 sets (the batch) of random ZDIMS-float
155 | # vectors sampled from normal distribution with learned
156 | # std and mu for the current input
157 | sample_z.append(eps.mul(std).add_(mu))
158 |
159 | return sample_z
160 |
161 | else:
162 | # During inference, we simply spit out the mean of the
163 | # learned distribution for the current input. We could
164 | # use a random sample from the distribution, but mu of
165 | # course has the highest probability.
166 |             return [mu]  # wrapped in a list so forward() treats eval like the L=1 case
167 |
168 | def decode(self, z: Variable) -> (Variable, Variable):
169 |
170 | x = F.elu(self.fc1(z))
171 | x = F.elu(self.fc2(x))
172 | x = x.view(-1, 128, 7, 7)
173 | mu_x = F.relu(self.conv_t11(x))
174 | mu_x = F.sigmoid(self.conv_t12(mu_x))
175 |
176 |         logvar_x = F.relu(self.conv_t21(x))  # the logvar head has its own transposed convs; reusing conv_t11/t12 was a bug
177 |         logvar_x = F.sigmoid(self.conv_t22(logvar_x))
178 |
179 | return mu_x.view(-1, 784), logvar_x.view(-1,784)
180 |
181 |
182 |
183 | def forward(self, x: Variable) -> (Variable, Variable, Variable):
184 | mu, logvar = self.encode(x.view(-1, 784))
185 | z = self.reparameterize(mu, logvar)
186 |         return [self.decode(z_i) for z_i in z], mu, logvar
187 | # return self.decode(z), mu, logvar
188 |
189 |
190 | model = VAE()
191 | if CUDA:
192 | model.cuda()
193 |
194 |
195 | def loss_function(recon_x, x, mu, logvar) -> Variable:
196 | # how well do input x and output recon_x agree?
197 |
198 | GLL = 0
199 | x = x.view(-1,784)
200 | for recon_x_one in recon_x:
201 | mu_x, logvar_x = recon_x_one
202 | part1 = torch.sum(logvar_x)/BATCH_SIZE
203 | sigma = logvar_x.mul(0.5).exp_()
204 | part2 = torch.sum(((x-mu_x)/sigma)**2)/BATCH_SIZE
205 | GLL += .5*(part1+part2)
206 |
207 | GLL /= len(recon_x)
208 |
209 | # BCE = 0
210 | # for recon_x_one in recon_x:
211 | # BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784))
212 | #
213 | # BCE /= len(recon_x)
214 |
215 | # KLD is Kullback–Leibler divergence -- how much does one learned
216 | # distribution deviate from another, in this specific case the
217 | # learned distribution from the unit Gaussian
218 |
219 | # see Appendix B from VAE paper:
220 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
221 | # https://arxiv.org/abs/1312.6114
222 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
223 | # note the negative D_{KL} in appendix B of the paper
224 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
225 | # Normalise by same number of elements as in reconstruction
226 | KLD /= BATCH_SIZE
227 |
228 | # BCE tries to make our reconstruction as accurate as possible
229 | # KLD tries to push the distributions as close as possible to unit Gaussian
230 | return GLL + KLD
231 |
232 |
233 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam!
234 | optimizer = optim.Adam(model.parameters(), lr=1e-3)
235 |
236 |
237 | def train(epoch):
238 | # toggle model to train mode
239 | model.train()
240 | train_loss = 0
241 | # in the case of MNIST, len(train_loader.dataset) is 60000
242 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28]
243 | for batch_idx, (data, _) in enumerate(train_loader):
244 | data = Variable(data)
245 | if CUDA:
246 | data = data.cuda()
247 | optimizer.zero_grad()
248 |
249 | # push whole batch of data through VAE.forward() to get recon_loss
250 | recon_batch, mu, logvar = model(data)
251 | # calculate scalar loss
252 | loss = loss_function(recon_batch, data, mu, logvar)
253 | # calculate the gradient of the loss w.r.t. the graph leaves
254 | # i.e. input variables -- by the power of pytorch!
255 | loss.backward()
256 | train_loss += loss.data[0]
257 | optimizer.step()
258 | if batch_idx % LOG_INTERVAL == 0:
259 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
260 | epoch, batch_idx * len(data), len(train_loader.dataset),
261 | 100. * batch_idx / len(train_loader),
262 | loss.data[0] / len(data)))
263 |
264 | print('====> Epoch: {} Average loss: {:.4f}'.format(
265 | epoch, train_loss / len(train_loader.dataset)))
266 |
267 |
268 | def test(epoch):
269 | # toggle model to test / inference mode
270 | model.eval()
271 | test_loss = 0
272 |
273 | # each data is of BATCH_SIZE (default 128) samples
274 | for i, (data, _) in enumerate(test_loader):
275 | if CUDA:
276 | # make sure this lives on the GPU
277 | data = data.cuda()
278 |
279 | # we're only going to infer, so no autograd at all required: volatile=True
280 | data = Variable(data, volatile=True)
281 | recon_batch, mu, logvar = model(data)
282 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0]
283 | if i == 0:
284 | n = min(data.size(0), 8)
285 | # for the first 128 batch of the epoch, show the first 8 input digits
286 | # with right below them the reconstructed output digits
287 |             comparison = torch.cat([data[:n],
288 |                                     recon_batch[0][0].view(BATCH_SIZE, 1, 28, 28)[:n]])  # first MC sample's mu_x
289 | save_image(comparison.data.cpu(),
290 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n)
291 |
292 | test_loss /= len(test_loader.dataset)
293 | print('====> Test set loss: {:.4f}'.format(test_loss))
294 |
295 |
296 |
297 |
298 |
299 |
300 | if __name__ == "__main__":
301 |
302 | for epoch in range(1, EPOCHS + 1):
303 | train(epoch)
304 | # test(epoch)
305 |
306 | # 64 sets of random ZDIMS-float vectors, i.e. 64 locations / MNIST
307 | # digits in latent space
308 | sample = Variable(torch.randn(64, ZDIMS))
309 | if CUDA:
310 | sample = sample.cuda()
311 |     mu_x, _ = model.decode(sample)  # decode() returns (mu_x, logvar_x); use the mean as the image
312 |     sample = mu_x.cpu()
313 | # save out as an 8x8 matrix of MNIST digits
314 | # this will give you a visual idea of how well latent space can generate things
315 | # that look like digits
316 | save_image(sample.data.view(64, 1, 28, 28),
317 | '/home/atin/data/new/reconstruction' + str(epoch) + '.png')
--------------------------------------------------------------------------------
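
Note: every loss in this repo relies on the closed-form KL divergence -0.5 * sum(1 + logvar - mu^2 - exp(logvar)) between a diagonal Gaussian and the unit Gaussian. A small self-contained check that the analytic value matches a Monte Carlo estimate of E_q[log q(z) - log p(z)] (the mu/logvar values below are arbitrary):

    import math
    import torch

    mu = torch.tensor([0.5, -1.0])
    logvar = torch.tensor([0.2, -0.3])
    std = (0.5 * logvar).exp()

    analytic = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())

    z = mu + std * torch.randn(100000, 2)  # samples from q(z|x)
    log_q = (-0.5 * ((z - mu) / std) ** 2 - std.log() - 0.5 * math.log(2 * math.pi)).sum(1)
    log_p = (-0.5 * z ** 2 - 0.5 * math.log(2 * math.pi)).sum(1)
    monte_carlo = (log_q - log_p).mean()

    print(analytic.item(), monte_carlo.item())  # the two should agree closely
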
/simple_main.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | from torch import nn, optim
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torchvision import datasets, transforms
7 | import numpy as np
8 | from torchvision.utils import save_image
9 |
10 | batch_size = 200
11 | z_dim = 20
12 | no_of_sample = 1000
13 |
14 | class VAE(nn.Module):
15 | def __init__(self):
16 | super(VAE, self).__init__()
17 | self.fc1 = nn.Linear(784, 400)
18 | self.fc21 = nn.Linear(400, 20)
19 | self.fc22 = nn.Linear(400, 20)
20 | self.fc3 = nn.Linear(20, 400)
21 | self.fc41 = nn.Linear(400, 784)
22 | self.fc42 = nn.Linear(400, 784)
23 |
24 | self.relu = nn.ReLU()
25 | self.sigmoid = nn.Sigmoid()
26 |
27 |
28 |
29 | def encode(self, x):
30 | '''
31 | :param x: here x is an image, can be any tensor
32 | :return: 2 tensors of size [N,z_dim=20] where first one is mu and second one is logvar
33 | '''
34 |
35 | h1 = self.relu(self.fc1(x))
36 | return self.fc21(h1), self.fc22(h1)
37 |
38 |
39 |     def reparametrized_sample(self, parameter_z, no_of_sample):
40 |         '''
41 |         Draw Monte Carlo samples z = mu + sigma * eps via the reparameterization trick.
42 |         :param parameter_z: tuple (mu, logvar), each of size [N, z_dim]
43 |         :param no_of_sample: number of Monte Carlo samples L
44 |         :return: tensor of size [N, no_of_sample, z_dim=20]
45 |         '''
46 |         mu_z, logvar_z = parameter_z
47 |         standard_normal_sample = Variable(torch.randn(mu_z.size(0), no_of_sample, z_dim).cuda())  # use the actual batch size; the last batch may be smaller
48 |         mu_z = mu_z.unsqueeze(1)
49 |         sigma = (0.5 * logvar_z).exp()  # std = exp(logvar/2), not 0.5*exp(logvar)
50 |         sigma = sigma.unsqueeze(1)
51 | final_sample = mu_z+sigma*standard_normal_sample
52 |
53 | return final_sample
54 |
55 | def decode(self,z):
56 | h1 = self.relu(self.fc3(z))
57 | return self.fc41(h1), self.fc42(h1)
58 |
59 |
60 | def log_density(self):
61 | pass
62 |
63 | def forward(self,x):
64 | '''
65 |
66 | :param x: input image
67 |         :return: list of length = batch size; each element is a tuple of 2 tensors of size [no_of_sample=1000, 28*28 (for MNIST)], corresponding to mu and logvar
68 | '''
69 |
70 | x = x.view(-1,784)
71 | parameter_z = self.encode(x)
72 | sample_z = self.reparametrized_sample(parameter_z,no_of_sample)
73 | parameter_x = [self.decode(obs) for obs in sample_z]
74 |
75 | return parameter_z, parameter_x
76 |
77 |
78 | def loss_VAE(train_x, parameter_x, parameter_z):
79 |
80 |     mu_z, logvar_z = parameter_z
81 |     #Kullback-Leibler divergence
82 |     negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1)  #mu_z.size()=[batch_size, z_dim]
83 |
84 | #nll
85 | train_x_flattened = train_x.view(-1, 28*28)
86 | i = 0
87 |     nll = Variable(torch.FloatTensor(train_x.size(0)).zero_().cuda())  # one entry per example in the (possibly partial) batch
88 | for param in parameter_x:
89 | mu_x, logvar_x = param
90 | x = train_x_flattened[i]
91 |
92 |         log_likelihood_for_one_z = 0.5*torch.sum(logvar_x,1) + torch.sum(((x-mu_x).pow(2))/(2*logvar_x.exp()),1)  # -log pθ(x^(i)|z^(i,l)) up to a constant: ½Σ log σ² + ½Σ ((x-μ)/σ)², as in the README
93 | nll_one_sample = torch.mean(log_likelihood_for_one_z) #Monte carlo average step to calculate expectation
94 | nll[i] = nll_one_sample
95 | i += 1
96 |
97 |     final_loss = nll - negative_KLD  # loss = reconstruction NLL + KLD (the KLD enters with a plus sign)
98 | final_loss = torch.mean(final_loss)
99 |
100 | return final_loss
101 |
102 |
103 | def train(epoch, model, train_loader, optimizer):
104 | model.train()
105 |
106 | train_loss = 0
107 | count = 0
108 | for batch_id, data in enumerate(train_loader):
109 |
110 | train_x, _ = data
111 | count += train_x.size(0)
112 | train_x = Variable(train_x.type(torch.FloatTensor).cuda())
113 |         parameter_z, parameter_x = model(train_x)
114 |
115 |         optimizer.zero_grad()  # without this, gradients accumulate across batches
116 |         loss = loss_VAE(train_x, parameter_x, parameter_z)
117 |         train_loss += loss.data[0]
118 |
119 |         loss.backward()
120 |         optimizer.step()
121 |
122 |         if batch_id % 50 == 0:
123 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
124 |                 epoch, batch_id * len(train_x), len(train_loader.dataset), 100. * batch_id / len(train_loader), loss.data[0]))
125 |
126 | train_loss /= count
127 | print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
128 |
129 |
130 |
131 |
132 | if __name__ == "__main__":
133 | train_loader = torch.utils.data.DataLoader(
134 | datasets.MNIST('../data', train=True, download=True,
135 | transform=transforms.ToTensor()),
136 | batch_size=batch_size, shuffle=True)
137 | test_loader = torch.utils.data.DataLoader(
138 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
139 | batch_size=batch_size, shuffle=True)
140 |
141 | model = VAE()
142 | model_parameters = filter(lambda p: p.requires_grad, model.parameters())
143 | nb_params = sum([np.prod(p.size()) for p in model_parameters])
144 |     print("no. of trainable parameters is: {}".format(nb_params))
145 | model.cuda()
146 |
147 |
148 | optimizer = optim.Adam(model.parameters(), lr=.001)
149 |
150 | nb_epoch = 2
151 | for epoch in range(1, nb_epoch + 1):
152 | train(epoch, model, train_loader, optimizer)
153 |
154 |
--------------------------------------------------------------------------------
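
Note: simple_main.py trains its Gaussian-decoder VAE but never draws samples from it. A sketch of generation using names from that file (model, z_dim); squashing the decoded mean through a sigmoid for display is an assumption, since the fc41 outputs are unbounded:

    import torch
    from torch.autograd import Variable
    from torchvision.utils import save_image

    z = Variable(torch.randn(64, z_dim).cuda())
    mu_x, logvar_x = model.decode(z)
    images = torch.sigmoid(mu_x).view(64, 1, 28, 28)
    save_image(images.data.cpu(), 'sample.png')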