├── .idea
│   ├── deployment.xml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── other.xml
│   ├── remote-mappings.xml
│   ├── vcs.xml
│   └── webServers.xml
├── README.md
├── VAE_CNN_BCEloss.py
├── VAE_CNN_Gaussianloss.py
├── VAE_celeba.py
├── VAE_facebook.py
├── VAE_fb_modified.py
├── main.py
├── main_new.py
├── new1.py
├── new2.py
└── simple_main.py

/README.md:
--------------------------------------------------------------------------------
1 | # Variational-Autoencoder
2 | Contains code to train a variational autoencoder on the MNIST dataset using PyTorch.
3 | 
4 | L = No.
of monte carlo samples for gradient calculation 5 | 6 | Gaussian loss is given by 7 | 8 | ![\Large \frac{1}{N}\sum_{i=1}^{N}\left[\frac{1}{L}\sum_{l=1}^{L}\left\{ \frac{1}{2}\sum_{j=1}^{784}\log(\sigma_{ij}^{(l)})^2 + \frac{1}{2}\sum_{j=1}^{784}\left(\frac{x_{ij}-\mu_{ij}^{(l)}} {\sigma_{ij}^{(l)}}\right)^2 \right\} \right ] - \frac{1}{N}\sum_{i=1}^{N}\left[ \sum_{j=1}^{J}\frac{1}{2}\left(1+\log(\sigma_j^{\prime(i)})^2-(\mu_j^{\prime(i)})^2 -(\sigma_j^{\prime(i)})^2\right )\right ]](https://latex.codecogs.com/svg.latex?%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%5Cfrac%7B1%7D%7BL%7D%5Csum_%7Bl%3D1%7D%5E%7BL%7D%5Cleft%5C%7B%20%5Cfrac%7B1%7D%7B2%7D%5Csum_%7Bj%3D1%7D%5E%7B784%7D%5Clog%28%5Csigma_%7Bij%7D%5E%7B%28l%29%7D%29%5E2%20+%20%5Cfrac%7B1%7D%7B2%7D%5Csum_%7Bj%3D1%7D%5E%7B784%7D%5Cleft%28%5Cfrac%7Bx_%7Bij%7D-%5Cmu_%7Bij%7D%5E%7B%28l%29%7D%7D%20%7B%5Csigma_%7Bij%7D%5E%7B%28l%29%7D%7D%5Cright%29%5E2%20%5Cright%5C%7D%20%5Cright%20%5D%20-%20%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%20%5Csum_%7Bj%3D1%7D%5E%7BJ%7D%5Cfrac%7B1%7D%7B2%7D%5Cleft%281+%5Clog%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2-%28%5Cmu_j%5E%7B%5Cprime%28i%29%7D%29%5E2%20-%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2%5Cright%20%29%5Cright%20%5D) 9 | 10 | 11 | BCE loss is given by 12 | 13 | ![\Large \frac{1}{N}\sum_{i=1}^{N}\left[\frac{1}{L}\sum_{l=1}^{L}\left\{x_{ij}\log p_{ij}^{(l)} + (1-x_{ij})\log(1-\log p_{ij}^{(l)}) \right\} \right ] - \frac{1}{N}\sum_{i=1}^{N}\left[ \sum_{j=1}^{J}\frac{1}{2}\left(1+\log(\sigma_j^{\prime(i)})^2-(\mu_j^{\prime(i)})^2 -(\sigma_j^{\prime(i)})^2\right )\right ]](https://latex.codecogs.com/svg.latex?%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%5Cfrac%7B1%7D%7BL%7D%5Csum_%7Bl%3D1%7D%5E%7BL%7D%5Cleft%5C%7Bx_%7Bij%7D%5Clog%20p_%7Bij%7D%5E%7B%28l%29%7D%20+%20%281-x_%7Bij%7D%29%5Clog%281-%5Clog%20p_%7Bij%7D%5E%7B%28l%29%7D%29%20%5Cright%5C%7D%20%5Cright%20%5D%20-%20%5Cfrac%7B1%7D%7BN%7D%5Csum_%7Bi%3D1%7D%5E%7BN%7D%5Cleft%5B%20%5Csum_%7Bj%3D1%7D%5E%7BJ%7D%5Cfrac%7B1%7D%7B2%7D%5Cleft%281+%5Clog%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2-%28%5Cmu_j%5E%7B%5Cprime%28i%29%7D%29%5E2%20-%28%5Csigma_j%5E%7B%5Cprime%28i%29%7D%29%5E2%5Cright%20%29%5Cright%20%5D) 14 | -------------------------------------------------------------------------------- /VAE_CNN_BCEloss.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | from torch.autograd import Variable 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | 10 | os.environ['CUDA_VISIBLE_DEVICES'] = '3' 11 | # changed configuration to this instead of argparse for easier interaction 12 | CUDA = True 13 | SEED = 1 14 | BATCH_SIZE = 128 15 | LOG_INTERVAL = 10 16 | EPOCHS = 10 17 | no_of_sample = 10 18 | 19 | # connections through the autoencoder bottleneck 20 | # in the pytorch VAE example, this is 20 21 | ZDIMS = 20 22 | 23 | torch.manual_seed(SEED) 24 | if CUDA: 25 | torch.cuda.manual_seed(SEED) 26 | 27 | # DataLoader instances will load tensors directly into GPU memory 28 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {} 29 | 30 | # Download or load downloaded MNIST dataset 31 | # shuffle data at every epoch 32 | train_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=True, download=True,transform=transforms.ToTensor()), 33 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 
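# each batch yielded by this loader is a (data, label) tuple: data is a
# [BATCH_SIZE, 1, 28, 28] tensor scaled to [0, 1] by ToTensor(); the VAE
# below never uses the labels.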
34 | 35 | # Same for test data 36 | test_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=False, transform=transforms.ToTensor()), 37 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 38 | 39 | 40 | class VAE(nn.Module): 41 | def __init__(self): 42 | super(VAE, self).__init__() 43 | 44 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15), 45 | stride=2) # This padding keeps the size of the image same, i.e. same padding 46 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2) 47 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 48 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS) 49 | 50 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 51 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS) 52 | self.relu = nn.ReLU() 53 | 54 | # For decoder 55 | 56 | # For mu 57 | self.fc1 = nn.Linear(in_features=20, out_features=1024) 58 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128) 59 | self.conv_t1 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 60 | self.conv_t2 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 61 | 62 | 63 | 64 | def encode(self, x: Variable) -> (Variable, Variable): 65 | 66 | x = x.view(-1, 1, 28, 28) 67 | x = F.elu(self.conv1(x)) 68 | x = F.elu(self.conv2(x)) 69 | x = x.view(-1, 128 * 28 * 28) 70 | 71 | mu_z = F.elu(self.fc11(x)) 72 | mu_z = self.fc12(mu_z) 73 | 74 | logvar_z = F.elu(self.fc21(x)) 75 | logvar_z = self.fc22(logvar_z) 76 | 77 | return mu_z, logvar_z 78 | 79 | 80 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable: 81 | 82 | 83 | if self.training: 84 | # multiply log variance with 0.5, then in-place exponent 85 | # yielding the standard deviation 86 | 87 | sample_z = [] 88 | for _ in range(no_of_sample): 89 | std = logvar.mul(0.5).exp_() # type: Variable 90 | eps = Variable(std.data.new(std.size()).normal_()) 91 | sample_z.append(eps.mul(std).add_(mu)) 92 | 93 | return sample_z 94 | 95 | else: 96 | # During inference, we simply spit out the mean of the 97 | # learned distribution for the current input. We could 98 | # use a random sample from the distribution, but mu of 99 | # course has the highest probability. 100 | return mu 101 | 102 | def decode(self, z: Variable) -> Variable: 103 | 104 | x = F.elu(self.fc1(z)) 105 | x = F.elu(self.fc2(x)) 106 | x = x.view(-1, 128, 7, 7) 107 | x = F.relu(self.conv_t1(x)) 108 | x = F.sigmoid(self.conv_t2(x)) 109 | 110 | return x.view(-1, 784) 111 | 112 | 113 | def forward(self, x: Variable) -> (Variable, Variable, Variable): 114 | mu, logvar = self.encode(x.view(-1, 784)) 115 | z = self.reparameterize(mu, logvar) 116 | if self.training: 117 | return [self.decode(z) for z in z], mu, logvar 118 | else: 119 | return self.decode(z), mu, logvar 120 | # return self.decode(z), mu, logvar 121 | 122 | def loss_function(self, recon_x, x, mu, logvar) -> Variable: 123 | # how well do input x and output recon_x agree? 
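# during training, recon_x is a list of `no_of_sample` reconstructions, one
# per latent draw z^(l); averaging the BCE over that list is the (1/L) * sum
# over l Monte Carlo estimate of the reconstruction term in the README objective.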
124 | 125 | if self.training: 126 | BCE = 0 127 | for recon_x_one in recon_x: 128 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784)) 129 | BCE /= len(recon_x) 130 | else: 131 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784)) 132 | 133 | # KLD is Kullback–Leibler divergence -- how much does one learned 134 | # distribution deviate from another, in this specific case the 135 | # learned distribution from the unit Gaussian 136 | 137 | # see Appendix B from VAE paper: 138 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 139 | # https://arxiv.org/abs/1312.6114 140 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 141 | # note the negative D_{KL} in appendix B of the paper 142 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 143 | # Normalise by same number of elements as in reconstruction 144 | KLD /= BATCH_SIZE * 784 145 | 146 | 147 | return BCE + KLD 148 | 149 | 150 | model = VAE() 151 | if CUDA: 152 | model.cuda() 153 | 154 | 155 | 156 | 157 | 158 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 159 | 160 | 161 | def train(epoch): 162 | # toggle model to train mode 163 | model.train() 164 | train_loss = 0 165 | # in the case of MNIST, len(train_loader.dataset) is 60000 166 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 167 | for batch_idx, (data, _) in enumerate(train_loader): 168 | data = Variable(data) 169 | if CUDA: 170 | data = data.cuda() 171 | optimizer.zero_grad() 172 | 173 | # push whole batch of data through VAE.forward() to get recon_loss 174 | recon_batch, mu, logvar = model(data) 175 | # calculate scalar loss 176 | loss = model.loss_function(recon_batch, data, mu, logvar) 177 | # calculate the gradient of the loss w.r.t. the graph leaves 178 | # i.e. input variables -- by the power of pytorch! 179 | loss.backward() 180 | train_loss += loss.data[0] 181 | optimizer.step() 182 | if batch_idx % LOG_INTERVAL == 0: 183 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), len(train_loader.dataset), 184 | 100. * batch_idx / len(train_loader), 185 | loss.data[0] / len(data))) 186 | 187 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset))) 188 | 189 | 190 | def test(epoch): 191 | model.eval() 192 | test_loss = 0 193 | 194 | # each data is of BATCH_SIZE (default 128) samples 195 | for i, (data, _) in enumerate(test_loader): 196 | if CUDA: 197 | # make sure this lives on the GPU 198 | data = data.cuda() 199 | 200 | # we're only going to infer, so no autograd at all required: volatile=True 201 | data = Variable(data, volatile=True) 202 | recon_batch, mu, logvar = model(data) 203 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0] 204 | if i == 0: 205 | n = min(data.size(0), 8) 206 | # for the first 128 batch of the epoch, show the first 8 input digits 207 | # with right below them the reconstructed output digits 208 | comparison = torch.cat([data[:n], 209 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 210 | save_image(comparison.data.cpu(), 211 | './mnist/reconstruction_' + str(epoch) + '.png', nrow=n) 212 | 213 | test_loss /= len(test_loader.dataset) 214 | print('====> Test set loss: {:.4f}'.format(test_loss)) 215 | 216 | 217 | if __name__ == "__main__": 218 | for epoch in range(1, EPOCHS + 1): 219 | train(epoch) 220 | test(epoch) 221 | 222 | # 64 sets of random ZDIMS-float vectors, i.e. 
64 locations / MNIST 223 | # digits in latent space 224 | sample = Variable(torch.randn(64, ZDIMS)) 225 | if CUDA: 226 | sample = sample.cuda() 227 | sample = model.decode(sample).cpu() 228 | 229 | # save out as an 8x8 matrix of MNIST digits 230 | # this will give you a visual idea of how well latent space can generate things 231 | # that look like digits 232 | save_image(sample.data.view(64, 1, 28, 28),'./mnist/reconstruction' + str(epoch) + '.png') -------------------------------------------------------------------------------- /VAE_CNN_Gaussianloss.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | import torch.nn.init as init 6 | from torch.autograd import Variable 7 | from torch.nn import functional as F 8 | from torchvision import datasets, transforms 9 | from torchvision.utils import save_image 10 | 11 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 12 | # changed configuration to this instead of argparse for easier interaction 13 | CUDA = True 14 | SEED = 1 15 | BATCH_SIZE = 128 16 | LOG_INTERVAL = 10 17 | EPOCHS = 50 18 | no_of_sample = 10 19 | 20 | # connections through the autoencoder bottleneck 21 | ZDIMS = 20 22 | 23 | 24 | 25 | torch.manual_seed(SEED) 26 | if CUDA: 27 | torch.cuda.manual_seed(SEED) 28 | 29 | # DataLoader instances will load tensors directly into GPU memory 30 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {} 31 | 32 | # Download or load downloaded MNIST dataset 33 | # shuffle data at every epoch 34 | train_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=True, download=True,transform=transforms.ToTensor()), 35 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 36 | 37 | # Same for test data 38 | test_loader = torch.utils.data.DataLoader(datasets.MNIST('./mnist', train=False, transform=transforms.ToTensor()), 39 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 40 | 41 | 42 | class VAE(nn.Module): 43 | def __init__(self): 44 | super(VAE, self).__init__() 45 | 46 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15), 47 | stride=2) # This padding keeps the size of the image same, i.e. 
same padding 48 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2) 49 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 50 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS) 51 | 52 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 53 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS) 54 | self.relu = nn.ReLU() 55 | 56 | # For decoder 57 | 58 | # For mu 59 | self.fc1 = nn.Linear(in_features=20, out_features=1024) 60 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128) 61 | self.conv_t11 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 62 | self.conv_t12 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 63 | 64 | self.conv_t21 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 65 | self.conv_t22 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 66 | 67 | #Parameter initialization 68 | # for m in self.modules(): 69 | # 70 | # if isinstance(m, nn.Conv2d) or isinstance(m, nn.Linear): 71 | # #init.xavier_normal(m.weight.data, gain=nn.init.calculate_gain('relu')) 72 | # init.xavier_uniform(m.weight.data, gain=nn.init.calculate_gain('relu')) 73 | # #init.kaiming_uniform(m.weight.data) 74 | # init.constant(m.bias, .1) 75 | # 76 | # elif isinstance(m, nn.BatchNorm2d): 77 | # m.weight.data.fill_(1) 78 | # m.bias.data.zero_() 79 | 80 | def encode(self, x: Variable) -> (Variable, Variable): 81 | 82 | x = x.view(-1, 1, 28, 28) 83 | x = F.elu(self.conv1(x)) 84 | x = F.elu(self.conv2(x)) 85 | x = x.view(-1, 128 * 28 * 28) 86 | 87 | mu_z = F.elu(self.fc11(x)) 88 | mu_z = self.fc12(mu_z) 89 | 90 | logvar_z = F.elu(self.fc21(x)) 91 | logvar_z = self.fc22(logvar_z) 92 | 93 | return mu_z, logvar_z 94 | 95 | def reparameterize(self, mu: Variable, logvar: Variable) -> list: 96 | """THE REPARAMETERIZATION IDEA: 97 | 98 | For each training sample (we get 128 batched at a time) 99 | 100 | - take the current learned mu, stddev for each of the ZDIMS 101 | dimensions and draw a random sample from that distribution 102 | - the whole network is trained so that these randomly drawn 103 | samples decode to output that looks like the input 104 | - which will mean that the std, mu will be learned 105 | *distributions* that correctly encode the inputs 106 | - due to the additional KLD term (see loss_function() below) 107 | the distribution will tend to unit Gaussians 108 | 109 | Parameters 110 | ---------- 111 | mu : [128, ZDIMS] mean matrix 112 | logvar : [128, ZDIMS] variance matrix 113 | 114 | Returns 115 | ------- 116 | 117 | During training random sample from the learned ZDIMS-dimensional 118 | normal distribution; during inference its mean. 
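        A minimal sketch of the trick in isolation (illustrative only; the
        names and shapes are assumed, not a call into this class):

            std = logvar.mul(0.5).exp()   # sigma = exp(log(sigma^2) / 2)
            eps = Variable(std.data.new(std.size()).normal_())  # eps ~ N(0, I)
            z = eps.mul(std).add(mu)      # z ~ N(mu, sigma^2), differentiable in mu and std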
119 | 
120 |         """
121 | 
122 |         if self.training:
123 |             # multiply log variance with 0.5, then in-place exponent
124 |             # yielding the standard deviation
125 | 
126 |             sample_z = []
127 |             for _ in range(no_of_sample):
128 |                 std = logvar.mul(0.5).exp_()  # type: Variable
129 |                 # - std.data is the [128,ZDIMS] tensor that is wrapped by std
130 |                 # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
131 |                 #   and stddev 1 normal distribution that is 128 samples
132 |                 #   of random ZDIMS-float vectors
133 |                 eps = Variable(std.data.new(std.size()).normal_())
134 |                 # - sample from a normal distribution with standard
135 |                 #   deviation = std and mean = mu by multiplying mean 0
136 |                 #   stddev 1 sample with desired std and mu, see
137 |                 #   https://stats.stackexchange.com/a/16338
138 |                 # - so we have 128 sets (the batch) of random ZDIMS-float
139 |                 #   vectors sampled from normal distribution with learned
140 |                 #   std and mu for the current input
141 |                 sample_z.append(eps.mul(std).add_(mu))
142 | 
143 |             return sample_z
144 | 
145 |         else:
146 |             # During inference, we simply spit out the mean of the
147 |             # learned distribution for the current input. We could
148 |             # use a random sample from the distribution, but mu of
149 |             # course has the highest probability.
150 |             return mu
151 | 
152 |     def decode(self, z: Variable) -> (Variable, Variable):
153 | 
154 |         x = F.elu(self.fc1(z))
155 |         x = F.elu(self.fc2(x))
156 |         x = x.view(-1, 128, 7, 7)
157 |         mu_x = F.relu(self.conv_t11(x))
158 |         mu_x = F.sigmoid(self.conv_t12(mu_x))
159 | 
160 |         logvar_x = F.relu(self.conv_t21(x))  # second transposed-conv branch parameterises the log-variance
161 |         logvar_x = F.sigmoid(self.conv_t22(logvar_x))
162 | 
163 |         return mu_x.view(-1, 784), logvar_x.view(-1, 784)
164 | 
165 |     def forward(self, x: Variable) -> (Variable, Variable, Variable):
166 |         mu, logvar = self.encode(x.view(-1, 784))
167 |         z = self.reparameterize(mu, logvar)
168 |         if self.training:
169 |             return [self.decode(z) for z in z], mu, logvar
170 |         else:
171 |             return self.decode(z), mu, logvar
172 | 
173 |     def loss_function(self, recon_x, x, mu, logvar) -> Variable:
174 |         # how well do input x and output recon_x agree?
175 | 
176 |         if self.training:
177 |             GLL = 0
178 |             x = x.view(-1, 784)
179 |             for recon_x_one in recon_x:
180 |                 mu_x, logvar_x = recon_x_one
181 |                 part1 = torch.sum(logvar_x) / BATCH_SIZE
182 |                 sigma = logvar_x.mul(0.5).exp_()
183 |                 part2 = torch.sum(((x - mu_x) / sigma) ** 2) / BATCH_SIZE
184 |                 GLL += .5 * (part1 + part2)
185 | 
186 |             GLL /= len(recon_x)
187 |         else:
188 |             x = x.view(-1, 784)
189 |             mu_x, logvar_x = recon_x
190 |             part1 = torch.sum(logvar_x) / BATCH_SIZE
191 |             sigma = logvar_x.mul(0.5).exp_()
192 |             part2 = torch.sum(((x - mu_x) / sigma) ** 2) / BATCH_SIZE
193 |             GLL = .5 * (part1 + part2)
194 | 
195 | 
196 | 
197 |         # see Appendix B from VAE paper:
198 |         # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014
199 |         # https://arxiv.org/abs/1312.6114
200 |         # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2)
201 |         # note the negative D_{KL} in appendix B of the paper
202 |         KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp())
203 |         # Normalise by same number of elements as in reconstruction
204 |         KLD /= BATCH_SIZE
205 | 
206 | 
207 |         return GLL + KLD
208 | 
209 | 
210 | model = VAE()
211 | if CUDA:
212 |     model.cuda()
213 | 
214 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam!
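# lr=1e-3 below is simply Adam's common starting point; the remaining Adam
# defaults (betas=(0.9, 0.999), eps=1e-8) are left untouched.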
215 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 216 | 217 | 218 | def train(epoch): 219 | # toggle model to train mode 220 | model.train() 221 | train_loss = 0 222 | # in the case of MNIST, len(train_loader.dataset) is 60000 223 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 224 | for batch_idx, (data, _) in enumerate(train_loader): 225 | data = Variable(data) 226 | if CUDA: 227 | data = data.cuda() 228 | optimizer.zero_grad() 229 | 230 | # push whole batch of data through VAE.forward() to get recon_loss 231 | recon_batch, mu, logvar = model(data) 232 | # calculate scalar loss 233 | loss = model.loss_function(recon_batch, data, mu, logvar) 234 | # calculate the gradient of the loss w.r.t. the graph leaves 235 | # i.e. input variables -- by the power of pytorch! 236 | loss.backward() 237 | train_loss += loss.data[0] 238 | optimizer.step() 239 | if batch_idx % LOG_INTERVAL == 0: 240 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 241 | epoch, batch_idx * len(data), len(train_loader.dataset), 242 | 100. * batch_idx / len(train_loader), 243 | loss.data[0] / len(data))) 244 | 245 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset))) 246 | 247 | 248 | def test(epoch): 249 | # toggle model to test / inference mode 250 | model.eval() 251 | test_loss = 0 252 | 253 | # each data is of BATCH_SIZE (default 128) samples 254 | for i, (data, _) in enumerate(test_loader): 255 | if CUDA: 256 | # make sure this lives on the GPU 257 | data = data.cuda() 258 | 259 | # we're only going to infer, so no autograd at all required: volatile=True 260 | data = Variable(data, volatile=True) 261 | recon_batch, mu, logvar = model(data) 262 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0] 263 | 264 | mu_batch, _ = recon_batch 265 | if i == 0: 266 | n = min(data.size(0), 8) 267 | # for the first 128 batch of the epoch, show the first 8 input digits 268 | # with right below them the reconstructed output digits 269 | comparison = torch.cat([data[:n], 270 | mu_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 271 | save_image(comparison.data.cpu(), 272 | '/home/atin/data/new/results_gaussian/reconstruction_' + str(epoch) + '.png', nrow=n) 273 | 274 | test_loss /= len(test_loader.dataset) 275 | print('====> Test set loss: {:.4f}'.format(test_loss)) 276 | 277 | 278 | if __name__ == "__main__": 279 | 280 | for epoch in range(1, EPOCHS + 1): 281 | train(epoch) 282 | test(epoch) 283 | 284 | # 64 sets of random ZDIMS-float vectors, i.e. 
64 locations / MNIST 285 | # digits in latent space 286 | sample = Variable(torch.randn(64, ZDIMS)) 287 | if CUDA: 288 | sample = sample.cuda() 289 | mu_sample, sigma_sample = model.decode(sample) 290 | # sample = model.decode(sample).cpu() 291 | 292 | # save out as an 8x8 matrix of MNIST digits 293 | # this will give you a visual idea of how well latent space can generate things 294 | # that look like digits 295 | save_image(mu_sample.cpu().data.view(64, 1, 28, 28),'./mnist/reconstruction' + str(epoch) + '.png') -------------------------------------------------------------------------------- /VAE_celeba.py: -------------------------------------------------------------------------------- 1 | #Create a folder called celeba in home dir where reconstructed images will be stored 2 | #Considered only 100000 images for training 3 | 4 | import os 5 | import torch 6 | import torch.utils.data 7 | from torch import nn, optim 8 | from torch.autograd import Variable 9 | from torch.nn import functional as F 10 | from torchvision import datasets, transforms 11 | from torchvision.utils import save_image 12 | from torch.utils.data import Dataset, DataLoader 13 | from PIL import Image 14 | import numpy as np 15 | import matplotlib.pyplot as plt 16 | from scipy.misc import imresize 17 | 18 | import sys 19 | import warnings 20 | if not sys.warnoptions: 21 | warnings.simplefilter("ignore") 22 | os.environ['CUDA_VISIBLE_DEVICES'] = '1' 23 | no_of_sample = 10 24 | CUDA = True 25 | BATCH_SIZE = 32 26 | LOG_INTERVAL = 5 27 | 28 | 29 | class CelebaDataset(Dataset): 30 | 31 | def __init__(self, root_dir, im_name_list, resize_dim, transform=None): 32 | self.root_dir = root_dir 33 | self.im_list = im_name_list 34 | self.resize_dim = resize_dim 35 | self.transform = transform 36 | 37 | def __len__(self): 38 | return len(self.im_list) 39 | 40 | def __getitem__(self, idx): 41 | im = Image.open(os.path.join(self.root_dir, self.im_list[idx])) 42 | im = np.array(im) 43 | im = imresize(im, self.resize_dim, interp='nearest') 44 | im = im / 255 45 | 46 | if self.transform: 47 | im = self.transform(im) 48 | 49 | return im 50 | 51 | class ToTensor(object): 52 | """Convert ndarrays in sample to Tensors. 
numpy image: H x W x C, torch image: C X H X W 53 | """ 54 | 55 | def __call__(self, image, invert_arrays=True): 56 | 57 | if invert_arrays: 58 | image = image.transpose((2, 0, 1)) 59 | 60 | return torch.from_numpy(image) 61 | 62 | 63 | class Conv_Block(nn.Module): 64 | def __init__(self, in_channels, out_channels, kernel_size, padding, stride, pool_kernel_size=(2, 2)): 65 | super(Conv_Block, self).__init__() 66 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size, padding, stride) 67 | self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size, padding, stride) 68 | self.pool = nn.MaxPool2d(pool_kernel_size) 69 | 70 | def forward(self, x): 71 | x = F.elu(self.conv1(x)) 72 | x = F.elu(self.conv2(x)) 73 | x = self.pool(x) 74 | 75 | return x 76 | 77 | 78 | class VAE(nn.Module): 79 | def __init__(self): 80 | super(VAE, self).__init__() 81 | # Encoder 82 | self.block1 = Conv_Block(3, 64, (3, 3), 1, 1) # 64 83 | self.block2 = Conv_Block(64, 128, (3, 3), 1, 1) # 32 84 | self.block3 = Conv_Block(128, 256, (3, 3), 1, 1) # 16 85 | self.block4 = Conv_Block(256, 32, (3, 3), 1, 1) # 8 86 | 87 | # Decoder 88 | self.fct_decode = nn.Sequential( 89 | nn.Conv2d(16, 64, (3, 3), padding=1), 90 | nn.ELU(), 91 | nn.Upsample(scale_factor=2, mode='nearest'), # 16 92 | nn.Conv2d(64, 64, (3, 3), padding=1), 93 | nn.ELU(), 94 | nn.Upsample(scale_factor=2, mode='nearest'), # 32 95 | nn.Conv2d(64, 64, (3, 3), padding=1), 96 | nn.ELU(), 97 | nn.Upsample(scale_factor=2, mode='nearest'), # 64 98 | nn.Conv2d(64, 16, (3, 3), padding=1), 99 | nn.ELU(), 100 | nn.Upsample(scale_factor=2, mode='nearest'), # 128 101 | ) 102 | 103 | self.final_decod_mean = nn.Conv2d(16, 3, (3, 3), padding=1) 104 | 105 | def encode(self, x): 106 | '''return mu_z and logvar_z''' 107 | 108 | x = F.elu(self.block1(x)) 109 | x = F.elu(self.block2(x)) 110 | x = F.elu(self.block3(x)) 111 | x = F.elu(self.block4(x)) 112 | 113 | return x[:, :16, :, :], x[:, 16:, :, :] # output shape - batch_size x 16 x 8 x 8 114 | 115 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable: 116 | 117 | if self.training: 118 | # multiply log variance with 0.5, then in-place exponent 119 | # yielding the standard deviation 120 | 121 | sample_z = [] 122 | for _ in range(no_of_sample): 123 | std = logvar.mul(0.5).exp_() # type: Variable 124 | eps = Variable(std.data.new(std.size()).normal_()) 125 | sample_z.append(eps.mul(std).add_(mu)) 126 | 127 | return sample_z 128 | 129 | else: 130 | return mu 131 | 132 | def decode(self, z): 133 | 134 | z = self.fct_decode(z) 135 | z = self.final_decod_mean(z) 136 | z = F.sigmoid(z) 137 | 138 | return z.view(-1, 3 * 128 * 128) 139 | 140 | def forward(self, x): 141 | mu, logvar = self.encode(x) 142 | z = self.reparameterize(mu, logvar) 143 | if self.training: 144 | return [self.decode(z) for z in z], mu, logvar 145 | else: 146 | return self.decode(z), mu, logvar 147 | 148 | def loss_function(self, recon_x, x, mu, logvar) -> Variable: 149 | # how well do input x and output recon_x agree? 
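# F.binary_cross_entropy here runs with its old default size_average=True,
# i.e. it returns a mean over every element; the KLD below is divided by the
# matching BATCH_SIZE * 3 * 128 * 128 element count so both terms share a scale.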
150 | 151 | if self.training: 152 | BCE = 0 153 | for recon_x_one in recon_x: 154 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 3 * 128 * 128)) 155 | BCE /= len(recon_x) 156 | else: 157 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 3 * 128 * 128)) 158 | 159 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 160 | KLD /= BATCH_SIZE * 3 * 128 * 128 161 | 162 | return BCE + KLD 163 | 164 | 165 | def train(epoch, model, optimizer, train_loader): 166 | # toggle model to train mode 167 | model.train() 168 | train_loss = 0 169 | # in the case of MNIST, len(train_loader.dataset) is 60000 170 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 171 | for batch_idx, data in enumerate(train_loader): 172 | data = Variable(data.type(torch.FloatTensor)) 173 | if CUDA: 174 | data = data.cuda() 175 | optimizer.zero_grad() 176 | 177 | # push whole batch of data through VAE.forward() to get recon_loss 178 | recon_batch, mu, logvar = model(data) 179 | # calculate scalar loss 180 | loss = model.loss_function(recon_batch, data, mu, logvar) 181 | # calculate the gradient of the loss w.r.t. the graph leaves 182 | # i.e. input variables -- by the power of pytorch! 183 | loss.backward() 184 | train_loss += loss.data[0] 185 | optimizer.step() 186 | if batch_idx % LOG_INTERVAL == 0: 187 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, batch_idx * len(data), 188 | len(train_loader.dataset), 189 | 100. * batch_idx / len(train_loader), 190 | loss.data[0] / len(data))) 191 | 192 | print('====> Epoch: {} Average loss: {:.4f}'.format(epoch, train_loss / len(train_loader.dataset))) 193 | 194 | 195 | def test(epoch, model, test_loader): 196 | model.eval() 197 | test_loss = 0 198 | 199 | # each data is of BATCH_SIZE (default 128) samples 200 | for i, data in enumerate(test_loader): 201 | data = Variable(data.type(torch.FloatTensor), volatile=True) 202 | if CUDA: 203 | # make sure this lives on the GPU 204 | data = data.cuda() 205 | 206 | # we're only going to infer, so no autograd at all required: volatile=True 207 | 208 | recon_batch, mu, logvar = model(data) 209 | test_loss += model.loss_function(recon_batch, data, mu, logvar).data[0] 210 | if i == 0: 211 | n = min(data.size(0), 8) 212 | # for the first 128 batch of the epoch, show the first 8 input digits 213 | # with right below them the reconstructed output digits 214 | comparison = torch.cat([data[:n], 215 | recon_batch.view(BATCH_SIZE, 3, 128, 128)[:n]]) 216 | save_image(comparison.data.cpu(), 217 | './celeba/reconstruction_' + str(epoch) + '.png', nrow=n) 218 | 219 | # break #To save time 220 | 221 | test_loss /= len(test_loader.dataset) 222 | print('====> Test set loss: {:.4f}'.format(test_loss)) 223 | 224 | if __name__ == "__main__": 225 | 226 | root_dir = "/home/atin/DeployedProjects/TestProject/img_align_celeba" 227 | image_files = os.listdir(root_dir) 228 | train_dataset = CelebaDataset(root_dir, image_files[:100000], (128, 128), transforms.Compose([ToTensor()])) 229 | train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, num_workers=10, shuffle=True) 230 | 231 | #Take only 1000 images in test 232 | test_dataset = CelebaDataset(root_dir, image_files[100000:101000], (128, 128), transforms.Compose([ToTensor()])) 233 | test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, num_workers=10, shuffle=True) 234 | 235 | EPOCHS = 10 236 | model = VAE() 237 | if CUDA: model.cuda() 238 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 239 | 240 | for epoch in range(1, EPOCHS + 1): 241 
| train(epoch, model, optimizer, train_loader) 242 | test(epoch, model, test_loader) 243 | 244 | # 64 sets of random ZDIMS-float vectors, i.e. 64 locations / MNIST 245 | # digits in latent space 246 | sample = Variable(torch.randn(64, 16, 8, 8)) 247 | if CUDA: 248 | sample = sample.cuda() 249 | sample = model.decode(sample).cpu() 250 | 251 | # save out as an 8x8 matrix of MNIST digits 252 | # this will give you a visual idea of how well latent space can generate things 253 | # that look like digits 254 | save_image(sample.data.view(64, 3, 128, 128), './celeba/reconstruction' + str(epoch) + '.png') 255 | 256 | 257 | 258 | 259 | 260 | 261 | -------------------------------------------------------------------------------- /VAE_facebook.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | from torch.autograd import Variable 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | 10 | # changed configuration to this instead of argparse for easier interaction 11 | CUDA = False 12 | SEED = 1 13 | BATCH_SIZE = 128 14 | LOG_INTERVAL = 10 15 | EPOCHS = 10 16 | 17 | # connections through the autoencoder bottleneck 18 | # in the pytorch VAE example, this is 20 19 | ZDIMS = 20 20 | 21 | # I do this so that the MNIST dataset is downloaded where I want it 22 | #os.chdir("/home/atin/") 23 | 24 | torch.manual_seed(SEED) 25 | if CUDA: 26 | torch.cuda.manual_seed(SEED) 27 | 28 | # DataLoader instances will load tensors directly into GPU memory 29 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {} 30 | 31 | # Download or load downloaded MNIST dataset 32 | # shuffle data at every epoch 33 | train_loader = torch.utils.data.DataLoader( 34 | datasets.MNIST('../data', train=True, download=True, 35 | transform=transforms.ToTensor()), 36 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 37 | 38 | # Same for test data 39 | test_loader = torch.utils.data.DataLoader( 40 | datasets.MNIST('../data', train=False, transform=transforms.ToTensor()), 41 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 42 | 43 | 44 | class VAE(nn.Module): 45 | def __init__(self): 46 | super(VAE, self).__init__() 47 | 48 | # ENCODER 49 | # 28 x 28 pixels = 784 input pixels, 400 outputs 50 | self.fc1 = nn.Linear(784, 400) 51 | # rectified linear unit layer from 400 to 400 52 | # max(0, x) 53 | self.relu = nn.ReLU() 54 | self.fc21 = nn.Linear(400, ZDIMS) # mu layer 55 | self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer 56 | # this last layer bottlenecks through ZDIMS connections 57 | 58 | # DECODER 59 | # from bottleneck to hidden 400 60 | self.fc3 = nn.Linear(ZDIMS, 400) 61 | # from hidden 400 to 784 outputs 62 | self.fc4 = nn.Linear(400, 784) 63 | self.sigmoid = nn.Sigmoid() 64 | 65 | def encode(self, x: Variable) -> (Variable, Variable): 66 | """Input vector x -> fully connected 1 -> ReLU -> (fully connected 67 | 21, fully connected 22) 68 | 69 | Parameters 70 | ---------- 71 | x : [128, 784] matrix; 128 digits of 28x28 pixels each 72 | 73 | Returns 74 | ------- 75 | 76 | (mu, logvar) : ZDIMS mean units one for each latent dimension, ZDIMS 77 | variance units one for each latent dimension 78 | 79 | """ 80 | 81 | # h1 is [128, 400] 82 | h1 = self.relu(self.fc1(x)) # type: Variable 83 | return self.fc21(h1), self.fc22(h1) 84 | 85 | def reparameterize(self, mu: Variable, logvar: Variable) -> Variable: 86 | """THE 
REPARAMETERIZATION IDEA: 87 | 88 | For each training sample (we get 128 batched at a time) 89 | 90 | - take the current learned mu, stddev for each of the ZDIMS 91 | dimensions and draw a random sample from that distribution 92 | - the whole network is trained so that these randomly drawn 93 | samples decode to output that looks like the input 94 | - which will mean that the std, mu will be learned 95 | *distributions* that correctly encode the inputs 96 | - due to the additional KLD term (see loss_function() below) 97 | the distribution will tend to unit Gaussians 98 | 99 | Parameters 100 | ---------- 101 | mu : [128, ZDIMS] mean matrix 102 | logvar : [128, ZDIMS] variance matrix 103 | 104 | Returns 105 | ------- 106 | 107 | During training random sample from the learned ZDIMS-dimensional 108 | normal distribution; during inference its mean. 109 | 110 | """ 111 | 112 | if self.training: 113 | # multiply log variance with 0.5, then in-place exponent 114 | # yielding the standard deviation 115 | std = logvar.mul(0.5).exp_() # type: Variable 116 | # - std.data is the [128,ZDIMS] tensor that is wrapped by std 117 | # - so eps is [128,ZDIMS] with all elements drawn from a mean 0 118 | # and stddev 1 normal distribution that is 128 samples 119 | # of random ZDIMS-float vectors 120 | eps = Variable(std.data.new(std.size()).normal_()) 121 | # - sample from a normal distribution with standard 122 | # deviation = std and mean = mu by multiplying mean 0 123 | # stddev 1 sample with desired std and mu, see 124 | # https://stats.stackexchange.com/a/16338 125 | # - so we have 128 sets (the batch) of random ZDIMS-float 126 | # vectors sampled from normal distribution with learned 127 | # std and mu for the current input 128 | return eps.mul(std).add_(mu) 129 | 130 | else: 131 | # During inference, we simply spit out the mean of the 132 | # learned distribution for the current input. We could 133 | # use a random sample from the distribution, but mu of 134 | # course has the highest probability. 135 | return mu 136 | 137 | def decode(self, z: Variable) -> Variable: 138 | h3 = self.relu(self.fc3(z)) 139 | return self.sigmoid(self.fc4(h3)) 140 | 141 | def forward(self, x: Variable) -> (Variable, Variable, Variable): 142 | mu, logvar = self.encode(x.view(-1, 784)) 143 | z = self.reparameterize(mu, logvar) 144 | return self.decode(z), mu, logvar 145 | 146 | 147 | model = VAE() 148 | if CUDA: 149 | model.cuda() 150 | 151 | 152 | def loss_function(recon_x, x, mu, logvar) -> Variable: 153 | # how well do input x and output recon_x agree? 154 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784)) 155 | 156 | # KLD is Kullback–Leibler divergence -- how much does one learned 157 | # distribution deviate from another, in this specific case the 158 | # learned distribution from the unit Gaussian 159 | 160 | # see Appendix B from VAE paper: 161 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 162 | # https://arxiv.org/abs/1312.6114 163 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 164 | # note the negative D_{KL} in appendix B of the paper 165 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 166 | # Normalise by same number of elements as in reconstruction 167 | KLD /= BATCH_SIZE * 784 168 | 169 | # BCE tries to make our reconstruction as accurate as possible 170 | # KLD tries to push the distributions as close as possible to unit Gaussian 171 | return BCE + KLD 172 | 173 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam! 
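# worked check for loss_function above (illustrative): a unit with mu=0 and
# logvar=0 (sigma=1) contributes -0.5 * (1 + 0 - 0 - 1) = 0 to KLD, i.e. no
# penalty once the posterior already matches the unit-Gaussian prior.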
174 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 175 | 176 | 177 | def train(epoch): 178 | # toggle model to train mode 179 | model.train() 180 | train_loss = 0 181 | # in the case of MNIST, len(train_loader.dataset) is 60000 182 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 183 | for batch_idx, (data, _) in enumerate(train_loader): 184 | data = Variable(data) 185 | if CUDA: 186 | data = data.cuda() 187 | optimizer.zero_grad() 188 | 189 | # push whole batch of data through VAE.forward() to get recon_loss 190 | recon_batch, mu, logvar = model(data) 191 | # calculate scalar loss 192 | loss = loss_function(recon_batch, data, mu, logvar) 193 | # calculate the gradient of the loss w.r.t. the graph leaves 194 | # i.e. input variables -- by the power of pytorch! 195 | loss.backward() 196 | train_loss += loss.data[0] 197 | optimizer.step() 198 | if batch_idx % LOG_INTERVAL == 0: 199 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 200 | epoch, batch_idx * len(data), len(train_loader.dataset), 201 | 100. * batch_idx / len(train_loader), 202 | loss.data[0] / len(data))) 203 | 204 | print('====> Epoch: {} Average loss: {:.4f}'.format( 205 | epoch, train_loss / len(train_loader.dataset))) 206 | 207 | 208 | def test(epoch): 209 | # toggle model to test / inference mode 210 | model.eval() 211 | test_loss = 0 212 | 213 | # each data is of BATCH_SIZE (default 128) samples 214 | for i, (data, _) in enumerate(test_loader): 215 | if CUDA: 216 | # make sure this lives on the GPU 217 | data = data.cuda() 218 | 219 | # we're only going to infer, so no autograd at all required: volatile=True 220 | data = Variable(data, volatile=True) 221 | recon_batch, mu, logvar = model(data) 222 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 223 | if i == 0: 224 | n = min(data.size(0), 8) 225 | # for the first 128 batch of the epoch, show the first 8 input digits 226 | # with right below them the reconstructed output digits 227 | comparison = torch.cat([data[:n], 228 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 229 | save_image(comparison.data.cpu(), 230 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 231 | 232 | test_loss /= len(test_loader.dataset) 233 | print('====> Test set loss: {:.4f}'.format(test_loss)) 234 | 235 | 236 | 237 | 238 | if __name__ =="__main__": 239 | 240 | for epoch in range(1, EPOCHS + 1): 241 | train(epoch) 242 | test(epoch) 243 | 244 | # 64 sets of random ZDIMS-float vectors, i.e. 
64 locations / MNIST 245 | # digits in latent space 246 | sample = Variable(torch.randn(64, ZDIMS)) 247 | if CUDA: 248 | sample = sample.cuda() 249 | sample = model.decode(sample).cpu() 250 | 251 | # save out as an 8x8 matrix of MNIST digits 252 | # this will give you a visual idea of how well latent space can generate things 253 | # that look like digits 254 | save_image(sample.data.view(64, 1, 28, 28), 255 | 'results/sample_' + str(epoch) + '.png') 256 | 257 | 258 | # from __future__ import print_function 259 | # import argparse 260 | # import torch 261 | # import torch.utils.data 262 | # from torch import nn, optim 263 | # from torch.autograd import Variable 264 | # from torch.nn import functional as F 265 | # from torchvision import datasets, transforms 266 | # from torchvision.utils import save_image 267 | # 268 | # 269 | # parser = argparse.ArgumentParser(description='VAE MNIST Example') 270 | # parser.add_argument('--batch-size', type=int, default=128, metavar='N', 271 | # help='input batch size for training (default: 128)') 272 | # parser.add_argument('--epochs', type=int, default=10, metavar='N', 273 | # help='number of epochs to train (default: 10)') 274 | # parser.add_argument('--no-cuda', action='store_true', default=False, 275 | # help='enables CUDA training') 276 | # parser.add_argument('--seed', type=int, default=1, metavar='S', 277 | # help='random seed (default: 1)') 278 | # parser.add_argument('--log-interval', type=int, default=10, metavar='N', 279 | # help='how many batches to wait before logging training status') 280 | # args = parser.parse_args() 281 | # args.cuda = not args.no_cuda and torch.cuda.is_available() 282 | # 283 | # 284 | # torch.manual_seed(args.seed) 285 | # if args.cuda: 286 | # torch.cuda.manual_seed(args.seed) 287 | # 288 | # 289 | # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 290 | # train_loader = torch.utils.data.DataLoader( 291 | # datasets.MNIST('../data', train=True, download=True, 292 | # transform=transforms.ToTensor()), 293 | # batch_size=args.batch_size, shuffle=True, **kwargs) 294 | # test_loader = torch.utils.data.DataLoader( 295 | # datasets.MNIST('../data', train=False, transform=transforms.ToTensor()), 296 | # batch_size=args.batch_size, shuffle=True, **kwargs) 297 | # 298 | # 299 | # class VAE(nn.Module): 300 | # def __init__(self): 301 | # super(VAE, self).__init__() 302 | # 303 | # self.fc1 = nn.Linear(784, 400) 304 | # self.fc21 = nn.Linear(400, 20) 305 | # self.fc22 = nn.Linear(400, 20) 306 | # 307 | # 308 | # self.fc3 = nn.Linear(20, 400) 309 | # self.fc4 = nn.Linear(400, 784) 310 | # 311 | # self.relu = nn.ReLU() 312 | # self.sigmoid = nn.Sigmoid() 313 | # 314 | # def encode(self, x): 315 | # h1 = self.relu(self.fc1(x)) 316 | # return self.fc21(h1), self.fc22(h1) 317 | # 318 | # def reparameterize(self, mu, logvar): 319 | # if self.training: 320 | # std = logvar.mul(0.5).exp_() 321 | # eps = Variable(std.data.new(std.size()).normal_()) 322 | # return eps.mul(std).add_(mu) 323 | # else: 324 | # return mu 325 | # 326 | # def decode(self, z): 327 | # h3 = self.relu(self.fc3(z)) 328 | # return self.sigmoid(self.fc4(h3)) 329 | # 330 | # def forward(self, x): 331 | # mu, logvar = self.encode(x.view(-1, 784)) 332 | # z = self.reparameterize(mu, logvar) 333 | # return self.decode(z), mu, logvar 334 | # 335 | # 336 | # model = VAE() 337 | # if args.cuda: 338 | # model.cuda() 339 | # optimizer = optim.Adam(model.parameters(), lr=1e-3) 340 | # 341 | # # Reconstruction + KL divergence losses summed over all elements 
and batch 342 | # def loss_function(recon_x, x, mu, logvar): 343 | # BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), size_average=False) 344 | # 345 | # # see Appendix B from VAE paper: 346 | # # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 347 | # # https://arxiv.org/abs/1312.6114 348 | # # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 349 | # KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 350 | # 351 | # return BCE + KLD 352 | # 353 | # 354 | # def train(epoch): 355 | # model.train() 356 | # train_loss = 0 357 | # for batch_idx, (data, _) in enumerate(train_loader): 358 | # data = Variable(data) 359 | # if args.cuda: 360 | # data = data.cuda() 361 | # optimizer.zero_grad() 362 | # recon_batch, mu, logvar = model(data) 363 | # loss = loss_function(recon_batch, data, mu, logvar) 364 | # loss.backward() 365 | # train_loss += loss.data[0] 366 | # optimizer.step() 367 | # if batch_idx % args.log_interval == 0: 368 | # print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 369 | # epoch, batch_idx * len(data), len(train_loader.dataset), 370 | # 100. * batch_idx / len(train_loader), 371 | # loss.data[0] / len(data))) 372 | # 373 | # print('====> Epoch: {} Average loss: {:.4f}'.format( 374 | # epoch, train_loss / len(train_loader.dataset))) 375 | # 376 | # 377 | # def test(epoch): 378 | # model.eval() 379 | # test_loss = 0 380 | # for i, (data, _) in enumerate(test_loader): 381 | # if args.cuda: 382 | # data = data.cuda() 383 | # data = Variable(data, volatile=True) 384 | # recon_batch, mu, logvar = model(data) 385 | # test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 386 | # if i == 0: 387 | # n = min(data.size(0), 8) 388 | # comparison = torch.cat([data[:n], 389 | # recon_batch.view(args.batch_size, 1, 28, 28)[:n]]) 390 | # save_image(comparison.data.cpu(), 391 | # 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 392 | # 393 | # test_loss /= len(test_loader.dataset) 394 | # print('====> Test set loss: {:.4f}'.format(test_loss)) 395 | # 396 | # 397 | # for epoch in range(1, args.epochs + 1): 398 | # train(epoch) 399 | # test(epoch) 400 | # sample = Variable(torch.randn(64, 20)) 401 | # if args.cuda: 402 | # sample = sample.cuda() 403 | # sample = model.decode(sample).cpu() 404 | # save_image(sample.data.view(64, 1, 28, 28), 405 | # 'results/sample_' + str(epoch) + '.png') -------------------------------------------------------------------------------- /VAE_fb_modified.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.utils.data 3 | from torch import nn, optim 4 | from torch.autograd import Variable 5 | from torch.nn import functional as F 6 | from torchvision import datasets, transforms 7 | import numpy as np 8 | from torchvision.utils import save_image 9 | import os 10 | 11 | os.environ['CUDA_VISIBLE_DEVICES'] = "2" 12 | 13 | CUDA = False 14 | batch_size = 16 15 | z_dim = 20 16 | no_of_sample = 1000 17 | 18 | 19 | # kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} 20 | 21 | class VAE(nn.Module): 22 | def __init__(self): 23 | super(VAE, self).__init__() 24 | 25 | # ENCODER 26 | # 28 x 28 pixels = 784 input pixels, 400 outputs 27 | self.fc1 = nn.Linear(784, 400) 28 | # rectified linear unit layer from 400 to 400 29 | # max(0, x) 30 | self.relu = nn.ReLU() 31 | self.fc21 = nn.Linear(400, z_dim) # mu layer 32 | self.fc22 = nn.Linear(400, z_dim) # logvariance layer 33 | # this last layer bottlenecks through ZDIMS connections 34 | 35 | # 
DECODER 36 | # from bottleneck to hidden 400 37 | self.fc3 = nn.Linear(z_dim, 400) 38 | # from hidden 400 to 784 outputs 39 | self.fc4 = nn.Linear(400, 784) 40 | self.sigmoid = nn.Sigmoid() 41 | 42 | def encode(self, x): 43 | ''' 44 | :param x: here x is an image, can be any tensor 45 | :return: 2 tensors of size [N,z_dim=20] where first one is mu and second one is logvar 46 | ''' 47 | 48 | h1 = self.relu(self.fc1(x)) # type: Variable 49 | return self.fc21(h1), self.fc22(h1) 50 | 51 | def reparametrized_sample(self, parameter_z, no_of_sample): 52 | ''' 53 | 54 | :param z: 55 | :param no_of_sample: no of monte carlo sample 56 | :return: torch of size [N,no_of_sample,z_dim=20] 57 | ''' 58 | if CUDA: 59 | standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim).cuda()) 60 | else: 61 | standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim)) 62 | 63 | mu_z, logvar_z = parameter_z 64 | mu_z = mu_z.unsqueeze(1) 65 | sigma = logvar_z.mul(.5).exp() 66 | # sigma =.5*logvar_z.exp() 67 | 68 | sigma = sigma.unsqueeze(1) 69 | final_sample = mu_z + sigma * standard_normal_sample 70 | 71 | return final_sample 72 | 73 | def decode(self, z): 74 | h3 = self.relu(self.fc3(z)) 75 | return self.sigmoid(self.fc4(h3)) 76 | 77 | # x = F.elu(self.fc1(z)) 78 | # x = F.elu(self.fc2(x)) 79 | # x = x.view(-1,128,7,7) 80 | # x = F.relu(self.conv_t1(x)) 81 | # x = F.sigmoid(self.conv_t2(x)) 82 | 83 | # return x 84 | # mu_x = x.view(-1,28*28) 85 | # 86 | # logvar_x = F.elu(self.fc3(z)) 87 | # logvar_x = F.softmax(self.fc4(logvar_x)) 88 | # 89 | # return mu_x, logvar_x 90 | 91 | def log_density(self): 92 | pass 93 | 94 | def forward(self, x): 95 | ''' 96 | 97 | :param x: input image 98 | :return: array of length = batch size, each element is a tuple of 2 elemets of size [no_of_sample=1000,28*28 (for MNIST)], corresponding to mu and logvar 99 | ''' 100 | parameter_z = self.encode(x) 101 | sample_z = self.reparametrized_sample(parameter_z, no_of_sample) 102 | x = [self.decode(obs) for obs in sample_z] 103 | 104 | return parameter_z, x 105 | 106 | 107 | def loss_VAE(train_x, paramter_z, predicted_x): 108 | mu_z, logvar_z = paramter_z 109 | # Kullback Liebler Divergence 110 | negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1) # mu_z.size()=[batch_size, 28*28] 111 | # negative_KLD /=784 112 | 113 | # nll 114 | train_x_flattened = train_x.view(-1, 28 * 28) 115 | if CUDA: 116 | nll = Variable(torch.FloatTensor(batch_size).zero_().cuda()) 117 | else: 118 | nll = Variable(torch.FloatTensor(batch_size).zero_()) 119 | 120 | i = 0 121 | for x in train_x_flattened: 122 | predicted = predicted_x[i] 123 | predicted = predicted.view(-1, 784) 124 | 125 | sum = 0 126 | for pred in predicted: 127 | sum += F.binary_cross_entropy(pred, x, size_average=False) 128 | 129 | nll[i] = sum / no_of_sample # Monte carlo step 130 | i += 1 131 | 132 | final_loss = -negative_KLD + nll 133 | final_loss = torch.mean(final_loss) 134 | 135 | return final_loss 136 | 137 | 138 | def train(epoch, model, trainloader, optimizer): 139 | model.train() 140 | 141 | train_loss = 0 142 | count = 0 143 | for batch_id, data in enumerate(train_loader): 144 | 145 | train_x, _ = data 146 | count += train_x.size(0) 147 | 148 | if CUDA: 149 | train_x = Variable(train_x.type(torch.FloatTensor).cuda()) 150 | else: 151 | train_x = Variable(train_x.type(torch.FloatTensor)) 152 | 153 | train_x = train_x.view(-1, 784) 154 | paramter_z, predicted_x = model(train_x) 155 | 156 | loss = loss_VAE(train_x, 
paramter_z, predicted_x)
157 |         train_loss += loss.data[0]
158 |         optimizer.zero_grad()  # reset gradients so they don't accumulate across batches
159 |         loss.backward()
160 |         optimizer.step()
161 | 
162 |         if batch_id % 50 == 0:
163 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
164 |                 epoch, batch_id * len(train_x), len(train_loader.dataset), 100. * batch_id / len(train_loader),
165 |                 loss.data[0]))
166 | 
167 |     train_loss /= count
168 |     print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
169 | 
170 | 
171 | if __name__ == "__main__":
172 |     train_loader = torch.utils.data.DataLoader(
173 |         datasets.MNIST('../data', train=True, download=True,
174 |                        transform=transforms.ToTensor()),
175 |         batch_size=batch_size, shuffle=True)
176 |     test_loader = torch.utils.data.DataLoader(
177 |         datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
178 |         batch_size=batch_size, shuffle=True)
179 | 
180 |     model = VAE()
181 |     model_parameters = filter(lambda p: p.requires_grad, model.parameters())
182 |     nb_params = sum([np.prod(p.size()) for p in model_parameters])
183 |     print("no. of trainable parameters is: {}".format(nb_params))
184 |     #model.cuda()
185 | 
186 | 
187 |     optimizer = optim.Adam(model.parameters(), lr=.001)
188 | 
189 |     nb_epoch = 2
190 |     for epoch in range(1, nb_epoch + 1):
191 |         train(epoch, model, train_loader, optimizer)
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from matplotlib import pyplot as plt
4 | import math, os
5 | 
6 | 
7 | import argparse
8 | import torch
9 | import torch.utils.data
10 | from torch import nn, optim
11 | from torch.autograd import Variable
12 | from torch.nn import functional as F
13 | from torchvision import datasets, transforms
14 | from torchvision.utils import save_image
15 | 
16 | 
17 | parser = argparse.ArgumentParser(description='VAE MNIST Example')
18 | parser.add_argument('--batch-size', type=int, default=128, metavar='N',
19 |                     help='input batch size for training (default: 128)')
20 | parser.add_argument('--epochs', type=int, default=10, metavar='N',
21 |                     help='number of epochs to train (default: 10)')
22 | parser.add_argument('--no-cuda', action='store_true', default=False,
23 |                     help='enables CUDA training')
24 | parser.add_argument('--seed', type=int, default=1, metavar='S',
25 |                     help='random seed (default: 1)')
26 | parser.add_argument('--log-interval', type=int, default=10, metavar='N',
27 |                     help='how many batches to wait before logging training status')
28 | args = parser.parse_args()
29 | args.cuda = not args.no_cuda and torch.cuda.is_available()
30 | 
31 | 
32 | torch.manual_seed(args.seed)
33 | if args.cuda:
34 |     torch.cuda.manual_seed(args.seed)
35 | 
36 | 
37 | kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
38 | 
39 | def compute_kernel(x, y):
40 |     x_size = tf.shape(x)[0]
41 |     y_size = tf.shape(y)[0]
42 |     dim = tf.shape(x)[1]
43 |     tiled_x = tf.tile(tf.reshape(x, tf.stack([x_size, 1, dim])), tf.stack([1, y_size, 1]))
44 |     tiled_y = tf.tile(tf.reshape(y, tf.stack([1, y_size, dim])), tf.stack([x_size, 1, 1]))
45 |     return tf.exp(-tf.reduce_mean(tf.square(tiled_x - tiled_y), axis=2) / tf.cast(dim, tf.float32))
46 | 
47 | 
48 | class VAE(nn.Module):
49 |     def __init__(self):
50 |         super(VAE, self).__init__()
51 | 
52 |         self.fc1 = nn.Linear(784, 400)
53 |         self.fc21 = nn.Linear(400, 20)
54 |         self.fc22 = nn.Linear(400, 20)
55 |         self.fc3 = nn.Linear(20, 400)
56 |         self.fc4 = 
nn.Linear(400, 784) 57 | 58 | self.relu = nn.ReLU() 59 | self.sigmoid = nn.Sigmoid() 60 | 61 | def encode(self, x): 62 | h1 = self.relu(self.fc1(x)) 63 | return self.fc21(h1), self.fc22(h1) 64 | 65 | def reparameterize(self, mu, logvar): 66 | if self.training: 67 | std = logvar.mul(0.5).exp_() 68 | eps = Variable(std.data.new(std.size()).normal_()) 69 | return eps.mul(std).add_(mu) 70 | else: 71 | return mu 72 | 73 | def decode(self, z): 74 | h3 = self.relu(self.fc3(z)) 75 | return self.sigmoid(self.fc4(h3)) 76 | 77 | def forward(self, x): 78 | mu, logvar = self.encode(x.view(-1, 784)) 79 | z = self.reparameterize(mu, logvar) 80 | return self.decode(z), mu, logvar 81 | 82 | 83 | def loss_function(recon_x, x, mu, logvar): 84 | BCE = F.binary_cross_entropy(recon_x, x.view(-1, 784), size_average=False) 85 | 86 | # see Appendix B from VAE paper: 87 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 88 | # https://arxiv.org/abs/1312.6114 89 | # 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 90 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 91 | 92 | return BCE + KLD 93 | 94 | 95 | def train(epoch): 96 | model.train() 97 | train_loss = 0 98 | for batch_idx, (data, _) in enumerate(train_loader): 99 | data = Variable(data) 100 | if args.cuda: 101 | data = data.cuda() 102 | optimizer.zero_grad() 103 | recon_batch, mu, logvar = model(data) 104 | loss = loss_function(recon_batch, data, mu, logvar) 105 | loss.backward() 106 | train_loss += loss.data[0] 107 | optimizer.step() 108 | if batch_idx % args.log_interval == 0: 109 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 110 | epoch, batch_idx * len(data), len(train_loader.dataset), 111 | 100. * batch_idx / len(train_loader), 112 | loss.data[0] / len(data))) 113 | 114 | print('====> Epoch: {} Average loss: {:.4f}'.format( 115 | epoch, train_loss / len(train_loader.dataset))) 116 | 117 | 118 | def test(epoch): 119 | model.eval() 120 | test_loss = 0 121 | for i, (data, _) in enumerate(test_loader): 122 | if args.cuda: 123 | data = data.cuda() 124 | data = Variable(data, volatile=True) 125 | recon_batch, mu, logvar = model(data) 126 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 127 | if i == 0: 128 | n = min(data.size(0), 8) 129 | comparison = torch.cat([data[:n], 130 | recon_batch.view(args.batch_size, 1, 28, 28)[:n]]) 131 | save_image(comparison.data.cpu(), 132 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 133 | 134 | test_loss /= len(test_loader.dataset) 135 | print('====> Test set loss: {:.4f}'.format(test_loss)) 136 | 137 | 138 | 139 | def lrelu(x, rate=0.1): 140 | return tf.maximum(tf.minimum(x * rate, 0), x) 141 | 142 | def conv2d_lrelu(inputs, num_outputs, kernel_size, stride): 143 | conv = tf.contrib.layers.convolution2d(inputs, num_outputs, kernel_size, stride, 144 | weights_initializer=tf.contrib.layers.xavier_initializer(), 145 | activation_fn=tf.identity) 146 | conv = lrelu(conv) 147 | return conv 148 | 149 | def conv2d_t_relu(inputs, num_outputs, kernel_size, stride): 150 | conv = tf.contrib.layers.convolution2d_transpose(inputs, num_outputs, kernel_size, stride, 151 | weights_initializer=tf.contrib.layers.xavier_initializer(), 152 | activation_fn=tf.identity) 153 | conv = tf.nn.relu(conv) 154 | return conv 155 | 156 | def fc_lrelu(inputs, num_outputs): 157 | fc = tf.contrib.layers.fully_connected(inputs, num_outputs, 158 | weights_initializer=tf.contrib.layers.xavier_initializer(), 159 | activation_fn=tf.identity) 160 | fc = lrelu(fc) 161 | return fc 162 | 
163 | def fc_relu(inputs, num_outputs):
164 |     fc = tf.contrib.layers.fully_connected(inputs, num_outputs,
165 |                                            weights_initializer=tf.contrib.layers.xavier_initializer(),
166 |                                            activation_fn=tf.identity)
167 |     fc = tf.nn.relu(fc)
168 |     return fc
169 | 
170 | 
171 | def encoder(x, z_dim):
172 |     with tf.variable_scope('encoder'):
173 |         conv1 = conv2d_lrelu(x, num_outputs=64, kernel_size=4, stride=2)
174 |         conv2 = conv2d_lrelu(conv1, 128, 4, 2)
175 |         conv2 = tf.reshape(conv2, [-1, np.prod(conv2.get_shape().as_list()[1:])])
176 |         fc1 = fc_lrelu(conv2, 1024)
177 |         return tf.contrib.layers.fully_connected(fc1, z_dim, activation_fn=tf.identity)
178 | 
179 | def decoder(z, reuse=False):
180 |     with tf.variable_scope('decoder') as vs:
181 |         if reuse:
182 |             vs.reuse_variables()
183 |         fc1 = fc_relu(z, 1024)
184 |         fc2 = fc_relu(fc1, 7*7*128)
185 |         fc2 = tf.reshape(fc2, tf.stack([tf.shape(fc2)[0], 7, 7, 128]))
186 |         conv1 = conv2d_t_relu(fc2, 64, 4, 2)
187 |         output = tf.contrib.layers.convolution2d_transpose(conv1, 1, 4, 2, activation_fn=tf.sigmoid)
188 |         return output
189 | 
190 | 
191 | 
192 | 
193 | 
194 | if __name__ == "__main__":
195 |     train_loader = torch.utils.data.DataLoader(
196 |         datasets.MNIST('../data', train=True, download=True,
197 |                        transform=transforms.ToTensor()),
198 |         batch_size=BATCH_SIZE, shuffle=True, **kwargs)
199 |     test_loader = torch.utils.data.DataLoader(
200 |         datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
201 |         batch_size=BATCH_SIZE, shuffle=True, **kwargs)
202 | 
203 |     model = VAE()
204 |     if CUDA:
205 |         model.cuda()
206 |     optimizer = optim.Adam(model.parameters(), lr=1e-3)
207 |     if not os.path.exists('results'): os.makedirs('results')  # train()/test() save images here
208 |     for epoch in range(1, EPOCHS + 1):
209 |         train(epoch)
210 |         test(epoch)
211 |         sample = Variable(torch.randn(64, 20))
212 |         if CUDA:
213 |             sample = sample.cuda()
214 |         sample = model.decode(sample).cpu()
215 |         save_image(sample.data.view(64, 1, 28, 28),
216 |                    'results/sample_' + str(epoch) + '.png')
217 | 
--------------------------------------------------------------------------------
/main_new.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | from torch import nn, optim
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torchvision import datasets, transforms
7 | import numpy as np
8 | from torchvision.utils import save_image
9 | 
10 | batch_size = 16
11 | z_dim = 20
12 | no_of_sample = 1000
13 | #kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
14 | 
15 | class VAE(nn.Module):
16 |     def __init__(self):
17 |         super(VAE, self).__init__()
18 |         self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4,4), padding=(15,15), stride=2)  # this padding keeps the size of the image the same ('SAME' padding)
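        # Added check (not in the original file): with a 28x28 input, kernel 4,
        # stride 2 and padding 15, the output side is
        #   floor((28 + 2*15 - 4) / 2) + 1 = floor(54 / 2) + 1 = 28,
        # so conv1 and conv2 both preserve the 28x28 spatial size, which is why
        # encode() below can flatten to 128 * 28 * 28.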
19 |         self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4,4), padding=(15,15), stride=2)
20 |         self.fc11 = nn.Linear(in_features=128*28*28, out_features=1024)
21 |         self.fc12 = nn.Linear(in_features=1024, out_features=z_dim)
22 | 
23 |         self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
24 |         self.fc22 = nn.Linear(in_features=1024, out_features=z_dim)
25 | 
26 |         # For decoder
27 | 
28 |         # For mu
29 |         self.fc1 = nn.Linear(in_features=20, out_features=1024)
30 |         self.fc2 = nn.Linear(in_features=1024, out_features=7*7*128)
31 |         self.conv_t1 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2)
32 |         self.conv_t2 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2)
33 | 
34 |         # for logvar
35 |         self.fc3 = nn.Linear(in_features=20, out_features=400)
36 |         self.fc4 = nn.Linear(in_features=400, out_features=784)
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 
44 |     def encode(self, x):
45 |         '''
46 |         :param x: an input image batch [N, 1, 28, 28]
47 |         :return: 2 tensors of size [N, z_dim=20] where the first is mu and the second is logvar
48 |         '''
49 | 
50 |         x = F.elu(self.conv1(x))
51 |         x = F.elu(self.conv2(x))
52 |         x = x.view(-1, 128*28*28)
53 | 
54 |         mu_z = F.elu(self.fc11(x))
55 |         #mu_z = F.softmax(self.fc12(mu_z))
56 |         mu_z = self.fc12(mu_z)
57 | 
58 |         logvar_z = F.elu(self.fc21(x))
59 |         #logvar_z = F.softmax(self.fc22(logvar_z))
60 |         logvar_z = self.fc22(logvar_z)
61 | 
62 |         return mu_z, logvar_z
63 | 
64 |     def reparametrized_sample(self, parameter_z, no_of_sample):
65 |         '''
66 | 
67 |         :param parameter_z: tuple (mu_z, logvar_z) from encode()
68 |         :param no_of_sample: no of monte carlo samples
69 |         :return: tensor of size [N, no_of_sample, z_dim=20]
70 |         '''
71 |         standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim).cuda())
72 |         mu_z, logvar_z = parameter_z
73 |         mu_z = mu_z.unsqueeze(1)
74 |         sigma = (.5 * logvar_z).exp()  # std = exp(logvar / 2)
75 |         sigma = sigma.unsqueeze(1)
76 |         final_sample = mu_z + sigma*standard_normal_sample
77 | 
78 |         return final_sample
79 | 
80 |     def decode(self, z):
81 | 
82 |         x = F.elu(self.fc1(z))
83 |         x = F.elu(self.fc2(x))
84 |         x = x.view(-1, 128, 7, 7)
85 |         x = F.relu(self.conv_t1(x))
86 |         x = F.sigmoid(self.conv_t2(x))  # sigmoid, not softmax: softmax over the single channel would output all ones
87 |         mu_x = x.view(-1, 28*28)
88 | 
89 |         logvar_x = F.elu(self.fc3(z))
90 |         logvar_x = F.softmax(self.fc4(logvar_x))
91 | 
92 |         return mu_x, logvar_x
93 | 
94 |     def log_density(self):
95 |         pass
96 | 
97 |     def forward(self, x):
98 |         '''
99 | 
100 |         :param x: input image batch
101 |         :return: array of length = batch size; each element is a tuple of 2 elements of size [no_of_sample=1000, 28*28 (for MNIST)], corresponding to mu and logvar
102 |         '''
103 |         parameter_z = self.encode(x)
104 |         sample_z = self.reparametrized_sample(parameter_z, no_of_sample)
105 |         parameter_x = [self.decode(obs) for obs in sample_z]
106 | 
107 |         return parameter_z, parameter_x
108 | 
109 | 
110 | def loss_VAE(train_x, parameter_x, parameter_z):
111 | 
112 |     mu_z, logvar_z = parameter_z
113 |     # Kullback–Leibler divergence
114 |     negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1)  # mu_z.size() = [batch_size, z_dim]
115 | 
116 |     # negative log-likelihood
117 |     train_x_flattened = train_x.view(-1, 28*28)
118 |     i = 0
119 |     nll = Variable(torch.FloatTensor(batch_size).zero_().cuda())
120 |     for param in parameter_x:
121 |         mu_x, logvar_x = param
122 |         x = train_x_flattened[i]
123 | 
124 |         log_likelihood_for_one_z = 0.5*torch.sum(logvar_x, 1) + torch.sum(((x - mu_x).pow(2))/(2*logvar_x.exp()), 1)  # -log p(x^(i)|z^(i,l)) up to a constant; 0.5 on the log-variance term per the README loss
125 |         nll_one_sample = torch.mean(log_likelihood_for_one_z)  # Monte Carlo average step to calculate the expectation
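        # Added note (not in the original file): for image i this is the Monte
        # Carlo estimate of -log p(x_i | z) from the README's Gaussian loss,
        #   (1/L) * sum_l [ 0.5*sum_j log sigma_ij^2 + 0.5*sum_j ((x_ij - mu_ij)/sigma_ij)^2 ],
        # with additive constants dropped.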
126 |         nll[i] = nll_one_sample
127 |         i += 1
128 | 
129 |     final_loss = nll - negative_KLD  # loss = NLL + KLD; negative_KLD holds -KLD, so it enters with a minus sign
130 |     final_loss = torch.mean(final_loss)
131 | 
132 |     return final_loss
133 | 
134 | 
135 | def train(epoch, model, trainloader, optimizer):
136 |     model.train()
137 | 
138 |     train_loss = 0
139 |     count = 0
140 |     for batch_id, data in enumerate(trainloader):
141 | 
142 |         train_x, _ = data
143 |         count += train_x.size(0)
144 |         train_x = Variable(train_x.type(torch.FloatTensor).cuda())
145 |         parameter_z, parameter_x = model(train_x)
146 | 
147 |         optimizer.zero_grad()  # gradients would otherwise accumulate across batches
148 |         loss = loss_VAE(train_x, parameter_x, parameter_z)
149 |         train_loss += loss.data[0]
150 | 
151 |         loss.backward()
152 |         optimizer.step()
153 | 
154 |         if batch_id % 50 == 0:
155 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
156 |                 epoch, batch_id * len(train_x), len(trainloader.dataset), 100. * batch_id / len(trainloader), loss.data[0]))
157 | 
158 |     train_loss /= count
159 |     print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
160 | 
161 | 
162 | 
163 | 
164 | if __name__ == "__main__":
165 |     train_loader = torch.utils.data.DataLoader(
166 |         datasets.MNIST('../data', train=True, download=True,
167 |                        transform=transforms.ToTensor()),
168 |         batch_size=batch_size, shuffle=True)
169 |     test_loader = torch.utils.data.DataLoader(
170 |         datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
171 |         batch_size=batch_size, shuffle=True)
172 | 
173 |     model = VAE()
174 |     model_parameters = filter(lambda p: p.requires_grad, model.parameters())
175 |     nb_params = sum([np.prod(p.size()) for p in model_parameters])
176 |     print("no. of trainable parameters is: {}".format(nb_params))
177 |     model.cuda()
178 | 
179 | 
180 |     optimizer = optim.Adam(model.parameters(), lr=.001)
181 | 
182 |     nb_epoch = 2
183 |     for epoch in range(1, nb_epoch + 1):
184 |         train(epoch, model, train_loader, optimizer)
185 | 
186 | # A second, standalone VAE variant follows; it is defined after the training run above and is never instantiated.
187 | class VAE(nn.Module):
188 |     def __init__(self):
189 |         super(VAE, self).__init__()
190 |         self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15),
191 |                                stride=2)  # This padding keeps the size of the image the same, i.e.
same padding 192 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2) 193 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 194 | self.fc12 = nn.Linear(in_features=1024, out_features=z_dim) 195 | 196 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 197 | self.fc22 = nn.Linear(in_features=1024, out_features=z_dim) 198 | 199 | # For decoder 200 | 201 | # For mu 202 | self.fc1 = nn.Linear(in_features=20, out_features=1024) 203 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128) 204 | self.conv_t1 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 205 | self.conv_t2 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 206 | 207 | # for logvar 208 | self.fc3 = nn.Linear(in_features=20, out_features=400) 209 | self.fc4 = nn.Linear(in_features=400, out_features=784) 210 | 211 | def encode(self, x): 212 | ''' 213 | :param x: here x is an image, can be any tensor 214 | :return: 2 tensors of size [N,z_dim=20] where first one is mu and second one is logvar 215 | ''' 216 | 217 | x = F.elu(self.conv1(x)) 218 | x = F.elu(self.conv2(x)) 219 | x = x.view(-1, 128 * 28 * 28) 220 | 221 | mu_z = F.elu(self.fc11(x)) 222 | # mu_z = F.softmax(self.fc12(mu_z)) 223 | mu_z = self.fc12(mu_z) 224 | 225 | logvar_z = F.elu(self.fc21(x)) 226 | # logvar_z = F.softmax(self.fc22(logvar_z)) 227 | logvar_z = self.fc22(logvar_z) 228 | 229 | return mu_z, logvar_z 230 | 231 | def reparametrized_sample(self, parameter_z, no_of_sample): 232 | ''' 233 | 234 | :param z: 235 | :param no_of_sample: no of monte carlo sample 236 | :return: torch of size [N,no_of_sample,z_dim=20] 237 | ''' 238 | standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim)) 239 | mu_z, logvar_z = parameter_z 240 | mu_z = mu_z.unsqueeze(1) 241 | sigma = .5 * logvar_z.exp() 242 | sigma = sigma.unsqueeze(1) 243 | final_sample = mu_z + sigma * standard_normal_sample 244 | 245 | return final_sample 246 | 247 | def decode(self, z): 248 | x = F.elu(self.fc1(z)) 249 | x = F.elu(self.fc2(x)) 250 | x = x.view(-1, 128, 7, 7) 251 | x = F.relu(self.conv_t1(x)) 252 | x = F.softmax(self.conv_t2(x)) 253 | 254 | return x 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | -------------------------------------------------------------------------------- /new1.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | from torch.autograd import Variable 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | os.environ['CUDA_VISIBLE_DEVICES']='0' 10 | # changed configuration to this instead of argparse for easier interaction 11 | CUDA = True 12 | SEED = 1 13 | BATCH_SIZE = 128 14 | LOG_INTERVAL = 10 15 | EPOCHS = 10 16 | no_of_sample = 10 17 | 18 | # connections through the autoencoder bottleneck 19 | # in the pytorch VAE example, this is 20 20 | ZDIMS = 20 21 | 22 | # I do this so that the MNIST dataset is downloaded where I want it 23 | #os.chdir("/home/cpbotha/Downloads/pytorch-vae") 24 | 25 | torch.manual_seed(SEED) 26 | if CUDA: 27 | torch.cuda.manual_seed(SEED) 28 | 29 | # DataLoader instances will load tensors directly into GPU memory 30 | kwargs = 
{'num_workers': 1, 'pin_memory': True} if CUDA else {}
31 | 
32 | # Download or load downloaded MNIST dataset
33 | # shuffle data at every epoch
34 | train_loader = torch.utils.data.DataLoader(
35 |     datasets.MNIST('/home/atin/data/', train=True, download=True,
36 |                    transform=transforms.ToTensor()),
37 |     batch_size=BATCH_SIZE, shuffle=True, **kwargs)
38 | 
39 | # Same for test data
40 | test_loader = torch.utils.data.DataLoader(
41 |     datasets.MNIST('/home/atin/data/', train=False, transform=transforms.ToTensor()),
42 |     batch_size=BATCH_SIZE, shuffle=True, **kwargs)
43 | 
44 | 
45 | class VAE(nn.Module):
46 |     def __init__(self):
47 |         super(VAE, self).__init__()
48 | 
49 |         self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15),
50 |                                stride=2)  # This padding keeps the size of the image the same, i.e. 'SAME' padding
51 |         self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2)
52 |         self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
53 |         self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS)
54 | 
55 |         self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024)
56 |         self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS)
57 | 
58 |         # ReLU is needed by decode() below; the original file only defined it inside the commented-out block further down
59 |         self.relu = nn.ReLU()
60 | 
61 | 
62 | 
63 | 
64 | 
65 |         # # ENCODER
66 |         # # 28 x 28 pixels = 784 input pixels, 400 outputs
67 |         # self.fc1 = nn.Linear(784, 400)
68 |         # # rectified linear unit layer from 400 to 400
69 |         # # max(0, x)
70 |         # self.relu = nn.ReLU()
71 |         # self.fc21 = nn.Linear(400, ZDIMS) # mu layer
72 |         # self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer
73 |         # this last layer bottlenecks through ZDIMS connections
74 | 
75 |         # DECODER
76 |         # from bottleneck to hidden 400
77 |         self.fc3 = nn.Linear(ZDIMS, 400)
78 |         # from hidden 400 to 784 outputs
79 |         self.fc4 = nn.Linear(400, 784)
80 |         self.sigmoid = nn.Sigmoid()
81 | 
82 |     def encode(self, x: Variable) -> (Variable, Variable):
83 |         x = x.view(-1, 1, 28, 28)  # forward() passes flattened rows; restore NCHW for the convs
84 |         x = F.elu(self.conv1(x))
85 |         x = F.elu(self.conv2(x))
86 |         x = x.view(-1, 128 * 28 * 28)
87 | 
88 |         mu_z = F.elu(self.fc11(x))
89 |         # mu_z = F.softmax(self.fc12(mu_z))
90 |         mu_z = self.fc12(mu_z)
91 | 
92 |         logvar_z = F.elu(self.fc21(x))
93 |         # logvar_z = F.softmax(self.fc22(logvar_z))
94 |         logvar_z = self.fc22(logvar_z)
95 | 
96 |         return mu_z, logvar_z
97 | 
98 | 
99 | 
100 | 
101 |         # --- leftover documentation and code from the earlier fully-connected encoder (unreachable after the return above) ---
102 |         """Input vector x -> fully connected 1 -> ReLU -> (fully connected
103 |         21, fully connected 22)
104 | 
105 |         Parameters
106 |         ----------
107 |         x : [128, 784] matrix; 128 digits of 28x28 pixels each
108 | 
109 |         Returns
110 |         -------
111 | 
112 |         (mu, logvar) : ZDIMS mean units one for each latent dimension, ZDIMS
113 |         variance units one for each latent dimension
114 | 
115 |         """
116 | 
117 |         # h1 is [128, 400]
118 |         # h1 = self.relu(self.fc1(x))  # type: Variable  (self.fc1 no longer exists)
119 |         # return self.fc21(h1), self.fc22(h1)
120 | 
121 |     def reparameterize(self, mu: Variable, logvar: Variable) -> Variable:
122 |         """THE REPARAMETERIZATION IDEA:
123 | 
124 |         For each training sample (we get 128 batched at a time)
125 | 
126 |         - take the current learned mu, stddev for each of the ZDIMS
127 |         dimensions and draw a random sample from that distribution
128 |         - the whole network is trained so that these randomly drawn
129 |         samples decode to output that looks like the input
130 |         - which will mean that the std, mu will be learned
131 |         *distributions* that correctly encode the inputs
132 |         - due to the additional KLD term (see loss_function() below)
133 |         the distribution will tend to unit Gaussians
134 | 
135 |         Parameters
136 |         ----------
137 | mu : [128, ZDIMS] mean matrix 138 | logvar : [128, ZDIMS] variance matrix 139 | 140 | Returns 141 | ------- 142 | 143 | During training random sample from the learned ZDIMS-dimensional 144 | normal distribution; during inference its mean. 145 | 146 | """ 147 | 148 | if self.training: 149 | # multiply log variance with 0.5, then in-place exponent 150 | # yielding the standard deviation 151 | 152 | sample_z = [] 153 | for _ in range(no_of_sample): 154 | std = logvar.mul(0.5).exp_() # type: Variable 155 | # - std.data is the [128,ZDIMS] tensor that is wrapped by std 156 | # - so eps is [128,ZDIMS] with all elements drawn from a mean 0 157 | # and stddev 1 normal distribution that is 128 samples 158 | # of random ZDIMS-float vectors 159 | eps = Variable(std.data.new(std.size()).normal_()) 160 | # - sample from a normal distribution with standard 161 | # deviation = std and mean = mu by multiplying mean 0 162 | # stddev 1 sample with desired std and mu, see 163 | # https://stats.stackexchange.com/a/16338 164 | # - so we have 128 sets (the batch) of random ZDIMS-float 165 | # vectors sampled from normal distribution with learned 166 | # std and mu for the current input 167 | sample_z.append(eps.mul(std).add_(mu)) 168 | 169 | return sample_z 170 | 171 | else: 172 | # During inference, we simply spit out the mean of the 173 | # learned distribution for the current input. We could 174 | # use a random sample from the distribution, but mu of 175 | # course has the highest probability. 176 | return mu 177 | 178 | def decode(self, z: Variable) -> Variable: 179 | h3 = self.relu(self.fc3(z)) 180 | return self.sigmoid(self.fc4(h3)) 181 | 182 | def forward(self, x: Variable) -> (Variable, Variable, Variable): 183 | mu, logvar = self.encode(x.view(-1, 784)) 184 | z = self.reparameterize(mu, logvar) 185 | return [self.decode(z) for z in z], mu, logvar 186 | #return self.decode(z), mu, logvar 187 | 188 | 189 | model = VAE() 190 | if CUDA: 191 | model.cuda() 192 | 193 | 194 | def loss_function(recon_x, x, mu, logvar) -> Variable: 195 | # how well do input x and output recon_x agree? 196 | 197 | BCE = 0 198 | for recon_x_one in recon_x: 199 | BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784)) 200 | 201 | BCE /=len(recon_x) 202 | 203 | 204 | 205 | # KLD is Kullback–Leibler divergence -- how much does one learned 206 | # distribution deviate from another, in this specific case the 207 | # learned distribution from the unit Gaussian 208 | 209 | # see Appendix B from VAE paper: 210 | # Kingma and Welling. Auto-Encoding Variational Bayes. ICLR, 2014 211 | # https://arxiv.org/abs/1312.6114 212 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 213 | # note the negative D_{KL} in appendix B of the paper 214 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 215 | # Normalise by same number of elements as in reconstruction 216 | KLD /= BATCH_SIZE * 784 217 | 218 | # BCE tries to make our reconstruction as accurate as possible 219 | # KLD tries to push the distributions as close as possible to unit Gaussian 220 | return BCE + KLD 221 | 222 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam! 
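# --- Added sketch (not in the original file): loss_function() above averages the
# per-draw BCE with a Python loop; an equivalent vectorised form, assuming
# recon_x is a list of [batch, 784] tensors, would be:
#
#     recon_stack = torch.stack(recon_x)               # [no_of_sample, batch, 784]
#     target = x.view(-1, 784).expand_as(recon_stack)
#     BCE = F.binary_cross_entropy(recon_stack, target)
#
# binary_cross_entropy with the default size_average=True already divides by the
# total element count, which matches dividing the looped sum by len(recon_x).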
223 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 224 | 225 | 226 | def train(epoch): 227 | # toggle model to train mode 228 | model.train() 229 | train_loss = 0 230 | # in the case of MNIST, len(train_loader.dataset) is 60000 231 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 232 | for batch_idx, (data, _) in enumerate(train_loader): 233 | data = Variable(data) 234 | if CUDA: 235 | data = data.cuda() 236 | optimizer.zero_grad() 237 | 238 | # push whole batch of data through VAE.forward() to get recon_loss 239 | recon_batch, mu, logvar = model(data) 240 | # calculate scalar loss 241 | loss = loss_function(recon_batch, data, mu, logvar) 242 | # calculate the gradient of the loss w.r.t. the graph leaves 243 | # i.e. input variables -- by the power of pytorch! 244 | loss.backward() 245 | train_loss += loss.data[0] 246 | optimizer.step() 247 | if batch_idx % LOG_INTERVAL == 0: 248 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 249 | epoch, batch_idx * len(data), len(train_loader.dataset), 250 | 100. * batch_idx / len(train_loader), 251 | loss.data[0] / len(data))) 252 | 253 | print('====> Epoch: {} Average loss: {:.4f}'.format( 254 | epoch, train_loss / len(train_loader.dataset))) 255 | 256 | 257 | def test(epoch): 258 | # toggle model to test / inference mode 259 | model.eval() 260 | test_loss = 0 261 | 262 | # each data is of BATCH_SIZE (default 128) samples 263 | for i, (data, _) in enumerate(test_loader): 264 | if CUDA: 265 | # make sure this lives on the GPU 266 | data = data.cuda() 267 | 268 | # we're only going to infer, so no autograd at all required: volatile=True 269 | data = Variable(data, volatile=True) 270 | recon_batch, mu, logvar = model(data) 271 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 272 | if i == 0: 273 | n = min(data.size(0), 8) 274 | # for the first 128 batch of the epoch, show the first 8 input digits 275 | # with right below them the reconstructed output digits 276 | comparison = torch.cat([data[:n], 277 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 278 | save_image(comparison.data.cpu(), 279 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 280 | 281 | test_loss /= len(test_loader.dataset) 282 | print('====> Test set loss: {:.4f}'.format(test_loss)) -------------------------------------------------------------------------------- /new2.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.utils.data 4 | from torch import nn, optim 5 | from torch.autograd import Variable 6 | from torch.nn import functional as F 7 | from torchvision import datasets, transforms 8 | from torchvision.utils import save_image 9 | 10 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 11 | # changed configuration to this instead of argparse for easier interaction 12 | CUDA = True 13 | SEED = 1 14 | BATCH_SIZE = 128 15 | LOG_INTERVAL = 10 16 | EPOCHS = 10 17 | no_of_sample = 10 18 | 19 | # connections through the autoencoder bottleneck 20 | # in the pytorch VAE example, this is 20 21 | ZDIMS = 20 22 | 23 | # I do this so that the MNIST dataset is downloaded where I want it 24 | # os.chdir("/home/cpbotha/Downloads/pytorch-vae") 25 | 26 | torch.manual_seed(SEED) 27 | if CUDA: 28 | torch.cuda.manual_seed(SEED) 29 | 30 | # DataLoader instances will load tensors directly into GPU memory 31 | kwargs = {'num_workers': 1, 'pin_memory': True} if CUDA else {} 32 | 33 | # Download or load downloaded MNIST dataset 34 | # shuffle data at every epoch 
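# Added note (not in the original file): pin_memory=True page-locks the host
# tensors so the .cuda() copies in train()/test() below can use faster DMA
# transfers, and num_workers=1 loads batches in one background worker process.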
35 | train_loader = torch.utils.data.DataLoader( 36 | datasets.MNIST('/home/atin/data/', train=True, download=True, 37 | transform=transforms.ToTensor()), 38 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 39 | 40 | # Same for test data 41 | test_loader = torch.utils.data.DataLoader( 42 | datasets.MNIST('/home/atin/data/', train=False, transform=transforms.ToTensor()), 43 | batch_size=BATCH_SIZE, shuffle=True, **kwargs) 44 | 45 | 46 | class VAE(nn.Module): 47 | def __init__(self): 48 | super(VAE, self).__init__() 49 | 50 | self.conv1 = nn.Conv2d(in_channels=1, out_channels=64, kernel_size=(4, 4), padding=(15, 15), 51 | stride=2) # This padding keeps the size of the image same, i.e. same padding 52 | self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=(4, 4), padding=(15, 15), stride=2) 53 | self.fc11 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 54 | self.fc12 = nn.Linear(in_features=1024, out_features=ZDIMS) 55 | 56 | self.fc21 = nn.Linear(in_features=128 * 28 * 28, out_features=1024) 57 | self.fc22 = nn.Linear(in_features=1024, out_features=ZDIMS) 58 | self.relu = nn.ReLU() 59 | 60 | # For decoder 61 | 62 | # For mu 63 | self.fc1 = nn.Linear(in_features=20, out_features=1024) 64 | self.fc2 = nn.Linear(in_features=1024, out_features=7 * 7 * 128) 65 | self.conv_t11 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 66 | self.conv_t12 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 67 | 68 | self.conv_t21 = nn.ConvTranspose2d(in_channels=128, out_channels=64, kernel_size=4, padding=1, stride=2) 69 | self.conv_t22 = nn.ConvTranspose2d(in_channels=64, out_channels=1, kernel_size=4, padding=1, stride=2) 70 | 71 | 72 | 73 | # #for logvar 74 | # self.fc3 = nn.Linear(in_features=20, out_features=400) 75 | # self.fc4 = nn.Linear(in_features=400, out_features=784) 76 | 77 | # # ENCODER 78 | # # 28 x 28 pixels = 784 input pixels, 400 outputs 79 | # self.fc1 = nn.Linear(784, 400) 80 | # # rectified linear unit layer from 400 to 400 81 | # # max(0, x) 82 | # self.relu = nn.ReLU() 83 | # self.fc21 = nn.Linear(400, ZDIMS) # mu layer 84 | # self.fc22 = nn.Linear(400, ZDIMS) # logvariance layer 85 | # this last layer bottlenecks through ZDIMS connections 86 | 87 | # # DECODER 88 | # # from bottleneck to hidden 400 89 | # self.fc3 = nn.Linear(ZDIMS, 400) 90 | # # from hidden 400 to 784 outputs 91 | # self.fc4 = nn.Linear(400, 784) 92 | # self.sigmoid = nn.Sigmoid() 93 | 94 | def encode(self, x: Variable) -> (Variable, Variable): 95 | 96 | x = x.view(-1, 1, 28, 28) 97 | x = F.elu(self.conv1(x)) 98 | x = F.elu(self.conv2(x)) 99 | x = x.view(-1, 128 * 28 * 28) 100 | 101 | mu_z = F.elu(self.fc11(x)) 102 | # mu_z = F.softmax(self.fc12(mu_z)) 103 | mu_z = self.fc12(mu_z) 104 | 105 | logvar_z = F.elu(self.fc21(x)) 106 | # logvar_z = F.softmax(self.fc22(logvar_z)) 107 | logvar_z = self.fc22(logvar_z) 108 | 109 | return mu_z, logvar_z 110 | 111 | def reparameterize(self, mu: Variable, logvar: Variable) -> list: 112 | """THE REPARAMETERIZATION IDEA: 113 | 114 | For each training sample (we get 128 batched at a time) 115 | 116 | - take the current learned mu, stddev for each of the ZDIMS 117 | dimensions and draw a random sample from that distribution 118 | - the whole network is trained so that these randomly drawn 119 | samples decode to output that looks like the input 120 | - which will mean that the std, mu will be learned 121 | *distributions* that correctly encode the inputs 122 | - due to the 
additional KLD term (see loss_function() below)
123 |         the distribution will tend to unit Gaussians
124 | 
125 |         Parameters
126 |         ----------
127 |         mu : [128, ZDIMS] mean matrix
128 |         logvar : [128, ZDIMS] variance matrix
129 | 
130 |         Returns
131 |         -------
132 | 
133 |         During training random sample from the learned ZDIMS-dimensional
134 |         normal distribution; during inference its mean.
135 | 
136 |         """
137 | 
138 |         if self.training:
139 |             # multiply log variance with 0.5, then in-place exponent
140 |             # yielding the standard deviation
141 | 
142 |             sample_z = []
143 |             for _ in range(no_of_sample):
144 |                 std = logvar.mul(0.5).exp_()  # type: Variable
145 |                 # - std.data is the [128,ZDIMS] tensor that is wrapped by std
146 |                 # - so eps is [128,ZDIMS] with all elements drawn from a mean 0
147 |                 #   and stddev 1 normal distribution that is 128 samples
148 |                 #   of random ZDIMS-float vectors
149 |                 eps = Variable(std.data.new(std.size()).normal_())
150 |                 # - sample from a normal distribution with standard
151 |                 #   deviation = std and mean = mu by multiplying mean 0
152 |                 #   stddev 1 sample with desired std and mu, see
153 |                 #   https://stats.stackexchange.com/a/16338
154 |                 # - so we have 128 sets (the batch) of random ZDIMS-float
155 |                 #   vectors sampled from normal distribution with learned
156 |                 #   std and mu for the current input
157 |                 sample_z.append(eps.mul(std).add_(mu))
158 | 
159 |             return sample_z
160 | 
161 |         else:
162 |             # During inference, we simply spit out the mean of the
163 |             # learned distribution for the current input. We could
164 |             # use a random sample from the distribution, but mu of
165 |             # course has the highest probability.
166 |             return mu
167 | 
168 |     def decode(self, z: Variable) -> (Variable, Variable):
169 | 
170 |         x = F.elu(self.fc1(z))
171 |         x = F.elu(self.fc2(x))
172 |         x = x.view(-1, 128, 7, 7)
173 |         mu_x = F.relu(self.conv_t11(x))
174 |         mu_x = F.sigmoid(self.conv_t12(mu_x))
175 |         # the log-variance gets its own transposed-conv branch (conv_t21/22)
176 |         logvar_x = F.relu(self.conv_t21(x))
177 |         logvar_x = F.sigmoid(self.conv_t22(logvar_x))  # sigmoid bounds the log-variance to (0,1)
178 | 
179 |         return mu_x.view(-1, 784), logvar_x.view(-1, 784)
180 | 
181 | 
182 | 
183 |     def forward(self, x: Variable) -> (Variable, Variable, Variable):
184 |         mu, logvar = self.encode(x.view(-1, 784))
185 |         z = self.reparameterize(mu, logvar)
186 |         return [self.decode(zi) for zi in z], mu, logvar  # one (mu_x, logvar_x) pair per Monte Carlo draw
187 |         # return self.decode(z), mu, logvar
188 | 
189 | 
190 | model = VAE()
191 | if CUDA:
192 |     model.cuda()
193 | 
194 | 
195 | def loss_function(recon_x, x, mu, logvar) -> Variable:
196 |     # how well do input x and output recon_x agree?
197 | 
198 |     GLL = 0
199 |     x = x.view(-1, 784)
200 |     for recon_x_one in recon_x:
201 |         mu_x, logvar_x = recon_x_one
202 |         part1 = torch.sum(logvar_x)/BATCH_SIZE
203 |         sigma = logvar_x.mul(0.5).exp_()
204 |         part2 = torch.sum(((x - mu_x)/sigma)**2)/BATCH_SIZE
205 |         GLL += .5*(part1 + part2)
206 | 
207 |     GLL /= len(recon_x)
208 | 
209 |     # BCE = 0
210 |     # for recon_x_one in recon_x:
211 |     #     BCE += F.binary_cross_entropy(recon_x_one, x.view(-1, 784))
212 |     #
213 |     # BCE /= len(recon_x)
214 | 
215 |     # KLD is Kullback–Leibler divergence -- how much does one learned
216 |     # distribution deviate from another, in this specific case the
217 |     # learned distribution from the unit Gaussian
218 | 
219 |     # see Appendix B from VAE paper:
220 |     # Kingma and Welling. Auto-Encoding Variational Bayes.
ICLR, 2014 221 | # https://arxiv.org/abs/1312.6114 222 | # - D_{KL} = 0.5 * sum(1 + log(sigma^2) - mu^2 - sigma^2) 223 | # note the negative D_{KL} in appendix B of the paper 224 | KLD = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) 225 | # Normalise by same number of elements as in reconstruction 226 | KLD /= BATCH_SIZE 227 | 228 | # BCE tries to make our reconstruction as accurate as possible 229 | # KLD tries to push the distributions as close as possible to unit Gaussian 230 | return GLL + KLD 231 | 232 | 233 | # Dr Diederik Kingma: as if VAEs weren't enough, he also gave us Adam! 234 | optimizer = optim.Adam(model.parameters(), lr=1e-3) 235 | 236 | 237 | def train(epoch): 238 | # toggle model to train mode 239 | model.train() 240 | train_loss = 0 241 | # in the case of MNIST, len(train_loader.dataset) is 60000 242 | # each `data` is of BATCH_SIZE samples and has shape [128, 1, 28, 28] 243 | for batch_idx, (data, _) in enumerate(train_loader): 244 | data = Variable(data) 245 | if CUDA: 246 | data = data.cuda() 247 | optimizer.zero_grad() 248 | 249 | # push whole batch of data through VAE.forward() to get recon_loss 250 | recon_batch, mu, logvar = model(data) 251 | # calculate scalar loss 252 | loss = loss_function(recon_batch, data, mu, logvar) 253 | # calculate the gradient of the loss w.r.t. the graph leaves 254 | # i.e. input variables -- by the power of pytorch! 255 | loss.backward() 256 | train_loss += loss.data[0] 257 | optimizer.step() 258 | if batch_idx % LOG_INTERVAL == 0: 259 | print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format( 260 | epoch, batch_idx * len(data), len(train_loader.dataset), 261 | 100. * batch_idx / len(train_loader), 262 | loss.data[0] / len(data))) 263 | 264 | print('====> Epoch: {} Average loss: {:.4f}'.format( 265 | epoch, train_loss / len(train_loader.dataset))) 266 | 267 | 268 | def test(epoch): 269 | # toggle model to test / inference mode 270 | model.eval() 271 | test_loss = 0 272 | 273 | # each data is of BATCH_SIZE (default 128) samples 274 | for i, (data, _) in enumerate(test_loader): 275 | if CUDA: 276 | # make sure this lives on the GPU 277 | data = data.cuda() 278 | 279 | # we're only going to infer, so no autograd at all required: volatile=True 280 | data = Variable(data, volatile=True) 281 | recon_batch, mu, logvar = model(data) 282 | test_loss += loss_function(recon_batch, data, mu, logvar).data[0] 283 | if i == 0: 284 | n = min(data.size(0), 8) 285 | # for the first 128 batch of the epoch, show the first 8 input digits 286 | # with right below them the reconstructed output digits 287 | comparison = torch.cat([data[:n], 288 | recon_batch.view(BATCH_SIZE, 1, 28, 28)[:n]]) 289 | save_image(comparison.data.cpu(), 290 | 'results/reconstruction_' + str(epoch) + '.png', nrow=n) 291 | 292 | test_loss /= len(test_loader.dataset) 293 | print('====> Test set loss: {:.4f}'.format(test_loss)) 294 | 295 | 296 | 297 | 298 | 299 | 300 | if __name__ == "__main__": 301 | 302 | for epoch in range(1, EPOCHS + 1): 303 | train(epoch) 304 | # test(epoch) 305 | 306 | # 64 sets of random ZDIMS-float vectors, i.e. 
64 locations / MNIST
307 |     # digits in latent space
308 |     sample = Variable(torch.randn(64, ZDIMS))
309 |     if CUDA:
310 |         sample = sample.cuda()
311 |     sample = model.decode(sample)[0].cpu()  # decode() returns (mu_x, logvar_x); take the mean image
312 | 
313 |     # save out as an 8x8 matrix of MNIST digits
314 |     # this will give you a visual idea of how well latent space can generate things
315 |     # that look like digits
316 |     save_image(sample.data.view(64, 1, 28, 28),
317 |                '/home/atin/data/new/reconstruction' + str(epoch) + '.png')
--------------------------------------------------------------------------------
/simple_main.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data
3 | from torch import nn, optim
4 | from torch.autograd import Variable
5 | from torch.nn import functional as F
6 | from torchvision import datasets, transforms
7 | import numpy as np
8 | from torchvision.utils import save_image
9 | 
10 | batch_size = 200
11 | z_dim = 20
12 | no_of_sample = 1000
13 | 
14 | class VAE(nn.Module):
15 |     def __init__(self):
16 |         super(VAE, self).__init__()
17 |         self.fc1 = nn.Linear(784, 400)
18 |         self.fc21 = nn.Linear(400, 20)
19 |         self.fc22 = nn.Linear(400, 20)
20 |         self.fc3 = nn.Linear(20, 400)
21 |         self.fc41 = nn.Linear(400, 784)
22 |         self.fc42 = nn.Linear(400, 784)
23 | 
24 |         self.relu = nn.ReLU()
25 |         self.sigmoid = nn.Sigmoid()
26 | 
27 | 
28 | 
29 |     def encode(self, x):
30 |         '''
31 |         :param x: a batch of flattened images, size [N, 784]
32 |         :return: 2 tensors of size [N, z_dim=20] where the first is mu and the second is logvar
33 |         '''
34 | 
35 |         h1 = self.relu(self.fc1(x))
36 |         return self.fc21(h1), self.fc22(h1)
37 | 
38 | 
39 |     def reparametrized_sample(self, parameter_z, no_of_sample):
40 |         '''
41 | 
42 |         :param parameter_z: tuple (mu_z, logvar_z) from encode()
43 |         :param no_of_sample: no of monte carlo samples
44 |         :return: tensor of size [N, no_of_sample, z_dim=20]
45 |         '''
46 |         standard_normal_sample = Variable(torch.randn(batch_size, no_of_sample, z_dim).cuda())
47 |         mu_z, logvar_z = parameter_z
48 |         mu_z = mu_z.unsqueeze(1)
49 |         sigma = (.5 * logvar_z).exp()  # std = exp(logvar / 2)
50 |         sigma = sigma.unsqueeze(1)
51 |         final_sample = mu_z + sigma*standard_normal_sample
52 | 
53 |         return final_sample
54 | 
55 |     def decode(self, z):
56 |         h1 = self.relu(self.fc3(z))
57 |         return self.fc41(h1), self.fc42(h1)
58 | 
59 | 
60 |     def log_density(self):
61 |         pass
62 | 
63 |     def forward(self, x):
64 |         '''
65 | 
66 |         :param x: input image batch
67 |         :return: array of length = batch size; each element is a tuple of 2 elements of size [no_of_sample=1000, 28*28 (for MNIST)], corresponding to mu and logvar
68 |         '''
69 | 
70 |         x = x.view(-1, 784)
71 |         parameter_z = self.encode(x)
72 |         sample_z = self.reparametrized_sample(parameter_z, no_of_sample)
73 |         parameter_x = [self.decode(obs) for obs in sample_z]
74 | 
75 |         return parameter_z, parameter_x
76 | 
77 | 
78 | def loss_VAE(train_x, parameter_x, parameter_z):
79 | 
80 |     mu_z, logvar_z = parameter_z
81 |     # Kullback–Leibler divergence
82 |     negative_KLD = 0.5 * torch.sum(1 + logvar_z - mu_z.pow(2) - logvar_z.exp(), 1)  # mu_z.size() = [batch_size, z_dim]
83 | 
84 |     # negative log-likelihood
85 |     train_x_flattened = train_x.view(-1, 28*28)
86 |     i = 0
87 |     nll = Variable(torch.FloatTensor(batch_size).zero_().cuda())
88 |     for param in parameter_x:
89 |         mu_x, logvar_x = param
90 |         x = train_x_flattened[i]
91 | 
92 |         log_likelihood_for_one_z = 0.5*torch.sum(logvar_x, 1) + torch.sum(((x - mu_x).pow(2))/(2*logvar_x.exp()), 1)  # -log p(x^(i)|z^(i,l)) up to a constant; 0.5 on the log-variance term per the README loss
93 |         nll_one_sample = torch.mean(log_likelihood_for_one_z)  # Monte Carlo average step to calculate the expectation
94 |         nll[i] = nll_one_sample
95 |         i += 1
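        # Added note (not in the original file): train_x_flattened[i] is the
        # i-th image as a [784] vector, while mu_x/logvar_x are
        # [no_of_sample, 784]; broadcasting pairs that single image with each
        # of its no_of_sample decoded Gaussians before summing over pixels.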
96 | 
97 |     final_loss = nll - negative_KLD  # loss = NLL + KLD; negative_KLD holds -KLD, so it enters with a minus sign
98 |     final_loss = torch.mean(final_loss)
99 | 
100 |     return final_loss
101 | 
102 | 
103 | def train(epoch, model, trainloader, optimizer):
104 |     model.train()
105 | 
106 |     train_loss = 0
107 |     count = 0
108 |     for batch_id, data in enumerate(trainloader):
109 | 
110 |         train_x, _ = data
111 |         count += train_x.size(0)
112 |         train_x = Variable(train_x.type(torch.FloatTensor).cuda())
113 |         parameter_z, parameter_x = model(train_x)
114 | 
115 |         optimizer.zero_grad()  # gradients would otherwise accumulate across batches
116 |         loss = loss_VAE(train_x, parameter_x, parameter_z)
117 |         train_loss += loss.data[0]
118 | 
119 |         loss.backward()
120 |         optimizer.step()
121 | 
122 |         if batch_id % 50 == 0:
123 |             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
124 |                 epoch, batch_id * len(train_x), len(trainloader.dataset), 100. * batch_id / len(trainloader), loss.data[0]))
125 | 
126 |     train_loss /= count
127 |     print('\nTrain set: Average loss: {:.4f}'.format(train_loss))
128 | 
129 | 
130 | 
131 | 
132 | if __name__ == "__main__":
133 |     train_loader = torch.utils.data.DataLoader(
134 |         datasets.MNIST('../data', train=True, download=True,
135 |                        transform=transforms.ToTensor()),
136 |         batch_size=batch_size, shuffle=True)
137 |     test_loader = torch.utils.data.DataLoader(
138 |         datasets.MNIST('../data', train=False, transform=transforms.ToTensor()),
139 |         batch_size=batch_size, shuffle=True)
140 | 
141 |     model = VAE()
142 |     model_parameters = filter(lambda p: p.requires_grad, model.parameters())
143 |     nb_params = sum([np.prod(p.size()) for p in model_parameters])
144 |     print("no. of trainable parameters is: {}".format(nb_params))
145 |     model.cuda()
146 | 
147 | 
148 |     optimizer = optim.Adam(model.parameters(), lr=.001)
149 | 
150 |     nb_epoch = 2
151 |     for epoch in range(1, nb_epoch + 1):
152 |         train(epoch, model, train_loader, optimizer)
153 | 
--------------------------------------------------------------------------------