├── .gitignore
├── interpolate.py
├── helpers.py
├── model-cnn.py
├── train.py
└── model.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.swp
3 | *.swo
4 | *.pt
5 | __pycache__
6 | 


--------------------------------------------------------------------------------
/interpolate.py:
--------------------------------------------------------------------------------
 1 | from model import *
 2 | 
 3 | vae = torch.load('vae.pt')
 4 | vae.train(False)
 5 | 
 6 | TEMPERATURE = 0.01
 7 | N_SAMPLES = 10
 8 | N_STEPS = 10
 9 | 
def random_sample():
    """Draw one random latent vector by pushing noise through the encoder's sampler.

    Two independent standard-normal tensors of shape (1, output_size) are
    created and handed to ``vae.encoder.sample`` as its ``mu`` and ``logvar``
    arguments respectively.

    Returns:
        The value returned by ``vae.encoder.sample`` for those two tensors.
    """
    latent_dim = vae.encoder.output_size
    # Generated in order: first the "mu" noise, then the "logvar" noise.
    mu_noise, logvar_noise = [
        Variable(torch.FloatTensor(1, latent_dim).normal_()) for _ in range(2)
    ]
    if USE_CUDA:
        mu_noise, logvar_noise = mu_noise.cuda(), logvar_noise.cuda()
    return vae.encoder.sample(mu_noise, logvar_noise)
19 | 
# For each sample, pick two random latent codes and print the strings decoded
# along the straight line between them.
#
# Fixes relative to the previous version:
#   * range(1, N_SAMPLES) produced only N_SAMPLES - 1 pairs; iterate N_SAMPLES times.
#   * the outer loop variable `s` was overwritten by the decoded string inside
#     the inner loop; the two now use distinct names.
for _ in range(N_SAMPLES):
    z0 = random_sample()
    z1 = random_sample()
    diff = z1 - z0

    # Last intermediate string printed, used to suppress consecutive duplicates.
    last_s = ''

    print('(z0)', tensor_to_string(vae.decoder.generate(z0, MAX_LENGTH, TEMPERATURE)))

    # Interior points only (p = 1/N_STEPS .. (N_STEPS-1)/N_STEPS); the two
    # endpoints are printed separately before and after this loop.
    for i in range(1, N_STEPS):
        p = i * 1.0 / N_STEPS
        decoded = tensor_to_string(vae.decoder.generate(z0 + diff * p, MAX_LENGTH, TEMPERATURE))
        if last_s != decoded:
            print('  .)', decoded)
        last_s = decoded

    print('(z1)', tensor_to_string(vae.decoder.generate(z1, MAX_LENGTH, TEMPERATURE)))
    print('\n')
38 | 
39 | 


--------------------------------------------------------------------------------
/helpers.py:
--------------------------------------------------------------------------------
 1 | # https://github.com/spro/char-rnn.pytorch
 2 | 
 3 | import unidecode
 4 | import string
 5 | import random
 6 | import time
 7 | import math
 8 | import torch
 9 | from torch.autograd import Variable
10 | 
# Use the GPU only when one is actually available. A hard-coded True makes
# every `.cuda()` call in this project fail on CPU-only machines.
USE_CUDA = torch.cuda.is_available()
12 | 
13 | # Reading and un-unicode-encoding data
14 | 
# Vocabulary: every character of string.printable, followed by two extra
# indices for the start-of-sequence and end-of-sequence markers.
all_characters = string.printable
SOS = len(all_characters)   # first index past the printable characters
EOS = SOS + 1
n_characters = EOS + 1      # printable characters + SOS + EOS
20 | 
def read_file(filename):
    """Read a text file and transliterate its contents with unidecode.

    Args:
        filename: Path of the file to read.

    Returns:
        A ``(text, length)`` tuple: the transliterated text and its length.
    """
    # Context manager so the handle is closed even if reading fails
    # (the previous version never closed it).
    with open(filename) as handle:
        file = unidecode.unidecode(handle.read())
    return file, len(file)
24 | 
25 | # Turning a string into a tensor
26 | 
def char_tensor(string):
    """Encode a string as a LongTensor of vocabulary indices terminated by EOS.

    Args:
        string: Text whose characters must all occur in ``all_characters``;
            ``str.index`` raises ValueError for any character that does not.

    Returns:
        A Variable wrapping a 1-D LongTensor of length ``len(string) + 1``,
        moved to the GPU when ``USE_CUDA`` is set.
    """
    indices = [all_characters.index(ch) for ch in string]
    indices.append(EOS)
    encoded = Variable(torch.LongTensor(indices))
    return encoded.cuda() if USE_CUDA else encoded
37 | 
38 | # Turn a tensor into a string
39 | 
def index_to_char(top_i):
    """Map a vocabulary index to its character.

    SOS is rendered as '^', EOS as '$'; any other index is looked up in
    ``all_characters``.
    """
    if top_i == SOS:
        return '^'
    if top_i == EOS:
        return '$'
    return all_characters[top_i]
47 | 
def tensor_to_string(t):
    """Decode a tensor of per-step scores into a string.

    For each row along dimension 0 the highest-scoring index is converted
    with ``index_to_char``. Decoding stops after the first EOS, which is
    included in the result as '$'.
    """
    pieces = []
    for row in range(t.size(0)):
        best = t[row].data.topk(1)[1][0]
        pieces.append(index_to_char(best))
        if best == EOS:
            break
    return ''.join(pieces)
57 | 
def longtensor_to_string(t):
    """Decode a 1-D tensor of vocabulary indices into a string.

    Unlike ``tensor_to_string`` this does not stop at EOS; every element is
    converted with ``index_to_char``.
    """
    return ''.join(index_to_char(t.data[pos]) for pos in range(t.size(0)))
64 | 
65 | # Readable time elapsed
66 | 
def time_since(since):
    """Format the time elapsed since `since` as '<minutes>m <seconds>s'.

    Args:
        since: A start timestamp as returned by ``time.time()``.
    """
    elapsed = time.time() - since
    minutes = math.floor(elapsed / 60)
    seconds = elapsed - minutes * 60
    return '%dm %ds' % (minutes, seconds)
72 | 
73 | 


--------------------------------------------------------------------------------
/model-cnn.py:
--------------------------------------------------------------------------------
 1 | class EncoderCNN(nn.Module):
 2 |     def __init__(self, input_size, hidden_size, output_size, n_layers=1):
 3 |         super(EncoderCNN, self).__init__()
 4 |         self.input_size = input_size
 5 |         self.hidden_size = hidden_size
 6 |         self.output_size = output_size
 7 |         self.n_layers = n_layers
 8 | 
 9 |         self.embed = nn.Embedding(input_size, hidden_size)
10 | 
11 |         self.c1 = nn.Conv1d(hidden_size, 100, 2)
12 |         self.p1 = nn.MaxPool1d(2)
13 |         self.c2 = nn.Conv1d(100, hidden_size, 2)
14 |         self.p2 = nn.MaxPool1d(3)
15 |         convolved_size = self.hidden_size * 7
16 | 
17 |         self.o2m = nn.Linear(convolved_size, output_size)
18 |         self.o2l = nn.Linear(convolved_size, output_size)
19 | 
20 |     def forward(self, input):
21 |         # print('\n[EncoderCNN.forward]')
22 | 
23 |         input = self.embed(input)
24 | 
25 |         input_padded = Variable(torch.zeros(MAX_LENGTH, self.hidden_size))
26 |         input_padded[:input.size(0)] = input
27 |         input = input_padded.transpose(0, 1)
28 |         input = input.unsqueeze(0)
29 | 
30 |         input = self.c1(input)
31 |         input = self.p1(input)
32 |         # print('(c1 p1) input', input.size())
33 | 
34 |         input = self.c2(input)
35 |         input = self.p2(input)
36 |         # print('(c2 p2) input', input.size())
37 | 
38 |         output = input.view(1, -1)
39 |         # print('output', output.size())
40 | 
41 |         mu = self.o2m(output)
42 |         logvar = self.o2l(output)
43 |         z = reparametrize(mu, logvar)
44 |         return mu, logvar, z
45 | 
46 | 
class DecoderCNN(nn.Module):
    """Decoder built from four transposed 1-D convolutions plus a GRU.

    ``dconv`` expands a latent code through the transposed convolutions;
    ``step`` concatenates one slice of that expansion with the embedded
    input token and feeds the result to the GRU.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, dropout_p=0.05):
        # n_layers and dropout_p are accepted but not used by this class.
        super(DecoderCNN, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.embed = nn.Embedding(output_size, hidden_size)
        self.uc1 = nn.ConvTranspose1d(input_size, 200, 15)
        self.uc2 = nn.ConvTranspose1d(200, hidden_size, 15)
        self.uc3 = nn.ConvTranspose1d(hidden_size, hidden_size, 13)
        self.uc4 = nn.ConvTranspose1d(hidden_size, output_size, 11)
        self.gru = nn.GRU(input_size + hidden_size, output_size)

    def dconv(self, z, inputs):
        """Run ``z`` through the four transposed convolutions.

        ``inputs`` is accepted but not used. The result has its batch
        dimension squeezed out and its two remaining axes swapped.
        """
        expanded = z.transpose(0, 1).unsqueeze(0)
        for layer in (self.uc1, self.uc2, self.uc3, self.uc4):
            expanded = layer(expanded)
        return expanded.squeeze(0).transpose(0, 1)

    def step(self, s, u, input, hidden=None, test=False):
        """Run one GRU step on ``u`` concatenated with the embedded ``input``.

        ``s`` and ``test`` are accepted but not used. Returns whatever
        ``self.gru`` returns (its output and new hidden state).
        """
        conv_part = u.unsqueeze(0)
        token_part = self.embed(input).unsqueeze(0)
        # Join along the feature axis (dimension 2).
        inp = torch.cat((conv_part, token_part), 2)
        return self.gru(inp, hidden)
97 | 
98 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import sconce
  2 | import sys
  3 | 
  4 | from model import *
  5 | 
  6 | hidden_size = 500
  7 | embed_size = 50
  8 | learning_rate = 0.0001
  9 | n_epochs = 100000
 10 | grad_clip = 1.0
 11 | 
 12 | kld_start_inc = 10000
 13 | kld_weight = 0.05
 14 | kld_max = 0.1
 15 | kld_inc = 0.000002
 16 | temperature = 0.9
 17 | temperature_min = 0.5
 18 | temperature_dec = 0.000002
 19 | 
 20 | # Training
 21 | # ------------------------------------------------------------------------------
 22 | 
 23 | if len(sys.argv) < 2:
 24 |     print("Usage: python train.py [filename]")
 25 |     sys.exit(1)
 26 | 
 27 | file, file_len = read_file(sys.argv[1])
 28 | # file, file_len = read_file('../practical-pytorch/data/first-names.txt')
 29 | 
 30 | lines = [line.strip() for line in file.split('\n')]
 31 | print('n lines', len(lines))
 32 | 
 33 | def good_size(line):
 34 |     return len(line) >= MIN_LENGTH and len(line) <= MAX_LENGTH
 35 | 
 36 | def good_content(line):
 37 |     return 'http' not in line and '/' not in line
 38 | 
 39 | lines = [line for line in lines if good_size(line) and good_content(line)]
 40 | print('n lines', len(lines))
 41 | random.shuffle(lines)
 42 | 
 43 | def random_training_set():
 44 |     line = random.choice(lines)
 45 |     inp = char_tensor(line)
 46 |     target = char_tensor(line)
 47 |     return inp, target
 48 | 
 49 | e = EncoderRNN(n_characters, hidden_size, embed_size)
 50 | d = DecoderRNN(embed_size, hidden_size, n_characters, 2)
 51 | vae = VAE(e, d)
 52 | optimizer = torch.optim.Adam(vae.parameters(), lr=learning_rate)
 53 | 
 54 | criterion = nn.CrossEntropyLoss()
 55 | 
 56 | if USE_CUDA:
 57 |     vae.cuda()
 58 |     criterion.cuda()
 59 | 
 60 | log_every = 200
 61 | save_every = 5000
 62 | job = sconce.Job('vae', {
 63 |     'hidden_size': hidden_size,
 64 |     'embed_size': embed_size,
 65 |     'learning_rate': learning_rate,
 66 |     'kld_weight': kld_weight,
 67 |     'temperature': temperature,
 68 |     'grad_clip': grad_clip,
 69 | })
 70 | 
 71 | job.log_every = log_every
 72 | 
 73 | def save():
 74 |     save_filename = 'vae.pt'
 75 |     torch.save(vae, save_filename)
 76 |     print('Saved as %s' % save_filename)
 77 | 
 78 | try:
 79 |     for epoch in range(n_epochs):
 80 |         input, target = random_training_set()
 81 | 
 82 |         optimizer.zero_grad()
 83 | 
 84 |         m, l, z, decoded = vae(input, temperature)
 85 |         if temperature > temperature_min:
 86 |             temperature -= temperature_dec
 87 | 
 88 |         loss = criterion(decoded, target)
 89 |         job.record(epoch, loss.data[0])
 90 | 
 91 |         KLD = (-0.5 * torch.sum(l - torch.pow(m, 2) - torch.exp(l) + 1, 1)).mean().squeeze()
 92 |         loss += KLD * kld_weight
 93 | 
 94 |         if epoch > kld_start_inc and kld_weight < kld_max:
 95 |             kld_weight += kld_inc
 96 | 
 97 |         loss.backward()
 98 |         # print('from', next(vae.parameters()).grad.data[0][0])
 99 |         ec = torch.nn.utils.clip_grad_norm(vae.parameters(), grad_clip)
100 |         # print('to  ', next(vae.parameters()).grad.data[0][0])
101 |         optimizer.step()
102 | 
103 |         if epoch % log_every == 0:
104 |             print('[%d] %.4f (k=%.4f, t=%.4f, kl=%.4f, ec=%.4f)' % (
105 |                 epoch, loss.data[0], kld_weight, temperature, KLD.data[0], ec
106 |             ))
107 |             print('   (target) "%s"' % longtensor_to_string(target))
108 |             generated = vae.decoder.generate(z, MAX_LENGTH, temperature)
109 |             print('(generated) "%s"' % tensor_to_string(generated))
110 |             print('')
111 | 
112 |         if epoch > 0 and epoch % save_every == 0:
113 |             save()
114 | 
115 |     save()
116 | 
117 | except KeyboardInterrupt as err:
118 |     print("ERROR", err)
119 |     print("Saving before quit...")
120 |     save()
121 | 
122 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | from torch.autograd import Variable
  4 | import torch.nn.functional as F
  5 | from helpers import *
  6 | 
  7 | MIN_LENGTH = 10
  8 | MAX_LENGTH = 50
  9 | MAX_SAMPLE = False
 10 | MAX_SAMPLE = True
 11 | 
 12 | class Encoder(nn.Module):
 13 |     def sample(self, mu, logvar):
 14 |         eps = Variable(torch.randn(mu.size()))
 15 |         if USE_CUDA:
 16 |             eps = eps.cuda()
 17 |         std = torch.exp(logvar / 2.0)
 18 |         return mu + eps * std
 19 | 
 20 | # Encoder
 21 | # ------------------------------------------------------------------------------
 22 | 
 23 | # Encode into Z with mu and log_var
 24 | 
 25 | class EncoderRNN(Encoder):
 26 |     def __init__(self, input_size, hidden_size, output_size, n_layers=1, bidirectional=True):
 27 |         super(EncoderRNN, self).__init__()
 28 |         self.input_size = input_size
 29 |         self.hidden_size = hidden_size
 30 |         self.output_size = output_size
 31 |         self.n_layers = n_layers
 32 |         self.bidirectional = bidirectional
 33 | 
 34 |         self.embed = nn.Embedding(input_size, hidden_size)
 35 |         self.gru = nn.GRU(hidden_size, hidden_size, n_layers, dropout=0.1, bidirectional=bidirectional)
 36 |         self.o2p = nn.Linear(hidden_size, output_size * 2)
 37 | 
 38 |     def forward(self, input):
 39 |         embedded = self.embed(input).unsqueeze(1)
 40 | 
 41 |         output, hidden = self.gru(embedded, None)
 42 |         output = output[-1] # Take only the last value
 43 |         if self.bidirectional:
 44 |             output = output[:, :self.hidden_size] + output[: ,self.hidden_size:] # Sum bidirectional outputs
 45 | 
 46 |         ps = self.o2p(output)
 47 |         mu, logvar = torch.chunk(ps, 2, dim=1)
 48 |         z = self.sample(mu, logvar)
 49 |         return mu, logvar, z
 50 | 
 51 | # Decoder
 52 | # ------------------------------------------------------------------------------
 53 | 
 54 | # Decode from Z into sequence
 55 | 
 56 | class DecoderRNN(nn.Module):
 57 |     def __init__(self, input_size, hidden_size, output_size, n_layers=1, dropout_p=0.1):
 58 |         super(DecoderRNN, self).__init__()
 59 |         self.input_size = input_size
 60 |         self.hidden_size = hidden_size
 61 |         self.output_size = output_size
 62 |         self.n_layers = n_layers
 63 | 
 64 |         self.embed = nn.Embedding(output_size, hidden_size)
 65 |         self.dropout = nn.Dropout(dropout_p)
 66 |         # self.gru = nn.GRU(hidden_size + input_size, hidden_size, n_layers)
 67 |         self.z2h = nn.Linear(input_size, hidden_size)
 68 |         self.gru = nn.GRU(hidden_size + input_size, hidden_size, n_layers, dropout=dropout_p)
 69 |         self.i2h = nn.Linear(hidden_size + input_size, hidden_size)
 70 |         self.h2o = nn.Linear(hidden_size * 2, hidden_size)
 71 |         self.out = nn.Linear(hidden_size + input_size, output_size)
 72 | 
 73 |     def sample(self, output, temperature):
 74 |         if MAX_SAMPLE:
 75 |             # Sample top value only
 76 |             top_i = output.data.topk(1)[1][0][0]
 77 | 
 78 |         else:
 79 |             # Sample from the network as a multinomial distribution
 80 |             output_dist = output.data.view(-1).div(temperature).exp()
 81 |             top_i = torch.multinomial(output_dist, 1)[0]
 82 | 
 83 |         input = Variable(torch.LongTensor([top_i]))
 84 |         if USE_CUDA:
 85 |             input = input.cuda()
 86 |         return input, top_i
 87 | 
 88 |     def forward(self, z, inputs, temperature):
 89 |         n_steps = inputs.size(0)
 90 |         outputs = Variable(torch.zeros(n_steps, 1, self.output_size))
 91 |         if USE_CUDA:
 92 |             outputs = outputs.cuda()
 93 | 
 94 |         input = Variable(torch.LongTensor([SOS]))
 95 |         if USE_CUDA:
 96 |             input = input.cuda()
 97 |         hidden = self.z2h(z).unsqueeze(0).repeat(self.n_layers, 1, 1)
 98 | 
 99 |         for i in range(n_steps):
100 |             output, hidden = self.step(i, z, input, hidden, temperature)
101 |             outputs[i] = output
102 | 
103 |             use_teacher_forcing = random.random() < temperature
104 |             if use_teacher_forcing:
105 |                 input = inputs[i]
106 |             else:
107 |                 input, top_i = self.sample(output, temperature)
108 | 
109 |         return outputs.squeeze(1)
110 | 
111 |     def generate(self, z, n_steps, temperature):
112 |         outputs = Variable(torch.zeros(n_steps, 1, self.output_size))
113 |         if USE_CUDA:
114 |             outputs = outputs.cuda()
115 | 
116 |         input = Variable(torch.LongTensor([SOS]))
117 |         if USE_CUDA:
118 |             input = input.cuda()
119 |         hidden = self.z2h(z).unsqueeze(0).repeat(self.n_layers, 1, 1)
120 | 
121 |         for i in range(n_steps):
122 |             output, hidden = self.step(i, z, input, hidden, temperature)
123 |             outputs[i] = output
124 |             input, top_i = self.sample(output, temperature)
125 |             if top_i == EOS: break
126 | 
127 |         return outputs.squeeze(1)
128 | 
129 |     def step(self, s, z, input, hidden, temperature=1.0):
130 |         # print('[DecoderRNN.step] s =', s, 'z =', z.size(), 'i =', input.size(), 'h =', hidden.size())
131 |         input = F.relu(self.embed(input))
132 |         input = torch.cat((input, z), 1)
133 |         input = input.unsqueeze(0)
134 |         output, hidden = self.gru(input, hidden)
135 |         output = output.squeeze(0)
136 |         output = torch.cat((output, z), 1)
137 |         output = self.out(output)
138 |         return output, hidden
139 | 
140 | # Container
141 | # ------------------------------------------------------------------------------
142 | 
class VAE(nn.Module):
    """Container that chains an encoder and a decoder."""

    def __init__(self, encoder, decoder):
        super(VAE, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, inputs, temperature=1.0):
        """Encode ``inputs``, decode from the sampled code, and return everything.

        Returns:
            ``(m, l, z, decoded)``: the three encoder outputs followed by the
            decoder output for ``(z, inputs, temperature)``.
        """
        mu, logvar, code = self.encoder(inputs)
        return mu, logvar, code, self.decoder(code, inputs, temperature)
153 | 
154 | # Test
155 | 
if __name__ == '__main__':
    # Smoke test: push one short string through a small model and print the
    # size of each encoder output plus the decoded string.
    hidden_size = 20
    embed_size = 10
    e = EncoderRNN(n_characters, hidden_size, embed_size)
    d = DecoderRNN(embed_size, hidden_size, n_characters, 2)
    vae = VAE(e, d)
    if USE_CUDA:
        vae.cuda()  # moves both the encoder and the decoder
    m, l, z, decoded = vae(char_tensor('@spro'))
    for label, value in (('m =', m), ('l =', l), ('z =', z)):
        print(label, value.size())
    print('decoded', tensor_to_string(decoded))
170 | 
171 | 


--------------------------------------------------------------------------------