├── README.md ├── data ├── dracula.txt ├── moby-dick.txt ├── pride-prejudice.txt └── tale-of-two-cities.txt ├── main_generate.py └── model_generate.pt /README.md: -------------------------------------------------------------------------------- 1 | # Text-Generation 2 | 3 | An attention-based deep neural language model using bidirectional LSTMs to generate text sequences character-by-character, given a context sequence of text. 4 | 5 | Utilized a few English novels for training and evaluating the model. 6 | 7 | Utilized the PyTorch framework for development. Used an NVIDIA GeForce GTX 1080 Ti GPU machine to facilitate training of the model. 8 | 9 | The model achieves an average validation perplexity of approximately 93. 10 | 11 | The trained model is available as "model_generate.pt". 12 | 13 | 14 | ## Text generation examples 15 | 16 | ### From training novels: 17 | 18 | **1. Pride and Prejudice - Jane Austen**: 19 | 20 | ...**Context** : The tumult of her mind, was now painfully great. She knew not how to support herself, and from actual weakness sat down and cried for half-an-hour. 21 | 22 | ...**Generated**: *As I found that I think my cold--ship of any command, obstructive, and in the closely. What was all good scarcely distress,when you can the rise and then immen that she seek sound him that you are more night be. Jane so friend! There was of the steps of a tran by the sea all the room for her. When* 23 | 24 | **2. Dracula - Bram Stoker**: 25 | 26 | ...**Context** : To believe in things that you cannot. Let me illustrate. I heard once of an American who so defined faith: 'that faculty which enables us to believe things which we know to be untrue.' For one, I follow that man. 
27 | 28 | ...**Generated**: *Go not things, he might I be things to any man where any dark to call a harders is from the matter carriage, not only starm sunset from the day.* 29 | 30 | *"I don't want of entire more so receiving the spirit in the room was a great us and such a mist we should go it, and the window glanced from you get to re* 31 | 32 | 33 | ### From non-training novels (but same author or same genre): 34 | 35 | **1. Emma - Jane Austen**: 36 | 37 | ...**Context** : During his present short stay, Emma had barely seen him; but just enough to feel that the first meeting was over, and to give her the impression of his not being improved by the mixture of pique and pretension, now spread over his air. 38 | 39 | ...**Generated**: *fish, for I can don, that the sat up in the passions at all, for my soon with exoloce, for my mark who had not to be that he had long considerable the smalled by the sobubs; when the degree was, authorded you at land with the more gloped on* 40 | 41 | **2. The Strange Case of Dr. Jekyll and Mr. Hyde - Robert Louis Stevenson**: 42 | 43 | ...**Context** : Poole swung the axe over his shoulder; the blow shook the building, and the red baize door leaped against the lock and hinges. A dismal screech, as of mere animal terror, rang from the cabinet. 44 | 45 | ...**Generated**: *But all the rest on the London. When the shapeded, that strength of the bell of the druck. "_Court_, also insported me, "that they would find there well, and then that time lad hepromided that my back, the ground, who sun I would never us. 
"""Character-level text generation with an attention-augmented (bi)LSTM.

Reads every file in ``./data`` into one training corpus, trains
``TextGenerate`` on random fixed-length chunks of it, and then samples
continuations for a handful of context passages.  Weights are written to and
read back from ``./model_generate.pt``.

``evaluate``, ``train`` and ``generate`` use the module-level names ``model``,
``criterion`` and ``model_optimizer`` that are created in the ``__main__``
section at the bottom of this file.
"""
import os
import unidecode
import string
import random
import re
import numpy as np
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F
import time
import math
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

all_characters = string.printable
n_characters = len(all_characters)

# character -> vocabulary index; built once so char_tensor() does not have to
# scan all_characters for every character it encodes
_char_to_index = {ch: idx for idx, ch in enumerate(all_characters)}


# get data
def _read_corpus(data_dir='./data'):
    """Return the ASCII-transliterated contents of every file in *data_dir*.

    Each file's text is followed by a newline.  Files are opened in a ``with``
    block so their handles are closed, and the pieces are joined once instead
    of growing a string with ``+=``.
    """
    pieces = []
    for name in os.listdir(data_dir):
        with open(os.path.join(data_dir, name)) as handle:
            pieces.append(unidecode.unidecode(handle.read()) + "\n")
    return "".join(pieces)


all_files = _read_corpus()
file_len = len(all_files)

# use CUDA if available
use_cuda = torch.cuda.is_available()

# define length of string to consider while training
chunk_len = 250


# get a random chunk of data of length 'chunk_len'
def random_chunk(chunk_len):
    """Return a random slice of the corpus holding ``chunk_len + 1`` characters.

    The extra character lets callers split the chunk into an input sequence
    and a target sequence shifted by one position.

    Raises:
        ValueError: if the corpus has fewer than ``chunk_len + 1`` characters.
    """
    # random.randint is inclusive on both ends, so the last usable start index
    # is file_len - (chunk_len + 1); the previous upper bound of
    # file_len - chunk_len could return a chunk that was one character short.
    last_start = file_len - chunk_len - 1
    if last_start < 0:
        raise ValueError(
            "corpus has %d characters; need at least %d" % (file_len, chunk_len + 1))
    start_index = random.randint(0, last_start)
    end_index = start_index + chunk_len + 1
    return all_files[start_index:end_index]


# main model class
class TextGenerate(nn.Module):
    """Embedding -> LSTM -> dot-product attention -> two linear layers.

    The model consumes one character index per ``forward`` call and returns
    unnormalised scores over ``output_size`` characters together with the new
    LSTM state.
    """

    def __init__(self, input_size, hidden_size, output_size, n_layers=1, bi=True):
        """
        Args:
            input_size: number of distinct input characters (embedding rows).
            hidden_size: embedding width and LSTM hidden width per direction.
            output_size: number of output classes.
            n_layers: number of stacked LSTM layers.
            bi: whether the LSTM is bidirectional.
        """
        super(TextGenerate, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size
        self.n_layers = n_layers
        self.bi = bi

        self.encoder = nn.Embedding(input_size, hidden_size)
        self.lstm = nn.LSTM(hidden_size, hidden_size, n_layers, bidirectional=self.bi)
        # a bidirectional LSTM emits forward and backward features side by side
        lstm_features = hidden_size * 2 if self.bi else hidden_size
        self.decoder = nn.Linear(lstm_features, output_size)
        self.out = nn.Linear(output_size, output_size)
        self.dropout = nn.Dropout(0.1)

    @staticmethod
    def _attend(outputs, query):
        """Return the softmax(outputs . query)-weighted sum of *outputs*.

        *outputs* is indexed (batch, seq, features) and *query* (batch,
        features); the result is (batch, features).
        """
        weights = F.softmax(torch.bmm(outputs, query.unsqueeze(2)).squeeze(2), 1)
        return torch.bmm(outputs.transpose(1, 2), weights.unsqueeze(2)).squeeze(2)

    def forward(self, input, hidden, cell):
        """Run a single character through the network.

        Args:
            input: tensor holding one character index.
            hidden: LSTM hidden state, shaped like ``init_hidden()``.
            cell: LSTM cell state, shaped like ``init_cell()``.

        Returns:
            ``(scores, (hidden, cell))`` where ``scores`` has shape
            ``(1, output_size)``.
        """
        # encoder
        embedded = self.dropout(self.encoder(input.view(1, -1)))
        output, states = self.lstm(embedded.view(1, 1, -1), (hidden, cell))
        output = output.permute(1, 0, 2)  # -> (batch, seq, features)

        # attention
        # NOTE: the LSTM input is reshaped to a sequence of length one above,
        # so each softmax below runs over a single position.
        if self.bi:
            out_fwd = output[:, :, :self.hidden_size]
            out_bwd = output[:, :, self.hidden_size:]
            # last two rows of h_n: final layer, forward then backward direction
            h_fwd, h_bwd = states[0][-2], states[0][-1]
            attn = torch.cat(
                (self._attend(out_fwd, h_fwd), self._attend(out_bwd, h_bwd)), 1)
        else:
            # `states` is the (h_n, c_n) tuple; the query is the final layer's
            # hidden state.  (The earlier `states.squeeze(0)` called a tensor
            # method on the tuple and failed whenever bi=False.)
            attn = self._attend(output, states[0][-1])

        # decoder
        output = self.decoder(attn)
        output = self.dropout(output)
        output = self.out(output)

        return output, states

    def _zero_state(self):
        """Return an all-zero state tensor of shape (layers * directions, 1, hidden)."""
        directions = 2 if self.bi else 1
        return torch.zeros(self.n_layers * directions, 1, self.hidden_size)

    def init_hidden(self):
        """Return a zero initial hidden state (on the CPU)."""
        return self._zero_state()

    def init_cell(self):
        """Return a zero initial cell state (on the CPU)."""
        return self._zero_state()


# turn string into list of longs
def char_tensor(string):
    """Encode *string* as a 1-D long tensor of indices into ``all_characters``.

    The tensor is moved to the GPU when ``use_cuda`` is set.

    Raises:
        ValueError: if *string* contains a character outside ``all_characters``.
    """
    try:
        indices = [_char_to_index[ch] for ch in string]
    except KeyError as err:
        # keep the exception type that str.index() used to raise here
        raise ValueError("character %r is not in all_characters" % err.args[0]) from err
    tensor = torch.tensor(indices, dtype=torch.long)
    if use_cuda:
        tensor = tensor.cuda()
    return tensor


# get random training data
def random_training_set(chunk_len=250):
    """Return ``(inp, target)`` tensors cut from one random corpus chunk.

    ``target`` is ``inp`` shifted one character to the right, so both hold
    ``chunk_len`` indices.
    """
    chunk = random_chunk(chunk_len)
    inp = char_tensor(chunk[:-1])
    target = char_tensor(chunk[1:])
    return inp, target


def _load_checkpoint(path='./model_generate.pt'):
    """Load weights from *path* into the global ``model`` and switch it to eval mode."""
    # map_location='cpu' lets a checkpoint saved on a GPU be read on a CPU-only
    # machine; load_state_dict then copies the values into the model's own
    # parameters, wherever those live.
    model.load_state_dict(torch.load(path, map_location='cpu'))
    model.eval()


def _initial_state():
    """Return zero ``(hidden, cell)`` tensors on the device selected by ``use_cuda``."""
    hidden = model.init_hidden()
    cell = model.init_cell()
    if use_cuda:
        hidden, cell = hidden.cuda(), cell.cuda()
    return hidden, cell


def _prime(prime_input, hidden, cell):
    """Feed every priming character except the last to build up the LSTM state."""
    for p in range(len(prime_input) - 1):
        _, (hidden, cell) = model(prime_input[p], hidden, cell)
    return hidden, cell


def _sample_char(output, temperature):
    """Sample one character from the model scores *output* at *temperature*."""
    # softmax(scores / T) is the normalised form of exp(scores / T), so the
    # sampling distribution is unchanged, but it cannot overflow for large scores
    probs = F.softmax(output.detach().view(-1) / temperature, dim=0)
    top_i = torch.multinomial(probs, 1).item()
    return all_characters[top_i]


# evaluate model
def evaluate(target_str, prime_str='A', predict_len=100, temperature=0.8):
    """Sample *predict_len* characters after *prime_str* and score them.

    The saved checkpoint is loaded first.  At every step the model's scores
    are compared with the corresponding character of *target_str* using the
    global ``criterion``; the next input is the character the model sampled,
    not the target character.

    Args:
        target_str: reference continuation, at least *predict_len* characters.
        prime_str: context used to warm up the LSTM state.
        predict_len: number of characters to sample.
        temperature: divisor applied to the scores before sampling.

    Returns:
        ``(predicted, loss_tot, perplexity)``: the prime string, a separator
        and the sampled text; the mean per-character loss; and its perplexity.

    Raises:
        IndexError: if *target_str* is shorter than *predict_len*.
    """
    if len(target_str) < predict_len:
        # fail before loading the model instead of part-way through sampling
        raise IndexError(
            "target_str has %d characters but predict_len is %d"
            % (len(target_str), predict_len))

    _load_checkpoint()
    hidden, cell = _initial_state()

    prime_input = char_tensor(prime_str)
    pieces = [prime_str + "\n-------->\n"]
    loss = 0.

    # no gradients are needed here; without this every step would stay
    # attached to an autograd graph through `loss` and the carried state
    with torch.no_grad():
        # use priming string to "build up" hidden state
        hidden, cell = _prime(prime_input, hidden, cell)
        inp = prime_input[-1]

        for p in range(predict_len):
            output, (hidden, cell) = model(inp, hidden, cell)
            loss += criterion(output, char_tensor(target_str[p]))

            # add predicted character to string and use as next input
            predicted_char = _sample_char(output, temperature)
            pieces.append(predicted_char)
            inp = char_tensor(predicted_char)

    predicted = "".join(pieces)
    loss_tot = total_loss(loss, predict_len)
    perplexity = perplexity_score(loss_tot)

    return predicted, loss_tot, perplexity


# get loss
def total_loss(loss, predict_len):
    """Return the summed *loss* divided by *predict_len* as a Python float.

    *loss* may be a tensor or a plain number (it is a plain ``0`` when no
    step was accumulated).
    """
    value = loss.item() if torch.is_tensor(loss) else float(loss)
    return value / predict_len


# get perplexity
def perplexity_score(loss):
    """Return the perplexity for a mean cross-entropy *loss*.

    ``nn.CrossEntropyLoss`` uses the natural logarithm, so the loss is in nats
    and the perplexity is ``e ** loss`` (``2 ** loss`` would only be right for
    a loss measured in bits).
    """
    return math.exp(loss)


# helper function for time elapsed
def time_since(since):
    """Return the time elapsed since *since* (a ``time.time()`` value) as 'Xm Ys'."""
    minutes, seconds = divmod(time.time() - since, 60)
    return '%dm %ds' % (minutes, seconds)


# train model
def train(inp, target, checkpoint_path='./model_generate.pt'):
    """Run one optimisation step over a character sequence.

    Args:
        inp: 1-D long tensor of input character indices.
        target: 1-D long tensor of next-character indices, same length as *inp*.
        checkpoint_path: where to save the weights after the step; pass
            ``None`` to skip saving.

    Returns:
        ``(loss_tot, perplexity)`` for this sequence.
    """
    model.train()
    # out-of-place so the caller's tensor keeps its shape
    target = target.unsqueeze(-1)
    hidden, cell = _initial_state()

    model.zero_grad()
    loss = 0

    # iterate over the sequence actually passed in rather than the global
    # chunk_len, so callers may use any length
    seq_len = len(inp)
    for c in range(seq_len):
        output, (hidden, cell) = model(inp[c], hidden, cell)
        loss += criterion(output, target[c])

    loss.backward()
    model_optimizer.step()

    # NOTE: by default this writes the checkpoint after every step, which is
    # what evaluate()/generate() read back.
    if checkpoint_path is not None:
        torch.save(model.state_dict(), checkpoint_path)

    loss_tot = total_loss(loss, seq_len)
    perplexity = perplexity_score(loss_tot)

    return loss_tot, perplexity


# generate text given context
def generate(prime_str='A', predict_len=100, temperature=0.8):
    """Load the saved checkpoint and sample *predict_len* characters after *prime_str*.

    Returns:
        The prime string, a separator line and the sampled text as one string.
    """
    _load_checkpoint()
    hidden, cell = _initial_state()

    prime_input = char_tensor(prime_str)
    pieces = [prime_str + "\n--------->\n"]

    with torch.no_grad():
        # use priming string to "build up" hidden state
        hidden, cell = _prime(prime_input, hidden, cell)
        inp = prime_input[-1]

        for p in range(predict_len):
            output, (hidden, cell) = model(inp, hidden, cell)

            # add predicted character to string and use as next input
            predicted_char = _sample_char(output, temperature)
            pieces.append(predicted_char)
            inp = char_tensor(predicted_char)

    return "".join(pieces)


# main
if __name__ == "__main__":

    n_epochs = 25000
    print_every = 2500
    plot_every = 100
    hidden_size = 100
    n_layers = 2
    lr = 0.0005
    bi = True

    # define model
    model = TextGenerate(n_characters, hidden_size, n_characters, n_layers, bi)
    if use_cuda:
        model = model.cuda()
    model_optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss()

    # train the model
    start = time.time()
    all_losses = []
    all_perplexities = []
    loss_avg = 0.
    perplexity_avg = 0.

    for epoch in range(1, n_epochs + 1):

        loss, perplexity = train(*random_training_set(chunk_len))
        loss_avg += loss
        perplexity_avg += perplexity

        if epoch % print_every == 0:
            print('[%s taken (%d epochs %d%% trained) Loss: %.4f Perplexity: %.4f]' % (time_since(start), epoch, epoch / n_epochs * 100, loss, perplexity))

        if epoch % plot_every == 0:
            all_losses.append(loss_avg / plot_every)
            all_perplexities.append(perplexity_avg / plot_every)
            loss_avg = 0.
            perplexity_avg = 0.

    # plt.figure()
    # plt.plot(all_losses)
    # plt.show()

    # plt.figure()
    # plt.plot(all_perplexities)
    # plt.show()

    # evaluation
    chunk = random_chunk(500)
    prime_str, target_str = chunk[:251], chunk[251:]

    gen_text, loss, perplexity = evaluate(target_str, prime_str, 250, temperature=0.8)

    print("\nLoss: ", loss, " Perplexity:" , perplexity, "\n")
    print("\n", gen_text, "\n")

    # training evaluation

    # Pride and Prejudice - Jane Austen
    print(generate(
        "\nThe tumult of her mind, was now painfully great. She knew not how "
        "to support herself, and from actual weakness sat down and cried for "
        "half-an-hour. ", 300, temperature=0.8))

    # Dracula - Bram Stoker
    print(generate(
        "\nTo believe in things that you cannot. Let me illustrate. I heard once "
        "of an American who so defined faith: 'that faculty which enables us to "
        "believe things which we know to be untrue.' For one, I follow that man. ",
        300, temperature=0.8))

    # outside evaluation

    # Emma - Jane Austen
    print(generate(
        "\nDuring his present short stay, Emma had barely seen him; but just enough "
        "to feel that the first meeting was over, and to give her the impression "
        "of his not being improved by the mixture of pique and pretension, now "
        "spread over his air. ", 300, temperature=0.8))

    # The Strange Case Of Dr. Jekyll And Mr. Hyde - Robert Louis Stevenson
    print(generate(
        "\nPoole swung the axe over his shoulder; the blow shook the building, and "
        "the red baize door leaped against the lock and hinges. A dismal "
        "screech, as of mere animal terror, rang from the cabinet. ",
        300, temperature=0.8))