├── README.md
└── cbow.py

/README.md:
--------------------------------------------------------------------------------
# continuous-bag-of-words
The Continuous Bag-of-Words (CBOW) model is frequently used in deep learning for NLP. It predicts a target word from a context of a few words before and a few words after it.
This is distinct from language modeling, since CBOW is not sequential and does not have to be probabilistic. Typically, CBOW is used to quickly train word embeddings, which are then used to initialize the embedding layer of some more complicated model; this is usually referred to as pretraining embeddings. It almost always improves performance by a couple of percent.

This is a solution to the final exercise of [this](http://pytorch.org/tutorials/beginner/deep_learning_nlp_tutorial.html) great tutorial on deep learning for NLP in PyTorch.

#### Example
Corpus
```
We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.
```
Context
```
People, create, to, direct
```
Output
```
programs
```
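#### Reusing the embeddings
A minimal sketch of the pretraining idea above, assuming `model` is the `CBOW` instance trained in `cbow.py`; the downstream embedding layer here is hypothetical, not part of this repo:
```
import torch.nn as nn

# Copy the learned table out of the trained CBOW model
# (shape: vocab_size x embedding_dim).
pretrained = model.embeddings.weight.detach().clone()

# Seed a downstream model's embedding layer with the pretrained vectors;
# freeze=False lets the new task keep fine-tuning them.
embedding_layer = nn.Embedding.from_pretrained(pretrained, freeze=False)
```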
--------------------------------------------------------------------------------
/cbow.py:
--------------------------------------------------------------------------------
import torch
import torch.nn as nn

def make_context_vector(context, word_to_ix):
    idxs = [word_to_ix[w] for w in context]
    return torch.tensor(idxs, dtype=torch.long)

CONTEXT_SIZE = 2  # 2 words to the left, 2 to the right
EMBEDDING_DIM = 100

raw_text = """We are about to study the idea of a computational process.
Computational processes are abstract beings that inhabit computers.
As they evolve, processes manipulate other abstract things called data.
The evolution of a process is directed by a pattern of rules
called a program. People create programs to direct processes. In effect,
we conjure the spirits of the computer with our spells.""".split()


# Deriving a set from `raw_text` deduplicates the tokens.
vocab = set(raw_text)
vocab_size = len(vocab)

word_to_ix = {word: ix for ix, word in enumerate(vocab)}
ix_to_word = {ix: word for ix, word in enumerate(vocab)}

# Build (context, target) pairs: CONTEXT_SIZE words on each side of the target.
data = []
for i in range(CONTEXT_SIZE, len(raw_text) - CONTEXT_SIZE):
    context = [raw_text[i - 2], raw_text[i - 1],
               raw_text[i + 1], raw_text[i + 2]]
    target = raw_text[i]
    data.append((context, target))


class CBOW(nn.Module):
    def __init__(self, vocab_size, embedding_dim):
        super(CBOW, self).__init__()

        # out: 1 x embedding_dim
        self.embeddings = nn.Embedding(vocab_size, embedding_dim)
        self.linear1 = nn.Linear(embedding_dim, 128)
        self.activation_function1 = nn.ReLU()

        # out: 1 x vocab_size
        self.linear2 = nn.Linear(128, vocab_size)
        self.activation_function2 = nn.LogSoftmax(dim=-1)

    def forward(self, inputs):
        # Sum the context word embeddings into a single 1 x embedding_dim vector.
        embeds = self.embeddings(inputs).sum(dim=0).view(1, -1)
        out = self.linear1(embeds)
        out = self.activation_function1(out)
        out = self.linear2(out)
        out = self.activation_function2(out)
        return out

    def get_word_embedding(self, word):
        word = torch.tensor([word_to_ix[word]])
        return self.embeddings(word).view(1, -1)


model = CBOW(vocab_size, EMBEDDING_DIM)

loss_function = nn.NLLLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

# TRAINING
for epoch in range(50):
    total_loss = 0

    for context, target in data:
        context_vector = make_context_vector(context, word_to_ix)
        log_probs = model(context_vector)
        total_loss += loss_function(log_probs, torch.tensor([word_to_ix[target]]))

    # Optimize at the end of each epoch
    optimizer.zero_grad()
    total_loss.backward()
    optimizer.step()

# TESTING
context = ['People', 'create', 'to', 'direct']
context_vector = make_context_vector(context, word_to_ix)
log_probs = model(context_vector)

# Print result
print(f'Raw text: {" ".join(raw_text)}\n')
print(f'Context: {context}\n')
print(f'Prediction: {ix_to_word[torch.argmax(log_probs[0]).item()]}')
--------------------------------------------------------------------------------