├── .gitignore
├── LICENSE
├── README.md
├── chat.py
├── intents.json
├── model.py
├── nltk_utils.py
└── train.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | share/python-wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | MANIFEST
 28 | 
 29 | # PyInstaller
 30 | #  Usually these files are written by a python script from a template
 31 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 32 | *.manifest
 33 | *.spec
 34 | 
 35 | # Installer logs
 36 | pip-log.txt
 37 | pip-delete-this-directory.txt
 38 | 
 39 | # Unit test / coverage reports
 40 | htmlcov/
 41 | .tox/
 42 | .nox/
 43 | .coverage
 44 | .coverage.*
 45 | .cache
 46 | nosetests.xml
 47 | coverage.xml
 48 | *.cover
 49 | *.py,cover
 50 | .hypothesis/
 51 | .pytest_cache/
 52 | cover/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | .pybuilder/
 76 | target/
 77 | 
 78 | # Jupyter Notebook
 79 | .ipynb_checkpoints
 80 | 
 81 | # IPython
 82 | profile_default/
 83 | ipython_config.py
 84 | 
 85 | # pyenv
 86 | #   For a library or package, you might want to ignore these files since the code is
 87 | #   intended to run in multiple environments; otherwise, check them in:
 88 | # .python-version
 89 | 
 90 | # pipenv
 91 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 92 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 93 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 94 | #   install all needed dependencies.
 95 | #Pipfile.lock
 96 | 
 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 98 | __pypackages__/
 99 | 
100 | # Celery stuff
101 | celerybeat-schedule
102 | celerybeat.pid
103 | 
104 | # SageMath parsed files
105 | *.sage.py
106 | 
107 | # Environments
108 | .env
109 | .venv
110 | env/
111 | venv/
112 | ENV/
113 | env.bak/
114 | venv.bak/
115 | 
116 | # Spyder project settings
117 | .spyderproject
118 | .spyproject
119 | 
120 | # Rope project settings
121 | .ropeproject
122 | 
123 | # mkdocs documentation
124 | /site
125 | 
126 | # mypy
127 | .mypy_cache/
128 | .dmypy.json
129 | dmypy.json
130 | 
131 | # Pyre type checker
132 | .pyre/
133 | 
134 | # pytype static type analyzer
135 | .pytype/
136 | 
137 | # Cython debug symbols
138 | cython_debug/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Patrick Loeber
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Implementation of a Contextual Chatbot in PyTorch.  
 2 | Simple chatbot implementation with PyTorch. 
 3 | 
 4 | - The implementation should be easy to follow for beginners and provide a basic understanding of chatbots.
 5 | - The implementation is straightforward with a Feed Forward Neural net with 2 hidden layers.
 6 | - Customization for your own use case is super easy. Just modify `intents.json` with possible patterns and responses and re-run the training (see below for more info).
 7 | 
 8 | The approach is inspired by this article and ported to PyTorch: [https://chatbotsmagazine.com/contextual-chat-bots-with-tensorflow-4391749d0077](https://chatbotsmagazine.com/contextual-chat-bots-with-tensorflow-4391749d0077).
 9 | 
10 | ## Watch the Tutorial
11 | [![Alt text](https://img.youtube.com/vi/RpWeNzfSUHw/hqdefault.jpg)](https://www.youtube.com/watch?v=RpWeNzfSUHw&list=PLqnslRFeH2UrFW4AUgn-eY37qOAWQpJyg)
12 | 
13 | ## Installation
14 | 
15 | ### Create an environment
16 | Whatever you prefer (e.g. `conda` or `venv`)
17 | ```console
 18 | $ mkdir myproject
19 | $ cd myproject
20 | $ python3 -m venv venv
21 | ```
22 | 
23 | ### Activate it
24 | Mac / Linux:
25 | ```console
26 | . venv/bin/activate
27 | ```
28 | Windows:
29 | ```console
30 | venv\Scripts\activate
31 | ```
32 | ### Install PyTorch and dependencies
33 | 
34 | For Installation of PyTorch see [official website](https://pytorch.org/).
35 | 
36 | You also need `nltk`:
37 |  ```console
38 | pip install nltk
39 |  ```
40 | 
41 | If you get an error during the first run, you also need to install `nltk.tokenize.punkt`:
42 | Run this once in your terminal:
43 |  ```console
44 | $ python
45 | >>> import nltk
46 | >>> nltk.download('punkt')
47 | ```
48 | 
49 | ## Usage
50 | Run
51 | ```console
52 | python train.py
53 | ```
 54 | This will dump a `data.pth` file. Then run
55 | ```console
56 | python chat.py
57 | ```
58 | ## Customize
59 | Have a look at [intents.json](intents.json). You can customize it according to your own use case. Just define a new `tag`, possible `patterns`, and possible `responses` for the chat bot. You have to re-run the training whenever this file is modified.
 60 | ```json
61 | {
62 |   "intents": [
63 |     {
64 |       "tag": "greeting",
65 |       "patterns": [
66 |         "Hi",
67 |         "Hey",
68 |         "How are you",
69 |         "Is anyone there?",
70 |         "Hello",
71 |         "Good day"
72 |       ],
73 |       "responses": [
74 |         "Hey :-)",
75 |         "Hello, thanks for visiting",
76 |         "Hi there, what can I do for you?",
77 |         "Hi there, how can I help?"
78 |       ]
79 |     },
80 |     ...
81 |   ]
82 | }
83 | ```
84 | 


--------------------------------------------------------------------------------
/chat.py:
--------------------------------------------------------------------------------
 1 | import random
 2 | import json
 3 | 
 4 | import torch
 5 | 
 6 | from model import NeuralNet
 7 | from nltk_utils import bag_of_words, tokenize
 8 | 
 9 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
10 | 
11 | with open('intents.json', 'r') as json_data:
12 |     intents = json.load(json_data)
13 | 
14 | FILE = "data.pth"
15 | data = torch.load(FILE)
16 | 
17 | input_size = data["input_size"]
18 | hidden_size = data["hidden_size"]
19 | output_size = data["output_size"]
20 | all_words = data['all_words']
21 | tags = data['tags']
22 | model_state = data["model_state"]
23 | 
24 | model = NeuralNet(input_size, hidden_size, output_size).to(device)
25 | model.load_state_dict(model_state)
26 | model.eval()
27 | 
28 | bot_name = "Sam"
29 | print("Let's chat! (type 'quit' to exit)")
30 | while True:
31 |     # sentence = "do you use credit cards?"
32 |     sentence = input("You: ")
33 |     if sentence == "quit":
34 |         break
35 | 
36 |     sentence = tokenize(sentence)
37 |     X = bag_of_words(sentence, all_words)
38 |     X = X.reshape(1, X.shape[0])
39 |     X = torch.from_numpy(X).to(device)
40 | 
41 |     output = model(X)
42 |     _, predicted = torch.max(output, dim=1)
43 | 
44 |     tag = tags[predicted.item()]
45 | 
46 |     probs = torch.softmax(output, dim=1)
47 |     prob = probs[0][predicted.item()]
48 |     if prob.item() > 0.75:
49 |         for intent in intents['intents']:
50 |             if tag == intent["tag"]:
51 |                 print(f"{bot_name}: {random.choice(intent['responses'])}")
52 |     else:
53 |         print(f"{bot_name}: I do not understand...")


--------------------------------------------------------------------------------
/intents.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "intents": [
 3 |     {
 4 |       "tag": "greeting",
 5 |       "patterns": [
 6 |         "Hi",
 7 |         "Hey",
 8 |         "How are you",
 9 |         "Is anyone there?",
10 |         "Hello",
11 |         "Good day"
12 |       ],
13 |       "responses": [
14 |         "Hey :-)",
15 |         "Hello, thanks for visiting",
16 |         "Hi there, what can I do for you?",
17 |         "Hi there, how can I help?"
18 |       ]
19 |     },
20 |     {
21 |       "tag": "goodbye",
22 |       "patterns": ["Bye", "See you later", "Goodbye"],
23 |       "responses": [
24 |         "See you later, thanks for visiting",
25 |         "Have a nice day",
26 |         "Bye! Come back again soon."
27 |       ]
28 |     },
29 |     {
30 |       "tag": "thanks",
31 |       "patterns": ["Thanks", "Thank you", "That's helpful", "Thank's a lot!"],
32 |       "responses": ["Happy to help!", "Any time!", "My pleasure"]
33 |     },
34 |     {
35 |       "tag": "items",
36 |       "patterns": [
37 |         "Which items do you have?",
38 |         "What kinds of items are there?",
39 |         "What do you sell?"
40 |       ],
41 |       "responses": [
42 |         "We sell coffee and tea",
43 |         "We have coffee and tea"
44 |       ]
45 |     },
46 |     {
47 |       "tag": "payments",
48 |       "patterns": [
49 |         "Do you take credit cards?",
50 |         "Do you accept Mastercard?",
51 |         "Can I pay with Paypal?",
52 |         "Are you cash only?"
53 |       ],
54 |       "responses": [
55 |         "We accept VISA, Mastercard and Paypal",
56 |         "We accept most major credit cards, and Paypal"
57 |       ]
58 |     },
59 |     {
60 |       "tag": "delivery",
61 |       "patterns": [
62 |         "How long does delivery take?",
63 |         "How long does shipping take?",
64 |         "When do I get my delivery?"
65 |       ],
66 |       "responses": [
67 |         "Delivery takes 2-4 days",
68 |         "Shipping takes 2-4 days"
69 |       ]
70 |     },
71 |     {
72 |       "tag": "funny",
73 |       "patterns": [
74 |         "Tell me a joke!",
75 |         "Tell me something funny!",
76 |         "Do you know a joke?"
77 |       ],
78 |       "responses": [
79 |         "Why did the hipster burn his mouth? He drank the coffee before it was cool.",
80 |         "What did the buffalo say when his son left for college? Bison."
81 |       ]
82 |     }
83 |   ]
84 | }
85 | 


--------------------------------------------------------------------------------
/model.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn as nn
 3 | 
 4 | 
 5 | class NeuralNet(nn.Module):
 6 |     def __init__(self, input_size, hidden_size, num_classes):
 7 |         super(NeuralNet, self).__init__()
 8 |         self.l1 = nn.Linear(input_size, hidden_size) 
 9 |         self.l2 = nn.Linear(hidden_size, hidden_size) 
10 |         self.l3 = nn.Linear(hidden_size, num_classes)
11 |         self.relu = nn.ReLU()
12 |     
13 |     def forward(self, x):
14 |         out = self.l1(x)
15 |         out = self.relu(out)
16 |         out = self.l2(out)
17 |         out = self.relu(out)
18 |         out = self.l3(out)
19 |         # no activation and no softmax at the end
20 |         return out


--------------------------------------------------------------------------------
/nltk_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import nltk
 3 | # nltk.download('punkt')
 4 | from nltk.stem.porter import PorterStemmer
 5 | stemmer = PorterStemmer()
 6 | 
 7 | def tokenize(sentence):
 8 |     """
 9 |     split sentence into array of words/tokens
10 |     a token can be a word or punctuation character, or number
11 |     """
12 |     return nltk.word_tokenize(sentence)
13 | 
14 | 
def stem(word):
    """
    stemming = reduce a word to its root form
    the word is lower-cased first, then passed to the module-level
    Porter stemmer
    examples:
    words = ["organize", "organizes", "organizing"]
    words = [stem(w) for w in words]
    -> ["organ", "organ", "organ"]
    """
    lowered = word.lower()
    return stemmer.stem(lowered)
24 | 
25 | 
def bag_of_words(tokenized_sentence, words):
    """
    return bag of words array:
    1 for each known word that exists in the sentence, 0 otherwise

    tokenized_sentence: iterable of tokens; each one is passed through
                        stem() before being compared
    words:              the vocabulary; entries are compared as-is, so
                        they must already be in stemmed form
    returns:            float32 numpy array with one entry per element
                        of `words`

    example:
    sentence = ["hello", "how", "are", "you"]
    words = ["hi", "hello", "I", "you", "bye", "thank", "cool"]
    bag   = [  0 ,    1 ,    0 ,   1 ,    0 ,    0 ,      0]
    """
    # stem each word once; a set makes every vocabulary lookup below O(1)
    # instead of re-scanning a list for each known word
    sentence_words = {stem(word) for word in tokenized_sentence}
    # initialize bag with 0 for each word
    bag = np.zeros(len(words), dtype=np.float32)
    for idx, w in enumerate(words):
        if w in sentence_words:
            bag[idx] = 1

    return bag


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import random
  3 | import json
  4 | 
  5 | import torch
  6 | import torch.nn as nn
  7 | from torch.utils.data import Dataset, DataLoader
  8 | 
  9 | from nltk_utils import bag_of_words, tokenize, stem
 10 | from model import NeuralNet
 11 | 
 12 | with open('intents.json', 'r') as f:
 13 |     intents = json.load(f)
 14 | 
 15 | all_words = []
 16 | tags = []
 17 | xy = []
 18 | # loop through each sentence in our intents patterns
 19 | for intent in intents['intents']:
 20 |     tag = intent['tag']
 21 |     # add to tag list
 22 |     tags.append(tag)
 23 |     for pattern in intent['patterns']:
 24 |         # tokenize each word in the sentence
 25 |         w = tokenize(pattern)
 26 |         # add to our words list
 27 |         all_words.extend(w)
 28 |         # add to xy pair
 29 |         xy.append((w, tag))
 30 | 
 31 | # stem and lower each word
 32 | ignore_words = ['?', '.', '!']
 33 | all_words = [stem(w) for w in all_words if w not in ignore_words]
 34 | # remove duplicates and sort
 35 | all_words = sorted(set(all_words))
 36 | tags = sorted(set(tags))
 37 | 
 38 | print(len(xy), "patterns")
 39 | print(len(tags), "tags:", tags)
 40 | print(len(all_words), "unique stemmed words:", all_words)
 41 | 
 42 | # create training data
 43 | X_train = []
 44 | y_train = []
 45 | for (pattern_sentence, tag) in xy:
 46 |     # X: bag of words for each pattern_sentence
 47 |     bag = bag_of_words(pattern_sentence, all_words)
 48 |     X_train.append(bag)
 49 |     # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot
 50 |     label = tags.index(tag)
 51 |     y_train.append(label)
 52 | 
 53 | X_train = np.array(X_train)
 54 | y_train = np.array(y_train)
 55 | 
 56 | # Hyper-parameters 
 57 | num_epochs = 1000
 58 | batch_size = 8
 59 | learning_rate = 0.001
 60 | input_size = len(X_train[0])
 61 | hidden_size = 8
 62 | output_size = len(tags)
 63 | print(input_size, output_size)
 64 | 
 65 | class ChatDataset(Dataset):
 66 | 
 67 |     def __init__(self):
 68 |         self.n_samples = len(X_train)
 69 |         self.x_data = X_train
 70 |         self.y_data = y_train
 71 | 
 72 |     # support indexing such that dataset[i] can be used to get i-th sample
 73 |     def __getitem__(self, index):
 74 |         return self.x_data[index], self.y_data[index]
 75 | 
 76 |     # we can call len(dataset) to return the size
 77 |     def __len__(self):
 78 |         return self.n_samples
 79 | 
 80 | dataset = ChatDataset()
 81 | train_loader = DataLoader(dataset=dataset,
 82 |                           batch_size=batch_size,
 83 |                           shuffle=True,
 84 |                           num_workers=0)
 85 | 
 86 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 87 | 
 88 | model = NeuralNet(input_size, hidden_size, output_size).to(device)
 89 | 
 90 | # Loss and optimizer
 91 | criterion = nn.CrossEntropyLoss()
 92 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
 93 | 
 94 | # Train the model
 95 | for epoch in range(num_epochs):
 96 |     for (words, labels) in train_loader:
 97 |         words = words.to(device)
 98 |         labels = labels.to(dtype=torch.long).to(device)
 99 |         
100 |         # Forward pass
101 |         outputs = model(words)
102 |         # if y would be one-hot, we must apply
103 |         # labels = torch.max(labels, 1)[1]
104 |         loss = criterion(outputs, labels)
105 |         
106 |         # Backward and optimize
107 |         optimizer.zero_grad()
108 |         loss.backward()
109 |         optimizer.step()
110 |         
111 |     if (epoch+1) % 100 == 0:
112 |         print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')
113 | 
114 | 
115 | print(f'final loss: {loss.item():.4f}')
116 | 
117 | data = {
118 | "model_state": model.state_dict(),
119 | "input_size": input_size,
120 | "hidden_size": hidden_size,
121 | "output_size": output_size,
122 | "all_words": all_words,
123 | "tags": tags
124 | }
125 | 
126 | FILE = "data.pth"
127 | torch.save(data, FILE)
128 | 
129 | print(f'training complete. file saved to {FILE}')
130 | 


--------------------------------------------------------------------------------