├── .gitignore ├── LICENSE ├── README.md ├── chat.py ├── intents.json ├── model.py ├── nltk_utils.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Patrick Loeber 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Implementation of a Contextual Chatbot in PyTorch. 2 | Simple chatbot implementation with PyTorch. 3 | 4 | - The implementation should be easy to follow for beginners and provide a basic understanding of chatbots. 5 | - The implementation is straightforward with a Feed Forward Neural net with 2 hidden layers. 6 | - Customization for your own use case is super easy. Just modify `intents.json` with possible patterns and responses and re-run the training (see below for more info). 7 | 8 | The approach is inspired by this article and ported to PyTorch: [https://chatbotsmagazine.com/contextual-chat-bots-with-tensorflow-4391749d0077](https://chatbotsmagazine.com/contextual-chat-bots-with-tensorflow-4391749d0077). 9 | 10 | ## Watch the Tutorial 11 | [![Alt text](https://img.youtube.com/vi/RpWeNzfSUHw/hqdefault.jpg)](https://www.youtube.com/watch?v=RpWeNzfSUHw&list=PLqnslRFeH2UrFW4AUgn-eY37qOAWQpJyg) 12 | 13 | ## Installation 14 | 15 | ### Create an environment 16 | Whatever you prefer (e.g. `conda` or `venv`) 17 | ```console 18 | mkdir myproject 19 | $ cd myproject 20 | $ python3 -m venv venv 21 | ``` 22 | 23 | ### Activate it 24 | Mac / Linux: 25 | ```console 26 | . venv/bin/activate 27 | ``` 28 | Windows: 29 | ```console 30 | venv\Scripts\activate 31 | ``` 32 | ### Install PyTorch and dependencies 33 | 34 | For Installation of PyTorch see [official website](https://pytorch.org/). 35 | 36 | You also need `nltk`: 37 | ```console 38 | pip install nltk 39 | ``` 40 | 41 | If you get an error during the first run, you also need to install `nltk.tokenize.punkt`: 42 | Run this once in your terminal: 43 | ```console 44 | $ python 45 | >>> import nltk 46 | >>> nltk.download('punkt') 47 | ``` 48 | 49 | ## Usage 50 | Run 51 | ```console 52 | python train.py 53 | ``` 54 | This will dump `data.pth` file. And then run 55 | ```console 56 | python chat.py 57 | ``` 58 | ## Customize 59 | Have a look at [intents.json](intents.json). You can customize it according to your own use case. Just define a new `tag`, possible `patterns`, and possible `responses` for the chat bot. You have to re-run the training whenever this file is modified. 60 | ```console 61 | { 62 | "intents": [ 63 | { 64 | "tag": "greeting", 65 | "patterns": [ 66 | "Hi", 67 | "Hey", 68 | "How are you", 69 | "Is anyone there?", 70 | "Hello", 71 | "Good day" 72 | ], 73 | "responses": [ 74 | "Hey :-)", 75 | "Hello, thanks for visiting", 76 | "Hi there, what can I do for you?", 77 | "Hi there, how can I help?" 78 | ] 79 | }, 80 | ... 81 | ] 82 | } 83 | ``` 84 | -------------------------------------------------------------------------------- /chat.py: -------------------------------------------------------------------------------- 1 | import random 2 | import json 3 | 4 | import torch 5 | 6 | from model import NeuralNet 7 | from nltk_utils import bag_of_words, tokenize 8 | 9 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 10 | 11 | with open('intents.json', 'r') as json_data: 12 | intents = json.load(json_data) 13 | 14 | FILE = "data.pth" 15 | data = torch.load(FILE) 16 | 17 | input_size = data["input_size"] 18 | hidden_size = data["hidden_size"] 19 | output_size = data["output_size"] 20 | all_words = data['all_words'] 21 | tags = data['tags'] 22 | model_state = data["model_state"] 23 | 24 | model = NeuralNet(input_size, hidden_size, output_size).to(device) 25 | model.load_state_dict(model_state) 26 | model.eval() 27 | 28 | bot_name = "Sam" 29 | print("Let's chat! (type 'quit' to exit)") 30 | while True: 31 | # sentence = "do you use credit cards?" 32 | sentence = input("You: ") 33 | if sentence == "quit": 34 | break 35 | 36 | sentence = tokenize(sentence) 37 | X = bag_of_words(sentence, all_words) 38 | X = X.reshape(1, X.shape[0]) 39 | X = torch.from_numpy(X).to(device) 40 | 41 | output = model(X) 42 | _, predicted = torch.max(output, dim=1) 43 | 44 | tag = tags[predicted.item()] 45 | 46 | probs = torch.softmax(output, dim=1) 47 | prob = probs[0][predicted.item()] 48 | if prob.item() > 0.75: 49 | for intent in intents['intents']: 50 | if tag == intent["tag"]: 51 | print(f"{bot_name}: {random.choice(intent['responses'])}") 52 | else: 53 | print(f"{bot_name}: I do not understand...") -------------------------------------------------------------------------------- /intents.json: -------------------------------------------------------------------------------- 1 | { 2 | "intents": [ 3 | { 4 | "tag": "greeting", 5 | "patterns": [ 6 | "Hi", 7 | "Hey", 8 | "How are you", 9 | "Is anyone there?", 10 | "Hello", 11 | "Good day" 12 | ], 13 | "responses": [ 14 | "Hey :-)", 15 | "Hello, thanks for visiting", 16 | "Hi there, what can I do for you?", 17 | "Hi there, how can I help?" 18 | ] 19 | }, 20 | { 21 | "tag": "goodbye", 22 | "patterns": ["Bye", "See you later", "Goodbye"], 23 | "responses": [ 24 | "See you later, thanks for visiting", 25 | "Have a nice day", 26 | "Bye! Come back again soon." 27 | ] 28 | }, 29 | { 30 | "tag": "thanks", 31 | "patterns": ["Thanks", "Thank you", "That's helpful", "Thank's a lot!"], 32 | "responses": ["Happy to help!", "Any time!", "My pleasure"] 33 | }, 34 | { 35 | "tag": "items", 36 | "patterns": [ 37 | "Which items do you have?", 38 | "What kinds of items are there?", 39 | "What do you sell?" 40 | ], 41 | "responses": [ 42 | "We sell coffee and tea", 43 | "We have coffee and tea" 44 | ] 45 | }, 46 | { 47 | "tag": "payments", 48 | "patterns": [ 49 | "Do you take credit cards?", 50 | "Do you accept Mastercard?", 51 | "Can I pay with Paypal?", 52 | "Are you cash only?" 53 | ], 54 | "responses": [ 55 | "We accept VISA, Mastercard and Paypal", 56 | "We accept most major credit cards, and Paypal" 57 | ] 58 | }, 59 | { 60 | "tag": "delivery", 61 | "patterns": [ 62 | "How long does delivery take?", 63 | "How long does shipping take?", 64 | "When do I get my delivery?" 65 | ], 66 | "responses": [ 67 | "Delivery takes 2-4 days", 68 | "Shipping takes 2-4 days" 69 | ] 70 | }, 71 | { 72 | "tag": "funny", 73 | "patterns": [ 74 | "Tell me a joke!", 75 | "Tell me something funny!", 76 | "Do you know a joke?" 77 | ], 78 | "responses": [ 79 | "Why did the hipster burn his mouth? He drank the coffee before it was cool.", 80 | "What did the buffalo say when his son left for college? Bison." 81 | ] 82 | } 83 | ] 84 | } 85 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | 5 | class NeuralNet(nn.Module): 6 | def __init__(self, input_size, hidden_size, num_classes): 7 | super(NeuralNet, self).__init__() 8 | self.l1 = nn.Linear(input_size, hidden_size) 9 | self.l2 = nn.Linear(hidden_size, hidden_size) 10 | self.l3 = nn.Linear(hidden_size, num_classes) 11 | self.relu = nn.ReLU() 12 | 13 | def forward(self, x): 14 | out = self.l1(x) 15 | out = self.relu(out) 16 | out = self.l2(out) 17 | out = self.relu(out) 18 | out = self.l3(out) 19 | # no activation and no softmax at the end 20 | return out -------------------------------------------------------------------------------- /nltk_utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import nltk 3 | # nltk.download('punkt') 4 | from nltk.stem.porter import PorterStemmer 5 | stemmer = PorterStemmer() 6 | 7 | def tokenize(sentence): 8 | """ 9 | split sentence into array of words/tokens 10 | a token can be a word or punctuation character, or number 11 | """ 12 | return nltk.word_tokenize(sentence) 13 | 14 | 15 | def stem(word): 16 | """ 17 | stemming = find the root form of the word 18 | examples: 19 | words = ["organize", "organizes", "organizing"] 20 | words = [stem(w) for w in words] 21 | -> ["organ", "organ", "organ"] 22 | """ 23 | return stemmer.stem(word.lower()) 24 | 25 | 26 | def bag_of_words(tokenized_sentence, words): 27 | """ 28 | return bag of words array: 29 | 1 for each known word that exists in the sentence, 0 otherwise 30 | example: 31 | sentence = ["hello", "how", "are", "you"] 32 | words = ["hi", "hello", "I", "you", "bye", "thank", "cool"] 33 | bog = [ 0 , 1 , 0 , 1 , 0 , 0 , 0] 34 | """ 35 | # stem each word 36 | sentence_words = [stem(word) for word in tokenized_sentence] 37 | # initialize bag with 0 for each word 38 | bag = np.zeros(len(words), dtype=np.float32) 39 | for idx, w in enumerate(words): 40 | if w in sentence_words: 41 | bag[idx] = 1 42 | 43 | return bag -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import json 4 | 5 | import torch 6 | import torch.nn as nn 7 | from torch.utils.data import Dataset, DataLoader 8 | 9 | from nltk_utils import bag_of_words, tokenize, stem 10 | from model import NeuralNet 11 | 12 | with open('intents.json', 'r') as f: 13 | intents = json.load(f) 14 | 15 | all_words = [] 16 | tags = [] 17 | xy = [] 18 | # loop through each sentence in our intents patterns 19 | for intent in intents['intents']: 20 | tag = intent['tag'] 21 | # add to tag list 22 | tags.append(tag) 23 | for pattern in intent['patterns']: 24 | # tokenize each word in the sentence 25 | w = tokenize(pattern) 26 | # add to our words list 27 | all_words.extend(w) 28 | # add to xy pair 29 | xy.append((w, tag)) 30 | 31 | # stem and lower each word 32 | ignore_words = ['?', '.', '!'] 33 | all_words = [stem(w) for w in all_words if w not in ignore_words] 34 | # remove duplicates and sort 35 | all_words = sorted(set(all_words)) 36 | tags = sorted(set(tags)) 37 | 38 | print(len(xy), "patterns") 39 | print(len(tags), "tags:", tags) 40 | print(len(all_words), "unique stemmed words:", all_words) 41 | 42 | # create training data 43 | X_train = [] 44 | y_train = [] 45 | for (pattern_sentence, tag) in xy: 46 | # X: bag of words for each pattern_sentence 47 | bag = bag_of_words(pattern_sentence, all_words) 48 | X_train.append(bag) 49 | # y: PyTorch CrossEntropyLoss needs only class labels, not one-hot 50 | label = tags.index(tag) 51 | y_train.append(label) 52 | 53 | X_train = np.array(X_train) 54 | y_train = np.array(y_train) 55 | 56 | # Hyper-parameters 57 | num_epochs = 1000 58 | batch_size = 8 59 | learning_rate = 0.001 60 | input_size = len(X_train[0]) 61 | hidden_size = 8 62 | output_size = len(tags) 63 | print(input_size, output_size) 64 | 65 | class ChatDataset(Dataset): 66 | 67 | def __init__(self): 68 | self.n_samples = len(X_train) 69 | self.x_data = X_train 70 | self.y_data = y_train 71 | 72 | # support indexing such that dataset[i] can be used to get i-th sample 73 | def __getitem__(self, index): 74 | return self.x_data[index], self.y_data[index] 75 | 76 | # we can call len(dataset) to return the size 77 | def __len__(self): 78 | return self.n_samples 79 | 80 | dataset = ChatDataset() 81 | train_loader = DataLoader(dataset=dataset, 82 | batch_size=batch_size, 83 | shuffle=True, 84 | num_workers=0) 85 | 86 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 87 | 88 | model = NeuralNet(input_size, hidden_size, output_size).to(device) 89 | 90 | # Loss and optimizer 91 | criterion = nn.CrossEntropyLoss() 92 | optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) 93 | 94 | # Train the model 95 | for epoch in range(num_epochs): 96 | for (words, labels) in train_loader: 97 | words = words.to(device) 98 | labels = labels.to(dtype=torch.long).to(device) 99 | 100 | # Forward pass 101 | outputs = model(words) 102 | # if y would be one-hot, we must apply 103 | # labels = torch.max(labels, 1)[1] 104 | loss = criterion(outputs, labels) 105 | 106 | # Backward and optimize 107 | optimizer.zero_grad() 108 | loss.backward() 109 | optimizer.step() 110 | 111 | if (epoch+1) % 100 == 0: 112 | print (f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}') 113 | 114 | 115 | print(f'final loss: {loss.item():.4f}') 116 | 117 | data = { 118 | "model_state": model.state_dict(), 119 | "input_size": input_size, 120 | "hidden_size": hidden_size, 121 | "output_size": output_size, 122 | "all_words": all_words, 123 | "tags": tags 124 | } 125 | 126 | FILE = "data.pth" 127 | torch.save(data, FILE) 128 | 129 | print(f'training complete. file saved to {FILE}') 130 | --------------------------------------------------------------------------------