├── .gitignore ├── README.md ├── bot.py ├── checkpoint ├── intents.json ├── intents_french.json ├── model.py ├── model.tflearn.data-00000-of-00001 ├── model.tflearn.index ├── model.tflearn.meta ├── tflearn_logs ├── JFC9C7 │ └── events.out.tfevents.1506690552.ileeloo.adyax ├── R5ETAO │ └── events.out.tfevents.1506711308.ileeloo.local └── TYPIDR │ └── events.out.tfevents.1506690895.ileeloo.adyax └── training_data /.gitignore: -------------------------------------------------------------------------------- 1 | tflearn_logs -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Chatbot playground based on https://chatbotsmagazine.com/contextual-chat-bots-with-tensorflow-4391749d0077 2 | 3 | * model.py – builds a model and saves it to disk 4 | * bot.py – loads the model and processes the responses -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 | import nltk 2 | from nltk.stem.lancaster import LancasterStemmer 3 | stemmer = LancasterStemmer() 4 | 5 | import numpy as np 6 | import tflearn 7 | import tensorflow as tf 8 | import random 9 | 10 | import pickle 11 | import json 12 | 13 | ERROR_THRESHOLD = 0.25 14 | 15 | def clean_up_sentence(sentence): 16 | # tokenize the pattern 17 | sentence_words = nltk.word_tokenize(sentence) 18 | # stem each word 19 | sentence_words = [stemmer.stem(word.lower()) for word in sentence_words] 20 | return sentence_words 21 | 22 | # return bag of words array: 0 or 1 for each word in the bag that exists in the sentence 23 | def bow(sentence, words, show_details=False): 24 | # tokenize the pattern 25 | sentence_words = clean_up_sentence(sentence) 26 | # bag of words 27 | bag = [0]*len(words) 28 | for s in sentence_words: 29 | for i, w in enumerate(words): 30 | if w == s: 31 | bag[i] = 1 32 | if 
show_details: 33 | print("found in bag %s" % w) 34 | return np.array(bag) 35 | 36 | def classify(sentence): 37 | # generate probabilities from the model 38 | results = model.predict([bow(sentence, words)])[0] 39 | # filter out predictions below a threshold 40 | results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD] 41 | # sort by strength of probability 42 | results.sort(key=lambda x: x[1], reverse=True) 43 | return_list = [] 44 | for r in results: 45 | return_list.append((classes[r[0]], r[1])) 46 | # return tuple of intent and probability 47 | return return_list 48 | 49 | def response(sentence, userID='123', show_details=False): 50 | results = classify(sentence) 51 | # if we have a classification then find the matching intent tag 52 | if results: 53 | # loop as long as there are matches to process 54 | while results: 55 | for i in intents['intents']: 56 | # find a tag matching the first results 57 | if i['tag'] == results[0][0]: 58 | # a random response from the intent 59 | return print(random.choice(i['responses'])) 60 | 61 | results.pop(0) 62 | 63 | # === 64 | data = pickle.load(open("training_data", "rb")) 65 | words = data['words'] 66 | classes = data['classes'] 67 | train_x = data['train_x'] 68 | train_y = data['train_y'] 69 | 70 | # import our chat-bot intents file 71 | with open('intents.json') as json_data: 72 | intents = json.load(json_data) 73 | 74 | # load saved model 75 | net = tflearn.input_data(shape=[None, len(train_x[0])]) 76 | net = tflearn.fully_connected(net, 8) 77 | net = tflearn.fully_connected(net, 8) 78 | net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax') 79 | net = tflearn.regression(net) 80 | model = tflearn.DNN(net, tensorboard_dir='tflearn_logs') 81 | model.load('./model.tflearn') 82 | 83 | print (classify('is your shop open today?')) 84 | print (classify('are you open today?')) 85 | print (classify('do you take cash?')) 86 | print (classify('what kind of mopeds do you rent?')) 87 | print 
(classify('Goodbye, see you later')) 88 | -------------------------------------------------------------------------------- /checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "/Users/valcker/Documents/Projects/rnd/contextual-chatbot/model.tflearn" 2 | all_model_checkpoint_paths: "/Users/valcker/Documents/Projects/rnd/contextual-chatbot/model.tflearn" 3 | -------------------------------------------------------------------------------- /intents.json: -------------------------------------------------------------------------------- 1 | {"intents": [ 2 | {"tag": "greeting", 3 | "patterns": ["Hi", "How are you", "Is anyone there?", "Hello", "Good day"], 4 | "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"], 5 | "context_set": "" 6 | }, 7 | {"tag": "goodbye", 8 | "patterns": ["Bye", "See you later", "Goodbye"], 9 | "responses": ["See you later, thanks for visiting", "Have a nice day", "Bye! Come back again soon."] 10 | }, 11 | {"tag": "thanks", 12 | "patterns": ["Thanks", "Thank you", "That's helpful"], 13 | "responses": ["Happy to help!", "Any time!", "My pleasure"] 14 | }, 15 | {"tag": "hours", 16 | "patterns": ["What hours are you open?", "What are your hours?", "When are you open?" ], 17 | "responses": ["We're open every day 9am-9pm", "Our hours are 9am-9pm every day"] 18 | }, 19 | {"tag": "mopeds", 20 | "patterns": ["Which mopeds do you have?", "What kinds of mopeds are there?", "What do you rent?" ], 21 | "responses": ["We rent Yamaha, Piaggio and Vespa mopeds", "We have Piaggio, Vespa and Yamaha mopeds"] 22 | }, 23 | {"tag": "payments", 24 | "patterns": ["Do you take credit cards?", "Do you accept Mastercard?", "Are you cash only?" 
], 25 | "responses": ["We accept VISA, Mastercard and AMEX", "We accept most major credit cards"] 26 | }, 27 | {"tag": "opentoday", 28 | "patterns": ["Are you open today?", "When do you open today?", "What are your hours today?"], 29 | "responses": ["We're open every day from 9am-9pm", "Our hours are 9am-9pm every day"] 30 | }, 31 | {"tag": "rental", 32 | "patterns": ["Can we rent a moped?", "I'd like to rent a moped", "How does this work?" ], 33 | "responses": ["Are you looking to rent today or later this week?"], 34 | "context_set": "rentalday" 35 | }, 36 | {"tag": "today", 37 | "patterns": ["today"], 38 | "responses": ["For rentals today please call 1-800-MYMOPED", "Same-day rentals please call 1-800-MYMOPED"], 39 | "context_filter": "rentalday" 40 | } 41 | ] 42 | } 43 | -------------------------------------------------------------------------------- /intents_french.json: -------------------------------------------------------------------------------- 1 | {"intents": [ 2 | {"tag": "greeting", 3 | "patterns": ["Hi", "How are you", "Is anyone there?", "Hello", "Good day"], 4 | "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"], 5 | "context_set": "" 6 | }, 7 | {"tag": "goodbye", 8 | "patterns": ["Bye", "See you later", "Goodbye"], 9 | "responses": ["See you later, thanks for visiting", "Have a nice day", "Bye! Come back again soon."] 10 | }, 11 | {"tag": "thanks", 12 | "patterns": ["Thanks", "Thank you", "That's helpful"], 13 | "responses": ["Happy to help!", "Any time!", "My pleasure"] 14 | }, 15 | {"tag": "hours", 16 | "patterns": ["What hours are you open?", "What are your hours?", "When are you open?" ], 17 | "responses": ["We're open every day 9am-9pm", "Our hours are 9am-9pm every day"] 18 | }, 19 | {"tag": "mopeds", 20 | "patterns": ["Which mopeds do you have?", "What kinds of mopeds are there?", "What do you rent?" 
], 21 | "responses": ["We rent Yamaha, Piaggio and Vespa mopeds", "We have Piaggio, Vespa and Yamaha mopeds"] 22 | }, 23 | {"tag": "payments", 24 | "patterns": ["Do you take credit cards?", "Do you accept Mastercard?", "Are you cash only?" ], 25 | "responses": ["We accept VISA, Mastercard and AMEX", "We accept most major credit cards"] 26 | }, 27 | {"tag": "opentoday", 28 | "patterns": ["Are you open today?", "When do you open today?", "What are your hours today?"], 29 | "responses": ["We're open every day from 9am-9pm", "Our hours are 9am-9pm every day"] 30 | }, 31 | {"tag": "rental", 32 | "patterns": ["Can we rent a moped?", "I'd like to rent a moped", "How does this work?" ], 33 | "responses": ["Are you looking to rent today or later this week?"], 34 | "context_set": "rentalday" 35 | }, 36 | {"tag": "today", 37 | "patterns": ["today"], 38 | "responses": ["For rentals today please call 1-800-MYMOPED", "Same-day rentals please call 1-800-MYMOPED"], 39 | "context_filter": "rentalday" 40 | } 41 | ] 42 | } 43 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | import json 3 | import random 4 | 5 | # NLP stuff 6 | import nltk 7 | # nltk.download('punkt') 8 | from nltk.stem.lancaster import LancasterStemmer 9 | 10 | # TensorFlow stuff 11 | import numpy as np 12 | import tflearn 13 | import tensorflow as tf 14 | 15 | stemmer = LancasterStemmer() 16 | # from nltk.stem.snowball import FrenchStemmer 17 | # stemmer = FrenchStemmer() 18 | 19 | # load json file 20 | with open('intents.json') as json_data: 21 | intents = json.load(json_data) 22 | 23 | words = [] 24 | classes = [] 25 | documents = [] 26 | ignore_words = ['?'] 27 | 28 | # loop through each sentence in our intents patterns 29 | for intent in intents['intents']: 30 | for pattern in intent['patterns']: 31 | # tokenize each word in the sentence 32 | w = 
nltk.word_tokenize(pattern) 33 | # add to our word list 34 | words.extend(w) 35 | # add to documents in our corpus 36 | documents.append((w, intent['tag'])) 37 | # add to our classes list 38 | if intent['tag'] not in classes: 39 | classes.append(intent['tag']) 40 | 41 | # stem and lower each word and remove duplicates 42 | words = [stemmer.stem(w.lower()) for w in words if w not in ignore_words] 43 | words = sorted(list(set(words))) 44 | 45 | # remove duplicates 46 | classes = sorted(list(set(classes))) 47 | 48 | # create training data 49 | training = [] 50 | output = [] 51 | # create an empty array for our output 52 | output_empty = [0] * len(classes) 53 | 54 | # training set, bag of words for each sentence 55 | for doc in documents: 56 | # initialize our bag of words 57 | bag = [] 58 | # list of tokenized words for the pattern 59 | pattern_words = doc[0] 60 | # stem each word 61 | pattern_words = [stemmer.stem(word.lower()) for word in pattern_words] 62 | # create our bag of words array 63 | for w in words: 64 | bag.append(1) if w in pattern_words else bag.append(0) 65 | 66 | # output is a '0' for each tag and '1' for current tag 67 | output_row = list(output_empty) 68 | output_row[classes.index(doc[1])] = 1 69 | 70 | training.append([bag, output_row]) 71 | 72 | # shuffle our features and turn into np.array 73 | random.shuffle(training) 74 | training = np.array(training) 75 | 76 | # create train and test lists 77 | train_x = list(training[:, 0]) 78 | train_y = list(training[:, 1]) 79 | 80 | # reset underlying graph data 81 | tf.reset_default_graph() 82 | # Build neural network 83 | net = tflearn.input_data(shape=[None, len(train_x[0])]) 84 | net = tflearn.fully_connected(net, 8) 85 | net = tflearn.fully_connected(net, 8) 86 | net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax') 87 | net = tflearn.regression(net) 88 | 89 | # Define model and setup tensorboard 90 | model = tflearn.DNN(net, tensorboard_dir='tflearn_logs') 91 | # Start training 
(apply gradient descent algorithm) 92 | model.fit(train_x, train_y, n_epoch=1000, batch_size=8, show_metric=True) 93 | model.save('model.tflearn') 94 | 95 | pickle.dump({ 96 | 'words': words, 97 | 'classes': classes, 98 | 'train_x': train_x, 99 | 'train_y': train_y 100 | }, open('training_data', 'wb')) -------------------------------------------------------------------------------- /model.tflearn.data-00000-of-00001: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valcker/contextual-chatbot-tensorflow/dc5efb0a6072259ddf17432b4e4d29fe91329a99/model.tflearn.data-00000-of-00001 -------------------------------------------------------------------------------- /model.tflearn.index: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valcker/contextual-chatbot-tensorflow/dc5efb0a6072259ddf17432b4e4d29fe91329a99/model.tflearn.index -------------------------------------------------------------------------------- /model.tflearn.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valcker/contextual-chatbot-tensorflow/dc5efb0a6072259ddf17432b4e4d29fe91329a99/model.tflearn.meta -------------------------------------------------------------------------------- /tflearn_logs/JFC9C7/events.out.tfevents.1506690552.ileeloo.adyax: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valcker/contextual-chatbot-tensorflow/dc5efb0a6072259ddf17432b4e4d29fe91329a99/tflearn_logs/JFC9C7/events.out.tfevents.1506690552.ileeloo.adyax -------------------------------------------------------------------------------- /tflearn_logs/R5ETAO/events.out.tfevents.1506711308.ileeloo.local: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/valcker/contextual-chatbot-tensorflow/dc5efb0a6072259ddf17432b4e4d29fe91329a99/tflearn_logs/R5ETAO/events.out.tfevents.1506711308.ileeloo.local -------------------------------------------------------------------------------- /tflearn_logs/TYPIDR/events.out.tfevents.1506690895.ileeloo.adyax: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valcker/contextual-chatbot-tensorflow/dc5efb0a6072259ddf17432b4e4d29fe91329a99/tflearn_logs/TYPIDR/events.out.tfevents.1506690895.ileeloo.adyax -------------------------------------------------------------------------------- /training_data: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/valcker/contextual-chatbot-tensorflow/dc5efb0a6072259ddf17432b4e4d29fe91329a99/training_data --------------------------------------------------------------------------------