├── Images ├── 0.jpg ├── Flow.png ├── main.jpg ├── different.png └── architecture.png ├── feedback.xlsx ├── Chatbot use cases..pdf ├── intents.json ├── sentiment Analysis.ipynb ├── Readme.md ├── RailwayBot_SpeechRecognition.ipynb └── RailwayBot.ipynb /Images/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/0.jpg -------------------------------------------------------------------------------- /Images/Flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/Flow.png -------------------------------------------------------------------------------- /Images/main.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/main.jpg -------------------------------------------------------------------------------- /feedback.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/feedback.xlsx -------------------------------------------------------------------------------- /Images/different.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/different.png -------------------------------------------------------------------------------- /Chatbot use cases..pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Chatbot use cases..pdf -------------------------------------------------------------------------------- /Images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/architecture.png -------------------------------------------------------------------------------- /intents.json: -------------------------------------------------------------------------------- 1 | {"intents": [ 2 | {"tag": "greeting", 3 | "patterns": ["Hi"], 4 | "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"], 5 | "context_set": "" 6 | }, 7 | {"tag": "end", 8 | "patterns": ["goodbye"], 9 | "responses": ["See you later, thanks for visiting", "Have a nice day", "Bye! Come back again soon."] 10 | }, 11 | {"tag": "thanks", 12 | "patterns": ["Thank you"], 13 | "responses": ["Happy to help!", "Any time!", "My pleasure"] 14 | }, 15 | {"tag": "hours", 16 | "patterns": ["What hours are you open?" 
], 17 | "responses": ["I am 24*7 available"] 18 | }, 19 | {"tag": "schedule", 20 | "patterns": ["What is today's schedule of trains" ], 21 | "responses": ["hyderabad to mumbai" ] 22 | }, 23 | {"tag": "payment accept", 24 | "patterns":["What are the payment modes accepted?"], 25 | "responses": ["We accept VISA, Mastercard and AMEX , most major credit cards"] 26 | }, 27 | {"tag": "opentoday", 28 | "patterns": ["Are you open today?", "When do you open today?"], 29 | "responses": ["We're 24*7 available"] 30 | }, 31 | {"tag": "pnr ", 32 | "patterns": [ "how to check pnr status"], 33 | "responses": ["your pnr details are sent to your registered mobile number"] 34 | }, 35 | {"tag": "pnr_no ", 36 | "patterns": [ " what is my pnr no"], 37 | "responses": ["your pnr details are sent to your registered mobile number"] 38 | }, 39 | {"tag": "refund ", 40 | "patterns": [ "how to check refund status?"], 41 | "responses": ["login to your account , Go to transactions and click refund status in the menu bar"] 42 | }, 43 | {"tag": "cancel", 44 | "patterns": [ "how to cancel ticket?"], 45 | "responses": ["login to your account , Go to my bookings, click cancel"] 46 | }, 47 | 48 | {"tag": "book ticket", 49 | "patterns": ["how to book ticket" ], 50 | "responses": ["go to our website and click buy option"] 51 | } 52 | 53 | 54 | ] 55 | } -------------------------------------------------------------------------------- /sentiment Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", 11 | "analyser = SentimentIntensityAnalyzer()" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "file=r'feedback.xlsx'" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "xl=pd.ExcelFile(file)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "dfs=xl.parse(xl.sheet_names[0])" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 5, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "Empty DataFrame\n", 51 | "Columns: [thanks,you have been helpful]\n", 52 | "Index: []\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "print(dfs)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 6, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "sid=SentimentIntensityAnalyzer()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 7, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "neg 0.0\n", 79 | "neu 0.517\n", 80 | "pos 0.483\n", 81 | "compound 0.4215\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "for data in dfs:\n", 87 | " ss=sid.polarity_scores(data)\n", 88 | " for k in ss:\n", 89 | " print(k,ss[k])" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [] 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Python 3", 103 | "language": "python", 104 | "name": "python3" 105 | }, 106 | 
"language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 116 | "version": "3.7.3" 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 2 121 | } 122 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 |

CHATBOT For Railway Ticket Reservation 2 |

3 | 4 |

a CHAT BOT Built From Scratch

5 | 6 | > Interactive Railway Reservation - Building a chatbot for a railway reservation system. The bot maintains a conversation with the user about ticket booking, source and destination stations, general FAQs, etc. 7 | 8 | >SPECIAL FEATURES INCLUDED IN THIS BOT: 9 | 10 | * SPEECH RECOGNITION 11 | * CUSTOMIZED THRESHOLD FOR THE MODEL (see the sketch after this list). 12 | * SENTIMENT ANALYSIS. 13 | 14 | 15 | 16 |
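The customized threshold works as a confidence gate: the model's predicted probability for the best-matching intent must exceed a cutoff (0.65 in the notebooks) before the bot answers; otherwise it asks the user to rephrase. A minimal sketch, reusing the `model`, `bow`, `words`, `classes`, and `intents` objects built in RailwayBot.ipynb (the helper name `reply` is hypothetical):

```python
import random
import numpy as np

THRESHOLD = 0.65  # cutoff used in the notebooks

def reply(user_text):
    probs = model.predict([bow(user_text, words)])[0]  # class probabilities from the TFLearn DNN
    best = int(np.argmax(probs))
    if probs[best] > THRESHOLD:
        tag = classes[best]
        for intent in intents['intents']:
            if intent['tag'] == tag:
                return random.choice(intent['responses'])
    return "I did not understand you! Try again"       # low-confidence fallback
```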

Background and Significance of Chatbots in Railway Ticket Reservation Systems

17 | 18 |
19 | 20 | * Chatbots are creeping slowly into even the most routine business interactions. They have already started to dominate the low-stakes mass market, simplifying everyday tasks - from ordering food and booking a laptop service to answering credit card queries. 21 | 22 | * A chatbot is artificial intelligence (AI) software that can simulate a conversation (or chat) with a user in natural language through messaging applications, websites, mobile apps or over the telephone. 23 | 24 | 25 | 26 | * Railway services are available in most countries in the world, yet the demand for fast, reliable, and passenger-friendly service is always being felt. With ever-increasing passenger and freight loads, more efficient mechanisms are needed to handle this demand - not just more manpower but smart technologies which, when embedded in the system, produce a smarter railway network throughout. 27 | 28 | * Indian Railways, the largest employer in India and one of the slowest service providers in terms of train inquiries and ticket booking, may choose the chatbot route for faster responses. The inclusion of AI via chatbots in customer service is the need of the hour, and the world of smartphones and smart devices can help the country adopt it quickly. Not only Indian Railways - many countries need to adopt AI to maintain good user engagement. 29 | 30 |
31 | 32 |

Implementation Framework

33 | 34 |

SOFTWARE REQUIREMENTS

35 | 36 | * TensorFlow framework (with TFLearn), NLTK library (a quick dependency check is sketched below). 37 | 38 |
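A quick way to verify the environment is to import the packages the notebooks rely on. The package list below is inferred from the notebook imports (TensorFlow 1.x with TFLearn, NLTK, SpeechRecognition, pyttsx3, vaderSentiment, xlsxwriter); treat it as a sketch rather than a pinned requirements list:

```python
import nltk
import tensorflow as tf
import tflearn
import speech_recognition
import pyttsx3
import xlsxwriter
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

nltk.download('punkt')                # Punkt models are needed by nltk.word_tokenize
print("TensorFlow:", tf.__version__)  # the notebooks use TF 1.x APIs (tf.reset_default_graph)
```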

Solution Approach

39 | 40 | 41 | 42 | 43 | 44 | * Overview: NLP is used to train the model, and speech recognition/synthesis is used for input and output. The approach also includes sentiment analysis, which is used to improve the chatbot's performance by analysing the customer experience. 47 | 48 | 49 | 50 | > The chatbot's main function takes the user's input, sends it to the model, matches it against intents and patterns, and returns the expected response to the user. 51 | 52 | 53 | 54 | * If the user chooses to speak, a speech-to-text function is activated and the transcribed text is sent to the bot. From the user's conversation with the bot, the Punkt sentence tokenizer divides the text into a list of sentences, using an unsupervised algorithm to build a model for abbreviated words, collocations, and words that start sentences. Stemming and lemmatization are used to generate the root form of inflected words. In this approach only basic libraries are used for the framework; lemmatization, intents and pattern matching are done from scratch. 61 | 62 | 63 | * After tokenization, words, classes and documents are created: each word is added to a list (question marks and exclamation symbols are ignored, as they carry no useful signal), the useful data is added to the documents of the corpus, and the tags are added to the classes list. The word list is then stemmed and duplicates are removed, classifying the intents for efficient query handling (a short sketch of this bag-of-words preprocessing follows below). 68 | 69 | > The features are shuffled and turned into an np.array, a neural network is built with the TensorFlow/TFLearn framework, and the model is trained using the gradient descent algorithm. 71 | 72 | 73 | * After training the model, the JSON file of intents and responses is loaded, a data structure is created to hold the user query, and class probabilities are generated; predictions below the threshold are rejected, otherwise a tuple of intents and probabilities is returned. (A from-scratch implementation of TF-IDF and cosine similarity is also included.) 77 | 78 | 79 | * Feedback from the user is collected and sentiment analysis is performed on it to improve the performance of the chatbot. 80 | 81 |
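As a concrete illustration of the preprocessing described above, here is a minimal bag-of-words sketch assuming the same Lancaster stemming used in the notebooks; the toy vocabulary is only for demonstration (the real one is built from the patterns in intents.json):

```python
import nltk
from nltk.stem.lancaster import LancasterStemmer

nltk.download('punkt')       # tokenizer models
stemmer = LancasterStemmer()
ignore = {'?', '!'}          # punctuation carries no signal for intent matching

def bag_of_words(sentence, vocabulary):
    """Binary vector marking which vocabulary stems occur in the sentence."""
    tokens = [stemmer.stem(t.lower()) for t in nltk.word_tokenize(sentence) if t not in ignore]
    return [1 if w in tokens else 0 for w in vocabulary]

# Toy example
vocab = sorted({stemmer.stem(w) for w in ['how', 'to', 'book', 'ticket', 'cancel']})
print(bag_of_words("How to book a ticket?", vocab))
```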

Thank You :D

82 |

I have used many online resources while creating this application and I would like to thank them. Hope you found it insightful. If you have any queries you can mail me at kumartharun435@gmail.com. I would love to hear feedback from you so I can improve it and make it better!

83 | 84 | 85 | -------------------------------------------------------------------------------- /RailwayBot_SpeechRecognition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import nltk\n", 10 | "#It contains text processing libraries for tokenization, \n", 11 | "#parsing, classification, stemming, tagging and semantic reasoning.\n", 12 | "from nltk.stem.lancaster import LancasterStemmer\n", 13 | "stemmer = LancasterStemmer()\n", 14 | "import string \n", 15 | "import numpy" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stderr", 25 | "output_type": "stream", 26 | "text": [ 27 | "WARNING: Logging before flag parsing goes to stderr.\n", 28 | "W0924 20:32:41.921568 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\helpers\\summarizer.py:9: The name tf.summary.merge is deprecated. Please use tf.compat.v1.summary.merge instead.\n", 29 | "\n", 30 | "W0924 20:32:41.924598 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\helpers\\trainer.py:25: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.\n", 31 | "\n", 32 | "W0924 20:32:41.987388 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\collections.py:13: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead.\n", 33 | "\n", 34 | "W0924 20:32:41.995366 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:123: The name tf.get_collection is deprecated. Please use tf.compat.v1.get_collection instead.\n", 35 | "\n", 36 | "W0924 20:32:42.001344 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:129: The name tf.add_to_collection is deprecated. Please use tf.compat.v1.add_to_collection instead.\n", 37 | "\n", 38 | "W0924 20:32:42.001344 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:131: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n", 39 | "\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "import numpy as np\n", 45 | "import tflearn\n", 46 | "\n", 47 | "#TFlearn is a modular and transparent deep learning library built on top of Tensorflow. 
\n", 48 | "#It was designed to provide a higher-level API to TensorFlow \n", 49 | "#in order to facilitate and speed-up experimentations, while remaining fully transparent and compatible with it.\n", 50 | "import tensorflow as tf\n", 51 | "import random\n", 52 | "tf.logging.set_verbosity(tf.logging.ERROR)\n", 53 | "\n", 54 | "# import our chat-bot intents file\n", 55 | "import json\n", 56 | "#open the exact location\n", 57 | "with open('intents.json') as jd:\n", 58 | " intents = json.load(jd)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "import speech_recognition as sr\n", 68 | "r=sr.Recognizer()\n", 69 | "# importing the pyttsx library\n", 70 | "import pyttsx3\n", 71 | "# initialisation \n", 72 | "engine = pyttsx3.init() \n", 73 | "def bot_speaking(message): \n", 74 | " # testing \n", 75 | " engine.say(message)\n", 76 | " engine.runAndWait() \n", 77 | "def get_input():\n", 78 | " with sr.Microphone() as source:\n", 79 | " #print(\"Say something!!!\");\n", 80 | " bot_speaking(\"Hey mate say something\")\n", 81 | " audio=r.listen(source,timeout=0)\n", 82 | " #print(\"Perfect, Thanks!\")\n", 83 | " bot_speaking(\"Perfect, Thanks!\")\n", 84 | " try:\n", 85 | " msg=r.recognize_google(audio)\n", 86 | " print(\"TEXT: \"+msg); #r.recognize(audio,language='hi-IN')\n", 87 | " bot_speaking(\"you said \"+msg)\n", 88 | " return msg\n", 89 | " except:\n", 90 | " #print(\"Dude it's not working :(\")\n", 91 | " bot_speaking(\"Sorry mate! It's not working\")\n", 92 | " pass;" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stderr", 102 | "output_type": "stream", 103 | "text": [ 104 | "[nltk_data] Downloading package punkt to\n", 105 | "[nltk_data] C:\\Users\\kumar\\AppData\\Roaming\\nltk_data...\n", 106 | "[nltk_data] Package punkt is already up-to-date!\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "nltk.download('punkt')\n", 112 | "#Punkt Sentence Tokenizer. 
This tokenizer divides a text into a list of sentences,\n", 113 | "#by using an unsupervised algorithm to build a model for abbreviation words, collocations, and words that start sentences.\n", 114 | "#It must be trained on a large collection of plaintext in the target language before it can be used.\n", 115 | "#Create words, classes and documents\n", 116 | "words = []\n", 117 | "classes = []\n", 118 | "documents = []\n", 119 | "ignore_words = ['?','!']" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# loop through each sentence in our intents patterns\n", 129 | "for intent in intents['intents']:\n", 130 | " for pattern in intent['patterns']:\n", 131 | " \n", 132 | " # tokenize each word in the sentence\n", 133 | " \n", 134 | " w = nltk.word_tokenize(pattern)\n", 135 | " # add to our words list\n", 136 | " words.extend(w)\n", 137 | " # add to documents in our corpus # add to our classes list\n", 138 | " if intent['tag'] not in classes:\n", 139 | "\n", 140 | " documents.append((w, intent['tag']))\n", 141 | " classes.append(intent['tag'])\n" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 6, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# stem and lower each word and remove duplicates\n", 151 | "words = sorted(list(set([stemmer.stem(w.lower()) for w in words if w not in ignore_words])))\n", 152 | "\n", 153 | "# remove duplicates\n", 154 | "classes = sorted(list(set(classes)))\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 7, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "12 documents\n", 167 | "12 classes ['book ticket', 'cancel', 'end', 'greeting', 'hours', 'opentoday', 'payment accept', 'pnr ', 'pnr_no ', 'refund ', 'schedule', 'thanks']\n", 168 | "31 unique stemmed words [\"'s\", 'acceiv', 'ar', 'book', 'cancel', 'check', 'do', 'goodby', 'hi', 'hour', 'how', 'is', 'mod', 'my', 'no', 'of', 'op', 'pay', 'pnr', 'refund', 'schedule', 'stat', 'thank', 'the', 'ticket', 'to', 'today', 'train', 'what', 'when', 'you']\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "#Getting to know the documents,classes and stemmed words\n", 174 | "print (len(documents), \"documents\")\n", 175 | "print (len(classes), \"classes\", classes)\n", 176 | "print (len(words), \"unique stemmed words\", words)\n" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 8, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "training = []\n", 186 | "output = []\n", 187 | "# create an empty array for our output\n", 188 | "output_empty = [0] * len(classes)\n" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 9, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 201 | "[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 202 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]\n", 203 | "[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]\n", 204 | "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0]\n", 205 | "[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]\n", 206 | "[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1]\n", 207 | "[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", 208 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]\n", 209 | "[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", 210 | "[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]\n", 211 | "[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "# training set, bag of words for each sentence\n", 217 | "for doc in documents:\n", 218 | " # initialize our bag of words\n", 219 | " bag = []\n", 220 | " # list of tokenized words for the pattern\n", 221 | " pattern_words = doc[0]\n", 222 | " # stem each word\n", 223 | " pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]\n", 224 | " # create our bag of words array\n", 225 | " for w in words:\n", 226 | " bag.append(1) if w in pattern_words else bag.append(0)\n", 227 | " print(bag)\n", 228 | " # output is a '0' for each tag and '1' for current tag\n", 229 | " output_row = list(output_empty)\n", 230 | " output_row[classes.index(doc[1])] = 1\n", 231 | "\n", 232 | " training.append([bag, output_row])\n" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 10, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "[[list([0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0])\n", 245 | " list([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])]\n", 246 | " [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1])\n", 247 | " list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])]\n", 248 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1])\n", 249 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])]\n", 250 | " [list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0])\n", 251 | " list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 252 | " [list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])\n", 253 | " list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])]\n", 254 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])\n", 255 | " list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0])]\n", 256 | " [list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n", 257 | " list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 258 | " [list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0])\n", 259 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])]\n", 260 | " [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1])\n", 261 | " list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])]\n", 262 | " [list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])\n", 263 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])]\n", 264 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n", 265 | " list([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 266 | " [list([0, 0, 0, 1, 0, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0])\n", 267 | " list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]]\n" 268 | ] 269 | } 270 | ], 271 | "source": [ 272 | "# shuffle our features and turn into np.array\n", 273 | "random.shuffle(training)\n", 274 | "training = np.array(training)\n", 275 | "print(training)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 11, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "[[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]] [[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "# create train and test lists\n", 293 | "train_x = list(training[:,0])\n", 294 | "train_y = list(training[:,1])\n", 295 | "print(train_x,train_y)\n" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 12, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "Training Step: 2999 | total loss: \u001b[1m\u001b[32m0.06964\u001b[0m\u001b[0m | time: 0.002s\n", 308 | "| Adam | epoch: 1500 | loss: 0.06964 - acc: 0.9972 -- iter: 08/12\n", 309 | "Training Step: 3000 | total loss: \u001b[1m\u001b[32m0.06323\u001b[0m\u001b[0m | time: 0.004s\n", 310 | "| Adam | epoch: 1500 | loss: 0.06323 - acc: 0.9975 -- iter: 12/12\n", 311 | "--\n" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "# reset underlying graph data\n", 317 | "tf.reset_default_graph()\n", 318 | "# Build neural network\n", 319 | "net = tflearn.input_data(shape=[None, len(train_x[0])])\n", 320 | "net = tflearn.fully_connected(net, 12)\n", 321 | "net = tflearn.fully_connected(net, 12)\n", 322 | "net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')\n", 323 | "net = tflearn.regression(net)\n", 324 | "\n", 325 | "# Define model and setup tensorboard\n", 326 | "model = tflearn.DNN(net, tensorboard_dir='tflearn_logs') \n", 327 | "# Start training (apply gradient descent 
algorithm)\n", 328 | "model.fit(train_x, train_y, n_epoch=1500, batch_size=8, show_metric=True)#n_epoch is the number of times network sees the data\n", 329 | "model.save('model.tflearn')" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 13, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "import pickle\n", 339 | "pickle.dump({'words':words, 'classes':classes,'train_x':train_x,'train_y':train_y},open( \"training_data\", \"wb\" ))\n", 340 | "\n", 341 | "\n", 342 | "\n", 343 | "# restore all of our data structures\n", 344 | "import pickle\n", 345 | "data = pickle.load( open( \"training_data\", \"rb\" ) )\n", 346 | "words = data['words']\n", 347 | "classes = data['classes']\n", 348 | "train_x = data['train_x']\n", 349 | "train_y = data['train_y']" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 14, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "# import our chat-bot intents file\n", 359 | "import json\n", 360 | "with open('intents.json') as jd:\n", 361 | " intents = json.load(jd)\n", 362 | " \n", 363 | "# load our saved model\n", 364 | "model.load('./model.tflearn')\n", 365 | "\n", 366 | "def clean_up_sentence(sentence):\n", 367 | " # tokenize the pattern\n", 368 | " sentence_words = nltk.word_tokenize(sentence)\n", 369 | " # stem each word\n", 370 | " sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]\n", 371 | " return sentence_words\n", 372 | "\n", 373 | "# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence\n", 374 | "def bow(sentence, words, show_details=False):\n", 375 | " # tokenize the pattern\n", 376 | " sentence_words = clean_up_sentence(sentence)\n", 377 | " # bag of words\n", 378 | " bag = [0]*len(words) \n", 379 | " for s in sentence_words:\n", 380 | " for i,w in enumerate(words):\n", 381 | " if w == s: \n", 382 | " bag[i] = 1\n", 383 | " if show_details:\n", 384 | " print (\"found in bag: %s\" % w)\n", 385 | "\n", 386 | " return(np.array(bag))" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "def cchat():\n", 396 | " print(\"BOT: I am your Personal Digital Assistant. What can I do for you!\")\n", 397 | " while True:\n", 398 | " inp=str(get_input()) #input(\"YOU : \")\n", 399 | " '''if inp.lower()==\"quit\" or inp==None:\n", 400 | " break'''\n", 401 | " if inp=='quit':\n", 402 | " break\n", 403 | " results = model.predict([bow(inp, words)])[0]\n", 404 | " results_index=numpy.argmax(results)\n", 405 | " tag=classes[results_index]\n", 406 | " if(results[results_index]>0.65):\n", 407 | " \n", 408 | " for tg in intents['intents']:\n", 409 | " if tg['tag']==tag:\n", 410 | " response=tg['responses']\n", 411 | " ms=random.choice(response)\n", 412 | "\n", 413 | " print(\"BOT : \" +ms)\n", 414 | " bot_speaking(ms)\n", 415 | " else:\n", 416 | " print(\"BOT : I did not understand you! Try again\")\n", 417 | " bot_speaking(\"I did not understand you! 
Try again\")\n", 418 | "cchat()" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "print(\"BOT : These are the frequently asked questions\")\n" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "import xlsxwriter \n", 437 | " \n", 438 | "# Workbook() takes one, non-optional, argument \n", 439 | "# which is the filename that we want to create. \n", 440 | "workbook = xlsxwriter.Workbook('feedback.xlsx') \n", 441 | " \n", 442 | "# The workbook object is then used to add new \n", 443 | "# worksheet via the add_worksheet() method. \n", 444 | "worksheet = workbook.add_worksheet() \n", 445 | " \n", 446 | "# Use the worksheet object to write \n", 447 | "# data via the write() method. \n", 448 | "worksheet.write('A1', input(\"Enter your FEEDBACK: \" )) \n", 449 | "\n", 450 | " \n", 451 | "# Finally, close the Excel file \n", 452 | "# via the close() method. \n", 453 | "workbook.close() " 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [] 490 | } 491 | ], 492 | "metadata": { 493 | "kernelspec": { 494 | "display_name": "Python 3", 495 | "language": "python", 496 | "name": "python3" 497 | }, 498 | "language_info": { 499 | "codemirror_mode": { 500 | "name": "ipython", 501 | "version": 3 502 | }, 503 | "file_extension": ".py", 504 | "mimetype": "text/x-python", 505 | "name": "python", 506 | "nbconvert_exporter": "python", 507 | "pygments_lexer": "ipython3", 508 | "version": "3.7.3" 509 | } 510 | }, 511 | "nbformat": 4, 512 | "nbformat_minor": 2 513 | } 514 | -------------------------------------------------------------------------------- /RailwayBot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import nltk\n", 10 | "#It contains text processing libraries for tokenization, \n", 11 | "#parsing, classification, stemming, tagging and semantic reasoning.\n", 12 | "from nltk.stem.lancaster import LancasterStemmer\n", 13 | "stemmer = LancasterStemmer()\n", 14 | "import curses\n", 15 | "import string\n", 16 | "import numpy" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stderr", 26 | "output_type": "stream", 27 | "text": [ 28 | "WARNING: Logging before flag parsing goes to stderr.\n", 29 | "W0922 14:57:32.300952 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\helpers\\summarizer.py:9: The name tf.summary.merge is deprecated. 
Please use tf.compat.v1.summary.merge instead.\n", 30 | "\n", 31 | "W0922 14:57:32.303982 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\helpers\\trainer.py:25: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.\n", 32 | "\n", 33 | "W0922 14:57:32.379739 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\collections.py:13: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead.\n", 34 | "\n", 35 | "W0922 14:57:32.388729 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:123: The name tf.get_collection is deprecated. Please use tf.compat.v1.get_collection instead.\n", 36 | "\n", 37 | "W0922 14:57:32.563499 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:129: The name tf.add_to_collection is deprecated. Please use tf.compat.v1.add_to_collection instead.\n", 38 | "\n", 39 | "W0922 14:57:32.564502 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:131: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n", 40 | "\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "import numpy as np\n", 46 | "import tflearn\n", 47 | "\n", 48 | "#TFlearn is a modular and transparent deep learning library built on top of Tensorflow. \n", 49 | "#It was designed to provide a higher-level API to TensorFlow \n", 50 | "#in order to facilitate and speed-up experimentations, while remaining fully transparent and compatible with it.\n", 51 | "import tensorflow as tf\n", 52 | "import random\n", 53 | "tf.logging.set_verbosity(tf.logging.ERROR)\n", 54 | "\n", 55 | "# import our chat-bot intents file\n", 56 | "import json\n", 57 | "#open the exact location of CORPUS \n", 58 | "with open('intents.json') as jd:\n", 59 | " intents = json.load(jd)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stderr", 69 | "output_type": "stream", 70 | "text": [ 71 | "[nltk_data] Downloading package punkt to\n", 72 | "[nltk_data] C:\\Users\\kumar\\AppData\\Roaming\\nltk_data...\n", 73 | "[nltk_data] Package punkt is already up-to-date!\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "nltk.download('punkt')\n", 79 | "#Punkt Sentence Tokenizer. 
This tokenizer divides a text into a list of sentences,\n", 80 | "#by using an unsupervised algorithm to build a model for abbreviation words, collocations, and words that start sentences.\n", 81 | "#It must be trained on a large collection of plaintext in the target language before it can be used.\n", 82 | "#Create words, classes and documents\n", 83 | "words = []\n", 84 | "classes = []\n", 85 | "documents = []\n", 86 | "ignore_words = ['?','!']" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 4, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "# loop through each sentence in our intents patterns\n", 96 | "for intent in intents['intents']:\n", 97 | " for pattern in intent['patterns']: # PATTERN IS USER QUESTION\n", 98 | " \n", 99 | " # tokenize each word in the sentence\n", 100 | " \n", 101 | " w = nltk.word_tokenize(pattern)\n", 102 | " # add to our words list\n", 103 | " words.extend(w)\n", 104 | " # add to documents in our corpus # add to our classes list\n", 105 | " if intent['tag'] not in classes:\n", 106 | "\n", 107 | " documents.append((w, intent['tag']))\n", 108 | " classes.append(intent['tag'])\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 5, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# stem and lower each word and remove duplicates\n", 118 | "words = sorted(list(set([stemmer.stem(w.lower()) for w in words if w not in ignore_words])))\n", 119 | "\n", 120 | "# remove duplicates\n", 121 | "classes = sorted(list(set(classes)))\n" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "12 documents\n", 134 | "12 classes ['book ticket', 'cancel', 'end', 'greeting', 'hours', 'opentoday', 'payment accept', 'pnr ', 'pnr_no ', 'refund ', 'schedule', 'thanks']\n", 135 | "31 unique stemmed words [\"'s\", 'acceiv', 'ar', 'book', 'cancel', 'check', 'do', 'goodby', 'hi', 'hour', 'how', 'is', 'mod', 'my', 'no', 'of', 'op', 'pay', 'pnr', 'refund', 'schedule', 'stat', 'thank', 'the', 'ticket', 'to', 'today', 'train', 'what', 'when', 'you']\n" 136 | ] 137 | } 138 | ], 139 | "source": [ 140 | "#Getting to know the documents,classes and stemmed words\n", 141 | "print (len(documents), \"documents\")\n", 142 | "print (len(classes), \"classes\", classes)\n", 143 | "print (len(words), \"unique stemmed words\", words)\n" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "training = []\n", 153 | "output = []\n", 154 | "# create an empty array for our output\n", 155 | "output_empty = [0] * len(classes)\n" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 168 | "[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 169 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]\n", 170 | "[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]\n", 171 | "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0]\n", 172 | "[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]\n", 173 | "[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1]\n", 174 | "[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", 175 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]\n", 176 | "[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", 177 | "[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]\n", 178 | "[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "# training set, bag of words for each sentence\n", 184 | "for doc in documents:\n", 185 | " # initialize our bag of words\n", 186 | " bag = [] #bag of words which is a representaion of text that describes the occurence of wordss\n", 187 | " #with in a doccument\n", 188 | " # list of tokenized words for the pattern\n", 189 | " pattern_words = doc[0]\n", 190 | " # stem each word\n", 191 | " pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]\n", 192 | " # create our bag of words array\n", 193 | " for w in words: #if the word appears in the pattern put 1 else 0 \n", 194 | " bag.append(1) if w in pattern_words else bag.append(0)\n", 195 | " print(bag)\n", 196 | " # output is a '0' for each tag and '1' for current tag\n", 197 | " output_row = list(output_empty)\n", 198 | " output_row[classes.index(doc[1])] = 1\n", 199 | "\n", 200 | " training.append([bag, output_row])\n" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "[[list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])\n", 213 | " list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])]\n", 214 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1])\n", 215 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])]\n", 216 | " [list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])\n", 217 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])]\n", 218 | " [list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n", 219 | " list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 220 | " [list([0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0])\n", 221 | " list([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])]\n", 222 | " [list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0])\n", 223 | " list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 224 | " [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1])\n", 225 | " list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])]\n", 226 | " [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1])\n", 227 | " list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])]\n", 228 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])\n", 229 | " list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0])]\n", 230 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n", 231 | " list([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 232 | " [list([0, 0, 0, 1, 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0])\n", 233 | " list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 234 | " [list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0])\n", 235 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])]]\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "# shuffle our features and turn into np.array\n", 241 | "random.shuffle(training)\n", 242 | "training = np.array(training)\n", 243 | "print(training)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "[[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0]] [[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]]\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "# create train and test lists\n", 261 | "train_x = list(training[:,0])\n", 262 | "train_y = list(training[:,1])\n", 263 | "print(train_x,train_y)\n" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "Training Step: 2999 | total loss: \u001b[1m\u001b[32m1.87047\u001b[0m\u001b[0m | time: 0.004s\n", 276 | "| Adam | epoch: 1500 | loss: 1.87047 - acc: 0.8995 -- iter: 08/12\n", 277 | "Training Step: 3000 | total loss: \u001b[1m\u001b[32m1.68418\u001b[0m\u001b[0m | time: 0.006s\n", 278 | "| Adam | epoch: 1500 | loss: 1.68418 - acc: 0.9096 -- iter: 12/12\n", 279 | "--\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "# reset underlying graph data\n", 285 | "tf.reset_default_graph()\n", 286 | "# Build neural network\n", 287 | "net = tflearn.input_data(shape=[None, len(train_x[0])])\n", 288 | "net = tflearn.fully_connected(net, 12)\n", 289 | "net = tflearn.fully_connected(net, 12)\n", 290 | "net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')\n", 291 | "net = tflearn.regression(net)\n", 292 | 
"\n", 293 | "# Define model and setup tensorboard\n", 294 | "model = tflearn.DNN(net, tensorboard_dir='tflearn_logs') \n", 295 | "# Start training (apply gradient descent algorithm)\n", 296 | "model.fit(train_x, train_y, n_epoch=1500, batch_size=8, show_metric=True)#n_epoch is the number of times network sees the data\n", 297 | "model.save('model.tflearn')" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "import pickle\n", 307 | "pickle.dump({'words':words, 'classes':classes,'train_x':train_x,'train_y':train_y},open( \"training_data\", \"wb\" ))\n", 308 | "\n", 309 | "\n", 310 | "\n", 311 | "# restore all of our data structures\n", 312 | "import pickle\n", 313 | "data = pickle.load( open( \"training_data\", \"rb\" ) )\n", 314 | "words = data['words']\n", 315 | "classes = data['classes']\n", 316 | "train_x = data['train_x']\n", 317 | "train_y = data['train_y']" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "# import our chat-bot intents file\n", 327 | "import json\n", 328 | "with open('intents.json') as jd:\n", 329 | " intents = json.load(jd)\n", 330 | " \n", 331 | "# load our saved model\n", 332 | "model.load('./model.tflearn')\n", 333 | "\n", 334 | "def clean_up_sentence(sentence):\n", 335 | " # tokenize the pattern\n", 336 | " sentence_words = nltk.word_tokenize(sentence)\n", 337 | " # stem each word\n", 338 | " sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]\n", 339 | " return sentence_words\n", 340 | "\n", 341 | "# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence\n", 342 | "def bow(sentence, words, show_details=False):\n", 343 | " # tokenize the pattern\n", 344 | " sentence_words = clean_up_sentence(sentence)\n", 345 | " # bag of words\n", 346 | " bag = [0]*len(words) \n", 347 | " for s in sentence_words:\n", 348 | " for i,w in enumerate(words):\n", 349 | " if w == s: \n", 350 | " bag[i] = 1\n", 351 | " if show_details:\n", 352 | " print (\"found in bag: %s\" % w)\n", 353 | "\n", 354 | " return(np.array(bag))" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "GRADIENT DESCENT ALGORITHM\n" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "Neural networks are trained iteratively using optimization techniques like gradient descent. After each cycle of training, an error metric is calculated based on the difference between prediction and target. The derivatives of this error metric are calculated and propagated back through the network using a technique called backpropagation. Each neuron’s coefficients (weights) are then adjusted relative to how much they contributed to the total error. This process is repeated iteratively until the network error drops below an acceptable threshold." 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "BOT: I am your Personal Digital Assistant. 
What can I do for you!\n", 381 | "YOU : hi\n", 382 | "BOT : Hello, thanks for visiting\n", 383 | "YOU : how to cancel a ticket\n", 384 | "BOT : login to your account , Go to my bookings, click cancel\n", 385 | "YOU : how to book a ticket\n", 386 | "BOT : go to our website and click buy option\n", 387 | "YOU : what are the payment modes\n", 388 | "BOT : We accept VISA, Mastercard and AMEX , most major credit cards\n", 389 | "YOU : hi\n", 390 | "BOT : Hi there, how can I help?\n", 391 | "YOU : quit\n" 392 | ] 393 | } 394 | ], 395 | "source": [ 396 | "def cchat():\n", 397 | " print(\"BOT: I am your Personal Digital Assistant. What can I do for you!\")\n", 398 | " while True:\n", 399 | " inp=input(\"YOU : \")\n", 400 | " if inp.lower()==\"quit\":\n", 401 | " break\n", 402 | " results = model.predict([bow(inp, words)])[0]\n", 403 | " results_index=numpy.argmax(results)\n", 404 | " tag=classes[results_index]\n", 405 | " if(results[results_index]>0.65):\n", 406 | " \n", 407 | " for tg in intents['intents']:\n", 408 | " if tg['tag']==tag:\n", 409 | " response=tg['responses']\n", 410 | "\n", 411 | " print(\"BOT : \" +random.choice(response))\n", 412 | " else:\n", 413 | " print(\"BOT : I did not understand you! Try again\")\n", 414 | "cchat()" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 1, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | "Enter your FEEDBACK: thanks,you have been helpful\n" 427 | ] 428 | } 429 | ], 430 | "source": [ 431 | "import xlsxwriter \n", 432 | " \n", 433 | "# Workbook() takes one, non-optional, argument \n", 434 | "# which is the filename that we want to create. \n", 435 | "workbook = xlsxwriter.Workbook('feedback.xlsx') \n", 436 | " \n", 437 | "# The workbook object is then used to add new \n", 438 | "# worksheet via the add_worksheet() method. \n", 439 | "worksheet = workbook.add_worksheet() \n", 440 | " \n", 441 | "# Use the worksheet object to write \n", 442 | "# data via the write() method. \n", 443 | "worksheet.write('A1', input(\"Enter your FEEDBACK: \" )) \n", 444 | "\n", 445 | " \n", 446 | "# Finally, close the Excel file \n", 447 | "# via the close() method. 
\n", 448 | "workbook.close() " 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3", 504 | "language": "python", 505 | "name": "python3" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.7.3" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 2 522 | } 523 | --------------------------------------------------------------------------------