├── Images ├── 0.jpg ├── Flow.png ├── main.jpg ├── different.png └── architecture.png ├── feedback.xlsx ├── Chatbot use cases..pdf ├── intents.json ├── sentiment Analysis.ipynb ├── Readme.md ├── RailwayBot_SpeechRecognition.ipynb └── RailwayBot.ipynb /Images/0.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/0.jpg -------------------------------------------------------------------------------- /Images/Flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/Flow.png -------------------------------------------------------------------------------- /Images/main.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/main.jpg -------------------------------------------------------------------------------- /feedback.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/feedback.xlsx -------------------------------------------------------------------------------- /Images/different.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/different.png -------------------------------------------------------------------------------- /Chatbot use cases..pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Chatbot use cases..pdf -------------------------------------------------------------------------------- /Images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/findtharun/Railway_bot/HEAD/Images/architecture.png -------------------------------------------------------------------------------- /intents.json: -------------------------------------------------------------------------------- 1 | {"intents": [ 2 | {"tag": "greeting", 3 | "patterns": ["Hi"], 4 | "responses": ["Hello, thanks for visiting", "Good to see you again", "Hi there, how can I help?"], 5 | "context_set": "" 6 | }, 7 | {"tag": "end", 8 | "patterns": ["goodbye"], 9 | "responses": ["See you later, thanks for visiting", "Have a nice day", "Bye! Come back again soon."] 10 | }, 11 | {"tag": "thanks", 12 | "patterns": ["Thank you"], 13 | "responses": ["Happy to help!", "Any time!", "My pleasure"] 14 | }, 15 | {"tag": "hours", 16 | "patterns": ["What hours are you open?" 
], 17 | "responses": ["I am 24*7 available"] 18 | }, 19 | {"tag": "schedule", 20 | "patterns": ["What is today's schedule of trains" ], 21 | "responses": ["hyderabad to mumbai" ] 22 | }, 23 | {"tag": "payment accept", 24 | "patterns":["What are the payment modes accepted?"], 25 | "responses": ["We accept VISA, Mastercard and AMEX , most major credit cards"] 26 | }, 27 | {"tag": "opentoday", 28 | "patterns": ["Are you open today?", "When do you open today?"], 29 | "responses": ["We're 24*7 available"] 30 | }, 31 | {"tag": "pnr ", 32 | "patterns": [ "how to check pnr status"], 33 | "responses": ["your pnr details are sent to your registered mobile number"] 34 | }, 35 | {"tag": "pnr_no ", 36 | "patterns": [ " what is my pnr no"], 37 | "responses": ["your pnr details are sent to your registered mobile number"] 38 | }, 39 | {"tag": "refund ", 40 | "patterns": [ "how to check refund status?"], 41 | "responses": ["login to your account , Go to transactions and click refund status in the menu bar"] 42 | }, 43 | {"tag": "cancel", 44 | "patterns": [ "how to cancel ticket?"], 45 | "responses": ["login to your account , Go to my bookings, click cancel"] 46 | }, 47 | 48 | {"tag": "book ticket", 49 | "patterns": ["how to book ticket" ], 50 | "responses": ["go to our website and click buy option"] 51 | } 52 | 53 | 54 | ] 55 | } -------------------------------------------------------------------------------- /sentiment Analysis.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import pandas as pd\n", 10 | "from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer\n", 11 | "analyser = SentimentIntensityAnalyzer()" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 2, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "file=r'feedback.xlsx'" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 3, 26 | "metadata": {}, 27 | "outputs": [], 28 | "source": [ 29 | "xl=pd.ExcelFile(file)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": 4, 35 | "metadata": {}, 36 | "outputs": [], 37 | "source": [ 38 | "dfs=xl.parse(xl.sheet_names[0])" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": 5, 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "Empty DataFrame\n", 51 | "Columns: [thanks,you have been helpful]\n", 52 | "Index: []\n" 53 | ] 54 | } 55 | ], 56 | "source": [ 57 | "print(dfs)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 6, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "sid=SentimentIntensityAnalyzer()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 7, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "name": "stdout", 76 | "output_type": "stream", 77 | "text": [ 78 | "neg 0.0\n", 79 | "neu 0.517\n", 80 | "pos 0.483\n", 81 | "compound 0.4215\n" 82 | ] 83 | } 84 | ], 85 | "source": [ 86 | "for data in dfs:\n", 87 | " ss=sid.polarity_scores(data)\n", 88 | " for k in ss:\n", 89 | " print(k,ss[k])" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [] 98 | } 99 | ], 100 | "metadata": { 101 | "kernelspec": { 102 | "display_name": "Python 3", 103 | "language": "python", 104 | "name": "python3" 105 | }, 106 | 
"language_info": { 107 | "codemirror_mode": { 108 | "name": "ipython", 109 | "version": 3 110 | }, 111 | "file_extension": ".py", 112 | "mimetype": "text/x-python", 113 | "name": "python", 114 | "nbconvert_exporter": "python", 115 | "pygments_lexer": "ipython3", 116 | "version": "3.7.3" 117 | } 118 | }, 119 | "nbformat": 4, 120 | "nbformat_minor": 2 121 | } 122 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 |

25 |
26 | * Railway services are available in most countries around the world, yet the demand for fast, reliable, and passenger-friendly service is always felt. With ever-increasing passenger and freight loads, more efficient mechanisms are needed to handle this demand. These mechanisms cannot rely on manpower alone; smart technologies, when embedded within the system, can produce a smarter railway network throughout.
27 |
28 | * Indian Railways, the largest employer in India and one of the slowest service providers when it comes to train inquiries and ticket booking, could take the chatbot route for faster responses. Bringing AI into customer service through a chatbot is the need of the hour, and the ubiquity of smartphones and smart devices can help the country adopt it quickly. Not only Indian Railways but railway operators in many countries need to adopt AI to maintain good user engagement.
29 |
30 | SOFTWARE REQUIREMENTS
35 | 36 | * TensorFlow framework, NLTK library (see the setup note below). 37 | 38 |
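The notebooks were written against Python 3.7 with TensorFlow 1.x and TFLearn; judging from their imports, an environment along these lines should work (package names below are the usual PyPI names, and PyAudio is only needed for microphone input — treat this as a suggested setup, not an official requirements file):

```
pip install nltk tflearn "tensorflow<2" SpeechRecognition pyttsx3 PyAudio vaderSentiment XlsxWriter pandas numpy
```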
41 |
42 |
43 |
44 | * Overview: NLP is used to train the model, speech recognition handles voice input, and text-to-speech handles the spoken output.
45 | The approach also includes sentiment analysis, which is used to improve the performance of the chatbot by
46 | analysing the customer experience.
47 |
48 |
49 |
50 | > The chatbot's main function takes the user's input, sends it to the model, matches it against the trained intents and patterns, and returns the expected response to the user (a sketch of this loop follows).
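A minimal sketch of that flow; the `get_reply` argument is a placeholder for the predict-and-match step sketched further below (the notebooks implement this loop in their `cchat()` cells):

```python
def chat(get_reply):
    """Simple console loop: read user input, hand it to the model, print the reply."""
    print("BOT: I am your Personal Digital Assistant. What can I do for you!")
    while True:
        user_input = input("YOU : ")
        if user_input.lower() == "quit":
            break
        print("BOT : " + get_reply(user_input))
```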
51 |
52 |
53 |
54 | * If the user chooses to speak, the speech-to-text
55 | function is activated and the recognised text is sent to the bot. From the user's conversation with the bot, the Punkt sentence
56 | tokenizer divides the text into a list of sentences, using an unsupervised algorithm to build a
57 | model for abbreviated words, collocations, and words that start sentences. Stemming and
58 | lemmatization are used to generate the root form of inflected words. In this approach I have
59 | used only basic libraries for the framework and lemmatization; intent and pattern matching is
60 | done from scratch (see the sketch below).
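A condensed sketch of this speech-input path, using the same libraries as the notebooks (speech_recognition, pyttsx3, NLTK's Punkt tokenizer and the Lancaster stemmer):

```python
import nltk
import pyttsx3
import speech_recognition as sr
from nltk.stem.lancaster import LancasterStemmer

nltk.download("punkt", quiet=True)   # Punkt models used by word_tokenize
stemmer = LancasterStemmer()
engine = pyttsx3.init()              # text-to-speech engine for spoken prompts and replies

def speak(message):
    engine.say(message)
    engine.runAndWait()

def listen():
    """Capture one utterance from the microphone and return the recognised text."""
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        speak("Hey mate, say something")
        audio = recognizer.listen(source)
    try:
        return recognizer.recognize_google(audio)   # Google Web Speech API
    except sr.UnknownValueError:
        speak("Sorry mate! It's not working")
        return ""

def preprocess(sentence):
    """Tokenize the sentence and stem each token to its root form."""
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]
```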
61 |
62 |
63 | * After tokenization, I created words, classes and documents lists: each token is added to the words list,
64 | question marks and exclamation marks are ignored since they carry no useful signal, the useful data is
65 | added to the documents of our corpus, and each tag is added to the classes list. The words list is then
66 | stemmed and duplicate words are removed. Classifying the intents this way allows efficient query
67 | handling (see the sketch below).
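A condensed sketch of those preprocessing steps, following the notebook cells (in the notebooks the document is appended together with each newly seen tag):

```python
import json
import nltk
from nltk.stem.lancaster import LancasterStemmer

nltk.download("punkt", quiet=True)
stemmer = LancasterStemmer()
ignore_words = ["?", "!"]

with open("intents.json") as jd:
    intents = json.load(jd)

words, classes, documents = [], [], []
for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        tokens = nltk.word_tokenize(pattern)          # tokenize each training pattern
        words.extend(tokens)
        if intent["tag"] not in classes:              # one document and one class per new tag
            documents.append((tokens, intent["tag"]))
            classes.append(intent["tag"])

# stem, lowercase and de-duplicate the vocabulary; drop the ignored punctuation
words = sorted(set(stemmer.stem(w.lower()) for w in words if w not in ignore_words))
classes = sorted(set(classes))
```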
68 |
69 | > The features are shuffled and turned into an np.array; the neural network is built with the TensorFlow
70 | framework (via TFLearn) and the model is trained with a gradient-descent-based optimizer (sketched below).
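Continuing from the `words`, `classes` and `documents` built above, a condensed sketch of the training cells (TFLearn on TensorFlow 1.x, as used in the notebooks):

```python
import random
import numpy as np
import tensorflow as tf
import tflearn

# bag-of-words feature vector and one-hot label for every document
training = []
for tokens, tag in documents:
    stems = [stemmer.stem(t.lower()) for t in tokens]
    bag = [1 if w in stems else 0 for w in words]
    label = [0] * len(classes)
    label[classes.index(tag)] = 1
    training.append([bag, label])

random.shuffle(training)
training = np.array(training)     # object array of [bag, label] pairs, as in the notebook
train_x = list(training[:, 0])    # features
train_y = list(training[:, 1])    # one-hot intent labels

tf.reset_default_graph()                                    # TensorFlow 1.x API
net = tflearn.input_data(shape=[None, len(train_x[0])])
net = tflearn.fully_connected(net, 12)
net = tflearn.fully_connected(net, 12)
net = tflearn.fully_connected(net, len(train_y[0]), activation="softmax")
net = tflearn.regression(net)                               # Adam optimizer by default

model = tflearn.DNN(net, tensorboard_dir="tflearn_logs")
model.fit(train_x, train_y, n_epoch=1500, batch_size=8, show_metric=True)
model.save("model.tflearn")
```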
71 |
72 |
73 | * After training the model, the JSON file of intents and responses is loaded, the user query is converted into
74 | the same bag-of-words representation, and the model generates class probabilities; predictions below a
75 | confidence threshold are rejected, so the bot only answers intents it is confident about. (The project also includes a
76 | from-scratch implementation of TF-IDF and cosine similarity; a sketch of the prediction step follows.)
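A sketch of that prediction step, reusing `stemmer`, `model`, `words`, `classes` and `intents` from the sketches above; this is what the notebooks' `cchat()` cells do, and what the `get_reply` placeholder in the earlier loop stands for:

```python
import random
import nltk
import numpy as np

def bow(sentence, vocabulary):
    """0/1 bag-of-words vector marking which vocabulary words occur in the sentence."""
    stems = [stemmer.stem(t.lower()) for t in nltk.word_tokenize(sentence)]
    return np.array([1 if w in stems else 0 for w in vocabulary])

def respond(sentence, threshold=0.65):
    """Return a canned response for the most probable intent, or a fallback below threshold."""
    results = model.predict([bow(sentence, words)])[0]   # per-class probabilities
    best = int(np.argmax(results))
    if results[best] > threshold:
        for intent in intents["intents"]:
            if intent["tag"] == classes[best]:
                return random.choice(intent["responses"])
    return "I did not understand you! Try again"

chat(respond)   # plug the predictor into the console loop sketched earlier
```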
77 |
78 |
79 | * Feedback from the user is collected in feedback.xlsx and sentiment analysis is run on it to improve the performance of the chatbot (a sketch follows).
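A sketch of that feedback loop, writing the feedback with xlsxwriter and scoring it with VADER as the sentiment Analysis.ipynb notebook does:

```python
import pandas as pd
import xlsxwriter
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# collect one piece of feedback into feedback.xlsx
workbook = xlsxwriter.Workbook("feedback.xlsx")
worksheet = workbook.add_worksheet()
worksheet.write("A1", input("Enter your FEEDBACK: "))
workbook.close()

# score the collected feedback with VADER (neg / neu / pos / compound)
analyser = SentimentIntensityAnalyzer()
sheet = pd.ExcelFile("feedback.xlsx").parse(0)
for text in sheet.columns:   # the feedback sits in row 1, which pandas reads as the header
    print(text, analyser.polarity_scores(str(text)))
```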
80 |
81 | I have used many online resources while creating this application and I would like to thank their authors. I hope you found it insightful. If you have any queries, you can mail me at kumartharun435@gmail.com. I would love to hear feedback from you so I can improve it and make it better!
83 | 84 | 85 | -------------------------------------------------------------------------------- /RailwayBot_SpeechRecognition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import nltk\n", 10 | "#It contains text processing libraries for tokenization, \n", 11 | "#parsing, classification, stemming, tagging and semantic reasoning.\n", 12 | "from nltk.stem.lancaster import LancasterStemmer\n", 13 | "stemmer = LancasterStemmer()\n", 14 | "import string \n", 15 | "import numpy" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 2, 21 | "metadata": {}, 22 | "outputs": [ 23 | { 24 | "name": "stderr", 25 | "output_type": "stream", 26 | "text": [ 27 | "WARNING: Logging before flag parsing goes to stderr.\n", 28 | "W0924 20:32:41.921568 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\helpers\\summarizer.py:9: The name tf.summary.merge is deprecated. Please use tf.compat.v1.summary.merge instead.\n", 29 | "\n", 30 | "W0924 20:32:41.924598 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\helpers\\trainer.py:25: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.\n", 31 | "\n", 32 | "W0924 20:32:41.987388 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\collections.py:13: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead.\n", 33 | "\n", 34 | "W0924 20:32:41.995366 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:123: The name tf.get_collection is deprecated. Please use tf.compat.v1.get_collection instead.\n", 35 | "\n", 36 | "W0924 20:32:42.001344 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:129: The name tf.add_to_collection is deprecated. Please use tf.compat.v1.add_to_collection instead.\n", 37 | "\n", 38 | "W0924 20:32:42.001344 15176 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:131: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n", 39 | "\n" 40 | ] 41 | } 42 | ], 43 | "source": [ 44 | "import numpy as np\n", 45 | "import tflearn\n", 46 | "\n", 47 | "#TFlearn is a modular and transparent deep learning library built on top of Tensorflow. 
\n", 48 | "#It was designed to provide a higher-level API to TensorFlow \n", 49 | "#in order to facilitate and speed-up experimentations, while remaining fully transparent and compatible with it.\n", 50 | "import tensorflow as tf\n", 51 | "import random\n", 52 | "tf.logging.set_verbosity(tf.logging.ERROR)\n", 53 | "\n", 54 | "# import our chat-bot intents file\n", 55 | "import json\n", 56 | "#open the exact location\n", 57 | "with open('intents.json') as jd:\n", 58 | " intents = json.load(jd)" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": 3, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "import speech_recognition as sr\n", 68 | "r=sr.Recognizer()\n", 69 | "# importing the pyttsx library\n", 70 | "import pyttsx3\n", 71 | "# initialisation \n", 72 | "engine = pyttsx3.init() \n", 73 | "def bot_speaking(message): \n", 74 | " # testing \n", 75 | " engine.say(message)\n", 76 | " engine.runAndWait() \n", 77 | "def get_input():\n", 78 | " with sr.Microphone() as source:\n", 79 | " #print(\"Say something!!!\");\n", 80 | " bot_speaking(\"Hey mate say something\")\n", 81 | " audio=r.listen(source,timeout=0)\n", 82 | " #print(\"Perfect, Thanks!\")\n", 83 | " bot_speaking(\"Perfect, Thanks!\")\n", 84 | " try:\n", 85 | " msg=r.recognize_google(audio)\n", 86 | " print(\"TEXT: \"+msg); #r.recognize(audio,language='hi-IN')\n", 87 | " bot_speaking(\"you said \"+msg)\n", 88 | " return msg\n", 89 | " except:\n", 90 | " #print(\"Dude it's not working :(\")\n", 91 | " bot_speaking(\"Sorry mate! It's not working\")\n", 92 | " pass;" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": 4, 98 | "metadata": {}, 99 | "outputs": [ 100 | { 101 | "name": "stderr", 102 | "output_type": "stream", 103 | "text": [ 104 | "[nltk_data] Downloading package punkt to\n", 105 | "[nltk_data] C:\\Users\\kumar\\AppData\\Roaming\\nltk_data...\n", 106 | "[nltk_data] Package punkt is already up-to-date!\n" 107 | ] 108 | } 109 | ], 110 | "source": [ 111 | "nltk.download('punkt')\n", 112 | "#Punkt Sentence Tokenizer. 
This tokenizer divides a text into a list of sentences,\n", 113 | "#by using an unsupervised algorithm to build a model for abbreviation words, collocations, and words that start sentences.\n", 114 | "#It must be trained on a large collection of plaintext in the target language before it can be used.\n", 115 | "#Create words, classes and documents\n", 116 | "words = []\n", 117 | "classes = []\n", 118 | "documents = []\n", 119 | "ignore_words = ['?','!']" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 5, 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "# loop through each sentence in our intents patterns\n", 129 | "for intent in intents['intents']:\n", 130 | " for pattern in intent['patterns']:\n", 131 | " \n", 132 | " # tokenize each word in the sentence\n", 133 | " \n", 134 | " w = nltk.word_tokenize(pattern)\n", 135 | " # add to our words list\n", 136 | " words.extend(w)\n", 137 | " # add to documents in our corpus # add to our classes list\n", 138 | " if intent['tag'] not in classes:\n", 139 | "\n", 140 | " documents.append((w, intent['tag']))\n", 141 | " classes.append(intent['tag'])\n" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": 6, 147 | "metadata": {}, 148 | "outputs": [], 149 | "source": [ 150 | "# stem and lower each word and remove duplicates\n", 151 | "words = sorted(list(set([stemmer.stem(w.lower()) for w in words if w not in ignore_words])))\n", 152 | "\n", 153 | "# remove duplicates\n", 154 | "classes = sorted(list(set(classes)))\n" 155 | ] 156 | }, 157 | { 158 | "cell_type": "code", 159 | "execution_count": 7, 160 | "metadata": {}, 161 | "outputs": [ 162 | { 163 | "name": "stdout", 164 | "output_type": "stream", 165 | "text": [ 166 | "12 documents\n", 167 | "12 classes ['book ticket', 'cancel', 'end', 'greeting', 'hours', 'opentoday', 'payment accept', 'pnr ', 'pnr_no ', 'refund ', 'schedule', 'thanks']\n", 168 | "31 unique stemmed words [\"'s\", 'acceiv', 'ar', 'book', 'cancel', 'check', 'do', 'goodby', 'hi', 'hour', 'how', 'is', 'mod', 'my', 'no', 'of', 'op', 'pay', 'pnr', 'refund', 'schedule', 'stat', 'thank', 'the', 'ticket', 'to', 'today', 'train', 'what', 'when', 'you']\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "#Getting to know the documents,classes and stemmed words\n", 174 | "print (len(documents), \"documents\")\n", 175 | "print (len(classes), \"classes\", classes)\n", 176 | "print (len(words), \"unique stemmed words\", words)\n" 177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 8, 182 | "metadata": {}, 183 | "outputs": [], 184 | "source": [ 185 | "training = []\n", 186 | "output = []\n", 187 | "# create an empty array for our output\n", 188 | "output_empty = [0] * len(classes)\n" 189 | ] 190 | }, 191 | { 192 | "cell_type": "code", 193 | "execution_count": 9, 194 | "metadata": {}, 195 | "outputs": [ 196 | { 197 | "name": "stdout", 198 | "output_type": "stream", 199 | "text": [ 200 | "[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 201 | "[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 202 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]\n", 203 | "[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]\n", 204 | "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0]\n", 205 | "[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 
1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]\n", 206 | "[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1]\n", 207 | "[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", 208 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]\n", 209 | "[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", 210 | "[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]\n", 211 | "[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "# training set, bag of words for each sentence\n", 217 | "for doc in documents:\n", 218 | " # initialize our bag of words\n", 219 | " bag = []\n", 220 | " # list of tokenized words for the pattern\n", 221 | " pattern_words = doc[0]\n", 222 | " # stem each word\n", 223 | " pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]\n", 224 | " # create our bag of words array\n", 225 | " for w in words:\n", 226 | " bag.append(1) if w in pattern_words else bag.append(0)\n", 227 | " print(bag)\n", 228 | " # output is a '0' for each tag and '1' for current tag\n", 229 | " output_row = list(output_empty)\n", 230 | " output_row[classes.index(doc[1])] = 1\n", 231 | "\n", 232 | " training.append([bag, output_row])\n" 233 | ] 234 | }, 235 | { 236 | "cell_type": "code", 237 | "execution_count": 10, 238 | "metadata": {}, 239 | "outputs": [ 240 | { 241 | "name": "stdout", 242 | "output_type": "stream", 243 | "text": [ 244 | "[[list([0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0])\n", 245 | " list([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])]\n", 246 | " [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1])\n", 247 | " list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])]\n", 248 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1])\n", 249 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])]\n", 250 | " [list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0])\n", 251 | " list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 252 | " [list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])\n", 253 | " list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])]\n", 254 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])\n", 255 | " list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0])]\n", 256 | " [list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n", 257 | " list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 258 | " [list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0])\n", 259 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])]\n", 260 | " [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1])\n", 261 | " list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])]\n", 262 | " [list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])\n", 263 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])]\n", 264 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n", 265 | " list([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 266 | " [list([0, 0, 0, 1, 0, 0, 0, 0, 
0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0])\n", 267 | " list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]]\n" 268 | ] 269 | } 270 | ], 271 | "source": [ 272 | "# shuffle our features and turn into np.array\n", 273 | "random.shuffle(training)\n", 274 | "training = np.array(training)\n", 275 | "print(training)" 276 | ] 277 | }, 278 | { 279 | "cell_type": "code", 280 | "execution_count": 11, 281 | "metadata": {}, 282 | "outputs": [ 283 | { 284 | "name": "stdout", 285 | "output_type": "stream", 286 | "text": [ 287 | "[[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]] [[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]\n" 288 | ] 289 | } 290 | ], 291 | "source": [ 292 | "# create train and test lists\n", 293 | "train_x = list(training[:,0])\n", 294 | "train_y = list(training[:,1])\n", 295 | "print(train_x,train_y)\n" 296 | ] 297 | }, 298 | { 299 | "cell_type": "code", 300 | "execution_count": 12, 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "name": "stdout", 305 | "output_type": "stream", 306 | "text": [ 307 | "Training Step: 2999 | total loss: \u001b[1m\u001b[32m0.06964\u001b[0m\u001b[0m | time: 0.002s\n", 308 | "| Adam | epoch: 1500 | loss: 0.06964 - acc: 0.9972 -- iter: 08/12\n", 309 | "Training Step: 3000 | total loss: \u001b[1m\u001b[32m0.06323\u001b[0m\u001b[0m | time: 0.004s\n", 310 | "| Adam | epoch: 1500 | loss: 0.06323 - acc: 0.9975 -- iter: 12/12\n", 311 | "--\n" 312 | ] 313 | } 314 | ], 315 | "source": [ 316 | "# reset underlying graph data\n", 317 | "tf.reset_default_graph()\n", 318 | "# Build neural network\n", 319 | "net = tflearn.input_data(shape=[None, len(train_x[0])])\n", 320 | "net = tflearn.fully_connected(net, 12)\n", 321 | "net = tflearn.fully_connected(net, 12)\n", 322 | "net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')\n", 323 | "net = tflearn.regression(net)\n", 324 | "\n", 325 | "# Define model and setup tensorboard\n", 326 | "model = tflearn.DNN(net, tensorboard_dir='tflearn_logs') \n", 327 | "# Start training (apply gradient descent 
algorithm)\n", 328 | "model.fit(train_x, train_y, n_epoch=1500, batch_size=8, show_metric=True)#n_epoch is the number of times network sees the data\n", 329 | "model.save('model.tflearn')" 330 | ] 331 | }, 332 | { 333 | "cell_type": "code", 334 | "execution_count": 13, 335 | "metadata": {}, 336 | "outputs": [], 337 | "source": [ 338 | "import pickle\n", 339 | "pickle.dump({'words':words, 'classes':classes,'train_x':train_x,'train_y':train_y},open( \"training_data\", \"wb\" ))\n", 340 | "\n", 341 | "\n", 342 | "\n", 343 | "# restore all of our data structures\n", 344 | "import pickle\n", 345 | "data = pickle.load( open( \"training_data\", \"rb\" ) )\n", 346 | "words = data['words']\n", 347 | "classes = data['classes']\n", 348 | "train_x = data['train_x']\n", 349 | "train_y = data['train_y']" 350 | ] 351 | }, 352 | { 353 | "cell_type": "code", 354 | "execution_count": 14, 355 | "metadata": {}, 356 | "outputs": [], 357 | "source": [ 358 | "# import our chat-bot intents file\n", 359 | "import json\n", 360 | "with open('intents.json') as jd:\n", 361 | " intents = json.load(jd)\n", 362 | " \n", 363 | "# load our saved model\n", 364 | "model.load('./model.tflearn')\n", 365 | "\n", 366 | "def clean_up_sentence(sentence):\n", 367 | " # tokenize the pattern\n", 368 | " sentence_words = nltk.word_tokenize(sentence)\n", 369 | " # stem each word\n", 370 | " sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]\n", 371 | " return sentence_words\n", 372 | "\n", 373 | "# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence\n", 374 | "def bow(sentence, words, show_details=False):\n", 375 | " # tokenize the pattern\n", 376 | " sentence_words = clean_up_sentence(sentence)\n", 377 | " # bag of words\n", 378 | " bag = [0]*len(words) \n", 379 | " for s in sentence_words:\n", 380 | " for i,w in enumerate(words):\n", 381 | " if w == s: \n", 382 | " bag[i] = 1\n", 383 | " if show_details:\n", 384 | " print (\"found in bag: %s\" % w)\n", 385 | "\n", 386 | " return(np.array(bag))" 387 | ] 388 | }, 389 | { 390 | "cell_type": "code", 391 | "execution_count": null, 392 | "metadata": {}, 393 | "outputs": [], 394 | "source": [ 395 | "def cchat():\n", 396 | " print(\"BOT: I am your Personal Digital Assistant. What can I do for you!\")\n", 397 | " while True:\n", 398 | " inp=str(get_input()) #input(\"YOU : \")\n", 399 | " '''if inp.lower()==\"quit\" or inp==None:\n", 400 | " break'''\n", 401 | " if inp=='quit':\n", 402 | " break\n", 403 | " results = model.predict([bow(inp, words)])[0]\n", 404 | " results_index=numpy.argmax(results)\n", 405 | " tag=classes[results_index]\n", 406 | " if(results[results_index]>0.65):\n", 407 | " \n", 408 | " for tg in intents['intents']:\n", 409 | " if tg['tag']==tag:\n", 410 | " response=tg['responses']\n", 411 | " ms=random.choice(response)\n", 412 | "\n", 413 | " print(\"BOT : \" +ms)\n", 414 | " bot_speaking(ms)\n", 415 | " else:\n", 416 | " print(\"BOT : I did not understand you! Try again\")\n", 417 | " bot_speaking(\"I did not understand you! 
Try again\")\n", 418 | "cchat()" 419 | ] 420 | }, 421 | { 422 | "cell_type": "code", 423 | "execution_count": null, 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "print(\"BOT : These are the frequently asked questions\")\n" 428 | ] 429 | }, 430 | { 431 | "cell_type": "code", 432 | "execution_count": null, 433 | "metadata": {}, 434 | "outputs": [], 435 | "source": [ 436 | "import xlsxwriter \n", 437 | " \n", 438 | "# Workbook() takes one, non-optional, argument \n", 439 | "# which is the filename that we want to create. \n", 440 | "workbook = xlsxwriter.Workbook('feedback.xlsx') \n", 441 | " \n", 442 | "# The workbook object is then used to add new \n", 443 | "# worksheet via the add_worksheet() method. \n", 444 | "worksheet = workbook.add_worksheet() \n", 445 | " \n", 446 | "# Use the worksheet object to write \n", 447 | "# data via the write() method. \n", 448 | "worksheet.write('A1', input(\"Enter your FEEDBACK: \" )) \n", 449 | "\n", 450 | " \n", 451 | "# Finally, close the Excel file \n", 452 | "# via the close() method. \n", 453 | "workbook.close() " 454 | ] 455 | }, 456 | { 457 | "cell_type": "code", 458 | "execution_count": null, 459 | "metadata": {}, 460 | "outputs": [], 461 | "source": [] 462 | }, 463 | { 464 | "cell_type": "code", 465 | "execution_count": null, 466 | "metadata": {}, 467 | "outputs": [], 468 | "source": [] 469 | }, 470 | { 471 | "cell_type": "code", 472 | "execution_count": null, 473 | "metadata": {}, 474 | "outputs": [], 475 | "source": [] 476 | }, 477 | { 478 | "cell_type": "code", 479 | "execution_count": null, 480 | "metadata": {}, 481 | "outputs": [], 482 | "source": [] 483 | }, 484 | { 485 | "cell_type": "code", 486 | "execution_count": null, 487 | "metadata": {}, 488 | "outputs": [], 489 | "source": [] 490 | } 491 | ], 492 | "metadata": { 493 | "kernelspec": { 494 | "display_name": "Python 3", 495 | "language": "python", 496 | "name": "python3" 497 | }, 498 | "language_info": { 499 | "codemirror_mode": { 500 | "name": "ipython", 501 | "version": 3 502 | }, 503 | "file_extension": ".py", 504 | "mimetype": "text/x-python", 505 | "name": "python", 506 | "nbconvert_exporter": "python", 507 | "pygments_lexer": "ipython3", 508 | "version": "3.7.3" 509 | } 510 | }, 511 | "nbformat": 4, 512 | "nbformat_minor": 2 513 | } 514 | -------------------------------------------------------------------------------- /RailwayBot.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import nltk\n", 10 | "#It contains text processing libraries for tokenization, \n", 11 | "#parsing, classification, stemming, tagging and semantic reasoning.\n", 12 | "from nltk.stem.lancaster import LancasterStemmer\n", 13 | "stemmer = LancasterStemmer()\n", 14 | "import curses\n", 15 | "import string\n", 16 | "import numpy" 17 | ] 18 | }, 19 | { 20 | "cell_type": "code", 21 | "execution_count": 2, 22 | "metadata": {}, 23 | "outputs": [ 24 | { 25 | "name": "stderr", 26 | "output_type": "stream", 27 | "text": [ 28 | "WARNING: Logging before flag parsing goes to stderr.\n", 29 | "W0922 14:57:32.300952 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\helpers\\summarizer.py:9: The name tf.summary.merge is deprecated. 
Please use tf.compat.v1.summary.merge instead.\n", 30 | "\n", 31 | "W0922 14:57:32.303982 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\helpers\\trainer.py:25: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.\n", 32 | "\n", 33 | "W0922 14:57:32.379739 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\collections.py:13: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead.\n", 34 | "\n", 35 | "W0922 14:57:32.388729 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:123: The name tf.get_collection is deprecated. Please use tf.compat.v1.get_collection instead.\n", 36 | "\n", 37 | "W0922 14:57:32.563499 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:129: The name tf.add_to_collection is deprecated. Please use tf.compat.v1.add_to_collection instead.\n", 38 | "\n", 39 | "W0922 14:57:32.564502 13316 deprecation_wrapper.py:119] From C:\\Users\\kumar\\Anaconda3\\lib\\site-packages\\tflearn\\config.py:131: The name tf.assign is deprecated. Please use tf.compat.v1.assign instead.\n", 40 | "\n" 41 | ] 42 | } 43 | ], 44 | "source": [ 45 | "import numpy as np\n", 46 | "import tflearn\n", 47 | "\n", 48 | "#TFlearn is a modular and transparent deep learning library built on top of Tensorflow. \n", 49 | "#It was designed to provide a higher-level API to TensorFlow \n", 50 | "#in order to facilitate and speed-up experimentations, while remaining fully transparent and compatible with it.\n", 51 | "import tensorflow as tf\n", 52 | "import random\n", 53 | "tf.logging.set_verbosity(tf.logging.ERROR)\n", 54 | "\n", 55 | "# import our chat-bot intents file\n", 56 | "import json\n", 57 | "#open the exact location of CORPUS \n", 58 | "with open('intents.json') as jd:\n", 59 | " intents = json.load(jd)" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": 3, 65 | "metadata": {}, 66 | "outputs": [ 67 | { 68 | "name": "stderr", 69 | "output_type": "stream", 70 | "text": [ 71 | "[nltk_data] Downloading package punkt to\n", 72 | "[nltk_data] C:\\Users\\kumar\\AppData\\Roaming\\nltk_data...\n", 73 | "[nltk_data] Package punkt is already up-to-date!\n" 74 | ] 75 | } 76 | ], 77 | "source": [ 78 | "nltk.download('punkt')\n", 79 | "#Punkt Sentence Tokenizer. 
This tokenizer divides a text into a list of sentences,\n", 80 | "#by using an unsupervised algorithm to build a model for abbreviation words, collocations, and words that start sentences.\n", 81 | "#It must be trained on a large collection of plaintext in the target language before it can be used.\n", 82 | "#Create words, classes and documents\n", 83 | "words = []\n", 84 | "classes = []\n", 85 | "documents = []\n", 86 | "ignore_words = ['?','!']" 87 | ] 88 | }, 89 | { 90 | "cell_type": "code", 91 | "execution_count": 4, 92 | "metadata": {}, 93 | "outputs": [], 94 | "source": [ 95 | "# loop through each sentence in our intents patterns\n", 96 | "for intent in intents['intents']:\n", 97 | " for pattern in intent['patterns']: # PATTERN IS USER QUESTION\n", 98 | " \n", 99 | " # tokenize each word in the sentence\n", 100 | " \n", 101 | " w = nltk.word_tokenize(pattern)\n", 102 | " # add to our words list\n", 103 | " words.extend(w)\n", 104 | " # add to documents in our corpus # add to our classes list\n", 105 | " if intent['tag'] not in classes:\n", 106 | "\n", 107 | " documents.append((w, intent['tag']))\n", 108 | " classes.append(intent['tag'])\n" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 5, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "# stem and lower each word and remove duplicates\n", 118 | "words = sorted(list(set([stemmer.stem(w.lower()) for w in words if w not in ignore_words])))\n", 119 | "\n", 120 | "# remove duplicates\n", 121 | "classes = sorted(list(set(classes)))\n" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": 6, 127 | "metadata": {}, 128 | "outputs": [ 129 | { 130 | "name": "stdout", 131 | "output_type": "stream", 132 | "text": [ 133 | "12 documents\n", 134 | "12 classes ['book ticket', 'cancel', 'end', 'greeting', 'hours', 'opentoday', 'payment accept', 'pnr ', 'pnr_no ', 'refund ', 'schedule', 'thanks']\n", 135 | "31 unique stemmed words [\"'s\", 'acceiv', 'ar', 'book', 'cancel', 'check', 'do', 'goodby', 'hi', 'hour', 'how', 'is', 'mod', 'my', 'no', 'of', 'op', 'pay', 'pnr', 'refund', 'schedule', 'stat', 'thank', 'the', 'ticket', 'to', 'today', 'train', 'what', 'when', 'you']\n" 136 | ] 137 | } 138 | ], 139 | "source": [ 140 | "#Getting to know the documents,classes and stemmed words\n", 141 | "print (len(documents), \"documents\")\n", 142 | "print (len(classes), \"classes\", classes)\n", 143 | "print (len(words), \"unique stemmed words\", words)\n" 144 | ] 145 | }, 146 | { 147 | "cell_type": "code", 148 | "execution_count": 7, 149 | "metadata": {}, 150 | "outputs": [], 151 | "source": [ 152 | "training = []\n", 153 | "output = []\n", 154 | "# create an empty array for our output\n", 155 | "output_empty = [0] * len(classes)\n" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 8, 161 | "metadata": {}, 162 | "outputs": [ 163 | { 164 | "name": "stdout", 165 | "output_type": "stream", 166 | "text": [ 167 | "[0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 168 | "[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]\n", 169 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]\n", 170 | "[0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1]\n", 171 | "[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0]\n", 172 | "[0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 
0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0]\n", 173 | "[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1]\n", 174 | "[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", 175 | "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0]\n", 176 | "[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", 177 | "[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]\n", 178 | "[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0]\n" 179 | ] 180 | } 181 | ], 182 | "source": [ 183 | "# training set, bag of words for each sentence\n", 184 | "for doc in documents:\n", 185 | " # initialize our bag of words\n", 186 | " bag = [] #bag of words which is a representaion of text that describes the occurence of wordss\n", 187 | " #with in a doccument\n", 188 | " # list of tokenized words for the pattern\n", 189 | " pattern_words = doc[0]\n", 190 | " # stem each word\n", 191 | " pattern_words = [stemmer.stem(word.lower()) for word in pattern_words]\n", 192 | " # create our bag of words array\n", 193 | " for w in words: #if the word appears in the pattern put 1 else 0 \n", 194 | " bag.append(1) if w in pattern_words else bag.append(0)\n", 195 | " print(bag)\n", 196 | " # output is a '0' for each tag and '1' for current tag\n", 197 | " output_row = list(output_empty)\n", 198 | " output_row[classes.index(doc[1])] = 1\n", 199 | "\n", 200 | " training.append([bag, output_row])\n" 201 | ] 202 | }, 203 | { 204 | "cell_type": "code", 205 | "execution_count": null, 206 | "metadata": {}, 207 | "outputs": [ 208 | { 209 | "name": "stdout", 210 | "output_type": "stream", 211 | "text": [ 212 | "[[list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])\n", 213 | " list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0])]\n", 214 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1])\n", 215 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1])]\n", 216 | " [list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0])\n", 217 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])]\n", 218 | " [list([0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n", 219 | " list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 220 | " [list([0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0])\n", 221 | " list([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])]\n", 222 | " [list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0])\n", 223 | " list([0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 224 | " [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1])\n", 225 | " list([0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])]\n", 226 | " [list([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1])\n", 227 | " list([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0])]\n", 228 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0])\n", 229 | " list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0])]\n", 230 | " [list([0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])\n", 231 | " list([0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 232 | " [list([0, 0, 0, 1, 0, 
0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0])\n", 233 | " list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])]\n", 234 | " [list([1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0])\n", 235 | " list([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0])]]\n" 236 | ] 237 | } 238 | ], 239 | "source": [ 240 | "# shuffle our features and turn into np.array\n", 241 | "random.shuffle(training)\n", 242 | "training = np.array(training)\n", 243 | "print(training)" 244 | ] 245 | }, 246 | { 247 | "cell_type": "code", 248 | "execution_count": null, 249 | "metadata": {}, 250 | "outputs": [ 251 | { 252 | "name": "stdout", 253 | "output_type": "stream", 254 | "text": [ 255 | "[[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0]] [[0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]]\n" 256 | ] 257 | } 258 | ], 259 | "source": [ 260 | "# create train and test lists\n", 261 | "train_x = list(training[:,0])\n", 262 | "train_y = list(training[:,1])\n", 263 | "print(train_x,train_y)\n" 264 | ] 265 | }, 266 | { 267 | "cell_type": "code", 268 | "execution_count": null, 269 | "metadata": {}, 270 | "outputs": [ 271 | { 272 | "name": "stdout", 273 | "output_type": "stream", 274 | "text": [ 275 | "Training Step: 2999 | total loss: \u001b[1m\u001b[32m1.87047\u001b[0m\u001b[0m | time: 0.004s\n", 276 | "| Adam | epoch: 1500 | loss: 1.87047 - acc: 0.8995 -- iter: 08/12\n", 277 | "Training Step: 3000 | total loss: \u001b[1m\u001b[32m1.68418\u001b[0m\u001b[0m | time: 0.006s\n", 278 | "| Adam | epoch: 1500 | loss: 1.68418 - acc: 0.9096 -- iter: 12/12\n", 279 | "--\n" 280 | ] 281 | } 282 | ], 283 | "source": [ 284 | "# reset underlying graph data\n", 285 | "tf.reset_default_graph()\n", 286 | "# Build neural network\n", 287 | "net = tflearn.input_data(shape=[None, len(train_x[0])])\n", 288 | "net = tflearn.fully_connected(net, 12)\n", 289 | "net = tflearn.fully_connected(net, 12)\n", 290 | "net = tflearn.fully_connected(net, len(train_y[0]), activation='softmax')\n", 291 | "net = tflearn.regression(net)\n", 292 | 
"\n", 293 | "# Define model and setup tensorboard\n", 294 | "model = tflearn.DNN(net, tensorboard_dir='tflearn_logs') \n", 295 | "# Start training (apply gradient descent algorithm)\n", 296 | "model.fit(train_x, train_y, n_epoch=1500, batch_size=8, show_metric=True)#n_epoch is the number of times network sees the data\n", 297 | "model.save('model.tflearn')" 298 | ] 299 | }, 300 | { 301 | "cell_type": "code", 302 | "execution_count": null, 303 | "metadata": {}, 304 | "outputs": [], 305 | "source": [ 306 | "import pickle\n", 307 | "pickle.dump({'words':words, 'classes':classes,'train_x':train_x,'train_y':train_y},open( \"training_data\", \"wb\" ))\n", 308 | "\n", 309 | "\n", 310 | "\n", 311 | "# restore all of our data structures\n", 312 | "import pickle\n", 313 | "data = pickle.load( open( \"training_data\", \"rb\" ) )\n", 314 | "words = data['words']\n", 315 | "classes = data['classes']\n", 316 | "train_x = data['train_x']\n", 317 | "train_y = data['train_y']" 318 | ] 319 | }, 320 | { 321 | "cell_type": "code", 322 | "execution_count": null, 323 | "metadata": {}, 324 | "outputs": [], 325 | "source": [ 326 | "# import our chat-bot intents file\n", 327 | "import json\n", 328 | "with open('intents.json') as jd:\n", 329 | " intents = json.load(jd)\n", 330 | " \n", 331 | "# load our saved model\n", 332 | "model.load('./model.tflearn')\n", 333 | "\n", 334 | "def clean_up_sentence(sentence):\n", 335 | " # tokenize the pattern\n", 336 | " sentence_words = nltk.word_tokenize(sentence)\n", 337 | " # stem each word\n", 338 | " sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]\n", 339 | " return sentence_words\n", 340 | "\n", 341 | "# return bag of words array: 0 or 1 for each word in the bag that exists in the sentence\n", 342 | "def bow(sentence, words, show_details=False):\n", 343 | " # tokenize the pattern\n", 344 | " sentence_words = clean_up_sentence(sentence)\n", 345 | " # bag of words\n", 346 | " bag = [0]*len(words) \n", 347 | " for s in sentence_words:\n", 348 | " for i,w in enumerate(words):\n", 349 | " if w == s: \n", 350 | " bag[i] = 1\n", 351 | " if show_details:\n", 352 | " print (\"found in bag: %s\" % w)\n", 353 | "\n", 354 | " return(np.array(bag))" 355 | ] 356 | }, 357 | { 358 | "cell_type": "markdown", 359 | "metadata": {}, 360 | "source": [ 361 | "GRADIENT DESCENT ALGORITHM\n" 362 | ] 363 | }, 364 | { 365 | "cell_type": "markdown", 366 | "metadata": {}, 367 | "source": [ 368 | "Neural networks are trained iteratively using optimization techniques like gradient descent. After each cycle of training, an error metric is calculated based on the difference between prediction and target. The derivatives of this error metric are calculated and propagated back through the network using a technique called backpropagation. Each neuron’s coefficients (weights) are then adjusted relative to how much they contributed to the total error. This process is repeated iteratively until the network error drops below an acceptable threshold." 369 | ] 370 | }, 371 | { 372 | "cell_type": "code", 373 | "execution_count": null, 374 | "metadata": {}, 375 | "outputs": [ 376 | { 377 | "name": "stdout", 378 | "output_type": "stream", 379 | "text": [ 380 | "BOT: I am your Personal Digital Assistant. 
What can I do for you!\n", 381 | "YOU : hi\n", 382 | "BOT : Hello, thanks for visiting\n", 383 | "YOU : how to cancel a ticket\n", 384 | "BOT : login to your account , Go to my bookings, click cancel\n", 385 | "YOU : how to book a ticket\n", 386 | "BOT : go to our website and click buy option\n", 387 | "YOU : what are the payment modes\n", 388 | "BOT : We accept VISA, Mastercard and AMEX , most major credit cards\n", 389 | "YOU : hi\n", 390 | "BOT : Hi there, how can I help?\n", 391 | "YOU : quit\n" 392 | ] 393 | } 394 | ], 395 | "source": [ 396 | "def cchat():\n", 397 | " print(\"BOT: I am your Personal Digital Assistant. What can I do for you!\")\n", 398 | " while True:\n", 399 | " inp=input(\"YOU : \")\n", 400 | " if inp.lower()==\"quit\":\n", 401 | " break\n", 402 | " results = model.predict([bow(inp, words)])[0]\n", 403 | " results_index=numpy.argmax(results)\n", 404 | " tag=classes[results_index]\n", 405 | " if(results[results_index]>0.65):\n", 406 | " \n", 407 | " for tg in intents['intents']:\n", 408 | " if tg['tag']==tag:\n", 409 | " response=tg['responses']\n", 410 | "\n", 411 | " print(\"BOT : \" +random.choice(response))\n", 412 | " else:\n", 413 | " print(\"BOT : I did not understand you! Try again\")\n", 414 | "cchat()" 415 | ] 416 | }, 417 | { 418 | "cell_type": "code", 419 | "execution_count": 1, 420 | "metadata": {}, 421 | "outputs": [ 422 | { 423 | "name": "stdout", 424 | "output_type": "stream", 425 | "text": [ 426 | "Enter your FEEDBACK: thanks,you have been helpful\n" 427 | ] 428 | } 429 | ], 430 | "source": [ 431 | "import xlsxwriter \n", 432 | " \n", 433 | "# Workbook() takes one, non-optional, argument \n", 434 | "# which is the filename that we want to create. \n", 435 | "workbook = xlsxwriter.Workbook('feedback.xlsx') \n", 436 | " \n", 437 | "# The workbook object is then used to add new \n", 438 | "# worksheet via the add_worksheet() method. \n", 439 | "worksheet = workbook.add_worksheet() \n", 440 | " \n", 441 | "# Use the worksheet object to write \n", 442 | "# data via the write() method. \n", 443 | "worksheet.write('A1', input(\"Enter your FEEDBACK: \" )) \n", 444 | "\n", 445 | " \n", 446 | "# Finally, close the Excel file \n", 447 | "# via the close() method. 
\n", 448 | "workbook.close() " 449 | ] 450 | }, 451 | { 452 | "cell_type": "code", 453 | "execution_count": null, 454 | "metadata": {}, 455 | "outputs": [], 456 | "source": [] 457 | }, 458 | { 459 | "cell_type": "code", 460 | "execution_count": null, 461 | "metadata": {}, 462 | "outputs": [], 463 | "source": [] 464 | }, 465 | { 466 | "cell_type": "code", 467 | "execution_count": null, 468 | "metadata": {}, 469 | "outputs": [], 470 | "source": [] 471 | }, 472 | { 473 | "cell_type": "code", 474 | "execution_count": null, 475 | "metadata": {}, 476 | "outputs": [], 477 | "source": [] 478 | }, 479 | { 480 | "cell_type": "code", 481 | "execution_count": null, 482 | "metadata": {}, 483 | "outputs": [], 484 | "source": [] 485 | }, 486 | { 487 | "cell_type": "code", 488 | "execution_count": null, 489 | "metadata": {}, 490 | "outputs": [], 491 | "source": [] 492 | }, 493 | { 494 | "cell_type": "code", 495 | "execution_count": null, 496 | "metadata": {}, 497 | "outputs": [], 498 | "source": [] 499 | } 500 | ], 501 | "metadata": { 502 | "kernelspec": { 503 | "display_name": "Python 3", 504 | "language": "python", 505 | "name": "python3" 506 | }, 507 | "language_info": { 508 | "codemirror_mode": { 509 | "name": "ipython", 510 | "version": 3 511 | }, 512 | "file_extension": ".py", 513 | "mimetype": "text/x-python", 514 | "name": "python", 515 | "nbconvert_exporter": "python", 516 | "pygments_lexer": "ipython3", 517 | "version": "3.7.3" 518 | } 519 | }, 520 | "nbformat": 4, 521 | "nbformat_minor": 2 522 | } 523 | --------------------------------------------------------------------------------