├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── bot.png ├── bot.py ├── dataprovider ├── __init__.py └── dataprovider.py ├── nlp ├── __init__.py └── rasa.py ├── rasa-config.json ├── rasa-data.json └── slack ├── __init__.py └── bot.py /.gitignore: -------------------------------------------------------------------------------- 1 | # RASA 2 | rasa-model 3 | rasa-unparsed.txt 4 | 5 | # Byte-compiled / optimized / DLL files 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | 10 | # C extensions 11 | *.so 12 | 13 | # Distribution / packaging 14 | .Python 15 | env/ 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # dotenv 87 | .env 88 | 89 | # virtualenv 90 | .venv 91 | venv/ 92 | ENV/ 93 | 94 | # Spyder project settings 95 | .spyderproject 96 | .spyproject 97 | 98 | # Rope project settings 99 | .ropeproject 100 | 101 | # mkdocs documentation 102 | /site 103 | 104 | # mypy 105 | .mypy_cache/ 106 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Alex Pliutau 2 | 3 | FROM python:3 4 | 5 | ADD . / 6 | 7 | RUN pip install slackclient rasa_nlu scipy scikit-learn sklearn-crfsuite numpy spacy wolframalpha wikipedia 8 | RUN python -m spacy download en 9 | 10 | CMD [ "python", "./bot.py" ] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Alex Pliutau 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### Overview 2 | 3 | Bot using RASA NLU to answer different questions. Using Wolfram Alpha to get answers. Using Wikipedia as failover. 4 | 5 | ![bot.png](https://raw.githubusercontent.com/plutov/bot/master/bot.png) 6 | 7 | [Article in my blog](http://pliutau.com/create-bot-with-nlu-in-python/) describing how it works. 8 | 9 | ### Run it with Docker 10 | 11 | - [Get Slack API Token](https://get.slack.help/hc/en-us/articles/215770388-Create-and-regenerate-API-tokens) 12 | - [Get Wolfram App ID](https://developer.wolframalpha.com/portal/myapps/) 13 | - Wikipedia API doesn't require API key. 14 | 15 | ``` 16 | docker build -t bot . && docker run -e SLACK_TOKEN= -e WOLFRAM_APP_ID= bot 17 | ``` 18 | 19 | ### Run with Python 3 20 | 21 | Install dependencies from Dockerfile and run: 22 | 23 | ``` 24 | SLACK_TOKEN= WOLFRAM_APP_ID= python3 bot.py 25 | ``` 26 | 27 | ### RASA 28 | 29 | Create initial intentions - https://rasahq.github.io/rasa-nlu-trainer/. During the Bot start we run training process based on `rasa-data.json` intentions. Later we work with messages we can parse, also Bot stores all unparsed messages so we can check them later. -------------------------------------------------------------------------------- /bot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plutov/bot/b3588df64f3bace7f62ae4528eba9f7a6fd697d0/bot.png -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Alex Pliutau 2 | 3 | import os 4 | import sys 5 | import traceback 6 | from slack.bot import SlackBot 7 | from nlp.rasa import RasaNLP 8 | from dataprovider.dataprovider import DataProvider 9 | 10 | try: 11 | dp = DataProvider(os.environ.get("WOLFRAM_APP_ID")) 12 | 13 | r = RasaNLP(dp, "rasa-config.json", "rasa-data.json", "./rasa-model") 14 | r.train() 15 | 16 | b = SlackBot(os.environ.get("SLACK_TOKEN"), r) 17 | b.start() 18 | except KeyboardInterrupt: 19 | r.snapshot_unparsed_messages("rasa-unparsed.txt") 20 | sys.exit(0) 21 | except: 22 | r.snapshot_unparsed_messages("rasa-unparsed.txt") 23 | traceback.print_exc() -------------------------------------------------------------------------------- /dataprovider/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plutov/bot/b3588df64f3bace7f62ae4528eba9f7a6fd697d0/dataprovider/__init__.py -------------------------------------------------------------------------------- /dataprovider/dataprovider.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Alex Pliutau 2 | 3 | import wolframalpha 4 | import wikipedia 5 | import logging 6 | 7 | class DataProvider(object): 8 | NOT_FOUND_MSG = "Sorry, I don't know this yet" 9 | 10 | def __init__(self, app_id): 11 | logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') 12 | 13 | self.wolfram_client = wolframalpha.Client(app_id) 14 | logging.info("connected to wolfram") 15 | 16 | def get_short_answer(self, query): 17 | logging.info("searching in wolfram: {}".format(query)) 18 | 19 | try: 20 | wolfram_res = self.wolfram_client.query(query) 21 | logging.info("wolfram res: {}".format(wolfram_res)) 22 | 23 | return next(wolfram_res.results).text 24 | except: 25 | # use wikipedia as failover 26 | wikiepedia_res = wikipedia.summary(query, sentences=1) 27 | logging.info("wikipedia res: {}".format(wikiepedia_res)) 28 | if wikiepedia_res: 29 | return wikiepedia_res 30 | 31 | return self.NOT_FOUND_MSG 32 | -------------------------------------------------------------------------------- /nlp/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plutov/bot/b3588df64f3bace7f62ae4528eba9f7a6fd697d0/nlp/__init__.py -------------------------------------------------------------------------------- /nlp/rasa.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Alex Pliutau 2 | 3 | import random 4 | import logging 5 | from rasa_nlu.converters import load_data 6 | from rasa_nlu.config import RasaNLUConfig 7 | from rasa_nlu.model import Trainer, Metadata, Interpreter 8 | from rasa_nlu.components import ComponentBuilder 9 | 10 | class RasaNLP(object): 11 | COULD_NOT_PARSE_MSGS = [ 12 | "Sorry, I don't know it", 13 | "Next time I will know, but not now", 14 | "Sorry, can't get what do you mean", 15 | "Try something else" 16 | ] 17 | GREET_MSGS = ["Hola!", "Privet!", "Xin chào!"] 18 | INTENT_GREET = "greet" 19 | INTENTS_QUESTION = ["whatis", "howto", "when", "do"] 20 | ENTITY_QUERY = "query" 21 | 22 | def __init__(self, data_provider, config_file, data_file, model_dir): 23 | logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') 24 | 25 | # store unparsed messages, so later we can train bot 26 | self.unparsed_messages = [] 27 | 28 | self.data_provider = data_provider 29 | self.data_file = data_file 30 | self.model_dir = model_dir 31 | self.rasa_config = RasaNLUConfig(config_file) 32 | 33 | def train(self): 34 | training_data = load_data(self.data_file) 35 | trainer = Trainer(self.rasa_config) 36 | trainer.train(training_data) 37 | 38 | self.interpreter = Interpreter.load(trainer.persist(self.model_dir), self.rasa_config) 39 | 40 | logging.info("rasa trained successfully") 41 | 42 | def parse(self, msg): 43 | return self.interpreter.parse(msg) 44 | 45 | def find_reply(self, msg): 46 | res = self.parse(msg) 47 | logging.info("rasa parse res: {}".format(res)) 48 | 49 | if not "intent" in res or res["intent"] is None: 50 | # later we can do something with unparsed messages, probably train bot 51 | self.unparsed_messages.append(msg) 52 | return random.choice(self.COULD_NOT_PARSE_MSGS) 53 | 54 | if res["intent"]["name"] == self.INTENT_GREET: 55 | return random.choice(self.GREET_MSGS) 56 | 57 | # same approach for all questions 58 | if res["intent"]["name"] in self.INTENTS_QUESTION and len(res["entities"]) > 0: 59 | for e in res["entities"]: 60 | if e["entity"] == self.ENTITY_QUERY: 61 | return self.get_short_answer(e["value"]) 62 | 63 | self.unparsed_messages.append(msg) 64 | return random.choice(self.COULD_NOT_PARSE_MSGS) 65 | 66 | def get_short_answer(self, query): 67 | return self.data_provider.get_short_answer(query) 68 | 69 | # saves unparsed messages into a file 70 | def snapshot_unparsed_messages(self, filename): 71 | with open(filename, "a") as f: 72 | for msg in self.unparsed_messages: 73 | f.write("{}\n".format(msg)) -------------------------------------------------------------------------------- /rasa-config.json: -------------------------------------------------------------------------------- 1 | { 2 | "pipeline": "spacy_sklearn" 3 | } -------------------------------------------------------------------------------- /rasa-data.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "hey", 6 | "intent": "greet", 7 | "entities": [] 8 | }, 9 | { 10 | "text": "howdy", 11 | "intent": "greet", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "hello", 16 | "intent": "greet", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "hi", 21 | "intent": "greet", 22 | "entities": [] 23 | }, 24 | { 25 | "text": "What is a goroutine?", 26 | "intent": "whatis", 27 | "entities": [ 28 | { 29 | "start": 10, 30 | "end": 19, 31 | "value": "goroutine", 32 | "entity": "query" 33 | } 34 | ] 35 | }, 36 | { 37 | "text": "Who is the president of Vietnam?", 38 | "intent": "whatis", 39 | "entities": [ 40 | { 41 | "start": 11, 42 | "end": 31, 43 | "value": "president of Vietnam", 44 | "entity": "query" 45 | } 46 | ] 47 | }, 48 | { 49 | "text": "What is love?", 50 | "intent": "whatis", 51 | "entities": [ 52 | { 53 | "start": 8, 54 | "end": 12, 55 | "value": "love", 56 | "entity": "query" 57 | } 58 | ] 59 | }, 60 | { 61 | "text": "Who are dinosaurs?", 62 | "intent": "whatis", 63 | "entities": [ 64 | { 65 | "start": 8, 66 | "end": 17, 67 | "value": "dinosaurs", 68 | "entity": "query" 69 | } 70 | ] 71 | }, 72 | { 73 | "text": "What is an eclipse?", 74 | "intent": "whatis", 75 | "entities": [ 76 | { 77 | "start": 11, 78 | "end": 18, 79 | "value": "eclipse", 80 | "entity": "query" 81 | } 82 | ] 83 | }, 84 | { 85 | "text": "Who is a christian?", 86 | "intent": "whatis", 87 | "entities": [ 88 | { 89 | "start": 9, 90 | "end": 18, 91 | "value": "christian", 92 | "entity": "query" 93 | } 94 | ] 95 | }, 96 | { 97 | "text": "Who is an engineer?", 98 | "intent": "whatis", 99 | "entities": [ 100 | { 101 | "start": 10, 102 | "end": 18, 103 | "value": "engineer", 104 | "entity": "query" 105 | } 106 | ] 107 | }, 108 | { 109 | "text": "What is the population of Vietnam?", 110 | "intent": "whatis", 111 | "entities": [ 112 | { 113 | "start": 12, 114 | "end": 33, 115 | "value": "population of Vietnam", 116 | "entity": "query" 117 | } 118 | ] 119 | }, 120 | { 121 | "text": "Can you tell me something about Golang?", 122 | "intent": "whatis", 123 | "entities": [ 124 | { 125 | "start": 32, 126 | "end": 38, 127 | "value": "Golang", 128 | "entity": "query" 129 | } 130 | ] 131 | }, 132 | { 133 | "text": "Do you know who is Riki?", 134 | "intent": "whatis", 135 | "entities": [ 136 | { 137 | "start": 19, 138 | "end": 23, 139 | "value": "Riki", 140 | "entity": "query" 141 | } 142 | ] 143 | }, 144 | { 145 | "text": "Do you know what is OOP?", 146 | "intent": "whatis", 147 | "entities": [ 148 | { 149 | "start": 20, 150 | "end": 23, 151 | "value": "OOP", 152 | "entity": "query" 153 | } 154 | ] 155 | }, 156 | { 157 | "text": "Can you give me the price of Bitcoin?", 158 | "intent": "whatis", 159 | "entities": [ 160 | { 161 | "start": 20, 162 | "end": 36, 163 | "value": "price of Bitcoin", 164 | "entity": "query" 165 | } 166 | ] 167 | }, 168 | { 169 | "text": "Can you give me USD VND rate?", 170 | "intent": "whatis", 171 | "entities": [ 172 | { 173 | "start": 16, 174 | "end": 28, 175 | "value": "USD VND rate", 176 | "entity": "query" 177 | } 178 | ] 179 | }, 180 | { 181 | "text": "What is the meaning of life?", 182 | "intent": "whatis", 183 | "entities": [ 184 | { 185 | "start": 12, 186 | "end": 27, 187 | "value": "meaning of life", 188 | "entity": "query" 189 | } 190 | ] 191 | }, 192 | { 193 | "text": "Who is John Lennon?", 194 | "intent": "whatis", 195 | "entities": [ 196 | { 197 | "start": 7, 198 | "end": 18, 199 | "value": "John Lennon", 200 | "entity": "query" 201 | } 202 | ] 203 | }, 204 | { 205 | "text": "What is the biggest country in the world?", 206 | "intent": "whatis", 207 | "entities": [ 208 | { 209 | "start": 12, 210 | "end": 40, 211 | "value": "biggest country in the world", 212 | "entity": "query" 213 | } 214 | ] 215 | }, 216 | { 217 | "text": "What is the most expensive food in the world?", 218 | "intent": "whatis", 219 | "entities": [ 220 | { 221 | "start": 12, 222 | "end": 44, 223 | "value": "most expensive food in the world", 224 | "entity": "query" 225 | } 226 | ] 227 | }, 228 | { 229 | "text": "How to boil eggs?", 230 | "intent": "howto", 231 | "entities": [ 232 | { 233 | "start": 6, 234 | "end": 16, 235 | "value": " boil eggs", 236 | "entity": "query" 237 | } 238 | ] 239 | }, 240 | { 241 | "text": "How to practice yoga?", 242 | "intent": "howto", 243 | "entities": [ 244 | { 245 | "start": 7, 246 | "end": 20, 247 | "value": "practice yoga", 248 | "entity": "query" 249 | } 250 | ] 251 | }, 252 | { 253 | "text": "How to learn programming?", 254 | "intent": "howto", 255 | "entities": [ 256 | { 257 | "start": 7, 258 | "end": 24, 259 | "value": "learn programming", 260 | "entity": "query" 261 | } 262 | ] 263 | }, 264 | { 265 | "text": "When John Lennon died?", 266 | "intent": "when", 267 | "entities": [ 268 | { 269 | "start": 5, 270 | "end": 21, 271 | "value": "John Lennon died", 272 | "entity": "query" 273 | } 274 | ] 275 | }, 276 | { 277 | "text": "When Second World War started?", 278 | "intent": "when", 279 | "entities": [ 280 | { 281 | "start": 5, 282 | "end": 29, 283 | "value": "Second World War started", 284 | "entity": "query" 285 | } 286 | ] 287 | }, 288 | { 289 | "text": "Can you tell me what is the Universe?", 290 | "intent": "whatis", 291 | "entities": [ 292 | { 293 | "start": 28, 294 | "end": 36, 295 | "value": "Universe", 296 | "entity": "query" 297 | } 298 | ] 299 | }, 300 | { 301 | "text": "Can you tell me who is Bismarck Lepe?", 302 | "intent": "whatis", 303 | "entities": [ 304 | { 305 | "start": 23, 306 | "end": 36, 307 | "value": "Bismarck Lepe", 308 | "entity": "query" 309 | } 310 | ] 311 | }, 312 | { 313 | "text": "Do plants die of old age?", 314 | "intent": "do", 315 | "entities": [ 316 | { 317 | "start": 3, 318 | "end": 24, 319 | "value": "plants die of old age", 320 | "entity": "query" 321 | } 322 | ] 323 | }, 324 | { 325 | "text": "Does chewing gum really stay inside you for years?", 326 | "intent": "do", 327 | "entities": [ 328 | { 329 | "start": 5, 330 | "end": 49, 331 | "value": "chewing gum really stay inside you for years", 332 | "entity": "query" 333 | } 334 | ] 335 | }, 336 | { 337 | "text": "Do you know what is goroutine?", 338 | "intent": "whatis", 339 | "entities": [ 340 | { 341 | "start": 20, 342 | "end": 29, 343 | "value": "goroutine", 344 | "entity": "query" 345 | } 346 | ] 347 | }, 348 | { 349 | "text": "Can you tell me what is goroutine?", 350 | "intent": "whatis", 351 | "entities": [ 352 | { 353 | "start": 24, 354 | "end": 33, 355 | "value": "goroutine", 356 | "entity": "query" 357 | } 358 | ] 359 | } 360 | ] 361 | } 362 | } -------------------------------------------------------------------------------- /slack/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/plutov/bot/b3588df64f3bace7f62ae4528eba9f7a6fd697d0/slack/__init__.py -------------------------------------------------------------------------------- /slack/bot.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2017 Alex Pliutau 2 | 3 | from slackclient import SlackClient 4 | import logging 5 | import time 6 | import sys 7 | 8 | class SlackBot(object): 9 | def __init__(self, token, rasa_nlu): 10 | logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') 11 | 12 | self.sc = SlackClient(token) 13 | self.rasa_nlu = rasa_nlu 14 | 15 | def connect(self): 16 | if self.sc.rtm_connect(): 17 | logging.info("connected to slack rtm") 18 | else: 19 | logging.error("could not connect to slack rtm") 20 | sys.exit(1) 21 | 22 | def start(self): 23 | self.connect() 24 | while True: 25 | for reply in self.sc.rtm_read(): 26 | self.input(reply) 27 | 28 | time.sleep(.1) 29 | 30 | def input(self, data): 31 | # do not handle bot messages 32 | if "type" in data and not "bot_id" in data and data["type"] == "message": 33 | self.process_msg(data) 34 | 35 | def process_msg(self, data): 36 | logging.info("received message from {}: {}".format(data["user"], data["text"])) 37 | text_to_reply = self.rasa_nlu.find_reply(data["text"]) 38 | if text_to_reply: 39 | self.send_im_msg(data["user"], text_to_reply) 40 | 41 | 42 | def send_im_msg(self, user, msg): 43 | self.sc.api_call( 44 | "chat.postMessage", 45 | channel=user, 46 | as_user="true", 47 | text=msg 48 | ) 49 | logging.info("sent message to {}: {}".format(user, msg)) --------------------------------------------------------------------------------