├── bot.gif ├── .gitignore ├── server └── db │ └── TextMessage.js ├── package.json ├── seed.js ├── createData.js ├── index.js ├── README.md └── functions.js /bot.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dorcheng/dorisbot/HEAD/bot.gif -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | data 3 | messagePairs.json 4 | .DS_Store 5 | npm-debug.log 6 | -------------------------------------------------------------------------------- /server/db/TextMessage.js: -------------------------------------------------------------------------------- 1 | const Sequelize = require('sequelize'); 2 | const db = new Sequelize( 3 | process.env.DATABASE_URL || 'postgres://localhost:5432/dorisbot', { 4 | logging: false 5 | } 6 | ); 7 | 8 | const TextMessage = db.define('textMessage', { 9 | message: { 10 | type: Sequelize.TEXT 11 | }, 12 | response: { 13 | type: Sequelize.TEXT 14 | } 15 | }); 16 | 17 | module.exports = TextMessage; 18 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "dorisbot", 3 | "version": "1.0.0", 4 | "description": "An intelligent chatbot", 5 | "main": "index.js", 6 | "scripts": { 7 | "start": "node server", 8 | "test": "echo \"Error: no test specified\" && exit 1", 9 | "seed": "node seed.js" 10 | }, 11 | "author": "Doris Cheng", 12 | "license": "ISC", 13 | "dependencies": { 14 | "axios": "^0.16.2", 15 | "body-parser": "^1.18.2", 16 | "express": "^4.15.4", 17 | "fast-levenshtein": "^2.0.6", 18 | "js-yaml": "^3.10.0", 19 | "node-wit": "^4.3.0", 20 | "pg": "^7.3.0", 21 | "pg-hstore": "^2.3.2", 22 | "request": "^2.82.0", 23 | "sequelize": "^4.10.3" 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /seed.js: -------------------------------------------------------------------------------- 1 | const TextMessage = require('./server/db/TextMessage.js'); 2 | const messagePairs = require('./messagePairs.json'); 3 | 4 | let rows = []; 5 | 6 | const keys = Object.keys(messagePairs); 7 | for (var i = 0; i < keys.length; i++) { 8 | let entry = { 9 | message: keys[i], 10 | response: messagePairs[keys[i]] 11 | }; 12 | rows.push(entry); 13 | } 14 | 15 | const seed = () => 16 | Promise.all(rows.map(row => TextMessage.create(row))); 17 | 18 | TextMessage.sync({force: true}) 19 | .then(() => { 20 | return seed(); 21 | }) 22 | .then(() => { 23 | process.exit(0); 24 | }) 25 | .catch(err => { 26 | console.error(err.stack); 27 | process.exit(1); 28 | }); 29 | -------------------------------------------------------------------------------- /createData.js: -------------------------------------------------------------------------------- 1 | const fs = require('fs'); 2 | var yaml = require('js-yaml'); 3 | var levenshtein = require('fast-levenshtein'); 4 | 5 | // Declare document and messagePairs 6 | let doc; 7 | let messagePairs = {}; 8 | 9 | // Parse yaml document of 30309 text messages (896249 words; two years of data) and convert into json object 10 | try { 11 | doc = yaml.safeLoad(fs.readFileSync('./data/textData.yaml', 'utf8')); 12 | doc = doc.sms.slice(0, 30309); 13 | } catch (e) { 14 | console.log(e); 15 | } 16 | 17 | let newKey = ''; // key is the message 18 | let newValue = ''; // value is response 19 | let prevType = 0; 20 | 21 | // Get rid of strings that are similar to size down data set 22 | for (var i = 0; i < doc.length; i++) { 23 | let currType = doc[i]._type; 24 | let currBody = doc[i]._body; 25 | 26 | if (currType === 1) { 27 | //if doris just finished replying 28 | if (prevType === 2) { 29 | //add key + value to your dict 30 | let add = true; 31 | let keys = Object.keys(messagePairs); 32 | for (var j = 0; j < keys.length; j++) { 33 | let distance = levenshtein.get(keys[j], newKey); 34 | let level = distance / Math.max(keys[j].length, newKey.length); 35 | if (level < 0.25) { 36 | add = false; 37 | break; 38 | } 39 | } 40 | if (add) { 41 | messagePairs[newKey] = newValue; 42 | } 43 | //reset newKey and value 44 | newKey = ''; 45 | newValue = ''; 46 | } 47 | newKey = `${newKey} ${currBody}`; 48 | } 49 | if (currType === 2) { 50 | newValue = `${newValue} ${currBody}`; 51 | } 52 | prevType = currType; 53 | } 54 | 55 | // write results into file with json format 56 | fs.writeFile('./data/messagePairs.json', JSON.stringify(messagePairs, null, 4), (err) => { 57 | if (err) { 58 | console.error(err); 59 | return; 60 | } 61 | console.log('Yay! Successfully created file'); 62 | }); 63 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const express = require('express'); 4 | const bodyParser = require('body-parser'); 5 | const handleMessage = require('./functions.js').handleMessage; 6 | const sendTextMessage = require('./functions.js').sendTextMessage; 7 | const app = express(); 8 | 9 | app.set('port', (process.env.PORT || 5000)); 10 | app.use(bodyParser.urlencoded({extended: false})); 11 | app.use(bodyParser.json()); 12 | 13 | app.get('/', function (req, res) { 14 | res.send('Hello world, I am DorisBot'); 15 | }); 16 | 17 | // Facebook verification 18 | app.get('/webhook/', function (req, res) { 19 | if (req.query['hub.verify_token'] === process.env.FB_VERIFY_TOKEN) { 20 | res.send(req.query['hub.challenge']); 21 | } 22 | res.send('Error, wrong token'); 23 | }); 24 | 25 | app.post('/webhook', function (req, res) { 26 | var data = req.body; 27 | 28 | // Make sure this is a page subscription 29 | if (data.object === 'page') { 30 | 31 | // Iterate over each entry 32 | data.entry.forEach(function(entry) { 33 | 34 | // Iterate over each messaging event 35 | entry.messaging.forEach(function(event) { 36 | if (event.message) { 37 | receivedMessage(event); 38 | } else { 39 | console.log('Webhook received unknown event: ', event); 40 | } 41 | }); 42 | }); 43 | 44 | res.sendStatus(200); 45 | } 46 | }); 47 | 48 | function receivedMessage(event) { 49 | var senderID = event.sender.id; 50 | var message = event.message; 51 | var nlp = message.nlp.entities; 52 | 53 | var messageText = message.text; 54 | var messageAttachments = message.attachments; 55 | 56 | if (messageText) { 57 | handleMessage(senderID, messageText, nlp); 58 | 59 | } else if (messageAttachments) { 60 | sendTextMessage(senderID, 'Message with attachment received'); 61 | } 62 | } 63 | 64 | // Start server 65 | app.listen(app.get('port'), function() { 66 | console.log('running on port', app.get('port')); 67 | }); 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Meet DorisBot 2 | 3 | DorisBot is a smart chatbot that was built with Wit.ai and Messenger Platform. To make things more interesting, she was built with the goal of speaking like Doris Cheng (me!). 4 | 5 |
6 |

7 | 8 |

9 |
10 | 11 | # Natural Language Processing 12 | 13 | Natural language processing is the ability of a computer program to understand human speech as it is spoken. Wit.ai is a natural language processing tool that provides bot makers with the ability to build models using entities and values. Entities categorizes user intent and values are the associated values. DorisBot uses seven main entities: greeting, goodbye, question, hobbies, currently_doing, schedule_food_date, schedule_hangout. Every time a new input gets sent to her, Wit.ai will process it and respond with a JSON object that contains a confidence value. The confidence value shows how sure or confident Wit is, that it extracted the entity correctly. 14 | 15 | 16 | # How DorisBot chooses what to respond with 17 | 18 | The goal for DorisBot was to increase the percentage that she would respond with an actual message rather than the default message, whenever she receives a new input Wit cannot categorize into an entity. To add a personal touch (and because I did not want to continue manually adding entities to account for every single edge case), I fed DorisBot two years worth of text messages to make DorisBot sound like me. I converted my text messages (originally in XML format) into JSON, and mapped everything so that every message had a corresponding reply from me (key-value message-response pairs). To determine what DorisBot would reply with, I used an algorithm called the Levenshtein distance, which calculates how different two strings are. Whenever there is a new user input, DorisBot will first be processed by Wit.ai. If Wit fails to process it, DorisBot will fall back to checking my text messages and using the Levenshtein distance to see if there is a similar key in my dictionary of key-value pairs. If there is a similar key, DorisBot will respond with that key's value. Otherwise, DorisBot will respond with the default response. 19 | 20 | 21 | -------------------------------------------------------------------------------- /functions.js: -------------------------------------------------------------------------------- 1 | const messagePairs = require('./messagePairs.json'); 2 | const levenshtein = require('fast-levenshtein'); 3 | const request = require('request'); 4 | 5 | // Returns single entity 6 | function returnEntity(entities, name) { 7 | if (entities[name]) { 8 | return entities[name][0]; 9 | } else { 10 | return null; 11 | } 12 | } 13 | 14 | // Handles message based on entity and text messages 15 | function handleMessage(recipientId, message, entities) { 16 | const greeting = returnEntity(entities, 'greeting'); 17 | const goodbye = returnEntity(entities, 'goodbye'); 18 | const question = returnEntity(entities, 'question'); 19 | const hobbies = returnEntity(entities, 'hobbies'); 20 | const currentlydoing = returnEntity(entities, 'currently_doing'); 21 | const planFoodDate = returnEntity(entities, 'schedule_food_date'); 22 | const planHangout = returnEntity(entities, 'schedule_hangout'); 23 | 24 | 25 | // Checks if each entity exists and entity confidence > 0.8 26 | if (greeting && greeting.confidence > 0.8) { 27 | 28 | //Response choices 29 | const responses = ['hi!!', 'hey! hows it going?', 'whats up']; 30 | 31 | // Randomize response 32 | let index = Math.floor(Math.random() * (responses.length + 1)); 33 | sendTextMessage(recipientId, responses[index]); 34 | } 35 | 36 | else if (goodbye && goodbye.confidence > 0.8) { 37 | const responses = ['bye!!', 'okay :( byee', 'see you later!', 'talk to ya soon', 'see ya soon']; 38 | let index = Math.floor(Math.random() * (responses.length + 1)); 39 | sendTextMessage(recipientId, responses[index]); 40 | } 41 | 42 | else if (question && question.confidence > 0.8 && hobbies && hobbies.confidence > 0.8) { 43 | const responses = ['I like hiking a lot', 'hmm roadtripping!! and going on food adventures', 'biking down steep hills', 'I like doing artsy things like arts & crafts and DIY stuff']; 44 | let index = Math.floor(Math.random() * (responses.length + 1)); 45 | sendTextMessage(recipientId, responses[index]); 46 | } 47 | 48 | else if (currentlydoing && currentlydoing.confidence > 0.8) { 49 | if (currentlydoing.value === 'current_thought') { 50 | sendTextMessage(recipientId, 'how to finish my code'); 51 | } 52 | if (currentlydoing.value === 'current_activity'){ 53 | sendTextMessage(recipientId, 'coding lol'); 54 | } 55 | } 56 | 57 | else if (planFoodDate && planFoodDate.confidence > 0.8) { 58 | const responses = ['okok', 'suree, when?', 'kk where do you wanna eat', 'YES!! where?']; 59 | let index = Math.floor(Math.random() * (responses.length + 1)); 60 | sendTextMessage(recipientId, responses[index]); 61 | } 62 | 63 | else if (planHangout && planHangout.confidence > 0.8) { 64 | const responses = ['okok', 'suree, when?', 'kk where do you wanna eat', 'YES!! where?']; 65 | let index = Math.floor(Math.random() * (responses.length + 1)); 66 | sendTextMessage(recipientId, responses[index]); 67 | } 68 | 69 | else { 70 | // Apply text message history to check similarity of input 71 | let result = checkSimilarity(message); 72 | sendTextMessage(recipientId, result); 73 | } 74 | } 75 | 76 | // Format reply message 77 | function sendTextMessage(recipientId, messageText) { 78 | var messageData = { 79 | recipient: { 80 | id: recipientId 81 | }, 82 | message: { 83 | text: messageText 84 | } 85 | }; 86 | 87 | callSendAPI(messageData); 88 | } 89 | 90 | // Check similarity of two strings by using levenshtein distance 91 | function checkSimilarity(input) { 92 | const keys = Object.keys(messagePairs); 93 | let lowestLvl = null; 94 | let lowestLvlKey = null; 95 | for (var k = 0; k < keys.length; k++) { 96 | let distance = levenshtein.get(keys[k], input); 97 | let level = distance / Math.max(keys[k].length, input.length); 98 | if (level <= 0.20) { 99 | return messagePairs[keys[k]]; 100 | } 101 | if (lowestLvl === null || lowestLvl > level) { 102 | lowestLvl = level; 103 | lowestLvlKey = keys[k]; 104 | } 105 | } 106 | if (lowestLvl > 0.25) { 107 | return 'not sure what you mean o.o'; 108 | } 109 | return messagePairs[lowestLvlKey]; 110 | } 111 | 112 | function callSendAPI(messageData) { 113 | request({ 114 | uri: 'https://graph.facebook.com/v2.6/me/messages', 115 | qs: { access_token: process.env.FB_PAGE_ACCESS_TOKEN }, 116 | method: 'POST', 117 | json: messageData 118 | 119 | }, function (error, response, body) { 120 | if (!error && response.statusCode == 200) { 121 | var recipientId = body.recipient_id; 122 | var messageId = body.message_id; 123 | 124 | console.log('Successfully sent generic message with id %s to recipient %s', 125 | messageId, recipientId); 126 | } else { 127 | console.error('Unable to send message.'); 128 | console.error(response); 129 | console.error(error); 130 | } 131 | }); 132 | } 133 | 134 | module.exports = { 135 | handleMessage, 136 | sendTextMessage 137 | }; 138 | --------------------------------------------------------------------------------