├── .env.sample
├── .gitignore
├── package.json
├── readme.md
├── src
    ├── auth-demo.js
    ├── chat-demo.js
    └── voiceit.js
└── tests
    └── transcribe.js


/.env.sample:
--------------------------------------------------------------------------------
1 | VOICEIT_DEV_ID=
2 | VOICEIT_CONFIDENCE=85
3 | APIAI_ACCESS_TOKEN=
4 | GCLOUD_PROJECT_ID=
5 | GCLOUD_KEY_FILENAME=
6 | PORT=1337


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | node_modules/
2 | .env
3 | bajet-51ff0f4e8aad.json


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "maybankEcho",
 3 |   "description": "Voice Command app with Authentication",
 4 |   "author": "Sulaiman <sulaiman@derp.com.my>",
 5 |   "engines": {
 6 |     "node": "5.10.0"
 7 |   },
 8 |   "dependencies": {
 9 |     "@google-cloud/speech": "^0.2.0",
10 |     "apiai": "^3.0.3",
11 |     "async": "^2.1.4",
12 |     "body-parser": "^1.15.2",
13 |     "crypto-js": "^3.1.6",
14 |     "dotenv": "^2.0.0",
15 |     "express": "^4.14.0",
16 |     "request": "^2.75.0",
17 |     "twilio": "^2.11.0"
18 |   }
19 | }
20 | 


--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
 1 | 
 2 | ## Twilio Echo
 3 | 
 4 | Chat bot or voice command via phone call.
 5 | 
 6 | ### Demo
 7 | 
 8 | https://www.youtube.com/watch?v=91-_yNYDetY
 9 | 
10 | ### Idea
11 | 
12 | We can use Twilio's recording feature to capture what the caller says, transcribe the audio file using Google Speech, and then send a reply accordingly. Inspired by the [VoiceIt and Twilio implementation](https://github.com/choppen5/twilioVoiceItiVR).
13 | 
14 | ### Google Speech
15 | 
16 | Twilio has its own transcription engine, but I think it is not very accurate. It costs $0.05/min and is limited to recordings longer than 2 seconds and shorter than 120 seconds. Google Speech, on the other hand, gives the first 60 minutes per month for free and then charges only $0.006 per 15 seconds.
17 | 
18 | https://cloud.google.com/speech/pricing
19 | 
20 | ### Twilio
21 | 
22 | Aside from the cost of a USA phone number, which is $1/month, Twilio charges $0.0075/min for incoming calls and $0.0025/min for recording.
23 | 
24 | Note that they also charge $0.0005/min/month for recording storage, but the first 10,000 minutes per month are free. So you might want to delete the recordings each month.
25 | 
26 | https://www.twilio.com/voice/pricing/nz
27 | 
28 | ### VoiceIt
29 | 
30 | If you want to implement voice commands, you might want to add voice authentication. Unlike other providers, VoiceIt lets us use our own phrase instead of some pre-defined words, and they give $5 of free credit upon signup.
31 | 
32 | http://voiceit-tech.com
33 | 
34 | ### Prerequisites
35 | 
36 | 1. Twilio account, duh
37 | 
38 | 2. Google Cloud JSON key file; https://github.com/GoogleCloudPlatform/google-cloud-node#elsewhere
39 | 
40 | 3. API.AI account, for chat bot demo
41 | 
42 | 4. VoiceIt account, for voice authentication demo
43 | 
44 | ### Installation
45 | 
46 | 1. `git clone https://github.com/natsu90/twilio-echo.git`
47 | 
48 | 2. `cd twilio-echo && npm install`
49 | 
50 | 3. Set your `.env` file
51 | 
52 | 4. `node src/chat-demo.js`, or `node src/auth-demo.js` for voice authentication demo
53 | 
54 | 5. Set your Twilio Voice webhook to `your_IP_or_domain:1337/incoming_call`
55 | 
56 | ### Limitations
57 | 
58 | * Transcription is sometimes inaccurate, probably due to poor audio quality, or just my poor pronunciation
59 | 
60 | * The recording URL from Twilio is sometimes not available immediately
61 | 
62 | * There is a slight delay between chat and reply, which does not matter for a voice command app
63 | 
64 | ### Todo
65 | 
66 | * Delete recording automatically on transcribed chat or ended call
67 | 
68 | * Implement with Nexmo
69 | 
70 | ### License
71 | 
72 | Licensed under the [MIT license](http://opensource.org/licenses/MIT)
73 | 


--------------------------------------------------------------------------------
/src/auth-demo.js:
--------------------------------------------------------------------------------
  1 | 
  2 | /*
  3 |  * Modified from
  4 |  * https://github.com/choppen5/twilioVoiceItiVR
  5 |  */
  6 | 
  7 | require('dotenv').config();
  8 | 
  9 | var twilio     = require('twilio'),
 10 |     bodyParser = require('body-parser'),
 11 |     express    = require('express'),
 12 |     async	   = require('async'),
 13 |     voiceit    = require('./voiceit')();
 14 | 
 15 | var port = process.env.PORT || 1337;
 16 | var app = express();
 17 | 
 18 | app.use(bodyParser.json());
 19 | app.use(bodyParser.urlencoded({
 20 |   extended: true
 21 | }));
 22 | 
 23 | app.post('/incoming_call', voiceit.userRequest, function(req, res) {
 24 | 
 25 | 	var twiml = new twilio.TwimlResponse();
 26 | 
 27 | 	twiml.say('Welcome to Twilio Echo.');
 28 | 
 29 | 	async.waterfall([
 30 | 		function(callback) {
 31 | 			req.user.isExist(callback)
 32 | 		},
 33 | 		function(isExist, callback) {
 34 | 			if (isExist)
 35 | 				req.user.totalEnrollment(callback)
 36 | 			else
 37 | 				req.user.create(function(err) {
 38 | 					if (err) callback(err)
 39 | 					else {
 40 | 						twiml.say('You will be asked to say a phrase 3 times, then you will be able to log in with that phrase.')
 41 | 						callback(null, 0)
 42 | 					}
 43 | 				})
 44 | 		}
 45 | 	], function(err, enrollCount) {
 46 | 
 47 | 		if (err) {
 48 | 			twiml.say(err);
 49 | 		} else if (enrollCount >= 3) {
 50 | 			twiml.redirect('/authenticate');
 51 | 		} else {
 52 | 			twiml.redirect('/enroll?enrollCount=' + enrollCount);
 53 | 		}
 54 | 		res.send(twiml.toString());
 55 | 	});
 56 | });
 57 | 
 58 | // Enrollments
 59 | // -----------
 60 | app.post('/enroll', voiceit.userRequest, function(req, res) {
 61 |   var enrollCount = req.query.enrollCount || 0;
 62 |   var twiml       = new twilio.TwimlResponse();
 63 | 
 64 |   twiml.say('Please say the following phrase to enroll.')
 65 |   	.pause(1).say(req.user.phrase)
 66 |   	.record({
 67 | 	    action    : '/process_enrollment?enrollCount=' + enrollCount,
 68 | 	    maxLength : 5,
 69 | 	    trim      : 'do-not-trim'
 70 | 	});
 71 | 
 72 |   res.send(twiml.toString());
 73 | });
 74 | 
 75 | app.post('/process_enrollment', voiceit.userRequest, function(req, res) {
 76 | 
 77 | 	var enrollCount  = req.query.enrollCount;
 78 | 	var recordingURL = req.body.RecordingUrl;
 79 | 	var twiml = new twilio.TwimlResponse();
 80 | 
 81 | 	req.user.enroll(recordingURL, function(err, success) {
 82 | 
 83 | 		if (!err && success) {
 84 | 			enrollCount++;
 85 | 			if (enrollCount >= 3) {
 86 | 				twiml.say('Thank you, recording received. You are now enrolled. You can log in.')
 87 | 					.redirect('/authenticate');
 88 | 			} else {
 89 | 				twiml.say('Thank you, recording received. You will now be asked to record your phrase again.')
 90 | 					.redirect('/enroll?enrollCount=' + enrollCount);
 91 | 			}
 92 | 		} else {
 93 | 			twiml.say(err + '. Please try again.')
 94 |         		.redirect('/enroll?enrollCount=' + enrollCount);
 95 | 		}
 96 | 
 97 | 		res.send(twiml.toString());
 98 | 	});
 99 | });
100 | 
101 | // Authenticate
102 | // ------------
/*
 * Asks the caller to speak the pass phrase and records the attempt, with
 * /process_authentication as the action of the recording.
 */
app.post('/authenticate', voiceit.userRequest, function(req, res) {
	var recordOptions = {
		action    : '/process_authentication',
		maxLength : 5,
		trim      : 'do-not-trim'
	};
	var prompt = new twilio.TwimlResponse();

	prompt
		.say('Please say the following phrase to authenticate.')
		.pause(1)
		.say(req.user.phrase)
		.record(recordOptions);

	res.send(prompt.toString());
});
116 | 
/*
 * Receives the recording made by /authenticate and verifies it with VoiceIt.
 *
 * - error            : the error is spoken and the call flow ends
 * - success          : the caller continues to /start_chat
 * - not successful   : the caller is sent back to /authenticate
 */
app.post('/process_authentication', voiceit.userRequest, function(req, res) {

	var twiml = new twilio.TwimlResponse();
	var recordingURL = req.body.RecordingUrl;

	req.user.auth(recordingURL, function(err, success) {

		if (err) {
			// The VoiceIt helpers report errors as strings, numeric HTTP status
			// codes or Error objects; stringify so there is always text to speak.
			twiml.say(String(err));
		} else if (success) {
			twiml.say('Authentication is successful')
				.redirect('/start_chat');
		} else {
			twiml.say('Passphrase is not clear. Please try again.')
				.redirect('/authenticate');
		}

		res.send(twiml.toString());
	});
});
138 | 
139 | // Chat
140 | // ------
/*
 * Final step of the authentication demo: says good bye to the caller.
 */
app.post('/start_chat', function(req, res) {
	var farewell = new twilio.TwimlResponse();

	farewell.say('Thank you for trying this demo. Good bye!');

	res.send(farewell.toString());
});
149 | 
150 | app.listen(port);
151 | console.log('Running Voice Biometrics Server on port ' + port);
152 | 


--------------------------------------------------------------------------------
/src/chat-demo.js:
--------------------------------------------------------------------------------
  1 | 
  2 | require('dotenv').config();
  3 | 
  4 | var twilio     = require('twilio'),
  5 |     bodyParser = require('body-parser'),
  6 |     express    = require('express'),
  7 |     request = require('request'),
  8 |     speech = require('@google-cloud/speech')({
  9 |     	projectId: process.env.GCLOUD_PROJECT_ID,
 10 |   		keyFilename: process.env.GCLOUD_KEY_FILENAME
 11 |   	});
 12 | 
 13 | // Configure these settings based on the audio you're transcribing
 14 | const config = {
 15 | 		encoding: 'LINEAR16',
 16 | 		sampleRate: 8000,
 17 | 		languageCode: 'en-GB'
 18 | 	}
 19 | 
 20 | var port = process.env.PORT || 1337;
 21 | var app = express();
 22 | 
 23 | app.use(bodyParser.json());
 24 | app.use(bodyParser.urlencoded({
 25 |   extended: true
 26 | }));
 27 | 
 28 | app.post('/incoming_call', (req, res) => {
 29 | 
 30 | 	var twiml = new twilio.TwimlResponse();
 31 | 
 32 | 	console.log('Receiving call: ' + req.body.From)
 33 | 
 34 | 	twiml.say('Welcome to Twilio Echo. Please talk after each beep sound')
 35 | 		.redirect('/start_chat');
 36 | 
 37 | 	res.send(twiml.toString());
 38 | })
 39 | 
 40 | app.post('/start_chat', (req, res) => {
 41 | 
 42 | 	var twiml = new twilio.TwimlResponse(),
 43 | 		startCount = req.query.startCount || 0;
 44 | 
 45 | 	twiml.record({
 46 | 			action : '/transcribe',
 47 | 			timeout: 2, // 1 is too soon
 48 | 			maxLength: 15
 49 | 		})
 50 | 
 51 | 	// this will be executed if there is only silence otherwise ignored
 52 | 	startCount++;
 53 | 	if (startCount < 3)
 54 | 		twiml.say('Hello? Are you there?').redirect('/start_chat?startCount=' + startCount)
 55 | 
 56 | 	res.send(twiml.toString());
 57 | })
 58 | 
 59 | app.post('/transcribe', (req, res) => {
 60 | 
 61 | 	var twiml = new twilio.TwimlResponse();
 62 | 	var recordingURL = req.body.RecordingUrl;
 63 | 	// todo // validate recording url availability first
 64 | 	streamingRecognize(recordingURL, function(err, result) {
 65 | 
 66 | 		if (err) console.error(err);
 67 | 		else app.emit('event:transcribed', result);
 68 | 	});
 69 | 
 70 | 	// there is an awkward silence so lets put a dialog
 71 | 	res.send(twiml.say('hold on').redirect('/reply_chat').toString());
 72 | })
 73 | 
 74 | app.post('/reply_chat', (req, res) => {
 75 | 
 76 | 	var twiml = new twilio.TwimlResponse();
 77 | 	var sessionId = req.body.From;
 78 | 
 79 | 	app.once('event:transcribed', function(msg) {
 80 | 
 81 | 		console.log('transcribed: ', msg);
 82 | 
 83 | 		getReply(sessionId, msg, (err, reply) => {
 84 | 
 85 | 	    	console.log('reply: ' + reply)
 86 | 
 87 | 			if (err) twiml.say('Oops! Something went wrong. Please try again') 
 88 | 			else twiml.say(reply) 
 89 | 
 90 | 			twiml.redirect('/start_chat')
 91 | 
 92 | 			res.send(twiml.toString());
 93 | 		})
 94 | 	})
 95 | })
 96 | 
 97 | function getReply(sessionId, msg, callback) {
 98 | 
 99 | 	// sometimes it was silence but there is background noise so it is recorded anyway?
100 | 	if (msg.trim() == "")
101 | 		return callback(null, 'Sorry, can you say it again?')
102 | 
103 | 	var apiai = require('apiai');
104 | 
105 | 	var app = apiai(process.env.APIAI_ACCESS_TOKEN);
106 | 
107 | 	var request = app.textRequest(msg, {
108 |     	sessionId: sessionId
109 | 	});
110 | 
111 | 	request.on('response', function(response) {
112 | 	    callback(null, response.result.fulfillment.speech)
113 | 	});
114 | 
115 | 	request.on('error', function(error) {
116 | 	    callback(error)
117 | 	});
118 | 
119 | 	request.end();
120 | }
121 | 
122 | /* 
123 |  * https://github.com/GoogleCloudPlatform/nodejs-docs-samples/tree/master/speech
124 |  * https://cloud.google.com/speech/docs/best-practices
125 |  */
126 | 
/**
 * Transcribes audio with a single synchronous recognize request.
 *
 * @param {string} filename - audio source handed to `speech.recognize`
 * @param {function} callback - passed straight through to `speech.recognize`
 */
function syncRecognize (filename, callback) {
	speech.recognize(filename, config, callback);
}

/**
 * Downloads the audio at `filename` and pipes it through a streaming
 * recognition request.
 *
 * @param {string} filename - URL of the recording to download
 * @param {function(?*, *=)} callback - node-style callback; invoked at most
 *     once, with either the first error (download or recognition) or the
 *     first result delivered by the recognize stream
 */
function streamingRecognize(filename, callback) {

	var finished = false;

	// Node-style callbacks must fire once: ignore anything after the first
	// outcome (e.g. an error event following a delivered result).
	var finish = function(err, result) {
		if (finished) return;
		finished = true;
		callback(err, result);
	};

	const options = {
		config: config,
		singleUtterance: true
	};

	const recognizeStream = speech.createRecognizeStream(options)
		.on('error', finish)
		.on('data', (data) => {
			// console.log('Data received: %j', data);
			if (data.endpointerType == speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED)
				finish(null, data.results)
		});

	// pipe() does not forward errors from the source stream, so a failed
	// download needs its own handler or it surfaces as an uncaught 'error'.
	request(filename)
		.on('error', finish)
		.pipe(recognizeStream)
}
148 | 
149 | app.listen(port);
150 | console.log('Running Voice Chat Bot Server on port ' + port);
151 | 


--------------------------------------------------------------------------------
/src/voiceit.js:
--------------------------------------------------------------------------------
  1 | 
  2 | require('dotenv').config();
  3 | 
  4 | const apiUrl = 'https://siv.voiceprintportal.com/sivservice/api'
  5 | var SHA256     = require('crypto-js/sha256'),
  6 | 	request    = require('request');
  7 | 
  8 | var headers = {
  9 |     'VsitDeveloperId' : process.env.VOICEIT_DEV_ID,
 10 |     'PlatformID'      : '23'
 11 | }
 12 | 
 13 | function Voiceit() {
 14 | 
 15 | 	return {
 16 | 		userRequest: function(req, res, next) {
 17 | 			userRequest(req, res, next);
 18 | 		}
 19 | 	};
 20 | }
 21 | 
 22 | function User(phoneNumber) {
 23 | 
 24 | 	// todo // randomly choose from available phrases then assign to the phone number
 25 | 	var data = {
 26 | 		language : 'en-GB',
 27 | 		phrase   : 'Never forget tomorrow is a new day.',
 28 | 		number   : phoneNumber,
 29 | 		headers	 : headers
 30 |    	};
 31 | 
 32 |    	data.headers.VsitEmail = phoneNumber + '@twiliobioauth.example.com';
 33 |    	data.headers.VsitPassword = SHA256(phoneNumber).toString();
 34 | 
 35 |    	return data;
 36 | }
 37 | 
 38 | function userRequest(req, res, next) {
 39 | 
 40 | 	if (typeof req.body.From == 'undefined')
 41 | 		return next();
 42 | 
 43 | 	var user = User(req.body.From);
 44 | 
 45 |    	user.isExist = function(cb) {
 46 |    		isUserExist(user.number, cb);
 47 |    	}
 48 | 
 49 |    	user.totalEnrollment = function(cb) {
 50 |    		getTotalEnrollment(user.number, cb);
 51 |    	}
 52 | 
 53 |    	user.create = function(cb) {
 54 |    		createUser(user.number, cb)
 55 |    	}
 56 | 
 57 |    	user.enroll = function(url, cb) {
 58 |    		enrollUser(user.number, url, cb)
 59 |    	}
 60 | 
 61 |    	user.auth = function(url, cb) {
 62 |    		authUser(user.number, url, cb)
 63 |    	}
 64 | 
 65 | 	req.user = user;
 66 | 	next();
 67 | }
 68 | 
 69 | function isUserExist(phoneNumber, callback) {
 70 | 
 71 | 	var user = User(phoneNumber),
 72 | 		options = {
 73 | 		    url: apiUrl + '/users',
 74 | 		    headers: user.headers
 75 | 		}
 76 | 	
 77 | 	request(options, function (error, response, body) {
 78 | 
 79 | 		if (error) return callback(error)
 80 | 
 81 | 		body = JSON.parse(body)
 82 | 
 83 | 		switch(response.statusCode) {
 84 | 			case 200:
 85 | 				return callback(null, true);
 86 | 			case 412:
 87 | 				if (body.ResponseCode == 'UNF')
 88 | 					return callback(null, false);
 89 | 				return callback(body.Result);
 90 | 			default:
 91 | 				return callback(response.statusCode);
 92 | 		}
 93 | 	});
 94 | }
 95 | 
 96 | function getTotalEnrollment(phoneNumber, callback) {
 97 | 
 98 | 	var user = User(phoneNumber),
 99 | 		options = {
100 | 		    url: apiUrl + '/enrollments',
101 | 		    headers: user.headers
102 | 		}
103 | 
104 | 	request(options, function (error, response, body) {
105 | 
106 | 		if (error) return callback(error)
107 | 
108 | 		body = JSON.parse(body)
109 | 
110 | 		switch(response.statusCode) {
111 | 			case 200:
112 | 				return callback(null, body.Result.length);
113 | 			case 412:
114 | 				return callback(body.Result);
115 | 			default:
116 | 				return callback(response.statusCode);
117 | 		}
118 | 	});
119 | }
120 | 
/**
 * Creates a VoiceIt account for the phone number.
 *
 * @param {string} phoneNumber - caller's phone number
 * @param {function(?*, boolean=)} callback - receives `true` on HTTP 200,
 *     `false` on HTTP 412 with response code `UAE`; any other outcome is
 *     reported as the error (the service's `Result`, the HTTP status code,
 *     or a message when the body cannot be parsed)
 */
function createUser(phoneNumber, callback) {

	var user = User(phoneNumber),
		options = {
		    url: apiUrl + '/users',
		    // Build request-specific headers on a copy so the profile fields
		    // never end up in a headers object shared with other requests.
		    headers: Object.assign({}, user.headers, {
		    	VsitFirstName : 'First' + user.number,
		    	VsitLastName  : 'Last' + user.number,
		    	VsitPhone1    : user.number
		    })
		};

	request.post(options, function (error, response, body) {

		if (error) return callback(error)

		switch(response.statusCode) {
			case 200:
				return callback(null, true);
			case 412:
				// Only this branch needs the body. Parse defensively so that a
				// non-JSON payload becomes an error instead of an exception.
				var parsed;
				try {
					parsed = JSON.parse(body)
				} catch (parseError) {
					parsed = null
				}
				if (!parsed || typeof parsed !== 'object')
					return callback('Invalid response from VoiceIt (HTTP 412)');
				if (parsed.ResponseCode == 'UAE')
					return callback(null, false)
				return callback(parsed.Result);
			default:
				return callback(response.statusCode);
		}
	});
}
150 | 
/**
 * Submits a recording to VoiceIt as an enrollment for the phone number.
 *
 * @param {string} phoneNumber - caller's phone number
 * @param {string} wavUrl - URL of the recording to enroll
 * @param {function(?*, boolean=)} callback - receives `true` on HTTP 200 with
 *     response code `SUC`; any other outcome is reported as the error (the
 *     service's `Result`, the HTTP status code, or a message when the body
 *     cannot be parsed)
 */
function enrollUser(phoneNumber, wavUrl, callback) {

	var user = User(phoneNumber),
		options = {
		    url: apiUrl + '/enrollments/bywavurl',
		    // Build request-specific headers on a copy so the recording URL
		    // never ends up in a headers object shared with other requests.
		    headers: Object.assign({}, user.headers, {
		    	VsitwavURL      : wavUrl,
		    	ContentLanguage : user.language
		    })
		};

	request.post(options, function (error, response, body) {

		if (error) return callback(error)

		var statusCode = response.statusCode;

		// Only 200 and 412 carry a body this code understands.
		if (statusCode !== 200 && statusCode !== 412)
			return callback(statusCode);

		// Parse defensively so that a non-JSON payload becomes an error
		// instead of an exception.
		var parsed;
		try {
			parsed = JSON.parse(body)
		} catch (parseError) {
			parsed = null
		}
		if (!parsed || typeof parsed !== 'object')
			return callback('Invalid response from VoiceIt (HTTP ' + statusCode + ')');

		if (statusCode === 200 && parsed.ResponseCode == 'SUC')
			return callback(null, true);

		return callback(parsed.Result);
	});
}
179 | 
/**
 * Verifies a recording against the VoiceIt enrollments of the phone number.
 *
 * @param {string} phoneNumber - caller's phone number
 * @param {string} wavUrl - URL of the recording to verify
 * @param {function(?*, boolean=)} callback - receives `true` on HTTP 200 with
 *     response code `SUC`, `false` on HTTP 200 when `Result` mentions
 *     "Not confident"; any other outcome is reported as the error (the
 *     service's `Result`, the HTTP status code, or a message when the body
 *     cannot be parsed)
 */
function authUser(phoneNumber, wavUrl, callback) {

	var user = User(phoneNumber),
		options = {
		    url: apiUrl + '/authentications/bywavurl',
		    // Build request-specific headers on a copy so the recording URL
		    // never ends up in a headers object shared with other requests.
		    headers: Object.assign({}, user.headers, {
		    	VsitwavURL      : wavUrl,
		    	ContentLanguage : user.language,
		    	// Fall back to the value suggested in .env.sample so the header
		    	// is never sent as `undefined` when the variable is not set.
		    	VsitConfidence  : process.env.VOICEIT_CONFIDENCE || '85'
		    })
		};

	request.post(options, function (error, response, body) {

		if (error) return callback(error)

		var statusCode = response.statusCode;

		// Only 200 and 412 carry a body this code understands.
		if (statusCode !== 200 && statusCode !== 412)
			return callback(statusCode);

		// Parse defensively so that a non-JSON payload becomes an error
		// instead of an exception.
		var parsed;
		try {
			parsed = JSON.parse(body)
		} catch (parseError) {
			parsed = null
		}
		if (!parsed || typeof parsed !== 'object')
			return callback('Invalid response from VoiceIt (HTTP ' + statusCode + ')');

		if (statusCode === 200) {
			if (parsed.ResponseCode == 'SUC')
				return callback(null, true);
			// A low-confidence match is a failed attempt, not an error.
			if (typeof parsed.Result === 'string' && parsed.Result.indexOf('Not confident') >= 0)
				return callback(null, false);
		}

		return callback(parsed.Result || 'Authentication failed');
	});
}
211 | 
212 | module.exports = Voiceit;
213 | 
214 | 


--------------------------------------------------------------------------------
/tests/transcribe.js:
--------------------------------------------------------------------------------
 1 | 
 2 | require('dotenv').config();
 3 | 
 4 | var speech = require('@google-cloud/speech')({
 5 |     	projectId: process.env.GCLOUD_PROJECT_ID,
 6 |   		keyFilename: process.env.GCLOUD_KEY_FILENAME
 7 |   	}),
 8 | 	request = require('request')
 9 | 
10 | // Configure these settings based on the audio you're transcribing
11 | const config = {
12 | 		encoding: 'LINEAR16',
13 | 		sampleRate: 8000,
14 | 		languageCode: 'en-GB'
15 | 	}
16 | 
// URL of the recording to transcribe, taken from the first CLI argument.
var recordingURL = process.argv[2]

if (!recordingURL) {
	// Without a URL the download cannot start; explain the usage instead of
	// letting the request fail on an undefined address.
	console.error('Usage: node tests/transcribe.js <recording-url>');
	process.exitCode = 1;
} else {
	streamingRecognize(recordingURL, function(err, result) {

		if (err) throw err;
		else console.log(result);
	})
}
24 | 
25 | /* 
26 |  * https://github.com/GoogleCloudPlatform/nodejs-docs-samples/tree/master/speech
27 |  */
28 | 
/**
 * Transcribes audio with a single synchronous recognize request.
 *
 * @param {string} filename - audio source handed to `speech.recognize`
 * @param {function} callback - passed straight through to `speech.recognize`
 */
function syncRecognize (filename, callback) {
	speech.recognize(filename, config, callback);
}

/**
 * Downloads the audio at `filename` and pipes it through a streaming
 * recognition request.
 *
 * @param {string} filename - URL of the recording to download
 * @param {function(?*, *=)} callback - node-style callback; invoked at most
 *     once, with either the first error (download or recognition) or the
 *     first result delivered by the recognize stream
 */
function streamingRecognize(filename, callback) {

	var finished = false;

	// Node-style callbacks must fire once: ignore anything after the first
	// outcome (e.g. an error event following a delivered result).
	var finish = function(err, result) {
		if (finished) return;
		finished = true;
		callback(err, result);
	};

	const options = {
		config: config,
		singleUtterance: true
	};

	const recognizeStream = speech.createRecognizeStream(options)
		.on('error', finish)
		.on('data', (data) => {
			// console.log('Data received: %j', data);
			if (data.endpointerType == speech.endpointerTypes.ENDPOINTER_EVENT_UNSPECIFIED)
				finish(null, data.results)
		});

	// pipe() does not forward errors from the source stream, so a failed
	// download needs its own handler or it surfaces as an uncaught 'error'.
	request(filename)
		.on('error', finish)
		.pipe(recognizeStream)
}
50 | 
51 | 


--------------------------------------------------------------------------------