├── .gitignore ├── 88877_DingLing.wav ├── main.js ├── package.json └── readme.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | xdk 3 | *.xdk 4 | *.xdke 5 | .DS_Store 6 | -------------------------------------------------------------------------------- /88877_DingLing.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Losant/example-edison-echo/8ebd28be160d2ffb767d2dcccb9104cf180c845a/88877_DingLing.wav -------------------------------------------------------------------------------- /main.js: -------------------------------------------------------------------------------- 1 | /*jslint node:true, vars:true, bitwise:true, unparam:true */ 2 | /*jshint unused:true */ 3 | 4 | // load required modules 5 | var async = require('async'); // helps control asynchronous flow 6 | var path = require('path'); // utility for handling file paths 7 | var exec = require('child_process').exec; // runs a command in a shell and buffers the output 8 | var spawn = require('child_process').spawn; // launches a child process 9 | var request = require('request'); // http request client 10 | var watson = require('watson-developer-cloud'); // IBM Watson services client 11 | var five = require('johnny-five'); // robotics programming framework 12 | var Edison = require('edison-io'); // edison IO library 13 | var numify = require('numstr').numify; // english number utility 14 | 15 | // globals 16 | var led = null; // reference to led object 17 | var working = false; // keeps track of if we are already working on a command 18 | 19 | // initialize watson text-to-speech service 20 | var textToSpeech = watson.text_to_speech({ 21 | username: '', 22 | password: '', 23 | version: 'v1' 24 | }); 25 | 26 | // initialize watson speech-to-text service 27 | var speechToText = watson.speech_to_text({ 28 | username: '', 29 | password: '', 30 | version: 'v1' 31 | }); 32 | 33 | 
// accepts a string and reads it aloud 34 | function tts (text, cb) { 35 | // build tts parameters 36 | var params = { 37 | text: text, 38 | accept: 'audio/wav' 39 | }; 40 | // create gtstreamer child process to play audio 41 | // "fdsrc fd=0" says file to play will be on stdin 42 | // "wavparse" processes the file as audio/wav 43 | // "pulsesink" sends the audio to the default pulse audio sink device 44 | var gst = exec('gst-launch-1.0 fdsrc fd=0 ! wavparse ! pulsesink', function (err) { 45 | if (err) { return cb(err); } 46 | cb(); 47 | }); 48 | // use watson and pipe the text-to-speech results directly to gst 49 | textToSpeech.synthesize(params).pipe(gst.stdin); 50 | } 51 | 52 | // listens for audio then returns text 53 | function stt (cb) { 54 | var duration = 5000; 55 | console.log('listening for %s ms ...', duration); 56 | // create an arecord child process to record audio 57 | var arecord = spawn('arecord', ['-D', 'hw:2,0', '-t', 'wav', '-f', 'dat']); 58 | // build stt params using the stdout of arecord as the audio source 59 | var params = { 60 | audio: arecord.stdout, 61 | content_type: 'audio/wav', 62 | continuous: true // listen for audio the full 5 seconds 63 | }; 64 | // use watson to get answer text 65 | speechToText.recognize(params, function (err, res) { 66 | if (err) { return cb(err); } 67 | var text = ''; 68 | try { 69 | text = res.results[0].alternatives[0].transcript; 70 | } catch (e) { } 71 | console.log('you said: "%s"', text); 72 | cb(null, text.trim()); 73 | }); 74 | // record for duration then kill the child process 75 | setTimeout(function () { 76 | arecord.kill('SIGINT'); 77 | }, duration); 78 | } 79 | 80 | // plays a local wav file 81 | function playWav (file, cb) { 82 | var filePath = path.resolve(__dirname, file); 83 | // create gtstreamer child process to play audio 84 | // "filesrc location=" says use a file at the location as the src 85 | // "wavparse" processes the file as audio/wav 86 | // "volume" sets the output volume, accepts 
value 0 - 1 87 | // "pulsesink" sends the audio to the default pulse audio sink device 88 | exec('gst-launch-1.0 filesrc location=' + filePath + ' ! wavparse ! volume volume=0.25 ! pulsesink', function (err) { 89 | return cb(err); 90 | }); 91 | } 92 | 93 | 94 | // initialize edison board 95 | var board = new five.Board({ 96 | io: new Edison(), 97 | repl: false // we don't need the repl for this project 98 | }); 99 | 100 | // when the board is ready, listen for a button press 101 | board.on('ready', function() { 102 | var button = new five.Button(4); 103 | led = new five.Led(6); 104 | led.off(); 105 | button.on('press', main); 106 | }); 107 | 108 | 109 | // main function 110 | function main() { 111 | if (working) { return; } 112 | working = true; 113 | async.waterfall([ 114 | async.apply(playWav, '88877_DingLing.wav'), 115 | listen, 116 | search, 117 | speak 118 | ], finish); 119 | } 120 | 121 | // handle any errors clear led and working flag 122 | function finish (err) { 123 | if (err) { 124 | tts('Oops, something went wrong and I was unable to complete your request.'); 125 | console.log(err); 126 | } 127 | // stop blinking and turn off 128 | led.stop().off(); 129 | working = false; 130 | } 131 | 132 | // listen for the audio input 133 | function listen (cb) { 134 | // turn on the led 135 | led.on(); 136 | stt(cb); 137 | } 138 | 139 | // perform a search using the duckduckgo instant answer api 140 | function search (q, cb) { 141 | if (!q) { 142 | return cb(null, 'I\'m sorry I didn\'t hear you.'); 143 | } 144 | // blick the led every 100 ms 145 | led.blink(100); 146 | // run the query through numify for better support of calculations in duckduckgo 147 | q = numify(q); 148 | console.log('searching for: %s', q); 149 | var requestOptions = { 150 | url: 'https://api.duckduckgo.com/', 151 | accept: 'application/json', 152 | qs: { 153 | q: q, 154 | format: 'json', 155 | no_html: 1, 156 | skip_disambig: 1 157 | } 158 | }; 159 | request(requestOptions, function (err, res, 
body) { 160 | if (err) { return cb(err); } 161 | var result = JSON.parse(body); 162 | var text = 'I\'m sorry, I was unable to find any information on ' + q; // default response 163 | if (result.Answer) { 164 | text = result.Answer; 165 | } else if (result.Definition) { 166 | text = result.Definition; 167 | } else if (result.AbstractText) { 168 | text = result.AbstractText; 169 | } 170 | cb(null, text); 171 | }); 172 | } 173 | 174 | // read the search results 175 | function speak (text, cb) { 176 | // stop blinking and turn off 177 | led.stop().off(); 178 | if (!text) { return cb(); } 179 | tts(text, cb); 180 | } 181 | 182 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "example-edison-echo", 3 | "description": "Amazon Echo Clone with IBM Watson and Intel Edison", 4 | "version": "1.0.0", 5 | "main": "main.js", 6 | "engines": { 7 | "node": ">=0.10.0" 8 | }, 9 | "dependencies": { 10 | "johnny-five": "~0.9.18", 11 | "edison-io": "~0.9.1", 12 | "async": "~1.5.2", 13 | "watson-developer-cloud": "~1.2.1", 14 | "request": "~2.67.0", 15 | "numstr": "~1.0.1" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # How to Build an Amazon Echo Clone with IBM Watson and Intel Edison 2 | 3 | ## Overview 4 | In this project we will create an Amazon Echo clone based on the Intel Edison hardware and IBM Watson platform. During the lab we will covering the following topics: 5 | * Capturing audio with a USB microphone. 6 | * Sending audio to a Bluetooth speaker. 7 | * Using [Johnny-Five](https://johnny-five.io) to interface with the Edison's IO. 8 | * Using IBM's Watson Speech-to-Text and Text-to-Speech services. 
9 | 10 | ## Getting Started 11 | What you'll need to complete this project: 12 | * An [Intel Edison with Arduino Expansion Board](https://software.intel.com/en-us/iot/hardware/edison) 13 | * USB microphone (I used an [Audio-Technica AT2020 USB](http://www.amazon.com/s/ref=nb_sb_noss?url=search-alias%3Daps&field-keywords=AT2020+USB).) 14 | * Bluetooth speaker (I used an [Oontz Angle](http://smile.amazon.com/s/ref=nb_sb_noss_2?url=search-alias%3Daps&field-keywords=oontz+angle&rh=i%3Aaps%2Ck%3Aoontz+angle).) 15 | * An IBM Bluemix Account - [Bluemix Registration](https://console.ng.bluemix.net/registration/) 16 | * A working knowledge of [Node.js](https://nodejs.org) 17 | 18 | If you haven't already done so, you'll need to setup your Edison and get the latest firmware flashed. You can follow our quick article on [Getting Started with the Intel Edison](https://www.losant.com/blog/getting-started-with-the-intel-edison) or check out [Intel's Getting Started Guide](https://software.intel.com/en-us/iot/library/edison-getting-started). 19 | 20 | NOTE: I'm using the [Intel XDK IoT Edition](https://software.intel.com/en-us/iot/software/ide/intel-xdk-iot-edition) because it makes debugging and uploading code to the board very easy. To learn more about the IDE and how to get started using it check out [Getting Started with the Intel XDK IoT Edition](https://software.intel.com/en-us/getting-started-with-the-intel-xdk-iot-edition). It is not required for this project though. 21 | 22 | ## Connect Bluetooth Speaker 23 | Establish a terminal to your Edison using either of the guides above. 24 | 25 | Make your Bluetooth device discoverable. In my case I needed to push the pair button on the back of the speaker. 26 | 27 | In the terminal to your board type the following: 28 | ``` 29 | root@edison:~# rfkill unblock bluetooth 30 | root@edison:~# bluetoothctl 31 | [bluetooth] scan on 32 | ``` 33 | 34 | This starts the Bluetooth Manager on the Edison and starts scanning for devices. 
The results should look something like: 35 | ``` 36 | Discovery started 37 | [CHG] Controller 98:4F:EE:06:06:05 Discovering: yes 38 | [NEW] Device A0:E9:DB:08:54:C4 OontZ Angle 39 | ``` 40 | 41 | Find your device in the list and pair to it. 42 | ``` 43 | [bluetooth] pair A0:E9:DB:08:54:C4 44 | ``` 45 | 46 | In some cases, the device may need to connect as well. 47 | ``` 48 | [bluetooth] connect A0:E9:DB:08:54:C4 49 | ``` 50 | 51 | Exit the Bluetooth Manager. 52 | ``` 53 | [bluetooth] quit 54 | ``` 55 | 56 | Let's verify that your device is recognized in pulse audio: 57 | ``` 58 | root@edison:~# pactl list sinks short 59 | ``` 60 | 61 | If all is good, you should see your device listed as a sink device and the name should start with `bluez_sink` like the example output below. 62 | ``` 63 | 0 alsa_output.platform-merr_dpcm_dummy.0.analog-stereo module-alsa-card.c s16le 2ch 48000Hz SUSPENDED 64 | 1 alsa_output.0.analog-stereo module-alsa-card.c s16le 2ch 44100Hz SUSPENDED 65 | 2 bluez_sink.A0_E9_DB_08_54_C4 module-bluez5-device.c s16le 2ch 44100Hz SUSPENDED 66 | ``` 67 | 68 | Now let's set our Bluetooth device as the default sink for the pulse audio server: 69 | ``` 70 | root@edison:~# pactl set-default-sink bluez_sink.A0_E9_DB_08_54_C4 71 | ``` 72 | 73 | ## Connect USB Microphone 74 | The Edison has two USB modes: host mode and device mode. To use a USB microphone you'll need to switch the Edison into host mode by flipping the microswitch, located between the standard sized USB port and the micro USB port, towards the large USB port. You will also need to power the Edison with an [external DC power supply](http://www.digikey.com/product-detail/en/EMSA120150-P5P-SZ/T1091-P5P-ND/2352085) and not through the micro USB. 75 | 76 | Then simply plug your microphone in the large USB port. 77 | 78 | Let's make sure the Edison recognizes our microphone as an audio source by using the `arecord` command. 
79 | ``` 80 | root@edison:~# arecord -l 81 | ``` 82 | 83 | The output contains all of the hardware capture devices available. Locate your USB Audio device and make note of its card number and device number. In the example 84 | output below my mic is device 0 on card 2. 85 | ``` 86 | ... 87 | card 2: DSP [Plantronics .Audio 655 DSP], device 0: USB Audio [USB Audio] 88 | Subdevices: 1/1 89 | Subdevice #0: subdevice #0 90 | ``` 91 | 92 | ## Let's Get Coding 93 | In less than 200 lines of code (including comments) we'll have a system that will: 94 | 1. Listen for a button press 95 | 2. Play a sound to let the user know it's listening 96 | 3. Capture 5 seconds of audio input 97 | 4. Convert the audio input to text 98 | 5. Perform a command or search on the input text 99 | 6. Convert the text results to speech 100 | 7. Play the speech audio to the user 101 | 102 | I've broken the code up into easy to understand blocks. Let's walk through them and explain along the way. 103 | 104 | #### Requires and Globals 105 | Nothing special here. Just require the modules we need and declare some vars to use a little later. 
106 | 107 | ```js 108 | // load required modules 109 | var async = require('async'); // helps control asynchronous flow 110 | var path = require('path'); // utility for handling file paths 111 | var exec = require('child_process').exec; // runs a command in a shell and buffers the output 112 | var spawn = require('child_process').spawn; // launches a child process 113 | var request = require('request'); // http request client 114 | var watson = require('watson-developer-cloud'); // IBM Watson services client 115 | var five = require('johnny-five'); // robotics programming framework 116 | var Edison = require('edison-io'); // edison IO library 117 | var numify = require('numstr').numify; // english number utility 118 | 119 | // globals 120 | var led = null; // reference to led object 121 | var working = false; // keeps track of if we are already working on a command 122 | ``` 123 | 124 | #### Initialize Watson Services 125 | Another simple block of code but this one requires a little pre-work. IBM Watson Cloud Services requires credentials for each specific service used. Follow the [Obtaining credentials for Watson services](http://www.ibm.com/smarterplanet/us/en/ibmwatson/developercloud/doc/getting_started/gs-credentials.shtml) guide to get credentials for both the Speech-To-Text and the Text-To-Speech services. 126 | 127 | ```js 128 | // initialize watson text-to-speech service 129 | var textToSpeech = watson.text_to_speech({ 130 | username: '', 131 | password: '', 132 | version: 'v1' 133 | }); 134 | 135 | // initialize watson speech-to-text service 136 | var speechToText = watson.speech_to_text({ 137 | username: '', 138 | password: '', 139 | version: 'v1' 140 | }); 141 | ``` 142 | 143 | #### Text-to-Speech and Speech-to-Text Magic 144 | First let's take a look at the Text-to-Speech (TTS) function. There are two parts to TTS: 1) Converting the text to audio and 2) Playing the audio. 
145 | 146 | For the first, we are obviously using the IBM Watson Cloud Services which couldn't make it any easier. All we need to do is pass the text we would like converted and the audio format we would like back, into the `synthesize` method and it returns a [Stream](https://nodejs.org/docs/v0.10.38/api/stream.html). 147 | 148 | For the second, we are using [GStreamer](http://gstreamer.freedesktop.org/). More specifically, [`gst-launch`](https://www.mankier.com/1/gst-launch-1.0). We take the `Stream` returned from `synthesize` and pipe it directly into the `stdin` on the child process of `gst-launch-1.0`. GStreamer then processes it as a wav file and sends it to the default audio output. 149 | 150 | ```js 151 | // accepts a string and reads it aloud 152 | function tts (text, cb) { 153 | // build tts parameters 154 | var params = { 155 | text: text, 156 | accept: 'audio/wav' 157 | }; 158 | // create gtstreamer child process to play audio 159 | // "fdsrc fd=0" says file to play will be on stdin 160 | // "wavparse" processes the file as audio/wav 161 | // "pulsesink" sends the audio to the default pulse audio sink device 162 | var gst = exec('gst-launch-1.0 fdsrc fd=0 ! wavparse ! pulsesink', function (err) { 163 | if (err) { return cb(err); } 164 | cb(); 165 | }); 166 | // use watson and pipe the text-to-speech results directly to gst 167 | textToSpeech.synthesize(params).pipe(gst.stdin); 168 | } 169 | ``` 170 | Next let's look at the Speech-to-Text (STT) function. As with the TTS function, there are two main parts. 171 | 172 | The first is capturing the audio. To capture the audio we are using [`arecord`](http://linuxcommand.org/man_pages/arecord1.html). `arecord` is fairly straightforward with the exception of the `-D` option. Earlier when we set up the USB microphone, we used `arecord -l` to confirm the system saw it. That also gave us the card and device numbers associated with the mic. In my case, the mic is device 0 on card 2. 
Therefore, the `-D` option is set to `hw:2,0` (hardware device, card 2, device 0.) By not providing a file to record the audio to, we are telling `arecord` to send all data to its `stdout`.
210 | 211 | ```js 212 | // plays a local wav file 213 | function playWav (file, cb) { 214 | var filePath = path.resolve(__dirname, file); 215 | // create gtstreamer child process to play audio 216 | // "filesrc location=" says use a file at the location as the src 217 | // "wavparse" processes the file as audio/wav 218 | // "volume" sets the output volume, accepts value 0 - 1 219 | // "pulsesink" sends the audio to the default pulse audio sink device 220 | exec('gst-launch-1.0 filesrc location=' + filePath + ' ! wavparse ! volume volume=0.25 ! pulsesink', function (err) { 221 | return cb(err); 222 | }); 223 | } 224 | ``` 225 | 226 | #### Setup Johnny-Five 227 | Here we setup the Edison IO in Johnny-Five and listen for the board to complete initialization. Then attach a button to GPIO pin 4 and an LED to GPIO 6. To do this I used a [Grove Base Shield](http://www.seeedstudio.com/depot/Base-Shield-V2-p-1378.html) along with the Grove button and LED modules. 228 | 229 | You can also attach a button and LED using a breadboard instead. 230 | 231 | Last we add a listener on the button `press` event which will call the `main` function that we will look at next. 232 | 233 | ```js 234 | // initialize edison board 235 | var board = new five.Board({ 236 | io: new Edison(), 237 | repl: false // we don't need the repl for this project 238 | }); 239 | 240 | // when the board is ready, listen for a button press 241 | board.on('ready', function() { 242 | var button = new five.Button(4); 243 | led = new five.Led(6); 244 | led.off(); 245 | button.on('press', main); 246 | }); 247 | ``` 248 | 249 | #### Main 250 | We now have all the supporting pieces so let's put together the main application flow. When main is run, we first play a chime sound to let the user know we are listening using the `playWav` defined earlier. You can find download the wav file I used from [the projects repo](https://github.com/losant/example-edison-echo). 
We then listen for a command, perform the search, and play the results which we will all look at next. 251 | 252 | Last we handle any errors that may have happened and get ready to do it all again. 253 | 254 | ```js 255 | // main function 256 | function main() { 257 | if (working) { return; } 258 | working = true; 259 | async.waterfall([ 260 | async.apply(playWav, '88877_DingLing.wav'), 261 | listen, 262 | search, 263 | speak 264 | ], finish); 265 | } 266 | 267 | // handle any errors clear led and working flag 268 | function finish (err) { 269 | if (err) { 270 | tts('Oops, something went wrong and I was unable to complete your request.'); 271 | console.log(err); 272 | } 273 | // stop blinking and turn off 274 | led.stop().off(); 275 | working = false; 276 | } 277 | ``` 278 | 279 | #### The Bread-and-Butter 280 | The `listen` function simply turns on the LED to show we are listening then calls `stt` to capture the command. 281 | 282 | ```js 283 | // listen for the audio input 284 | function listen (cb) { 285 | // turn on the led 286 | led.on(); 287 | stt(cb); 288 | } 289 | ``` 290 | 291 | The `search` function uses the [Duck Duck Go Instant Answer API](https://api.duckduckgo.com/api) to perform the search. Then returns the best answer. 
292 | 293 | ```js 294 | // perform a search using the duckduckgo instant answer api 295 | function search (q, cb) { 296 | if (!q) { 297 | return cb(null, 'I\'m sorry I didn\'t hear you.'); 298 | } 299 | // blick the led every 100 ms 300 | led.blink(100); 301 | // run the query through numify for better support of calculations in duckduckgo 302 | q = numify(q); 303 | console.log('searching for: %s', q); 304 | var requestOptions = { 305 | url: 'https://api.duckduckgo.com/', 306 | accept: 'application/json', 307 | qs: { 308 | q: q, 309 | format: 'json', 310 | no_html: 1, 311 | skip_disambig: 1 312 | } 313 | }; 314 | request(requestOptions, function (err, res, body) { 315 | if (err) { return cb(err); } 316 | var result = JSON.parse(body); 317 | var text = 'I\'m sorry, I was unable to find any information on ' + q; // default response 318 | if (result.Answer) { 319 | text = result.Answer; 320 | } else if (result.Definition) { 321 | text = result.Definition; 322 | } else if (result.AbstractText) { 323 | text = result.AbstractText; 324 | } 325 | cb(null, text); 326 | }); 327 | } 328 | ``` 329 | 330 | Last we have the `speak` function that takes the search results and passes that into the `tts` function. 331 | 332 | ```js 333 | // read the search results 334 | function speak (text, cb) { 335 | // stop blinking and turn off 336 | led.stop().off(); 337 | if (!text) { return cb(); } 338 | tts(text, cb); 339 | } 340 | ``` 341 | 342 | ## Wrap Up 343 | Deploy the code to your Edison and run it. Wait a few seconds for the app to initialize then press the button. You'll hear a sound and the LED will light up. Speak your search phrase clearly into the mic then sit back and enjoy your new toy. 344 | 345 | You'll find it's great at handling single words and simple phrases. You can also use it to do simple math problems by starting your phrase with "calculate", like "calculate five plus five." 
346 | 347 | Below you'll find a list of additional resources used while making this project but not linked to above. I encourage you to take a look at them to learn a little more about the technologies used. You can also find all the code for this project at https://github.com/losant/example-edison-echo. 348 | 349 | Enjoy! 350 | 351 | ## Additional Resources 352 | * [Losant Blog](https://www.losant.com/blog/) 353 | * [Intel Edison Bluetooth Guide](http://download.intel.com/support/edison/sb/edisonbluetooth_331704004.pdf) 354 | * [Intel Edison Audio Setup Guide](http://download.intel.com/support/edison/sb/edisonaudio_332434001.pdf) 355 | * [PLAY AUDIO FROM YOUR INTEL® EDISON VIA BLUETOOTH* USING ADVANCED AUDIO DISTRIBUTION PROFILE (A2DP)](https://software.intel.com/en-us/articles/play-audio-from-your-intel-edison-via-bluetooth-using-advanced-audio-distribution-profile) 356 | --------------------------------------------------------------------------------