├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── index.js
├── package.json
└── test
    ├── fixtures
        ├── en.mp3
        ├── es.mp3
        ├── lengthy.mp3
        └── profanity.mp3
    └── index.js


/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | node_modules


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: node_js
 2 | node_js: # Node.js versions the build runs against
 3 |   - '0.12'
 4 |   - '0.11'
 5 |   - '0.10'
 6 | before_install: # provisions ffmpeg and ffprobe before the test run
 7 |   - sudo add-apt-repository -y ppa:samrog131/ppa # NOTE(review): presumably the source of the ffmpeg package installed below - confirm
 8 |   - sudo apt-get update
 9 |   - sudo apt-get -y install wget tar bzip2 flvtool2 ffmpeg
10 |   - wget http://ffmpeg.gusari.org/static/64bit/ffmpeg.static.64bit.latest.tar.gz # static build, fetched over plain HTTP
11 |   - tar zxf ffmpeg.static.64bit.latest.tar.gz
12 |   - sudo cp ffmpeg ffprobe /usr/bin # copies the extracted binaries into /usr/bin
13 |   - export ALT_FFMPEG_PATH=$(pwd)/ffmpeg # NOTE(review): ALT_* variables are not read by index.js - confirm which tool consumes them
14 |   - export ALT_FFPROBE_PATH=$(pwd)/ffprobe
15 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2013 Dennis
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 6 | this software and associated documentation files (the "Software"), to deal in
 7 | the Software without restriction, including without limitation the rights to
 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 9 | the Software, and to permit persons to whom the Software is furnished to do so,
10 | subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | Google Speech API
 2 | =================
 3 | 
 4 | [![Build Status](https://travis-ci.org/psirenny/node-google-speech-api.png?branch=master)](https://travis-ci.org/psirenny/node-google-speech-api)
 5 | 
 6 | Google [Speech API](https://gist.github.com/alotaiba/1730160) wrapper for node.
 7 | It requires [ffmpeg](https://www.ffmpeg.org) compiled with flac support in order to work.
 8 | 
 9 | 1.0.0 Update
10 | ------------
11 | 
12 | Switched from SoX to ffmpeg. Make sure you have at least version 0.9 of ffmpeg.
13 | 
14 | 0.5 Update
15 | ----------
16 | 
17 | The google speech api now requires an **API Key**.
18 | You'll have to create an app in the Google Developers Console and enable the speech api.  
19 | 
20 | To enable the speech api in the developer console you must join the **chromium dev-list** in google groups.
21 | See [these comments](http://mikepultz.com/2013/07/google-speech-api-full-duplex-php-version/#comments) for more details.  
22 | 
23 | The response format has also changed.
24 | Instead of returning *utterances*, google now returns alternatives with a *transcript*.
25 | See the example below.
26 | 
27 | Usage
28 | -----
29 | 
30 | 
31 |     var speech = require('google-speech-api');
32 | 
33 |     var opts = {
34 |       file: 'speech.mp3',
35 |       key: '<Google API Key>'
36 |     };
37 | 
38 |     speech(opts, function (err, results) {
39 |       console.log(results);
40 |       // [{result: [{alternative: [{transcript: '...'}]}]}]
41 |     });
42 | 
43 | 
44 | Piping
45 | ------
46 | 
47 | You can pipe data:
48 | 
49 |     var request = require('superagent');
50 |     var speech = require('google-speech-api');
51 | 
52 |     // must specify the filetype when piping
53 |     var opts = {filetype: 'mp3', key: '<Google API Key>'};
54 | 
55 |     request
56 |       .get('http://../../file.mp3')
57 |       .pipe(speech(opts, function (err, results) {
58 |         // handle the results
59 |       }));
60 | 
61 | Options
62 | -------
63 | 
64 | You can specify several options:
65 | * clipSize — The maximum duration (in seconds) of each audio clip sent to google. Longer audio is broken into clips of this length. (defaults to 15)
66 | * **file** — The audio file. May be a `string` path or a `Buffer` object. (required unless the audio is piped in)
67 | * **key** — Your google API key. (required)
68 | * client — The name of the client you are connecting with. (defaults to "chromium")
69 | * filetype — Specify the file type. Required when piping or if the file is a buffer object.
70 | * lang — The spoken language in the file. (defaults to "en-US")
71 | * maxRequests — The maximum number of clips to send to google at a time. (defaults to 4)
72 | * maxResults — The maximum number of hypotheses returned by google. (defaults to 1)
73 | * pfilter — Filter profanity by replacing flagged words with pound symbols. Set to 0 to disable filtering. (defaults to 1)
74 | * sampleRate — The sample rate of the audio sent to google. (defaults to 44000)
75 | * timeout — The amount of time (in milliseconds) to wait for the speech API before timing out. (defaults to 6000)
76 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
  1 | var _ = require('lodash');
  2 | var async = require('async');
  3 | var EventEmitter = require('events').EventEmitter;
  4 | var ffmpeg = require('fluent-ffmpeg');
  5 | var fs = require('fs');
  6 | var request = require('superagent');
  7 | var temp = require('temp');
  8 | 
  9 | var defaults = {
 10 |   client: 'chromium',
 11 |   clipSize: 15,
 12 |   lang: 'en-US',
 13 |   maxRequests: 4,
 14 |   maxResults: 1,
 15 |   pfilter: 1,
 16 |   sampleRate: 44000,
 17 |   timeout: 6000,
 18 |   xjerr: 1
 19 | };
 20 | 
 21 | module.exports = function (options, callback) {
 22 |   var opts = _.merge({}, defaults, options || {});
 23 |   var finishedReadingFile = false;
 24 | 
 25 |   var queue = async.priorityQueue(
 26 |     processClip,
 27 |     opts.maxRequests
 28 |   );
 29 | 
 30 |   queue.events = new EventEmitter();
 31 |   queue.results = [];
 32 | 
 33 |   var reader = new EventEmitter();
 34 | 
 35 |   reader.open = function (file) {
 36 |     var self = this;
 37 | 
 38 |     ffmpeg.ffprobe(file, function (err, info) {
 39 |       if (err) return self.emit('error', err);
 40 |       var fileSize = info.format.duration;
 41 |       var clipCount = Math.ceil(fileSize / opts.clipSize);
 42 |       var clips = _.range(clipCount);
 43 | 
 44 |       function readClip(i, done) {
 45 |         var output = temp.path({suffix: '.flac'});
 46 | 
 47 |         ffmpeg()
 48 |           .on('error', function (err) {
 49 |             self.emit('error', err);
 50 |             done(err);
 51 |           })
 52 |           .on('end', function () {
 53 |             self.emit('clip', output, i);
 54 |             done(null, output);
 55 |           })
 56 |           .input(file)
 57 |           .setStartTime(i * opts.clipSize)
 58 |           .duration(opts.clipSize)
 59 |           .output(output)
 60 |           .audioFrequency(opts.sampleRate)
 61 |           .toFormat('flac')
 62 |           .run();
 63 |       }
 64 | 
 65 |       function end() {
 66 |         self.emit('end');
 67 |       }
 68 | 
 69 |       async.map(clips, readClip, end);
 70 |     });
 71 |   };
 72 | 
 73 |   function processClip(clip, done) {
 74 |     transcribeClip(clip, function (err, result) {
 75 |       fs.unlink(clip);
 76 |       if (!err) return done(null, queue.results.push(result));
 77 |       queue.events.emit('error', err);
 78 |       done(err);
 79 |     });
 80 |   }
 81 | 
 82 |   function transcribeClip(clip, done) {
 83 |     fs.readFile(clip, function (err, data) {
 84 |       if (err) return done(err);
 85 | 
 86 |       request
 87 |         .post('https://www.google.com/speech-api/v2/recognize')
 88 |         .type('audio/x-flac; rate=' + opts.sampleRate)
 89 |         .parse(request.parse.text)
 90 |         .query({key: opts.key})
 91 |         .query({lang: opts.lang})
 92 |         .query({maxResults: opts.maxResults})
 93 |         .query({pfilter: opts.pfilter ? 1 : 0})
 94 |         .send(data)
 95 |         .timeout(opts.timeout)
 96 |         .end(function (err, res) {
 97 |           if (err) return done(err);
 98 |           var text = res.text;
 99 |           if (text) text = text.split('\n')[1];
100 |           if (!text) return done(null, {result: []});
101 |           try {
102 |             done(null, JSON.parse(text));
103 |           } catch (ex) {
104 |             done(ex);
105 |           }
106 |         });
107 |     });
108 |   }
109 | 
110 |   reader.on('clip', function (clip, i) {
111 |     queue.push(clip, i);
112 |   });
113 | 
114 |   reader.on('end', function () {
115 |     finishedReadingFile = true;
116 |   });
117 | 
118 |   reader.on('error', function (err) {
119 |     callback(err);
120 |   });
121 | 
122 |   queue.drain = function () {
123 |     if (!finishedReadingFile) return;
124 |     callback(null, queue.results);
125 |   };
126 | 
127 |   queue.events.on('error', function (err) {
128 |     queue.kill();
129 |     callback(err);
130 |   });
131 | 
132 |   if (opts.file) {
133 |     return reader.open(opts.file);
134 |   }
135 | 
136 |   var file = temp.openSync().path;
137 |   var writeStream = fs.createWriteStream(file);
138 | 
139 |   reader.on('end', function () {
140 |     fs.unlink(file);
141 |   });
142 | 
143 |   reader.on('error', function () {
144 |     fs.unlink(file);
145 |   });
146 | 
147 |   queue.events.on('error', function () {
148 |     fs.unlink(file);
149 |   });
150 | 
151 |   writeStream.on('end', function () {
152 |     reader.open(file);
153 |   });
154 | 
155 |   writeStream.on('close', function () {
156 |     reader.open(file);
157 |   });
158 | 
159 |   writeStream.on('error', function () {
160 |     callback(err);
161 |   });
162 | 
163 |   return writeStream;
164 | };
165 | 


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "author": "Dennis Torres <djtorres0@gmail.com>",
 3 |   "dependencies": {
 4 |     "async": "^0.9.0",
 5 |     "fluent-ffmpeg": "^2.0.0-rc3",
 6 |     "lodash": "^3.7.0",
 7 |     "superagent": "git://github.com/visionmedia/superagent.git#308a3d5a37c5e34ba736a9d2d63a5a3763b4ba0c",
 8 |     "temp": "^0.8.1"
 9 |   },
10 |   "description": "Google Speech API wrapper for node",
11 |   "devDependencies": {
12 |     "natural": "^0.1.28",
13 |     "tape": "^4.0.0"
14 |   },
15 |   "engines": {
16 |     "node": ">=0.10.x"
17 |   },
18 |   "gitHead": "a5094705349adf734ca3c9f8a2accfb4db2fbd04",
19 |   "keywords": [
20 |     "node",
21 |     "google",
22 |     "speech",
23 |     "api"
24 |   ],
25 |   "license": "MIT",
26 |   "main": "index.js",
27 |   "name": "google-speech-api",
28 |   "readmeFilename": "README.md",
29 |   "repository": {
30 |     "type": "git",
31 |     "url": "git://github.com/psirenny/node-google-speech-api.git"
32 |   },
33 |   "scripts": {
34 |     "test": "node test"
35 |   },
36 |   "version": "1.2.0"
37 | }
38 | 


--------------------------------------------------------------------------------
/test/fixtures/en.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/psirenny/node-google-speech-api/c2eb9ee3d2a58ae0ab6ae0e026254e1a8cd4a292/test/fixtures/en.mp3


--------------------------------------------------------------------------------
/test/fixtures/es.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/psirenny/node-google-speech-api/c2eb9ee3d2a58ae0ab6ae0e026254e1a8cd4a292/test/fixtures/es.mp3


--------------------------------------------------------------------------------
/test/fixtures/lengthy.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/psirenny/node-google-speech-api/c2eb9ee3d2a58ae0ab6ae0e026254e1a8cd4a292/test/fixtures/lengthy.mp3


--------------------------------------------------------------------------------
/test/fixtures/profanity.mp3:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/psirenny/node-google-speech-api/c2eb9ee3d2a58ae0ab6ae0e026254e1a8cd4a292/test/fixtures/profanity.mp3


--------------------------------------------------------------------------------
/test/index.js:
--------------------------------------------------------------------------------
 1 | var _ = require('lodash');
 2 | var fs = require('fs');
 3 | var lib = require('../index');
 4 | var natural = require('natural');
 5 | var path = require('path');
 6 | var test = require('tape');
 7 | var fixtures = {};
 8 | fixtures.en = path.join(__dirname, 'fixtures/en.mp3');
 9 | fixtures.es = path.join(__dirname, 'fixtures/es.mp3');
10 | fixtures.lengthy = path.join(__dirname, 'fixtures/lengthy.mp3');
11 | fixtures.profanity = path.join(__dirname, 'fixtures/profanity.mp3');
12 | 
13 | function combine(utterance, res) {
14 |   var space = utterance ? ' ' : '';
15 |   if (!res.result[0]) return utterance;
16 |   return utterance + space + res.result[0].alternative[0].transcript;
17 | }
18 | 
19 | function check(t, opts, text) {
20 |   var file = fs.createReadStream(opts.file);
21 |   opts.accuracy = opts.accuracy || 0.8;
22 |   opts.key = process.env.GOOGLE_API_KEY;
23 | 
24 |   function test(err, results) {
25 |     t.error(err);
26 |     t.equal(typeof results, 'object');
27 |     t.equal(typeof results[0], 'object');
28 |     t.equal(typeof results[0].result, 'object');
29 |     var sentence = _.reduce(results, combine, '');
30 |     var distance = natural.JaroWinklerDistance(sentence, text);
31 |     t.equal(distance >= opts.accuracy, true);
32 |   };
33 | 
34 |   lib(opts, test);
35 |   file.pipe(lib(_.omit(opts, 'file'), test));
36 | }
37 | 
38 | test('it should be a function', function (t) {
39 |   t.plan(1);
40 |   t.equal(typeof lib, 'function');
41 | });
42 | 
43 | test('it should transcribe', function (t) {
44 |   var opts = {file: fixtures.en};
45 |   t.plan(10);
46 |   check(t, opts, 'thank you very much');
47 | });
48 | 
49 | test('should work in another language', function (t) {
50 |   var opts = {file: fixtures.es, lang: 'es'};
51 |   t.plan(10);
52 |   check(t, opts, 'muchas gracias');
53 | });
54 | 
55 | test('should censor profanity', function (t) {
56 |   var opts = {file: fixtures.profanity, pfilter: true};
57 |   t.plan(10);
58 |   check(t, opts, 'f*** you');
59 | });
60 | 
61 | test('should not censor profanity', function (t) {
62 |   var opts = {file: fixtures.profanity, pfilter: false};
63 |   t.plan(10);
64 |   check(t, opts, 'fuck you');
65 | });
66 | 
67 | test.skip('should clip long audio', function (t) {
68 |   var opts = {accuracy: 0.3, file: fixtures.lengthy};
69 |   t.plan(10);
70 |   check(t, opts, '1 of the Iliad of Homer rendered into English flag vs spy Edward Earl of Derby this is a liberal Vox recording recordings are in the public domain for more information or to volunteer please visit fox.org');
71 | });
72 | 


--------------------------------------------------------------------------------