├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── index.js ├── package.json └── test ├── fixtures ├── en.mp3 ├── es.mp3 ├── lengthy.mp3 └── profanity.mp3 └── index.js /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | node_modules -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: node_js 2 | node_js: 3 | - '0.12' 4 | - '0.11' 5 | - '0.10' 6 | before_install: 7 | - sudo add-apt-repository -y ppa:samrog131/ppa 8 | - sudo apt-get update 9 | - sudo apt-get -y install wget tar bzip2 flvtool2 ffmpeg 10 | - wget http://ffmpeg.gusari.org/static/64bit/ffmpeg.static.64bit.latest.tar.gz 11 | - tar zxf ffmpeg.static.64bit.latest.tar.gz 12 | - sudo cp ffmpeg ffprobe /usr/bin 13 | - export ALT_FFMPEG_PATH=$(pwd)/ffmpeg 14 | - export ALT_FFPROBE_PATH=$(pwd)/ffprobe 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Dennis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Google Speech API 2 | ================= 3 | 4 | [![Build Status](https://travis-ci.org/psirenny/node-google-speech-api.png?branch=master)](https://travis-ci.org/psirenny/node-google-speech-api) 5 | 6 | Google [Speech API](https://gist.github.com/alotaiba/1730160) wrapper for node. 7 | It requires [ffmpeg](https://www.ffmpeg.org) compiled with flac support in order to work. 8 | 9 | 1.0.0 Update 10 | ------------ 11 | 12 | Switched from SoX to ffmpeg. Make sure you have at least version 0.9 of ffmpeg. 13 | 14 | 0.5 Update 15 | ---------- 16 | 17 | The google speech api now requires an **API Key**. 18 | You'll have to create an app in the Google Developers Console and enable the speech api. 19 | 20 | To enable the speech api in the developer console you must join the **chromium dev-list** in google groups. 21 | See [these comments](http://mikepultz.com/2013/07/google-speech-api-full-duplex-php-version/#comments) for more details. 22 | 23 | The response format has also changed. 24 | Instead of returning *utterances*, google now returns alternatives with a *transcript*. 25 | See the example below. 
26 | 27 | Usage 28 | ----- 29 | 30 | 31 | var speech = require('google-speech-api'); 32 | 33 | var opts = { 34 | file: 'speech.mp3', 35 | key: '' 36 | }; 37 | 38 | speech(opts, function (err, results) { 39 | console.log(results); 40 | // [{result: [{alternative: [{transcript: '...'}]}]}] 41 | }); 42 | 43 | 44 | Piping 45 | ------ 46 | 47 | You can pipe data: 48 | 49 | var request = require('superagent'); 50 | var speech = require('google-speech-api'); 51 | 52 | // must specify the filetype when piping 53 | var opts = {filetype: 'mp3'}; 54 | 55 | request 56 | .get('http://../../file.mp3') 57 | .pipe(speech(opts, function (err, results) { 58 | // handle the results 59 | })); 60 | 61 | Options 62 | ------- 63 | 64 | You can specify several options: 65 | * clipSize — The audio duration of files sent to Google (in seconds). Larger files will be broken into pieces. (defaults to 15) 66 | * **file** — The audio file. May be a `string` path or a `Buffer` object. (required) 67 | * **key** — Your Google API key. (required) 68 | * client — The name of the client you are connecting with. (defaults to "chromium") 69 | * filetype — Specify the file type. Required when piping or if the file is a buffer object. 70 | * lang — The spoken language in the file. (defaults to "en-US") 71 | * maxRequests — The maximum number of clips to send to Google at a time. (defaults to 4) 72 | * maxResults — The maximum number of hypotheses returned by Google. (defaults to 1) 73 | * pfilter — Filter profanity by replacing flagged words with pound symbols. Set 0 to unfilter. (defaults to 1) 74 | * sampleRate — The sample rate of the audio sent to Google. 
(defaults to 44000) 75 | * timeout — The amount of time to wait for the speech API before timing out (defaults to 6000ms) 76 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | var _ = require('lodash'); 2 | var async = require('async'); 3 | var EventEmitter = require('events').EventEmitter; 4 | var ffmpeg = require('fluent-ffmpeg'); 5 | var fs = require('fs'); 6 | var request = require('superagent'); 7 | var temp = require('temp'); 8 | 9 | var defaults = { 10 | client: 'chromium', 11 | clipSize: 15, 12 | lang: 'en-US', 13 | maxRequests: 4, 14 | maxResults: 1, 15 | pfilter: 1, 16 | sampleRate: 44000, 17 | timeout: 6000, 18 | xjerr: 1 19 | }; 20 | 21 | module.exports = function (options, callback) { 22 | var opts = _.merge({}, defaults, options || {}); 23 | var finishedReadingFile = false; 24 | 25 | var queue = async.priorityQueue( 26 | processClip, 27 | opts.maxRequests 28 | ); 29 | 30 | queue.events = new EventEmitter(); 31 | queue.results = []; 32 | 33 | var reader = new EventEmitter(); 34 | 35 | reader.open = function (file) { 36 | var self = this; 37 | 38 | ffmpeg.ffprobe(file, function (err, info) { 39 | if (err) return self.emit('error', err); 40 | var fileSize = info.format.duration; 41 | var clipCount = Math.ceil(fileSize / opts.clipSize); 42 | var clips = _.range(clipCount); 43 | 44 | function readClip(i, done) { 45 | var output = temp.path({suffix: '.flac'}); 46 | 47 | ffmpeg() 48 | .on('error', function (err) { 49 | self.emit('error', err); 50 | done(err); 51 | }) 52 | .on('end', function () { 53 | self.emit('clip', output, i); 54 | done(null, output); 55 | }) 56 | .input(file) 57 | .setStartTime(i * opts.clipSize) 58 | .duration(opts.clipSize) 59 | .output(output) 60 | .audioFrequency(opts.sampleRate) 61 | .toFormat('flac') 62 | .run(); 63 | } 64 | 65 | function end() { 66 | self.emit('end'); 67 | } 68 | 69 | async.map(clips, 
readClip, end); 70 | }); 71 | }; 72 | 73 | function processClip(clip, done) { 74 | transcribeClip(clip, function (err, result) { 75 | fs.unlink(clip); 76 | if (!err) return done(null, queue.results.push(result)); 77 | queue.events.emit('error', err); 78 | done(err); 79 | }); 80 | } 81 | 82 | function transcribeClip(clip, done) { 83 | fs.readFile(clip, function (err, data) { 84 | if (err) return done(err); 85 | 86 | request 87 | .post('https://www.google.com/speech-api/v2/recognize') 88 | .type('audio/x-flac; rate=' + opts.sampleRate) 89 | .parse(request.parse.text) 90 | .query({key: opts.key}) 91 | .query({lang: opts.lang}) 92 | .query({maxResults: opts.maxResults}) 93 | .query({pfilter: opts.pfilter ? 1 : 0}) 94 | .send(data) 95 | .timeout(opts.timeout) 96 | .end(function (err, res) { 97 | if (err) return done(err); 98 | var text = res.text; 99 | if (text) text = text.split('\n')[1]; 100 | if (!text) return done(null, {result: []}); 101 | try { 102 | done(null, JSON.parse(text)); 103 | } catch (ex) { 104 | done(ex); 105 | } 106 | }); 107 | }); 108 | } 109 | 110 | reader.on('clip', function (clip, i) { 111 | queue.push(clip, i); 112 | }); 113 | 114 | reader.on('end', function () { 115 | finishedReadingFile = true; 116 | }); 117 | 118 | reader.on('error', function (err) { 119 | callback(err); 120 | }); 121 | 122 | queue.drain = function () { 123 | if (!finishedReadingFile) return; 124 | callback(null, queue.results); 125 | }; 126 | 127 | queue.events.on('error', function (err) { 128 | queue.kill(); 129 | callback(err); 130 | }); 131 | 132 | if (opts.file) { 133 | return reader.open(opts.file); 134 | } 135 | 136 | var file = temp.openSync().path; 137 | var writeStream = fs.createWriteStream(file); 138 | 139 | reader.on('end', function () { 140 | fs.unlink(file); 141 | }); 142 | 143 | reader.on('error', function () { 144 | fs.unlink(file); 145 | }); 146 | 147 | queue.events.on('error', function () { 148 | fs.unlink(file); 149 | }); 150 | 151 | writeStream.on('end', 
function () { 152 | reader.open(file); 153 | }); 154 | 155 | writeStream.on('close', function () { 156 | reader.open(file); 157 | }); 158 | 159 | writeStream.on('error', function () { 160 | callback(err); 161 | }); 162 | 163 | return writeStream; 164 | }; 165 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Dennis Torres ", 3 | "dependencies": { 4 | "async": "^0.9.0", 5 | "fluent-ffmpeg": "^2.0.0-rc3", 6 | "lodash": "^3.7.0", 7 | "superagent": "git://github.com/visionmedia/superagent.git#308a3d5a37c5e34ba736a9d2d63a5a3763b4ba0c", 8 | "temp": "^0.8.1" 9 | }, 10 | "description": "Google Speech API wrapper for node", 11 | "devDependencies": { 12 | "natural": "^0.1.28", 13 | "tape": "^4.0.0" 14 | }, 15 | "engines": { 16 | "node": ">=0.10.x" 17 | }, 18 | "gitHead": "a5094705349adf734ca3c9f8a2accfb4db2fbd04", 19 | "keywords": [ 20 | "node", 21 | "google", 22 | "speech", 23 | "api" 24 | ], 25 | "license": "MIT", 26 | "main": "index.js", 27 | "name": "google-speech-api", 28 | "readmeFilename": "README.md", 29 | "repository": { 30 | "type": "git", 31 | "url": "git://github.com/psirenny/node-google-speech-api.git" 32 | }, 33 | "scripts": { 34 | "test": "node test" 35 | }, 36 | "version": "1.2.0" 37 | } 38 | -------------------------------------------------------------------------------- /test/fixtures/en.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psirenny/node-google-speech-api/c2eb9ee3d2a58ae0ab6ae0e026254e1a8cd4a292/test/fixtures/en.mp3 -------------------------------------------------------------------------------- /test/fixtures/es.mp3: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/psirenny/node-google-speech-api/c2eb9ee3d2a58ae0ab6ae0e026254e1a8cd4a292/test/fixtures/es.mp3 
var _ = require('lodash');
var fs = require('fs');
var lib = require('../index');
var natural = require('natural');
var path = require('path');
var test = require('tape');

// Absolute paths of the audio fixtures used by the cases below.
var fixtures = {
  en: path.join(__dirname, 'fixtures/en.mp3'),
  es: path.join(__dirname, 'fixtures/es.mp3'),
  lengthy: path.join(__dirname, 'fixtures/lengthy.mp3'),
  profanity: path.join(__dirname, 'fixtures/profanity.mp3')
};

// Reducer: appends the first transcript of `res` (if any) to `utterance`,
// separated by a single space when `utterance` is non-empty.
function combine(utterance, res) {
  var first = res.result[0];
  if (!first) return utterance;
  var separator = utterance ? ' ' : '';
  return utterance + separator + first.alternative[0].transcript;
}

// Runs the library twice for `opts.file` - once by path, once by piping the
// file into the returned stream - and makes five assertions per run,
// comparing the combined transcript to `text` with Jaro-Winkler distance.
function check(t, opts, text) {
  var file = fs.createReadStream(opts.file);
  opts.accuracy = opts.accuracy || 0.8;
  opts.key = process.env.GOOGLE_API_KEY;

  function verify(err, results) {
    t.error(err);
    t.equal(typeof results, 'object');
    t.equal(typeof results[0], 'object');
    t.equal(typeof results[0].result, 'object');
    var sentence = _.reduce(results, combine, '');
    var distance = natural.JaroWinklerDistance(sentence, text);
    t.equal(distance >= opts.accuracy, true);
  }

  lib(opts, verify);
  file.pipe(lib(_.omit(opts, 'file'), verify));
}

test('it should be a function', function (t) {
  t.plan(1);
  t.equal(typeof lib, 'function');
});

// Transcription cases; each plans 10 assertions (5 per run, 2 runs).
var cases = [
  {
    name: 'it should transcribe',
    opts: {file: fixtures.en},
    text: 'thank you very much'
  },
  {
    name: 'should work in another language',
    opts: {file: fixtures.es, lang: 'es'},
    text: 'muchas gracias'
  },
  {
    name: 'should censor profanity',
    opts: {file: fixtures.profanity, pfilter: true},
    text: 'f*** you'
  },
  {
    name: 'should not censor profanity',
    opts: {file: fixtures.profanity, pfilter: false},
    text: 'fuck you'
  },
  {
    name: 'should clip long audio',
    skip: true,
    opts: {accuracy: 0.3, file: fixtures.lengthy},
    text: '1 of the Iliad of Homer rendered into English flag vs spy Edward Earl of Derby this is a liberal Vox recording recordings are in the public domain for more information or to volunteer please visit fox.org'
  }
];

cases.forEach(function (testCase) {
  function body(t) {
    t.plan(10);
    check(t, testCase.opts, testCase.text);
  }

  if (testCase.skip) {
    test.skip(testCase.name, body);
  } else {
    test(testCase.name, body);
  }
});
--------------------------------------------------------------------------------