├── .gitignore ├── LICENSE ├── README.md ├── index.js ├── package.json └── test └── test.js /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | node_modules/ 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Jam3 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # voice-activity-detection 2 | 3 | ## Syntax 4 | ```vad(audioContext, stream [, options]);``` 5 | 6 | **Default options:** 7 | ```javascript 8 | { 9 | fftSize: 1024, 10 | bufferLen: 1024, 11 | smoothingTimeConstant: 0.2, 12 | minCaptureFreq: 85, // in Hz 13 | maxCaptureFreq: 255, // in Hz 14 | noiseCaptureDuration: 1000, // in ms 15 | minNoiseLevel: 0.3, // from 0 to 1 16 | maxNoiseLevel: 0.7, // from 0 to 1 17 | avgNoiseMultiplier: 1.2, 18 | onVoiceStart: function() {}, 19 | onVoiceStop: function() {}, 20 | onUpdate: function(val) {} 21 | } 22 | ``` 23 | 24 | * ```minCaptureFreq/maxCaptureFreq``` - human voice frequency range 25 | * ```noiseCaptureDuration``` - time for measuring average env. noise before starting voice activity detection 26 | * ```minNoiseLevel/maxNoiseLevel``` - env. noise level normalization range (during ```noiseCaptureDuration```) 27 | * ```avgNoiseMultiplier``` - multiplier for the average env. noise level to set activity/inactivity state toggle 28 | 29 | ## Usage 30 | See [example code](https://github.com/Jam3/voice-activity-detection/blob/master/test/test.js) 31 | 32 | ## Test 33 | ```npm run test``` 34 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | var analyserFrequency = require('analyser-frequency-average'); 3 | 4 | module.exports = function(audioContext, stream, opts) { 5 | 6 | opts = opts || {}; 7 | 8 | var defaults = { 9 | fftSize: 1024, 10 | bufferLen: 1024, 11 | smoothingTimeConstant: 0.2, 12 | minCaptureFreq: 85, // in Hz 13 | maxCaptureFreq: 255, // in Hz 14 | noiseCaptureDuration: 1000, // in ms 15 | minNoiseLevel: 0.3, // from 0 to 1 16 | maxNoiseLevel: 0.7, // from 0 to 1 17 | avgNoiseMultiplier: 1.2, 18 | onVoiceStart: function() { 19 | }, 20 | onVoiceStop: function() { 21 | }, 22 | onUpdate: function(val) { 23 | } 24 | }; 25 | 26 | var options = {}; 27 | for (var key in defaults) { 28 | options[key] = opts.hasOwnProperty(key) ? opts[key] : defaults[key]; 29 | } 30 | 31 | var baseLevel = 0; 32 | var voiceScale = 1; 33 | var activityCounter = 0; 34 | var activityCounterMin = 0; 35 | var activityCounterMax = 60; 36 | var activityCounterThresh = 5; 37 | 38 | var envFreqRange = []; 39 | var isNoiseCapturing = true; 40 | var prevVadState = undefined; 41 | var vadState = false; 42 | var captureTimeout = null; 43 | 44 | var source = audioContext.createMediaStreamSource(stream); 45 | var analyser = audioContext.createAnalyser(); 46 | analyser.smoothingTimeConstant = options.smoothingTimeConstant; 47 | analyser.fftSize = options.fftSize; 48 | 49 | var scriptProcessorNode = audioContext.createScriptProcessor(options.bufferLen, 1, 1); 50 | connect(); 51 | scriptProcessorNode.onaudioprocess = monitor; 52 | 53 | if (isNoiseCapturing) { 54 | //console.log('VAD: start noise capturing'); 55 | captureTimeout = setTimeout(init, options.noiseCaptureDuration); 56 | } 57 | 58 | function init() { 59 | //console.log('VAD: stop noise capturing'); 60 | isNoiseCapturing = false; 61 | 62 | envFreqRange = envFreqRange.filter(function(val) { 63 | return val; 64 | }).sort(); 65 | var averageEnvFreq = envFreqRange.length ? envFreqRange.reduce(function (p, c) { return Math.min(p, c) }, 1) : (options.minNoiseLevel || 0.1); 66 | 67 | baseLevel = averageEnvFreq * options.avgNoiseMultiplier; 68 | if (options.minNoiseLevel && baseLevel < options.minNoiseLevel) baseLevel = options.minNoiseLevel; 69 | if (options.maxNoiseLevel && baseLevel > options.maxNoiseLevel) baseLevel = options.maxNoiseLevel; 70 | 71 | voiceScale = 1 - baseLevel; 72 | 73 | //console.log('VAD: base level:', baseLevel); 74 | } 75 | 76 | function connect() { 77 | source.connect(analyser); 78 | analyser.connect(scriptProcessorNode); 79 | scriptProcessorNode.connect(audioContext.destination); 80 | } 81 | 82 | function disconnect() { 83 | scriptProcessorNode.disconnect(); 84 | analyser.disconnect(); 85 | source.disconnect(); 86 | } 87 | 88 | function destroy() { 89 | captureTimeout && clearTimeout(captureTimeout); 90 | disconnect(); 91 | scriptProcessorNode.onaudioprocess = null; 92 | } 93 | 94 | function monitor() { 95 | var frequencies = new Uint8Array(analyser.frequencyBinCount); 96 | analyser.getByteFrequencyData(frequencies); 97 | 98 | var average = analyserFrequency(analyser, frequencies, options.minCaptureFreq, options.maxCaptureFreq); 99 | if (isNoiseCapturing) { 100 | envFreqRange.push(average); 101 | return; 102 | } 103 | 104 | if (average >= baseLevel && activityCounter < activityCounterMax) { 105 | activityCounter++; 106 | } else if (average < baseLevel && activityCounter > activityCounterMin) { 107 | activityCounter--; 108 | } 109 | vadState = activityCounter > activityCounterThresh; 110 | 111 | if (prevVadState !== vadState) { 112 | vadState ? onVoiceStart() : onVoiceStop(); 113 | prevVadState = vadState; 114 | } 115 | 116 | options.onUpdate(Math.max(0, average - baseLevel) / voiceScale); 117 | } 118 | 119 | function onVoiceStart() { 120 | options.onVoiceStart(); 121 | } 122 | 123 | function onVoiceStop() { 124 | options.onVoiceStop(); 125 | } 126 | 127 | return {connect: connect, disconnect: disconnect, destroy: destroy}; 128 | }; -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "voice-activity-detection", 3 | "version": "0.0.5", 4 | "description": "Mic input activity detection", 5 | "main": "index.js", 6 | "license": "MIT", 7 | "author": { 8 | "name": "Vadim Namniak", 9 | "email": "vadim@jam3.com", 10 | "url": "https://github.com/Jam3" 11 | }, 12 | "dependencies": { 13 | "analyser-frequency-average": "^1.0.0" 14 | }, 15 | "devDependencies": { 16 | "budo": "^9.2.1" 17 | }, 18 | "scripts": { 19 | "test": "budo test/test.js --live" 20 | }, 21 | "keywords": [ 22 | "voice", 23 | "voice activity", 24 | "mic" 25 | ], 26 | "repository": { 27 | "type": "git", 28 | "url": "git:github.com/Jam3/voice-activity-detection.git" 29 | }, 30 | "homepage": "https://github.com/Jam3/voice-activity-detection", 31 | "bugs": { 32 | "url": "https://github.com/Jam3/voice-activity-detection/issues" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /test/test.js: -------------------------------------------------------------------------------- 1 | var vad = require('../index.js'); 2 | var audioContext; 3 | 4 | var valueContainer = document.createElement('div'); 5 | document.body.appendChild(valueContainer); 6 | 7 | var stateContainer = document.createElement('div'); 8 | document.body.appendChild(stateContainer); 9 | 10 | requestMic(); 11 | 12 | function requestMic() { 13 | try { 14 | window.AudioContext = window.AudioContext || window.webkitAudioContext; 15 | audioContext = new AudioContext(); 16 | 17 | navigator.getUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia; 18 | navigator.getUserMedia({audio: true}, startUserMedia, handleMicConnectError); 19 | } catch (e) { 20 | handleUserMediaError(); 21 | } 22 | } 23 | 24 | function handleUserMediaError() { 25 | console.warn('Mic input is not supported by the browser.'); 26 | } 27 | 28 | function handleMicConnectError() { 29 | console.warn('Could not connect microphone. Possible rejected by the user or is blocked by the browser.'); 30 | } 31 | 32 | function startUserMedia(stream) { 33 | var options = { 34 | onVoiceStart: function() { 35 | console.log('voice start'); 36 | stateContainer.innerHTML = 'Voice state: active'; 37 | }, 38 | onVoiceStop: function() { 39 | console.log('voice stop'); 40 | stateContainer.innerHTML = 'Voice state: inactive'; 41 | }, 42 | onUpdate: function(val) { 43 | //console.log('curr val:', val); 44 | valueContainer.innerHTML = 'Current voice activity value: ' + val + ''; 45 | } 46 | }; 47 | vad(audioContext, stream, options); 48 | } --------------------------------------------------------------------------------