├── .gitignore
├── LICENSE
├── README.md
├── index.js
├── package.json
└── test
    └── test.js


/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | node_modules/
3 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Jam3
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # voice-activity-detection
 2 | 
 3 | ## Syntax
 4 | ```vad(audioContext, stream [, options]);```
 5 | 
 6 | **Default options:**
 7 | ```javascript
 8 | {
 9 |   fftSize: 1024,
10 |   bufferLen: 1024,
11 |   smoothingTimeConstant: 0.2,
12 |   minCaptureFreq: 85,         // in Hz
13 |   maxCaptureFreq: 255,        // in Hz
14 |   noiseCaptureDuration: 1000, // in ms
15 |   minNoiseLevel: 0.3,         // from 0 to 1
16 |   maxNoiseLevel: 0.7,         // from 0 to 1
17 |   avgNoiseMultiplier: 1.2,
18 |   onVoiceStart: function() {},
19 |   onVoiceStop: function() {},
20 |   onUpdate: function(val) {}
21 | }
22 | ```
23 | 
24 | * ```minCaptureFreq/maxCaptureFreq``` - human voice frequency range
25 | * ```noiseCaptureDuration``` - how long (in ms) the environment noise is sampled before voice activity detection starts
26 | * ```minNoiseLevel/maxNoiseLevel``` - lower/upper bound applied to the detection threshold that is computed from the environment noise at the end of ```noiseCaptureDuration```
27 | * ```avgNoiseMultiplier``` - multiplier applied to the measured environment noise level to obtain the threshold above which input counts as voice activity
28 | 
29 | ## Usage
30 | See [example code](https://github.com/Jam3/voice-activity-detection/blob/master/test/test.js)
31 | 
32 | ## Test
33 | ```npm run test```
34 | 


--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
  1 | 'use strict';
  2 | var analyserFrequency = require('analyser-frequency-average');
  3 | 
  4 | module.exports = function(audioContext, stream, opts) {
  5 | 
  6 |   opts = opts || {};
  7 | 
  8 |   var defaults = {
  9 |     fftSize: 1024,
 10 |     bufferLen: 1024,
 11 |     smoothingTimeConstant: 0.2,
 12 |     minCaptureFreq: 85,         // in Hz
 13 |     maxCaptureFreq: 255,        // in Hz
 14 |     noiseCaptureDuration: 1000, // in ms
 15 |     minNoiseLevel: 0.3,         // from 0 to 1
 16 |     maxNoiseLevel: 0.7,         // from 0 to 1
 17 |     avgNoiseMultiplier: 1.2,
 18 |     onVoiceStart: function() {
 19 |     },
 20 |     onVoiceStop: function() {
 21 |     },
 22 |     onUpdate: function(val) {
 23 |     }
 24 |   };
 25 | 
 26 |   var options = {};
 27 |   for (var key in defaults) {
 28 |     options[key] = opts.hasOwnProperty(key) ? opts[key] : defaults[key];
 29 |   }
 30 | 
 31 |   var baseLevel = 0;
 32 |   var voiceScale = 1;
 33 |   var activityCounter = 0;
 34 |   var activityCounterMin = 0;
 35 |   var activityCounterMax = 60;
 36 |   var activityCounterThresh = 5;
 37 | 
 38 |   var envFreqRange = [];
 39 |   var isNoiseCapturing = true;
 40 |   var prevVadState = undefined;
 41 |   var vadState = false;
 42 |   var captureTimeout = null;
 43 | 
 44 |   var source = audioContext.createMediaStreamSource(stream);
 45 |   var analyser = audioContext.createAnalyser();
 46 |   analyser.smoothingTimeConstant = options.smoothingTimeConstant;
 47 |   analyser.fftSize = options.fftSize;
 48 | 
 49 |   var scriptProcessorNode = audioContext.createScriptProcessor(options.bufferLen, 1, 1);
 50 |   connect();
 51 |   scriptProcessorNode.onaudioprocess = monitor;
 52 | 
 53 |   if (isNoiseCapturing) {
 54 |     //console.log('VAD: start noise capturing');
 55 |     captureTimeout = setTimeout(init, options.noiseCaptureDuration);
 56 |   }
 57 | 
 58 |   function init() {
 59 |     //console.log('VAD: stop noise capturing');
 60 |     isNoiseCapturing = false;
 61 | 
 62 |     envFreqRange = envFreqRange.filter(function(val) {
 63 |       return val;
 64 |     }).sort();
 65 |     var averageEnvFreq = envFreqRange.length ? envFreqRange.reduce(function (p, c) { return Math.min(p, c) }, 1) : (options.minNoiseLevel || 0.1);
 66 | 
 67 |     baseLevel = averageEnvFreq * options.avgNoiseMultiplier;
 68 |     if (options.minNoiseLevel && baseLevel < options.minNoiseLevel) baseLevel = options.minNoiseLevel;
 69 |     if (options.maxNoiseLevel && baseLevel > options.maxNoiseLevel) baseLevel = options.maxNoiseLevel;
 70 | 
 71 |     voiceScale = 1 - baseLevel;
 72 | 
 73 |     //console.log('VAD: base level:', baseLevel);
 74 |   }
 75 | 
 76 |   function connect() {
 77 |     source.connect(analyser);
 78 |     analyser.connect(scriptProcessorNode);
 79 |     scriptProcessorNode.connect(audioContext.destination);
 80 |   }
 81 | 
 82 |   function disconnect() {
 83 |     scriptProcessorNode.disconnect();
 84 |     analyser.disconnect();
 85 |     source.disconnect();
 86 |   }
 87 | 
 88 |   function destroy() {
 89 |     captureTimeout && clearTimeout(captureTimeout);
 90 |     disconnect();
 91 |     scriptProcessorNode.onaudioprocess = null;
 92 |   }
 93 | 
 94 |   function monitor() {
 95 |     var frequencies = new Uint8Array(analyser.frequencyBinCount);
 96 |     analyser.getByteFrequencyData(frequencies);
 97 | 
 98 |     var average = analyserFrequency(analyser, frequencies, options.minCaptureFreq, options.maxCaptureFreq);
 99 |     if (isNoiseCapturing) {
100 |       envFreqRange.push(average);
101 |       return;
102 |     }
103 | 
104 |     if (average >= baseLevel && activityCounter < activityCounterMax) {
105 |       activityCounter++;
106 |     } else if (average < baseLevel && activityCounter > activityCounterMin) {
107 |       activityCounter--;
108 |     }
109 |     vadState = activityCounter > activityCounterThresh;
110 | 
111 |     if (prevVadState !== vadState) {
112 |       vadState ? onVoiceStart() : onVoiceStop();
113 |       prevVadState = vadState;
114 |     }
115 | 
116 |     options.onUpdate(Math.max(0, average - baseLevel) / voiceScale);
117 |   }
118 | 
119 |   function onVoiceStart() {
120 |     options.onVoiceStart();
121 |   }
122 | 
123 |   function onVoiceStop() {
124 |     options.onVoiceStop();
125 |   }
126 | 
127 |   return {connect: connect, disconnect: disconnect, destroy: destroy};
128 | };


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "voice-activity-detection",
 3 |   "version": "0.0.5",
 4 |   "description": "Mic input activity detection",
 5 |   "main": "index.js",
 6 |   "license": "MIT",
 7 |   "author": {
 8 |     "name": "Vadim Namniak",
 9 |     "email": "vadim@jam3.com",
10 |     "url": "https://github.com/Jam3"
11 |   },
12 |   "dependencies": {
13 |     "analyser-frequency-average": "^1.0.0"
14 |   },
15 |   "devDependencies": {
16 |     "budo": "^9.2.1"
17 |   },
18 |   "scripts": {
19 |     "test": "budo test/test.js --live"
20 |   },
21 |   "keywords": [
22 |     "voice",
23 |     "voice activity",
24 |     "mic"
25 |   ],
26 |   "repository": {
27 |     "type": "git",
28 |     "url": "git+https://github.com/Jam3/voice-activity-detection.git"
29 |   },
30 |   "homepage": "https://github.com/Jam3/voice-activity-detection",
31 |   "bugs": {
32 |     "url": "https://github.com/Jam3/voice-activity-detection/issues"
33 |   }
34 | }
35 | 


--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
 1 | var vad = require('../index.js');
 2 | var audioContext;
 3 | 
 4 | var valueContainer = document.createElement('div');
 5 | document.body.appendChild(valueContainer);
 6 | 
 7 | var stateContainer = document.createElement('div');
 8 | document.body.appendChild(stateContainer);
 9 | 
10 | requestMic();
11 | 
/**
 * Create the shared AudioContext and ask the browser for microphone access.
 *
 * Uses the promise-based navigator.mediaDevices.getUserMedia when it exists and
 * falls back to the (prefixed) callback-style navigator.getUserMedia otherwise.
 * On success startUserMedia(stream) is called, on a failed/denied request
 * handleMicConnectError(error), and when the required APIs are missing
 * handleUserMediaError(). Neither `window` nor `navigator` is modified.
 */
function requestMic() {
  var constraints = {audio: true};

  try {
    var AudioContextCtor = window.AudioContext || window.webkitAudioContext;
    // Throws a TypeError when no constructor is available; handled below.
    audioContext = new AudioContextCtor();

    if (navigator.mediaDevices && typeof navigator.mediaDevices.getUserMedia === 'function') {
      navigator.mediaDevices.getUserMedia(constraints).then(startUserMedia, handleMicConnectError);
      return;
    }

    var legacyGetUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia;
    // The legacy function must be invoked with navigator as receiver; calling
    // an undefined value throws and is reported as "not supported".
    legacyGetUserMedia.call(navigator, constraints, startUserMedia, handleMicConnectError);
  } catch (e) {
    handleUserMediaError();
  }
}
23 | 
/** Report on the console that microphone capture is unavailable in this browser. */
function handleUserMediaError() {
  var notice = 'Mic input is not supported by the browser.';
  console.warn(notice);
}
27 | 
/**
 * Failure callback for the microphone request: warn on the console and, when
 * the caller supplies one, include the underlying error so the cause is visible.
 *
 * @param {*} [err] error value passed by the getUserMedia failure path
 */
function handleMicConnectError(err) {
  var message = 'Could not connect microphone. Possibly rejected by the user or blocked by the browser.';
  if (err) {
    console.warn(message, err);
  } else {
    console.warn(message);
  }
}
31 | 
/**
 * Success callback for the microphone request: starts voice-activity detection
 * on `stream` and mirrors its callbacks into the two page containers.
 *
 * @param {MediaStream} stream stream handed over by getUserMedia
 */
function startUserMedia(stream) {
  function reportVoiceStart() {
    console.log('voice start');
    stateContainer.innerHTML = 'Voice state: <strong>active</strong>';
  }

  function reportVoiceStop() {
    console.log('voice stop');
    stateContainer.innerHTML = 'Voice state: <strong>inactive</strong>';
  }

  function reportLevel(val) {
    valueContainer.innerHTML = 'Current voice activity value: <strong>' + val + '</strong>';
  }

  vad(audioContext, stream, {
    onVoiceStart: reportVoiceStart,
    onVoiceStop: reportVoiceStop,
    onUpdate: reportLevel
  });
}


--------------------------------------------------------------------------------