├── .gitignore
├── LICENSE
├── README.md
├── index.js
├── package.json
└── test
    └── test.js
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | node_modules/
3 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Jam3
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # voice-activity-detection
2 |
3 | ## Syntax
4 | ```vad(audioContext, stream [, options]);```
5 |
6 | **Default options:**
7 | ```javascript
8 | {
9 | fftSize: 1024,
10 | bufferLen: 1024,
11 | smoothingTimeConstant: 0.2,
12 | minCaptureFreq: 85, // in Hz
13 | maxCaptureFreq: 255, // in Hz
14 | noiseCaptureDuration: 1000, // in ms
15 | minNoiseLevel: 0.3, // from 0 to 1
16 | maxNoiseLevel: 0.7, // from 0 to 1
17 | avgNoiseMultiplier: 1.2,
18 | onVoiceStart: function() {},
19 | onVoiceStop: function() {},
20 | onUpdate: function(val) {}
21 | }
22 | ```
23 |
24 | * ```minCaptureFreq/maxCaptureFreq``` - human voice frequency range
25 | * ```noiseCaptureDuration``` - how long (in ms) the environment noise is sampled before voice activity detection starts
26 | * ```minNoiseLevel/maxNoiseLevel``` - lower/upper bound that the activity threshold (measured noise level × ```avgNoiseMultiplier```) is clamped to once ```noiseCaptureDuration``` has elapsed
27 | * ```avgNoiseMultiplier``` - multiplier applied to the measured environment noise level to obtain the threshold that toggles between the active and inactive states
28 |
29 | ## Usage
30 | See [example code](https://github.com/Jam3/voice-activity-detection/blob/master/test/test.js)
31 |
32 | ## Test
33 | ```npm run test```
34 |
--------------------------------------------------------------------------------
/index.js:
--------------------------------------------------------------------------------
1 | 'use strict';
2 | var analyserFrequency = require('analyser-frequency-average');
3 |
4 | module.exports = function(audioContext, stream, opts) {
5 |
6 | opts = opts || {};
7 |
8 | var defaults = {
9 | fftSize: 1024,
10 | bufferLen: 1024,
11 | smoothingTimeConstant: 0.2,
12 | minCaptureFreq: 85, // in Hz
13 | maxCaptureFreq: 255, // in Hz
14 | noiseCaptureDuration: 1000, // in ms
15 | minNoiseLevel: 0.3, // from 0 to 1
16 | maxNoiseLevel: 0.7, // from 0 to 1
17 | avgNoiseMultiplier: 1.2,
18 | onVoiceStart: function() {
19 | },
20 | onVoiceStop: function() {
21 | },
22 | onUpdate: function(val) {
23 | }
24 | };
25 |
26 | var options = {};
27 | for (var key in defaults) {
28 | options[key] = opts.hasOwnProperty(key) ? opts[key] : defaults[key];
29 | }
30 |
31 | var baseLevel = 0;
32 | var voiceScale = 1;
33 | var activityCounter = 0;
34 | var activityCounterMin = 0;
35 | var activityCounterMax = 60;
36 | var activityCounterThresh = 5;
37 |
38 | var envFreqRange = [];
39 | var isNoiseCapturing = true;
40 | var prevVadState = undefined;
41 | var vadState = false;
42 | var captureTimeout = null;
43 |
44 | var source = audioContext.createMediaStreamSource(stream);
45 | var analyser = audioContext.createAnalyser();
46 | analyser.smoothingTimeConstant = options.smoothingTimeConstant;
47 | analyser.fftSize = options.fftSize;
48 |
49 | var scriptProcessorNode = audioContext.createScriptProcessor(options.bufferLen, 1, 1);
50 | connect();
51 | scriptProcessorNode.onaudioprocess = monitor;
52 |
53 | if (isNoiseCapturing) {
54 | //console.log('VAD: start noise capturing');
55 | captureTimeout = setTimeout(init, options.noiseCaptureDuration);
56 | }
57 |
58 | function init() {
59 | //console.log('VAD: stop noise capturing');
60 | isNoiseCapturing = false;
61 |
62 | envFreqRange = envFreqRange.filter(function(val) {
63 | return val;
64 | }).sort();
65 | var averageEnvFreq = envFreqRange.length ? envFreqRange.reduce(function (p, c) { return Math.min(p, c) }, 1) : (options.minNoiseLevel || 0.1);
66 |
67 | baseLevel = averageEnvFreq * options.avgNoiseMultiplier;
68 | if (options.minNoiseLevel && baseLevel < options.minNoiseLevel) baseLevel = options.minNoiseLevel;
69 | if (options.maxNoiseLevel && baseLevel > options.maxNoiseLevel) baseLevel = options.maxNoiseLevel;
70 |
71 | voiceScale = 1 - baseLevel;
72 |
73 | //console.log('VAD: base level:', baseLevel);
74 | }
75 |
76 | function connect() {
77 | source.connect(analyser);
78 | analyser.connect(scriptProcessorNode);
79 | scriptProcessorNode.connect(audioContext.destination);
80 | }
81 |
82 | function disconnect() {
83 | scriptProcessorNode.disconnect();
84 | analyser.disconnect();
85 | source.disconnect();
86 | }
87 |
88 | function destroy() {
89 | captureTimeout && clearTimeout(captureTimeout);
90 | disconnect();
91 | scriptProcessorNode.onaudioprocess = null;
92 | }
93 |
94 | function monitor() {
95 | var frequencies = new Uint8Array(analyser.frequencyBinCount);
96 | analyser.getByteFrequencyData(frequencies);
97 |
98 | var average = analyserFrequency(analyser, frequencies, options.minCaptureFreq, options.maxCaptureFreq);
99 | if (isNoiseCapturing) {
100 | envFreqRange.push(average);
101 | return;
102 | }
103 |
104 | if (average >= baseLevel && activityCounter < activityCounterMax) {
105 | activityCounter++;
106 | } else if (average < baseLevel && activityCounter > activityCounterMin) {
107 | activityCounter--;
108 | }
109 | vadState = activityCounter > activityCounterThresh;
110 |
111 | if (prevVadState !== vadState) {
112 | vadState ? onVoiceStart() : onVoiceStop();
113 | prevVadState = vadState;
114 | }
115 |
116 | options.onUpdate(Math.max(0, average - baseLevel) / voiceScale);
117 | }
118 |
119 | function onVoiceStart() {
120 | options.onVoiceStart();
121 | }
122 |
123 | function onVoiceStop() {
124 | options.onVoiceStop();
125 | }
126 |
127 | return {connect: connect, disconnect: disconnect, destroy: destroy};
128 | };
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "voice-activity-detection",
3 | "version": "0.0.5",
4 | "description": "Mic input activity detection",
5 | "main": "index.js",
6 | "license": "MIT",
7 | "author": {
8 | "name": "Vadim Namniak",
9 | "email": "vadim@jam3.com",
10 | "url": "https://github.com/Jam3"
11 | },
12 | "dependencies": {
13 | "analyser-frequency-average": "^1.0.0"
14 | },
15 | "devDependencies": {
16 | "budo": "^9.2.1"
17 | },
18 | "scripts": {
19 | "test": "budo test/test.js --live"
20 | },
21 | "keywords": [
22 | "voice",
23 | "voice activity",
24 | "mic"
25 | ],
26 | "repository": {
27 | "type": "git",
28 | "url": "git+https://github.com/Jam3/voice-activity-detection.git"
29 | },
30 | "homepage": "https://github.com/Jam3/voice-activity-detection",
31 | "bugs": {
32 | "url": "https://github.com/Jam3/voice-activity-detection/issues"
33 | }
34 | }
35 |
--------------------------------------------------------------------------------
/test/test.js:
--------------------------------------------------------------------------------
1 | var vad = require('../index.js');
2 | var audioContext;
3 |
// Two empty <div>s are appended to the page in this order: the first is used
// for the latest activity value, the second for the current voice state.
var valueContainer = document.body.appendChild(document.createElement('div'));
var stateContainer = document.body.appendChild(document.createElement('div'));

// Start the demo by asking for microphone access.
requestMic();
11 |
/**
 * Creates the shared AudioContext and asks the browser for microphone access.
 *
 * The promise-based `navigator.mediaDevices.getUserMedia` is used when it
 * exists; otherwise the call falls back to the callback-style
 * `navigator.getUserMedia` and its vendor-prefixed variants. On success the
 * stream is handed to `startUserMedia`; a failed request goes to
 * `handleMicConnectError`. Any exception thrown while setting up (no
 * AudioContext, no getUserMedia implementation at all) is reported through
 * `handleUserMediaError`.
 */
function requestMic() {
  try {
    window.AudioContext = window.AudioContext || window.webkitAudioContext;
    audioContext = new AudioContext();

    if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
      navigator.mediaDevices.getUserMedia({audio: true}).then(startUserMedia, handleMicConnectError);
      return;
    }

    // Legacy API: must be invoked with `navigator` as the receiver. When no
    // variant exists the `.call` below throws and lands in the catch block.
    var legacyGetUserMedia = navigator.getUserMedia || navigator.webkitGetUserMedia || navigator.mozGetUserMedia || navigator.msGetUserMedia;
    legacyGetUserMedia.call(navigator, {audio: true}, startUserMedia, handleMicConnectError);
  } catch (e) {
    handleUserMediaError();
  }
}
23 |
/**
 * Warns on the console that the browser cannot provide microphone input.
 */
function handleUserMediaError() {
  var message = 'Mic input is not supported by the browser.';
  console.warn(message);
}
27 |
/**
 * Warns on the console that the microphone could not be connected.
 *
 * @param {*} [err] - reason passed by the failed microphone request, if any;
 *   it is logged after the message so the actual cause is not lost.
 */
function handleMicConnectError(err) {
  var message = 'Could not connect microphone. Possible rejected by the user or is blocked by the browser.';
  if (err === undefined) {
    console.warn(message);
  } else {
    console.warn(message, err);
  }
}
31 |
/**
 * Success callback of the microphone request: starts voice activity detection
 * on the stream and mirrors its callbacks into the page.
 *
 * State changes are logged to the console and written to `stateContainer`;
 * every activity value is written to `valueContainer`. The text is assigned
 * through `textContent` because it is plain text, not markup.
 *
 * @param {MediaStream} stream - microphone stream handed to `vad`.
 */
function startUserMedia(stream) {
  var options = {
    onVoiceStart: function() {
      console.log('voice start');
      stateContainer.textContent = 'Voice state: active';
    },
    onVoiceStop: function() {
      console.log('voice stop');
      stateContainer.textContent = 'Voice state: inactive';
    },
    onUpdate: function(val) {
      valueContainer.textContent = 'Current voice activity value: ' + val;
    }
  };
  vad(audioContext, stream, options);
}
--------------------------------------------------------------------------------