├── CMAKE_HELPERS
    ├── FindEigen2.cmake
    └── FindFFTW.cmake
├── CMakeLists.txt
├── LICENSE
├── README.md
├── VAD.h
├── sound.txt
└── voice_detection.cpp


/CMAKE_HELPERS/FindEigen2.cmake:
--------------------------------------------------------------------------------
 1 | # - Try to find Eigen2 lib
 2 | #
 3 | # This module supports requiring a minimum version, e.g. you can do
 4 | #   find_package(Eigen2 2.0.3)
 5 | # to require version 2.0.3 or newer of Eigen2.
 6 | #
 7 | # Once done this will define
 8 | #
 9 | #  EIGEN2_FOUND - system has eigen lib with correct version
10 | #  EIGEN2_INCLUDE_DIR - the eigen include directory
11 | #  EIGEN2_VERSION - eigen version
12 | 
13 | # Copyright (c) 2006, 2007 Montel Laurent, <montel@kde.org>
14 | # Copyright (c) 2008, 2009 Gael Guennebaud, <g.gael@free.fr>
15 | # Redistribution and use is allowed according to the terms of the BSD license.
16 | 
17 | # No version requested: assemble one, using 2.0.0 for every component not set.
18 | if(NOT Eigen2_FIND_VERSION)
19 |   if(NOT Eigen2_FIND_VERSION_MAJOR)
20 |     set(Eigen2_FIND_VERSION_MAJOR 2)
21 |   endif()
22 |   if(NOT Eigen2_FIND_VERSION_MINOR)
23 |     set(Eigen2_FIND_VERSION_MINOR 0)
24 |   endif()
25 |   if(NOT Eigen2_FIND_VERSION_PATCH)
26 |     set(Eigen2_FIND_VERSION_PATCH 0)
27 |   endif()
28 |   set(Eigen2_FIND_VERSION "${Eigen2_FIND_VERSION_MAJOR}.${Eigen2_FIND_VERSION_MINOR}.${Eigen2_FIND_VERSION_PATCH}")
29 | endif()
30 | 
31 | # Reads the version triple from Eigen's Macros.h and sets, in the caller's scope,
32 | # EIGEN2_VERSION and EIGEN2_VERSION_OK. The version is accepted only for a 2.x
33 | # release with x <= 10 that is not older than Eigen2_FIND_VERSION.
34 | macro(_eigen2_check_version)
35 |   file(READ "${EIGEN2_INCLUDE_DIR}/Eigen/src/Core/util/Macros.h" _eigen2_version_header)
36 |   string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen2_world_version_match "${_eigen2_version_header}")
37 |   set(EIGEN2_WORLD_VERSION "${CMAKE_MATCH_1}")
38 |   string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen2_major_version_match "${_eigen2_version_header}")
39 |   set(EIGEN2_MAJOR_VERSION "${CMAKE_MATCH_1}")
40 |   string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen2_minor_version_match "${_eigen2_version_header}")
41 |   set(EIGEN2_MINOR_VERSION "${CMAKE_MATCH_1}")
42 |   set(EIGEN2_VERSION "${EIGEN2_WORLD_VERSION}.${EIGEN2_MAJOR_VERSION}.${EIGEN2_MINOR_VERSION}")
43 | 
44 |   # if() has no NOTEQUAL operator, so EQUAL is negated instead. The operands are
45 |   # quoted so that an empty value (define not found) cannot break the condition.
46 |   if((NOT "${EIGEN2_WORLD_VERSION}" EQUAL 2) OR ("${EIGEN2_MAJOR_VERSION}" GREATER 10) OR ("${EIGEN2_VERSION}" VERSION_LESS "${Eigen2_FIND_VERSION}"))
47 |     set(EIGEN2_VERSION_OK FALSE)
48 |     message(STATUS "Eigen2 version ${EIGEN2_VERSION} found in ${EIGEN2_INCLUDE_DIR}, "
49 |                    "but at least version ${Eigen2_FIND_VERSION} is required")
50 |   else()
51 |     set(EIGEN2_VERSION_OK TRUE)
52 |   endif()
53 | endmacro()
54 | 
55 | # Locate the Eigen2 headers unless their directory is already known.
56 | if(NOT EIGEN2_INCLUDE_DIR)
57 |   # Nothing usable yet: search for the headers, then validate their version.
58 |   find_path(EIGEN2_INCLUDE_DIR
59 |     NAMES Eigen/Core
60 |     PATHS ${INCLUDE_INSTALL_DIR} ${KDE4_INCLUDE_DIR}
61 |     PATH_SUFFIXES eigen2
62 |   )
63 | 
64 |   if(EIGEN2_INCLUDE_DIR)
65 |     _eigen2_check_version()
66 |   endif()
67 | 
68 |   # The package counts as found only if both variables listed below are true.
69 |   include(FindPackageHandleStandardArgs)
70 |   find_package_handle_standard_args(Eigen2 DEFAULT_MSG EIGEN2_INCLUDE_DIR EIGEN2_VERSION_OK)
71 | 
72 |   mark_as_advanced(EIGEN2_INCLUDE_DIR)
73 | 
74 | else()
75 |   # The include directory is already set (e.g. in the cache), so only the
76 |   # version check is repeated and its outcome becomes the found flag.
77 |   _eigen2_check_version()
78 |   set(EIGEN2_FOUND ${EIGEN2_VERSION_OK})
79 | endif()
80 | 
81 | 


--------------------------------------------------------------------------------
/CMAKE_HELPERS/FindFFTW.cmake:
--------------------------------------------------------------------------------
 1 | # - Find FFTW
 2 | # Find the native FFTW includes and library
 3 | #
 4 | #  FFTW_INCLUDES    - where to find fftw3.h
 5 | #  FFTW_LIBRARIES   - List of libraries when using FFTW.
 6 | #  FFTW_FOUND       - True if FFTW found.
 7 | 
 8 | # FFTW_INCLUDES is already set (cached from an earlier run), so stay quiet
 9 | # instead of reporting the result again.
10 | if(FFTW_INCLUDES)
11 |   set(FFTW_FIND_QUIETLY TRUE)
12 | endif()
13 | 
14 | find_path(FFTW_INCLUDES NAMES fftw3.h)
15 | find_library(FFTW_LIBRARIES NAMES fftw3)
16 | 
17 | # Takes care of the QUIETLY and REQUIRED arguments and sets FFTW_FOUND to TRUE
18 | # when every variable listed in the call is TRUE.
19 | include(FindPackageHandleStandardArgs)
20 | find_package_handle_standard_args(FFTW DEFAULT_MSG FFTW_LIBRARIES FFTW_INCLUDES)
21 | 
22 | mark_as_advanced(FFTW_LIBRARIES FFTW_INCLUDES)
23 | 


--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
 1 | # Build script for the `vad` executable (voice_detection.cpp + VAD.h).
 2 | # cmake_minimum_required() has to precede project() so policies are set first.
 3 | cmake_minimum_required(VERSION 2.8)
 4 | project(vad)
 5 | 
 6 | # The executable is placed in <source dir>/bin.
 7 | set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}/bin)
 8 | set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -Wall")
 9 | 
10 | # Makes the find modules shipped in CMAKE_HELPERS/ visible to find_package().
11 | list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/CMAKE_HELPERS/")
12 | 
13 | # The result variables must be expanded with ${...}; a bare name would be added
14 | # as a literal directory. FindFFTW.cmake sets FFTW_INCLUDES and FFTW_LIBRARIES.
15 | find_package(Eigen3 REQUIRED)
16 | find_package(FFTW REQUIRED)
17 | include_directories(${EIGEN3_INCLUDE_DIR} ${FFTW_INCLUDES})
18 | 
19 | # The header is listed so that IDE generators show it; on disk it is VAD.h.
20 | set(HEADER_FILES "${PROJECT_SOURCE_DIR}/VAD.h")
21 | add_executable(vad voice_detection.cpp ${HEADER_FILES})
22 | target_link_libraries(vad ${FFTW_LIBRARIES} m)
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2019 Minh Nguyen
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | This repo implements a voice activity detection (VAD) algorithm. The code is based on this paper:
 2 | 
 3 | Ramírez, Javier, José C. Segura, Carmen Benítez, Angel De La Torre, and Antonio Rubio. 
 4 |    "Efficient voice activity detection algorithms using long-term speech information." 
 5 |                        
 6 | It also follows the Matlab version of the algorithm (thanks to isrish for making such a beautiful program):
 7 | 
 8 | https://github.com/isrish/VAD-LTSD
 9 | 
10 | 
11 | This C++ program takes the input audio data from a text file and writes the results to 'example_*.txt' files. At the end, I use gnuplot to plot the result, and it works exactly the same as the Matlab code from isrish.
12 | 
13 | # Compile and run
14 | You need the <a href="http://www.fftw.org"> FFTW </a> and <a href="http://eigen.tuxfamily.org/"> Eigen3 </a> libraries.
15 | ```
16 | $ mkdir build && cd build
17 | $ cmake .. && make
18 | $ cd .. && ./bin/vad sound.txt
19 | ```
20 | ## Visualizing the result
21 | ```
22 | $ gnuplot
23 | $ > set multiplot
24 | $ > plot "sound.txt" using 1:2 with lines
25 | $ > filename(n) = sprintf("example_%d.txt", n)
26 | $ > plot for [i=0:9] filename(i) using 1:2 with lines
27 | ```
28 | 


--------------------------------------------------------------------------------
/VAD.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * VAD.h
  3 |  *
  4 |  *  Created on: May 27, 2016
  5 |  *      Author: dmngu9
  6 |  */
  7 | 
  8 | #ifndef VAD_H_
  9 | #define VAD_H_
 10 | 
 11 | #include <iostream>
 12 | #include <string>
 13 | #include <algorithm>
 14 | #include <vector>
 15 | #include <cmath>
 16 | #include <math.h>
 17 | #include <Eigen/Dense>
 18 | #include <fftw3.h>
 19 | 
 20 | using namespace std;
 21 | using namespace Eigen;
 22 | 
 23 | const double PI = 3.14159265358979323846; // pi to double precision (3.14 skewed the Hamming window)
 24 | 
 25 | class VAD{
 26 | 
 27 | private:
 28 | 	int winSize;           // samples per analysis frame
 29 | 	int signalSize;        // number of samples in the signal given to compute()
 30 | 	int NFFT2;             // number of spectrum bins kept per frame (winSize/2)
 31 | 	int order;             // frames taken on each side of a frame by ltse()
 32 | 	double threshold;      // LTSD value below which ltsd() updates the noise estimate
 33 | 	MatrixXd enFrame;      // half-overlapped frames of the signal, one per column
 34 | 	RowVectorXd hamming;   // window applied to a frame before the FFT
 35 | 	VectorXd averageNoise; // squared average magnitude spectrum, used as noise estimate
 36 | 	MatrixXd amplitude;    // cached magnitude spectra; row r belongs to frame r
 37 | 
 38 | 	// Number of frames (columns of enFrame); 0 when the sizes allow no frame.
 39 | 	int frameCount() const{
 40 | 		if(winSize < 2 || signalSize <= 0)
 41 | 			return 0;
 42 | 		return static_cast<int>(signalSize/(winSize*0.5));
 43 | 	}
 44 | 
 45 | 	// Builds the frame matrix: the lower half of column k holds the k-th run of
 46 | 	// winSize/2 samples, the upper half repeats the run of column k-1 (zeros for
 47 | 	// k = 0). With samples == 0 the 1-based sample positions are stored instead.
 48 | 	MatrixXd frame(const double* samples) const{
 49 | 		const int half = winSize/2;
 50 | 		const int cols = frameCount();
 51 | 		MatrixXd result = MatrixXd::Zero(winSize, cols);
 52 | 		// Only complete runs are copied (cols*half <= signalSize), so samples left
 53 | 		// over at the end can never be written past the last column.
 54 | 		for(int k = 0; k < cols; k++){
 55 | 			for(int j = 0; j < half; j++){
 56 | 				const int i = k*half + j;
 57 | 				result(winSize - half + j, k) = samples ? samples[i] : i + 1;
 58 | 			}
 59 | 			if(k > 0)
 60 | 				result.col(k).head(half) = result.col(k-1).tail(half);
 61 | 		}
 62 | 		return result;
 63 | 	}
 64 | 
 65 | 	// Magnitude spectrum of frame `index` of enFrame after applying the window.
 66 | 	VectorXd windowedSpectrum(int index){
 67 | 		// A std::vector replaces the variable-length array, which is not standard C++.
 68 | 		vector<double> fft_input(this->winSize);
 69 | 		for(int j = 0; j < this->winSize; j++){
 70 | 			fft_input[j] = this->hamming(j) * this->enFrame(j, index);
 71 | 		}
 72 | 		return fft_calc(&fft_input[0]);
 73 | 	}
 74 | 
 75 | public:
 76 | 
 77 | 	// winSize: samples per frame. signalSize: number of samples in the array later
 78 | 	// given to compute(). order: frames on each side used by ltse(). threshold:
 79 | 	// LTSD value below which ltsd() updates the noise estimate.
 80 | 	VAD(int winSize, int signalSize, int order,double threshold){
 81 | 		this->winSize = winSize;
 82 | 		this->signalSize = signalSize;
 83 | 		this->order = order;
 84 | 		this->threshold = threshold;
 85 | 		this->NFFT2 = this->winSize/2;
 86 | 		this->enFrame = MatrixXd::Zero(this->winSize, frameCount());
 87 | 		this->hamming = createHammingWindow();
 88 | 	}
 89 | 
 90 | 	~VAD(){}
 91 | 
 92 | 	// Returns the winSize-point window 0.54 - 0.46*cos(2*PI*i/(winSize-1)).
 93 | 	RowVectorXd createHammingWindow(){
 94 | 		const double alpha = 0.54;
 95 | 		const double beta = 0.46;
 96 | 		RowVectorXd window = RowVectorXd::Zero(winSize);
 97 | 		for(int i = 0; i < winSize; i++){
 98 | 			window(i) = alpha - beta*cos((2*PI*i)/(winSize-1));
 99 | 		}
100 | 		return window;
101 | 	}
102 | 
103 | 	// Splits `signal` (signalSize samples) into the frames stored in enFrame.
104 | 	void buffer(double* signal){
105 | 		this->enFrame = frame(signal);
106 | 	}
107 | 
108 | 	// Returns the magnitudes of the first NFFT2 bins of the real-input DFT of the
109 | 	// winSize samples starting at `fft_input`.
110 | 	VectorXd fft_calc(double* fft_input){
111 | 		VectorXd result = VectorXd::Zero(NFFT2);
112 | 		fftw_complex* fft_result = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * this->winSize);
113 | 		fftw_plan p = fftw_plan_dft_r2c_1d(this->winSize, fft_input, fft_result,FFTW_ESTIMATE);
114 | 		fftw_execute(p);
115 | 		fftw_destroy_plan(p);
116 | 
117 | 		for(int i = 0; i < this->NFFT2; i++){
118 | 			result(i) = sqrt(pow(fft_result[i][0],2)+ pow(fft_result[i][1],2));
119 | 		}
120 | 		fftw_free(fft_result);
121 | 		return result;
122 | 	}
123 | 
124 | 	// Average of the windowed magnitude spectra of all frames in enFrame.
125 | 	VectorXd computeNoiseAverageSpectrum(){
126 | 		const int wnum = this->enFrame.cols();
127 | 		VectorXd avgAmp = VectorXd::Zero(this->NFFT2);
128 | 		if(wnum == 0)
129 | 			return avgAmp; // no frames: return zeros instead of dividing by zero
130 | 		for(int i = 0; i < wnum; i++){
131 | 			avgAmp += windowedSpectrum(i);
132 | 		}
133 | 		return avgAmp/wnum;
134 | 	}
135 | 
136 | 	// Magnitude spectrum of frame `index`. Spectra are cached in `amplitude`; any
137 | 	// frames up to `index` that are not cached yet are computed first, so the
138 | 	// cache rows always line up with the frame numbers.
139 | 	VectorXd getAmplitude(int index){
140 | 		while(amplitude.rows() <= index){
141 | 			const int next = amplitude.rows();
142 | 			VectorXd spectrum = windowedSpectrum(next);
143 | 			amplitude.conservativeResize(next+1, this->NFFT2);
144 | 			amplitude.row(next) = spectrum;
145 | 		}
146 | 		VectorXd amp;
147 | 		amp = amplitude.row(index);
148 | 		return amp;
149 | 	}
150 | 
151 | 	// Element-wise maximum of the first NFFT2 entries of `a` and `b`.
152 | 	VectorXd findMax(VectorXd& a, VectorXd& b){
153 | 		VectorXd result = VectorXd::Zero(this->NFFT2);
154 | 		for(int i = 0; i < this->NFFT2; i++){
155 | 			result(i) = (a(i) > b(i)) ? a(i) : b(i);
156 | 		}
157 | 		return result;
158 | 	}
159 | 
160 | 	// Long-term spectral envelope: per-bin maximum of the magnitude spectra of
161 | 	// frames index-order .. index+order, both ends included.
162 | 	VectorXd ltse(int index){
163 | 		VectorXd maxmag = VectorXd::Zero(this->NFFT2);
164 | 		for(int i = index - order; i <= index + order; i++){
165 | 			VectorXd amp = getAmplitude(i);
166 | 			// Accumulate into maxmag so the maximum runs over the whole window.
167 | 			maxmag = findMax(amp,maxmag);
168 | 		}
169 | 		return maxmag;
170 | 	}
171 | 
172 | 	// Long-term spectral divergence (dB) of frame `index`; 0.0 for frames closer than
173 | 	// `order` to either end. A result below `threshold` updates averageNoise.
174 | 	double ltsd(int index){
175 | 		if(index < (this->order) || (index+order >= this->enFrame.cols())){
176 | 			return 0.0;
177 | 		}
178 | 		VectorXd ltseOutput = ltse(index).array().square();
179 | 		VectorXd sp = ltseOutput.array()/this->averageNoise.array();
180 | 		double sum = 0;
181 | 		for(int i = 0; i < sp.size(); i++){
182 | 			sum += sp(i)/this->NFFT2;
183 | 		}
184 | 		double result = 10 * log10(sum);
185 | 		if(result < this->threshold){
186 | 			this->averageNoise = 0.54 * this->averageNoise + (1-0.54)*sum*VectorXd::Ones(averageNoise.size());
187 | 		}
188 | 		return result;
189 | 	}
190 | 
191 | 	// Frames `signal`, derives the noise estimate from all frames and returns the
192 | 	// LTSD value of every frame, in frame order.
193 | 	vector<double> compute(double* signal){
194 | 		buffer(signal);
195 | 		// Spectra cached by an earlier call belong to the previous signal: drop them.
196 | 		this->amplitude.resize(0, this->NFFT2);
197 | 		int wnum = this->enFrame.cols();
198 | 		vector<double> ltsds;
199 | 		this->averageNoise = computeNoiseAverageSpectrum().array().square();
200 | 		for(int i = 0; i < wnum; i++){
201 | 			ltsds.push_back(ltsd(i));
202 | 		}
203 | 		return ltsds;
204 | 	}
205 | 
206 | 	// Same layout as enFrame, but every entry holds the 1-based position of the
207 | 	// sample in the signal instead of the sample value.
208 | 	MatrixXd getNormalizedEnFrame(){
209 | 		return frame(0);
210 | 	}
211 | };
212 | 
213 | #endif /* VAD_H_ */
214 | 


--------------------------------------------------------------------------------
/voice_detection.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * voice_detection.cpp
  3 |  *
  4 |  *  Created on: May 26, 2016
  5 |  *      Author: dmngu9
  6 |  */
  7 | #include "VAD.h"
  8 | #include <fstream>
  9 | #include <sstream>
 10 | 
 11 | using namespace std;
 12 | 
 13 | int main(int argc, char** argv){
 14 | 
 15 | 	const int fs = 11025;             // sampling rate used to convert frames to seconds
 16 | 	const int WinSize = 256;          // samples per analysis frame
 17 | 	const int order = 5;              // frames on each side used by the detector
 18 | 	const double threshold = -6;      // passed to VAD as its noise-update threshold
 19 | 	const int uSize = 4;              // minimum length of a reported segment, in frames
 20 | 	const size_t maxSamples = 50176;  // at most this many samples are read
 21 | 	const double decisionLevel = 2.5; // a frame is voice when its LTSD exceeds this
 22 | 
 23 | 	// The input file is mandatory; argv[1] must not be read when it is absent.
 24 | 	if(argc < 2){
 25 | 		cerr << "usage: vad <sound.txt>" << endl;
 26 | 		return 1;
 27 | 	}
 28 | 	ifstream soundFile(argv[1]);
 29 | 	if(!soundFile.is_open()){
 30 | 		cerr << "cannot open " << argv[1] << endl;
 31 | 		return 1;
 32 | 	}
 33 | 	vector<double> sound;
 34 | 	double value;
 35 | 	while(sound.size() < maxSamples && soundFile >> value){
 36 | 		sound.push_back(value);
 37 | 	}
 38 | 	soundFile.close();
 39 | 
 40 | 	// VAD frames the signal in runs of WinSize/2 samples, so only whole runs are handed over.
 41 | 	const size_t hop = WinSize/2;
 42 | 	const size_t usable = (sound.size()/hop)*hop;
 43 | 	if(usable == 0){
 44 | 		cerr << "not enough samples in " << argv[1] << endl;
 45 | 		return 1;
 46 | 	}
 47 | 
 48 | 	VAD vad(WinSize,static_cast<int>(usable),order,threshold);
 49 | 	vector<double> outcome = vad.compute(&sound[0]);
 50 | 	MatrixXd enFrame = vad.getNormalizedEnFrame();
 51 | 
 52 | 	// Peak absolute sample value plus a 1% margin; used as the height of the boxes.
 53 | 	double maxLevel = 0;
 54 | 	for(size_t i = 0; i < sound.size(); i++){
 55 | 		maxLevel = max(maxLevel, fabs(sound[i]));
 56 | 	}
 57 | 	maxLevel += 0.01*maxLevel;
 58 | 
 59 | 	// A rising edge of the voice decision starts a segment, a falling edge ends it.
 60 | 	vector<int> vadStart, vadEnd;
 61 | 	for(size_t i = 0; i + 1 < outcome.size(); i++){
 62 | 		const bool voiced = outcome[i] > decisionLevel;
 63 | 		const bool voicedNext = outcome[i+1] > decisionLevel;
 64 | 		if(!voiced && voicedNext)
 65 | 			vadStart.push_back(static_cast<int>(i));
 66 | 		else if(voiced && !voicedNext)
 67 | 			vadEnd.push_back(static_cast<int>(i));
 68 | 	}
 69 | 
 70 | 	// Keep segments of at least uSize frames. Both sides are in seconds and computed
 71 | 	// in floating point: the integer expression uSize*WinSize/fs would always be 0.
 72 | 	const double q = (double) WinSize/fs;
 73 | 	const size_t pairs = min(vadStart.size(), vadEnd.size());
 74 | 	vector<int> VAD_begin, VAD_end;
 75 | 	for(size_t i = 0; i < pairs; i++){
 76 | 		if((vadEnd[i] - vadStart[i])*q >= uSize*q){
 77 | 			VAD_begin.push_back(vadStart[i]);
 78 | 			VAD_end.push_back(vadEnd[i]);
 79 | 		}
 80 | 	}
 81 | 
 82 | 	// One file per segment, holding the five corner points of a closed rectangle.
 83 | 	cout << enFrame.row(0) << endl;
 84 | 	for(size_t i = 0; i < VAD_begin.size(); i++){
 85 | 		const double x_start = enFrame(0,VAD_begin[i]+1) + 0.5*order*WinSize;
 86 | 		const double x_end = enFrame(enFrame.rows()-1,VAD_end[i]+1) + 0.5*order*WinSize;
 87 | 		stringstream ss;
 88 | 		ss << "example_" << i << ".txt";
 89 | 		cout << ss.str();
 90 | 		ofstream myfile(ss.str().c_str());
 91 | 		myfile << x_start << "\t\t\t" << maxLevel << "\n";
 92 | 		myfile << x_end << "\t\t\t" << maxLevel << "\n";
 93 | 		myfile << x_end << "\t\t\t" << -maxLevel << "\n";
 94 | 		myfile << x_start << "\t\t\t" << -maxLevel << "\n";
 95 | 		myfile << x_start << "\t\t\t" << maxLevel << "\n";
 96 | 		myfile.close();
 97 | 	}
 98 | 
 99 | 	return 0;
100 | }
101 | 
102 | 
103 | 
104 | 


--------------------------------------------------------------------------------