├── .gitignore
├── requirements.txt
├── test
│   ├── s1.wav
│   ├── s2.wav
│   ├── s3.wav
│   ├── s4.wav
│   ├── s5.wav
│   ├── s6.wav
│   ├── s7.wav
│   └── s8.wav
├── train
│   ├── s1.wav
│   ├── s2.wav
│   ├── s3.wav
│   ├── s4.wav
│   ├── s5.wav
│   ├── s6.wav
│   ├── s7.wav
│   └── s8.wav
├── ASR_report.pdf
├── src
│   ├── __init__.py
│   ├── test.py
│   ├── LPC.py
│   ├── spectrogram.py
│   ├── LBG.py
│   ├── train.py
│   └── mel_coefficients.py
├── README.md
└── LICENSE

/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | *.out
3 | *.geany
4 | /.env
5 | 

--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | matplotlib
4 | 

--------------------------------------------------------------------------------
/test/s1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/test/s1.wav

--------------------------------------------------------------------------------
/test/s2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/test/s2.wav

--------------------------------------------------------------------------------
/test/s3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/test/s3.wav

--------------------------------------------------------------------------------
/test/s4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/test/s4.wav

--------------------------------------------------------------------------------
/test/s5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/test/s5.wav

--------------------------------------------------------------------------------
/test/s6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/test/s6.wav

--------------------------------------------------------------------------------
/test/s7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/test/s7.wav

--------------------------------------------------------------------------------
/test/s8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/test/s8.wav

--------------------------------------------------------------------------------
/train/s1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/train/s1.wav

--------------------------------------------------------------------------------
/train/s2.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/train/s2.wav

--------------------------------------------------------------------------------
/train/s3.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/train/s3.wav

--------------------------------------------------------------------------------
/train/s4.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/train/s4.wav

--------------------------------------------------------------------------------
/train/s5.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/train/s5.wav

--------------------------------------------------------------------------------
/train/s6.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/train/s6.wav

--------------------------------------------------------------------------------
/train/s7.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/train/s7.wav

--------------------------------------------------------------------------------
/train/s8.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/train/s8.wav

--------------------------------------------------------------------------------
/ASR_report.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/orchidas/Speaker-Recognition/HEAD/ASR_report.pdf

--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Feb 26 02:37:34 2016
4 | 
5 | @author: ORCHISAMA
6 | """
7 | 
8 | 

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Speaker-Recognition
2 | Automatic Speaker Recognition algorithms in Python
3 | 
4 | This repository contains Python programs that can be used for Automatic Speaker Recognition (ASR). ASR is done by extracting MFCCs (Mel Frequency Cepstral Coefficients) and LPCs (Linear Predictive Coding coefficients) from each speaker and then forming a speaker-specific codebook
5 | from them by using Vector Quantization (I like to think of it as a fancy name for NN-clustering).
6 | The system is then trained and tested on 8 different speakers.
7 | 
8 | 
9 | To test the algorithm, run test.py from the terminal. Certain parameters can be changed, such as the order of the LPC coefficients, the number of Mel filterbanks and the number of centroids in each codebook.
10 | Everything is included in the repository, including .wav files for testing and training, so cloning it and running test.py should work.
11 | 
12 | A PDF (ASR_report.pdf) has been included that explains the theory and provides links to relevant websites and projects.
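As a rough guide, the same pipeline that test.py runs can also be driven by hand. This is only a minimal sketch: it assumes the interpreter is started from inside `src/` (the scripts build the `train/` and `test/` paths relative to the parent of the working directory), and the number of centroids per codebook is currently hard-coded as `nCentroid = 16` inside `train.training`.

```python
# Sketch of the pipeline used by src/test.py, with the two exposed parameters.
from scipy.io.wavfile import read
from train import training
from mel_coefficients import mfcc
from LPC import lpc

nfiltbank = 12   # number of Mel filterbanks
orderLPC = 15    # order of the LPC model
# builds one MFCC codebook and one LPC codebook per training speaker
codebooks_mfcc, codebooks_lpc = training(nfiltbank, orderLPC)

fs, s = read('../test/s1.wav')       # any utterance from the test folder
mel_coefs = mfcc(s, fs, nfiltbank)   # nfiltbank x nFrames feature matrix
lpc_coefs = lpc(s, fs, orderLPC)     # orderLPC x nFrames feature matrix
```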
13 | 

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) [2016] [Orchisama Das]
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 

--------------------------------------------------------------------------------
/src/test.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Feb 26 20:21:03 2016
4 | 
5 | @author: ORCHISAMA
6 | """
7 | 
8 | from __future__ import division
9 | import numpy as np
10 | from scipy.io.wavfile import read
11 | from LBG import EUDistance
12 | from mel_coefficients import mfcc
13 | from LPC import lpc
14 | from train import training
15 | import os
16 | 
17 | 
18 | nSpeaker = 8
19 | nfiltbank = 12
20 | orderLPC = 15
21 | (codebooks_mfcc, codebooks_lpc) = training(nfiltbank, orderLPC)
22 | directory = os.path.dirname(os.getcwd()) + '/test';
23 | fname = str()
24 | nCorrect_MFCC = 0
25 | nCorrect_LPC = 0
26 | 
27 | 
28 | def minDistance(features, codebooks):
29 |     speaker = 0
30 |     distmin = np.inf
31 |     for k in range(np.shape(codebooks)[0]):
32 |         D = EUDistance(features, codebooks[k,:,:])
33 |         dist = np.sum(np.min(D, axis = 1))/(np.shape(D)[0])
34 |         if dist < distmin:
35 |             distmin = dist
36 |             speaker = k
37 | 
38 |     return speaker
39 | 
40 | 
41 | for i in range(nSpeaker):
42 |     fname = '/s' + str(i+1) + '.wav'
43 |     print('Now speaker ', str(i+1), 'features are being tested')
44 |     (fs,s) = read(directory + fname)
45 |     mel_coefs = mfcc(s,fs,nfiltbank)
46 |     lpc_coefs = lpc(s, fs, orderLPC)
47 |     sp_mfcc = minDistance(mel_coefs, codebooks_mfcc)
48 |     sp_lpc = minDistance(lpc_coefs, codebooks_lpc)
49 | 
50 |     print('Speaker ', (i+1), ' in test matches with speaker ', (sp_mfcc+1), ' in train for training with MFCC')
51 |     print('Speaker ', (i+1), ' in test matches with speaker ', (sp_lpc+1), ' in train for training with LPC')
52 | 
53 |     if i == sp_mfcc:
54 |         nCorrect_MFCC += 1
55 |     if i == sp_lpc:
56 |         nCorrect_LPC += 1
57 | 
58 | 
59 | percentageCorrect_MFCC = (nCorrect_MFCC/nSpeaker)*100
60 | print('Accuracy of result for training with MFCC is ', percentageCorrect_MFCC, '%')
61 | percentageCorrect_LPC = (nCorrect_LPC/nSpeaker)*100
62 | print('Accuracy of result for training with LPC is ', percentageCorrect_LPC, '%')
63 | 
64 | 
65 | 
66 | 
--------------------------------------------------------------------------------
/src/LPC.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Sat Feb 27 22:01:37 2016
4 | 
5 | @author: ORCHISAMA
6 | """
7 | 
8 | #calculate LPC coefficients from sound file
9 | from __future__ import division
10 | import numpy as np
11 | import matplotlib.pyplot as plt
12 | 
13 | 
14 | def autocorr(x):
15 |     n = len(x)
16 |     variance = np.var(x)
17 |     x = x - np.mean(x)
18 |     r = np.correlate(x, x, mode = 'full')[-n:] #n numbers from last index (l-n to l)
19 |     result = r/(variance*(np.arange(n, 0, -1)))
20 |     return result
21 | 
22 | 
23 | def createSymmetricMatrix(acf,p):
24 |     R = np.empty((p,p))
25 |     for i in range(p):
26 |         for j in range(p):
27 |             R[i,j] = acf[np.abs(i-j)]
28 |     return R
29 | 
30 | 
31 | def lpc(s,fs,p):
32 | 
33 |     #divide into segments of 25 ms with overlap of 10ms
34 |     nSamples = np.int32(0.025*fs)
35 |     overlap = np.int32(0.01*fs)
36 |     nFrames = np.int32(np.ceil(len(s)/(nSamples-overlap)))
37 | 
38 |     #zero padding so that the last frame is full length
39 |     padding = ((nSamples-overlap)*(nFrames-1) + nSamples) - len(s)
40 |     if padding > 0:
41 |         signal = np.append(s, np.zeros(padding))
42 |     else:
43 |         signal = s
44 |     segment = np.empty((nSamples, nFrames))
45 |     start = 0
46 |     for i in range(nFrames):
47 |         segment[:,i] = signal[start:start+nSamples]
48 |         start = (nSamples-overlap)*(i+1)    #advance by one hop for the next frame
49 | 
50 |     #calculate LPC with Yule-Walker
51 |     lpc_coeffs = np.empty((p, nFrames))
52 |     for i in range(nFrames):
53 |         acf = autocorr(segment[:,i])
54 |         # plt.figure(1)
55 |         # plt.plot(acf)
56 |         # plt.xlabel('lags')
57 |         # plt.ylabel('Autocorrelation coefficients')
58 |         # plt.axis([0, np.size(acf), -1, 1])
59 |         # plt.title('Autocorrelation function')
60 |         # break
61 |         r = -acf[1:p+1].T
62 |         R = createSymmetricMatrix(acf,p)
63 |         lpc_coeffs[:,i] = np.dot(np.linalg.inv(R),r)
64 |         lpc_coeffs[:,i] = lpc_coeffs[:,i]/np.max(np.abs(lpc_coeffs[:,i]))
65 | 
66 |     return lpc_coeffs
67 | 

--------------------------------------------------------------------------------
/src/spectrogram.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Tue Feb 23 23:53:09 2016
4 | 
5 | @author: ORCHISAMA
6 | """
7 | #calculate short time fourier transform and plot spectrogram
8 | from __future__ import division
9 | import matplotlib.pyplot as plt
10 | import numpy as np
11 | from scipy.fftpack import fft, fftshift
12 | from scipy.signal import hann
13 | 
14 | 
15 | def nearestPow2(inp):
16 |     power = np.ceil(np.log2(inp))
17 |     return int(2**power)    #return an integer so nfft can still be used as an array size
18 | 
19 | def stft(signal, fs, nfft, overlap):
20 | 
21 |     #plotting time domain signal
22 |     plt.figure(1)
23 |     t = np.arange(0,len(signal)/fs, 1/fs)
24 |     plt.plot(t,signal)
25 |     plt.axis(xmax = 1)
26 |     plt.xlabel('Time in seconds')
27 |     plt.ylabel('Amplitude')
28 |     plt.title('Speech signal')
29 | 
30 |     if not np.log2(nfft).is_integer():
31 |         nfft = nearestPow2(nfft)
32 |     slength = len(signal)
33 |     hop_size = np.int32(overlap * nfft)
34 |     nFrames = int(np.round(len(signal)/(nfft-hop_size)))
35 |     #zero padding to make signal length long enough to have nFrames
36 |     signal = np.append(signal, np.zeros(nfft))
37 |     STFT = np.empty((nfft, nFrames))
38 |     segment = np.zeros(nfft)
39 |     start = 0
40 | 
41 |     for n in range(nFrames):
42 |         segment = signal[start:start+nfft] * hann(nfft)
43 |         padded_seg = np.append(segment,np.zeros(nfft))
44 |         spec = fftshift(fft(padded_seg))
45 |         spec = spec[len(spec)//2:]    #keep the positive-frequency half (integer division for Python 3 indexing)
46 |         spec = abs(spec)/max(abs(spec))
47 |         powerspec = 20*np.log10(spec)
48 |         STFT[:,n] = powerspec
49 |         start = start + nfft - hop_size
50 | 
51 |     #plot spectrogram
52 |     plt.figure(2)
53 |     freq = (fs/(2*nfft)) * np.arange(0,nfft,1)
54 |     time = np.arange(0,nFrames)*(slength/(fs*nFrames))
55 |     plt.imshow(STFT, extent = [0,max(time),0,max(freq)],origin='lower', cmap='jet', interpolation='nearest', aspect='auto')
56 |     plt.ylabel('Frequency in Hz')
57 |     plt.xlabel('Time in seconds')
58 |     plt.axis([0,max(time),0,np.max(freq)])
59 |     plt.title('Spectrogram of speech')
60 |     plt.show()
61 |     return (STFT, time, freq)
62 | 
63 | 
64 | 
65 | 
66 | 
67 | 

--------------------------------------------------------------------------------
/src/LBG.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Thu Feb 25 00:34:54 2016
4 | 
5 | @author: ORCHISAMA
6 | """
7 | 
8 | #speaker specific Vector Quantization codebook using LBG algorithm
9 | from __future__ import division
10 | import numpy as np
11 | 
12 | 
13 | def EUDistance(d,c):
14 | 
15 |     # np.shape(d)[0] = np.shape(c)[0]
16 |     n = np.shape(d)[1]
17 |     p = np.shape(c)[1]
18 |     distance = np.empty((n,p))
19 | 
20 |     if n < p:
21 |         for i in range(n):
22 |             copies = np.transpose(np.tile(d[:,i], (p,1)))
23 |             distance[i,:] = np.sum((copies - c)**2, 0)
24 |     else:
25 |         for i in range(p):
26 |             copies = np.transpose(np.tile(c[:,i], (n,1)))
27 |             distance[:,i] = np.sum((d - copies)**2, 0)
28 | 
29 |     distance = np.sqrt(distance)
30 |     return distance
31 | 
32 | 
33 | def lbg(features, M):
34 |     #build a codebook of M centroids with the LBG (split and refine) algorithm
35 |     eps = 0.01
36 |     #start from a single codeword: the mean of all feature vectors
37 |     codebook = np.mean(features, 1)
38 |     distortion = 1
39 |     nCentroid = 1
40 |     while nCentroid < M:
41 |         #double the size of the codebook by perturbing each centroid by +/- eps
42 |         new_codebook = np.empty((len(codebook), nCentroid*2))
43 |         if nCentroid == 1:
44 |             new_codebook[:,0] = codebook*(1+eps)
45 |             new_codebook[:,1] = codebook*(1-eps)
46 |         else:
47 |             for i in range(nCentroid):
48 |                 new_codebook[:,2*i] = codebook[:,i]*(1+eps)
49 |                 new_codebook[:,2*i+1] = codebook[:,i]*(1-eps)
50 | 
51 |         codebook = new_codebook
52 |         nCentroid = np.shape(codebook)[1]
53 |         D = EUDistance(features, codebook)
54 |         distortion = 1
55 |         while np.abs(distortion) > eps:
56 |             #nearest neighbour search
57 |             prev_distance = np.mean(D)
58 |             nearest_codebook = np.argmin(D,axis = 1)
59 |             #print 'nearest neighbour',nearest_codebook
60 | 
61 |             #cluster vectors and find new centroid
62 |             #print np.shape(np.mean(features[:, np.where(nearest_codebook == 0)],2))
63 |             for i in range(nCentroid):
64 |                 codebook[:,i] = np.mean(features[:,np.where(nearest_codebook == i)], 2).T #add along 3rd dimension
65 | 
66 |             #replace all NaN values with 0
67 |             codebook = np.nan_to_num(codebook)
68 |             #print 'this codebook', codebook
69 |             D = EUDistance(features, codebook)
70 |             distortion = (prev_distance - np.mean(D))/prev_distance
71 |             #print 'distortion' , distortion
72 | 
73 | 
74 |     #print 'final codebook', codebook, np.shape(codebook)
75 |     return codebook
76 | 
77 | 
78 | 

--------------------------------------------------------------------------------
/src/train.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Fri Feb 26 19:57:32 2016
4 | 
5 | @author: ORCHISAMA
6 | """
7 | 
8 | from __future__ import division
9 | import numpy as np
10 | from scipy.io.wavfile import read
11 | from LBG import lbg
12 | from mel_coefficients import mfcc
13 | from LPC import lpc
14 | import matplotlib.pyplot as plt
15 | import os
16 | 
17 | 
18 | def training(nfiltbank, orderLPC):
19 |     nSpeaker = 8
20 |     nCentroid = 16
21 |     codebooks_mfcc = np.empty((nSpeaker,nfiltbank,nCentroid))
22 |     codebooks_lpc = np.empty((nSpeaker, orderLPC, nCentroid))
23 |     directory = os.path.dirname(os.getcwd()) + '/train';
24 |     fname = str()
25 | 
26 |     for i in range(nSpeaker):
27 |         fname = '/s' + str(i+1) + '.wav'
28 |         print('Now speaker ', str(i+1), 'features are being trained')
29 |         (fs,s) = read(directory + fname)
30 |         mel_coeff = mfcc(s, fs, nfiltbank)
31 |         lpc_coeff = lpc(s, fs, orderLPC)
32 |         codebooks_mfcc[i,:,:] = lbg(mel_coeff, nCentroid)
33 |         codebooks_lpc[i,:,:] = lbg(lpc_coeff, nCentroid)
34 | 
35 |         plt.figure(i)
36 |         plt.title('Codebook for speaker ' + str(i+1) + ' with ' + str(nCentroid) + ' centroids')
37 |         for j in range(nCentroid):
38 |             plt.subplot(211)
39 |             plt.stem(codebooks_mfcc[i,:,j])
40 |             plt.ylabel('MFCC')
41 |             plt.subplot(212)
42 |             markerline, stemlines, baseline = plt.stem(codebooks_lpc[i,:,j])
43 |             plt.setp(markerline,'markerfacecolor','r')
44 |             plt.setp(baseline,'color', 'k')
45 |             plt.ylabel('LPC')
46 |             plt.axis(ymin = -1, ymax = 1)
47 |             plt.xlabel('Number of features')
48 | 
49 |     plt.show()
50 |     print('Training complete')
51 | 
52 |     #plotting 5th and 7th dimension MFCC features on a 2D plane
53 |     #comment lines 54 to 71 if you don't want to see codebook
54 |     codebooks = np.empty((2, nfiltbank, nCentroid))
55 |     mel_coeff = np.empty((2, nfiltbank, 68))
56 | 
57 |     for i in range(2):
58 |         fname = '/s' + str(i+2) + '.wav'
59 |         (fs,s) = read(directory + fname)
60 |         mel_coeff[i,:,:] = mfcc(s, fs, nfiltbank)[:,0:68]
61 |         codebooks[i,:,:] = lbg(mel_coeff[i,:,:], nCentroid)
62 | 
63 | 
64 |     plt.figure(nSpeaker + 1)
65 |     s1 = plt.scatter(mel_coeff[0,6,:], mel_coeff[0,4,:],s = 100, color = 'r', marker = 'o')
66 |     c1 = plt.scatter(codebooks[0,6,:], codebooks[0,4,:], s = 100, color = 'r', marker = '+')
67 |     s2 = plt.scatter(mel_coeff[1,6,:], mel_coeff[1,4,:],s = 100, color = 'b', marker = 'o')
68 |     c2 = plt.scatter(codebooks[1,6,:], codebooks[1,4,:], s = 100, color = 'b', marker = '+')
69 |     plt.grid()
70 |     plt.legend((s1, s2, c1, c2), ('Sp1','Sp2','Sp1 centroids', 'Sp2 centroids'), scatterpoints = 1, loc = 'upper left')
71 |     plt.show()
72 | 
73 | 
74 |     return (codebooks_mfcc, codebooks_lpc)
75 | 
76 | 
77 | 

--------------------------------------------------------------------------------
/src/mel_coefficients.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Wed Feb 24 22:48:15 2016
4 | 
5 | @author: ORCHISAMA
6 | """
7 | 
8 | from __future__ import division
9 | from scipy.signal import hamming
10 | from scipy.fftpack import fft, fftshift, dct
11 | import numpy as np
12 | import matplotlib.pyplot as plt
13 | 
14 | def hertz_to_mel(freq):
15 |     return 1125*np.log(1 + freq/700)
16 | 
17 | def mel_to_hertz(m):
18 |     return 700*(np.exp(m/1125) - 1)
19 | 
20 | #calculate mel frequency filter bank
21 | def mel_filterbank(nfft, nfiltbank, fs):
22 | 
23 |     #set limits of mel scale from 300Hz to 8000Hz
24 |     lower_mel = hertz_to_mel(300)
25 |     upper_mel = hertz_to_mel(8000)
26 |     mel = np.linspace(lower_mel, upper_mel, nfiltbank+2)
27 |     hertz = [mel_to_hertz(m) for m in mel]
28 |     fbins = [int(hz * int(nfft/2+1)/fs) for hz in hertz]
29 |     fbank = np.empty((int(nfft/2+1),nfiltbank))
30 |     for i in range(1,nfiltbank+1):
31 |         for k in range(int(nfft/2 + 1)):
32 |             if k < fbins[i-1]:
33 |                 fbank[k, i-1] = 0
34 |             elif k >= fbins[i-1] and k < fbins[i]:
35 |                 fbank[k,i-1] = (k - fbins[i-1])/(fbins[i] - fbins[i-1])
36 |             elif k >= fbins[i] and k <= fbins[i+1]:
37 |                 fbank[k,i-1] = (fbins[i+1] - k)/(fbins[i+1] - fbins[i])
38 |             else:
39 |                 fbank[k,i-1] = 0
40 | 
41 |     # plotting mel frequency filter banks
42 |     # plt.figure(1)
43 |     # xbins = fs*np.arange(0,nfft/2+1)/(nfft/2+1)
44 |     # for i in range(nfiltbank):
45 |     #     plt.plot(xbins, fbank[:,i])
46 |     # plt.axis(xmax = 8000)
47 |     # plt.xlabel('Frequency in Hz')
48 |     # plt.ylabel('Amplitude')
49 |     # plt.title('Mel Filterbank')
50 |     # plt.show()
51 |     return fbank
52 | 
53 | def mfcc(s,fs, nfiltbank):
54 | 
55 |     #divide into segments of 25 ms with overlap of 10ms
56 |     nSamples = np.int32(0.025*fs)
57 |     overlap = np.int32(0.01*fs)
58 |     nFrames = np.int32(np.ceil(len(s)/(nSamples-overlap)))
59 |     #zero padding so that the last frame is full length
60 |     padding = ((nSamples-overlap)*(nFrames-1) + nSamples) - len(s)
61 |     if padding > 0:
62 |         signal = np.append(s, np.zeros(padding))
63 |     else:
64 |         signal = s
65 |     segment = np.empty((nSamples, nFrames))
66 |     start = 0
67 |     for i in range(nFrames):
68 |         segment[:,i] = signal[start:start+nSamples]
69 |         start = (nSamples-overlap)*(i+1)    #advance by one hop for the next frame
70 | 
71 |     #compute periodogram
72 |     nfft = 512
73 |     periodogram = np.empty((nFrames, int(nfft/2 + 1)))
74 |     for i in range(nFrames):
75 |         x = segment[:,i] * hamming(nSamples)
76 |         spectrum = fftshift(fft(x,nfft))
77 |         periodogram[i,:] = abs(spectrum[int(nfft/2-1):])/nSamples
78 | 
79 |     #calculating mfccs
80 |     fbank = mel_filterbank(nfft, nfiltbank, fs)
81 |     #nfiltbank MFCCs for each frame
82 |     mel_coeff = np.empty((nfiltbank,nFrames))
83 |     for i in range(nfiltbank):
84 |         for k in range(nFrames):
85 |             mel_coeff[i,k] = np.sum(periodogram[k,:]*fbank[:,i])
86 | 
87 |     mel_coeff = np.log10(mel_coeff)
88 |     mel_coeff = dct(mel_coeff)
89 |     #exclude 0th order coefficient (much larger than others)
90 |     mel_coeff[0,:]= np.zeros(nFrames)
91 |     return mel_coeff
92 | 
93 | 
94 | 
--------------------------------------------------------------------------------
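Note that src/spectrogram.py is not imported by train.py or test.py; it is a standalone utility for inspecting a recording. If you want to look at a spectrogram of one of the .wav files, a small driver along the following lines should work. This is only a sketch under assumptions not fixed by the repository: it must be run from inside src/, and the choice of file, FFT length and 50% overlap are arbitrary.

```python
# Hypothetical driver for src/spectrogram.py (run from inside src/).
from scipy.io.wavfile import read
from spectrogram import stft

fs, s = read('../train/s1.wav')   # any utterance from the train or test folder
nfft = 512                        # FFT length; stft rounds non powers of 2 up
overlap = 0.5                     # fraction of nfft shared between consecutive frames
# stft plots the waveform and the spectrogram, and returns the STFT matrix
# together with its time and frequency axes
STFT, time, freq = stft(s, fs, nfft, overlap)
```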