├── .gitmodules ├── ActivityDetection.py ├── BOB.py ├── LPC.py ├── Main.py ├── README.md ├── RecordAudio.py ├── UI.py ├── __init__.py ├── background earphone.wav ├── gmmset.py ├── ltsd.py ├── skgmm.py └── utils.py /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "gmm"] 2 | path = gmm 3 | url = https://github.com/zxytim/fast-gmm 4 | -------------------------------------------------------------------------------- /ActivityDetection.py: -------------------------------------------------------------------------------- 1 | from ltsd import LTSD_VAD 2 | import numpy as np 3 | 4 | class ActivityDetection: 5 | 6 | def __init__(self): 7 | self.initted = False 8 | #self.nr = NoiseReduction() 9 | self.ltsd = LTSD_VAD() 10 | 11 | def init_noise(self, fs, signal): 12 | self.initted = True 13 | #self.nr.init_noise(fs, signal) 14 | self.ltsd.init_params_by_noise(fs, signal) 15 | #nred = self.nr.filter(fs, signal) 16 | #self.ltsd.init_params_by_noise(fs, nred) 17 | 18 | def filter(self, fs, signal): 19 | if not self.initted: 20 | raise "NoiseFilter Not Initialized" 21 | # nred = self.nr.filter(fs, signal) 22 | # removed = remove_silence(fs, nred) 23 | # self.ltsd.plot_ltsd(fs, nred) 24 | orig_len = len(signal) 25 | filtered, intervals = self.ltsd.filter(signal) 26 | #print 'signal lengths', len(filtered), orig_len 27 | if len(filtered) > orig_len / 3: 28 | return filtered 29 | return np.array([]) 30 | 31 | def remove_silence(self,fs, signal, frame_duration = 0.02, frame_shift = 0.01, perc = 0.15): 32 | orig_dtype = type(signal[0]) 33 | siglen = len(signal) 34 | retsig = np.zeros(siglen, dtype = np.int64) 35 | frame_length = int(frame_duration * fs) 36 | frame_shift_length = int(frame_shift * fs) 37 | new_siglen = 0 38 | i = 0 39 | average_energy = np.sum(signal ** 2) / float(siglen) 40 | 41 | #print "Avg Energy of signal: ", average_energy 42 | while i < siglen: 43 | subsig = signal[i:i + frame_length] 44 | ave_energy = np.sum(subsig ** 2) / float(len(subsig)) 45 | if ave_energy < average_energy * perc: 46 | i += frame_length 47 | else: 48 | sigaddlen = min(frame_shift_length, len(subsig)) 49 | retsig[new_siglen:new_siglen + sigaddlen] = subsig[:sigaddlen] 50 | new_siglen += sigaddlen 51 | i += frame_shift_length 52 | retsig = retsig[:new_siglen] 53 | return retsig.astype(orig_dtype) 54 | -------------------------------------------------------------------------------- /BOB.py: -------------------------------------------------------------------------------- 1 | from utils import cached_func, diff_feature 2 | import bob 3 | import bob.ap 4 | import numpy 5 | 6 | @cached_func 7 | def get_bob_extractor(fs, win_length_ms=32, win_shift_ms=16, 8 | n_filters=55, n_ceps=19, f_min=0., f_max=6000, 9 | delta_win=2, pre_emphasis_coef=0.95, dct_norm=True, 10 | mel_scale=True): 11 | ret = bob.ap.Ceps(fs, win_length_ms, win_shift_ms, n_filters, n_ceps, f_min, 12 | f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm) 13 | return ret 14 | 15 | def extract(fs, signal=None, diff=False, **kwargs): 16 | """accept two argument, or one as a tuple""" 17 | if signal is None: 18 | assert type(fs) == tuple 19 | fs, signal = fs[0], fs[1] 20 | 21 | signal = numpy.cast['float'](signal) 22 | ret = get_bob_extractor(fs, **kwargs)(signal) 23 | if diff: 24 | return diff_feature(ret) 25 | return ret 26 | -------------------------------------------------------------------------------- /LPC.py: -------------------------------------------------------------------------------- 1 | 
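# LPC.py: frame-level linear-prediction features. Each frame is Hamming-windowed,
# pre-emphasised and passed through the Levinson-Durbin recursion
# (scikits.talkbox.linpred) to obtain n_lpc prediction coefficients; Main.py
# concatenates these with the MFCC features computed in BOB.py.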
import time 2 | #import scikits.talkbox as tb 3 | from scikits.talkbox.linpred import levinson_lpc 4 | from numpy import * 5 | from scipy.io import wavfile 6 | from MFCC import hamming 7 | from utils import cached_func, diff_feature 8 | 9 | class LPCExtractor(object): 10 | def __init__(self, fs, win_length_ms, win_shift_ms, n_lpc, 11 | pre_emphasis_coef): 12 | self.PRE_EMPH = pre_emphasis_coef 13 | self.n_lpc = n_lpc 14 | #self.n_lpcc = n_lpcc + 1 15 | 16 | self.FRAME_LEN = int(float(win_length_ms) / 1000 * fs) 17 | self.FRAME_SHIFT = int(float(win_shift_ms) / 1000 * fs) 18 | self.window = hamming(self.FRAME_LEN) 19 | 20 | 21 | def lpc_to_cc(self, lpc): 22 | lpcc = zeros(self.n_lpcc) 23 | lpcc[0] = lpc[0] 24 | for n in range(1, self.n_lpc): 25 | lpcc[n] = lpc[n] 26 | for l in range(0, n): 27 | lpcc[n] += lpc[l] * lpcc[n - l - 1] * (n - l) / (n + 1) 28 | for n in range(self.n_lpc, self.n_lpcc): 29 | lpcc[n] = 0 30 | for l in range(0, self.n_lpc): 31 | lpcc[n] += lpc[l] * lpcc[n - l - 1] * (n - l) / (n + 1) 32 | return -lpcc[1:] 33 | 34 | def lpcc(self, signal): 35 | lpc = levinson_lpc.lpc(signal, self.n_lpc)[0] 36 | return lpc[1:] 37 | #lpcc = self.lpc_to_cc(lpc) 38 | #return lpcc 39 | 40 | def extract(self, signal): 41 | frames = (len(signal) - self.FRAME_LEN) / self.FRAME_SHIFT + 1 42 | feature = [] 43 | for f in xrange(frames): 44 | frame = signal[f * self.FRAME_SHIFT : f * self.FRAME_SHIFT + 45 | self.FRAME_LEN] * self.window 46 | frame[1:] -= frame[:-1] * self.PRE_EMPH 47 | feature.append(self.lpcc(frame)) 48 | 49 | feature = array(feature) 50 | feature[isnan(feature)] = 0 51 | return feature 52 | 53 | @cached_func 54 | def get_lpc_extractor(fs, win_length_ms=32, win_shift_ms=16, 55 | n_lpc=15, pre_emphasis_coef=0.95): 56 | ret = LPCExtractor(fs, win_length_ms, win_shift_ms, n_lpc, pre_emphasis_coef) 57 | return ret 58 | 59 | 60 | def extract(fs, signal=None, diff=False, **kwargs): 61 | """accept two argument, or one as a tuple""" 62 | if signal is None: 63 | assert type(fs) == tuple 64 | fs, signal = fs[0], fs[1] 65 | signal = cast['float'](signal) 66 | ret = get_lpc_extractor(fs, **kwargs).extract(signal) 67 | if diff: 68 | return diff_feature(ret) 69 | return ret 70 | 71 | if __name__ == "__main__": 72 | extractor = LPCCExtractor(8000) 73 | fs, signal = wavfile.read("../corpus.silence-removed/Style_Reading/f_001_03.wav") 74 | start = time.time() 75 | ret = extractor.extract(signal) 76 | print len(ret) 77 | print len(ret[0]) 78 | print time.time() - start 79 | -------------------------------------------------------------------------------- /Main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import cPickle as pickle 5 | import pyaudio 6 | from scipy.io import wavfile 7 | 8 | from collections import defaultdict 9 | import BOB as MFCC 10 | import LPC 11 | import numpy as np 12 | import traceback as tb 13 | from ActivityDetection import ActivityDetection 14 | 15 | try: 16 | from gmmset import GMMSetPyGMM as GMMSet 17 | from gmmset import GMM 18 | except Exception as e: 19 | print >> sys.stderr, "Warning: failed to import fast-gmm, use gmm from scikit-learn instead" 20 | print str(e) 21 | from skgmm import GMMSet, GMM 22 | 23 | class Main(): 24 | 25 | FORMAT=pyaudio.paInt16 26 | NPDtype = 'int16' 27 | FS = 8000 28 | nr_mixture = 32 29 | 30 | model_file = 'test.gmm' 31 | ubm_model_file = 'ubm.mixture-32.utt-300.model' 32 | 33 | def __init__(self): 34 | self.features = defaultdict(list) 35 | self.ad = 
ActivityDetection() 36 | self.signal = [] 37 | try : 38 | fs, signal = wavfile.read('background earphone.wav') 39 | self.ad.init_noise(fs, signal) 40 | except : 41 | pass 42 | 43 | def getFeatures(self): 44 | ''' 45 | mfcc = MFCC.extract(self.FS, self.signal) 46 | lpc = LPC.extract(self.FS, self.signal) 47 | features = np.concatenate((mfcc, lpc), axis=1) 48 | ''' 49 | features = self.mix_feature() 50 | self.features[self.name].extend(features) 51 | return features 52 | 53 | def mix_feature(self): 54 | mfcc = MFCC.extract(self.FS, self.signal) 55 | lpc = LPC.extract(self.FS, self.signal) 56 | #if len(mfcc) == 0: 57 | # print >> sys.stderr, "ERROR.. failed to extract mfcc feature:", len(tup[1]) 58 | #print "mfcc ",mfcc 59 | #print "lpc ",lpc 60 | return np.concatenate((mfcc, lpc), axis=1) 61 | 62 | def _get_gmm_set(self): 63 | if os.path.isfile(self.ubm_model_file): 64 | try: 65 | from gmmset import GMMSetPyGMM 66 | if GMMSet is GMMSetPyGMM: 67 | return GMMSet(ubm=GMM.load(self.ubm_model_file)) 68 | except Exception as e: 69 | print "Warning: failed to import gmmset. You may forget to compile gmm:" 70 | print e 71 | print "Try running `make -C src/gmm` to compile gmm module." 72 | print "But gmm from sklearn will work as well! Using it now!" 73 | return GMMSet() 74 | return GMMSet() 75 | 76 | def train(self): 77 | self.gmmset = self._get_gmm_set() 78 | start = time.time() 79 | print "Training start..." 80 | for name, feats in self.features.iteritems(): 81 | self.gmmset.fit_new(feats, name) 82 | print time.time() - start, " seconds" 83 | print "Training complete" 84 | 85 | def predict(self): 86 | """ return a label (name)""" 87 | try: 88 | ''' 89 | mfcc = MFCC.extract(self.FS, self.signal) 90 | lpc = LPC.extract(self.FS, self.signal) 91 | features = np.concatenate((mfcc, lpc), axis=1) 92 | ''' 93 | features = self.mix_feature() 94 | except Exception as e: 95 | print tb.format_exc() 96 | return None 97 | self.gmmset.ubm = GMM.load(self.ubm_model_file) 98 | #print 'The registered users are :', len(self.gmmset.y) 99 | #print self.gmmset.y 100 | #print [y for y in self.gmmset.y] 101 | return self.gmmset.predict_one_with_rejection(features) 102 | 103 | 104 | 105 | def get_gmm(self): 106 | from sklearn.mixture import GMM as skGMM 107 | from gmmset import GMM as pyGMM 108 | if GMM == skGMM: 109 | print 'using GMM from sklearn' 110 | return GMM(self.nr_mixture) 111 | else: 112 | print 'using pyGMM' 113 | return GMM(nr_mixture=self.nr_mixture, nr_iteration=500, 114 | init_with_kmeans=0, concurrency=8, 115 | threshold=1e-15, 116 | verbosity=2) 117 | 118 | def train_ubm(self): 119 | nr_utt_in_ubm = 300 120 | # fpaths = get_all_data_fpaths() 121 | # random.shuffle(fpaths) 122 | # fpaths = fpaths[:nr_utt_in_ubm] 123 | # X = datautil.read_raw_data(fpaths) 124 | X = [] 125 | for name, feats in self.features.iteritems(): 126 | X.extend(feats) 127 | 128 | gmm = self.get_gmm() 129 | start = time.time() 130 | gmm.fit(X) 131 | print '\nTraining time :', time.time()-start, 'seconds' 132 | # gmm.dump('model/ubm.mixture-{}.utt-{}.model' . format( 133 | # self.nr_mixture, nr_utt_in_ubm)) 134 | gmm.dump('ubm.mixture-{}.utt-{}.model' . 
format( 135 | self.nr_mixture, nr_utt_in_ubm)) 136 | 137 | def dump(self): 138 | """ dump all models to file""" 139 | self.gmmset.before_pickle() 140 | with open(self.model_file, 'wb') as f: 141 | pickle.dump(self, f, -1) 142 | self.gmmset.after_pickle() 143 | 144 | @staticmethod 145 | def load(fname): 146 | """ load from a dumped model file""" 147 | with open(fname, 'rb') as f: 148 | m = pickle.load(f) 149 | m.gmmset.after_pickle() 150 | return m 151 | 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # user-authentication-using-voice-biometrics 2 | A project that applies voice signal processing to recognise users by their voice (voice biometrics). 3 | Dependencies: 4 | 1. scikit-learn 5 | 2. scikits.talkbox 6 | 3. pyssp 7 | 4. PyQt4 8 | 5. PyAudio 9 | 6. Python bindings for bob 10 | 11 | To use `fast-gmm` instead of `sklearn.mixture.GaussianMixture`: 12 | run `make -C gmm/` in a terminal to build the fast-gmm module. 13 | 14 | To run the application from the command line: 15 | 1. Train: 16 | `python __init__.py -t enroll` 17 | 2. Prediction: 18 | `python __init__.py -t predict` 19 | 20 | NOTE: Put all the wav files in the directories referenced in `__init__.py` (for both training and prediction); these paths can be modified as needed. 21 | -------------------------------------------------------------------------------- /RecordAudio.py: -------------------------------------------------------------------------------- 1 | import pyaudio 2 | import time 3 | import numpy as np 4 | import scipy.io.wavfile as wavfile 5 | 6 | FORMAT=pyaudio.paInt16 7 | NPDtype = 'int16' 8 | FS=8000 9 | 10 | class RecorderThread(): 11 | def __init__(self, main): 12 | self.main = main 13 | 14 | def run(self, end_time): 15 | self.start_time = time.time() 16 | while(time.time()-self.start_time <= end_time): 17 | data = self.main.stream.read(1) 18 | i = ord(data[0]) + 256 * ord(data[1]) 19 | if i >= 32768: # permissible 16 bit audio data value is -32768 to 32767 20 | i -= 65536 21 | self.main.recordData.append(i) 22 | 23 | class RecordAudio: 24 | def __init__(self): 25 | self.reco = RecorderThread(self) 26 | self.pyaudio = pyaudio.PyAudio() 27 | self.stream = self.pyaudio.open(format=FORMAT, channels=1, rate=FS, 28 | input=True, frames_per_buffer=1) 29 | self.recordData = [] 30 | 31 | def start_record(self, duration): 32 | print("Recording...") 33 | 34 | self.reco.run(duration) 35 | self.stream.stop_stream() 36 | self.stream.close() 37 | self.pyaudio.terminate() 38 | print("Done Recording...") 39 | data = np.array(self.recordData, dtype=NPDtype) 40 | wavfile.write('myaudio.wav', FS, data) 41 | return FS, data 42 | 43 | -------------------------------------------------------------------------------- /UI.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pyaudio 4 | import numpy as np 5 | from Main import Main 6 | from scipy.io import wavfile 7 | import traceback as tb 8 | 9 | from PyQt4 import uic 10 | from PyQt4.QtGui import * 11 | from PyQt4.QtCore import * 12 | 13 | class RecorderThread(QThread): 14 | def __init__(self, ui): 15 | QThread.__init__(self) 16 | self.ui = ui 17 | 18 | def run(self): 19 | self.start_time = time.time() 20 | while True: 21 | data = self.ui.stream.read(1) 22 | i = ord(data[0]) + 256 * ord(data[1]) 23 | if i >= 32768: 24 | i -= 65536 25 | self.ui.recordData.append(i) 26 | if self.ui.stopped: 27 | break
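# Note: both RecorderThread implementations above decode the stream one frame at
# a time: two bytes are combined little-endian (low byte + 256 * high byte) and
# values of 32768 or more are wrapped into the negative half of the signed
# 16-bit range. A rough equivalent for a whole buffer (illustrative sketch, not
# part of the original code), assuming little-endian int16 frames:
#     chunk = stream.read(1024)
#     samples = np.frombuffer(chunk, dtype='<i2')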
28 | 29 | class UI(QMainWindow): 30 | 31 | CONV_INTERVAL = 0.4 32 | CONV_DURATION = 1.5 33 | CONV_FILTER_DURATION = CONV_DURATION 34 | TEST_DURATION = 3 35 | 36 | def __init__(self, parent=None): 37 | 38 | QWidget.__init__(self, parent) 39 | uic.loadUi("new.ui", self) 40 | 41 | try : 42 | self.main = Main.load('02-09-2017.model') 43 | except : 44 | self.main = Main() 45 | 46 | self.task = None 47 | self.recordData = [] 48 | 49 | self.statusBar() 50 | 51 | self.timer = QTimer(self) 52 | self.timer.timeout.connect(self.timer_callback) 53 | 54 | self.Startrecord.clicked.connect(self.startenrollrecord) 55 | self.Stoprecord.clicked.connect(self.stopenrollrecord) 56 | self.Choosefile.clicked.connect(self.chooseenrollfile) 57 | 58 | self.Register.clicked.connect(self.register) 59 | self.Clear.clicked.connect(self.clear) 60 | self.Exit.clicked.connect(self.exit) 61 | 62 | self.Startrecordreco.clicked.connect(self.startrecorecord) 63 | self.Stoprecordreco.clicked.connect(self.stoprecorecord) 64 | self.Choosefilereco.clicked.connect(self.chooserecofile) 65 | 66 | self.Startrecordconv.clicked.connect(self.startconvrecord) 67 | self.Stoprecordconv.clicked.connect(self.stopconvrecord) 68 | self.Exitconv.clicked.connect(self.exit) 69 | self.Clearconv.clicked.connect(self.clear) 70 | 71 | fs, signal = wavfile.read('background earphone.wav') 72 | self.main.ad.init_noise(fs, signal) 73 | 74 | def timer_callback(self): 75 | self.record_time += 1 76 | #self.status("Recording..." + time_str(self.record_time)) 77 | minutes = int(self.record_time / 60) 78 | sec = int(self.record_time % 60) 79 | s = "{:02d}:{:02d}".format(minutes, sec) 80 | #s = time_str(self.record_time) 81 | ''' update only the timer that belongs to the current task ''' 82 | if self.task == 'enroll' : 83 | self.Recordtime.setText(s) 84 | elif self.task == 'recognise': 85 | self.Recordtimereco.setText(s) 86 | elif self.task == 'conversation': 87 | self.Recordtimeconv.setText(s) 88 | 89 | def startrecord(self): 90 | self.statusBar().showMessage("Recording...") 91 | self.pyaudio = pyaudio.PyAudio() 92 | 93 | self.recordData = [] 94 | self.stream = self.pyaudio.open(format=self.main.FORMAT, channels=1, rate=self.main.FS, 95 | input=True, frames_per_buffer=1) 96 | self.stopped = False 97 | self.reco_th = RecorderThread(self) 98 | self.reco_th.start() 99 | 100 | self.timer.start(1000) 101 | self.record_time = 0 102 | 103 | def stoprecord(self): 104 | self.stopped = True 105 | self.reco_th.wait() 106 | self.timer.stop() 107 | self.stream.stop_stream() 108 | self.stream.close() 109 | self.pyaudio.terminate() 110 | self.statusBar().showMessage("Recording Stopped.") 111 | self.recordData = np.array(self.recordData, dtype=self.main.NPDtype) 112 | wavfile.write('myaudio.wav', self.main.FS, self.recordData) 113 | 114 | def startenrollrecord(self): 115 | self.task = 'enroll' 116 | self.main.name = str(self.Username.text().trimmed()) 117 | if not self.main.name: 118 | QMessageBox.warning(self, "Warning", "Please enter your name first.") 119 | self.statusBar().showMessage('') 120 | elif self.main.name == 'Unknown': 121 | QMessageBox.warning(self, "Warning", "Please enter a valid name.") 122 | self.statusBar().showMessage('') 123 | else: 124 | self.startrecord() 125 | 126 | def stopenrollrecord(self): 127 | self.stoprecord() 128 | ''' remove silence here and wait for register button click before training ''' 129 | self.main.name = str(self.Username.text().trimmed()) 130 | self.main.signal = self.main.ad.filter(self.main.FS, self.recordData) 131 | 
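# ad.filter() runs the LTSD voice-activity detector and returns only the voiced
# samples, or an empty array when too little of the recording survives (less
# than about a third), so the length check below guards against silent recordings.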
wavfile.write('silence removed.wav', self.main.FS, self.main.signal) 132 | if len(self.main.signal) > 50: 133 | self.main.getFeatures() 134 | else : 135 | QMessageBox.warning(self, "Warning", "Audio was silent.Try Again.") 136 | #print "features extracted" 137 | 138 | def chooseenrollfile(self): 139 | self.main.name = str(self.Username.text().trimmed()) 140 | if not self.main.name: 141 | QMessageBox.warning(self, "Warning", "Please enter your name first.") 142 | self.statusBar().showMessage('') 143 | elif self.main.name == 'Unknown': 144 | QMessageBox.warning(self, "Warning", "Please enter a valid name.") 145 | self.statusBar().showMessage('') 146 | else: 147 | fname = QFileDialog.getOpenFileName(self, "Select Audio File", "", "Files (*.wav)") 148 | if not fname: 149 | return 150 | 151 | self.statusBar().showMessage('Loaded '+ fname) 152 | self.Filename.setText(fname) 153 | ''' remove silence here and wait for register button click for further ''' 154 | self.main.FS, self.recordData = wavfile.read(fname) 155 | self.main.signal = self.main.ad.filter(self.main.FS, self.recordData) 156 | self.main.getFeatures() 157 | #print "features extracted" 158 | 159 | def startrecorecord(self): 160 | self.task = 'recognise' 161 | self.startrecord() 162 | 163 | def stoprecorecord(self): 164 | self.stoprecord() 165 | user = None 166 | ''' remove silence and proceed with recognition here only''' 167 | self.main.signal = self.main.ad.filter(self.main.FS, self.recordData) 168 | if len(self.main.signal) > 50: 169 | user = self.main.predict() 170 | self.IdentifiedUser.setText(user) 171 | 172 | def chooserecofile(self): 173 | self.IdentifiedUser.setText('Unknown') 174 | fname = QFileDialog.getOpenFileName(self, "Select Audio File", "", "Files (*.wav)") 175 | if not fname: 176 | return 177 | user = None 178 | self.statusBar().showMessage('Loaded '+ fname) 179 | self.Filenamereco.setText(fname) 180 | ''' remove silence and extract features ''' 181 | self.main.FS, self.recordData = wavfile.read(fname) 182 | self.main.signal = self.main.ad.filter(self.main.FS, self.recordData) 183 | if len(self.main.signal) > 50: 184 | user = self.main.predict() 185 | self.IdentifiedUser.setText(user) 186 | self.statusBar().showMessage('') 187 | 188 | def startconvrecord(self): 189 | self.task = 'conversation' 190 | self.conv_result_list = [] 191 | self.startrecord() 192 | self.conv_now_pos = 0 193 | self.conv_timer = QTimer(self) 194 | self.conv_timer.timeout.connect(self.do_conversation) 195 | self.conv_timer.start(self.CONV_INTERVAL * 1000) 196 | 197 | def stopconvrecord(self): 198 | self.statusBar().showMessage("Recording Stopped.") 199 | self.stoprecord() 200 | self.conv_timer.stop() 201 | 202 | def do_conversation(self): 203 | interval_len = int(self.CONV_INTERVAL * self.main.FS) 204 | segment_len = int(self.CONV_DURATION * self.main.FS) 205 | self.conv_now_pos += interval_len 206 | to_filter = self.recordData[max([self.conv_now_pos - segment_len, 0]): 207 | self.conv_now_pos] 208 | signal = np.array(to_filter, dtype=self.main.NPDtype) 209 | label = None 210 | print label, "in do_conversation" 211 | ''' 212 | try: 213 | signal = self.backend.filter(self.main.FS, signal) 214 | if len(signal) > 50: 215 | label = self.backend.predict(self.main.FS, signal, True) 216 | except Exception as e: 217 | print tb.format_exc() 218 | print str(e) 219 | ''' 220 | try: 221 | self.main.signal = self.main.ad.filter(self.main.FS, signal) 222 | if len(self.main.signal) > 50: 223 | label = self.main.predict() 224 | except Exception as e: 225 | 
print tb.format_exc() 226 | print str(e) 227 | 228 | global last_label_to_show 229 | label_to_show = label 230 | if label and self.conv_result_list: 231 | last_label = self.conv_result_list[-1] 232 | if last_label and last_label != label: 233 | label_to_show = last_label_to_show 234 | self.conv_result_list.append(label) 235 | 236 | print label_to_show, "label to show" 237 | last_label_to_show = label_to_show 238 | 239 | #ADD FOR GRAPH 240 | if label_to_show is None: 241 | label_to_show = 'Nobody' 242 | ''' 243 | if len(NAMELIST) and NAMELIST[-1] != label_to_show: 244 | NAMELIST.append(label_to_show) 245 | ''' 246 | self.IdentifiedConvUser.setText(label_to_show) 247 | 248 | def register(self): 249 | if not self.Username.text().trimmed(): 250 | QMessageBox.warning(self, "Warning", "Please enter your name.") 251 | elif len(self.main.signal) == 0: 252 | QMessageBox.warning(self, "Warning", "Input signal is silent. Try again.") 253 | else: 254 | self.statusBar().showMessage('Registration started...') 255 | self.main.train() 256 | self.main.dump() 257 | self.statusBar().showMessage('Registration complete.') 258 | 259 | def clear(self): 260 | self.Username.setText("") 261 | self.Filenamereco.setText("") 262 | self.Filename.setText("") 263 | self.Recordtime.setText("00:00") 264 | self.Recordtimereco.setText("00:00") 265 | self.Recordtimeconv.setText("00:00") 266 | self.main.FS = 8000 267 | self.main.signal = [] 268 | self.statusBar().showMessage('') 269 | self.recordData = [] 270 | self.IdentifiedUser.setText("Unknown") 271 | self.IdentifiedConvUser.setText("Unknown") 272 | 273 | def exit(self): 274 | self.close(); 275 | 276 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import glob 3 | import argparse 4 | from RecordAudio import RecordAudio 5 | from ActivityDetection import ActivityDetection 6 | from UI import UI 7 | import scipy.io.wavfile as wavfile 8 | import traceback as tb 9 | import numpy as np 10 | from gmmset import GMM 11 | 12 | from PyQt4.QtGui import * 13 | from Main import Main 14 | 15 | 16 | 17 | def get_args(): 18 | parser = argparse.ArgumentParser() 19 | 20 | parser.add_argument('-t', '--task', 21 | help='Task to do. 
Either "enroll" or "predict"', 22 | required=True) 23 | ret = parser.parse_args() 24 | return ret 25 | 26 | def enroll(): 27 | try : 28 | m = Main.load('test.gmm') 29 | except : 30 | m = Main() 31 | #ra=RecordAudio() 32 | #fs, data = ra.start_record(10.0) 33 | ''' 34 | fs, data = wavfile.read('system generated voice/google assistant.wav') 35 | m.signal = m.ad.filter(fs, data) 36 | m.name = 'Computer' 37 | if len(m.signal) > 50: 38 | m.getFeatures() 39 | else: 40 | print 'signal is silent' 41 | return 42 | ''' 43 | audios = glob.glob('audio samples/*.wav') 44 | if len(audios) is 0: 45 | print "No audio file found" 46 | exit() 47 | for audio in audios: 48 | fs, data = wavfile.read(audio) 49 | #signal = ad.remove_silence(fs, signal) 50 | m.signal = m.ad.filter(fs, data) 51 | m.name = audio.split('/')[-1].split('.')[0] 52 | print m.name, "processing" 53 | #print 'signal length after silence remove', len(m.signal) 54 | if len(m.signal) > 50: 55 | features = m.getFeatures() 56 | #np.savetxt('data/mfcc-lpc-data/'+m.name+'.mfcc-lpc', features) 57 | else: 58 | print name,"signal is silent" 59 | print "features saved" 60 | 61 | # train UBM Model prior to GMM Model 62 | #m.train_ubm() 63 | m.train() 64 | m.dump() 65 | 66 | def predict(): 67 | try : 68 | m = Main.load('test.gmm') 69 | except Exception as e: 70 | print tb.format_exc() 71 | exit() 72 | #ra=RecordAudio() 73 | #fs, data = ra.start_record(10.0) 74 | 75 | audios = glob.glob('audio test samples/*.wav') 76 | 77 | if len(audios) is 0: 78 | print "No audio file found" 79 | exit() 80 | for audio in audios: 81 | user = None 82 | fs, data = wavfile.read(audio) 83 | #signal = ad.remove_silence(fs, signal) 84 | m.signal = m.ad.filter(fs, data) 85 | #print 'signal length after silence remove', len(m.signal) 86 | name = audio.split('/')[-1].split('.')[0] 87 | 88 | if len(m.signal) > 50: 89 | try : 90 | user = m.predict() 91 | except Exception as e : 92 | print tb.format_exc() 93 | print name, '-->', user 94 | 95 | ''' 96 | fs, data = wavfile.read('test samples/Iqra mam2.wav') 97 | #signal = ad.remove_silence(fs, data) 98 | 99 | user = None 100 | m.signal = m.ad.filter(fs, data) 101 | #print len(m.signal) 102 | if len(m.signal) > 50: 103 | user = m.predict() 104 | print "Current speaker is identified as ", user 105 | ''' 106 | 107 | if __name__ == '__main__': 108 | ''' 109 | app = QApplication(sys.argv) 110 | ui=UI() 111 | ui.show() 112 | sys.exit(app.exec_()) 113 | ''' 114 | global ra 115 | args = get_args() 116 | task = args.task 117 | if task == 'enroll': 118 | enroll() 119 | if task == 'predict': 120 | predict() 121 | 122 | -------------------------------------------------------------------------------- /background earphone.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhythmize/user-authentication-using-voice-biometrics/184b1f11c530995a7d8ab17f1fe2de7b0acf1127/background earphone.wav -------------------------------------------------------------------------------- /gmmset.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import multiprocessing 3 | 4 | from collections import defaultdict 5 | 6 | import numpy as np 7 | 8 | from gmm.python.pygmm import GMM 9 | 10 | class GMMSet(object): 11 | def __init__(self, gmm_order=32, ubm=None, 12 | reject_threshold=10, 13 | **kwargs): 14 | self.kwargs = kwargs 15 | self.gmms = [] 16 | self.ubm = ubm 17 | self.reject_threshold = reject_threshold 18 | if ubm is not None: 19 | 
self.gmm_order = ubm.get_nr_mixtures() 20 | else: 21 | self.gmm_order = gmm_order 22 | self.y = [] 23 | 24 | def fit_new(self, x, label): 25 | self.y.append(label) 26 | gmm = GMM(self.gmm_order, **self.kwargs) 27 | gmm.fit(x, self.ubm) 28 | self.gmms.append(gmm) 29 | 30 | def cluster_by_label(self, X, y): 31 | Xtmp = defaultdict(list) 32 | for ind, x in enumerate(X): 33 | label = y[ind] 34 | Xtmp[label].extend(x) 35 | yp, Xp = zip(*Xtmp.iteritems()) 36 | return Xp, yp 37 | ''' 38 | def auto_tune_parameter(self, X, y): 39 | if self.ubm is None: 40 | return 41 | # TODO 42 | 43 | def fit(self, X, y): 44 | X, y = self.cluster_by_label(X, y) 45 | for ind, x in enumerate(X): 46 | self.fit_new(x, y[ind]) 47 | 48 | self.auto_tune_parameter(X, y) 49 | ''' 50 | 51 | def gmm_score(self, gmm, x): 52 | return np.sum(gmm.score(x)) 53 | 54 | def predict_one_scores(self, x): 55 | return [self.gmm_score(gmm, x) for gmm in self.gmms] 56 | 57 | def predict_one(self, x): 58 | scores = self.predict_one_scores(x) 59 | return self.y[max(enumerate(scores), key=operator.itemgetter(1))[0]] 60 | 61 | def predict(self, X): 62 | return map(self.predict_one, X) 63 | 64 | def predict_one_with_rejection(self, x): 65 | assert self.ubm is not None, \ 66 | "UBM must be given prior to conduct reject prediction." 67 | 68 | scores = self.predict_one_scores(x) 69 | x_len = len(x) # normalize score 70 | 71 | scores = map(lambda v: v / x_len, scores) 72 | max_tup = max(enumerate(scores), key=operator.itemgetter(1)) 73 | 74 | ubm_score = self.gmm_score(self.ubm, x) / x_len 75 | print scores 76 | #print ubm_score 77 | if max_tup[1] - ubm_score < self.reject_threshold: 78 | #print max_tup[1], ubm_score, max_tup[1] - ubm_score 79 | print self.y[max_tup[0]], max_tup, ubm_score, max_tup[1] - ubm_score 80 | return None 81 | return self.y[max_tup[0]] 82 | 83 | def predict_with_reject(self, X): 84 | return map(self.predict_one_with_rejection, X) 85 | 86 | def load_gmm(self, label, fname): 87 | self.y.append(label) 88 | gmm = GMM.load(fname) 89 | for key, val in self.kwargs.iteritems(): 90 | exec("gmm.{0} = val".format(key)) 91 | self.gmms.append(gmm) 92 | 93 | class GMMSetPyGMM(GMMSet): 94 | def predict_one(self, x): 95 | scores = [gmm.score_all(x) / len(x) for gmm in self.gmms] 96 | #p = sorted(scores) 97 | #for (a,b) in sorted(enumerate(scores), key=operator.itemgetter(1), reverse=True): 98 | # print(self.y[a], b) 99 | #print scores, p[-1] - p[-2] 100 | return self.y[max(enumerate(scores), key=operator.itemgetter(1))[0]] 101 | 102 | def before_pickle(self): 103 | self.gmms = [x.dumps() for x in self.gmms] 104 | 105 | def after_pickle(self): 106 | self.gmms = [GMM.loads(x) for x in self.gmms] 107 | -------------------------------------------------------------------------------- /ltsd.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from scipy.io import wavfile 3 | import matplotlib 4 | matplotlib.use("Qt4Agg") 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | from pyssp.vad.ltsd import LTSD 9 | 10 | 11 | MAGIC_NUMBER = 0.04644 12 | 13 | class LTSD_VAD(object): 14 | ltsd = None 15 | order = 5 16 | 17 | fs = 0 18 | window_size = 0 19 | window = 0 20 | 21 | lambda0 = 0 22 | lambda1 = 0 23 | 24 | noise_signal = None 25 | 26 | def init_params_by_noise(self, fs, noise_signal): 27 | noise_signal = self._mononize_signal(noise_signal) 28 | self.noise_signal = np.array(noise_signal) 29 | self._init_window(fs) 30 | ltsd = LTSD(self.window_size, self.window, self.order) 31 | 
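# Scoring the background recording against itself gives the largest LTSD value
# produced by pure noise; the detection thresholds lambda0 and lambda1 below
# are then set relative to that maximum.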
res, ltsds = ltsd.compute_with_noise(noise_signal, 32 | noise_signal) 33 | max_ltsd = max(ltsds) 34 | self.lambda0 = max_ltsd * 1.1 35 | self.lambda1 = self.lambda0 * 2.0 36 | #print 'max_ltsd =', max_ltsd 37 | #print 'lambda0 =', self.lambda0 38 | #print 'lambda1 =', self.lambda1 39 | 40 | ''' 41 | def plot_ltsd(self, fs, signal): 42 | signal = self._mononize_signal(signal) 43 | res, ltsds = self._get_ltsd().compute_with_noise(signal, self.noise_signal) 44 | plt.plot(ltsds) 45 | plt.show() 46 | ''' 47 | def filter(self, signal): 48 | signal = self._mononize_signal(signal) 49 | res, ltsds = self._get_ltsd().compute_with_noise(signal, self.noise_signal) 50 | voice_signals = [] 51 | #print res 52 | res = [(start * self.window_size / 2, (finish + 1) * self.window_size 53 | / 2) for start, finish in res] 54 | #print res 55 | #print res, len(ltsds) * self.window_size / 2, "now" 56 | for start, finish in res: 57 | voice_signals.append(signal[start:finish]) 58 | try: 59 | return np.concatenate(voice_signals), res 60 | except: 61 | return np.array([]), [] 62 | 63 | def _init_window(self, fs): 64 | self.fs = fs 65 | self.window_size = int(MAGIC_NUMBER * fs) 66 | self.window = np.hanning(self.window_size) 67 | 68 | def _get_ltsd(self, fs=None): 69 | if fs is not None and fs != self.fs: 70 | self._init_window(fs) 71 | return LTSD(self.window_size, self.window, self.order, 72 | lambda0=self.lambda0, lambda1=self.lambda1) 73 | 74 | def _mononize_signal(self, signal): 75 | if signal.ndim > 1: 76 | signal = signal[:,0] 77 | return signal 78 | 79 | 80 | def main(): 81 | fs, bg_signal = wavfile.read(sys.argv[1]) 82 | ltsd = LTSD_VAD() 83 | ltsd.init_params_by_noise(fs, bg_signal) 84 | 85 | fs, signal = wavfile.read(sys.argv[2]) 86 | vaded_signal = ltsd.filter(signal) 87 | 88 | wavfile.write('vaded.wav', fs, vaded_signal) 89 | 90 | if __name__ == '__main__': 91 | main() 92 | -------------------------------------------------------------------------------- /skgmm.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import numpy as np 3 | from sklearn.mixture import GaussianMixture as GMM 4 | 5 | class GMMSet(object): 6 | 7 | def __init__(self, gmm_order = 32): 8 | self.gmms = [] 9 | self.gmm_order = gmm_order 10 | self.y = [] 11 | 12 | def fit_new(self, x, label): 13 | self.y.append(label) 14 | gmm = GMM(self.gmm_order) 15 | gmm.fit(x) 16 | self.gmms.append(gmm) 17 | 18 | def gmm_score(self, gmm, x): 19 | return np.sum(gmm.score(x)) 20 | 21 | def before_pickle(self): 22 | pass 23 | 24 | def after_pickle(self): 25 | pass 26 | 27 | def predict_one(self, x): 28 | #print self.gmms 29 | scores = [self.gmm_score(gmm, x) / len(x) for gmm in self.gmms] 30 | #print "scores :\n",scores 31 | p = sorted(enumerate(scores), key=operator.itemgetter(1), reverse=True) 32 | #print "p :\n",p 33 | p = [(str(self.y[i]), y, p[0][1] - y) for i, y in p] 34 | #print "p :\n",p 35 | result = [(self.y[index], value) for (index, value) in enumerate(scores)] 36 | #print "result :\n",result 37 | p = max(result, key=operator.itemgetter(1)) 38 | return p[0] 39 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | kwd_mark = object() 4 | 5 | def cached_func(function): 6 | cache = {} 7 | def wrapper(*args, **kwargs): 8 | key = args + (kwd_mark,) + tuple(sorted(kwargs.items())) 9 | if key in cache: 10 | return cache[key] 11 | else: 12 | result = 
function(*args, **kwargs) 13 | cache[key] = result 14 | return result 15 | return wrapper 16 | 17 | 18 | def diff_feature(feat, nd=1): 19 | diff = feat[1:] - feat[:-1] 20 | feat = feat[1:] 21 | if nd == 1: 22 | return numpy.concatenate((feat, diff), axis=1) 23 | elif nd == 2: 24 | d2 = diff[1:] - diff[:-1] 25 | return numpy.concatenate((feat[1:], diff[1:], d2), axis=1) 26 | 27 | --------------------------------------------------------------------------------
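Usage sketch: a minimal command-line flow equivalent to what `__init__.py` does, assuming the sklearn-backed `GMMSet` fallback, the directory names used above ('audio samples/', 'audio test samples/'), and 8 kHz mono recordings (Main.FS); 'someone.wav' is a placeholder file name. `Main.predict()` (with UBM-based rejection) additionally requires the compiled fast-gmm backend and the bundled 'ubm.mixture-32.utt-300.model'.

    import glob
    from scipy.io import wavfile
    from Main import Main

    # Enrollment: every wav file in 'audio samples/' becomes one speaker model.
    m = Main()                                   # also calibrates the VAD with 'background earphone.wav'
    for path in glob.glob('audio samples/*.wav'):
        fs, data = wavfile.read(path)
        m.signal = m.ad.filter(fs, data)         # LTSD-based silence removal
        m.name = path.split('/')[-1].split('.')[0]
        if len(m.signal) > 50:                   # skip near-silent recordings
            m.getFeatures()                      # MFCC (bob.ap) + LPC, stored per speaker name
    m.train()                                    # one GMM per enrolled speaker
    m.dump()                                     # pickled to 'test.gmm'

    # Identification: score a new recording against every enrolled speaker.
    m = Main.load('test.gmm')
    fs, data = wavfile.read('audio test samples/someone.wav')   # placeholder file
    m.signal = m.ad.filter(fs, data)
    if len(m.signal) > 50:
        print m.gmmset.predict_one(m.mix_feature())  # closest enrolled speaker, no rejection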