├── .gitmodules ├── ActivityDetection.py ├── BOB.py ├── LPC.py ├── Main.py ├── README.md ├── RecordAudio.py ├── UI.py ├── __init__.py ├── background earphone.wav ├── gmmset.py ├── ltsd.py ├── skgmm.py └── utils.py /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "gmm"] 2 | path = gmm 3 | url = https://github.com/zxytim/fast-gmm 4 | -------------------------------------------------------------------------------- /ActivityDetection.py: -------------------------------------------------------------------------------- 1 | from ltsd import LTSD_VAD 2 | import numpy as np 3 | 4 | class ActivityDetection: 5 | 6 | def __init__(self): 7 | self.initted = False 8 | #self.nr = NoiseReduction() 9 | self.ltsd = LTSD_VAD() 10 | 11 | def init_noise(self, fs, signal): 12 | self.initted = True 13 | #self.nr.init_noise(fs, signal) 14 | self.ltsd.init_params_by_noise(fs, signal) 15 | #nred = self.nr.filter(fs, signal) 16 | #self.ltsd.init_params_by_noise(fs, nred) 17 | 18 | def filter(self, fs, signal): 19 | if not self.initted: 20 | raise "NoiseFilter Not Initialized" 21 | # nred = self.nr.filter(fs, signal) 22 | # removed = remove_silence(fs, nred) 23 | # self.ltsd.plot_ltsd(fs, nred) 24 | orig_len = len(signal) 25 | filtered, intervals = self.ltsd.filter(signal) 26 | #print 'signal lengths', len(filtered), orig_len 27 | if len(filtered) > orig_len / 3: 28 | return filtered 29 | return np.array([]) 30 | 31 | def remove_silence(self,fs, signal, frame_duration = 0.02, frame_shift = 0.01, perc = 0.15): 32 | orig_dtype = type(signal[0]) 33 | siglen = len(signal) 34 | retsig = np.zeros(siglen, dtype = np.int64) 35 | frame_length = int(frame_duration * fs) 36 | frame_shift_length = int(frame_shift * fs) 37 | new_siglen = 0 38 | i = 0 39 | average_energy = np.sum(signal ** 2) / float(siglen) 40 | 41 | #print "Avg Energy of signal: ", average_energy 42 | while i < siglen: 43 | subsig = signal[i:i + frame_length] 44 | ave_energy = np.sum(subsig ** 2) / float(len(subsig)) 45 | if ave_energy < average_energy * perc: 46 | i += frame_length 47 | else: 48 | sigaddlen = min(frame_shift_length, len(subsig)) 49 | retsig[new_siglen:new_siglen + sigaddlen] = subsig[:sigaddlen] 50 | new_siglen += sigaddlen 51 | i += frame_shift_length 52 | retsig = retsig[:new_siglen] 53 | return retsig.astype(orig_dtype) 54 | -------------------------------------------------------------------------------- /BOB.py: -------------------------------------------------------------------------------- 1 | from utils import cached_func, diff_feature 2 | import bob 3 | import bob.ap 4 | import numpy 5 | 6 | @cached_func 7 | def get_bob_extractor(fs, win_length_ms=32, win_shift_ms=16, 8 | n_filters=55, n_ceps=19, f_min=0., f_max=6000, 9 | delta_win=2, pre_emphasis_coef=0.95, dct_norm=True, 10 | mel_scale=True): 11 | ret = bob.ap.Ceps(fs, win_length_ms, win_shift_ms, n_filters, n_ceps, f_min, 12 | f_max, delta_win, pre_emphasis_coef, mel_scale, dct_norm) 13 | return ret 14 | 15 | def extract(fs, signal=None, diff=False, **kwargs): 16 | """accept two argument, or one as a tuple""" 17 | if signal is None: 18 | assert type(fs) == tuple 19 | fs, signal = fs[0], fs[1] 20 | 21 | signal = numpy.cast['float'](signal) 22 | ret = get_bob_extractor(fs, **kwargs)(signal) 23 | if diff: 24 | return diff_feature(ret) 25 | return ret 26 | -------------------------------------------------------------------------------- /LPC.py: -------------------------------------------------------------------------------- 1 | 
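# LPC.py: frame-level linear-prediction features. Each frame is Hamming-windowed,
# pre-emphasised and passed through the Levinson-Durbin recursion
# (scikits.talkbox.linpred) to obtain n_lpc prediction coefficients; Main.py
# concatenates these with the MFCC features computed in BOB.py.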
import time 2 | #import scikits.talkbox as tb 3 | from scikits.talkbox.linpred import levinson_lpc 4 | from numpy import * 5 | from scipy.io import wavfile 6 | from MFCC import hamming 7 | from utils import cached_func, diff_feature 8 | 9 | class LPCExtractor(object): 10 | def __init__(self, fs, win_length_ms, win_shift_ms, n_lpc, 11 | pre_emphasis_coef): 12 | self.PRE_EMPH = pre_emphasis_coef 13 | self.n_lpc = n_lpc 14 | #self.n_lpcc = n_lpcc + 1 15 | 16 | self.FRAME_LEN = int(float(win_length_ms) / 1000 * fs) 17 | self.FRAME_SHIFT = int(float(win_shift_ms) / 1000 * fs) 18 | self.window = hamming(self.FRAME_LEN) 19 | 20 | 21 | def lpc_to_cc(self, lpc): 22 | lpcc = zeros(self.n_lpcc) 23 | lpcc[0] = lpc[0] 24 | for n in range(1, self.n_lpc): 25 | lpcc[n] = lpc[n] 26 | for l in range(0, n): 27 | lpcc[n] += lpc[l] * lpcc[n - l - 1] * (n - l) / (n + 1) 28 | for n in range(self.n_lpc, self.n_lpcc): 29 | lpcc[n] = 0 30 | for l in range(0, self.n_lpc): 31 | lpcc[n] += lpc[l] * lpcc[n - l - 1] * (n - l) / (n + 1) 32 | return -lpcc[1:] 33 | 34 | def lpcc(self, signal): 35 | lpc = levinson_lpc.lpc(signal, self.n_lpc)[0] 36 | return lpc[1:] 37 | #lpcc = self.lpc_to_cc(lpc) 38 | #return lpcc 39 | 40 | def extract(self, signal): 41 | frames = (len(signal) - self.FRAME_LEN) / self.FRAME_SHIFT + 1 42 | feature = [] 43 | for f in xrange(frames): 44 | frame = signal[f * self.FRAME_SHIFT : f * self.FRAME_SHIFT + 45 | self.FRAME_LEN] * self.window 46 | frame[1:] -= frame[:-1] * self.PRE_EMPH 47 | feature.append(self.lpcc(frame)) 48 | 49 | feature = array(feature) 50 | feature[isnan(feature)] = 0 51 | return feature 52 | 53 | @cached_func 54 | def get_lpc_extractor(fs, win_length_ms=32, win_shift_ms=16, 55 | n_lpc=15, pre_emphasis_coef=0.95): 56 | ret = LPCExtractor(fs, win_length_ms, win_shift_ms, n_lpc, pre_emphasis_coef) 57 | return ret 58 | 59 | 60 | def extract(fs, signal=None, diff=False, **kwargs): 61 | """accept two argument, or one as a tuple""" 62 | if signal is None: 63 | assert type(fs) == tuple 64 | fs, signal = fs[0], fs[1] 65 | signal = cast['float'](signal) 66 | ret = get_lpc_extractor(fs, **kwargs).extract(signal) 67 | if diff: 68 | return diff_feature(ret) 69 | return ret 70 | 71 | if __name__ == "__main__": 72 | extractor = LPCCExtractor(8000) 73 | fs, signal = wavfile.read("../corpus.silence-removed/Style_Reading/f_001_03.wav") 74 | start = time.time() 75 | ret = extractor.extract(signal) 76 | print len(ret) 77 | print len(ret[0]) 78 | print time.time() - start 79 | -------------------------------------------------------------------------------- /Main.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import cPickle as pickle 5 | import pyaudio 6 | from scipy.io import wavfile 7 | 8 | from collections import defaultdict 9 | import BOB as MFCC 10 | import LPC 11 | import numpy as np 12 | import traceback as tb 13 | from ActivityDetection import ActivityDetection 14 | 15 | try: 16 | from gmmset import GMMSetPyGMM as GMMSet 17 | from gmmset import GMM 18 | except Exception as e: 19 | print >> sys.stderr, "Warning: failed to import fast-gmm, use gmm from scikit-learn instead" 20 | print str(e) 21 | from skgmm import GMMSet, GMM 22 | 23 | class Main(): 24 | 25 | FORMAT=pyaudio.paInt16 26 | NPDtype = 'int16' 27 | FS = 8000 28 | nr_mixture = 32 29 | 30 | model_file = 'test.gmm' 31 | ubm_model_file = 'ubm.mixture-32.utt-300.model' 32 | 33 | def __init__(self): 34 | self.features = defaultdict(list) 35 | self.ad = 
ActivityDetection() 36 | self.signal = [] 37 | try : 38 | fs, signal = wavfile.read('background earphone.wav') 39 | self.ad.init_noise(fs, signal) 40 | except : 41 | pass 42 | 43 | def getFeatures(self): 44 | ''' 45 | mfcc = MFCC.extract(self.FS, self.signal) 46 | lpc = LPC.extract(self.FS, self.signal) 47 | features = np.concatenate((mfcc, lpc), axis=1) 48 | ''' 49 | features = self.mix_feature() 50 | self.features[self.name].extend(features) 51 | return features 52 | 53 | def mix_feature(self): 54 | mfcc = MFCC.extract(self.FS, self.signal) 55 | lpc = LPC.extract(self.FS, self.signal) 56 | #if len(mfcc) == 0: 57 | # print >> sys.stderr, "ERROR.. failed to extract mfcc feature:", len(tup[1]) 58 | #print "mfcc ",mfcc 59 | #print "lpc ",lpc 60 | return np.concatenate((mfcc, lpc), axis=1) 61 | 62 | def _get_gmm_set(self): 63 | if os.path.isfile(self.ubm_model_file): 64 | try: 65 | from gmmset import GMMSetPyGMM 66 | if GMMSet is GMMSetPyGMM: 67 | return GMMSet(ubm=GMM.load(self.ubm_model_file)) 68 | except Exception as e: 69 | print "Warning: failed to import gmmset. You may forget to compile gmm:" 70 | print e 71 | print "Try running `make -C src/gmm` to compile gmm module." 72 | print "But gmm from sklearn will work as well! Using it now!" 73 | return GMMSet() 74 | return GMMSet() 75 | 76 | def train(self): 77 | self.gmmset = self._get_gmm_set() 78 | start = time.time() 79 | print "Training start..." 80 | for name, feats in self.features.iteritems(): 81 | self.gmmset.fit_new(feats, name) 82 | print time.time() - start, " seconds" 83 | print "Training complete" 84 | 85 | def predict(self): 86 | """ return a label (name)""" 87 | try: 88 | ''' 89 | mfcc = MFCC.extract(self.FS, self.signal) 90 | lpc = LPC.extract(self.FS, self.signal) 91 | features = np.concatenate((mfcc, lpc), axis=1) 92 | ''' 93 | features = self.mix_feature() 94 | except Exception as e: 95 | print tb.format_exc() 96 | return None 97 | self.gmmset.ubm = GMM.load(self.ubm_model_file) 98 | #print 'The registered users are :', len(self.gmmset.y) 99 | #print self.gmmset.y 100 | #print [y for y in self.gmmset.y] 101 | return self.gmmset.predict_one_with_rejection(features) 102 | 103 | 104 | 105 | def get_gmm(self): 106 | from sklearn.mixture import GMM as skGMM 107 | from gmmset import GMM as pyGMM 108 | if GMM == skGMM: 109 | print 'using GMM from sklearn' 110 | return GMM(self.nr_mixture) 111 | else: 112 | print 'using pyGMM' 113 | return GMM(nr_mixture=self.nr_mixture, nr_iteration=500, 114 | init_with_kmeans=0, concurrency=8, 115 | threshold=1e-15, 116 | verbosity=2) 117 | 118 | def train_ubm(self): 119 | nr_utt_in_ubm = 300 120 | # fpaths = get_all_data_fpaths() 121 | # random.shuffle(fpaths) 122 | # fpaths = fpaths[:nr_utt_in_ubm] 123 | # X = datautil.read_raw_data(fpaths) 124 | X = [] 125 | for name, feats in self.features.iteritems(): 126 | X.extend(feats) 127 | 128 | gmm = self.get_gmm() 129 | start = time.time() 130 | gmm.fit(X) 131 | print '\nTraining time :', time.time()-start, 'seconds' 132 | # gmm.dump('model/ubm.mixture-{}.utt-{}.model' . format( 133 | # self.nr_mixture, nr_utt_in_ubm)) 134 | gmm.dump('ubm.mixture-{}.utt-{}.model' . 
format( 135 | self.nr_mixture, nr_utt_in_ubm)) 136 | 137 | def dump(self): 138 | """ dump all models to file""" 139 | self.gmmset.before_pickle() 140 | with open(self.model_file, 'wb') as f: 141 | pickle.dump(self, f, -1) 142 | self.gmmset.after_pickle() 143 | 144 | @staticmethod 145 | def load(fname): 146 | """ load from a dumped model file""" 147 | with open(fname, 'rb') as f: 148 | m = pickle.load(f) 149 | m.gmmset.after_pickle() 150 | return m 151 | 152 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # user-authentication-using-voice-biometrics 2 | A project that applies voice signal processing to recognise users by their voice (voice biometrics). 3 | Dependencies: 4 | 1. scikit-learn 5 | 2. scikits.talkbox 6 | 3. pyssp 7 | 4. PyQt4 8 | 5. PyAudio 9 | 6. Python bindings for bob 10 | 11 | To use `fast-gmm` instead of `sklearn.mixture.GaussianMixture`: 12 | run `make -C gmm/` in a terminal to build the fast-gmm module. 13 | 14 | To run the application from the command line: 15 | 1. Train: 16 | `python __init__.py -t enroll` 17 | 2. Prediction: 18 | `python __init__.py -t predict` 19 | 20 | NOTE: Put all the wav files in the directories referenced in `__init__.py` (for both training and prediction); these paths can be modified as needed. 21 | -------------------------------------------------------------------------------- /RecordAudio.py: -------------------------------------------------------------------------------- 1 | import pyaudio 2 | import time 3 | import numpy as np 4 | import scipy.io.wavfile as wavfile 5 | 6 | FORMAT=pyaudio.paInt16 7 | NPDtype = 'int16' 8 | FS=8000 9 | 10 | class RecorderThread(): 11 | def __init__(self, main): 12 | self.main = main 13 | 14 | def run(self, end_time): 15 | self.start_time = time.time() 16 | while(time.time()-self.start_time <= end_time): 17 | data = self.main.stream.read(1) 18 | i = ord(data[0]) + 256 * ord(data[1]) 19 | if i >= 32768: # permissible 16 bit audio data value is -32768 to 32767 20 | i -= 65536 21 | self.main.recordData.append(i) 22 | 23 | class RecordAudio: 24 | def __init__(self): 25 | self.reco = RecorderThread(self) 26 | self.pyaudio = pyaudio.PyAudio() 27 | self.stream = self.pyaudio.open(format=FORMAT, channels=1, rate=FS, 28 | input=True, frames_per_buffer=1) 29 | self.recordData = [] 30 | 31 | def start_record(self, duration): 32 | print("Recording...") 33 | 34 | self.reco.run(duration) 35 | self.stream.stop_stream() 36 | self.stream.close() 37 | self.pyaudio.terminate() 38 | print("Done Recording...") 39 | data = np.array(self.recordData, dtype=NPDtype) 40 | wavfile.write('myaudio.wav', FS, data) 41 | return FS, data 42 | 43 | -------------------------------------------------------------------------------- /UI.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import pyaudio 4 | import numpy as np 5 | from Main import Main 6 | from scipy.io import wavfile 7 | import traceback as tb 8 | 9 | from PyQt4 import uic 10 | from PyQt4.QtGui import * 11 | from PyQt4.QtCore import * 12 | 13 | class RecorderThread(QThread): 14 | def __init__(self, ui): 15 | QThread.__init__(self) 16 | self.ui = ui 17 | 18 | def run(self): 19 | self.start_time = time.time() 20 | while True: 21 | data = self.ui.stream.read(1) 22 | i = ord(data[0]) + 256 * ord(data[1]) 23 | if i >= 32768: 24 | i -= 65536 25 | self.ui.recordData.append(i) 26 | if self.ui.stopped: 27 | break
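# Note: both RecorderThread implementations above decode the stream one frame at
# a time: two bytes are combined little-endian (low byte + 256 * high byte) and
# values of 32768 or more are wrapped into the negative half of the signed
# 16-bit range. A rough equivalent for a whole buffer (illustrative sketch, not
# part of the original code), assuming little-endian int16 frames:
#     chunk = stream.read(1024)
#     samples = np.frombuffer(chunk, dtype='<i2')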
28 | 29 | class UI(QMainWindow): 30 | 31 | CONV_INTERVAL = 0.4 32 | CONV_DURATION = 1.5 33 | CONV_FILTER_DURATION = CONV_DURATION 34 | TEST_DURATION = 3 35 | 36 | def __init__(self, parent=None): 37 | 38 | QWidget.__init__(self, parent) 39 | uic.loadUi("new.ui", self) 40 | 41 | try : 42 | self.main = Main.load('02-09-2017.model') 43 | except : 44 | self.main = Main() 45 | 46 | self.task = None 47 | self.recordData = [] 48 | 49 | self.statusBar() 50 | 51 | self.timer = QTimer(self) 52 | self.timer.timeout.connect(self.timer_callback) 53 | 54 | self.Startrecord.clicked.connect(self.startenrollrecord) 55 | self.Stoprecord.clicked.connect(self.stopenrollrecord) 56 | self.Choosefile.clicked.connect(self.chooseenrollfile) 57 | 58 | self.Register.clicked.connect(self.register) 59 | self.Clear.clicked.connect(self.clear) 60 | self.Exit.clicked.connect(self.exit) 61 | 62 | self.Startrecordreco.clicked.connect(self.startrecorecord) 63 | self.Stoprecordreco.clicked.connect(self.stoprecorecord) 64 | self.Choosefilereco.clicked.connect(self.chooserecofile) 65 | 66 | self.Startrecordconv.clicked.connect(self.startconvrecord) 67 | self.Stoprecordconv.clicked.connect(self.stopconvrecord) 68 | self.Exitconv.clicked.connect(self.exit) 69 | self.Clearconv.clicked.connect(self.clear) 70 | 71 | fs, signal = wavfile.read('background earphone.wav') 72 | self.main.ad.init_noise(fs, signal) 73 | 74 | def timer_callback(self): 75 | self.record_time += 1 76 | #self.status("Recording..." + time_str(self.record_time)) 77 | minutes = int(self.record_time / 60) 78 | sec = int(self.record_time % 60) 79 | s = "{:02d}:{:02d}".format(minutes, sec) 80 | #s = time_str(self.record_time) 81 | ''' update only the timer that belongs to the current task ''' 82 | if self.task == 'enroll' : 83 | self.Recordtime.setText(s) 84 | elif self.task == 'recognise': 85 | self.Recordtimereco.setText(s) 86 | elif self.task == 'conversation': 87 | self.Recordtimeconv.setText(s) 88 | 89 | def startrecord(self): 90 | self.statusBar().showMessage("Recording...") 91 | self.pyaudio = pyaudio.PyAudio() 92 | 93 | self.recordData = [] 94 | self.stream = self.pyaudio.open(format=self.main.FORMAT, channels=1, rate=self.main.FS, 95 | input=True, frames_per_buffer=1) 96 | self.stopped = False 97 | self.reco_th = RecorderThread(self) 98 | self.reco_th.start() 99 | 100 | self.timer.start(1000) 101 | self.record_time = 0 102 | 103 | def stoprecord(self): 104 | self.stopped = True 105 | self.reco_th.wait() 106 | self.timer.stop() 107 | self.stream.stop_stream() 108 | self.stream.close() 109 | self.pyaudio.terminate() 110 | self.statusBar().showMessage("Recording Stopped.") 111 | self.recordData = np.array(self.recordData, dtype=self.main.NPDtype) 112 | wavfile.write('myaudio.wav', self.main.FS, self.recordData) 113 | 114 | def startenrollrecord(self): 115 | self.task = 'enroll' 116 | self.main.name = str(self.Username.text().trimmed()) 117 | if not self.main.name: 118 | QMessageBox.warning(self, "Warning", "Please enter your name first.") 119 | self.statusBar().showMessage('') 120 | elif self.main.name == 'Unknown': 121 | QMessageBox.warning(self, "Warning", "Please enter a valid name.") 122 | self.statusBar().showMessage('') 123 | else: 124 | self.startrecord() 125 | 126 | def stopenrollrecord(self): 127 | self.stoprecord() 128 | ''' remove silence here and wait for register button click before training ''' 129 | self.main.name = str(self.Username.text().trimmed()) 130 | self.main.signal = self.main.ad.filter(self.main.FS, self.recordData) 131 | 
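# ad.filter() runs the LTSD voice-activity detector and returns only the voiced
# samples, or an empty array when too little of the recording survives (less
# than about a third), so the length check below guards against silent recordings.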
wavfile.write('silence removed.wav', self.main.FS, self.main.signal) 132 | if len(self.main.signal) > 50: 133 | self.main.getFeatures() 134 | else : 135 | QMessageBox.warning(self, "Warning", "Audio was silent.Try Again.") 136 | #print "features extracted" 137 | 138 | def chooseenrollfile(self): 139 | self.main.name = str(self.Username.text().trimmed()) 140 | if not self.main.name: 141 | QMessageBox.warning(self, "Warning", "Please enter your name first.") 142 | self.statusBar().showMessage('') 143 | elif self.main.name == 'Unknown': 144 | QMessageBox.warning(self, "Warning", "Please enter a valid name.") 145 | self.statusBar().showMessage('') 146 | else: 147 | fname = QFileDialog.getOpenFileName(self, "Select Audio File", "", "Files (*.wav)") 148 | if not fname: 149 | return 150 | 151 | self.statusBar().showMessage('Loaded '+ fname) 152 | self.Filename.setText(fname) 153 | ''' remove silence here and wait for register button click for further ''' 154 | self.main.FS, self.recordData = wavfile.read(fname) 155 | self.main.signal = self.main.ad.filter(self.main.FS, self.recordData) 156 | self.main.getFeatures() 157 | #print "features extracted" 158 | 159 | def startrecorecord(self): 160 | self.task = 'recognise' 161 | self.startrecord() 162 | 163 | def stoprecorecord(self): 164 | self.stoprecord() 165 | user = None 166 | ''' remove silence and proceed with recognition here only''' 167 | self.main.signal = self.main.ad.filter(self.main.FS, self.recordData) 168 | if len(self.main.signal) > 50: 169 | user = self.main.predict() 170 | self.IdentifiedUser.setText(user) 171 | 172 | def chooserecofile(self): 173 | self.IdentifiedUser.setText('Unknown') 174 | fname = QFileDialog.getOpenFileName(self, "Select Audio File", "", "Files (*.wav)") 175 | if not fname: 176 | return 177 | user = None 178 | self.statusBar().showMessage('Loaded '+ fname) 179 | self.Filenamereco.setText(fname) 180 | ''' remove silence and extract features ''' 181 | self.main.FS, self.recordData = wavfile.read(fname) 182 | self.main.signal = self.main.ad.filter(self.main.FS, self.recordData) 183 | if len(self.main.signal) > 50: 184 | user = self.main.predict() 185 | self.IdentifiedUser.setText(user) 186 | self.statusBar().showMessage('') 187 | 188 | def startconvrecord(self): 189 | self.task = 'conversation' 190 | self.conv_result_list = [] 191 | self.startrecord() 192 | self.conv_now_pos = 0 193 | self.conv_timer = QTimer(self) 194 | self.conv_timer.timeout.connect(self.do_conversation) 195 | self.conv_timer.start(self.CONV_INTERVAL * 1000) 196 | 197 | def stopconvrecord(self): 198 | self.statusBar().showMessage("Recording Stopped.") 199 | self.stoprecord() 200 | self.conv_timer.stop() 201 | 202 | def do_conversation(self): 203 | interval_len = int(self.CONV_INTERVAL * self.main.FS) 204 | segment_len = int(self.CONV_DURATION * self.main.FS) 205 | self.conv_now_pos += interval_len 206 | to_filter = self.recordData[max([self.conv_now_pos - segment_len, 0]): 207 | self.conv_now_pos] 208 | signal = np.array(to_filter, dtype=self.main.NPDtype) 209 | label = None 210 | print label, "in do_conversation" 211 | ''' 212 | try: 213 | signal = self.backend.filter(self.main.FS, signal) 214 | if len(signal) > 50: 215 | label = self.backend.predict(self.main.FS, signal, True) 216 | except Exception as e: 217 | print tb.format_exc() 218 | print str(e) 219 | ''' 220 | try: 221 | self.main.signal = self.main.ad.filter(self.main.FS, signal) 222 | if len(self.main.signal) > 50: 223 | label = self.main.predict() 224 | except Exception as e: 225 | 
print tb.format_exc() 226 | print str(e) 227 | 228 | global last_label_to_show 229 | label_to_show = label 230 | if label and self.conv_result_list: 231 | last_label = self.conv_result_list[-1] 232 | if last_label and last_label != label: 233 | label_to_show = last_label_to_show 234 | self.conv_result_list.append(label) 235 | 236 | print label_to_show, "label to show" 237 | last_label_to_show = label_to_show 238 | 239 | #ADD FOR GRAPH 240 | if label_to_show is None: 241 | label_to_show = 'Nobody' 242 | ''' 243 | if len(NAMELIST) and NAMELIST[-1] != label_to_show: 244 | NAMELIST.append(label_to_show) 245 | ''' 246 | self.IdentifiedConvUser.setText(label_to_show) 247 | 248 | def register(self): 249 | if not self.Username.text().trimmed(): 250 | QMessageBox.warning(self, "Warning", "Please enter your name.") 251 | elif len(self.main.signal) == 0: 252 | QMessageBox.warning(self, "Warning", "Input signal is silent. Try again.") 253 | else: 254 | self.statusBar().showMessage('Registration started...') 255 | self.main.train() 256 | self.main.dump() 257 | self.statusBar().showMessage('Registration complete.') 258 | 259 | def clear(self): 260 | self.Username.setText("") 261 | self.Filenamereco.setText("") 262 | self.Filename.setText("") 263 | self.Recordtime.setText("00:00") 264 | self.Recordtimereco.setText("00:00") 265 | self.Recordtimeconv.setText("00:00") 266 | self.main.FS = 8000 267 | self.main.signal = [] 268 | self.statusBar().showMessage('') 269 | self.recordData = [] 270 | self.IdentifiedUser.setText("Unknown") 271 | self.IdentifiedConvUser.setText("Unknown") 272 | 273 | def exit(self): 274 | self.close(); 275 | 276 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import glob 3 | import argparse 4 | from RecordAudio import RecordAudio 5 | from ActivityDetection import ActivityDetection 6 | from UI import UI 7 | import scipy.io.wavfile as wavfile 8 | import traceback as tb 9 | import numpy as np 10 | from gmmset import GMM 11 | 12 | from PyQt4.QtGui import * 13 | from Main import Main 14 | 15 | 16 | 17 | def get_args(): 18 | parser = argparse.ArgumentParser() 19 | 20 | parser.add_argument('-t', '--task', 21 | help='Task to do. 
Either "enroll" or "predict"', 22 | required=True) 23 | ret = parser.parse_args() 24 | return ret 25 | 26 | def enroll(): 27 | try : 28 | m = Main.load('test.gmm') 29 | except : 30 | m = Main() 31 | #ra=RecordAudio() 32 | #fs, data = ra.start_record(10.0) 33 | ''' 34 | fs, data = wavfile.read('system generated voice/google assistant.wav') 35 | m.signal = m.ad.filter(fs, data) 36 | m.name = 'Computer' 37 | if len(m.signal) > 50: 38 | m.getFeatures() 39 | else: 40 | print 'signal is silent' 41 | return 42 | ''' 43 | audios = glob.glob('audio samples/*.wav') 44 | if len(audios) is 0: 45 | print "No audio file found" 46 | exit() 47 | for audio in audios: 48 | fs, data = wavfile.read(audio) 49 | #signal = ad.remove_silence(fs, signal) 50 | m.signal = m.ad.filter(fs, data) 51 | m.name = audio.split('/')[-1].split('.')[0] 52 | print m.name, "processing" 53 | #print 'signal length after silence remove', len(m.signal) 54 | if len(m.signal) > 50: 55 | features = m.getFeatures() 56 | #np.savetxt('data/mfcc-lpc-data/'+m.name+'.mfcc-lpc', features) 57 | else: 58 | print name,"signal is silent" 59 | print "features saved" 60 | 61 | # train UBM Model prior to GMM Model 62 | #m.train_ubm() 63 | m.train() 64 | m.dump() 65 | 66 | def predict(): 67 | try : 68 | m = Main.load('test.gmm') 69 | except Exception as e: 70 | print tb.format_exc() 71 | exit() 72 | #ra=RecordAudio() 73 | #fs, data = ra.start_record(10.0) 74 | 75 | audios = glob.glob('audio test samples/*.wav') 76 | 77 | if len(audios) is 0: 78 | print "No audio file found" 79 | exit() 80 | for audio in audios: 81 | user = None 82 | fs, data = wavfile.read(audio) 83 | #signal = ad.remove_silence(fs, signal) 84 | m.signal = m.ad.filter(fs, data) 85 | #print 'signal length after silence remove', len(m.signal) 86 | name = audio.split('/')[-1].split('.')[0] 87 | 88 | if len(m.signal) > 50: 89 | try : 90 | user = m.predict() 91 | except Exception as e : 92 | print tb.format_exc() 93 | print name, '-->', user 94 | 95 | ''' 96 | fs, data = wavfile.read('test samples/Iqra mam2.wav') 97 | #signal = ad.remove_silence(fs, data) 98 | 99 | user = None 100 | m.signal = m.ad.filter(fs, data) 101 | #print len(m.signal) 102 | if len(m.signal) > 50: 103 | user = m.predict() 104 | print "Current speaker is identified as ", user 105 | ''' 106 | 107 | if __name__ == '__main__': 108 | ''' 109 | app = QApplication(sys.argv) 110 | ui=UI() 111 | ui.show() 112 | sys.exit(app.exec_()) 113 | ''' 114 | global ra 115 | args = get_args() 116 | task = args.task 117 | if task == 'enroll': 118 | enroll() 119 | if task == 'predict': 120 | predict() 121 | 122 | -------------------------------------------------------------------------------- /background earphone.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rhythmize/user-authentication-using-voice-biometrics/184b1f11c530995a7d8ab17f1fe2de7b0acf1127/background earphone.wav -------------------------------------------------------------------------------- /gmmset.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import multiprocessing 3 | 4 | from collections import defaultdict 5 | 6 | import numpy as np 7 | 8 | from gmm.python.pygmm import GMM 9 | 10 | class GMMSet(object): 11 | def __init__(self, gmm_order=32, ubm=None, 12 | reject_threshold=10, 13 | **kwargs): 14 | self.kwargs = kwargs 15 | self.gmms = [] 16 | self.ubm = ubm 17 | self.reject_threshold = reject_threshold 18 | if ubm is not None: 19 | 
self.gmm_order = ubm.get_nr_mixtures() 20 | else: 21 | self.gmm_order = gmm_order 22 | self.y = [] 23 | 24 | def fit_new(self, x, label): 25 | self.y.append(label) 26 | gmm = GMM(self.gmm_order, **self.kwargs) 27 | gmm.fit(x, self.ubm) 28 | self.gmms.append(gmm) 29 | 30 | def cluster_by_label(self, X, y): 31 | Xtmp = defaultdict(list) 32 | for ind, x in enumerate(X): 33 | label = y[ind] 34 | Xtmp[label].extend(x) 35 | yp, Xp = zip(*Xtmp.iteritems()) 36 | return Xp, yp 37 | ''' 38 | def auto_tune_parameter(self, X, y): 39 | if self.ubm is None: 40 | return 41 | # TODO 42 | 43 | def fit(self, X, y): 44 | X, y = self.cluster_by_label(X, y) 45 | for ind, x in enumerate(X): 46 | self.fit_new(x, y[ind]) 47 | 48 | self.auto_tune_parameter(X, y) 49 | ''' 50 | 51 | def gmm_score(self, gmm, x): 52 | return np.sum(gmm.score(x)) 53 | 54 | def predict_one_scores(self, x): 55 | return [self.gmm_score(gmm, x) for gmm in self.gmms] 56 | 57 | def predict_one(self, x): 58 | scores = self.predict_one_scores(x) 59 | return self.y[max(enumerate(scores), key=operator.itemgetter(1))[0]] 60 | 61 | def predict(self, X): 62 | return map(self.predict_one, X) 63 | 64 | def predict_one_with_rejection(self, x): 65 | assert self.ubm is not None, \ 66 | "UBM must be given prior to conduct reject prediction." 67 | 68 | scores = self.predict_one_scores(x) 69 | x_len = len(x) # normalize score 70 | 71 | scores = map(lambda v: v / x_len, scores) 72 | max_tup = max(enumerate(scores), key=operator.itemgetter(1)) 73 | 74 | ubm_score = self.gmm_score(self.ubm, x) / x_len 75 | print scores 76 | #print ubm_score 77 | if max_tup[1] - ubm_score < self.reject_threshold: 78 | #print max_tup[1], ubm_score, max_tup[1] - ubm_score 79 | print self.y[max_tup[0]], max_tup, ubm_score, max_tup[1] - ubm_score 80 | return None 81 | return self.y[max_tup[0]] 82 | 83 | def predict_with_reject(self, X): 84 | return map(self.predict_one_with_rejection, X) 85 | 86 | def load_gmm(self, label, fname): 87 | self.y.append(label) 88 | gmm = GMM.load(fname) 89 | for key, val in self.kwargs.iteritems(): 90 | exec("gmm.{0} = val".format(key)) 91 | self.gmms.append(gmm) 92 | 93 | class GMMSetPyGMM(GMMSet): 94 | def predict_one(self, x): 95 | scores = [gmm.score_all(x) / len(x) for gmm in self.gmms] 96 | #p = sorted(scores) 97 | #for (a,b) in sorted(enumerate(scores), key=operator.itemgetter(1), reverse=True): 98 | # print(self.y[a], b) 99 | #print scores, p[-1] - p[-2] 100 | return self.y[max(enumerate(scores), key=operator.itemgetter(1))[0]] 101 | 102 | def before_pickle(self): 103 | self.gmms = [x.dumps() for x in self.gmms] 104 | 105 | def after_pickle(self): 106 | self.gmms = [GMM.loads(x) for x in self.gmms] 107 | -------------------------------------------------------------------------------- /ltsd.py: -------------------------------------------------------------------------------- 1 | import sys 2 | from scipy.io import wavfile 3 | import matplotlib 4 | matplotlib.use("Qt4Agg") 5 | import matplotlib.pyplot as plt 6 | import numpy as np 7 | 8 | from pyssp.vad.ltsd import LTSD 9 | 10 | 11 | MAGIC_NUMBER = 0.04644 12 | 13 | class LTSD_VAD(object): 14 | ltsd = None 15 | order = 5 16 | 17 | fs = 0 18 | window_size = 0 19 | window = 0 20 | 21 | lambda0 = 0 22 | lambda1 = 0 23 | 24 | noise_signal = None 25 | 26 | def init_params_by_noise(self, fs, noise_signal): 27 | noise_signal = self._mononize_signal(noise_signal) 28 | self.noise_signal = np.array(noise_signal) 29 | self._init_window(fs) 30 | ltsd = LTSD(self.window_size, self.window, self.order) 31 | 
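# Scoring the background recording against itself gives the largest LTSD value
# produced by pure noise; the detection thresholds lambda0 and lambda1 below
# are then set relative to that maximum.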
res, ltsds = ltsd.compute_with_noise(noise_signal, 32 | noise_signal) 33 | max_ltsd = max(ltsds) 34 | self.lambda0 = max_ltsd * 1.1 35 | self.lambda1 = self.lambda0 * 2.0 36 | #print 'max_ltsd =', max_ltsd 37 | #print 'lambda0 =', self.lambda0 38 | #print 'lambda1 =', self.lambda1 39 | 40 | ''' 41 | def plot_ltsd(self, fs, signal): 42 | signal = self._mononize_signal(signal) 43 | res, ltsds = self._get_ltsd().compute_with_noise(signal, self.noise_signal) 44 | plt.plot(ltsds) 45 | plt.show() 46 | ''' 47 | def filter(self, signal): 48 | signal = self._mononize_signal(signal) 49 | res, ltsds = self._get_ltsd().compute_with_noise(signal, self.noise_signal) 50 | voice_signals = [] 51 | #print res 52 | res = [(start * self.window_size / 2, (finish + 1) * self.window_size 53 | / 2) for start, finish in res] 54 | #print res 55 | #print res, len(ltsds) * self.window_size / 2, "now" 56 | for start, finish in res: 57 | voice_signals.append(signal[start:finish]) 58 | try: 59 | return np.concatenate(voice_signals), res 60 | except: 61 | return np.array([]), [] 62 | 63 | def _init_window(self, fs): 64 | self.fs = fs 65 | self.window_size = int(MAGIC_NUMBER * fs) 66 | self.window = np.hanning(self.window_size) 67 | 68 | def _get_ltsd(self, fs=None): 69 | if fs is not None and fs != self.fs: 70 | self._init_window(fs) 71 | return LTSD(self.window_size, self.window, self.order, 72 | lambda0=self.lambda0, lambda1=self.lambda1) 73 | 74 | def _mononize_signal(self, signal): 75 | if signal.ndim > 1: 76 | signal = signal[:,0] 77 | return signal 78 | 79 | 80 | def main(): 81 | fs, bg_signal = wavfile.read(sys.argv[1]) 82 | ltsd = LTSD_VAD() 83 | ltsd.init_params_by_noise(fs, bg_signal) 84 | 85 | fs, signal = wavfile.read(sys.argv[2]) 86 | vaded_signal = ltsd.filter(signal) 87 | 88 | wavfile.write('vaded.wav', fs, vaded_signal) 89 | 90 | if __name__ == '__main__': 91 | main() 92 | -------------------------------------------------------------------------------- /skgmm.py: -------------------------------------------------------------------------------- 1 | import operator 2 | import numpy as np 3 | from sklearn.mixture import GaussianMixture as GMM 4 | 5 | class GMMSet(object): 6 | 7 | def __init__(self, gmm_order = 32): 8 | self.gmms = [] 9 | self.gmm_order = gmm_order 10 | self.y = [] 11 | 12 | def fit_new(self, x, label): 13 | self.y.append(label) 14 | gmm = GMM(self.gmm_order) 15 | gmm.fit(x) 16 | self.gmms.append(gmm) 17 | 18 | def gmm_score(self, gmm, x): 19 | return np.sum(gmm.score(x)) 20 | 21 | def before_pickle(self): 22 | pass 23 | 24 | def after_pickle(self): 25 | pass 26 | 27 | def predict_one(self, x): 28 | #print self.gmms 29 | scores = [self.gmm_score(gmm, x) / len(x) for gmm in self.gmms] 30 | #print "scores :\n",scores 31 | p = sorted(enumerate(scores), key=operator.itemgetter(1), reverse=True) 32 | #print "p :\n",p 33 | p = [(str(self.y[i]), y, p[0][1] - y) for i, y in p] 34 | #print "p :\n",p 35 | result = [(self.y[index], value) for (index, value) in enumerate(scores)] 36 | #print "result :\n",result 37 | p = max(result, key=operator.itemgetter(1)) 38 | return p[0] 39 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | kwd_mark = object() 4 | 5 | def cached_func(function): 6 | cache = {} 7 | def wrapper(*args, **kwargs): 8 | key = args + (kwd_mark,) + tuple(sorted(kwargs.items())) 9 | if key in cache: 10 | return cache[key] 11 | else: 12 | result = 
function(*args, **kwargs) 13 | cache[key] = result 14 | return result 15 | return wrapper 16 | 17 | 18 | def diff_feature(feat, nd=1): 19 | diff = feat[1:] - feat[:-1] 20 | feat = feat[1:] 21 | if nd == 1: 22 | return numpy.concatenate((feat, diff), axis=1) 23 | elif nd == 2: 24 | d2 = diff[1:] - diff[:-1] 25 | return numpy.concatenate((feat[1:], diff[1:], d2), axis=1) 26 | 27 | --------------------------------------------------------------------------------
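Usage sketch: a minimal command-line flow equivalent to what `__init__.py` does, assuming the sklearn-backed `GMMSet` fallback, the directory names used above ('audio samples/', 'audio test samples/'), and 8 kHz mono recordings (Main.FS); 'someone.wav' is a placeholder file name. `Main.predict()` (with UBM-based rejection) additionally requires the compiled fast-gmm backend and the bundled 'ubm.mixture-32.utt-300.model'.

    import glob
    from scipy.io import wavfile
    from Main import Main

    # Enrollment: every wav file in 'audio samples/' becomes one speaker model.
    m = Main()                                   # also calibrates the VAD with 'background earphone.wav'
    for path in glob.glob('audio samples/*.wav'):
        fs, data = wavfile.read(path)
        m.signal = m.ad.filter(fs, data)         # LTSD-based silence removal
        m.name = path.split('/')[-1].split('.')[0]
        if len(m.signal) > 50:                   # skip near-silent recordings
            m.getFeatures()                      # MFCC (bob.ap) + LPC, stored per speaker name
    m.train()                                    # one GMM per enrolled speaker
    m.dump()                                     # pickled to 'test.gmm'

    # Identification: score a new recording against every enrolled speaker.
    m = Main.load('test.gmm')
    fs, data = wavfile.read('audio test samples/someone.wav')   # placeholder file
    m.signal = m.ad.filter(fs, data)
    if len(m.signal) > 50:
        print m.gmmset.predict_one(m.mix_feature())  # closest enrolled speaker, no rejection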