├── LICENSE ├── README.md ├── data ├── sp02.wav ├── sp02_train_sn5.wav ├── sp02_train_sn5_processed.wav ├── sp04.wav ├── sp04_babble_sn10.wav ├── sp04_babble_sn10_processed.wav ├── sp06.wav ├── sp06_babble_sn5.wav ├── sp06_babble_sn5_processed.wav ├── sp09.wav ├── sp09_babble_sn10.wav └── sp09_babble_sn10_processed.wav ├── pns ├── __init__.py ├── noise_estimator.py ├── noise_suppressor.py └── suppression_gain.py └── test_pns.py /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2019, wjchen 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | 3. Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python Speech Enhancement 2 | A python library for speech enhancement. 3 | 4 | ![Noise Suppression Flow Diagram](https://wjchen.net/static/posts/ns_flow.png) 5 | 6 | ## Usage 7 | 8 | The example test_pns.py shows how to do noise suppression on wav files. The python-pesq package should be installed in order to evaluate the output. 9 | ``` 10 | pip install pesq 11 | python test_pns.py 12 | ``` 13 | 14 | Major steps of using the noise suppression library are shown below. The NoiseSuppressor processes audio data block by block. 
15 | ```python 16 | # Initialize 17 | fs = 16000 18 | noise_suppressor = NoiseSuppressor(fs) 19 | frame_size = noise_suppressor.get_frame_size() 20 | 21 | # Process 22 | x = noisy_wav 23 | xfinal = np.zeros(len(x)) 24 | 25 | # Start Processing 26 | k = 0 27 | while k + frame_size < len(x): 28 | frame = x[k : k + frame_size] 29 | xfinal[k : k + frame_size] = noise_suppressor.process_frame(frame) 30 | k += frame_size 31 | ``` 32 | 33 | ## Features 34 | - [x] STFT Analysis and Synthesis 35 | - [x] Support sample rate 16000 36 | - [x] IMCRA Noise Estimation, according to [Cohen’s implementation](https://israelcohen.com/software/) 37 | - [x] OMLSA Suppression Gain, according to [Cohen’s implementation](https://israelcohen.com/software/) 38 | - [x] Wiener Suppression Gain 39 | 40 | - [ ] Support sample rate 8000, 32000, 44100, 48000 41 | - [ ] MCRA Noise Estimation 42 | - [ ] Histogram Noise Estimation 43 | 44 | ## Reference 45 | - I. Cohen and B. Berdugo, Speech Enhancement for Non-Stationary Noise Environments, Signal Processing, Vol. 81, No. 11, Nov. 2001, pp. 2403-2418. 46 | - I. Cohen, Noise Spectrum Estimation in Adverse Environments: Improved Minima Controlled Recursive Averaging, IEEE Trans. Speech and Audio Processing, Vol. 11, No. 5, Sep. 2003, pp. 466-475. 47 | - Loizou, Philipos. (2007). Speech Enhancement: Theory and Practice. 10.1201/b14529. 
-------------------------------------------------------------------------------- /data/sp02.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp02.wav -------------------------------------------------------------------------------- /data/sp02_train_sn5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp02_train_sn5.wav -------------------------------------------------------------------------------- /data/sp02_train_sn5_processed.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp02_train_sn5_processed.wav -------------------------------------------------------------------------------- /data/sp04.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp04.wav -------------------------------------------------------------------------------- /data/sp04_babble_sn10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp04_babble_sn10.wav -------------------------------------------------------------------------------- /data/sp04_babble_sn10_processed.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp04_babble_sn10_processed.wav 
-------------------------------------------------------------------------------- /data/sp06.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp06.wav -------------------------------------------------------------------------------- /data/sp06_babble_sn5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp06_babble_sn5.wav -------------------------------------------------------------------------------- /data/sp06_babble_sn5_processed.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp06_babble_sn5_processed.wav -------------------------------------------------------------------------------- /data/sp09.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp09.wav -------------------------------------------------------------------------------- /data/sp09_babble_sn10.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp09_babble_sn10.wav -------------------------------------------------------------------------------- /data/sp09_babble_sn10_processed.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/chenwj1989/python-speech-enhancement/f38fde724d576beef24c31cfbdf8704c1540b679/data/sp09_babble_sn10_processed.wav 
-------------------------------------------------------------------------------- /pns/__init__.py: -------------------------------------------------------------------------------- 1 | # __init__.py 2 | __all__ = ['noise_suppressor', 'noise_estimator', 'suppression_gain'] 3 | 4 | 5 | -------------------------------------------------------------------------------- /pns/noise_estimator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | ''' 5 | Constants 6 | ''' 7 | # 1) Parameters of Short Time Fourier Analysis: 8 | Fs_ref = 16e3 # 1.1) Reference Sampling frequency 9 | M_ref = 512 # 1.2) Size of analysis window 10 | #Mo_ref = 0.75*M_ref # 1.3) Number of overlapping samples in consecutive frames 11 | Mo_ref = 352 12 | 13 | # 2) Parameters of Noise Spectrum Estimate 14 | w = 1 # 2.1) Size of frequency smoothing window function = 2*w+1 15 | alpha_s_ref = 0.9 # 2.2) Recursive averaging parameter for the smoothing operation 16 | Nwin = 8 # 2.3) Resolution of local minima search 17 | Vwin = 15 18 | delta_s = 1.67 # 2.4) Local minimum factor 19 | Bmin = 1.66 20 | delta_y = 4.6 # 2.4) Local minimum factor 21 | delta_yt = 3 22 | alpha_d_ref = 0.85 # 2.7) Recursive averaging parameter for the noise 23 | 24 | # 3) Parameters of a Priori Probability for Signal-Absence Estimate 25 | alpha_xi_ref = 0.7 # 3.1) Recursive averaging parameter 26 | 27 | # 4) Parameters of "Decision-Directed" a Priori SNR Estimate 28 | alpha_eta_ref = 0.95 # 4.1) Recursive averaging parameter 29 | eta_min_dB = -18 # 4.2) Lower limit constraint 30 | 31 | # 5) Flags 32 | nonstat = 'medium' #Non stationarity # new version 33 | 34 | Fs = Fs_ref 35 | M = int(M_ref) 36 | Mo = int(Mo_ref) 37 | Mno = int(M-Mo) 38 | alpha_s = alpha_s_ref 39 | alpha_d = alpha_d_ref 40 | alpha_eta = alpha_eta_ref 41 | alpha_xi = alpha_xi_ref 42 | 43 | alpha_d_long = 0.99 44 | eta_min = 10**(eta_min_dB/10) 45 | 46 | #b = hanning(2*w+1) 47 | #b = b/sum(b) # 
normalize the window function 48 | b = np.array([0, 1, 0]) 49 | 50 | M21 = int(M/2+1) 51 | 52 | class NoiseEstimator(object): 53 | def update(self, features): 54 | pass 55 | 56 | class ImcraNoiseEstimator(NoiseEstimator): 57 | def __init__(self): 58 | self.l = 0 #count of frame 59 | self.l_mod_lswitch = 0 60 | self.S = np.zeros(M21) 61 | self.St = np.zeros(M21) 62 | self.Sy = np.zeros(M21) 63 | self.Smin = np.zeros(M21) 64 | self.Smint = np.zeros(M21) 65 | self.SMact = np.zeros(M21) 66 | self.SMactt = np.zeros(M21) 67 | self.SW = np.zeros((M21,Nwin)) 68 | self.SWt = np.zeros((M21,Nwin)) 69 | self.lambda_d = np.zeros(M21) 70 | self.lambda_dav = np.zeros(M21) 71 | 72 | def update(self, features): 73 | Ya2 = features['signal_power'] 74 | self.eta_2term = features['eta_2term'] 75 | 76 | self.l = self.l + 1 77 | gamma = Ya2 / np.maximum(self.lambda_d, 1e-10) #post_snr 78 | eta = alpha_eta*self.eta_2term + (1-alpha_eta)*np.maximum(gamma-1,0) #prior_snr 79 | eta = np.maximum(eta,eta_min) 80 | v = gamma*eta/(1+eta) 81 | 82 | # 2.1. 
smooth over frequency 83 | Sf = np.convolve(b, Ya2) # smooth over frequency 84 | Sf = Sf[w:M21+w] 85 | # if l==1 86 | if self.l == 1 : 87 | self.Sy = Ya2 88 | self.S = Sf 89 | self.St = Sf 90 | self.lambda_dav = Ya2 91 | else : 92 | self.S = alpha_s * self.S + (1-alpha_s) * Sf # smooth over time 93 | 94 | if self.l < 15 : 95 | self.Smin = self.S 96 | self.SMact = self.S 97 | else : 98 | self.Smin = np.minimum(self.Smin, self.S) 99 | self.SMact = np.minimum(self.SMact, self.S) 100 | 101 | # Local Minima Search 102 | I_f = np.zeros(M21) 103 | for i in range(M21) : 104 | I_f[i] = Ya2[i]0] 109 | if len(idx)!=0 : 110 | if w : 111 | conv_Y = np.convolve(b, I_f*Ya2) 112 | conv_Y = conv_Y[w:M21+w] 113 | Sft[idx] = conv_Y[idx]/conv_I[idx] 114 | else : 115 | Sft[idx] = Ya2[idx] 116 | 117 | if self.l < 15 : 118 | self.St = self.S 119 | self.Smint = self.St 120 | self.SMactt = self.St 121 | else : 122 | self.St[:] = alpha_s * self.St + (1-alpha_s) * Sft 123 | self.Smint[:] = np.minimum(self.Smint, self.St) 124 | self.SMactt[:] = np.minimum(self.SMactt, self.St) 125 | 126 | qhat = np.ones(M21) 127 | phat = np.zeros(M21) 128 | 129 | if nonstat == 'low' : 130 | gamma_mint = Ya2/Bmin/np.maximum(self.Smin,1e-10) 131 | zetat = self.S/Bmin/np.maximum(self.Smin,1e-10) 132 | else : 133 | gamma_mint = Ya2/Bmin/np.maximum(self.Smint,1e-10) 134 | zetat = self.S/Bmin/np.maximum(self.Smint,1e-10) 135 | 136 | for idx in range(M21) : 137 | if gamma_mint[idx]>1 and gamma_mint[idx]delta_yt or zetat[idx]>=delta_s : 141 | phat[idx] = 1 142 | 143 | self.l_mod_lswitch = self.l_mod_lswitch + 1 144 | if self.l_mod_lswitch == Vwin : 145 | self.l_mod_lswitch = 0 146 | 147 | if self.l == Vwin : 148 | for i in range(Nwin): 149 | self.SW[:,i] = self.S 150 | self.SWt[:, i] = self.St 151 | else : 152 | self.SW[:,:Nwin-1] = self.SW[:,1:Nwin] 153 | self.SW[:,Nwin-1] = self.SMact 154 | self.Smin = self.SW.min(1) 155 | self.SMact = self.S 156 | self.SWt[:,:Nwin-1] = self.SWt[:,1:Nwin] 157 | self.SWt[:,Nwin-1] = 
self.SMactt 158 | self.Smint = self.SWt.min(1) 159 | self.SMactt = self.St 160 | 161 | alpha_dt = alpha_d + (1-alpha_d)*phat 162 | self.lambda_dav = alpha_dt * self.lambda_dav + (1-alpha_dt)*Ya2 163 | if self.l < 15 : 164 | self.lambda_dav_long = self.lambda_dav 165 | else : 166 | alpha_dt_long = alpha_d_long + (1-alpha_d_long)*phat 167 | self.lambda_dav_long = alpha_dt_long * self.lambda_dav_long + (1-alpha_dt_long)*Ya2 168 | 169 | # 2.4. Noise Spectrum Estimate 170 | if nonstat == 'high' : 171 | self.lambda_d = 2 * self.lambda_dav 172 | else : 173 | self.lambda_d = 1.4685 * self.lambda_dav 174 | 175 | return self.lambda_d 176 | 177 | 178 | -------------------------------------------------------------------------------- /pns/noise_suppressor.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import numpy as np 4 | from .noise_estimator import ImcraNoiseEstimator 5 | from .suppression_gain import OmlsaGain 6 | 7 | ''' 8 | Constants 9 | ''' 10 | # 1) Parameters of Short Time Fourier Analysis: 11 | Fs_ref = 16e3 # 1.1) Reference Sampling frequency 12 | M_ref = 512 # 1.2) Size of analysis window 13 | #Mo_ref = 0.75*M_ref # 1.3) Number of overlapping samples in consecutive frames 14 | Mo_ref = 352 15 | Mno_ref = 160 16 | 17 | # zero_thres is a threshold for discriminating between zero and nonzero sample. 
zero_thres = 1e-10


'''
Class
'''
class NoiseSuppressor(object):
    '''
    Frame-by-frame noise suppressor built on STFT analysis and overlap-add
    synthesis.

    Each call to process_frame() takes `frame_size` new samples, shifts them
    into an analysis buffer of `fft_size` samples, asks the noise estimator
    for a noise power spectrum and the suppression-gain object for a
    per-bin gain, and returns `frame_size` output samples.
    '''

    def __init__(self, sample_rate):
        '''
        Create the buffers, the analysis/synthesis window and the two
        processing stages.

        sample_rate : sampling rate; stored and forwarded to OmlsaGain.
                      Frame, overlap and FFT sizes come from the module
                      constants Mno_ref, Mo_ref and M_ref.
        '''
        self.sample_rate = sample_rate
        self.frame_size = Mno_ref
        self.overlap_size = Mo_ref
        self.fft_size = M_ref
        # The same Hamming window is applied before the FFT and after the
        # inverse FFT.
        self.win = np.hamming(self.fft_size)
        self.in_buffer = np.zeros(self.fft_size)
        self.out_buffer = np.zeros(self.fft_size)
        self.noise_estimator = ImcraNoiseEstimator()
        self.suppression_gain = OmlsaGain(sample_rate, self.fft_size)
        self.fnz_flag = 0  # flag for the first frame which is non-zero

    def get_frame_size(self):
        '''Return the number of new samples expected per process_frame() call.'''
        return self.frame_size

    def get_fft_size(self):
        '''Return the analysis window / FFT length.'''
        return self.fft_size

    def stft_analyze(self, audio):
        '''
        Shift one frame into the analysis buffer and compute its spectrum.

        audio : array-like holding exactly fft_size - overlap_size samples.

        Returns (signal_spec, signal_power). signal_spec is the FFT of the
        windowed buffer (length fft_size) and signal_power the squared
        magnitude of its first fft_size//2 + 1 bins. Both are all-zero
        arrays when the frame is skipped (see the condition below).

        Raises ValueError when `audio` does not hold exactly one frame.
        '''
        M = self.fft_size
        M21 = M // 2 + 1
        Mno = int(M - self.overlap_size)

        # Validate before touching the buffer: a size-1 input would otherwise
        # be silently broadcast over the whole hop region.
        audio = np.asarray(audio)
        if audio.size != Mno:
            raise ValueError("expected a frame of %d samples, got %d"
                             % (Mno, audio.size))

        self.in_buffer[:M-Mno] = self.in_buffer[Mno:M]  # update the frame of data
        self.in_buffer[M-Mno:M] = audio.reshape(-1)
        signal_spec = np.zeros(M)
        signal_power = np.zeros(M21)

        # Analysis starts once the sample at index 1 of the buffer is
        # non-zero; afterwards a frame is analysed whenever any buffered
        # sample is non-zero.
        # NOTE(review): only index 1 is tested before start-up; if this was
        # ported from 1-based code, index 0 may have been intended - confirm.
        if self.fnz_flag == 0:
            active = abs(self.in_buffer[1]) > zero_thres
        else:
            active = self.fnz_flag == 1 and bool(
                np.any(np.abs(self.in_buffer) > zero_thres))

        if active:
            self.fnz_flag = 1
            # 1. Short Time Fourier Analysis
            signal_spec = np.fft.fft(self.win * self.in_buffer)
            signal_power = abs(signal_spec[:M21])**2

        return signal_spec, signal_power

    def process_frame(self, frame_data):
        '''
        Suppress noise in one frame.

        frame_data : array-like with get_frame_size() samples.

        Returns an array of get_frame_size() output samples. The output is
        all zeros until stft_analyze() has set fnz_flag.
        '''
        M = self.fft_size
        M21 = M // 2 + 1
        Mno = int(M - self.overlap_size)

        #0 STFT Analysis
        signal_spec, signal_power = self.stft_analyze(frame_data)
        yout = np.zeros(Mno)

        if self.fnz_flag == 1:
            #1 rough noise estimation
            #2 rough a priori and a posteriori snr estimation
            #3 speech presence probability estimation
            #4 precise noise estimation
            #5 a priori and a posteriori snr estimation
            features = {'signal_power': signal_power,
                        'eta_2term': self.suppression_gain.get_eta()}
            noise_power = self.noise_estimator.update(features)

            #6 Update suppression gain
            features = {'signal_power': signal_power,
                        'noise_power': noise_power}
            gain = self.suppression_gain.update(features)

            #7 STFT Synthesis
            X = gain * signal_spec[:M21]
            # n=M pins the inverse transform to the window length.
            x = self.win * np.fft.irfft(X, n=M)
            self.out_buffer = self.out_buffer + x

            # "* 1.0" copies the slice so the shift below cannot alter it.
            yout = self.out_buffer[:Mno] * 1.0
            self.out_buffer[:M-Mno] = self.out_buffer[Mno:M]  # update output frame
            self.out_buffer[M-Mno:M] = 0.0  # clear the newly exposed tail

        return yout
for Signal-Absence Estimate 15 | alpha_xi_ref = 0.7 # 3.1) Recursive averaging parameter 16 | w_xi_local = 1 # 3.2) Size of frequency local smoothing window function 17 | w_xi_global = 15 # 3.3) Size of frequency local smoothing window function 18 | f_u = 10e3 # 3.4) Upper frequency threshold for global decision 19 | f_l = 50 # 3.5) Lower frequency threshold for global decision 20 | P_min = 0.005 # 3.6) Lower bound constraint 21 | xi_lu_dB = -5 # 3.7) Upper threshold for local decision 22 | xi_ll_dB = -10 # 3.8) Lower threshold for local decision 23 | xi_gu_dB = -5 # 3.9) Upper threshold for global decision 24 | xi_gl_dB = -10 # 3.10) Lower threshold for global decision 25 | xi_fu_dB = -5 # 3.11) Upper threshold for local decision 26 | xi_fl_dB = -10 # 3.12) Lower threshold for local decision 27 | xi_mu_dB = 10 # 3.13) Upper threshold for xi_m 28 | xi_ml_dB = 0 # 3.14) Lower threshold for xi_m 29 | q_max = 0.998 # 3.15) Upper limit constraint 30 | 31 | # 4) Parameters of "Decision-Directed" a Priori SNR Estimate 32 | alpha_eta_ref = 0.95 # 4.1) Recursive averaging parameter 33 | eta_min_dB = -18 # 4.2) Lower limit constraint 34 | 35 | # 5) Flags 36 | broad_flag = 1 # broad band flag # new version 37 | tone_flag = 0 # pure tone flag # new version 38 | nonstat = 'medium' #Non stationarity # new version 39 | 40 | Fs = Fs_ref 41 | M = int(M_ref) 42 | Mo = int(Mo_ref) 43 | Mno = int(M-Mo) 44 | alpha_eta = alpha_eta_ref 45 | alpha_xi = alpha_xi_ref 46 | 47 | alpha_d_long = 0.99 48 | eta_min = 10**(eta_min_dB/10) 49 | G_f = eta_min**0.5 # Gain floor 50 | 51 | 52 | ##b_xi_local = hanning(2*w_xi_local+1) 53 | #b_xi_local = b_xi_local/sum(b_xi_local) # normalize the window function 54 | b_xi_local = np.array([0, 1, 0]) 55 | #b_xi_global = hanning(2*w_xi_global+1) 56 | #b_xi_global = b_xi_global/sum(b_xi_global) # normalize the window function 57 | b_xi_global = np.array([0, 0.000728, 0.002882, 0.006366, 0.011029, 0.016667, 0.023033, 0.029849, 0.036818, 0.043634, 0.050000, 
class SuppressionGain(object):
    '''
    Common interface of the suppression-gain estimators.
    '''

    def update(self, features):
        '''
        Compute the gain for one frame from `features`.

        The base implementation does nothing and returns None.
        '''
        pass


class WienerGain(SuppressionGain):
    '''
    Wiener suppression gain: ksi / (1 + ksi).
    '''

    def update(self, features):
        '''
        features : either an object exposing a `ksi` attribute or a mapping
                   with a 'ksi' entry, where
            ksi : a priori snr

        Returns ksi / (1 + ksi), with the same type/shape as ksi.
        '''
        # The other update() methods in this package receive plain dicts, so
        # accept key access as well as the original attribute access.
        if hasattr(features, 'ksi'):
            ksi = features.ksi
        else:
            ksi = features['ksi']
        gain = ksi / (1 + ksi)
        return gain
10*np.log10(xi_local[i]) 115 | else : 116 | xi_local_dB[i] = -100 117 | 118 | for i in range(len(xi_global)) : 119 | if xi_global[i] >0 : 120 | xi_global_dB[i] = 10*np.log10(xi_global[i]) 121 | else : 122 | xi_global_dB[i] = -100 123 | 124 | if self.xi_frame >0 : 125 | xi_frame_dB = 10*np.log10(self.xi_frame) 126 | else : 127 | xi_frame_dB = -100 128 | 129 | P_local = np.ones(M21) 130 | for idx in range(M21) : 131 | if xi_local_dB[idx] <= xi_ll_dB: 132 | P_local[idx] = P_min 133 | if xi_local_dB[idx] > xi_ll_dB and xi_local_dB[idx] < xi_lu_dB : 134 | P_local[idx] = P_min + (xi_local_dB[idx]-xi_ll_dB) / (xi_lu_dB-xi_ll_dB) * (1-P_min) 135 | 136 | P_global = np.ones(M21) 137 | for idx in range(M21) : 138 | if xi_global_dB[idx] <= xi_gl_dB: 139 | P_global[idx] = P_min 140 | if xi_global_dB[idx] >xi_gl_dB and xi_global_dB[idx] 500Hz) for low probability of speech presence 146 | 147 | if xi_frame_dB <= xi_fl_dB : 148 | P_frame = P_min 149 | elif dxi_frame >= 0 : 150 | self.xi_m_dB = min(max(xi_frame_dB,xi_ml_dB),xi_mu_dB) 151 | P_frame = 1 152 | elif xi_frame_dB >= self.xi_m_dB + xi_fu_dB : 153 | P_frame = 1 154 | elif xi_frame_dB <= self.xi_m_dB + xi_fl_dB : 155 | P_frame = P_min 156 | else : 157 | P_frame = P_min+(xi_frame_dB-self.xi_m_dB-xi_fl_dB)/(xi_fu_dB-xi_fl_dB)*(1-P_min) 158 | 159 | # q=1-P_global.*P_local*P_frame # new version 160 | if broad_flag : # new version 161 | q = 1 - P_global * P_local * P_frame # new version 162 | else : # new version 163 | q = 1 - P_local * P_frame ##ok # new version 164 | 165 | q = np.minimum(q, q_max) 166 | gamma = np.zeros(M21) 167 | gamma = Ya2 / np.maximum(lambda_d, 1e-10) 168 | eta = alpha_eta * self.eta_2term + (1-alpha_eta) * np.maximum(gamma-1,0) 169 | eta = np.maximum(eta, eta_min) 170 | v = gamma*eta/(1+eta) 171 | PH1 = np.zeros(M21) 172 | idx = [i for i, v in enumerate(q) if v<0.9] 173 | PH1[idx] = 1 / ( 1+q[idx] / (1-q[idx]) * (1+eta[idx]) * np.exp(-v[idx]) ) 174 | 175 | # Spectral Gain 176 | GH1 = np.ones(M21) 177 | 
178 | idx = [i for i, val in enumerate(v) if val>5 ] 179 | GH1[idx] = eta[idx] / (1+eta[idx]) 180 | idx = [i for i, val in enumerate(v) if val<=5 and val>0] 181 | GH1[idx] = eta[idx] / (1+eta[idx]) * np.exp(0.5 * expn(1, v[idx])) 182 | 183 | GH0 = G_f 184 | 185 | G = GH1**PH1 * GH0**(1 - PH1) 186 | self.eta_2term = GH1**2 * gamma 187 | return G 188 | 189 | def get_eta(self): 190 | return self.eta_2term -------------------------------------------------------------------------------- /test_pns.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import soundfile as sf 4 | from pesq import pesq 5 | 6 | from pns.noise_suppressor import NoiseSuppressor 7 | 8 | def test(): 9 | 10 | # Prepare Data 11 | clean_files = ["data/sp02.wav", "data/sp04.wav", "data/sp06.wav", "data/sp09.wav"] 12 | 13 | input_files = ["data/sp02_train_sn5.wav", 14 | "data/sp04_babble_sn10.wav", 15 | "data/sp06_babble_sn5.wav", 16 | "data/sp09_babble_sn10.wav"] 17 | 18 | output_files = ["data/sp02_train_sn5_processed.wav", 19 | "data/sp04_babble_sn10_processed.wav", 20 | "data/sp06_babble_sn5_processed.wav", 21 | "data/sp09_babble_sn10_processed.wav"] 22 | 23 | for i in range(len(input_files)) : 24 | clean_file = clean_files[i] 25 | input_file = input_files[i] 26 | output_file = output_files[i] 27 | 28 | clean_wav, _ = sf.read(clean_file) 29 | noisy_wav, fs = sf.read(input_file) 30 | 31 | # Initialize 32 | noise_suppressor = NoiseSuppressor(fs) 33 | 34 | x = noisy_wav 35 | frame_size = noise_suppressor.get_frame_size() 36 | xfinal = np.zeros(len(x)) 37 | 38 | # Start Processing 39 | k = 0 40 | while k + frame_size < len(x): 41 | frame = x[k : k + frame_size] 42 | xfinal[k : k + frame_size] = noise_suppressor.process_frame(frame) 43 | k += frame_size 44 | 45 | # Save Results 46 | xfinal = xfinal / max(np.abs(xfinal)) 47 | sf.write(output_file, xfinal, fs) 48 | 49 | # Performance Metrics 50 | print("") 51 | print(input_file) 52 | pesq_nb = 
def frame_starts(num_samples, frame_size):
    '''
    Return the start offsets of every complete frame in a signal.

    A frame starting at k is complete when k + frame_size <= num_samples,
    so a signal whose length is an exact multiple of frame_size keeps its
    last frame.
    '''
    return range(0, num_samples - frame_size + 1, frame_size)


def normalize_peak(signal):
    '''
    Scale `signal` so that its largest absolute sample becomes 1.

    Empty or all-zero input is returned unscaled instead of dividing by
    zero.
    '''
    signal = np.asarray(signal, dtype=float)
    if signal.size == 0:
        return signal
    peak = np.max(np.abs(signal))
    if peak == 0:
        return signal
    return signal / peak


def denoise_file(input_file, output_file):
    '''
    Read `input_file`, run noise suppression on every channel and write the
    peak-normalized result to `output_file` at the same sample rate.

    Each channel is processed by its own NoiseSuppressor instance and
    normalized independently. Samples after the last complete frame are
    left at zero.
    '''
    noisy_wav, fs = sf.read(input_file)
    channels = noisy_wav.shape[1] if noisy_wav.ndim > 1 else 1
    print("Input file: ", input_file)
    print("Sample rate: ", fs, "Hz")
    print("Num of channels: ", channels)
    print("Output file: ", output_file)

    # View mono and multi-channel data uniformly as (samples, channels) so
    # one loop serves both cases.
    num_samples = len(noisy_wav)
    x = noisy_wav.reshape(num_samples, channels)
    xfinal = np.zeros(x.shape)

    for ch in range(channels):
        # Initialize
        noise_suppressor = NoiseSuppressor(fs)
        frame_size = noise_suppressor.get_frame_size()

        # Start Processing
        for k in frame_starts(num_samples, frame_size):
            frame = x[k : k + frame_size, ch]
            xfinal[k : k + frame_size, ch] = noise_suppressor.process_frame(frame)

        # Save Results
        xfinal[:, ch] = normalize_peak(xfinal[:, ch])

    # Hand the data back in the layout it was read in (1-D for mono).
    sf.write(output_file, xfinal.reshape(noisy_wav.shape), fs)


if __name__=="__main__":
    denoise_file("data/sp02_train_sn5.wav", "data/sp02_train_sn5_processed.wav")
    # test()
--------------------------------------------------------------------------------