├── README.md ├── est_cgmm.m ├── multi_fft.m ├── mvdr.m ├── outProdND.m ├── output.wav ├── test.m └── test_wav ├── test1 ├── F02_011C021A_BUS.CH0.wav ├── F02_011C021A_BUS.CH1.wav ├── F02_011C021A_BUS.CH2.wav ├── F02_011C021A_BUS.CH3.wav ├── F02_011C021A_BUS.CH4.wav ├── F02_011C021A_BUS.CH5.wav └── F02_011C021A_BUS.CH6.wav ├── test2 ├── F01_050C0103_STR.CH1.wav ├── F01_050C0103_STR.CH2.wav ├── F01_050C0103_STR.CH3.wav ├── F01_050C0103_STR.CH4.wav ├── F01_050C0103_STR.CH5.wav └── F01_050C0103_STR.CH6.wav └── test3 ├── 20G_20GO010I_STR.CH1.wav ├── 20G_20GO010I_STR.CH2.wav ├── 20G_20GO010I_STR.CH3.wav ├── 20G_20GO010I_STR.CH4.wav ├── 20G_20GO010I_STR.CH5.wav └── 20G_20GO010I_STR.CH6.wav /README.md: -------------------------------------------------------------------------------- 1 | This program is an implementation of "ROBUST MVDR BEAMFORMING USING TIME-FREQUENCY MASKS FOR ONLINE/OFFLINE ASR IN NOISE" for CHiME-3/CHiME-4 data. 2 | 3 | It is a Complex Gaussian Mixture Model (CGMM) based MVDR beamformer. 4 | 5 | I only implement the batch processing described in the paper. 6 | 7 | The old version has been moved to the branch old_toy_mvdr. 8 | 9 | I can't guarantee the performance, and there may be some bugs. If you find any, I would appreciate your suggestions. 10 | 11 | test.m gives detailed instructions on how to run the code. 12 | 13 | This code depends on VOICEBOX. Download it and add it to your MATLAB path. 
14 | http://www.ee.ic.ac.uk/hp/staff/dmb/voicebox/voicebox.html
15 |
--------------------------------------------------------------------------------
/est_cgmm.m:
--------------------------------------------------------------------------------
function [ lambda_v, lambda_y, R_xn, R_n ] = est_cgmm( ffts )
%EST_CGMM Estimate the parameters of a two-component complex GMM (CGMM).
%   Runs 10 EM iterations independently for every frequency bin and returns
%   the time-frequency masks together with the spatial covariance matrices.
%
%   Input:
%     ffts     : M*T*F multi-channel spectrum (M channels, T frames,
%                F frequency bins), e.g. the second output of MULTI_FFT.
%   Outputs:
%     lambda_v : T*F posterior mask of the noise-only component
%     lambda_y : T*F posterior mask of the noisy-speech component
%     R_xn     : M*M*F CGMM spatial covariance of the noisy-speech component
%     R_n      : M*M*F mask-weighted noise covariance, eq. (4) of the paper
%
%   Side effects: after every iteration the auxiliary value Q is displayed
%   and figure 1 is redrawn with both masks stacked on top of each other.

num_iter  = 10;       % number of EM iterations
rcond_min = 0.0001;   % below this a covariance is treated as ill-conditioned
loading   = 0.0001;   % diagonal loading added to ill-conditioned covariances

[M, T, F] = size(ffts);

lambda_v = zeros(T, F);
lambda_y = zeros(T, F);
phi_y = ones(T, F);
phi_v = ones(T, F);
p_y = zeros(T, F);    % preallocated (the original grew these inside the loop)
p_v = zeros(T, F);
R_n = zeros([M, M, F]);

% Initialisation: noisy component = sample covariance, noise = identity.
outer = outProdND(ffts);                       % M*M*T*F
Ry = reshape(mean(outer, 3), [M, M, F]);
Rv = repmat(eye(M), [1, 1, F]);

for iter = 1:num_iter
    for f = 1:F
        Ry_f = Ry(:, :, f);
        Rv_f = Rv(:, :, f);
        % Diagonal loading keeps the matrices Hermitian and the result
        % reproducible (the original added a random non-Hermitian matrix).
        if rcond(Ry_f) < rcond_min
            Ry_f = Ry_f + loading * eye(M);
        end
        if rcond(Rv_f) < rcond_min
            Rv_f = Rv_f + loading * eye(M);
        end
        invRy_f = inv(Ry_f);
        invRv_f = inv(Rv_f);
        % Determinants are real for Hermitian matrices; floor them so that
        % the logarithm below stays finite and real.
        det_Ry = max(real(det(Ry_f)), realmin);
        det_Rv = max(real(det(Rv_f)), realmin);

        y_f = ffts(:, :, f);                   % M*T, one column per frame

        %% E-step
        % quad(t) = y_t^H * inv(R) * y_t = trace(y_t * y_t^H * inv(R))
        quad_y = real(sum(conj(y_f) .* (invRy_f * y_f), 1)).';
        quad_v = real(sum(conj(y_f) .* (invRv_f * y_f), 1)).';
        % Time-varying variances; floored so that silent (all-zero) frames
        % do not produce 0/0 = NaN masks.
        phi_y(:, f) = max(quad_y / M, eps);
        phi_v(:, f) = max(quad_v / M, eps);

        % log of exp(-kernel) / (pi * det(phi * R)), with
        % det(phi * R) = phi^M * det(R). The constant pi is kept as in the
        % original; it is common to both components and cancels in the masks.
        log_p_y = -quad_y ./ phi_y(:, f) - log(pi) - M * log(phi_y(:, f)) - log(det_Ry);
        log_p_v = -quad_v ./ phi_v(:, f) - log(pi) - M * log(phi_v(:, f)) - log(det_Rv);
        p_y(:, f) = exp(log_p_y);
        p_v(:, f) = exp(log_p_v);

        % Posteriors p_y/(p_y+p_v) and p_v/(p_y+p_v), evaluated in the log
        % domain so that over-/underflow of p_y, p_v cannot yield NaN.
        lambda_y(:, f) = 1 ./ (1 + exp(log_p_v - log_p_y));
        lambda_v(:, f) = 1 - lambda_y(:, f);

        %% M-step
        n_y = max(sum(lambda_y(:, f)), eps);
        n_v = max(sum(lambda_v(:, f)), eps);
        tmp_Ry_f = weighted_outer_sum(y_f, lambda_y(:, f) ./ phi_y(:, f)) / n_y;
        tmp_Rv_f = weighted_outer_sum(y_f, lambda_v(:, f) ./ phi_v(:, f)) / n_v;
        % Mask-weighted covariances for eq. (4); each is normalised by the
        % sum of its OWN mask (the original divided the noise accumulator
        % by sum(lambda_y)).
        acc_y = weighted_outer_sum(y_f, lambda_y(:, f)) / n_y;
        acc_v = weighted_outer_sum(y_f, lambda_v(:, f)) / n_v;

        %% Resolve the component permutation
        % Compare the entropies of the normalised EIGENVALUE distributions
        % (the original used the diagonal of the eigenvector matrix).
        entropy1 = eig_entropy(tmp_Ry_f);
        entropy2 = eig_entropy(tmp_Rv_f);
        if entropy1 > entropy2
            % The two components are exchanged: swap everything that is
            % tied to the labels so masks, densities and R_n stay consistent.
            Ry(:, :, f) = tmp_Rv_f;
            Rv(:, :, f) = tmp_Ry_f;
            swap = lambda_y(:, f); lambda_y(:, f) = lambda_v(:, f); lambda_v(:, f) = swap;
            swap = p_y(:, f);      p_y(:, f) = p_v(:, f);           p_v(:, f) = swap;
            swap = phi_y(:, f);    phi_y(:, f) = phi_v(:, f);       phi_v(:, f) = swap;
            R_n(:, :, f) = acc_y;              %eq(4)
        else
            Ry(:, :, f) = tmp_Ry_f;
            Rv(:, :, f) = tmp_Rv_f;
            R_n(:, :, f) = acc_v;              %eq(4)
        end
    end

    % Progress diagnostics; Q is intentionally displayed (no semicolon).
    Q = sum(sum(lambda_y .* log(p_y+0.001) + lambda_v .* log(p_v+0.001))) %#ok<NOPRT,NASGU>
    figure(1)
    imagesc(real([flipud(lambda_y');flipud(lambda_v')]));
end
R_xn = Ry;

end

function R = weighted_outer_sum(Y, w)
% Return sum_t w(t) * Y(:, t) * Y(:, t)' for an M*T matrix Y and T weights w.
R = bsxfun(@times, Y, w(:).') * Y';
end

function h = eig_entropy(R)
% Entropy of the eigenvalues of R after normalising them to sum to one.
ev = real(eig((R + R') / 2));   % Hermitian part -> real eigenvalues
ev = max(ev, 0);
ev = ev / sum(ev);
nz = ev > 0;                    % 0*log(0) is taken as 0
h = -sum(ev(nz) .* log(ev(nz)));
end

--------------------------------------------------------------------------------
/multi_fft.m:
--------------------------------------------------------------------------------
function [ frames, ffts ] = multi_fft( wav, frame_length, frame_shift, fft_len )
%MULTI_FFT Frame a multi-channel signal and compute its one-sided FFT.
%   wav:          L*M matrix. L is length of signal and M is channel number
%   frame_length: frame size in samples (a periodic Hamming window of this
%                 length is applied to every frame)
%   frame_shift:  hop between consecutive frames in samples
%   fft_len:      FFT size
%   frames: M*T*frame_length, windowed time-domain frames;
%           M is channel numbers, T is frame numbers
%   ffts:   M*T*(floor(fft_len/2)+1), the multi-channel fft matrix
%           (bins 0 .. fft_len/2 only)
%
%   Uses ENFRAME from VOICEBOX.

M = size(wav, 2);

%% multi-channel framing
win = hamming(frame_length, 'periodic');

% The first channel fixes the number of frames T and is stored directly,
% so it does not have to be framed twice.
first = enframe(wav(:, 1), win, frame_shift);
T = size(first, 1);
frames = zeros([M, T, frame_length]);
frames(1, :, :) = first;
for ch = 2:M
    frames(ch, :, :) = enframe(wav(:, ch), win, frame_shift);
end

%% multi-channel fft along the sample dimension
spec = fft(frames, fft_len, 3);
% floor() keeps the index integral for odd fft_len; identical for even sizes.
ffts = spec(:, :, 1:floor(fft_len/2)+1);

end

--------------------------------------------------------------------------------
/mvdr.m:
--------------------------------------------------------------------------------
%% Author Sining Sun (NWPU)
% snsun@nwpu-aslp.org

function enspec = mvdr( ffts, Rn, d )
%MVDR is used to do MVDR beamforming;
%   ffts:   M*T*F multi-channel spectrum
%   Rn:     M*M*F covariance matrix.
%           M is channels number,
%           F is frequency bin number
%   d:      M*F steering vector
%   enspec: T*F enhanced spectrum after MVDR
%
%   For every bin f the weight is
%       w = inv(Rn_f) * d_f / (d_f' * inv(Rn_f) * d_f)
%   and the output is w' applied to every frame of that bin.

[M, T, F] = size(ffts);       %fft bins number
enspec = zeros(T, F);         %beamforming outputs
e = 0.0001*eye(M);            %avoid the matrix singular
for f = 1:F
    Rn_f = Rn(:, :, f);
    if rcond(Rn_f) < 0.001
        Rn_f = Rn_f + e;
    end
    d_f = d(:, f);
    % Solve Rn_f * x = d_f instead of forming the explicit inverse.
    Rinv_d = Rn_f \ d_f;
    w_f = Rinv_d / (d_f' * Rinv_d);
    enspec(:, f) = (w_f' * ffts(:, :, f)).';
end
end

--------------------------------------------------------------------------------
/outProdND.m:
--------------------------------------------------------------------------------
% given an ND tensor of size DxN1xN2xN3.., compute the outer product of the
% first dimension independently to return DxDxN1xN2xN3...
%
% Each column v along the first dimension is mapped to v * v' (conjugate
% transpose), so output(:, :, i1, i2, ...) = data(:, i1, i2, ...) * data(:, i1, i2, ...)'.
% All trailing dimensions are preserved, whatever their number (the
% original collapsed everything beyond the 4th dimension into one).
function output = outProdND(data)

sz = size(data);
D = sz(1);
A = reshape(data, D, []);

% B = permute(bsxfun(@times, A, conj(permute(A,[3 2 1]))), [1 3 2]); % slower
B = bsxfun(@times, permute(A, [1 3 2]), permute(conj(A), [3 1 2]));

output = reshape(B, [D, D, sz(2:end)]);

end
--------------------------------------------------------------------------------
/output.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/output.wav
--------------------------------------------------------------------------------
/test.m:
--------------------------------------------------------------------------------
%% Test scripts
%% Author Sining Sun (NWPU)
% snsun@nwpu-aslp.org
clc
clear

%% Load the test multi-channel test data
I = 6; %channels number
for i = 1:I
    [x, fs] = audioread(['test_wav/test3/20G_20GO010I_STR.CH' int2str(i) '.wav']);
    if i == 1
        wav_all = zeros(length(x), I);   % allocate once the length is known
    end
    wav_all(:, i) = x;
end
wav = wav_all(:, [1, 3, 4, 5, 6]); % we do not use ch2 because of bad quality
% To run on your own data, replace wav (samples x channels) and fs here.

%% enframe and do fft
frame_length = 400;
frame_shift = 160;
fft_len = 512;
[~, ffts] = multi_fft(wav, frame_length, frame_shift, fft_len);

%% Estimate the TF-SPP and spacial covariance matrix for noisy speech and noise
[~, ~, Ry, Rv] = est_cgmm(ffts);

Rx = Ry - Rv; %trade off. Rx may be not positive definite

[M, ~, F] = size(ffts); %fft bins number
d = zeros(M, F); %steering vectors

%% Get steering vectors d using eigenvalue decomposition
% Rx can be indefinite, so take the eigenvector of the largest (signed)
% eigenvalue of its Hermitian part. svd would rank by magnitude and could
% return the direction of a large negative eigenvalue instead.
for f = 1:F
    Rx_f = Rx(:, :, f);
    [V, D] = eig((Rx_f + Rx_f') / 2);
    [~, k] = max(real(diag(D)));
    d(:, f) = V(:, k);
end

%% Do MVDR beamforming
output = mvdr(ffts, Rv, d);

%% Reconstruct time domain signal using overlap and add;
% Rebuild the conjugate-symmetric negative-frequency half before the ifft.
output = [output, fliplr(conj(output(:, 2:end-1)))];
rec_frames = real(ifft(output, fft_len, 2));
rec_frames = rec_frames(:, 1:frame_length);
sig = overlapadd(rec_frames, hamming(frame_length, 'periodic'), frame_shift);

% Peak-normalise (skipped for an all-zero signal) and write at the sample
% rate of the input files instead of a hard-coded 16000.
peak = max(abs(sig));
if peak > 0
    sig = sig ./ peak;
end
audiowrite('output.wav', sig, fs);

--------------------------------------------------------------------------------
/test_wav/test1/F02_011C021A_BUS.CH0.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test1/F02_011C021A_BUS.CH0.wav
--------------------------------------------------------------------------------
/test_wav/test1/F02_011C021A_BUS.CH1.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test1/F02_011C021A_BUS.CH1.wav -------------------------------------------------------------------------------- /test_wav/test1/F02_011C021A_BUS.CH2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test1/F02_011C021A_BUS.CH2.wav -------------------------------------------------------------------------------- /test_wav/test1/F02_011C021A_BUS.CH3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test1/F02_011C021A_BUS.CH3.wav -------------------------------------------------------------------------------- /test_wav/test1/F02_011C021A_BUS.CH4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test1/F02_011C021A_BUS.CH4.wav -------------------------------------------------------------------------------- /test_wav/test1/F02_011C021A_BUS.CH5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test1/F02_011C021A_BUS.CH5.wav -------------------------------------------------------------------------------- /test_wav/test1/F02_011C021A_BUS.CH6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test1/F02_011C021A_BUS.CH6.wav -------------------------------------------------------------------------------- /test_wav/test2/F01_050C0103_STR.CH1.wav: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test2/F01_050C0103_STR.CH1.wav -------------------------------------------------------------------------------- /test_wav/test2/F01_050C0103_STR.CH2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test2/F01_050C0103_STR.CH2.wav -------------------------------------------------------------------------------- /test_wav/test2/F01_050C0103_STR.CH3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test2/F01_050C0103_STR.CH3.wav -------------------------------------------------------------------------------- /test_wav/test2/F01_050C0103_STR.CH4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test2/F01_050C0103_STR.CH4.wav -------------------------------------------------------------------------------- /test_wav/test2/F01_050C0103_STR.CH5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test2/F01_050C0103_STR.CH5.wav -------------------------------------------------------------------------------- /test_wav/test2/F01_050C0103_STR.CH6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test2/F01_050C0103_STR.CH6.wav -------------------------------------------------------------------------------- 
/test_wav/test3/20G_20GO010I_STR.CH1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test3/20G_20GO010I_STR.CH1.wav -------------------------------------------------------------------------------- /test_wav/test3/20G_20GO010I_STR.CH2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test3/20G_20GO010I_STR.CH2.wav -------------------------------------------------------------------------------- /test_wav/test3/20G_20GO010I_STR.CH3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test3/20G_20GO010I_STR.CH3.wav -------------------------------------------------------------------------------- /test_wav/test3/20G_20GO010I_STR.CH4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test3/20G_20GO010I_STR.CH4.wav -------------------------------------------------------------------------------- /test_wav/test3/20G_20GO010I_STR.CH5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test3/20G_20GO010I_STR.CH5.wav -------------------------------------------------------------------------------- /test_wav/test3/20G_20GO010I_STR.CH6.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snsun/cgmm_mvdr/3625fe81202fdeaa5a81809051b99f4f1cbd78eb/test_wav/test3/20G_20GO010I_STR.CH6.wav 
--------------------------------------------------------------------------------