├── female.wav
├── male.wav
├── male_female_pure_mixture.wav
├── README.md
├── MWF.m
├── RTF_based_LCMV_GSC.m
└── segmentation.m


/female.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tungluai/RTF-based-LCMV-GSC/HEAD/female.wav


--------------------------------------------------------------------------------
/male.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tungluai/RTF-based-LCMV-GSC/HEAD/male.wav


--------------------------------------------------------------------------------
/male_female_pure_mixture.wav:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Tungluai/RTF-based-LCMV-GSC/HEAD/male_female_pure_mixture.wav


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RTF-based-LCMV-GSC
2 | Relative transfer function (RTF) based multichannel speech enhancement.
3 | The diarization and RTF estimation method (implemented in segmentation.m) follows the reference: "Data-Driven Source Separation Based on Simplex Analysis", Bracha Laufer-Goldshtein, 26 Feb 2018.
4 | The estimated RTFs are then used by an LCMV beamformer in GSC structure (RTF_based_LCMV_GSC.m) to perform speech enhancement or speech separation.
5 | 


--------------------------------------------------------------------------------
/MWF.m:
--------------------------------------------------------------------------------
 1 | % the MWF beamformer
 2 | % SDWMWF:   h = (PhiX + mu * PhiN)^-1 * PhiSN(:,bin)
 3 | % input:    PhiN, (Nch, Nch, Nbin) the noise covariance matrix
 4 | %           PhiX, (Nch, Nch, Nbin) the speech covariance matrix
 5 | %           PhiSN,(Nch, Nbin) the noise&speech across-covariance matrix
 6 | %           mu, the speech distortion/noise reduction trade-off parameter
 7 | % output:   h, (Nch, Nbin)  the beamformer coefficients
 8 | % author : Xu Changlai,6/1,2019
 9 | 
10 | function h = MWF(PhiX,PhiN,PhiSN,mu)
11 | if nargin < 3
12 |     mu = 1;                 % typical value {0, 1}
13 | end
14 | 
15 | [Nch, ~, Nbin] = size(PhiX);
16 | h = zeros(Nch, Nbin);
17 | 
18 | for bin = 1:Nbin
19 |     if rcond(PhiN(:,:,bin)) < eps
20 |      %   disp(['bin ' num2str(bin) ': Noise covariance ill-conditioned.']);
21 |         PhiN(:,:,bin) = PhiN(:,:,bin) + 1e-10 * eye(Nch);
22 |     end
23 |     h(:,bin)  = (PhiX(:,:,bin) + mu * PhiN(:,:,bin)) \ PhiSN(:,bin);
24 | end
25 | 
26 | 
27 | 


--------------------------------------------------------------------------------
/RTF_based_LCMV_GSC.m:
--------------------------------------------------------------------------------
 1 | % LCMV-GSC for speech enhancement
 2 | % author : Xu Changlai,6/2,2019
 3 | 
 4 | clear all
 5 | close all
 6 | 
 7 | [speech , fs ] = audioread('male_female_pure_mixture.wav');
 8 | speech = speech';
 9 | [Nch,Nz] = size(speech);
10 | Nfft =floor( fs*64/1000); % 64 ms per frame
11 | Nbin = floor(Nfft/2+1);
12 | Nfrm = floor(Nz/Nbin)-1;
13 | win = sqrt(hanning(Nfft))';
14 | 
15 | yout = zeros(1,Nz);
16 | Ybin_nonclosed = zeros(1,Nbin);
17 |  
18 | q = zeros(Nch+1, Nbin);
19 | pest = zeros(Nbin,1);
20 | mu = 0.05;
21 | alphaP = 0.9;
22 | Yfbf = zeros(Nch,Nfft);
23 | phi_x = zeros(Nbin);
24 | phi_n = zeros(Nbin);
25 | PhiN = zeros(Nch+1, Nch+1, Nbin);
26 | PhiS = zeros(Nch+1, Nch+1, Nbin);
27 | PhiSN = zeros(Nch+1,Nbin);
28 | 
29 | % processing
30 | [RTF,SPP,Mark] = segmentation (speech,fs,75,64);
31 | % resampling C 
32 | C1 = shiftdim(RTF,2);
33 | for nsr = 1 : size(C1,3)
34 |     C2(:,:,nsr) = resample(C1(:,:,nsr),64,64);
35 | end
36 | C = shiftdim(C2,1);
37 | 
38 | nsrce = size(C,2);
39 | g = [1;zeros(nsrce-1,1)];
40 | g = flipud(g); % change the enhanced person in the case of 2 speakers
41 | enhansp = find(1 == g); 
42 |  for frm = 1 : Nfrm  
43 |     %STFT
44 |     for ch = 1 : Nch
45 |          Y(ch ,:) = fft(win .* speech(ch ,(frm-1)*Nbin+1:(frm-1)*Nbin+Nfft),Nfft);
46 |     end
47 | 
48 |     for bin=1:Nbin  
49 |         w0(:,bin) = C(:,:,bin)/(C(:,:,bin)'* C(:,:,bin)) * g;   
50 |         B(:,:,bin) = eye(Nch,Nch) - C(:,:,bin) /(C(:,:,bin)'*C(:,:,bin))*C(:,:,bin)';
51 |     % processing      
52 |         % FBF filtering
53 |         Yfbf(bin) = w0(:,bin)' * Y(:,bin)/norm(w0(:,bin));    
54 |         % BM filtering
55 |         u(:,bin) = B(:,:,bin) * Y(:,bin);
56 |         
57 |         Yout(bin) = Yfbf(bin) - q(:,bin)'* [Yfbf(bin);u(:,bin)];
58 | 
59 |         % SDW-MWF
60 |         S(:,bin) = [Yout(bin);1e-10 * ones(Nch,1)];
61 |         N(:,bin) = [Yfbf(bin)-Yout(bin); u(:,bin)];
62 |         PhiS(:,:,bin) = 0.98 * PhiS(:,:,bin) + 0.02 * S(:,bin) * S(:,bin)';
63 |         PhiN(:,:,bin) = 0.98 * PhiN(:,:,bin) + 0.02 * N(:,bin) * N(:,bin)';
64 |         PhiSN(:,bin) = 0.98 * PhiSN(:,bin) + 0.02 * N(:,bin) * (Yfbf(bin)-Yout(bin))';
65 |         q(:,bin) = MWF(PhiS(:,:,bin), PhiN(:,:,bin),PhiSN(:,bin),1.11); 
66 |     end
67 |     % load all stuff
68 |     yout((frm-1)*Nfft/2+1:(frm-1)*Nfft/2+Nfft) = yout((frm-1)*Nfft/2+1:(frm-1)*Nfft/2+Nfft) + win .* real(ifft([Yout,conj(Yout(end-1:-1:2))]));   
69 | end
70 | audiowrite('RTF.wav', yout,fs);
71 | 
72 | % plot
73 | figure(2);
74 | subplot(3,1,1);
75 | plot(audioread('male.wav'));
76 | hold on 
77 | plot(Mark(1,:));
78 | subplot(3,1,2);
79 | plot(audioread('female.wav'));
80 | hold on 
81 | plot(Mark(2,:));
82 | subplot(3,1,3);
83 | plot(yout);
84 | 
85 | % [ scoresbefore ] = pesq( 'male.wav', 'male_female_pure_mixture.wav' );
86 | % [ scoresafter ] = pesq( 'male.wav', 'RTF.wav' );
87 | % [ scoresideal ] = pesq( 'male.wav', 'male.wav' );
88 | % fprintf('scorebefore: %f\n',scoresbefore);
89 | % fprintf('scoreafter: %f\n',scoresafter);
90 | % fprintf('scoreideal: %f\n',scoresideal);
91 | % fprintf(['improved PESQ socre : %f\n'],scoresafter-scoresbefore);


--------------------------------------------------------------------------------
/segmentation.m:
--------------------------------------------------------------------------------
  1 | function [RTF,SPP,Mark] = segmentation (speech,fs,ov,t_p_frm)
  2 | 
  3 | % reference : Data-Driven Source Separation Based on Simplex Analysis,2018,Bracha
  4 | %******input
  5 | % speech : source in time domain
  6 | % fs :sample rate
  7 | % t_p_frm : time delay in per frame (ms)
  8 | % ov :  overlap ov%
  9 | 
 10 | %******output
 11 | % RTF: Nch x nsrce x Nbin   relative transmission fuction
 12 | % nsrce : source number
 13 | % Mark: nsrce x (length of speech)    to mark segments
 14 | 
 15 | %author : Xu Changlai,6/2,2019
 16 | 
 17 | [Nch,Nz] = size(speech);
 18 | Nfft =floor( fs*t_p_frm/1000); % 64 ms per frame
 19 | Nbin = floor(Nfft/2+1);
 20 | Nov = 100 / (100-ov) ;
 21 | Lbin = floor( Nfft/Nov ); 
 22 | Nfrm = floor(Nz/Lbin)-(Nov-1);
 23 | win = sqrt(hanning(Nfft))';
 24 | 
 25 | for frm = 1 : Nfrm          
 26 |     %STFT
 27 |     for ch = 1 : Nch
 28 |          Y(ch ,frm,:) = fft(win .* speech(ch ,(frm-1)*Lbin+1:(frm-1)*Lbin+Nfft),Nfft);
 29 |     end
 30 | end
 31 | 
 32 | upbin = floor(4.8*1000/fs*Nfft);
 33 | lowbin = floor(0.3*1000/fs*Nfft+1);
 34 | for frm = 1: Nfrm
 35 |   for bin = lowbin : upbin   %  0.2 ~ 4.8 KHz
 36 |     if frm == 1
 37 |           Am(:,frm,bin-lowbin+1) = (Y(2:end,frm,bin) * Y(1,frm,bin)' + Y(2:end,frm+1,bin) * Y(1,frm+1,bin)') ./...
 38 |               (Y(1,frm,bin) * Y(1,frm,bin)' + Y(1,frm+1,bin) * Y(1,frm+1,bin)'); 
 39 |     
 40 |     elseif frm == Nfrm    
 41 |           Am(:,frm,bin-lowbin+1) = (Y(2:end,frm-1,bin) * Y(1,frm-1,bin)' + Y(2:end,frm,bin) * Y(1,frm,bin)') ./...
 42 |               (Y(1,frm-1,bin) * Y(1,frm-1,bin)' + Y(1,frm,bin) * Y(1,frm,bin)'); 
 43 |     else
 44 |           Am(:,frm,bin-lowbin+1) = (Y(2:end,frm-1,bin) * Y(1,frm-1,bin)' + Y(2:end,frm,bin) * Y(1,frm,bin)'+ Y(2:end,frm+1,bin) * Y(1,frm+1,bin)') ./...
 45 |               (Y(1,frm-1,bin) * Y(1,frm-1,bin)' + Y(1,frm,bin) * Y(1,frm,bin)'+ Y(1,frm+1,bin) * Y(1,frm+1,bin)'); 
 46 |     end
 47 |   end
 48 |   ac(:,frm) = reshape((reshape(Am(:,frm,:),Nch-1,upbin-lowbin+1))',(Nch-1)*(upbin-lowbin+1),1);
 49 |   a(:,frm) = [real(ac(:,frm));imag(ac(:,frm))];
 50 |   for n = 1 : frm
 51 |      W(frm,n) = a(:,frm)'* a(:,n)/(2*(Nch-1)*(upbin-lowbin+1));
 52 |      W(n,frm) = W(frm,n);
 53 |   end
 54 | end
 55 | 
 56 | % EVD on W
 57 | [U,D] = eig(W);
 58 | norm_eigv = diag(D)/D(end,end);
 59 | V = [];
 60 | for cnt = length(norm_eigv):-1:1
 61 |   if norm_eigv(cnt) < .119  %  0.11 ~ 0.128
 62 |       nsrce = length(norm_eigv) - cnt;
 63 |       break;
 64 |   end
 65 |   V = [V,U(:,cnt)];
 66 | end
 67 | % find probability vector
 68 | [~,I1] = max(sum(V.^2,2));
 69 | e(1,:) = V(I1,:); 
 70 | V1 = V - repmat(e(1,:),size(V,1),1);
 71 | [~,I2] = max(sum(V1.^2,2));
 72 | e(2,:) = V(I2,:);
 73 | if nsrce > 2
 74 |   Er = [];
 75 |   for r = 3:nsrce      
 76 |       er = e(r-1,:)-e(1,:); 
 77 |       Er = [Er er'];
 78 |       temp = pinv(Er' * Er);%pseudoantique
 79 |       P = eye(nsrce)- Er * temp * Er';
 80 |       [~,I] = max(sum((P * V1').^2,1));
 81 |       e(r,:) = V(I,:);
 82 |   end  
 83 |  end
 84 | Q = e';
 85 | SPP = (Q \ V')'; % source present probality
 86 | % clustering and estimate the RTF 
 87 | for n = 1 : nsrce
 88 |     Ydom = zeros(Nch,1,Nbin);
 89 |     Yref = zeros(1,1,Nbin);
 90 |     mark = zeros(1,Nz);
 91 |     L = find (SPP(:,n) > .96);%classic .95
 92 |     disp(L);
 93 |     for i = 1 : length(L)
 94 |       Ydom  = Ydom + Y(: ,L(i),1:Nbin) .* repmat(conj(Y(1 ,L(i),1:Nbin)),Nch,1);
 95 |       Yref  = Yref + Y(1 ,L(i),1:Nbin) .* conj(Y(1 ,L(i),1:Nbin));
 96 |       mark((L(i)-1)*Lbin+1:(L(i)-1)*Lbin+Nfft) = ones(1,Nfft);
 97 |     end 
 98 |     RTF(:,n,:) = Ydom ./ repmat(Yref,Nch,1,1);
 99 |     % making marks
100 |     Mark(n,:) = mark/10;
101 | end
102 | 
103 | figure(1);
104 | plot(speech(1,:));
105 | hold on
106 | plot(Mark(1,:));
107 | hold on
108 | plot(Mark(2,:));
109 | end


--------------------------------------------------------------------------------