├── LICENSE ├── README.md └── ssl_tools ├── doa_music.m ├── doa_mvdr.m ├── doa_srp.m ├── example └── example.m ├── pair_processing ├── ds_spec.m ├── fwDs_spec.m ├── fwMvdr_spec.m ├── mvdr_spec.m ├── srpNonlin_spec.m └── srpPhat_spec.m ├── post_findPeaks.m ├── post_sslResult.m └── pre_paramInit.m /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # sound-source-localization-algorithm_DOA_estimation 2 | * 语音信号处理的宽带说话人(声源)定位(DOA估计)算法 3 | 4 | **Abstract** 本仓库是面向语音信号的声源定位传统算法 5 | 6 | **关键词**:声源定位(sound source localization)、DOA估计(DOA estimation)、TDOA估计(TDOA estimation)、麦克风阵列信号处理(microphone array signal processing) 7 | ## ssl_tools 8 | 包含SRP-PHAT(GCC-PHAT)、MUSIC、beamforming(波束形成)三类算法 9 | * SRP:SRP-PHAT、非线性SRP-PHAT 10 | * MUSIC 11 | * beamforming:基于延迟求和(DS)的SNR方位谱估计、基于MVDR的SNR方位谱估计及其对应的频率加权改进算法 12 | 13 | 14 | ## 与语音信号处理的宽带声源定位相关的参考资源 15 | ### 竞赛 16 | * acoustic source LOCalization And TrAcking [[LOCATA]](https://locata.lms.tf.fau.de/) 17 | * Detection and Classification of Acoustic Scenes and Events [[DCASE]](http://dcase.community/challenge2020/task-sound-event-localization-and-detection) 18 | ### 多通道数据集生成算法 19 | * rir-generator [[Code]](https://github.com/ehabets/RIR-Generator) 20 | * ROOMSIM[[Code]](https://github.com/Wenzhe-Liu/ROOMSIM) 21 | ### 开源代码 22 | #### 基于时延的定位 23 | * A simple DOA GUI 24 
| [[Code]](https://github.com/wangwei2009/DOA) 25 | #### 基于波束形成的定位 26 | * DNN_Localization_And_Separation 27 | [[Code]](https://github.com/shaharhoch/DNN_Localization_And_Separation) 28 | #### 双耳定位 29 | * binauralLocalization 30 | [[Code]](https://github.com/nicolasobin/binauralLocalization) 31 | * Binaural-Auditory-Localization-System 32 | [[Code]](https://github.com/r04942117/Binaural-Auditory-Localization-System) 33 | * Binaural_Localization:ITD-based localization of sound sources in complex acoustic environments [[Code]](https://github.com/Hardcorehobel/Binaural_Localization) 34 | #### 高分辨率定位 35 | * WSCM-MUSIC 36 | [[Code]](https://github.com/xuchenglin28/WSCM-MUSIC) 37 | #### 基于聚类定位 38 | * messl:Model-based EM Source Separation and Localization 39 | [[Code]](https://github.com/mim/messl) [[Paper]](https://www.ee.columbia.edu/~ronw/pubs/taslp09-messl.pdf) 40 | * fast_sound_source_localization_using_TLSSC:Fast Sound Source Localization Using Two-Level Search Space Clustering 41 | [[Code]](https://github.com/LeeTaewoo/fast_sound_source_localization_using_TLSSC) 42 | #### 窄带定位 43 | * doa-tools 44 | [[Code]](https://github.com/morriswmz/doa-tools) 45 | * 麦克风声源定位 [[Code]](https://github.com/xiaoli1368/Microphone-sound-source-localization) 46 | 47 | -------------------------------------------------------------------------------- /ssl_tools/doa_music.m: -------------------------------------------------------------------------------- 1 | function specGlobal = doa_music(x,Param,nsrc) 2 | if(size(x,2)<2) 3 | error('ERROR[MUSIC]:信号通道数必须大于等于2'); 4 | end 5 | %% STFT 6 | X = ssl_stft(x.',Param.window,Param.noverlap,Param.nfft,Param.fs);%nbin,nfram,nchan 7 | X = X(2:end,:,:); 8 | X = X(Param.freqBins,:,:); 9 | [nbin,~,nmic] = size(X); 10 | %% MUSIC 11 | % linspace包含端点,保证插值时不会出现NaN 12 | aziGrid = linspace(Param.azimuth(1),Param.azimuth(end),round((Param.azimuth(end)-Param.azimuth(1))/Param.alphaRes)+1); 13 | eleGrid = 
linspace(Param.elevation(1),Param.elevation(end),round((Param.elevation(end)-Param.elevation(1))/Param.alphaRes)+1);
nAzi = length(aziGrid);
nEle = length(eleGrid);
power = zeros(nbin, nAzi, nEle);

% MUSIC needs a non-empty noise subspace, i.e. fewer sources than channels.
% Without this check En is empty and every grid point evaluates to Inf.
if nsrc >= nmic
    error('ERROR[MUSIC]: nsrc must be smaller than the number of channels');
end

% X was restricted to Param.freqBins above, so row ibin of X belongs to the
% frequency Param.f(Param.freqBins(ibin)). Indexing Param.f(ibin) directly is
% only correct when every bin is selected (empty freqRange).
fSel = Param.f(Param.freqBins);
% Microphone positions relative to reference microphone 1 (3 x nmic); loop-invariant.
relPos = Param.micPos-repmat(Param.micPos(:,1),[1,nmic]);

for ibin = 1:nbin % for every selected frequency bin
    % nfram x nmic snapshots. reshape (unlike squeeze) keeps this orientation
    % even when there is only a single frame.
    Xf = reshape(X(ibin,:,:),[],nmic);
    Rxx = transpose(Xf)*conj(Xf);      % spatial covariance summed over frames, nmic x nmic
    [U,~,~] = svd(Rxx);                % Rxx = U * S * U^H, singular values in descending order
    EnH = ctranspose(U(:,nsrc+1:end)); % noise subspace, Hermitian-transposed
    for iaz = 1:nAzi
        for iel = 1:nEle
            % Unit vector pointing towards the candidate direction, 3 x 1
            v = [cosd(eleGrid(iel))*cosd(aziGrid(iaz));cosd(eleGrid(iel))*sind(aziGrid(iaz));sind(eleGrid(iel))];
            tau = v'*relPos./Param.c;                    % 1 x nmic delays relative to microphone 1
            a = exp(1i*2*pi*fSel(ibin).*transpose(tau)); % steering vector, nmic x 1
            % a^H*En*En^H*a equals ||En^H*a||^2, which is real and non-negative
            power(ibin,iaz,iel) = 1./sum(abs(EnH*a).^2);
        end
    end
end

% Sum the pseudo-spectra of all frequency bins. reshape keeps the
% nAzi x nEle orientation even when one of the grids has a single point.
spec = reshape(sum(power,1),nAzi,nEle);

% Resample the coarse (alphaRes) grid onto the output grid. The result is
% ordered with azimuth varying fastest.
if nAzi > 1 && nEle > 1
    [az,el]=meshgrid(aziGrid,eleGrid);
    [azi,eli]=meshgrid(Param.azimuth,Param.elevation);
    specInterp = interp2(az,el,spec.',azi,eli);
    specGlobal = reshape(specInterp.',1,[]);
elseif nEle == 1
    % Single elevation (e.g. horizontal-plane search): interp2 needs at least
    % two points per axis, so interpolate along azimuth only.
    specAz = interp1(aziGrid(:),spec(:),Param.azimuth(:));          % nAzOut x 1
    specGlobal = reshape(repmat(specAz,1,length(Param.elevation)),1,[]);
else
    % Single azimuth: interpolate along elevation only.
    specEl = interp1(eleGrid(:),spec(:),Param.elevation(:));        % nElOut x 1
    specGlobal = reshape(repmat(specEl.',length(Param.azimuth),1),1,[]);
end
end

function X=ssl_stft(x,window,noverlap,nfft,fs)
% SSL_STFT  Multichannel STFT computed channel by channel with spectrogram.
%   x : nchan x nsampl signal matrix (one channel per row)
%   window, noverlap, nfft, fs : passed unchanged to spectrogram
%   X : nbin x nframe x nchan array of STFT coefficients

[nchan,~]=size(x);
% The first channel also provides the output dimensions.
[Xtemp,F,T,~] = spectrogram(x(1,:),window,noverlap,nfft,fs); % nbin x nframe
nbin = length(F);
nframe = length(T);
X = zeros(nbin,nframe,nchan);
X(:,:,1) = Xtemp;
for ichan = 2:nchan
    X(:,:,ichan) = spectrogram(x(ichan,:),window,noverlap,nfft,fs);
end

end

--------------------------------------------------------------------------------
/ssl_tools/doa_mvdr.m:
--------------------------------------------------------------------------------
function [specGlobal] = ...
doa_mvdr(x,method,Param)
% DOA_MVDR  SNR-based angular spectrum from pairwise beamforming.
%   x      : nsampl x nchan multichannel signal (one channel per column)
%   method : 'SNR-MVDR' | 'SNR-FWMVDR' | 'SNR-DS' | 'SNR-FWDS'
%   Param  : parameter struct created by pre_paramInit
%   specGlobal : angular spectrum over the Param.nGrid candidate directions,
%                pooled over frames according to Param.pooling
if(~any(strcmp(method, {'SNR-MVDR' 'SNR-FWMVDR' 'SNR-DS' 'SNR-FWDS'})))
    error('ERROR : method参数错误');
end
%% Local covariance matrices
lf=8;lt=2; % half-widths of the frequency / time smoothing neighbourhood
Rxx = ssl_Rxx(x,Param.fs,Param.window,Param.noverlap, Param.nfft,lf,lt);
% Drop the first STFT bin so the rows line up with Param.f (which starts at
% fs/nfft) and reorder to nbin x nFrames x nChan x nChan.
Rxx = permute(Rxx(:,:,2:end,:),[3 4 1 2]);
%% Angular spectrum
switch method
    case 'SNR-MVDR'
        specGlobal = ssl_MVDR(Rxx,Param);
    case 'SNR-FWMVDR'
        specGlobal = ssl_FWMVDR(Rxx,Param);
    case 'SNR-DS'
        specGlobal = ssl_DS(Rxx,Param);
    case 'SNR-FWDS'
        specGlobal = ssl_FWDS(Rxx,Param);
    otherwise
        error('ERROR :错误的method');
end
end

function [hatRxx]=ssl_Rxx(x,fs,window,noverlap, nfft,lf,lt)
% SSL_RXX  Locally smoothed spatial covariance of the STFT of x.
%   x  : nsampl x nchan signal (one channel per column)
%   fs, window, noverlap, nfft : STFT parameters
%   lf : half-width of the frequency neighbourhood (default 2)
%   lt : half-width of the time neighbourhood (default 2)
%   hatRxx : nchan x nchan x nbin x nfram covariance estimates, weighted by
%            a 2-D Hanning window centred on each time-frequency point
%% Argument checks
% lf and lt are the 6th and 7th arguments; the defaults must test those
% positions, otherwise omitting them leaves lf/lt undefined.
if nargin<5, error('Not enough input arguments.'); end
[nsampl,nchan]=size(x);
if nchan>nsampl, error('The input signal must be in columns.'); end
if nargin<6, lf=2; end
if nargin<7, lt=2; end

%% STFT
% Uses the same local STFT helper as doa_music.m / doa_srp.m. The helper
% this line used to call, ioa_stftCompute, is not part of this repository.
X = ssl_stft(x.',window,noverlap,nfft,fs);
[nbin,nfram,nchan]=size(X);
%% Weighted local covariance
winf=hanning(2*lf-1);
wint=hanning(2*lt-1).';
hatRxx = zeros(nchan,nchan,nbin,nfram);

for f=1:nbin
    indf=max(1,f-lf+1):min(nbin,f+lf-1);   % frequency neighbourhood, clipped at the edges
    for t=1:nfram
        indt=max(1,t-lt+1):min(nfram,t+lt-1); % time neighbourhood, clipped at the edges
        nind=length(indf)*length(indt);
        % Separable 2-D window flattened to a row and replicated per channel
        wei=ones(nchan,1)*reshape(winf(indf-f+lf)*wint(indt-t+lt),1,nind);
        XX=reshape(X(indf,indt,:),nind,nchan).'; % nchan x nind neighbourhood samples
        % The original copied the 2 x 2 sub-block of every microphone pair
        % separately; for two or more channels those pairs cover the whole
        % matrix, so it is assigned in one step.
        hatRxx(:,:,f,t) = (XX.*wei)*XX'/sum(wei(1,:));
    end
end
end

function X=ssl_stft(x,window,noverlap,nfft,fs)
% SSL_STFT  Multichannel STFT computed channel by channel with spectrogram.
%   x : nchan x nsampl signal matrix (one channel per row)
%   X : nbin x nframe x nchan array of STFT coefficients
[nchan,~]=size(x);
[Xtemp,F,T,~] = spectrogram(x(1,:),window,noverlap,nfft,fs); % nbin x nframe
nbin = length(F);
nframe = length(T);
X = zeros(nbin,nframe,nchan);
X(:,:,1) = Xtemp;
for ichan = 2:nchan
    X(:,:,ichan) = spectrogram(x(ichan,:),window,noverlap,nfft,fs);
end
end

function [specGlobal] = ssl_poolPairs(hatRxx,Param,pairSpec)
% SSL_POOLPAIRS  Accumulate per-pair angular spectra and pool them over frames.
%   hatRxx   : nbin x nFrames x nChan x nChan covariance matrices
%   pairSpec : handle @(Rpair,i) returning the nbin x nFrames x ngrid local
%              spectrum of microphone pair i from its 2 x 2 covariance block
%   specGlobal : Param.nGrid x 1 pooled spectrum
[~,nFrames,~,~] = size(hatRxx);
specInst = zeros(Param.nGrid, nFrames);

for i = 1:Param.nPairs
    spec = pairSpec(hatRxx(Param.freqBins,:,Param.pairId(i,:),Param.pairId(i,:)), i);
    % Sum over frequency -> ngrid x nFrames. permute keeps this orientation
    % for a single frame, where shiftdim(...)' would flip it.
    specSampledgrid = permute(sum(spec,1),[3 2 1]);
    % Map the pair-local angle samples onto the global direction grid
    specCurrentPair = interp1q(Param.alphaSampled{i}', specSampledgrid, Param.alpha(i,:)');
    specInst = specInst + specCurrentPair;
end

switch Param.pooling
    case 'max'
        specGlobal = shiftdim(max(specInst,[],2));
    case 'sum'
        specGlobal = shiftdim(sum(specInst,2));
    otherwise
        % Previously an unknown value left specGlobal unassigned.
        error('ERROR : Param.pooling must be ''max'' or ''sum''');
end
end

function [specGlobal] = ssl_DS(hatRxx,Param)
% SSL_DS  Delay-and-sum SNR spectrum accumulated over all microphone pairs.
specGlobal = ssl_poolPairs(hatRxx,Param, ...
    @(Rpair,i) ds_spec(Rpair, Param.f(Param.freqBins), Param.tauGrid{i}));
end

function [specGlobal] = ssl_FWDS(hatRxx, Param)
% SSL_FWDS  Frequency-weighted delay-and-sum SNR spectrum over all pairs.
specGlobal = ssl_poolPairs(hatRxx,Param, ...
    @(Rpair,i) fwDs_spec(Rpair, Param.f(Param.freqBins), Param.d(i), Param.tauGrid{i},Param.c));
end

function [specGlobal] = ssl_MVDR(hatRxx,Param)
% SSL_MVDR  MVDR SNR spectrum accumulated over all microphone pairs.
specGlobal = ssl_poolPairs(hatRxx,Param, ...
    @(Rpair,i) mvdr_spec(Rpair, Param.f(Param.freqBins), Param.tauGrid{i}));
end

function [specGlobal] = ...
ssl_FWMVDR(hatRxx,Param)
% SSL_FWMVDR  Frequency-weighted MVDR SNR spectrum over all microphone pairs.
%   hatRxx : nbin x nFrames x nChan x nChan covariance matrices
%   specGlobal : Param.nGrid x 1 spectrum pooled over frames

[~,nFrames,~,~] = size(hatRxx);
specInst = zeros(Param.nGrid, nFrames);

for i = 1:Param.nPairs
    spec = fwMvdr_spec(hatRxx(Param.freqBins,:,Param.pairId(i,:),Param.pairId(i,:)), Param.f(Param.freqBins), Param.d(i), Param.tauGrid{i}, Param.c);
    % Sum over frequency -> ngrid x nFrames. permute keeps this orientation
    % for a single frame, where shiftdim(...)' would flip it.
    specSampledgrid = permute(sum(spec,1),[3 2 1]);
    specCurrentPair = interp1q(Param.alphaSampled{i}', specSampledgrid, Param.alpha(i,:)');
    specInst = specInst + specCurrentPair;
end

switch Param.pooling
    case 'max'
        specGlobal = shiftdim(max(specInst,[],2));
    case 'sum'
        specGlobal = shiftdim(sum(specInst,2));
    otherwise
        % Previously an unknown value left specGlobal unassigned.
        error('ERROR : Param.pooling must be ''max'' or ''sum''');
end
end

--------------------------------------------------------------------------------
/ssl_tools/doa_srp.m:
--------------------------------------------------------------------------------
function [specGlobal] = doa_srp(x,method, Param)
% DOA_SRP  Steered-response-power angular spectrum from microphone pairs.
%   x      : multichannel signal, one channel per column
%   method : 'SRP-PHAT' or 'SRP-NON' (non-linear SRP-PHAT)
%   Param  : parameter struct created by pre_paramInit
%   specGlobal : Param.nGrid x 1 spectrum pooled over frames
if(~any(strcmp(method, {'SRP-PHAT' 'SRP-NON'})))
    error('ERROR[doa_srp]: method参数错误');
end
%% STFT
X = ssl_stft(x.',Param.window, Param.noverlap, Param.nfft, Param.fs);
X = X(2:end,:,:); % drop the first bin so rows line up with Param.f
%% Angular spectrum
if strcmp(method,'SRP-PHAT')
    specGlobal = ssl_srpPhat(X,Param);
else
    specGlobal = ssl_srp_nonlin(X,Param);
end

end

function X=ssl_stft(x,window,noverlap,nfft,fs)
% SSL_STFT  Multichannel STFT computed channel by channel with spectrogram.
%   x : nchan x nsampl signal matrix (one channel per row)
%   X : nbin x nframe x nchan array of STFT coefficients

[nchan,~]=size(x);
[Xtemp,F,T,~] = spectrogram(x(1,:),window,noverlap,nfft,fs); % nbin x nframe
nbin = length(F);
nframe = length(T);
X = zeros(nbin,nframe,nchan);
X(:,:,1) = Xtemp;
for ichan = 2:nchan
    X(:,:,ichan) = spectrogram(x(ichan,:),window,noverlap,nfft,fs);
end

end

function [specGlobal] = ssl_srpPool(X,Param,pairSpec)
% SSL_SRPPOOL  Accumulate per-pair SRP spectra and pool them over frames.
%   X        : nbin x nFrames x nChan STFT coefficients
%   pairSpec : handle @(Xpair,i) returning the nbin x nFrames x ngrid local
%              spectrum of microphone pair i
[~,nFrames,~] = size(X);
specInst = zeros(Param.nGrid, nFrames);

for i = 1:Param.nPairs
    spec = pairSpec(X(Param.freqBins,:,Param.pairId(i,:)), i);
    % Sum over frequency -> ngrid x nFrames. permute keeps this orientation
    % for a single frame, where shiftdim(...)' would flip it.
    specSampledgrid = permute(sum(spec,1),[3 2 1]);
    % Map the pair-local angle samples onto the global direction grid
    specCurrentPair = interp1q(Param.alphaSampled{i}', specSampledgrid, Param.alpha(i,:)');
    specInst = specInst + specCurrentPair;
end

switch Param.pooling
    case 'max'
        specGlobal = shiftdim(max(specInst,[],2));
    case 'sum'
        specGlobal = shiftdim(sum(specInst,2));
    otherwise
        % Previously an unknown value left specGlobal unassigned.
        error('ERROR : Param.pooling must be ''max'' or ''sum''');
end
end

function [specGlobal] = ssl_srpPhat(X,Param)
% SSL_SRPPHAT  SRP-PHAT spectrum accumulated over all microphone pairs.
specGlobal = ssl_srpPool(X,Param, ...
    @(Xpair,i) srpPhat_spec(Xpair, Param.f(Param.freqBins), Param.tauGrid{i}));
end

function [specGlobal] = ssl_srp_nonlin(X,Param)
% SSL_SRP_NONLIN  Non-linear SRP-PHAT spectrum over all microphone pairs.

% Per-pair non-linearity parameter derived from the microphone spacing
alpha_meth = (10*Param.c)./(Param.d*Param.fs);
specGlobal = ssl_srpPool(X,Param, ...
    @(Xpair,i) srpNonlin_spec(Xpair, Param.f(Param.freqBins), alpha_meth(i), Param.tauGrid{i}));
end

--------------------------------------------------------------------------------
/ssl_tools/example/example.m:
--------------------------------------------------------------------------------
clc;clear;close all;

addpath(genpath('./../'));
addpath('./wav files');
%% Audio file and microphone coordinates
fileName = 'example.wav';
micPos = ...
...%  mic1    mic2    mic3    mic4    mic5    mic6    mic7    mic8
    [ 0.037  -0.034  -0.056  -0.056  -0.037   0.034   0.056   0.056;  % x
      0.056   0.056   0.037  -0.034  -0.056  -0.056  -0.037   0.034;  % y
     -0.038   0.038  -0.038   0.038  -0.038   0.038  -0.038   0.038]; % z


azBound = [-180 180]; % azimuth search range
elBound = [-90 90];   % elevation search range; for a horizontal-plane search use elBound=0
gridRes = 1;          % resolution of the output azimuth/elevation grid
alphaRes = 5;         % resolution of the coarse search grids (MUSIC grid, per-pair angle sampling)

method = 'MUSIC';
wlen = 512;
window = hann(wlen);
noverlap = 0.5*wlen;
nfft = 512;
nsrc = 2;             % number of sources
c = 343;              % speed of sound
freqRange = [];       % frequency range to evaluate; [] selects all frequencies
pooling = 'max';      % how per-frame results are combined: maximum or sum over frames {'max' 'sum'}

%% Read the audio file (fix)
[x,fs] = audioread(fileName);
[nSample,nChannel] = size(x);
if nChannel>nSample, error('ERROR:输入信号为nSample x nChannel'); end
nMic = size(micPos,2);
if nChannel~=nMic, error('ERROR:麦克风数应与信号通道数相等'); end
%% Store the parameters (fix)
Param = pre_paramInit(c,window, noverlap, nfft,pooling,azBound,elBound,gridRes,alphaRes,fs,freqRange,micPos);
%% Localization (fix)
if ~isempty(strfind(method,'SRP'))
    specGlobal = doa_srp(x,method, Param);
elseif ~isempty(strfind(method,'SNR'))
    specGlobal = doa_mvdr(x,method,Param);
elseif ~isempty(strfind(method,'MUSIC'))
    specGlobal = doa_music(x,Param,nsrc);
else
    % Previously an unknown method fell through silently and the script
    % failed later with an undefined specGlobal.
    error('ERROR: unsupported method ''%s''', method);
end

%% Compute the angles
minAngle = 10;   % minimum angle between two peaks during the search
specDisplay = 1; % whether to display the angular spectrum {1,0}
% pfEstAngles = post_sslResult(specGlobal, nsrc, Param.azimuth, Param.elevation, minAngle);
% Plot the angular spectrum
% [pfEstAngles,figHandle] = post_findPeaks(specGlobal, Param.azimuth, Param.elevation, Param.azimuthGrid, Param.elevationGrid, nsrc, minAngle, specDisplay);
[pfEstAngles,figHandle] = post_findPeaks(specGlobal, Param.azimuth, Param.elevation, Param.azimuthGrid, Param.elevationGrid, nsrc, minAngle, specDisplay);

azEst = pfEstAngles(:,1)';
elEst = pfEstAngles(:,2)';
% NOTE(review): post_findPeaks is not visible here; the bound below only
% guards against it returning fewer than nsrc rows.
for i = 1:min(nsrc,numel(azEst))
    fprintf('第 %d 个声源方位为: \n Azimuth (Theta): %.0f \t Elevation (Phi): %.0f \n\n',i,azEst(i),elEst(i));
end

--------------------------------------------------------------------------------
/ssl_tools/pair_processing/ds_spec.m:
--------------------------------------------------------------------------------
function spec = ds_spec(hatRxx, f, tauGrid)
% DS_SPEC  Delay-and-sum SNR spectrum of one microphone pair.
%   hatRxx  : nbin x nFrames x 2 x 2 covariance matrices of the pair
%   f       : nbin x 1 frequencies (column vector)
%   tauGrid : candidate time differences of arrival
%   spec    : nbin x nFrames x ngrid ratio (trace + 2*Re(R12*e)) / (trace - 2*Re(R12*e))

[nbin,nFrames] = size(hatRxx(:,:,1,1));
ngrid = length(tauGrid);
R12 = hatRxx(:,:,1,2);
traceRxx = real(hatRxx(:,:,1,1) + hatRxx(:,:,2,2));

spec = zeros(nbin,nFrames,ngrid);
for pkInd=1:ngrid
    % Phase term of candidate delay tauGrid(pkInd), replicated for every frame
    EXP = repmat(exp(-2*1i*pi*tauGrid(pkInd)*f),1,nFrames);
    steered = 2*real(R12.*EXP);
    spec(:,:,pkInd) = (traceRxx + steered)./(traceRxx - steered);
end

end
--------------------------------------------------------------------------------
/ssl_tools/pair_processing/fwDs_spec.m:
--------------------------------------------------------------------------------
function spec = fwDs_spec(hatRxx, f, d, tauGrid, c)
% FWDS_SPEC  Frequency-weighted delay-and-sum SNR spectrum of one pair.
%   hatRxx  : nbin x nFrames x 2 x 2 covariance matrices of the pair
%   f       : nbin x 1 frequencies (column vector)
%   d       : microphone spacing
%   tauGrid : candidate time differences of arrival
%   c       : speed of sound
%   spec    : nbin x nFrames x ngrid; the delay-and-sum ratio is scaled by
%             (1-SINC)/2 and offset by -(1+SINC)/2 with SINC = sinc(2*f*d/c)

[nbin,nFrames] = size(hatRxx(:,:,1,1));
ngrid = length(tauGrid);
R12 = hatRxx(:,:,1,2);
TR = real(hatRxx(:,:,1,1) + hatRxx(:,:,2,2));
SINC = sinc(2*f*d/c);
% Frequency-dependent offset and gain do not depend on the delay: build once
offset = repmat(-(1+SINC)/2,1,nFrames);
gain = repmat((1-SINC)/2,1,nFrames);

spec = zeros(nbin,nFrames,ngrid);
for pkInd=1:ngrid
    EXP = repmat(exp(-2*1i*pi*tauGrid(pkInd)*f),1,nFrames);
    steered = 2*real(R12.*EXP);
    spec(:,:,pkInd) = offset + gain.*(TR + steered)./(TR - steered);
end

end
--------------------------------------------------------------------------------
/ssl_tools/pair_processing/fwMvdr_spec.m:
--------------------------------------------------------------------------------
function spec = fwMvdr_spec(hatRxx, f, d, tauGrid, c)
% FWMVDR_SPEC  Frequency-weighted MVDR SNR spectrum of one microphone pair.
%   hatRxx  : nbin x nFrames x 2 x 2 covariance matrices of the pair
%   f       : nbin x 1 frequencies (column vector)
%   d       : microphone spacing
%   tauGrid : candidate time differences of arrival
%   c       : speed of sound
%   spec    : nbin x nFrames x ngrid

[nbin,nFrames] = size(hatRxx(:,:,1,1));
ngrid = length(tauGrid);
R11 = hatRxx(:,:,1,1);
R12 = hatRxx(:,:,1,2);
R21 = hatRxx(:,:,2,1);
R22 = hatRxx(:,:,2,2);
traceRxx = real(R11 + R22);
detRxx = real(R11.*R22 - R12.*R21); % determinant of the 2 x 2 covariance
SINC = sinc(2*f*d/c);
% Frequency-dependent offset and gain do not depend on the delay: build once
offset = repmat(-(1+SINC)/2,1,nFrames);
gain = repmat((1-SINC)/2,1,nFrames);

spec = zeros(nbin,nFrames,ngrid);
for pkInd=1:ngrid
    EXP = repmat(exp(-2*1i*pi*tauGrid(pkInd)*f),1,nFrames);
    power_y = detRxx./(traceRxx - 2*real(R12.*EXP));
    spec(:,:,pkInd) = offset + gain.*power_y./(.5*traceRxx-power_y);
end

end

--------------------------------------------------------------------------------
/ssl_tools/pair_processing/mvdr_spec.m:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/WenzheLiu-Speech/sound-source-localization-algorithm_DOA_estimation/9f7e91bce217d69a110441af939cf041c8f26cd9/ssl_tools/pair_processing/mvdr_spec.m
--------------------------------------------------------------------------------
/ssl_tools/pair_processing/srpNonlin_spec.m:
--------------------------------------------------------------------------------
function spec = srpNonlin_spec(X, f, alpha, tauGrid)
% SRPNONLIN_SPEC  Non-linear SRP-PHAT spectrum of one microphone pair.
%   X       : nbin x nFrames x 2 STFT coefficients of the pair
%   f       : nbin x 1 frequencies (column vector)
%   alpha   : non-linearity parameter
%   tauGrid : candidate time differences of arrival
%   spec    : nbin x nFrames x ngrid, 1 - tanh(alpha*sqrt(|2 - 2*Re(P*e)|))

X1 = X(:,:,1);
X2 = X(:,:,2);

[nbin,nFrames] = size(X1);
ngrid = length(tauGrid);

spec = zeros(nbin,nFrames,ngrid);
P = X1.*conj(X2);
% PHAT normalization. The eps floor keeps bins with a zero cross-spectrum
% (e.g. digital silence) at 0 instead of producing 0/0 = NaN, which would
% spread through the frequency sum.
P = P./max(abs(P),eps);
for pkInd = 1:ngrid
    EXP = repmat(exp(-2*1i*pi*tauGrid(pkInd)*f),1,nFrames);
    spec(:,:,pkInd) = 1 - tanh(alpha*sqrt(abs(2-2*real(P.*EXP))));
end

end
--------------------------------------------------------------------------------
/ssl_tools/pair_processing/srpPhat_spec.m:
--------------------------------------------------------------------------------
function spec = srpPhat_spec(X, f, tauGrid)
% SRPPHAT_SPEC  SRP-PHAT spectrum of one microphone pair.
%   X       : nbin x nFrames x 2 STFT coefficients of the pair
%   f       : nbin x 1 frequencies (column vector)
%   tauGrid : candidate time differences of arrival
%   spec    : nbin x nFrames x ngrid, real part of the phase-normalized
%             cross-spectrum steered to each candidate delay

X1 = X(:,:,1);
X2 = X(:,:,2);
[nbin,nFrames] = size(X1);
ngrid = length(tauGrid);

P = X1.*conj(X2);
% PHAT normalization. The eps floor keeps bins with a zero cross-spectrum
% (e.g. digital silence) at 0 instead of producing 0/0 = NaN.
P = P./max(abs(P),eps);
spec = zeros(nbin,nFrames,ngrid);
for pkInd = 1:ngrid
    EXP = repmat(exp(-2*1i*pi*tauGrid(pkInd)*f),1,nFrames);
    % real(P).*real(EXP) - imag(P).*imag(EXP) is real(P.*EXP)
    spec(:,:,pkInd) = ...
real(P).*real(EXP) - imag(P).*imag(EXP); 14 | end 15 | 16 | end 17 | -------------------------------------------------------------------------------- /ssl_tools/post_findPeaks.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WenzheLiu-Speech/sound-source-localization-algorithm_DOA_estimation/9f7e91bce217d69a110441af939cf041c8f26cd9/ssl_tools/post_findPeaks.m -------------------------------------------------------------------------------- /ssl_tools/post_sslResult.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/WenzheLiu-Speech/sound-source-localization-algorithm_DOA_estimation/9f7e91bce217d69a110441af939cf041c8f26cd9/ssl_tools/post_sslResult.m -------------------------------------------------------------------------------- /ssl_tools/pre_paramInit.m: -------------------------------------------------------------------------------- 1 | function Param = pre_paramInit(c,window, noverlap, nfft,pooling,azBound,elBound,gridRes,alphaRes,fs,freqRange,micPos) 2 | Param = struct; 3 | %% 4 | if(isempty(micPos)) 5 | error('ERROR : 请输入micPos'); 6 | else 7 | [dim1,~,~] = size(micPos); 8 | if(dim1~=3),error('ERROR : micPos必须是三维坐标');end 9 | end 10 | 11 | Param.window = window; 12 | Param.noverlap = noverlap; 13 | Param.nfft = nfft; 14 | Param.fs = fs; 15 | Param.f = Param.fs/Param.nfft*(1:Param.nfft/2).'; 16 | if(isempty(freqRange)) 17 | Param.freqBins = 1:length(Param.f); 18 | elseif(freqRange(1) < 0 || freqRange(2) > Param.fs/2) 19 | error('ERROR : 频率范围freqRange应在 0Hz 到 fs/2 之间'); 20 | else 21 | binMin = find(Param.f >= freqRange(1),1,'first'); 22 | binMax = find(Param.f= -90 && azBound <= 90) 44 | azBound = [azBound,azBound]; 45 | elseif(length(azBound) == 2 && azBound(1) >= -180 && azBound(2) <= 180 && azBound(1)<=azBound(2)) 46 | % nothing to do 47 | else 48 | error('ERROR : azBound输入不合法,应为在-/+ 180范围内的一个标量或一个二维向量'); 49 | end 50 | 51 | 
if(isempty(elBound)) 52 | elBound = [-90 90]; 53 | elseif(length(elBound) == 1 && elBound >= -90 && elBound <= 90) 54 | elBound = [elBound,elBound]; 55 | elseif(length(elBound) == 2 && elBound(1) >= -90 && elBound(2) <= 90 && elBound(1)<=elBound(2)) 56 | % nothing to do 57 | else 58 | error('ERROR : elBound输入不合法,应为在-/+ 90范围内的一个标量或一个二维向量'); 59 | end 60 | 61 | if(length(unique(elBound)) == 1 && length(unique(azBound)) == 1) 62 | error('ERROR : azBound和elBound至多有一个为标量'); 63 | end 64 | 65 | Param.azimuth = (azBound(1) : gridRes : azBound(2))'; 66 | Param.elevation = (elBound(1) : gridRes : elBound(2)); 67 | nAz = length(Param.azimuth); 68 | nEl = length(Param.elevation); 69 | Param.azimuthGrid = repmat(Param.azimuth,nEl,1)'; 70 | Param.elevationGrid = (reshape(repmat(Param.elevation,nAz,1),1,nAz*nEl)); 71 | 72 | %% 将所有候选方位转换为笛卡尔坐标 73 | Param.nGrid = length(Param.azimuthGrid); % (nAlxnEl) x 1 74 | directionCoordinate = zeros(3,Param.nGrid); % 3 x (nAlxnEl) 75 | [directionCoordinate(1,:), directionCoordinate(2,:), directionCoordinate(3,:)] = sph2cart(Param.azimuthGrid*pi/180, Param.elevationGrid*pi/180, 1); 76 | % 所有的麦克风对都初始化一个所有方位的笛卡尔坐标矩阵 3 x nMicPair x nDirction 77 | micPost = (Param.micPos)'; 78 | nMic = size(micPost,1); 79 | Param.pairId = nchoosek(1:nMic,2); 80 | Param.nPairs = size(Param.pairId,1); 81 | coordinate_pair = repmat(directionCoordinate,[1 1 Param.nPairs]); 82 | coordinate_pair = permute(coordinate_pair,[1 3 2]); 83 | %% 所有麦克风对之间的间距 84 | delta12 = micPost(Param.pairId(:,1),:) - micPost(Param.pairId(:,2),:); 85 | Param.d = sqrt(sum(delta12.^2,2)); 86 | delta12_pair = repmat(delta12',[1 1 Param.nGrid]); 87 | 88 | Param.alpha = real(acosd(shiftdim(sum(coordinate_pair.*delta12_pair),1)./repmat(Param.d,[1 Param.nGrid]))); 89 | Param.alphaSampled = cell(1,Param.nPairs); 90 | Param.tauGrid = cell(1,Param.nPairs); 91 | for index = 1:Param.nPairs 92 | Param.alphaSampled{index} = floor(min(Param.alpha(index,:))/Param.alphaRes) * Param.alphaRes : Param.alphaRes : 
ceil(max(Param.alpha(index,:))/Param.alphaRes) * Param.alphaRes; 93 | Param.tauGrid{index} = Param.d(index)*cos(Param.alphaSampled{index}.*pi/180)./Param.c; % 时延 94 | end 95 | end 96 | --------------------------------------------------------------------------------