├── .gitignore ├── LICENSE ├── README.md ├── check_requirements.py ├── data ├── 20160909-203344_doa_experiment.npz ├── 20160910-192848_doa_separation.npz ├── 20160911-035215_doa_synthetic.npz ├── 20160911-161112_doa_synthetic.npz ├── 20160911-175127_doa_synthetic.npz ├── 20160911-192530_doa_synthetic.npz ├── 20160911-225325_doa_synthetic.npz ├── 20160913-011415_doa_9_mics_10_src.npz ├── README.md ├── mic_layout_18-05.npz ├── mic_layout_19-05.npz └── sph_Dirac_18-05_23_31.npz ├── doa ├── __init__.py ├── cssm.py ├── doa.py ├── fri.py ├── music.py ├── srp.py ├── tools_fri_doa_plane.py ├── tops.py └── waves.py ├── experiment ├── __init__.py ├── arrays │ ├── __init__.py │ ├── compactsix_circular_1.py │ ├── compactsix_random_1.py │ └── pyramic_tetrahedron.py ├── bands_selection.py ├── calibrate_speakers.py ├── experiment_fpga.py ├── physics.py ├── point_cloud.py ├── samples │ ├── fq_sample0.wav │ ├── fq_sample1.wav │ ├── fq_sample2.wav │ ├── fq_sample3.wav │ ├── fq_sample4.wav │ └── fq_sample5.wav ├── speakers_microphones_locations.py └── sweep.wav ├── figure_doa_9_mics_10_src.py ├── figure_doa_9_mics_10_src_plot.py ├── figure_doa_experiment.py ├── figure_doa_experiment_plot.py ├── figure_doa_separation.py ├── figure_doa_separation_plot.py ├── figure_doa_synthetic.py ├── figure_doa_synthetic_plot.py ├── figures ├── README.md ├── experiment_9_mics_10_src.pdf ├── experiment_9_mics_10_src.png ├── experiment_error_box.pdf ├── experiment_error_box.png ├── experiment_minimum_separation.pdf ├── experiment_minimum_separation.png ├── experiment_snr_synthetic.pdf └── experiment_snr_synthetic.png ├── make_all_figures.sh ├── requirements.txt ├── system_install.sh ├── test_doa_recorded.py ├── test_doa_simulated.py ├── test_doa_whitenoise.py └── tools ├── __init__.py ├── dftidefs.py ├── generators.py ├── mkl_fft.py ├── plotters.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.wav 2 | 3 | .idea/* 4 | 5 | 
.idea/encodings.xml 6 | 7 | .idea/.name 8 | 9 | *.xml 10 | 11 | *.iml 12 | 13 | .gitignore_conflict-20160513-100846 14 | 15 | .idea/.workspace.xml.* 16 | 17 | *.pyc 18 | 19 | *.npz 20 | 21 | *.DS_Store 22 | 23 | result/*.pdf 24 | 25 | *.wav 26 | 27 | *.txt 28 | 29 | experiment/pyramic_recordings/jul26-fpga/slice_pyramic_files.sh 30 | 31 | experiment/pyramic_recordings/jul26-fpga/slice_files.py 32 | 33 | recordings/* 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, Hanjie Pan, Robin Scheibler, Eric Bezzam, Ivan Dokmanić, Martin Vetterli 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | FRIDA: FRI-based DOA Estimation for Arbitrary Array Layout 2 | ================================== 3 | 4 | This repository contains all the code to reproduce the results of the paper 5 | [*FRIDA: FRI-based DOA Estimation for Arbitrary Array Layout*](https://infoscience.epfl.ch/record/223649). 6 | 7 | *FRIDA* is a new algorithm for direction of arrival (DOA) estimation 8 | for acoustic sources. This repository contains a python implementation 9 | of the algorithm, as well as five conventional methods: MUSIC, SRP-PHAT, CSSM, 10 | WAVES, and TOPS (in the `doa` folder). 11 | 12 | A number of scripts were written to evaluate the performance of FRIDA and the 13 | other algorithms in different scenarios. Monte-Carlo simulations were used to 14 | study the noise robustness and the minimum angle of separation for close source 15 | resolution (`figure_doa_separation.py`, `figure_doa_synthetic.py`). A number 16 | of experiment on recorded data were done and the scripts for processing this 17 | data are also available (`figure_doa_experiment.py`, 18 | `figure_doa_9_mics_10_src.py`). 19 | 20 | We are available for any question or request relating to either the code 21 | or the theory behind it. Just ask! 22 | 23 | Abstract 24 | -------- 25 | 26 | In this paper we present FRIDA --- an algorithm for estimating directions of 27 | arrival of multiple wideband sound sources. FRIDA combines multi-band 28 | information coherently and achieves state-of-the-art resolution at extremely 29 | low signal-to-noise ratios. It works for arbitrary array layouts, but unlike 30 | the various steered response power and subspace methods, it does not require a 31 | grid search. FRIDA leverages recent advances in sampling signals with a finite 32 | rate of innovation. 
It is based on the insight that for any array layout, the 33 | entries of the spatial covariance matrix can be linearly transformed into a 34 | uniformly sampled sum of sinusoids. 35 | 36 | Authors 37 | ------- 38 | 39 | Hanjie Pan, Robin Scheibler, Eric Bezzam, and Martin Vetterli are with 40 | Audiovisual Communications Laboratory ([LCAV](http://lcav.epfl.ch)) at 41 | [EPFL](http://www.epfl.ch). 42 | 43 | Ivan Dokmanić is with Institut Langevin, CNRS, EsPCI Paris, PSL Research University. 44 | 45 | 46 | 47 | #### Contact 48 | 49 | [Robin Scheibler](mailto:robin[dot]scheibler[at]epfl[dot]ch)
50 | EPFL-IC-LCAV
51 | BC Building
52 | Station 14
53 | 1015 Lausanne 54 | 55 | Recreate the figures and sound samples 56 | -------------------------------------- 57 | 58 | The first step is to make sure that all the dependencies are satisfied. 59 | Check this in the Dependencies section or just run the following to check if you are missing something. 60 | 61 | python check_requirements.py 62 | 63 | If some dependencies are missing, they can be installed with `pip install -r requirements.txt`. 64 | 65 | Second, download the recordings data by running the following at the root of the 66 | repository 67 | 68 | wget https://zenodo.org/record/345132/files/FRIDA_recordings.tar.gz 69 | tar xzfv FRIDA_recordings.tar.gz 70 | 71 | For a quick test that everything works, you can run the main script in test mode. This will run just one loop 72 | of every simulation. 73 | 74 | ./make_all_figures.sh -t 75 | 76 | For the real deal, run the same command without any options. 77 | 78 | ./make_all_figures.sh 79 | 80 | Parallel computation engines can be used by adding `-n X` where X is the number of engines to use. Typically this is the number of cores available minus one. 81 | 82 | ./make_all_figures.sh -n X 83 | 84 | Alternatively, start an ipython cluster 85 | 86 | ipcluster start -n X 87 | 88 | and then type in the following commands in an ipython shell. 89 | 90 | # Simulation with different SNR values 91 | %run figure_doa_synthetic.py -f 92 | %run figure_doa_synthetic_plot.py -f 93 | 94 | # Simulation of closely spaced sources 95 | %run figure_doa_separation.py -f 96 | %run figure_doa_separation_plot.py -f 97 | 98 | # Experiment on speech recordings 99 | %run figure_doa_experiment.py -f 100 | %run figure_doa_experiment_plot.py -f 101 | 102 | # Experiment with 10 loudspeakers and 9 microphones 103 | %run figure_doa_9_mics_10_src.py -o 104 | %run figure_doa_9_mics_10_src_plot.py -f 105 | 106 | The data is saved in the `data` folder and the figures generated are collected in `figures`. 
107 | 108 | Data used in the paper 109 | ---------------------- 110 | 111 | The output from the simulation and processing that 112 | was used for the figures in the paper is stored in 113 | the repository in the following files. 114 | 115 | # Simulation with different SNR values 116 | data/20160911-035215_doa_synthetic.npz 117 | data/20160911-161112_doa_synthetic.npz 118 | data/20160911-175127_doa_synthetic.npz 119 | data/20160911-192530_doa_synthetic.npz 120 | data/20160911-225325_doa_synthetic.npz 121 | 122 | # Simulation of closely spaced sources 123 | data/20160910-192848_doa_separation.npz 124 | 125 | # Experiment on speech recordings 126 | data/20160909-203344_doa_experiment.npz 127 | 128 | # Experiment with 10 loudspeakers and 9 microphones 129 | data/20160913-011415_doa_9_mics_10_src.npz 130 | 131 | Recorded Data 132 | ------------- 133 | 134 | [![DOI](https://zenodo.org/badge/DOI/10.7910/DVN/SVQBEP.svg)](https://doi.org/10.7910/DVN/SVQBEP) 135 | 136 | The recorded speech and noise samples used in the experiment have been 137 | published both in [Dataverse](http://dx.doi.org/10.7910/DVN/SVQBEP) and 138 | [Zenodo](https://zenodo.org/record/345132#.WLhMfxIrJFx). The folder containing 139 | the recordings should be at the root of the repository and named `recordings`. 140 | Detailed description and instructions are provided along the data. 141 | 142 | wget https://zenodo.org/record/345132/files/FRIDA_recordings.tar.gz 143 | tar xzfv FRIDA_recordings.tar.gz 144 | 145 | Overview of results 146 | ------------------- 147 | 148 | We implemented for comparison five algorithms: incoherent MUSIC, SRP-PHAT, CSSM, WAVES, and TOPS. 149 | 150 | ### Influence of Noise (Fig. 1A) 151 | 152 | We compare the robustness to noise of the different algorithms when a single source is present. 153 | 154 | 155 | 156 | ### Resolving power (Fig. 1B) 157 | 158 | We study the resolution power of the different algorithms. 
How close can two sources become 159 | before the algorithm breaks down. 160 | 161 | 162 | 163 | ### Experiment on speech data (Fig. 2C) 164 | 165 | We record signals from 8 loudspeakers with 1, 2, or 3 sources active simultaneously. We use 166 | the algorithm to reconstruct the DOA and plot the statistics of the error. 167 | 168 | 169 | 170 | ### Experiment with more sources than microphones (Fig. 2D) 171 | 172 | FRIDA can identify the DOA of more sources than it uses microphones. We demonstrate 173 | this by playing 10 loudspeakers simultaneously and recovering all DOA with only 174 | 9 microphones. 175 | 176 | 177 | 178 | 179 | Dependencies 180 | ------------ 181 | 182 | For a quick check of the dependencies, run 183 | 184 | python check_requirements.py 185 | 186 | The script `system_install.sh` was used to install all the required software on a blank UBUNTU Xenial server. 187 | 188 | * A working distribution of [Python 2.7](https://www.python.org/downloads/). 189 | * [Numpy](http://www.numpy.org/), [Scipy](http://www.scipy.org/) 190 | * We use the distribution [anaconda](https://store.continuum.io/cshop/anaconda/) to simplify the setup of the environment. 191 | * Computations are very heavy and we use the 192 | [MKL](https://store.continuum.io/cshop/mkl-optimizations/) extension of 193 | Anaconda to speed things up. There is a [free license](https://store.continuum.io/cshop/academicanaconda) for academics. 194 | * We used ipyparallel and joblib for parallel computations. 195 | * [matplotlib](http://matplotlib.org) and [seaborn](https://stanford.edu/~mwaskom/software/seaborn/index.html#) for plotting the results. 196 | 197 | The pyroomacoustics package is used for STFT, fractional delay filters, microphone array generation, and some more. 
198 | 199 | pip install git+https://github.com/LCAV/pyroomacoustics 200 | 201 | List of standard packages needed 202 | 203 | numpy, scipy, pandas, ipyparallel, seaborn, zmq, joblib 204 | 205 | In addition the two following libraries are not really needed to recreate the figures, but were used to resample and process the recording files 206 | 207 | scikits.audiolab, scikits.samplerate 208 | 209 | They require installation of shared libraries 210 | 211 | # Ubuntu code 212 | apt-get install libsndfile1 libsndfile1-dev libsamplerate0 libsamplerate0-dev # Ubuntu 213 | 214 | # OS X install 215 | brew install libsndfile 216 | brew install libsamplerate 217 | 218 | 219 | 220 | Systems Tested 221 | -------------- 222 | 223 | ### Linux 224 | 225 | | Machine | ICCLUSTER EPFL | 226 | |---------|---------------------------------| 227 | | System | Ubuntu 16.04.5 | 228 | | CPU | Intel Xeon E5-2680 v3 (Haswell) | 229 | | RAM | 64 GB | 230 | 231 | ### OS X 232 | 233 | | Machine | MacBook Pro Retina 15-inch, Early 2013 | 234 | |---------|----------------------------------------| 235 | | System | OS X Maverick 10.11.6 | 236 | | CPU | Intel Core i7 | 237 | | RAM | 16 GB | 238 | 239 | System Info: 240 | ------------ 241 | Darwin 15.6.0 Darwin Kernel Version 15.6.0: Mon Aug 29 20:21:34 PDT 2016; root:xnu-3248.60.11~1/RELEASE_X86_64 x86_64 242 | 243 | Python Info: 244 | ------------ 245 | Python 2.7.11 :: Anaconda custom (x86_64) 246 | 247 | Python Packages Info (conda) 248 | ---------------------------- 249 | # packages in environment at /Users/scheibler/anaconda: 250 | accelerate 2.0.2 np110py27_p0 251 | accelerate_cudalib 2.0 0 252 | anaconda custom py27_0 253 | ipyparallel 5.0.1 py27_0 254 | ipython 4.2.0 py27_0 255 | ipython-notebook 4.0.4 py27_0 256 | ipython-qtconsole 4.0.1 py27_0 257 | ipython_genutils 0.1.0 py27_0 258 | joblib 0.9.4 py27_0 259 | mkl 11.3.3 0 260 | mkl-rt 11.1 p0 261 | mkl-service 1.1.2 py27_2 262 | mklfft 2.1 np110py27_p0 263 | numpy 1.11.0 264 | numpy 1.11.1 py27_0 
265 | numpydoc 0.5 266 | pandas 0.18.1 np111py27_0 267 | pyzmq 15.2.0 py27_1 268 | scikits.audiolab 0.11.0 269 | scikits.samplerate 0.3.3 270 | scipy 0.17.0 271 | scipy 0.18.0 np111py27_0 272 | seaborn 0.7.1 py27_0 273 | seaborn 0.7.1 274 | 275 | License 276 | ------- 277 | 278 | Copyright (c) 2016, Hanjie Pan, Robin Scheibler, Eric Bezzam, Ivan Dokmanić, Martin Vetterli 279 | 280 | Permission is hereby granted, free of charge, to any person obtaining a copy 281 | of this software and associated documentation files (the "Software"), to deal 282 | in the Software without restriction, including without limitation the rights 283 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 284 | copies of the Software, and to permit persons to whom the Software is 285 | furnished to do so, subject to the following conditions: 286 | 287 | The above copyright notice and this permission notice shall be included in all 288 | copies or substantial portions of the Software. 289 | 290 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 291 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 292 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 293 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 294 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 295 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 296 | SOFTWARE. 297 | -------------------------------------------------------------------------------- /check_requirements.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import pkg_resources 4 | import traceback as tb 5 | from pkg_resources import DistributionNotFound, VersionConflict 6 | 7 | # here, if a dependency is not met, a DistributionNotFound or VersionConflict 8 | # exception is thrown. 
9 | some_missing = False 10 | with open('./requirements.txt', 'r') as f: 11 | dependencies = f.read().splitlines() 12 | 13 | for dep in dependencies: 14 | try: 15 | pkg_resources.require([dep]) 16 | except: 17 | print('Error: package', dep, 'is required.') 18 | some_missing = True 19 | 20 | if some_missing: 21 | sys.exit(1) 22 | else: 23 | print('All dependencies are satisfied.') 24 | -------------------------------------------------------------------------------- /data/20160909-203344_doa_experiment.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/20160909-203344_doa_experiment.npz -------------------------------------------------------------------------------- /data/20160910-192848_doa_separation.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/20160910-192848_doa_separation.npz -------------------------------------------------------------------------------- /data/20160911-035215_doa_synthetic.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/20160911-035215_doa_synthetic.npz -------------------------------------------------------------------------------- /data/20160911-161112_doa_synthetic.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/20160911-161112_doa_synthetic.npz -------------------------------------------------------------------------------- /data/20160911-175127_doa_synthetic.npz: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/20160911-175127_doa_synthetic.npz -------------------------------------------------------------------------------- /data/20160911-192530_doa_synthetic.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/20160911-192530_doa_synthetic.npz -------------------------------------------------------------------------------- /data/20160911-225325_doa_synthetic.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/20160911-225325_doa_synthetic.npz -------------------------------------------------------------------------------- /data/20160913-011415_doa_9_mics_10_src.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/20160913-011415_doa_9_mics_10_src.npz -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | Data Folder 2 | ----------- 3 | 4 | The output from the simulation and processing that 5 | was used for the figures in the paper is stored in 6 | the repository in the following files. 
7 | 8 | # Simulation with different SNR values 9 | data/20160911-035215_doa_synthetic.npz 10 | data/20160911-161112_doa_synthetic.npz 11 | data/20160911-175127_doa_synthetic.npz 12 | data/20160911-192530_doa_synthetic.npz 13 | data/20160911-225325_doa_synthetic.npz 14 | 15 | # Simulation of closely spaced sources 16 | data/20160910-192848_doa_separation.npz 17 | 18 | # Experiment on speech recordings 19 | data/20160909-203344_doa_experiment.npz 20 | 21 | # Experiment with 10 loudspeakers and 9 microphones 22 | data/20160913-011415_doa_9_mics_10_src.npz 23 | 24 | -------------------------------------------------------------------------------- /data/mic_layout_18-05.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/mic_layout_18-05.npz -------------------------------------------------------------------------------- /data/mic_layout_19-05.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/mic_layout_19-05.npz -------------------------------------------------------------------------------- /data/sph_Dirac_18-05_23_31.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/data/sph_Dirac_18-05_23_31.npz -------------------------------------------------------------------------------- /doa/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0' 2 | 3 | # http://mikegrouchy.com/blog/2012/05/be-pythonic-__init__py.html 4 | 5 | import os,sys,inspect 6 | currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) 7 | parentdir = os.path.dirname(currentdir) 8 | sys.path.insert(0,parentdir) 9 | 10 | from .doa import * 11 | from 
.srp import * 12 | from .music import * 13 | from .cssm import * 14 | from .waves import * 15 | from .tops import * 16 | from .fri import * 17 | 18 | import tools_fri_doa_plane as tools_fri 19 | 20 | # Create this dictionary as a shortcut to different algorithms 21 | algos = { 22 | 'SRP' : SRP, 23 | 'MUSIC' : MUSIC, 24 | 'CSSM' : CSSM, 25 | 'WAVES' : WAVES, 26 | 'TOPS' : TOPS, 27 | 'FRI' : FRI, 28 | } 29 | 30 | -------------------------------------------------------------------------------- /doa/cssm.py: -------------------------------------------------------------------------------- 1 | # Author: Eric Bezzam 2 | # Date: July 15, 2016 3 | 4 | from music import * 5 | 6 | class CSSM(MUSIC): 7 | """ 8 | Class to apply the Coherent Signal-Subspace method (CSSM) [H. Wang and M. 9 | Kaveh] for Direction of Arrival (DoA) estimation. 10 | 11 | .. note:: Run locate_source() to apply the CSSM algorithm. 12 | 13 | :param L: Microphone array positions. Each column should correspond to the 14 | cartesian coordinates of a single microphone. 15 | :type L: numpy array 16 | :param fs: Sampling frequency. 17 | :type fs: float 18 | :param nfft: FFT length. 19 | :type nfft: int 20 | :param c: Speed of sound. Default: 343 m/s 21 | :type c: float 22 | :param num_src: Number of sources to detect. Default: 1 23 | :type num_src: int 24 | :param mode: 'far' or 'near' for far-field or near-field detection 25 | respectively. Default: 'far' 26 | :type mode: str 27 | :param r: Candidate distances from the origin. Default: np.ones(1) 28 | :type r: numpy array 29 | :param theta: Candidate azimuth angles (in radians) with respect to x-axis. 30 | Default: np.linspace(-180.,180.,30)*np.pi/180 31 | :type theta: numpy array 32 | :param phi: Candidate elevation angles (in radians) with respect to z-axis. 33 | Default is x-y plane search: np.pi/2*np.ones(1) 34 | :type phi: numpy array 35 | :param num_iter: Number of iterations for CSSM. 
Default: 5 36 | :type num_iter: int 37 | """ 38 | def __init__(self, L, fs, nfft, c=343.0, num_src=1, mode='far', r=None, 39 | theta=None, phi=None, num_iter=5, **kwargs): 40 | MUSIC.__init__(self, L=L, fs=fs, nfft=nfft, c=c, num_src=num_src, 41 | mode=mode, r=r, theta=theta, phi=phi) 42 | self.iter = num_iter 43 | 44 | def _process(self, X): 45 | """ 46 | Perform CSSM for given frame in order to estimate steered response 47 | spectrum. 48 | """ 49 | 50 | # compute empirical cross correlation matrices 51 | C_hat = self._compute_correlation_matrices(X) 52 | 53 | # compute initial estimates 54 | beta = [] 55 | invalid = [] 56 | for k in range(self.num_freq): 57 | self.P = 1 / self._compute_spatial_spectrum(C_hat[k,:,:], 58 | self.freq_bins[k]) 59 | self._peaks1D() 60 | if len(self.src_idx) < self.num_src: # remove frequency 61 | invalid.append(k) 62 | # else: 63 | beta.append(self.src_idx) 64 | desired_freq = np.delete(self.freq_bins, invalid) 65 | # self.num_freq = self.num_freq - len(invalid) 66 | 67 | # compute reference frequency (take bin with max amplitude) 68 | f0 = np.argmax(np.sum(np.sum(abs(X[:,self.freq_bins,:]), axis=0), 69 | axis=1)) 70 | f0 = self.freq_bins[f0] 71 | 72 | # iterate to find DOA, maximum number of iterations is 20 73 | i = 0 74 | while(i < self.iter or (len(self.src_idx) < self.num_src and i < 20)): 75 | # coherent sum 76 | R = self._coherent_sum(C_hat, f0, beta) 77 | # subspace decomposition 78 | Es, En, ws, wn = self._subspace_decomposition(R) 79 | # compute spatial spectrum 80 | cross = np.dot(En,np.conjugate(En).T) 81 | # cross = np.identity(self.M) - np.dot(Es, np.conjugate(Es).T) 82 | self.P = self._compute_spatial_spectrum(cross,f0) 83 | self._peaks1D() 84 | beta = np.tile(self.src_idx, (self.num_freq, 1)) 85 | i += 1 86 | 87 | def _coherent_sum(self, C_hat, f0, beta): 88 | R = np.zeros((self.M,self.M)) 89 | # coherently sum frequencies 90 | for j in range(len(self.freq_bins)): 91 | k = self.freq_bins[j] 92 | Aj = 
self.mode_vec[k,:,beta[j]].T 93 | A0 = self.mode_vec[f0,:,beta[j]].T 94 | B = np.concatenate((np.zeros([self.M-len(beta[j]), len(beta[j])]), 95 | np.identity(self.M-len(beta[j]))), axis=1).T 96 | Tj = np.dot(np.c_[A0, B], np.linalg.inv(np.c_[Aj, B])) 97 | R = R + np.dot(np.dot(Tj,C_hat[j,:,:]),np.conjugate(Tj).T) 98 | return R 99 | -------------------------------------------------------------------------------- /doa/doa.py: -------------------------------------------------------------------------------- 1 | # Author: Eric Bezzam 2 | # Date: Feb 15, 2016 3 | from __future__ import division 4 | 5 | """Direction of Arrival (DoA) estimation.""" 6 | 7 | import numpy as np 8 | import math, sys 9 | import warnings 10 | from abc import ABCMeta, abstractmethod 11 | 12 | from tools_fri_doa_plane import extract_off_diag, cov_mtx_est 13 | 14 | try: 15 | import matplotlib as mpl 16 | 17 | matplotlib_available = True 18 | except ImportError: 19 | matplotlib_available = False 20 | 21 | if matplotlib_available: 22 | import matplotlib.pyplot as plt 23 | from matplotlib import cm 24 | from mpl_toolkits.mplot3d import Axes3D 25 | from matplotlib.ticker import LinearLocator, FormatStrFormatter 26 | 27 | tol = 1e-14 28 | 29 | 30 | class DOA(object): 31 | """ 32 | 33 | Abstract parent class for Direction of Arrival (DoA) algorithms. After 34 | creating an object (SRP, MUSIC, CSSM, WAVES, or TOPS), run locate_source to 35 | apply the corresponding algorithm. 36 | 37 | :param L: Microphone array positions. Each column should correspond to the 38 | cartesian coordinates of a single microphone. 39 | :type L: numpy array 40 | :param fs: Sampling frequency. 41 | :type fs: float 42 | :param nfft: FFT length. 43 | :type nfft: int 44 | :param c: Speed of sound. Default: 343 m/s 45 | :type c: float 46 | :param num_src: Number of sources to detect. Default: 1 47 | :type num_src: int 48 | :param mode: 'far' or 'near' for far-field or near-field detection 49 | respectively. 
Default: 'far' 50 | :type mode: str 51 | :param r: Candidate distances from the origin. Default: np.ones(1) 52 | :type r: numpy array 53 | :param theta: Candidate azimuth angles (in radians) with respect to x-axis. 54 | Default: np.linspace(-180.,180.,30)*np.pi/180 55 | :type theta: numpy array 56 | :param phi: Candidate elevation angles (in radians) with respect to z-axis. 57 | Default is x-y plane search: np.pi/2*np.ones(1) 58 | :type phi: numpy array 59 | """ 60 | 61 | __metaclass__ = ABCMeta 62 | 63 | def __init__(self, L, fs, nfft, c=343.0, num_src=1, mode='far', r=None, 64 | theta=None, phi=None): 65 | 66 | self.L = L # locations of mics 67 | self.fs = fs # sampling frequency 68 | self.c = c # speed of sound 69 | self.M = L.shape[1] # number of microphones 70 | self.D = L.shape[0] # number of dimensions (x,y,z) 71 | self.num_snap = None # number of snapshots 72 | 73 | self.nfft = nfft 74 | self.max_bin = int(self.nfft/2) + 1 75 | self.freq_bins = None 76 | self.freq_hz = None 77 | self.num_freq = None 78 | 79 | self.num_src = self._check_num_src(num_src) 80 | self.sources = np.zeros([self.D, self.num_src]) 81 | self.src_idx = np.zeros(self.num_src, dtype=np.int) 82 | self.phi_recon = None 83 | 84 | self.mode = mode 85 | if self.mode is 'far': 86 | self.r = np.ones(1) 87 | elif r is None: 88 | self.r = np.ones(1) 89 | self.mode = 'far' 90 | else: 91 | self.r = r 92 | if r == np.ones(1): 93 | mode = 'far' 94 | if theta is None: 95 | self.theta = np.linspace(-180., 180., 30) * np.pi / 180 96 | else: 97 | self.theta = theta 98 | if phi is None: 99 | self.phi = np.pi / 2 * np.ones(1) 100 | else: 101 | self.phi = phi 102 | 103 | # spatial spectrum / dirty image (FRI) 104 | self.P = None 105 | 106 | # build lookup table to candidate locations from r, theta, phi 107 | from fri import FRI 108 | if not isinstance(self, FRI): 109 | self.loc = None 110 | self.num_loc = None 111 | self.build_lookup() 112 | self.mode_vec = None 113 | self.compute_mode() 114 | else: # no 
grid search for FRI 115 | self.num_loc = len(self.theta) 116 | 117 | def locate_sources(self, X, num_src=None, freq_range=[500.0, 4000.0], 118 | freq_bins=None, freq_hz=None): 119 | """ 120 | Locate source(s) using corresponding algorithm. 121 | 122 | :param X: Set of signals in the frequency (RFFT) domain for current 123 | frame. Size should be M x F x S, where M should correspond to the 124 | number of microphones, F to nfft/2+1, and S to the number of snapshots 125 | (user-defined). It is recommended to have S >> M. 126 | :type X: numpy array 127 | :param num_src: Number of sources to detect. Default is value given to 128 | object constructor. 129 | :type num_src: int 130 | :param freq_range: Frequency range on which to run DoA: [fmin, fmax]. 131 | :type freq_range: list of floats, length 2 132 | :param freq_bins: List of individual frequency bins on which to run 133 | DoA. 134 | If defined by user, it will **not** take into consideration freq_range 135 | or freq_hz. 136 | :type freq_bins: list of int 137 | :param freq_hz: List of individual frequencies on which to run DoA. If 138 | defined by user, it will **not** take into consideration freq_range. 
139 | :type freq_hz: list of floats 140 | """ 141 | 142 | # check validity of inputs 143 | if num_src is not None and num_src != self.num_src: 144 | self.num_src = self._check_num_src(num_src) 145 | self.sources = np.zeros([self.num_src, self.D]) 146 | self.src_idx = np.zeros(self.num_src, dtype=np.int) 147 | self.angle_of_arrival = None 148 | if (X.shape[0] != self.M): 149 | raise ValueError('Number of signals (rows) does not match the \ 150 | number of microphones.') 151 | if (X.shape[1] != self.max_bin): 152 | raise ValueError("Mismatch in FFT length.") 153 | self.num_snap = X.shape[2] 154 | 155 | # frequency bins on which to apply DOA 156 | if freq_bins is not None: 157 | self.freq_bins = freq_bins 158 | elif freq_hz is not None: 159 | self.freq_bins = [int(np.round(f / self.fs * self.nfft)) 160 | for f in freq_bins] 161 | else: 162 | print 'Using freq_range' 163 | freq_range = [int(np.round(f / self.fs * self.nfft)) 164 | for f in freq_range] 165 | self.freq_bins = np.arange(freq_range[0], freq_range[1]) 166 | 167 | self.freq_bins = self.freq_bins[self.freq_bins < self.max_bin] 168 | self.freq_bins = self.freq_bins[self.freq_bins >= 0] 169 | self.freq_hz = self.freq_bins * float(self.fs) / float(self.nfft) 170 | self.num_freq = len(self.freq_bins) 171 | 172 | # search for DoA according to desired algorithm 173 | self.P = np.zeros(self.num_loc) 174 | self._process(X) 175 | 176 | # locate sources 177 | if self.phi_recon is None: # not FRI 178 | self._peaks1D() 179 | 180 | def polar_plt_dirac(self, phi_ref=None, alpha_ref=None, save_fig=False, 181 | file_name=None, plt_dirty_img=True): 182 | """ 183 | Generate polar plot of DoA results. 184 | 185 | :param phi_ref: True direction of sources (in radians). 186 | :type phi_ref: numpy array 187 | :param alpha_ref: Estimated amplitude of sources. 188 | :type alpha_ref: numpy array 189 | :param save_fig: Whether or not to save figure as pdf. 190 | :type save_fig: bool 191 | :param file_name: Name of file (if saved). 
Default is 192 | 'polar_recon_dirac.pdf' 193 | :type file_name: str 194 | :param plt_dirty_img: Whether or not to plot spatial spectrum or 195 | 'dirty image' in the case of FRI. 196 | :type plt_dirty_img: bool 197 | """ 198 | 199 | phi_recon = self.phi_recon 200 | num_mic = self.M 201 | phi_plt = self.theta 202 | 203 | # determine amplitudes 204 | from fri import FRI 205 | if not isinstance(self, FRI): # use spatial spectrum 206 | dirty_img = self.P 207 | alpha_recon = self.P[self.src_idx] 208 | alpha_ref = alpha_recon 209 | else: # create dirty image 210 | dirty_img = self._gen_dirty_img() 211 | alpha_recon = np.mean(self.alpha_recon, axis=1) 212 | alpha_recon /= alpha_recon.max() 213 | if alpha_ref is None: # non-simulated case 214 | alpha_ref = alpha_recon 215 | 216 | # plot 217 | fig = plt.figure(figsize=(5, 4), dpi=90) 218 | ax = fig.add_subplot(111, projection='polar') 219 | base = 1. 220 | height = 10. 221 | blue = [0, 0.447, 0.741] 222 | red = [0.850, 0.325, 0.098] 223 | 224 | if phi_ref is not None: 225 | if alpha_ref.shape[0] < phi_ref.shape[0]: 226 | alpha_ref = np.concatenate((alpha_ref,np.zeros(phi_ref.shape[0]- 227 | alpha_ref.shape[0]))) 228 | # match detected with truth 229 | recon_err, sort_idx = polar_distance(phi_recon, phi_ref) 230 | if self.num_src > 1: 231 | phi_recon = phi_recon[sort_idx[:, 0]] 232 | alpha_recon = alpha_recon[sort_idx[:, 0]] 233 | phi_ref = phi_ref[sort_idx[:, 1]] 234 | alpha_ref = alpha_ref[sort_idx[:, 1]] 235 | elif phi_ref.shape[0] > 1: # one detected source 236 | alpha_ref[sort_idx[1]] = alpha_recon 237 | # markers for original doa 238 | K = len(phi_ref) 239 | ax.scatter(phi_ref, base + height*alpha_ref, c=np.tile(blue, 240 | (K, 1)), s=70, alpha=0.75, marker='^', linewidths=0, 241 | label='original') 242 | # stem for original doa 243 | if K > 1: 244 | for k in range(K): 245 | ax.plot([phi_ref[k], phi_ref[k]], [base, base + 246 | height*alpha_ref[k]], linewidth=1.5, linestyle='-', 247 | color=blue, alpha=0.6) 248 | else: 
249 | ax.plot([phi_ref, phi_ref], [base, base + height*alpha_ref], 250 | linewidth=1.5, linestyle='-', color=blue, alpha=0.6) 251 | 252 | K_est = phi_recon.size 253 | # markers for reconstructed doa 254 | ax.scatter(phi_recon, base + height*alpha_recon, c=np.tile(red, 255 | (K_est, 1)), s=100, alpha=0.75, marker='*', linewidths=0, 256 | label='reconstruction') 257 | 258 | # stem for reconstructed doa 259 | if K_est > 1: 260 | for k in range(K_est): 261 | ax.plot([phi_recon[k], phi_recon[k]], [base, base + 262 | height*alpha_recon[k]], linewidth=1.5, linestyle='-', 263 | color=red, alpha=0.6) 264 | else: 265 | ax.plot([phi_recon, phi_recon], [1, 1 + alpha_recon], 266 | linewidth=1.5, linestyle='-', color=red, alpha=0.6) 267 | 268 | # plot the 'dirty' image 269 | if plt_dirty_img: 270 | dirty_img = np.abs(dirty_img) 271 | min_val = dirty_img.min() 272 | max_val = dirty_img.max() 273 | dirty_img = (dirty_img - min_val) / (max_val - min_val) 274 | 275 | # we need to make a complete loop, copy first value to last 276 | c_phi_plt = np.r_[phi_plt, phi_plt[0]] 277 | c_dirty_img = np.r_[dirty_img, dirty_img[0]] 278 | ax.plot(c_phi_plt, base + height*c_dirty_img, linewidth=1, 279 | alpha=0.55,linestyle='-', color=[0.466, 0.674, 0.188], 280 | label='spatial spectrum') 281 | 282 | handles, labels = ax.get_legend_handles_labels() 283 | ax.legend(handles=handles[:3], framealpha=0.5, 284 | scatterpoints=1, loc=8, fontsize=9, 285 | ncol=1, bbox_to_anchor=(0.9, -0.17), 286 | handletextpad=.2, columnspacing=1.7, labelspacing=0.1) 287 | 288 | ax.set_xlabel(r'azimuth $\bm{\varphi}$', fontsize=11) 289 | ax.set_xticks(np.linspace(0, 2 * np.pi, num=12, endpoint=False)) 290 | ax.xaxis.set_label_coords(0.5, -0.11) 291 | ax.set_yticks(np.linspace(0, 1, 2)) 292 | ax.xaxis.grid(b=True, color=[0.3, 0.3, 0.3], linestyle=':') 293 | ax.yaxis.grid(b=True, color=[0.3, 0.3, 0.3], linestyle='--') 294 | ax.set_ylim([0, base + height]) 295 | if save_fig: 296 | if file_name is None: 297 | file_name = 
'polar_recon_dirac.pdf' 298 | plt.savefig(file_name, format='pdf', dpi=300, transparent=True) 299 | 300 | 301 | def build_lookup(self, r=None, theta=None, phi=None): 302 | """ 303 | Construct lookup table for given candidate locations (in spherical 304 | coordinates). Each column is a location in cartesian coordinates. 305 | 306 | :param r: Candidate distances from the origin. 307 | :type r: numpy array 308 | :param theta: Candidate azimuth angles with respect to x-axis. 309 | :type theta: numpy array 310 | :param phi: Candidate elevation angles with respect to z-axis. 311 | :type phi: numpy array 312 | """ 313 | if theta is not None: 314 | self.theta = theta 315 | if phi is not None: 316 | self.phi = phi 317 | if r is not None: 318 | self.r = r 319 | if self.r == np.ones(1): 320 | self.mode = 'far' 321 | else: 322 | self.mode = 'near' 323 | self.loc = np.zeros([self.D, len(self.r) * len(self.theta) * 324 | len(self.phi)]) 325 | self.num_loc = self.loc.shape[1] 326 | # convert to cartesian 327 | for i in range(len(self.r)): 328 | r_s = self.r[i] 329 | for j in range(len(self.theta)): 330 | theta_s = self.theta[j] 331 | for k in range(len(self.phi)): 332 | # spher = np.array([r_s,theta_s,self.phi[k]]) 333 | self.loc[:, i * len(self.theta) + j * len(self.phi) + k] = \ 334 | spher2cart(r_s, theta_s, self.phi[k])[0:self.D] 335 | 336 | def compute_mode(self): 337 | """ 338 | Pre-compute mode vectors from candidate locations (in spherical 339 | coordinates). 340 | """ 341 | if self.num_loc is None: 342 | raise ValueError('Lookup table appears to be empty. 
\ 343 | Run build_lookup().') 344 | self.mode_vec = np.zeros((self.max_bin,self.M,self.num_loc), 345 | dtype='complex64') 346 | if (self.nfft % 2 == 1): 347 | raise ValueError('Signal length must be even.') 348 | f = 1.0 / self.nfft * np.linspace(0, self.nfft / 2, self.max_bin) \ 349 | * 1j * 2 * np.pi 350 | for i in range(self.num_loc): 351 | p_s = self.loc[:, i] 352 | for m in range(self.M): 353 | p_m = self.L[:, m] 354 | if (self.mode == 'near'): 355 | dist = np.linalg.norm(p_m - p_s, axis=1) 356 | if (self.mode == 'far'): 357 | dist = np.dot(p_s, p_m) 358 | # tau = np.round(self.fs*dist/self.c) # discrete - jagged 359 | tau = self.fs * dist / self.c # "continuous" - smoother 360 | self.mode_vec[:, m, i] = np.exp(f * tau) 361 | 362 | def _check_num_src(self, num_src): 363 | # # check validity of inputs 364 | # if num_src > self.M: 365 | # warnings.warn('Number of sources cannot be more than number of \ 366 | # microphones. Changing number of sources to ' + 367 | # str(self.M) + '.') 368 | # num_src = self.M 369 | if num_src < 1: 370 | warnings.warn('Number of sources must be at least 1. 
Changing \ 371 | number of sources to 1.') 372 | num_src = 1 373 | valid = num_src 374 | return valid 375 | 376 | def _peaks1D(self): 377 | if self.num_src == 1: 378 | self.src_idx[0] = np.argmax(self.P) 379 | self.sources[:, 0] = self.loc[:, self.src_idx[0]] 380 | self.phi_recon = self.theta[self.src_idx[0]] 381 | else: 382 | peak_idx = [] 383 | n = self.P.shape[0] 384 | for i in range(self.num_loc): 385 | # straightforward peak finding 386 | if self.P[i] >= self.P[(i-1)%n] and self.P[i] > self.P[(i+1)%n]: 387 | if len(peak_idx) == 0 or peak_idx[-1] != i-1: 388 | if not (i == self.num_loc and self.P[i] == self.P[0]): 389 | peak_idx.append(i) 390 | 391 | peaks = self.P[peak_idx] 392 | max_idx = np.argsort(peaks)[-self.num_src:] 393 | self.src_idx = [peak_idx[k] for k in max_idx] 394 | self.sources = self.loc[:, self.src_idx] 395 | self.phi_recon = self.theta[self.src_idx] 396 | self.num_src = len(self.src_idx) 397 | 398 | 399 | # ------------------Miscellaneous Functions---------------------# 400 | 401 | def spher2cart(r, theta, phi): 402 | """ 403 | Convert a spherical point to cartesian coordinates. 404 | """ 405 | # convert to cartesian 406 | x = r * np.cos(theta) * np.sin(phi) 407 | y = r * np.sin(theta) * np.sin(phi) 408 | z = r * np.cos(phi) 409 | return np.array([x, y, z]) 410 | 411 | 412 | def polar_distance(x1, x2): 413 | """ 414 | Given two arrays of numbers x1 and x2, pairs the cells that are the 415 | closest and provides the pairing matrix index: x1(index(1,:)) should be as 416 | close as possible to x2(index(2,:)). The function outputs the average of 417 | the absolute value of the differences abs(x1(index(1,:))-x2(index(2,:))). 
418 | :param x1: vector 1 419 | :param x2: vector 2 420 | :return: d: minimum distance between d 421 | index: the permutation matrix 422 | """ 423 | x1 = np.reshape(x1, (1, -1), order='F') 424 | x2 = np.reshape(x2, (1, -1), order='F') 425 | N1 = x1.size 426 | N2 = x2.size 427 | diffmat = np.arccos(np.cos(x1 - np.reshape(x2, (-1, 1), order='F'))) 428 | min_N1_N2 = np.min([N1, N2]) 429 | index = np.zeros((min_N1_N2, 2), dtype=int) 430 | if min_N1_N2 > 1: 431 | for k in range(min_N1_N2): 432 | d2 = np.min(diffmat, axis=0) 433 | index2 = np.argmin(diffmat, axis=0) 434 | index1 = np.argmin(d2) 435 | index2 = index2[index1] 436 | index[k, :] = [index1, index2] 437 | diffmat[index2, :] = float('inf') 438 | diffmat[:, index1] = float('inf') 439 | d = np.mean(np.arccos(np.cos(x1[:, index[:, 0]] - x2[:, index[:, 1]]))) 440 | else: 441 | d = np.min(diffmat) 442 | index = np.argmin(diffmat) 443 | if N1 == 1: 444 | index = np.array([1, index]) 445 | else: 446 | index = np.array([index, 1]) 447 | return d, index 448 | -------------------------------------------------------------------------------- /doa/fri.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from doa import * 3 | 4 | from tools_fri_doa_plane import pt_src_recon_multiband, extract_off_diag, cov_mtx_est 5 | 6 | import os 7 | if os.environ.get('DISPLAY') is None: 8 | import matplotlib 9 | matplotlib.use('Agg') 10 | 11 | from matplotlib import rcParams 12 | 13 | # for latex rendering 14 | os.environ['PATH'] = os.environ['PATH'] + ':/usr/texbin' + ':/opt/local/bin' + ':/Library/TeX/texbin/' 15 | rcParams['text.usetex'] = True 16 | rcParams['text.latex.unicode'] = True 17 | rcParams['text.latex.preamble'] = [r"\usepackage{bm}"] 18 | 19 | class FRI(DOA): 20 | 21 | def __init__(self, L, fs, nfft, max_four, c=343.0, num_src=1, theta=None, G_iter=None, 22 | noise_floor=0., noise_margin=1.5, **kwargs): 23 | DOA.__init__(self, L=L, fs=fs, nfft=nfft, 
c=c, num_src=num_src, mode='far', theta=theta) 24 | self.max_four = max_four 25 | self.visi_noisy_all = None 26 | self.alpha_recon = np.array(num_src, dtype=float) 27 | 28 | self.noise_floor = noise_floor 29 | self.noise_margin = noise_margin 30 | 31 | # Set the number of updates of the mapping matrix 32 | self.update_G = True if G_iter is not None and G_iter > 0 else False 33 | self.G_iter = G_iter if self.update_G else 1 34 | 35 | def _process(self, X): 36 | 37 | # loop over all subbands 38 | self.num_freq = self.freq_bins.shape[0] 39 | 40 | ''' 41 | visi_noisy_all = [] 42 | for band_count in range(self.num_freq): 43 | # Estimate the covariance matrix and extract off-diagonal entries 44 | visi_noisy = extract_off_diag(cov_mtx_est(X[:,self.freq_bins[band_count],:])) 45 | visi_noisy_all.append(visi_noisy) 46 | ''' 47 | visi_noisy_all = self._visibilities(X) 48 | 49 | # stack as columns (NOT SUBTRACTING NOISELESS) 50 | self.visi_noisy_all = np.column_stack(visi_noisy_all) 51 | 52 | # reconstruct point sources with FRI 53 | max_ini = 50 # maximum number of random initialisation 54 | noise_level = 1e-10 55 | self.phi_recon, self.alpha_recon = pt_src_recon_multiband(self.visi_noisy_all, 56 | self.L[0,:], self.L[1,:], 57 | 2*np.pi*self.freq_hz, self.c, 58 | self.num_src, self.max_four, 59 | noise_level, max_ini, 60 | update_G=self.update_G, G_iter=self.G_iter, 61 | verbose=False) 62 | 63 | def _visibilities(self, X): 64 | 65 | visi_noisy_all = [] 66 | for band_count in range(self.num_freq): 67 | # Estimate the covariance matrix and extract off-diagonal entries 68 | fn = self.freq_bins[band_count] 69 | energy = np.var(X[:,fn,:], axis=0) 70 | I = np.where(energy > self.noise_margin * self.noise_floor) 71 | visi_noisy = extract_off_diag(cov_mtx_est(X[:,fn,I[0]])) 72 | visi_noisy_all.append(visi_noisy) 73 | 74 | return visi_noisy_all 75 | 76 | def _gen_dirty_img(self): 77 | """ 78 | Compute the dirty image associated with the given measurements. 
def polar2cart(rho, phi):
    """
    Map polar coordinates to cartesian.

    :param rho: radius
    :param phi: azimuth
    :return: (x, y) tuple of cartesian coordinates
    """
    return rho * np.cos(phi), rho * np.sin(phi)
-------------------------------------------------------------------------------- 1 | # Author: Eric Bezzam 2 | # Date: July 15, 2016 3 | 4 | from doa import * 5 | 6 | class MUSIC(DOA): 7 | """ 8 | Class to apply MUltiple SIgnal Classication (MUSIC) direction-of-arrival 9 | (DoA) for a particular microphone array. 10 | 11 | .. note:: Run locate_source() to apply the MUSIC algorithm. 12 | 13 | :param L: Microphone array positions. Each column should correspond to the 14 | cartesian coordinates of a single microphone. 15 | :type L: numpy array 16 | :param fs: Sampling frequency. 17 | :type fs: float 18 | :param nfft: FFT length. 19 | :type nfft: int 20 | :param c: Speed of sound. Default: 343 m/s 21 | :type c: float 22 | :param num_src: Number of sources to detect. Default: 1 23 | :type num_src: int 24 | :param mode: 'far' or 'near' for far-field or near-field detection 25 | respectively. Default: 'far' 26 | :type mode: str 27 | :param r: Candidate distances from the origin. Default: np.ones(1) 28 | :type r: numpy array 29 | :param theta: Candidate azimuth angles (in radians) with respect to x-axis. 30 | Default: np.linspace(-180.,180.,30)*np.pi/180 31 | :type theta: numpy array 32 | :param phi: Candidate elevation angles (in radians) with respect to z-axis. 33 | Default is x-y plane search: np.pi/2*np.ones(1) 34 | :type phi: numpy array 35 | """ 36 | def __init__(self, L, fs, nfft, c=343.0, num_src=1, mode='far', r=None, 37 | theta=None, phi=None, **kwargs): 38 | DOA.__init__(self, L=L, fs=fs, nfft=nfft, c=c, num_src=num_src, 39 | mode=mode, r=r, theta=theta, phi=phi) 40 | self.Pssl = None 41 | 42 | def _process(self, X): 43 | """ 44 | Perform MUSIC for given frame in order to estimate steered response 45 | spectrum. 
46 | """ 47 | 48 | # compute steered response 49 | self.Pssl = np.zeros((self.num_freq,self.num_loc)) 50 | num_freq = self.num_freq 51 | C_hat = self._compute_correlation_matrices(X) 52 | for i in range(self.num_freq): 53 | k = self.freq_bins[i] 54 | # subspace decomposition 55 | Es, En, ws, wn = self._subspace_decomposition(C_hat[i,:,:]) 56 | # compute spatial spectrum 57 | # cross = np.dot(En,np.conjugate(En).T) 58 | cross = np.identity(self.M) - np.dot(Es, np.conjugate(Es).T) 59 | self.Pssl[i,:] = self._compute_spatial_spectrum(cross,k) 60 | self.P = sum(self.Pssl)/num_freq 61 | 62 | def plot_individual_spectrum(self): 63 | """ 64 | Plot the steered response for each frequency. 65 | """ 66 | # check if matplotlib imported 67 | if matplotlib_available is False: 68 | warnings.warn('Could not import matplotlib.') 69 | return 70 | # only for 2D 71 | if len(self.theta)!=1 and len(self.phi)==1 and len(self.r)==1: 72 | pass 73 | else: 74 | warnings.warn('Only for 2D.') 75 | return 76 | # plot 77 | for k in range(self.num_freq): 78 | freq = float(self.freq_bins[k])/self.nfft*self.fs 79 | azimuth = self.theta*180/np.pi 80 | plt.plot(azimuth, self.Pssl[k,0:len(azimuth)]) 81 | plt.ylabel('Magnitude') 82 | plt.xlabel('Azimuth [degrees]') 83 | plt.xlim(min(azimuth),max(azimuth)) 84 | plt.title('Steering Response Spectrum - ' + str(freq) + ' Hz') 85 | plt.grid(True) 86 | 87 | def _compute_spatial_spectrum(self,cross,k): 88 | P = np.zeros(self.num_loc) 89 | for n in range(self.num_loc): 90 | Dc = np.array(self.mode_vec[k,:,n],ndmin=2).T 91 | Dc_H = np.conjugate(np.array(self.mode_vec[k,:,n],ndmin=2)) 92 | denom = np.dot(np.dot(Dc_H,cross),Dc) 93 | P[n] = 1/abs(denom) 94 | return P 95 | 96 | def _compute_correlation_matrices(self, X): 97 | C_hat = np.zeros([self.num_freq,self.M,self.M], dtype=complex) 98 | for i in range(self.num_freq): 99 | k = self.freq_bins[i] 100 | for s in range(self.num_snap): 101 | C_hat[i,:,:] = C_hat[i,:,:] + np.outer(X[:,k,s], 102 | 
np.conjugate(X[:,k,s])) 103 | return C_hat/self.num_snap 104 | 105 | def _subspace_decomposition(self, R): 106 | w,v = np.linalg.eig(R) 107 | eig_order = np.flipud(np.argsort(abs(w))) 108 | sig_space = eig_order[:self.num_src] 109 | noise_space = eig_order[self.num_src:] 110 | ws = w[sig_space] 111 | wn = w[noise_space] 112 | Es = v[:,sig_space] 113 | En = v[:,noise_space] 114 | return Es, En, ws, wn 115 | -------------------------------------------------------------------------------- /doa/srp.py: -------------------------------------------------------------------------------- 1 | # Author: Eric Bezzam 2 | # Date: July 15, 2016 3 | 4 | from doa import * 5 | 6 | class SRP(DOA): 7 | """ 8 | Class to apply Steered Response Power (SRP) direction-of-arrival (DoA) for 9 | a particular microphone array. 10 | 11 | .. note:: Run locate_source() to apply the SRP-PHAT algorithm. 12 | 13 | :param L: Microphone array positions. Each column should correspond to the 14 | cartesian coordinates of a single microphone. 15 | :type L: numpy array 16 | :param fs: Sampling frequency. 17 | :type fs: float 18 | :param nfft: FFT length. 19 | :type nfft: int 20 | :param c: Speed of sound. Default: 343 m/s 21 | :type c: float 22 | :param num_src: Number of sources to detect. Default: 1 23 | :type num_src: int 24 | :param mode: 'far' or 'near' for far-field or near-field detection 25 | respectively. Default: 'far' 26 | :type mode: str 27 | :param r: Candidate distances from the origin. Default: np.ones(1) 28 | :type r: numpy array 29 | :param theta: Candidate azimuth angles (in radians) with respect to x-axis. 30 | Default: np.linspace(-180.,180.,30)*np.pi/180 31 | :type theta: numpy array 32 | :param phi: Candidate elevation angles (in radians) with respect to z-axis. 
33 | Default is x-y plane search: np.pi/2*np.ones(1) 34 | :type phi: numpy array 35 | """ 36 | def __init__(self, L, fs, nfft, c=343.0, num_src=1, mode='far', r=None, 37 | theta=None, phi=None, **kwargs): 38 | DOA.__init__(self, L=L, fs=fs, nfft=nfft, c=c, num_src=num_src, 39 | mode=mode, r=r, theta=theta, phi=phi) 40 | self.num_pairs = self.M*(self.M-1)/2 41 | self.mode_vec = np.conjugate(self.mode_vec) 42 | 43 | def _process(self, X): 44 | """ 45 | Perform SRP-PHAT for given frame in order to estimate steered response 46 | spectrum. 47 | """ 48 | # average over snapshots 49 | for s in range(self.num_snap): 50 | X_s = X[:,self.freq_bins,s] 51 | absX = abs(X_s) 52 | absX[absX p2, starting at the midpoint 80 | o = (p1 + p2) / 2. 81 | u = (p2 - p1) / la.norm(p2 - p1) 82 | 83 | pts = [] 84 | for d in dist: 85 | pts.append(o + d*u) 86 | 87 | return pts 88 | 89 | R_pyramic = np.array( 90 | line(corners[:,0], corners[:,3], pcb) + 91 | line(corners[:,3], corners[:,2], pcb) + 92 | line(corners[:,0], corners[:,1], pcb) + 93 | line(corners[:,1], corners[:,3], pcb) + 94 | line(corners[:,0], corners[:,2], pcb) + 95 | line(corners[:,2], corners[:,1], pcb) 96 | ).T 97 | 98 | # Reference point is 1cm below zero'th mic 99 | R_pyramic[2,:] += 0.01 - R_pyramic[2,0] 100 | 101 | if __name__ == "__main__": 102 | 103 | from experiment import PointCloud 104 | 105 | pyramic = PointCloud(X=R_pyramic) 106 | 107 | D = np.sqrt(pyramic.EDM()) 108 | print D[0,16] 109 | 110 | pyramic.plot() 111 | 112 | -------------------------------------------------------------------------------- /experiment/bands_selection.py: -------------------------------------------------------------------------------- 1 | from __future__ import division, print_function 2 | 3 | import numpy as np 4 | from scipy.io import wavfile 5 | import matplotlib.pyplot as plt 6 | 7 | from tools import rfft 8 | import pyroomacoustics as pra 9 | 10 | def select_bands(samples, freq_range, fs, nfft, win, n_bands, div=1): 11 | ''' 12 | 
Selects the bins with most energy in a frequency range. 13 | 14 | It is possible to specify a div factor. Then the range is subdivided 15 | into div equal subbands and n_bands / div per subband are selected. 16 | ''' 17 | 18 | if win is not None and isinstance(win, bool): 19 | if win: 20 | win = np.hanning(nfft) 21 | else: 22 | win = None 23 | 24 | # Read the signals in a single array 25 | sig = [wavfile.read(s)[1] for s in samples] 26 | L = max([s.shape[0] for s in sig]) 27 | signals = np.zeros((L,len(samples)), dtype=np.float32) 28 | for i in range(signals.shape[1]): 29 | signals[:sig[i].shape[0],i] = sig[i] / np.std(sig[i][sig[i] > 1e-2]) 30 | 31 | sum_sig = np.sum(signals, axis=1) 32 | 33 | sum_STFT = pra.stft(sum_sig, nfft, nfft, win=win, transform=rfft).T 34 | sum_STFT_avg = np.mean(np.abs(sum_STFT)**2, axis=1) 35 | 36 | # Do some band selection 37 | bnds = np.linspace(freq_range[0], freq_range[1], div+1) 38 | 39 | freq_hz = np.zeros(n_bands) 40 | freq_bins = np.zeros(n_bands, dtype=int) 41 | 42 | nsb = n_bands // div 43 | 44 | for i in range(div): 45 | 46 | bl = int(bnds[i] / fs * nfft) 47 | bh = int(bnds[i+1] / fs * nfft) 48 | 49 | k = np.argsort(sum_STFT_avg[bl:bh])[-nsb:] 50 | 51 | freq_hz[nsb*i:nsb*(i+1)] = (bl + k) / nfft * fs 52 | freq_bins[nsb*i:nsb*(i+1)] = k + bl 53 | 54 | freq_hz = freq_hz[:n_bands] 55 | 56 | return np.unique(freq_hz), np.unique(freq_bins) 57 | 58 | ''' 59 | print('freq_hz = [' + ','.join([str(f) for f in freq_hz]) + ']', sep=',') 60 | 61 | # Plot FFT of all signals 62 | freqvec = np.fft.rfftfreq(L) 63 | S = np.fft.rfft(signals, axis=0) 64 | 65 | STFT = np.array([pra.stft(s, nfft, nfft, win=win, transform=rfft) for s in signals.T]).T 66 | STFT_avg = np.array([np.mean(np.abs(st)**2, axis=0) for st in STFT]) 67 | f_stft = np.fft.rfftfreq(nfft) 68 | 69 | plt.subplot(2,1,1) 70 | plt.plot(fs*freqvec, pra.dB(S)) 71 | 72 | plt.subplot(2,1,2) 73 | plt.plot(fs*f_stft, pra.dB(STFT_avg)) 74 | plt.plot(fs*f_stft, pra.dB(sum_STFT_avg), '--') 75 
| plt.plot(freq_hz, pra.dB(sum_STFT_avg[freq_bins]), '*') 76 | 77 | plt.show() 78 | ''' 79 | -------------------------------------------------------------------------------- /experiment/calibrate_speakers.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | from scipy import linalg as la 5 | import scikits.samplerate as sr 6 | from scipy.io import wavfile 7 | import json 8 | import sys 9 | import matplotlib.pyplot as plt 10 | 11 | import theaudioexperimentalist as tae 12 | from experiment import PointCloud, arrays, calculate_speed_of_sound 13 | 14 | exp_dir = '/Users/scheibler/switchdrive/LCAV-Audio/Recordings/20160831' 15 | 16 | fn_sweep = exp_dir + '/20160831_short_sweep.wav' 17 | 18 | # Get the speakers and microphones geometry 19 | sys.path.append(exp_dir) 20 | from edm_to_positions import twitters 21 | 22 | # labels of the speakers 23 | labels = twitters.labels 24 | 25 | # Open the protocol json file 26 | with open(exp_dir + '/protocol.json') as fd: 27 | exp_data = json.load(fd) 28 | 29 | temp = exp_data['conditions']['temperature'] 30 | hum = exp_data['conditions']['humidity'] 31 | c = calculate_speed_of_sound(temp, hum) 32 | 33 | # open the sweep 34 | r_sweep, sweep = wavfile.read(fn_sweep) 35 | 36 | spkr = ['16'] 37 | #array_type = 'BBB' 38 | #array_type = 'FPGA' 39 | array_type = 'FPGA_speech' 40 | 41 | # open all recordings 42 | if array_type == 'FPGA': 43 | R = arrays['pyramic_tetrahedron'].copy() 44 | 45 | # Localize microphones in new reference frame 46 | R += twitters[['pyramic']] 47 | 48 | seg_len = 17.8 / 6 49 | offset = 3.85 - seg_len 50 | fn_rec = exp_dir + '/data_pyramic/raw/20160831_sweeps/Mic_' 51 | rec = {} 52 | r_rec = 0 53 | for l,lbl in enumerate(labels): 54 | rec[lbl] = [] 55 | for i in range(R.shape[1]): 56 | r_rec,s = wavfile.read(fn_rec + str(i) + '.wav') 57 | r_rec = 47718.6069 58 | #r_rec = 47760. 
59 | b = int(r_rec * (offset + l*seg_len) ) 60 | e = int(r_rec * (offset + (l+1)*seg_len) ) 61 | rec[lbl].append(s[b:e]) 62 | rec[lbl] = np.array(rec[lbl], dtype=np.float32).T/(2**15-1) 63 | 64 | elif array_type == 'FPGA_speech': 65 | R = arrays['pyramic_tetrahedron'].copy() 66 | R += twitters[['pyramic']] 67 | 68 | mics = PointCloud(X=R) 69 | D = np.sqrt(mics.EDM()) 70 | 71 | rec = {} 72 | for lbl in labels[:-2]: 73 | fn_rec = exp_dir + '/data_pyramic/segmented/one_speaker/{}.wav'.format(lbl) 74 | r_rec, s = wavfile.read(fn_rec) 75 | #r_rec = 47718.6069 76 | 77 | # segment the file 78 | rec[lbl] = s 79 | 80 | elif array_type == 'BBB': 81 | R = arrays['compactsix_circular_1'].copy() 82 | R += twitters[['compactsix']] 83 | 84 | fn_rec = exp_dir + '/data_compactsix/raw/20160831_compactsix_sweeps.wav' 85 | r_rec, s = wavfile.read(fn_rec) 86 | 87 | # segment the file 88 | seg_len = 3. 89 | offset = 0. 90 | rec = {} 91 | for l,lbl in enumerate(labels): 92 | rec[lbl] = [] 93 | b = int(r_rec * (offset + l*seg_len) ) 94 | e = int(r_rec * (offset + (l+1)*seg_len) ) 95 | rec[lbl] = s[b:e,:] / (2**15-1) 96 | 97 | if r_sweep != r_rec: 98 | print 'Resample sweep' 99 | sweep = sr.resample(sweep, r_rec/r_sweep, 'sinc_best') 100 | 101 | fs = r_rec 102 | 103 | print 'TDOA' 104 | 105 | if array_type == 'FPGA_speech': 106 | 107 | tdoa = [] 108 | for i in range(0,rec[spkr[0]].shape[1]): 109 | tdoa.append(tae.tdoa(rec[spkr[0]][:,i], rec[spkr[0]][:,0], fs=fs, interp=4, phat=True)) 110 | tdoa = np.array(tdoa) 111 | tdoa -= tdoa[0] 112 | 113 | else: 114 | 115 | print 'Deconvolving' 116 | h = {} 117 | for lbl in spkr: 118 | temp = [] 119 | for mic in range(rec[lbl].shape[1]): 120 | temp.append(tae.deconvolve(rec[lbl][:,mic], sweep, thresh=0.1)) 121 | h[lbl] = np.array(temp).T 122 | 123 | print 'TDOA' 124 | tdoa = [] 125 | for i in range(0,rec[spkr[0]].shape[1]): 126 | #tdoa.append(tae.tdoa(rec[spkr[0]][:,i], rec[spkr[0]][:,0], fs=fs, interp=1, phat=True)) 127 | k = 
np.argmax(np.abs(h[spkr[0]][:,i])) 128 | if k > h[spkr[0]].shape[0]/2: 129 | k -= h[spkr[0]].shape[0] 130 | tdoa.append(k/fs) 131 | tdoa = np.array(tdoa) 132 | tdoa -= tdoa[0] 133 | 134 | delay_d = tdoa * c 135 | delay_d -= delay_d[0] 136 | 137 | x0 = np.zeros(4) 138 | x0[:3] = twitters[spkr[0]] 139 | x0[3] = la.norm(twitters[spkr[0]] - R[:,0]) 140 | print 'Doing localization' 141 | 142 | remove = [32, 47] 143 | if array_type == 'BBB': 144 | loc = np.array([tae.tdoa_loc(R[:2,:], tdoa, c, x0=x0[:2])]).T 145 | loc = np.concatenate((loc, R[-1:,:1])) 146 | else: 147 | loc = np.array([tae.tdoa_loc(R, tdoa, c, x0=x0)]).T 148 | 149 | tdoa2 = la.norm(R - loc, axis=0) / c 150 | tdoa2 -= tdoa2[0] 151 | 152 | tdoa3 = la.norm(R - twitters[[spkr[0]]], axis=0) / c 153 | tdoa3 -= tdoa3[0] 154 | 155 | R = np.concatenate((R, loc), axis=1) 156 | pc = PointCloud(X=R) 157 | pc.labels[-1] = 'spkr' 158 | 159 | plt.figure() 160 | plt.plot(tdoa) 161 | plt.plot(tdoa2) 162 | plt.plot(tdoa3) 163 | plt.legend(['TDOA measured','TDOA reconstructed','TDOA hand measured location']) 164 | 165 | 166 | axes = pc.plot() 167 | twitters.plot(axes=axes, c='r') 168 | plt.axis('equal') 169 | plt.show() 170 | -------------------------------------------------------------------------------- /experiment/experiment_fpga.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | import sys 4 | 5 | from mpl_toolkits.mplot3d import Axes3D 6 | import matplotlib.pyplot as plt 7 | 8 | sys.path.append('Experiment/arrays') 9 | sys.path.append('Experiment') 10 | 11 | from point_cloud import PointCloud 12 | from speakers_microphones_locations import * 13 | from arrays import * 14 | 15 | # FPGA array reference point offset 16 | R = R_pyramic 17 | ref_pt_offset = 0.01 # meters 18 | 19 | # Adjust the z-offset of Pyramic 20 | R[2,:] += ref_pt_offset - R[2,0] 21 | 22 | # Localize microphones in new reference frame 23 | R += twitters[['FPGA']] 24 | 25 | # correct FPGA 
def calculate_speed_of_sound(t, h, p=1000.):
    '''
    Compute the speed of sound as a function of
    temperature, humidity and pressure

    Arguments
    ---------

    t: temperature [Celsius]
    h: relative humidity [%]
    p: atmospheric pressure [kpa]

    Return
    ------

    Speed of sound in [m/s]
    '''

    # crude linear approximation; note the pressure argument is currently
    # unused by this model
    base = 331.4
    temp_coeff = 0.6
    hum_coeff = 0.0124
    return base + temp_coeff * t + hum_coeff * h

# Provided by LCAV
import numpy as np
from scipy import linalg as la

# NOTE: matplotlib / mpl_toolkits are imported lazily inside plot() and the
# __main__ demo so that the geometry routines can be used headless without
# pulling in the plotting stack at import time.

# Python 2/3 compatible string types (labels may be str or, on py2, unicode)
try:
    _string_types = (str, unicode)
except NameError:
    _string_types = (str,)


class PointCloud:
    '''
    A set of labeled points (markers) stored as the columns of the
    (dim x m) array ``self.X``. The class can reconstruct relative marker
    positions from a (squared) Euclidean Distance Matrix and apply simple
    rigid transformations (centering, alignment, flattening).
    '''

    def __init__(self, m=1, dim=3, diameter=0., X=None, labels=None, EDM=None):
        '''
        Parameters
        ----------
        m : int, optional
            Number of markers
        dim : int, optional
            Dimension of ambient space (default 3)
        diameter : float, optional
            Diameter of the marker points (added to distance measurements)
        X : ndarray, optional
            Array of column vectors with locations of markers
        labels : list of str, optional
            One human-friendly label per marker
        EDM : ndarray, optional
            Matrix of *squared* distances between points; when given, the
            locations are reconstructed from it (see fromEDM)
        '''

        # set the marker diameter
        self.diameter = diameter

        if EDM is not None:
            self.dim = dim
            self.fromEDM(EDM, labels=labels)

        elif X is not None:
            self.m = X.shape[1]
            self.dim = X.shape[0]
            self.X = X

        else:
            self.m = m
            self.dim = dim
            self.X = np.zeros((self.dim, self.m))

        # Now set the labels (default: '0', '1', ...)
        if labels is not None:
            if len(labels) == self.m:
                self.labels = labels
            else:
                raise ValueError('There needs to be one label per marker point')
        else:
            self.labels = [str(i) for i in range(self.m)]

    def __getitem__(self, ref):
        ''' Index by integer, slice, label, or a list mixing labels and ints. '''

        if isinstance(ref, _string_types):
            if self.labels is None:
                raise ValueError('Labels not set for this marker set')
            index = self.labels.index(ref)
        elif isinstance(ref, (int, slice)):
            index = ref
        elif isinstance(ref, list):
            index = [self.labels.index(s) if isinstance(s, _string_types) else s
                     for s in ref]
        else:
            index = int(ref)

        return self.X[:, index]

    def copy(self):
        ''' Return a deep copy of this marker set object '''

        # Copy the label list too: previously the list object was shared,
        # so relabeling/correcting the clone leaked into the original.
        labels = list(self.labels) if self.labels is not None else None
        return PointCloud(X=self.X.copy(), labels=labels, diameter=self.diameter)

    def key2ind(self, ref):
        ''' Get the index location from a label (integers pass through). '''

        if isinstance(ref, _string_types):
            if self.labels is None:
                raise ValueError('Labels must be defined to be used to access markers')
            return self.labels.index(ref)
        else:
            return int(ref)

    def fromEDM(self, D, labels=None, method='mds'):
        ''' Compute the position of markers from their Euclidean Distance Matrix

        Parameters
        ----------
        D : square 2D ndarray
            Euclidean Distance Matrix (matrix containing squared distances
            between points)
        labels : list, optional
            A list of human friendly labels for the markers (e.g. 'east', 'west')
        method : str, optional
            The method to use
            * 'mds' for multidimensional scaling (default)
            * 'tri' for trilateration
        '''

        if D.shape[0] != D.shape[1]:
            raise ValueError('The distance matrix must be square')

        self.m = D.shape[0]

        # Fix: the ``labels`` argument used to be silently ignored here
        if labels is not None:
            if len(labels) != self.m:
                raise ValueError('There needs to be one label per marker point')
            self.labels = labels

        if method == 'tri':
            self.trilateration(D)
        else:
            self.classical_mds(D)

    def classical_mds(self, D):
        '''
        Classical multidimensional scaling: recover coordinates (up to a
        rigid transformation) from the squared-distance matrix.

        Parameters
        ----------
        D : square 2D ndarray
            Euclidean Distance Matrix (matrix containing squared distances
            between points)
        '''

        # Double centering turns squared distances into a Gram matrix
        n = D.shape[0]
        J = np.eye(n) - np.ones((n, n)) / float(n)
        G = -0.5 * np.dot(J, np.dot(D, J))

        s, U = np.linalg.eig(G)

        # we need to sort the eigenvalues in decreasing order
        s = np.real(s)
        o = np.argsort(s)
        s = s[o[::-1]]
        U = U[:, o[::-1]]

        # keep only the ``dim`` strongest components
        S = np.diag(s)[0:self.dim, :]
        self.X = np.dot(np.sqrt(S), U.T)

    def trilateration_single_point(self, c, Dx, Dy):
        '''
        Given x at origin (0, 0) and y at (0, c), and the distances Dx, Dy
        from a point at unknown location to x and y respectively, find the
        position of that point (2D only).
        '''

        z = (c ** 2 - (Dy ** 2 - Dx ** 2)) / (2 * c)
        t = np.sqrt(Dx ** 2 - z ** 2)

        return np.array([t, z])

    def trilateration(self, D):
        '''
        Find the location of points based on their distance matrix using
        trilateration (2D only, no denoising).

        Parameters
        ----------
        D : square 2D ndarray
            Euclidean Distance Matrix (matrix containing squared distances
            between points)
        '''

        dist = np.sqrt(D)

        # Simpler algorithm (no denoising)
        self.X = np.zeros((self.dim, self.m))

        # first point at the origin, second along the second axis
        self.X[:, 1] = np.array([0, dist[0, 1]])
        # Fix: the loop bound used the undefined name ``m`` (NameError);
        # it must be the number of markers ``self.m``.
        for i in range(2, self.m):
            self.X[:, i] = self.trilateration_single_point(self.X[1, 1],
                                                           dist[0, i], dist[1, i])

    def EDM(self):
        ''' Computes the (squared-distance) EDM corresponding to the marker set '''
        if self.X is None:
            raise ValueError('No marker set')

        G = np.dot(self.X.T, self.X)
        return np.outer(np.ones(self.m), np.diag(G)) \
            - 2 * G + np.outer(np.diag(G), np.ones(self.m))

    def normalize(self, refs=None):
        '''
        Reposition the points so that refs[0] is at the origin, refs[1] lies
        on the x-axis and refs[2] lies above the x-axis; in 3D, refs[3]
        additionally fixes the sign of the z-axis. Relative positions are
        preserved (rigid transformation plus possible reflection).

        Parameters
        ----------
        refs : list of 3 (2D) or 4 (3D) ints or str, optional
            The indices or labels of the markers used to define the frame
            (origin, x-axis, y-axis[, z-orientation]); default [0, 1, 2, 3].
        '''

        if refs is None:
            refs = [0, 1, 2, 3]

        # Transform references to indices if needed
        refs = [self.key2ind(s) for s in refs]

        if self.dim == 2 and len(refs) < 3:
            raise ValueError('In 2D three reference points are needed to define a reference frame')
        elif self.dim == 3 and len(refs) < 4:
            raise ValueError('In 3D four reference points are needed to define a reference frame')

        # set first point to origin
        X0 = self.X[:, refs[0], None]
        Y = self.X - X0

        # Rotate around z to align x-axis to second point
        theta = np.arctan2(Y[1, refs[1]], Y[0, refs[1]])
        c = np.cos(theta)
        s = np.sin(theta)
        if self.dim == 2:
            H = np.array([[c, s], [-s, c]])
        elif self.dim == 3:
            H = np.array([[c, s, 0], [-s, c, 0], [0, 0, 1]])
        Y = np.dot(H, Y)

        if self.dim == 2:
            # set third point to lie above x-axis
            if Y[1, refs[2]] < 0:
                Y[1, :] *= -1

        elif self.dim == 3:
            # in 3D, rotate around x so the third point lies in the xy-plane
            theta = np.arctan2(Y[2, refs[2]], Y[1, refs[2]])
            c = np.cos(theta)
            s = np.sin(theta)
            H = np.array([[1, 0, 0], [0, c, s], [0, -s, c]])
            Y = np.dot(H, Y)

        # Flip the z-axis if the fourth reference ends up below the plane
        if self.dim == 3 and Y[2, refs[3]] < 0:
            Y[2, :] *= -1

        self.X = Y

    def center(self, marker):
        ''' Translate the marker set so that the argument is the origin. '''

        index = self.key2ind(marker)
        self.X -= self.X[:, index, None]

    def align(self, marker, axis):
        '''
        Rotate the marker set around the given axis until it is aligned onto
        the given marker.

        Parameters
        ----------
        marker : int or str
            the index or label of the marker onto which to align the set
        axis : int or str
            the axis around which the rotation happens (0/1/2 or 'x'/'y'/'z')
        '''

        index = self.key2ind(marker)
        # Fix: the string test used to inspect ``marker`` instead of ``axis``,
        # so e.g. align(0, 'z') failed to convert 'z' -> 2.
        axis = ['x', 'y', 'z'].index(axis) if isinstance(axis, _string_types) else axis

        # swap the rotation axis into the last (z) position
        Y = self.X
        if self.dim == 3:
            # Fix: tuple assignment of two numpy views does NOT swap rows
            # (the second assignment reads already-overwritten data); fancy
            # indexing copies the right-hand side first, so this is a true swap.
            Y[[axis, 2], :] = Y[[2, axis], :]

        # Rotate around z to align the x-axis onto the marker
        theta = np.arctan2(Y[1, index], Y[0, index])
        c = np.cos(theta)
        s = np.sin(theta)
        H = np.array([[c, s], [-s, c]])
        Y[:2, :] = np.dot(H, Y[:2, :])

        # swap the axes back
        if self.dim == 3:
            Y[[axis, 2], :] = Y[[2, axis], :]

    def flatten(self, ind):
        '''
        Transform the set of points so that the subset of markers given as
        argument is as close to flat (w.r.t. the z-axis) as possible.

        Parameters
        ----------
        ind : list of int or str
            Markers that should end up in the same subspace
        '''

        # Transform references to indices if needed
        ind = [self.key2ind(s) for s in ind]

        # center point cloud around the group of indices
        centroid = self.X[:, ind].mean(axis=1, keepdims=True)
        X_centered = self.X - centroid

        # The rotation is given by left matrix of SVD
        U, S, V = la.svd(X_centered[:, ind], full_matrices=False)

        self.X = np.dot(U.T, X_centered) + centroid

    def correct(self, corr_dic):
        ''' Correct marker locations by the given {label: vector} offsets. '''

        for key, val in corr_dic.items():
            ind = self.key2ind(key)
            self.X[:, ind] += val

    def doa(self, receiver, source):
        ''' Direction of arrival (azimuth, elevation), in radians wrapped to
        [0, 2*pi), of ``source`` as seen from ``receiver``. '''

        s_ind = self.key2ind(source)
        r_ind = self.key2ind(receiver)

        # vector from receiver to source
        v = self.X[:, s_ind] - self.X[:, r_ind]

        azimuth = np.arctan2(v[1], v[0])
        elevation = np.arctan2(v[2], la.norm(v[:2]))

        # wrap negative angles to [0, 2*pi)
        azimuth = azimuth + 2 * np.pi if azimuth < 0. else azimuth
        elevation = elevation + 2 * np.pi if elevation < 0. else elevation

        return np.array([azimuth, elevation])

    def plot(self, axes=None, show_labels=True, **kwargs):
        ''' Scatter plot of the point cloud (2D or 3D); returns the axes. '''

        # Lazy import so that the geometry code can run headless
        import matplotlib.pyplot as plt

        if self.dim == 2:

            # Create a figure if needed
            if axes is None:
                axes = plt.subplot(111)

            axes.plot(self.X[0, :], self.X[1, :], **kwargs)
            axes.axis(aspect='equal')
            plt.show()

        elif self.dim == 3:
            # importing this module registers the '3d' projection
            from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

            if axes is None:
                fig = plt.figure()
                axes = fig.add_subplot(111, projection='3d')
            axes.scatter(self.X[0, :], self.X[1, :], self.X[2, :], **kwargs)

            axes.set_xlabel('X')
            axes.set_ylabel('Y')
            axes.set_zlabel('Z')
            plt.show()

        if show_labels and self.labels is not None:
            eps = np.linalg.norm(self.X[:, 0] - self.X[:, 1]) / 100
            for i in range(self.m):
                if self.dim == 2:
                    axes.text(self.X[0, i] + eps, self.X[1, i] + eps, self.labels[i])
                elif self.dim == 3:
                    axes.text(self.X[0, i] + eps, self.X[1, i] + eps,
                              self.X[2, i] + eps, self.labels[i], None)

        return axes


if __name__ == '__main__':

    import matplotlib.pyplot as plt

    # number of markers
    m = 4
    dim = 2

    marker_diameter = 0.040  # 4 cm

    # Groundtruth: a 70 cm square.
    # Fix: this demo used the old class name ``MarkerSet`` (NameError).
    M_orig = PointCloud(X=np.array([[0., 0.], [0.7, 0.], [0.7, 0.7], [0., 0.7]]).T)
    D = np.sqrt(M_orig.EDM())

    M1 = PointCloud(m=m, dim=dim, diameter=marker_diameter)
    M1.fromEDM(D ** 2)
    M1.normalize()

    M2 = PointCloud(m=m, dim=dim, diameter=marker_diameter)
    M2.fromEDM(D ** 2, method='tri')
    M2.normalize()

    # Fix: plot() takes matplotlib keyword args, not a fmt string / ``labels``
    M2.plot(c='k', marker='o')
    M1.plot(c='r', marker='x')
    plt.show()
396 | -------------------------------------------------------------------------------- /experiment/samples/fq_sample0.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/experiment/samples/fq_sample0.wav -------------------------------------------------------------------------------- /experiment/samples/fq_sample1.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/experiment/samples/fq_sample1.wav -------------------------------------------------------------------------------- /experiment/samples/fq_sample2.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/experiment/samples/fq_sample2.wav -------------------------------------------------------------------------------- /experiment/samples/fq_sample3.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/experiment/samples/fq_sample3.wav -------------------------------------------------------------------------------- /experiment/samples/fq_sample4.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/experiment/samples/fq_sample4.wav -------------------------------------------------------------------------------- /experiment/samples/fq_sample5.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/experiment/samples/fq_sample5.wav -------------------------------------------------------------------------------- 
/experiment/speakers_microphones_locations.py: -------------------------------------------------------------------------------- 1 | 2 | import numpy as np 3 | from scipy import linalg as la 4 | 5 | from point_cloud import PointCloud 6 | 7 | # The collection of markers where distances were measured 8 | labels = ['11', '7', '5', '3', '13', '8', '4', '14', 'FPGA', 'BBB'] 9 | # The Euclidean distance matrix. Unit is squared meters 10 | EDM = np.array( 11 | [ [ 0, 0.79, 1.63, 2.42, 2.82, 3.55, 2.44, 2.87, 2.22, 1.46 ], 12 | [ 0.79, 0, 1.45, 2.32, 2.49, 3.67, 2.32, 2.54, 1.92, 1.35 ], 13 | [ 1.63, 1.45, 0, 1.92, 2.09, 4.02, 3.48, 3.66, 2.50, 1.68 ], 14 | [ 2.42, 2.32, 1.92, 0, 0.86, 2.43, 2.92, 3.14, 1.56, 1.14 ], 15 | [ 2.82, 2.49, 2.09, 0.86, 0, 3.15, 3.10, 3.07, 1.58, 1.56 ], 16 | [ 3.55, 3.76, 4.02, 2.43, 3.15, 0, 2.44, 2.88, 2.11, 2.45 ], 17 | [ 2.44, 2.32, 3.48, 2.92, 3.10, 2.44, 0, 0.85, 1.52, 2.00 ], 18 | [ 2.87, 2.54, 3.66, 3.14, 3.07, 2.88, 0.85, 0, 1.60, 2.31 ], 19 | [ 2.22, 1.92, 2.50, 1.56, 1.58, 2.11, 1.52, 1.60, 0, 0.97 ], 20 | [ 1.46, 1.35, 1.68, 1.14, 1.56, 2.45, 2.00, 2.31, 0.97, 0 ] ] 21 | )**2 22 | 23 | # Create the marker objects 24 | markers = PointCloud(EDM=EDM, labels=labels) 25 | 26 | # We know that these markers should be roughly on a plane 27 | markers.flatten(['7','5','3','4']) 28 | 29 | # Let the FPGA ref point be the center 30 | markers.center('FPGA') 31 | 32 | # And align x-axis onto speaker 7 33 | markers.align('7','z') 34 | 35 | # Now there a few correction vectors to apply between the measurement points 36 | # and the center of the baffles 37 | corr_twitter = { 38 | '7' : np.array([+0.01, 0, -0.05]), 39 | '3' : np.array([0., -0.01, -0.05]), 40 | '4' : np.array([0., +0.01, -0.05]), 41 | '5' : np.array([0.01*np.cos(np.pi/4.), -0.01*np.sin(np.pi/4.), -0.05]), 42 | '11' : np.array([+0.01, 0, -0.05]), # top row, this needs to be rotated +30 deg around y-axis 43 | '8' : np.array([-0.01, 0, -0.05]), # top row, this needs to be rotated -30 deg 
around y-axis 44 | '13' : np.array([0., -0.01, -0.19]), # bottom row, this needs to be rotated +30 deg around x-axis 45 | '14' : np.array([0., +0.01, -0.19]), # bottom row, this needs to be rotated -30 deg around x-axis 46 | } 47 | corr_woofer = { 48 | '7' : np.array([+0.02, 0, -0.155]), 49 | '3' : np.array([0., -0.02, -0.155]), 50 | '4' : np.array([0., +0.02, -0.155]), 51 | '5' : np.array([0.02*np.cos(np.pi/4.), -0.01*np.sin(np.pi/4.), -0.155]), 52 | '11' : np.array([+0.02, 0, -0.155]), # top row, this needs to be rotated +30 deg around y-axis 53 | '8' : np.array([-0.02, 0, -0.155]), # top row, this needs to be rotated -30 deg around y-axis 54 | '13' : np.array([0., -0.02, -0.090]), # bottom row, this needs to be rotated +30 deg around x-axis 55 | '14' : np.array([0., +0.02, -0.090]), # bottom row, this needs to be rotated -30 deg around x-axis 56 | } 57 | 58 | # Build rotation matrices by 30 degrees 59 | c,s = np.cos(np.pi/6.), np.sin(np.pi/6.) 60 | R_30_y = np.array([[c, 0., -s], [0., 1., 0], [s, 0., c]]) 61 | R_30_x = np.array([[1, 0., 0.], [0., c, -s], [0., s, c]]) 62 | 63 | # Apply the rotations 64 | corr_twitter['11'] = np.dot(R_30_y, corr_twitter['11']) 65 | corr_twitter['8'] = np.dot(R_30_y.T, corr_twitter['8']) 66 | corr_twitter['13'] = np.dot(R_30_x, corr_twitter['13']) 67 | corr_twitter['14'] = np.dot(R_30_x.T, corr_twitter['14']) 68 | corr_woofer['11'] = np.dot(R_30_y, corr_woofer['11']) 69 | corr_woofer['8'] = np.dot(R_30_y.T, corr_woofer['8']) 70 | corr_woofer['13'] = np.dot(R_30_x, corr_woofer['13']) 71 | corr_woofer['14'] = np.dot(R_30_x.T, corr_woofer['14']) 72 | 73 | # Now make two sets of markers for twitters and woofers 74 | twitters = markers.copy() 75 | woofers = markers.copy() 76 | 77 | # Apply the correction vectors 78 | twitters.correct(corr_twitter) 79 | woofers.correct(corr_woofer) 80 | 81 | if __name__ == "__main__": 82 | 83 | from mpl_toolkits.mplot3d import Axes3D 84 | import matplotlib.pyplot as plt 85 | 86 | # Plot all the markers 
in the same figure to check all the locations are correct 87 | fig = plt.figure() 88 | axes = fig.add_subplot(111, projection='3d') 89 | 90 | twitters.plot(axes=axes, c='b', marker='s') 91 | woofers.plot(axes=axes, c='r', marker='<') 92 | markers.plot(axes=axes, c='k', marker='.') 93 | 94 | print 'DoA of Speaker 5 to FPGA:', twitters.doa('FPGA','5')/np.pi*180.,'degrees' 95 | 96 | plt.show() 97 | 98 | -------------------------------------------------------------------------------- /experiment/sweep.wav: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/experiment/sweep.wav -------------------------------------------------------------------------------- /figure_doa_9_mics_10_src.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Test with real recordings for cases where we have less microphones than sources. 3 | Here the number of microphones is 9 4 | The number of sources is 10 5 | python test_doa_recorded_local.py -f 1-2-3-4-5-6-7-12-14-15 -b 20 -a 6 6 | ''' 7 | from __future__ import division 8 | from scipy.io import wavfile 9 | import os, sys, getopt, time 10 | import json 11 | 12 | import matplotlib as pyplot 13 | import seaborn as sns 14 | 15 | import pyroomacoustics as pra 16 | 17 | import doa 18 | from tools import * 19 | from experiment import arrays, calculate_speed_of_sound, select_bands, PointCloud 20 | 21 | if __name__ == '__main__': 22 | 23 | # default values 24 | algo = 6 25 | rec_file = '1-2-3-4-5-6-7-12-14-15' 26 | n_bands = 20 27 | data_filename = None 28 | plot_flag = False 29 | 30 | # parse arguments 31 | cmd_name = sys.argv[0] 32 | argv = sys.argv[1:] 33 | 34 | def print_help(cmd): 35 | print('%s [-p] -a -f -b ' % cmd) 36 | print(' -a , --algo=: Algorithm to use 1:SRP-PHAT, 2: MUSIC, 3:CSSM, 4:WAVES, 5:TOPS, 6:FRIDA') 37 | print(' -b , --n_bands=: Number of frequency bands to use.') 38 
| print(' -p, --plot: Show a plot at the end of the script.') 39 | print(' -f , --file=: The recording file to use.') 40 | print(' -o , --output=: The file where to save the plotting data.') 41 | 42 | try: 43 | opts, args = getopt.getopt(argv, "ha:f:b:p", ["algo=", "file=", "n_bands=","plot"]) 44 | except getopt.GetoptError: 45 | print_help(cmd_name) 46 | sys.exit(2) 47 | for opt, arg in opts: 48 | if opt == '-h': 49 | print_help(cmd_name) 50 | sys.exit() 51 | elif opt in ("-a", "--algo"): 52 | algo = int(arg) 53 | elif opt in ("-f", "--file"): 54 | rec_file = arg 55 | elif opt in ("-o", "--output"): 56 | data_filename = arg 57 | elif opt in ("-b", "--n_bands"): 58 | n_bands = int(arg) 59 | elif opt in ("-p", "--plot"): 60 | plot_flag = True 61 | 62 | algo_dic = {1:'SRP', 2:'MUSIC', 3:'CSSM', 4:'WAVES', 5:'TOPS', 6:'FRI'} 63 | algo_name = algo_dic[algo] 64 | 65 | # We should make this the default structure 66 | # it can be applied by copying/downloading the data or creating a symbolic link 67 | exp_folder = './recordings/20160912-2/' 68 | 69 | # Get the speakers and microphones grounndtruth locations 70 | sys.path.append(exp_folder) 71 | from edm_to_positions import twitters 72 | 73 | array_str = 'pyramic' 74 | #array_str = 'compactsix' 75 | 76 | if array_str == 'pyramic': 77 | 78 | twitters.center('pyramic') 79 | 80 | # R_flat_I = range(8, 16) + range(24, 32) + range(40, 48) 81 | 82 | # idx0 = (np.random.permutation(8)[:3] + 8).tolist() 83 | # R_flat_I_subset = idx0 + \ 84 | # [idx_loop + 16 for idx_loop in idx0] + \ 85 | # [idx_loop + 32 for idx_loop in idx0] # [8, 10, 13, 15, 40, 42, 47, 26, 30] 86 | R_flat_I_subset = [14, 9, 13, 30, 25, 29, 46, 41, 45] 87 | mic_array = arrays['pyramic_tetrahedron'][:, R_flat_I_subset].copy() 88 | 89 | mic_array += twitters[['pyramic']] 90 | 91 | rec_folder = exp_folder + 'data_pyramic/segmented/' 92 | 93 | elif array_str == 'compactsix': 94 | 95 | twitters.center('compactsix') 96 | 97 | R_flat_I_subset = range(6) 98 | mic_array 
= arrays['compactsix_circular_1'][:, R_flat_I_subset].copy() 99 | mic_array += twitters[['compactsix']] 100 | rec_folder = exp_folder + 'data_compactsix/segmented/' 101 | 102 | fs = 16000 103 | 104 | num_mic = mic_array.shape[1] # number of microphones 105 | K = rec_file.count('-') + 1 # Real number of sources 106 | K_est = K # Number of sources to estimate 107 | 108 | # Open the protocol json file 109 | with open(exp_folder + 'protocol.json') as fd: 110 | exp_data = json.load(fd) 111 | 112 | # These parameters could be extracted from a JSON file 113 | # Experiment related parameters 114 | temp = exp_data['conditions']['temperature'] 115 | hum = exp_data['conditions']['humidity'] 116 | c = calculate_speed_of_sound(temp, hum) 117 | # save parameters 118 | save_fig = False 119 | save_param = True 120 | fig_dir = './result/' 121 | 122 | # Check if the directory exists 123 | if save_fig and not os.path.exists(fig_dir): 124 | os.makedirs(fig_dir) 125 | 126 | # algorithm parameters 127 | stop_cri = 'max_iter' # can be 'mse' or 'max_iter' 128 | fft_size = 256 # number of FFT bins 129 | win_stft = np.hanning(fft_size) # stft window 130 | frame_shift_step = np.int(fft_size / 1.) 
131 | M = 17 # Maximum Fourier coefficient index (-M to M), K_est <= M <= num_mic*(num_mic - 1) / 2 132 | 133 | # ---------------------------- 134 | # Perform direction of arrival 135 | phi_plt = np.linspace(0, 2*np.pi, num=721, dtype=float, endpoint=False) 136 | 137 | # Choose the frequency range to use 138 | freq_range = { 139 | 'MUSIC': [2500., 4500.], 140 | 'SRP': [2500., 4500.], 141 | 'CSSM': [2500., 4500.], 142 | 'WAVES': [3000., 4000.], 143 | 'TOPS': [100., 4500.], 144 | 'FRI': [1500., 6500.], 145 | } 146 | 147 | # Pick uniformly spaced frequencies 148 | freq_hz = np.linspace(freq_range[algo_name][0], freq_range[algo_name][1], n_bands) 149 | freq_bins = np.unique(np.array([int(np.round(f / fs * fft_size)) for f in freq_hz])) 150 | freq_hz = freq_bins * fs / float(fft_size) 151 | n_bands = freq_bins.size 152 | 153 | print('Using {} frequencies: '.format(freq_hz.shape[0])) 154 | print('Selected frequencies: {0} Hertz'.format(freq_bins / fft_size * fs)) 155 | 156 | 157 | # Import speech signal 158 | # ------------------------- 159 | if K == 1: 160 | filename = rec_folder + 'one_speaker/' + rec_file + '.wav' 161 | elif K == 2: 162 | filename = rec_folder + 'two_speakers/' + rec_file + '.wav' 163 | elif K == 3: 164 | filename = rec_folder + 'three_speakers/' + rec_file + '.wav' 165 | else: 166 | filename = rec_folder + rec_file + '.wav' 167 | fs_file, rec_signals = wavfile.read(filename) 168 | fs_silence, rec_silence = wavfile.read(rec_folder + 'silence.wav') 169 | 170 | if fs_file != fs_silence: 171 | raise ValueError('Weird: fs of signals and silence are different...') 172 | 173 | # Resample the files if required 174 | if fs_file != fs: 175 | print 'Resampling signals' 176 | from scikits.samplerate import resample 177 | 178 | resampled_signals = [] 179 | resampled_silence = [] 180 | for i in R_flat_I_subset: 181 | resampled_signals.append( 182 | pra.highpass( 183 | resample(rec_signals[:, i], fs / fs_file, 'sinc_best'), 184 | fs, 185 | fc=150. 
186 | ) 187 | ) 188 | resampled_silence.append( 189 | pra.highpass( 190 | resample(rec_silence[:, i], fs / fs_file, 'sinc_best'), 191 | fs, 192 | fc=150. 193 | ) 194 | ) 195 | speech_signals = np.array(resampled_signals, dtype=np.float).T 196 | silence = np.array(resampled_silence, dtype=np.float).T 197 | 198 | else: 199 | print('No need to resample signals') 200 | speech_signals = np.array(rec_signals[:, R_flat_I_subset], dtype=np.float32) 201 | silence = np.array(rec_silence[:, R_flat_I_subset], dtype=np.float32) 202 | 203 | # highpass filter at 150 204 | for s in speech_signals.T: 205 | s[:] = pra.highpass(s, fs, fc=150.) 206 | for s in silence.T: 207 | s[:] = pra.highpass(s, fs, fc=150.) 208 | 209 | # Normalize the amplitude 210 | n_factor = 0.95 / np.max(np.abs(speech_signals)) 211 | speech_signals *= n_factor 212 | silence *= n_factor 213 | 214 | # estimate noise floor 215 | y_noise_stft = [] 216 | for k in range(num_mic): 217 | y_stft = pra.stft(silence[:, k], fft_size, frame_shift_step, 218 | transform=rfft, win=win_stft).T / np.sqrt(fft_size) 219 | y_noise_stft.append(y_stft) 220 | y_noise_stft = np.array(y_noise_stft) 221 | noise_floor = np.mean(np.abs(y_noise_stft) ** 2) 222 | 223 | # estimate SNR in dB (on 1st microphone) 224 | noise_var = np.mean(np.abs(silence) ** 2) 225 | sig_var = np.mean(np.abs(speech_signals) ** 2) 226 | # rought estimate of SNR 227 | SNR = 10 * np.log10((sig_var - noise_var) / noise_var) 228 | print('Estimated SNR: ' + str(SNR)) 229 | 230 | # Compute DFT of snapshots 231 | # ------------------------- 232 | y_mic_stft = [] 233 | for k in range(num_mic): 234 | y_stft = pra.stft(speech_signals[:, k], fft_size, frame_shift_step, 235 | transform=rfft, win=win_stft).T / np.sqrt(fft_size) 236 | y_mic_stft.append(y_stft) 237 | y_mic_stft = np.array(y_mic_stft) 238 | 239 | energy_level = np.abs(y_mic_stft) ** 2 240 | 241 | # True direction of arrival 242 | # ------------------------- 243 | sources = rec_file.split('-') 244 | phi_ks = 
np.array([twitters.doa(array_str, sources[k])[0] for k in range(K)]) 245 | phi_ks[phi_ks < 0] = phi_ks[phi_ks < 0] + 2 * np.pi 246 | 247 | # create DOA object 248 | if algo == 1: 249 | algo_name = 'SRP-PHAT' 250 | d = doa.SRP(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 251 | theta=phi_plt) 252 | if algo == 2: 253 | algo_name = 'MUSIC' 254 | d = doa.MUSIC(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 255 | theta=phi_plt) 256 | elif algo == 3: 257 | algo_name = 'CSSM' 258 | d = doa.CSSM(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 259 | theta=phi_plt, num_iter=10) 260 | elif algo == 4: 261 | algo_name = 'WAVES' 262 | d = doa.WAVES(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 263 | theta=phi_plt, num_iter=10) 264 | elif algo == 5: 265 | algo_name = 'TOPS' 266 | d = doa.TOPS(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 267 | theta=phi_plt) 268 | elif algo == 6: 269 | algo_name = 'FRI' 270 | d = doa.FRI(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, G_iter=5, 271 | theta=phi_plt, max_four=M, noise_floor=noise_floor, noise_margin=0.0) 272 | 273 | # perform localization 274 | print 'Applying ' + algo_name + '...' 
275 | d.locate_sources(y_mic_stft, freq_bins=freq_bins) 276 | 277 | # print reconstruction results 278 | recon_err, sort_idx = polar_distance(phi_ks, d.phi_recon) 279 | np.set_printoptions(precision=3, formatter={'float': '{: 0.3f}'.format}) 280 | 281 | print('Reconstructed spherical coordinates (in degrees) and amplitudes:') 282 | if d.num_src > 1: 283 | #d.phi_recon = d.phi_recon[sort_idx[:,1]] 284 | print('Original azimuths : {0}'.format(np.degrees( 285 | phi_ks[sort_idx[:, 0]]))) 286 | #phi_ks))) 287 | print('Detected azimuths : {0}'.format(np.degrees( 288 | d.phi_recon[sort_idx[:, 1]]))) 289 | #d.phi_recon))) 290 | else: 291 | print('Original azimuths : {0}'.format(np.degrees(phi_ks))) 292 | print('Detected azimuths : {0}'.format(np.degrees(d.phi_recon))) 293 | 294 | if isinstance(d, doa.FRI): 295 | #d.alpha_recon = d.alpha_recon[:,sort_idx[:,1]] 296 | print d.alpha_recon.shape 297 | if K > 1: 298 | print('Reconstructed amplitudes : \n{0}\n'.format(d.alpha_recon.squeeze())) 299 | else: 300 | print('Reconstructed amplitudes : \n{0}\n'.format(d.alpha_recon.squeeze())) 301 | 302 | print('Reconstruction error : {0:.3e}'.format(np.degrees(recon_err))) 303 | 304 | # reset numpy print option 305 | np.set_printoptions(edgeitems=3, infstr='inf', 306 | linewidth=75, nanstr='nan', precision=8, 307 | suppress=False, threshold=1000, formatter=None) 308 | 309 | # plot response (for FRI one subband) 310 | if plot_flag: 311 | d.polar_plt_dirac(phi_ks) 312 | plt.show() 313 | 314 | # Save the spatial spectrum as well 315 | if algo_name == 'FRI': 316 | dirty_img = d._gen_dirty_img() 317 | else: 318 | dirty_img = None 319 | 320 | # save the result to plot later 321 | if data_filename is None: 322 | date = time.strftime("%Y%m%d-%H%M%S") 323 | data_filename = 'data/{}_doa_9_mics_10_src.npz'.format(date) 324 | 325 | np.savez(data_filename, phi_ks=phi_ks, phi_recon=d.phi_recon, 326 | dirty_img=dirty_img, phi_grid=d.theta) 327 | 328 | print 'Saved data to file: ' + data_filename 329 | 
330 | -------------------------------------------------------------------------------- /figure_doa_9_mics_10_src_plot.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import sys 4 | import copy 5 | import numpy as np 6 | import pandas as pd 7 | import getopt 8 | 9 | import matplotlib.pyplot as plt 10 | 11 | import seaborn as sns 12 | 13 | from experiment import arrays 14 | from tools import polar_distance 15 | 16 | if __name__ == "__main__": 17 | 18 | argv = sys.argv[1:] 19 | files = 'data/20160913-011415_doa_9_mics_10_src.npz' 20 | 21 | try: 22 | opts, args = getopt.getopt(argv, "hf:", ["file=",]) 23 | except getopt.GetoptError: 24 | print('figure_doa_9_mics_10_src.py -f ') 25 | sys.exit(2) 26 | for opt, arg in opts: 27 | if opt == '-h': 28 | print('figure_doa_9_mics_10_src.py -f ') 29 | sys.exit() 30 | elif opt in ("-f", "--file"): 31 | files = arg 32 | 33 | data = np.load(files) 34 | 35 | phi_ref = data['phi_ks'] 36 | phi_recon = data['phi_recon'] 37 | phi_plt = data['phi_grid'] 38 | dirty_img = data['dirty_img'] 39 | 40 | # call seaborn and set the style 41 | sns.set(style='whitegrid',context='paper',font_scale=1.2, 42 | rc={ 43 | 'figure.figsize':(3.5,3.15), 44 | 'lines.linewidth':0.75, 45 | 'font.family': 'sans-serif', 46 | 'font.sans-serif': [u'Helvetica'], 47 | 'text.usetex': False, 48 | }) 49 | 50 | # plot 51 | fig = plt.figure(figsize=(3.15, 3.15), dpi=90) 52 | ax = fig.add_subplot(111, projection='polar') 53 | base = 1. 54 | height = 10. 55 | 56 | #blue = [0, 0.447, 0.741] 57 | #red = [0.850, 0.325, 0.098] 58 | 59 | pal = sns.cubehelix_palette(6, start=0.5, rot=-0.5,dark=0.3, light=.75, reverse=True, hue=1.) 
60 | #col_recon = pal[0] 61 | col_gt = pal[3] 62 | col_spectrum = pal[5] 63 | 64 | pal = sns.color_palette("RdBu_r", 7) 65 | pal = sns.color_palette("coolwarm", 7) 66 | col_recon = pal[6] 67 | #col_gt = pal[1] 68 | #col_spectrum = pal[2] 69 | 70 | sns.set_palette(pal) 71 | 72 | # We are not interested in amplitude for this plot 73 | alpha_ref = 2./3 * np.ones(phi_ref.shape) 74 | alpha_recon = 2./3 * np.ones(phi_recon.shape) 75 | 76 | # match detected with truth 77 | recon_err, sort_idx = polar_distance(phi_recon, phi_ref) 78 | phi_recon = phi_recon[sort_idx[:, 0]] 79 | phi_ref = phi_ref[sort_idx[:, 1]] 80 | 81 | K_est = phi_recon.size 82 | K = len(phi_ref) 83 | 84 | if phi_ref.shape[0] < 10: 85 | raise ValueError('WE NEED 10 SOURCES!') 86 | 87 | # plot the 'dirty' image 88 | dirty_img = np.abs(dirty_img) 89 | min_val = dirty_img.min() 90 | max_val = dirty_img.max() 91 | dirty_img = (dirty_img - min_val) / (max_val - min_val) 92 | 93 | # we need to make a complete loop, copy first value to last 94 | c_phi_plt = np.r_[phi_plt, phi_plt[0]] 95 | c_dirty_img = np.r_[dirty_img, dirty_img[0]] 96 | ax.plot(c_phi_plt, base + 0.95*height*c_dirty_img, linewidth=1, 97 | alpha=0.7,linestyle='-', color=col_spectrum, 98 | label='spatial spectrum', zorder=0) 99 | 100 | 101 | # stem for original doa 102 | for k in range(K): 103 | ax.plot([phi_ref[k], phi_ref[k]], [base, base + 104 | height*alpha_ref[k]], linewidth=0.5, linestyle='-', 105 | color=col_gt, alpha=1., zorder=1) 106 | 107 | # markers for original doa 108 | ax.scatter(phi_ref, base + height*alpha_ref, 109 | c=np.tile(col_gt, (K, 1)), 110 | s=70, alpha=1.00, marker='^', linewidths=0, 111 | label='groundtruth', zorder=1) 112 | 113 | # stem for reconstructed doa 114 | for k in range(K_est): 115 | ax.plot([phi_recon[k], phi_recon[k]], [base, base + 116 | height*alpha_recon[k]], linewidth=0.5, linestyle='-', 117 | color=col_recon, alpha=1.,zorder=2) 118 | 119 | # markers for reconstructed doa 120 | ax.scatter(phi_recon, base + 
height*alpha_recon, c=np.tile(col_recon, 121 | (K_est, 1)), s=100, alpha=1., marker='*', linewidths=0, 122 | label='reconstruction', zorder=2) 123 | 124 | 125 | 126 | handles, labels = ax.get_legend_handles_labels() 127 | ax.legend(handles=handles[:3], framealpha=1., 128 | scatterpoints=1, loc=8, 129 | ncol=1, bbox_to_anchor=(0.85, -0.22), 130 | handletextpad=.2, columnspacing=1.7, labelspacing=0.1) 131 | 132 | ax.set_xlabel(r'DOA') #, fontsize=11) 133 | ax.set_xticks(np.linspace(0, 2 * np.pi, num=12, endpoint=False)) 134 | ax.xaxis.set_label_coords(0.5, -0.11) 135 | ax.set_yticks([1]) 136 | ax.set_yticklabels([]) 137 | #ax.set_yticks(np.linspace(0, 1, 2)) 138 | ax.xaxis.grid(b=True, color=[0.3, 0.3, 0.3], linestyle=':', linewidth=0.7) 139 | ax.yaxis.grid(b=True, color=[0.3, 0.3, 0.3], linestyle='--', linewidth=0.7) 140 | ax.set_ylim([0, base + height]) 141 | 142 | plt.tight_layout(pad=0.5) 143 | 144 | filename = 'figures/experiment_9_mics_10_src' 145 | plt.savefig(filename + '.pdf', format='pdf') #, transparent=True) 146 | plt.savefig(filename + '.png', format='png') #, transparent=True) 147 | 148 | -------------------------------------------------------------------------------- /figure_doa_experiment.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from experiment import select_bands 3 | 4 | def parallel_loop(filename, algo_names, pmt): 5 | ''' 6 | This is one loop of the computation 7 | extracted for parallelization 8 | ''' 9 | 10 | # We need to do a bunch of imports 11 | import pyroomacoustics as pra 12 | import os 13 | import numpy as np 14 | from scipy.io import wavfile 15 | import mkl as mkl_service 16 | import copy 17 | 18 | import doa 19 | from tools import rfft 20 | 21 | # for such parallel processing, it is better 22 | # to deactivate multithreading in mkl 23 | mkl_service.set_num_threads(1) 24 | 25 | # exctract the speaker names from filename 26 | name = 
os.path.splitext(os.path.basename(filename))[0] 27 | sources = name.split('-') 28 | 29 | # number of sources 30 | K = len(sources) 31 | 32 | # Import speech signal 33 | fs_file, rec_signals = wavfile.read(filename) 34 | 35 | # sanity check 36 | if pmt['fs'] != fs_file: 37 | raise ValueError('The sampling frequency of the files doesn''t match that of the script') 38 | 39 | speech_signals = np.array(rec_signals[:,pmt['mic_select']], dtype=np.float32) 40 | 41 | # Remove the DC bias 42 | for s in speech_signals.T: 43 | s[:] = pra.highpass(s, pmt['fs'], 100.) 44 | 45 | if pmt['stft_win']: 46 | stft_win = np.hanning(pmt['nfft']) 47 | else: 48 | stft_win = None 49 | 50 | # Normalize the amplitude 51 | speech_signals *= pmt['scaling'] 52 | 53 | # Compute STFT of signal 54 | # ------------------------- 55 | y_mic_stft = [] 56 | for k in range(speech_signals.shape[1]): 57 | y_stft = pra.stft(speech_signals[:, k], pmt['nfft'], pmt['stft_hop'], 58 | transform=rfft, win=stft_win).T / np.sqrt(pmt['nfft']) 59 | y_mic_stft.append(y_stft) 60 | y_mic_stft = np.array(y_mic_stft) 61 | 62 | # estimate SNR in dB (on 1st microphone) 63 | sig_var = np.var(speech_signals) 64 | SNR = 10*np.log10( (sig_var - pmt['noise_var']) / pmt['noise_var'] ) 65 | 66 | freq_bins = copy.copy(pmt['freq_bins'][K-1]) 67 | 68 | # dict for output 69 | phi_recon = {} 70 | 71 | for alg in algo_names: 72 | 73 | # Use the convenient dictionary of algorithms defined 74 | d = doa.algos[alg]( 75 | L=pmt['mic_array'], 76 | fs=pmt['fs'], 77 | nfft=pmt['nfft'], 78 | num_src=K, 79 | c=pmt['c'], 80 | theta=pmt['phi_grid'], 81 | max_four=pmt['M'], 82 | num_iter=pmt['num_iter'], 83 | G_iter = pmt['G_iter'] 84 | ) 85 | 86 | # perform localization 87 | d.locate_sources(y_mic_stft, freq_bins=freq_bins[alg]) 88 | 89 | # store result 90 | phi_recon[alg] = d.phi_recon 91 | 92 | return SNR, sources, phi_recon 93 | 94 | 95 | if __name__ == '__main__': 96 | 97 | import numpy as np 98 | from scipy.io import wavfile 99 | import os, 
sys, getopt 100 | import time 101 | import json 102 | 103 | import pyroomacoustics as pra 104 | 105 | import doa 106 | from tools import rfft 107 | from experiment import arrays, calculate_speed_of_sound 108 | 109 | # default values 110 | serial_flag = False 111 | test_flag = False 112 | data_filename = None 113 | 114 | # parse arguments 115 | cmd_name = sys.argv[0] 116 | argv = sys.argv[1:] 117 | 118 | def print_help(cmd): 119 | print('%s [-t -s] -f ' % cmd) 120 | print(' -s, --serial: Use serial computing') 121 | print(' -t, --test: Test mode (run 1 loop)') 122 | print(' -f , --file=: name of output file') 123 | 124 | try: 125 | opts, args = getopt.getopt(argv, "hf:ts", ["file=", "test","plot"]) 126 | except getopt.GetoptError: 127 | print_help(cmd_name) 128 | sys.exit(2) 129 | for opt, arg in opts: 130 | if opt == '-h': 131 | print_help(cmd_name) 132 | sys.exit() 133 | elif opt in ("-f", "--file"): 134 | data_filename = arg 135 | elif opt in ("-t", "--test"): 136 | test_flag = True 137 | elif opt in ("-s", "--serial"): 138 | serial_flag = True 139 | 140 | # We should make this the default structure 141 | # it can be applied by copying/downloading the data or creating a symbolic link 142 | exp_folder = './recordings/20160908/' 143 | 144 | # Open the protocol json file 145 | with open(exp_folder + 'protocol.json') as fd: 146 | exp_data = json.load(fd) 147 | 148 | # Get the speakers and microphones grounndtruth locations 149 | sys.path.append(exp_folder) 150 | from edm_to_positions import twitters 151 | 152 | array_str = 'pyramic' 153 | #array_str = 'compactsix' 154 | 155 | if array_str == 'pyramic': 156 | 157 | twitters.center('pyramic') 158 | 159 | # subselect the flat part of the array 160 | R_flat_I = range(8, 16) + range(24, 32) + range(40, 48) 161 | 162 | # get array geometry 163 | mic_array = arrays['pyramic_tetrahedron'][:, R_flat_I].copy() 164 | mic_array += twitters[['pyramic']] 165 | 166 | # data subfolder 167 | rec_folder = exp_folder + 
'data_pyramic/segmented/' 168 | 169 | elif array_str == 'compactsix': 170 | 171 | twitters.center('compactsix') 172 | 173 | R_flat_I = range(6) 174 | mic_array = arrays['compactsix_circular_1'][:,R_flat_I].copy() 175 | mic_array += twitters[['compactsix']] 176 | rec_folder = exp_folder + 'data_compactsix/segmented/' 177 | missing_rec = () 178 | 179 | # General parameters 180 | fs = 16000 181 | 182 | # Define the algorithms to run 183 | algo_names = ['SRP', 'MUSIC', 'CSSM', 'WAVES', 'TOPS', 'FRI'] 184 | 185 | # Experiment related parameters 186 | temp = exp_data['conditions']['temperature'] 187 | hum = exp_data['conditions']['humidity'] 188 | c = calculate_speed_of_sound(temp, hum) 189 | 190 | # algorithm parameters 191 | parameters = { 192 | 'mic_array' : mic_array, # The array geometry 193 | 'mic_select': R_flat_I, # A subselection of microphones 194 | 'fs' : 16000, # the sampling frequency 195 | 'nfft': 256, # The FFT size 196 | 'stft_hop': 256, # the number of samples between two stft frames 197 | 'stft_win': True, # Use a hanning window for the STFT 198 | 'c': c, # The speed of sound 199 | 'M' : 24, # Maximum Fourier coefficient index (-M to M), K_est <= M <= num_mic*(num_mic - 1) / 2 200 | 'num_iter' : 10, # Maximum number of iterations for algorithms that require them 201 | 'stop_cri' : 'max_iter', # stropping criterion for FRI ('mse' or 'max_iter') 202 | 'G_iter' : 1, # Maximum of liner mapping update iterations 203 | } 204 | 205 | # The frequency grid for the algorithms requiring a grid search 206 | parameters['phi_grid'] = np.linspace(0, 2*np.pi, num=721, dtype=float, endpoint=False) 207 | 208 | # ---------------------------- 209 | # The mighty frequency band selection 210 | 211 | # Old 'magic' bands 212 | #freq_hz = np.array([2300., 2441., 2577., 3182., 3351, 4122.]) 213 | 214 | # Choose the frequency range to use 215 | # These were chosen empirically to give good performance 216 | parameters['freq_range'] = { 217 | 'MUSIC': [2500., 4500.], 218 | 'SRP': 
[2500., 4500.], 219 | 'CSSM': [2500., 4500.], 220 | 'WAVES': [3000., 4000.], 221 | 'TOPS': [100., 5000.], 222 | 'FRI': [2500., 4500.], 223 | } 224 | 225 | parameters['n_bands'] = { 226 | 'MUSIC' : 20, 227 | 'SRP' : 20, 228 | 'CSSM' : 10, 229 | 'WAVES' : 10, 230 | 'TOPS' : 60, 231 | 'FRI' : 20, 232 | } 233 | 234 | # Band selection 235 | parameters['freq_bins'] = [] 236 | for K in [1,2,3]: 237 | 238 | # the samples are used to select the frequencies 239 | samples = ['experiment/samples/fq_sample{}.wav'.format(i) for i in range(K)] 240 | 241 | parameters['freq_bins'].append({}) 242 | 243 | for algo in algo_names: 244 | # Call the band selection routine 245 | # The routine averages the speech signals 246 | # then splits the range into n_bands equal size bands 247 | # and picks the bin with largest power in each band 248 | freq_hz, freq_bins = select_bands( 249 | samples, 250 | parameters['freq_range'][algo], 251 | parameters['fs'], 252 | parameters['nfft'], 253 | parameters['stft_win'], 254 | parameters['n_bands'][algo], 255 | div = 1) 256 | parameters['freq_bins'][-1][algo] = freq_bins 257 | print K, algo, 'Number of bins', freq_bins.shape[0] 258 | 259 | #----------------------------------------------- 260 | # Get the silence file to use for SNR estimation 261 | fs_silence, rec_silence = wavfile.read(rec_folder + 'silence.wav') 262 | silence = np.array(rec_silence[:,R_flat_I], dtype=np.float32) 263 | for s in silence.T: 264 | s[:] = s - s.mean() 265 | 266 | # This is a scaling factor to apply to raw signals 267 | parameters['scaling'] = np.sqrt(0.1 / np.var(silence)) 268 | silence *= parameters['scaling'] 269 | 270 | # Compute noise variance for later SNR estimation 271 | parameters['noise_var'] = np.var(silence) 272 | 273 | # The folders for the different numbers of speakers 274 | spkr_2_folder = { 1: 'one_speaker/', 2: 'two_speakers/', 3: 'three_speakers/' } 275 | 276 | # collect all filenames 277 | filenames = [] 278 | for K in range(1,4): 279 | fldr = rec_folder + 
spkr_2_folder[K] 280 | filenames += [fldr + name for name in os.listdir(rec_folder + spkr_2_folder[K])] 281 | 282 | 283 | # There is the option to only run one loop for test 284 | if test_flag: 285 | print 'Running one test loop only.' 286 | filenames = filenames[:1] 287 | 288 | # Main processing loop 289 | if serial_flag: 290 | print 'Running everything in a serial loop.' 291 | # Serial processing 292 | out = [] 293 | for fn in filenames: 294 | out.append(parallel_loop(fn, algo_names, parameters)) 295 | 296 | else: 297 | import ipyparallel as ip 298 | 299 | print 'Using ipyparallel processing.' 300 | 301 | # Start the parallel processing 302 | c = ip.Client() 303 | NC = len(c.ids) 304 | print NC,'workers on the job' 305 | 306 | # replicate some parameters 307 | algo_names_ls = [algo_names]*len(filenames) 308 | params_ls = [parameters]*len(filenames) 309 | 310 | # dispatch to workers 311 | out = c[:].map_sync(parallel_loop, filenames, algo_names_ls, params_ls) 312 | 313 | # Save the result to a file 314 | if data_filename is None: 315 | date = time.strftime("%Y%m%d-%H%M%S") 316 | data_filename = 'data/{}_doa_experiment.npz'.format(date) 317 | 318 | np.savez(data_filename, filenames=filenames, parameters=parameters, algo_names=algo_names, out=out) 319 | 320 | print 'Saved data to file: ' + data_filename 321 | 322 | -------------------------------------------------------------------------------- /figure_doa_experiment_plot.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import sys 4 | import numpy as np 5 | import getopt 6 | import pandas as pd 7 | import matplotlib.pyplot as plt 8 | import seaborn as sns 9 | 10 | from tools import polar_distance, polar_error 11 | from experiment import arrays 12 | 13 | if __name__ == "__main__": 14 | # parse arguments 15 | argv = sys.argv[1:] 16 | 17 | # This is the output from `figure_doa_experiment.py` 18 | data_file = 
    # (completes the `data_file = ` assignment begun above)
    'data/20160909-203344_doa_experiment.npz'

    try:
        opts, args = getopt.getopt(argv, "hf:", ["file=",])
    except getopt.GetoptError:
        print('test_doa_recorded.py -f ')
        sys.exit(2)
    for opt, arg in opts:
        if opt == '-h':
            print('test_doa_recorded.py -a -f -b ')
            sys.exit()
        elif opt in ("-f", "--file"):
            data_file = arg

    # Get the speakers and microphones groundtruth locations
    exp_folder = './recordings/20160908/'
    sys.path.append(exp_folder)
    from edm_to_positions import twitters

    # Get the microphone array locations
    array_str = 'pyramic'
    twitters.center(array_str)
    # flat part of the pyramic array (Python 2: range() returns a list here)
    R_flat_I = range(8, 16) + range(24, 32) + range(40, 48)
    mic_array = arrays['pyramic_tetrahedron'][:, R_flat_I].copy()
    mic_array += twitters[[array_str]]

    # set the reference point to center of pyramic array
    v = {array_str: np.mean(mic_array, axis=1)}
    twitters.correct(v)

    # load the simulation output of figure_doa_experiment.py
    data = np.load(data_file)

    # build some container arrays
    algo_names = data['algo_names'].tolist()

    # Now loop and process the results
    columns = ['sources', 'SNR', 'Algorithm', 'Error']
    table = []
    close_sources = []
    for pt in data['out']:

        # each entry is (SNR, speaker labels, dict of reconstructed DOAs)
        SNR = pt[0]
        speakers = [s.replace("'", "") for s in pt[1]]
        K = len(speakers)

        # Get groundtruth for speaker
        phi_gt = np.array([twitters.doa(array_str, s) for s in speakers])[:, 0]

        for alg in pt[2].keys():
            phi_recon = pt[2][alg]
            recon_err, sort_idx = polar_distance(phi_gt, phi_recon)
            table.append([K, SNR, alg, np.degrees(recon_err)])

            # we single out the reconstruction of the two closely located sources
            if '7' in speakers and '16' in speakers:
                # by construction '7' is always first and '16' second
                # a source counts as resolved if its error is below half the
                # groundtruth separation of the two close speakers
                success = 0
                for p1, p2 in zip(phi_gt[:2], phi_recon[sort_idx[:2, 1]]):
                    if polar_error(p1, p2) < polar_error(phi_gt[0], phi_gt[1]) / 2:
                        success += 1
                close_sources.append([alg,
                                      success,
                                      phi_recon[sort_idx[0, 1]],
                                      phi_recon[sort_idx[1, 1]],
                                      ])

    # Create pandas data frame
    df = pd.DataFrame(table, columns=columns)

    # Compute statistics for the reconstructed angles
    # (circular mean via the angle of the mean unit phasor)
    df_close_sources = pd.DataFrame(close_sources, columns=['Algorithm', 'Success', '7', '16'])
    mu = {'7': {}, '16': {}}
    std = {'7': {}, '16': {}}
    for alg in ['FRI', 'MUSIC', 'SRP']:
        phi_r = df_close_sources[['Algorithm', '7', '16']][df_close_sources['Algorithm'] == alg]
        for spkr in ['7', '16']:
            mu[spkr][alg] = np.angle(np.mean(np.exp(1j * phi_r[spkr])))
            std[spkr][alg] = np.mean([polar_error(p, mu[spkr][alg]) for p in phi_r[spkr]])

    for spkr in ['7', '16']:
        for alg in ['FRI', 'MUSIC', 'SRP']:
            print spkr, alg, 'mu=', np.degrees(mu[spkr][alg]), 'std=', np.degrees(std[spkr][alg])
    '''
    for spkr in ['7','16']:
        for alg in ['FRI','MUSIC','SRP']:
            print np.degrees(mu[spkr][alg]),np.degrees(std[spkr][alg]),
        print ''
    '''

    # Create the super plot comparing all algorithms
    algo_plot = ['FRI', 'MUSIC', 'SRP', 'CSSM', 'TOPS', 'WAVES']

    sns.set(style='whitegrid', context='paper', font_scale=1.2,
            rc={
                'figure.figsize': (3.5, 3.15),
                'lines.linewidth': 1.5,
                'font.family': 'sans-serif',
                'font.sans-serif': [u'Helvetica'],
                'text.usetex': False,
            })
    #pal = sns.cubehelix_palette(6, start=0.5, rot=-0.75, dark=0.3, light=.8, reverse=True)
    #pal = sns.cubehelix_palette(6, start=0.5, rot=-0.5,dark=0.3, light=.85, reverse=True, hue=1.)
    pal = sns.cubehelix_palette(6, start=0.5, rot=-0.5, dark=0.3, light=.75, reverse=True, hue=1.)

    plt.figure(figsize=(4.7, 3.15), dpi=90)

    sns.boxplot(x="sources", y="Error", hue="Algorithm",
                hue_order=algo_plot, data=df,
                palette=pal,
                fliersize=0.)
128 | 129 | leg = plt.legend(loc='upper left',title='Algorithm', 130 | bbox_to_anchor=[-0.02,1.03], 131 | frameon=False, framealpha=0.1) 132 | leg.get_frame().set_linewidth(0.0) 133 | 134 | #palette="PRGn") 135 | sns.despine(offset=10, trim=True, left=True) 136 | 137 | plt.xlabel("Number of sources") 138 | plt.ylabel("Error $[^\circ]$") 139 | plt.yticks(np.arange(0,80)) 140 | plt.ylim([0.0, 3.3]) 141 | plt.tight_layout(pad=0.1) 142 | 143 | plt.savefig('figures/experiment_error_box.pdf') 144 | plt.savefig('figures/experiment_error_box.png') 145 | 146 | -------------------------------------------------------------------------------- /figure_doa_separation.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | def parallel_loop(algo_names, pmt, args): 4 | ''' 5 | This is one loop of the computation 6 | extracted for parallelization 7 | ''' 8 | 9 | SNR = args[0] 10 | separation_angle = args[1] 11 | look = args[2] 12 | seed = args[3] 13 | 14 | # We need to do a bunch of imports 15 | import pyroomacoustics as pra 16 | import os 17 | import numpy as np 18 | from scipy.io import wavfile 19 | import mkl as mkl_service 20 | 21 | import doa 22 | from tools import rfft, polar_error, polar_distance, gen_sig_at_mic_stft, gen_diracs_param 23 | 24 | # initialize local RNG seed 25 | np.random.seed(seed) 26 | 27 | # for such parallel processing, it is better 28 | # to deactivate multithreading in mkl 29 | mkl_service.set_num_threads(1) 30 | 31 | # number of sources is always two 32 | K = 2 33 | 34 | # The ground truth angles, same power sources 35 | alpha_gt = np.ones(2, dtype=np.float32) 36 | phi_gt = np.array([look, look + separation_angle], dtype=np.float32) 37 | 38 | # generate complex base-band signal received at microphones 39 | y_mic_stft, y_mic_stft_noiseless = \ 40 | gen_sig_at_mic_stft(phi_gt, alpha_gt, pmt['mic_array'][:2,:], SNR, 41 | pmt['fs'], fft_size=pmt['nfft'], Ns=pmt['num_snapshots']) 42 | 
43 | # dict for output 44 | phi = { 'groundtruth': phi_gt, } 45 | 46 | for alg in algo_names: 47 | 48 | # select frequency bins uniformly in the range 49 | freq_hz = np.linspace(pmt['freq_range'][alg][0], pmt['freq_range'][alg][1], pmt['n_bands'][alg]) 50 | freq_bins = np.unique( 51 | np.array([int(np.round(f / pmt['fs'] * pmt['nfft'])) 52 | for f in freq_hz]) 53 | ) 54 | 55 | # Use the convenient dictionary of algorithms defined 56 | d = doa.algos[alg]( 57 | L=pmt['mic_array'], 58 | fs=pmt['fs'], 59 | nfft=pmt['nfft'], 60 | num_src=K, 61 | c=pmt['c'], 62 | theta=pmt['phi_grid'], 63 | max_four=pmt['M'], 64 | num_iter=pmt['num_iter'] 65 | ) 66 | 67 | # perform localization 68 | d.locate_sources(y_mic_stft, freq_bins=freq_bins) 69 | 70 | # store result 71 | phi[alg] = d.phi_recon 72 | 73 | return phi 74 | 75 | 76 | if __name__ == '__main__': 77 | 78 | ''' 79 | This experiment will examine the discrimination power 80 | of different algorithms for sources closely spaced 81 | ''' 82 | 83 | import numpy as np 84 | from scipy.io import wavfile 85 | import os, sys, getopt 86 | import time 87 | import json 88 | 89 | import pyroomacoustics as pra 90 | 91 | import doa 92 | from tools import rfft 93 | from experiment import arrays, calculate_speed_of_sound 94 | 95 | # default values 96 | serial_flag = False 97 | test_flag = False 98 | data_filename = None 99 | 100 | # parse arguments 101 | cmd_name = sys.argv[0] 102 | argv = sys.argv[1:] 103 | 104 | def print_help(cmd): 105 | print('%s [-t -s] -f ' % cmd) 106 | print(' -s, --serial: Use serial computing') 107 | print(' -t, --test: Test mode (run 1 loop)') 108 | print(' -f , --file=: name of output file') 109 | 110 | try: 111 | opts, args = getopt.getopt(argv, "hf:ts", ["file=", "test","plot"]) 112 | except getopt.GetoptError: 113 | print_help(cmd_name) 114 | sys.exit(2) 115 | for opt, arg in opts: 116 | if opt == '-h': 117 | print_help(cmd_name) 118 | sys.exit() 119 | elif opt in ("-f", "--file"): 120 | data_filename = arg 121 
| elif opt in ("-t", "--test"): 122 | test_flag = True 123 | elif opt in ("-s", "--serial"): 124 | serial_flag = True 125 | 126 | # parse arguments 127 | algo_names = ['SRP', 'MUSIC', 'CSSM', 'WAVES', 'TOPS', 'FRI'] 128 | SNRs = [0] 129 | separation_angle = np.pi / 2.**np.arange(6.,0.5,-0.5) 130 | look_direction = np.linspace(0, 2.*np.pi/3., 120., endpoint=False) # 120 because of array sym 131 | loops = 10 132 | 133 | # We use the same array geometry as in the experiment 134 | array_str = 'pyramic' 135 | #array_str = 'compactsix' 136 | 137 | if array_str == 'pyramic': 138 | 139 | # subselect the flat part of the array 140 | R_flat_I = range(8, 16) + range(24, 32) + range(40, 48) 141 | 142 | # get array geometry 143 | mic_array = arrays['pyramic_tetrahedron'][:, R_flat_I].copy() 144 | 145 | elif array_str == 'compactsix': 146 | 147 | R_flat_I = range(6) 148 | mic_array = arrays['compactsix_circular_1'][:,R_flat_I].copy() 149 | 150 | # algorithm parameters 151 | parameters = { 152 | 'mic_array' : mic_array, # The array geometry 153 | 'mic_select': R_flat_I, # A subselection of microphones 154 | 'fs' : 16000, # the sampling frequency 155 | 'nfft': 256, # The FFT size 156 | 'stft_hop': 256, # the number of samples between two stft frames 157 | 'stft_win': True, # Use a hanning window for the STFT 158 | 'num_snapshots': 256, # The number of snapshots to compute covariance matrix 159 | 'c': 343., # The speed of sound 160 | 'M' : 24, # Maximum Fourier coefficient index (-M to M), K_est <= M <= num_mic*(num_mic - 1) / 2 161 | 'num_iter' : 10, # Maximum number of iterations for algorithms that require them 162 | 'stop_cri' : 'max_iter', # stropping criterion for FRI ('mse' or 'max_iter') 163 | 'seed': 12345, 164 | } 165 | 166 | # Choose the frequency range to use 167 | # These were chosen empirically to give good performance 168 | parameters['freq_range'] = { 169 | 'MUSIC': [2500., 4500.], 170 | 'SRP': [2500., 4500.], 171 | 'CSSM': [2500., 4500.], 172 | 'WAVES': [3000., 
4000.], 173 | 'TOPS': [100., 5000.], 174 | 'FRI': [2500., 4500.], 175 | } 176 | 177 | parameters['n_bands'] = { 178 | 'MUSIC' : 20, 179 | 'SRP' : 20, 180 | 'CSSM' : 10, 181 | 'WAVES' : 10, 182 | 'TOPS' : 60, 183 | 'FRI' : 20, 184 | } 185 | 186 | 187 | # The frequency grid for the algorithms requiring a grid search 188 | parameters['phi_grid'] = np.linspace(0, 2*np.pi, num=721, dtype=float, endpoint=False) 189 | 190 | # seed the original RNG 191 | np.random.seed(parameters['seed']) 192 | 193 | # build the combinatorial argument list 194 | args = [] 195 | for SNR in SNRs: 196 | for phi in separation_angle: 197 | for look in look_direction: 198 | for epoch in range(loops): 199 | seed = np.random.randint(4294967295, dtype=np.uint32) 200 | args.append( (SNR, phi, look, seed) ) 201 | 202 | # There is the option to only run one loop for test 203 | if test_flag: 204 | print 'Running one test loop only.' 205 | args = args[:1] 206 | 207 | # Main processing loop 208 | if serial_flag: 209 | print 'Running everything in a serial loop.' 210 | # Serial processing 211 | out = [] 212 | for ag in args: 213 | out.append(parallel_loop(algo_names, parameters, ag)) 214 | 215 | else: 216 | import ipyparallel as ip 217 | 218 | print 'Using ipyparallel processing.' 
219 | 220 | # Start the parallel processing 221 | c = ip.Client() 222 | NC = len(c.ids) 223 | print NC,'workers on the job' 224 | 225 | # replicate some parameters 226 | algo_names_ls = [algo_names]*len(args) 227 | params_ls = [parameters]*len(args) 228 | 229 | # dispatch to workers 230 | out = c[:].map_sync(parallel_loop, algo_names_ls, params_ls, args) 231 | 232 | # Save the result to a file 233 | if data_filename is None: 234 | date = time.strftime("%Y%m%d-%H%M%S") 235 | data_filename = 'data/{}_doa_separation.npz'.format(date) 236 | 237 | np.savez(data_filename, args=args, parameters=parameters, algo_names=algo_names, out=out) 238 | 239 | print 'Saved data to file: ' + data_filename 240 | -------------------------------------------------------------------------------- /figure_doa_separation_plot.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import sys, getopt, copy, os 4 | import numpy as np 5 | import pandas as pd 6 | 7 | import matplotlib.pyplot as plt 8 | 9 | import seaborn as sns 10 | 11 | from tools import polar_error, polar_distance 12 | 13 | from experiment import arrays 14 | 15 | if __name__ == "__main__": 16 | 17 | # parse arguments 18 | argv = sys.argv[1:] 19 | 20 | data_file = 'data/20160910-192848_doa_separation.npz' 21 | 22 | try: 23 | opts, args = getopt.getopt(argv, "hf:", ["file=",]) 24 | except getopt.GetoptError: 25 | print('figure_doa_separation_plot.py -f ') 26 | sys.exit(2) 27 | for opt, arg in opts: 28 | if opt == '-h': 29 | print('figure_doa_separation_plot.py -f ') 30 | sys.exit() 31 | elif opt in ("-f", "--file"): 32 | data_file = arg 33 | 34 | # algorithms to take in the plot 35 | algos = ['FRI','MUSIC','SRP','CSSM','WAVES','TOPS'] 36 | algo_lut = { 37 | 'FRI': 'FRIDA', 'MUSIC': 'MUSIC', 'SRP': 'SRP-PHAT', 38 | 'CSSM':'CSSM', 'WAVES':'WAVES','TOPS':'TOPS' 39 | } 40 | 41 | # check if a pickle file exists for these files 42 | pickle_file = 
os.path.splitext(data_file)[0] + '.pickle' 43 | 44 | if os.path.isfile(pickle_file): 45 | print 'Reading existing pickle file...' 46 | # read the pickle file 47 | df = pd.read_pickle(pickle_file) 48 | 49 | else: 50 | 51 | # This is the output from `figure_doa_experiment.py` 52 | data = np.load(data_file) 53 | 54 | # extra variables 55 | algo_names = data['algo_names'].tolist() 56 | parameters = data['parameters'][()] 57 | args = data['args'].tolist() 58 | sim_out = data['out'] 59 | 60 | 61 | # find min angle of separation 62 | angles = set() 63 | for a in args: 64 | angles.add(a[1]) 65 | phi_min = min(angles) 66 | phi_max = max(angles) 67 | 68 | # build the data table line by line 69 | print 'Building table' 70 | columns = ['SNR','Algorithm','angle','err1','err2','erravg','success'] 71 | table = [] 72 | for i,a in enumerate(args): 73 | for alg in algos: 74 | 75 | snr = a[0] 76 | phi = a[1] 77 | look = a[2] 78 | phi_gt = sim_out[i]['groundtruth'] 79 | phi_recon = sim_out[i][alg] 80 | 81 | # sort the angles 82 | recon_err, sort_idx = polar_distance(phi_gt, phi_recon) 83 | 84 | thresh = phi / 2. 
85 | 86 | if len(phi_recon) == 2: 87 | 88 | phi_gt = phi_gt[sort_idx[:,0]] 89 | phi_recon = phi_recon[sort_idx[:,1]] 90 | 91 | # compute individual and average error 92 | err = [polar_error(phi_gt[j],phi_recon[j]) for j in range(2)] 93 | err_avg = np.mean(err) 94 | 95 | # number of sources resolved 96 | success = 0 97 | for p1,p2 in zip(phi_gt, phi_recon): 98 | if polar_error(p1,p2) < thresh: 99 | success += 1 100 | 101 | elif len(phi_recon) == 1: 102 | 103 | phi_gt = phi_gt[sort_idx[0]] 104 | phi_recon = phi_recon 105 | err = [np.nan, np.nan] 106 | err[sort_idx[0]] = polar_error(phi_gt, phi_recon) 107 | 108 | err_avg = err[sort_idx[1]] 109 | 110 | if err < phi/2: 111 | success = 1 112 | else: 113 | success = 0 114 | 115 | entry = [] 116 | entry.append(snr) 117 | entry.append(algo_lut[alg]) 118 | entry.append(int(np.round(np.degrees(phi), decimals=0))) 119 | entry.append(np.degrees(err[0])) 120 | entry.append(np.degrees(err[1])) 121 | entry.append(np.degrees(err_avg)) 122 | entry.append(success) 123 | 124 | table.append(entry) 125 | 126 | # create a pandas frame 127 | print 'Creating dataframe' 128 | df = pd.DataFrame(table, columns=columns) 129 | 130 | # save for later re-plotting 131 | df.to_pickle(pickle_file) 132 | 133 | print 'Plot...' 134 | 135 | sns.set(style='whitegrid', context='paper', font_scale=1.2, 136 | rc={ 137 | 'figure.figsize':(3.5,3.15), 138 | 'lines.linewidth':1.5, 139 | 'font.family': 'sans-serif', 140 | 'font.sans-serif': [u'Helvetica'], 141 | 'text.usetex': False, 142 | }) 143 | #pal = sns.cubehelix_palette(6, start=0.5, rot=-0.75, dark=0.25, light=.75, reverse=True) 144 | pal = sns.cubehelix_palette(6, start=0.5, rot=-0.5,dark=0.3, light=.75, reverse=True, hue=1.) 
145 | 146 | plt.figure() 147 | 148 | sns.pointplot(x='angle',y='success',hue='Algorithm', 149 | data=df[['angle','success','Algorithm']], 150 | hue_order=['FRIDA','MUSIC','SRP-PHAT','CSSM','TOPS','WAVES'], 151 | palette=pal, 152 | markers=['^','o','x','s','d','v'], 153 | ci=None) 154 | 155 | ax = plt.gca() 156 | ax.text(-2.65, 1.965, 'B', fontsize=27, fontweight='bold') 157 | 158 | leg = plt.legend(loc='lower right',title='Algorithm', 159 | bbox_to_anchor=[1.05,0.0], 160 | frameon=False, framealpha=0.4) 161 | leg.get_frame().set_linewidth(0.0) 162 | 163 | plt.xlabel('Separation angle [$^\circ$]') 164 | plt.ylabel('# sources resolved') 165 | 166 | plt.ylim([0.45,2.1]) 167 | plt.yticks(np.arange(0.5,2.5,0.5)) 168 | 169 | sns.despine(offset=10, trim=False, left=True, bottom=True) 170 | 171 | plt.tight_layout(pad=0.5) 172 | 173 | plt.savefig('figures/experiment_minimum_separation.pdf') 174 | plt.savefig('figures/experiment_minimum_separation.png') 175 | 176 | -------------------------------------------------------------------------------- /figure_doa_synthetic.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | def parallel_loop(algo_names, pmt, args): 4 | ''' 5 | This is one loop of the computation 6 | extracted for parallelization 7 | ''' 8 | 9 | number_sources = args[0] 10 | SNR = args[1] 11 | seed = args[2] 12 | 13 | # We need to do a bunch of imports 14 | import pyroomacoustics as pra 15 | import os 16 | import numpy as np 17 | from scipy.io import wavfile 18 | import mkl as mkl_service 19 | 20 | import doa 21 | from tools import rfft, polar_error, polar_distance, gen_sig_at_mic_stft, gen_diracs_param 22 | 23 | # initialize local RNG seed 24 | np.random.seed(seed) 25 | 26 | # for such parallel processing, it is better 27 | # to deactivate multithreading in mkl 28 | mkl_service.set_num_threads(1) 29 | 30 | # number of sources 31 | K = number_sources 32 | 33 | # Generate "groundtruth" 
Diracs at random 34 | alpha_gt, phi_gt, time_stamp = gen_diracs_param( 35 | K, positive_amp=True, log_normal_amp=False, 36 | semicircle=False, save_param=False 37 | ) 38 | 39 | # generate complex base-band signal received at microphones 40 | y_mic_stft, y_mic_stft_noiseless = \ 41 | gen_sig_at_mic_stft(phi_gt, alpha_gt, pmt['mic_array'][:2,:], SNR, 42 | pmt['fs'], fft_size=pmt['nfft'], Ns=pmt['num_snapshots']) 43 | 44 | # dict for output 45 | phi = { 'groundtruth': phi_gt, } 46 | alpha = { 'groundtruth': alpha_gt, } 47 | 48 | for alg in algo_names: 49 | 50 | # select frequency bins uniformly in the range 51 | freq_hz = np.linspace(pmt['freq_range'][alg][0], pmt['freq_range'][alg][1], pmt['n_bands'][alg]) 52 | freq_bins = np.unique( 53 | np.array([int(np.round(f / pmt['fs'] * pmt['nfft'])) 54 | for f in freq_hz]) 55 | ) 56 | 57 | # Use the convenient dictionary of algorithms defined 58 | d = doa.algos[alg]( 59 | L=pmt['mic_array'], 60 | fs=pmt['fs'], 61 | nfft=pmt['nfft'], 62 | num_src=K, 63 | c=pmt['c'], 64 | theta=pmt['phi_grid'], 65 | max_four=pmt['M'], 66 | num_iter=pmt['num_iter'] 67 | ) 68 | 69 | # perform localization 70 | d.locate_sources(y_mic_stft, freq_bins=freq_bins) 71 | 72 | # store result 73 | phi[alg] = d.phi_recon 74 | 75 | if alg == 'FRI': 76 | alpha[alg] = d.alpha_recon 77 | 78 | return phi, alpha, len(freq_bins) 79 | 80 | 81 | if __name__ == '__main__': 82 | 83 | import numpy as np 84 | from scipy.io import wavfile 85 | import os, sys, getopt 86 | import time 87 | import json 88 | 89 | import pyroomacoustics as pra 90 | 91 | import doa 92 | from tools import rfft 93 | from experiment import arrays, calculate_speed_of_sound 94 | 95 | # default values 96 | serial_flag = False 97 | test_flag = False 98 | data_filename = None 99 | 100 | # parse arguments 101 | cmd_name = sys.argv[0] 102 | argv = sys.argv[1:] 103 | 104 | def print_help(cmd): 105 | print('%s [-t -s] -f ' % cmd) 106 | print(' -s, --serial: Use serial computing') 107 | print(' -t, 
--test: Test mode (run 1 loop)') 108 | print(' -f , --file=: name of output file') 109 | 110 | try: 111 | opts, args = getopt.getopt(argv, "hf:ts", ["file=", "test","plot"]) 112 | except getopt.GetoptError: 113 | print_help(cmd_name) 114 | sys.exit(2) 115 | for opt, arg in opts: 116 | if opt == '-h': 117 | print_help(cmd_name) 118 | sys.exit() 119 | elif opt in ("-f", "--file"): 120 | data_filename = arg 121 | elif opt in ("-t", "--test"): 122 | test_flag = True 123 | elif opt in ("-s", "--serial"): 124 | serial_flag = True 125 | 126 | # parse arguments 127 | algo_names = ['SRP', 'MUSIC', 'CSSM', 'WAVES', 'TOPS', 'FRI'] 128 | num_sources = range(1,1+1) 129 | SNRs = [-35, -30, -25, -24, -23, -22, -21, -20, 130 | -19, -18, -17, -16, -15, -10, -5, 131 | 0, 5, 10, 15, 20] 132 | loops = 500 133 | 134 | # We use the same array geometry as in the experiment 135 | array_str = 'pyramic' 136 | #array_str = 'compactsix' 137 | 138 | if array_str == 'pyramic': 139 | 140 | # subselect the flat part of the array 141 | R_flat_I = range(8, 16) + range(24, 32) + range(40, 48) 142 | 143 | # get array geometry 144 | mic_array = arrays['pyramic_tetrahedron'][:, R_flat_I].copy() 145 | 146 | elif array_str == 'compactsix': 147 | 148 | R_flat_I = range(6) 149 | mic_array = arrays['compactsix_circular_1'][:,R_flat_I].copy() 150 | 151 | # algorithm parameters 152 | parameters = { 153 | 'mic_array' : mic_array, # The array geometry 154 | 'mic_select': R_flat_I, # A subselection of microphones 155 | 'fs' : 16000, # the sampling frequency 156 | 'nfft': 256, # The FFT size 157 | 'stft_hop': 256, # the number of samples between two stft frames 158 | 'stft_win': True, # Use a hanning window for the STFT 159 | 'num_snapshots': 256, # The number of snapshots to compute covariance matrix 160 | 'c': 343., # The speed of sound 161 | 'M' : 24, # Maximum Fourier coefficient index (-M to M), K_est <= M <= num_mic*(num_mic - 1) / 2 162 | 'num_iter' : 10, # Maximum number of iterations for algorithms that 
require them 163 | 'stop_cri' : 'max_iter', # stropping criterion for FRI ('mse' or 'max_iter') 164 | 'seed': 54321, 165 | } 166 | 167 | # Choose the frequency range to use 168 | # These were chosen empirically to give good performance 169 | parameters['freq_range'] = { 170 | 'MUSIC': [2500., 4500.], 171 | 'SRP': [2500., 4500.], 172 | 'CSSM': [2500., 4500.], 173 | 'WAVES': [3000., 4000.], 174 | 'TOPS': [100., 5000.], 175 | 'FRI': [2500., 4500.], 176 | } 177 | 178 | parameters['n_bands'] = { 179 | 'MUSIC' : 20, 180 | 'SRP' : 20, 181 | 'CSSM' : 10, 182 | 'WAVES' : 10, 183 | 'TOPS' : 60, 184 | 'FRI' : 20, 185 | } 186 | 187 | # The frequency grid for the algorithms requiring a grid search 188 | parameters['phi_grid'] = np.linspace(0, 2*np.pi, num=721, dtype=float, endpoint=False) 189 | 190 | # seed the original RNG 191 | np.random.seed(parameters['seed']) 192 | 193 | # build the combinatorial argument list 194 | args = [] 195 | for K in num_sources: 196 | for SNR in SNRs: 197 | for epoch in range(loops): 198 | seed = np.random.randint(4294967295, dtype=np.uint32) 199 | args.append((K, SNR, seed)) 200 | 201 | # There is the option to only run one loop for test 202 | if test_flag: 203 | print 'Running one test loop only.' 204 | args = args[:1] 205 | 206 | # Main processing loop 207 | if serial_flag: 208 | print 'Running everything in a serial loop.' 209 | # Serial processing 210 | out = [] 211 | for ag in args: 212 | out.append(parallel_loop(algo_names, parameters, ag)) 213 | 214 | else: 215 | import ipyparallel as ip 216 | 217 | print 'Using ipyparallel processing.' 
218 | 219 | # Start the parallel processing 220 | c = ip.Client() 221 | NC = len(c.ids) 222 | print NC,'workers on the job' 223 | 224 | # replicate some parameters 225 | algo_names_ls = [algo_names]*len(args) 226 | params_ls = [parameters]*len(args) 227 | 228 | # evaluate the runtime 229 | then = time.time() 230 | out1 = c[:].map_sync(parallel_loop, algo_names_ls[:NC], params_ls[:NC], args[:NC]) 231 | now = time.time() 232 | one_loop = now - then 233 | print 'Total estimated processing time:', len(args)*one_loop / len(c[:]) 234 | 235 | # dispatch to workers 236 | out = c[:].map_sync(parallel_loop, algo_names_ls[NC:], params_ls[NC:], args[NC:]) 237 | 238 | out = out1 + out 239 | 240 | # Save the result to a file 241 | if data_filename is None: 242 | date = time.strftime("%Y%m%d-%H%M%S") 243 | data_filename = 'data/{}_doa_synthetic.npz'.format(date) 244 | 245 | np.savez(data_filename, args=args, parameters=parameters, algo_names=algo_names, out=out) 246 | 247 | print 'Saved data to file: ' + data_filename 248 | -------------------------------------------------------------------------------- /figure_doa_synthetic_plot.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import sys 4 | import copy 5 | import numpy as np 6 | import pandas as pd 7 | import getopt 8 | import os 9 | 10 | from tools import polar_distance 11 | 12 | import matplotlib.pyplot as plt 13 | import seaborn as sns 14 | 15 | if __name__ == "__main__": 16 | 17 | argv = sys.argv[1:] 18 | data_files = '20160911-035215_doa_synthetic.npz' 19 | data_files = [ 20 | 'data/20160911-161112_doa_synthetic.npz', 21 | 'data/20160911-225325_doa_synthetic.npz', 22 | 'data/20160911-175127_doa_synthetic.npz', 23 | 'data/20160911-035215_doa_synthetic.npz', 24 | 'data/20160911-192530_doa_synthetic.npz', 25 | ] 26 | 27 | try: 28 | opts, args = getopt.getopt(argv, "hf:", ["file=",]) 29 | except getopt.GetoptError: 30 | 
print('figure_doa_separation_plot.py -f ') 31 | sys.exit(2) 32 | for opt, arg in opts: 33 | if opt == '-h': 34 | print('figure_doa_separation_plot.py -f ') 35 | sys.exit() 36 | elif opt in ("-f", "--file"): 37 | data_files = arg.split(',') 38 | 39 | # algorithms to take in the plot 40 | algo_names = ['FRI','MUSIC','SRP','CSSM','TOPS','WAVES'] 41 | algo_lut = { 42 | 'FRI': 'FRIDA', 'MUSIC': 'MUSIC', 'SRP': 'SRP-PHAT', 43 | 'CSSM':'CSSM', 'WAVES':'WAVES','TOPS':'TOPS' 44 | } 45 | 46 | # check if a pickle file exists for these files 47 | pickle_file = os.path.splitext(data_files[0])[0] + '_{}'.format(len(data_files)) + '.pickle' 48 | 49 | if os.path.isfile(pickle_file): 50 | # read the pickle file 51 | perf = pd.read_pickle(pickle_file) 52 | 53 | else: 54 | # build the data table line by line 55 | print 'Building table...' 56 | err_header = ['SNR','Algorithm','Error','Loop index'] 57 | table = [] 58 | 59 | # For easy plotting in seaborn, seems we need a loop count 60 | loop_index = {} 61 | Sources = [1,2,3] 62 | SNRs = np.arange(-35,21) 63 | for s in SNRs: 64 | loop_index[s] = {} 65 | for src in Sources: 66 | loop_index[s][src] = {} 67 | for alg in algo_names: 68 | loop_index[s][src][alg] = 0 69 | 70 | #if os. 
71 | 72 | # This is the output from `figure_doa_experiment.py` 73 | for data_file in data_files: 74 | data = np.load(data_file) 75 | 76 | # extra variables 77 | algo_names = data['algo_names'].tolist() 78 | parameters = data['parameters'] 79 | args = data['args'].tolist() 80 | sim_out = data['out'] 81 | 82 | for i,a in enumerate(args): 83 | K = int(a[0]) 84 | 85 | # only retain values for 1 source 86 | if K != 1: 87 | continue 88 | 89 | snr = int(a[1]) 90 | phi_gt = sim_out[i][0]['groundtruth'] 91 | for alg in algo_names: 92 | 93 | 94 | recon_err, sort_idx = polar_distance(phi_gt, sim_out[i][0][alg]) 95 | 96 | entry = [snr] 97 | entry.append(algo_lut[alg]) 98 | entry.append(np.degrees(recon_err)) 99 | entry.append(loop_index[snr][K][alg]) 100 | table.append(entry) 101 | 102 | loop_index[snr][K][alg] += 1 103 | 104 | # create a pandas frame 105 | print 'Making PANDAS frame...' 106 | df = pd.DataFrame(table, columns=err_header) 107 | 108 | # turns out all we need is the follow pivoted table 109 | perf = pd.pivot_table(df, values='Error', index=['SNR'], columns=['Algorithm'], aggfunc=np.mean) 110 | 111 | perf.to_pickle(pickle_file) 112 | 113 | sns.set(style='whitegrid') 114 | sns.plotting_context(context='poster', font_scale=2.) 115 | pal = sns.cubehelix_palette(8, start=0.5, rot=-.75) 116 | 117 | # Draw the figure 118 | print 'Plotting...' 119 | 120 | sns.set(style='whitegrid', context='paper', font_scale=1.2, 121 | rc={ 122 | 'figure.figsize':(3.5,3.15), 123 | 'lines.linewidth':2., 124 | 'font.family': 'sans-serif', 125 | 'font.sans-serif': [u'Helvetica'], 126 | 'text.usetex': False, 127 | }) 128 | #pal = sns.cubehelix_palette(6, start=0.5, rot=-0.75, dark=0.25, light=.75, reverse=True, hue=0.9) 129 | pal = sns.cubehelix_palette(6, start=0.5, rot=-0.5,dark=0.3, light=.75, reverse=True, hue=1.) 
130 | sns.set_palette(pal) 131 | #sns.set_palette('viridis') 132 | 133 | plt.figure() 134 | 135 | algo_order = ['FRIDA','MUSIC','SRP-PHAT','CSSM','TOPS','WAVES'] 136 | markers=['^','o','*','s','d','v'] 137 | 138 | for alg,mkr in zip(algo_order, markers): 139 | plt.plot(perf.index, perf[alg], marker=mkr, clip_on=False) 140 | 141 | ax = plt.gca() 142 | 143 | # remove the x-grid 144 | ax.xaxis.grid(False) 145 | 146 | ax.text(-45,87.5, 'A', fontsize=27, fontweight='bold') 147 | 148 | # nice legend box 149 | leg = plt.legend(algo_order, title='Algorithm', frameon=True, framealpha=0.6) 150 | leg.get_frame().set_linewidth(0.0) 151 | 152 | # set all the labels 153 | plt.xlabel('SNR [dB]') 154 | plt.ylabel('Average Error [$^\circ$]') 155 | plt.xlim([-35,15]) 156 | plt.ylim([-0.5, 95]) 157 | plt.xticks([-30, -20, -10, 0, 10]) 158 | plt.yticks([0, 20, 40, 60, 80]) 159 | 160 | sns.despine(offset=10, trim=False, left=True, bottom=True) 161 | 162 | plt.tight_layout(pad=0.5) 163 | 164 | plt.savefig('figures/experiment_snr_synthetic.pdf') 165 | plt.savefig('figures/experiment_snr_synthetic.png') 166 | 167 | -------------------------------------------------------------------------------- /figures/README.md: -------------------------------------------------------------------------------- 1 | FRIDA: Figures 2 | ============== 3 | 4 | This folder contains all the figures generated by the different scripts. 5 | 6 | # Fig. 1A: Average error for different SNR 7 | experiment_snr_synthetic.pdf 8 | 9 | # Fig. 1B: Minimum distance between sources allowing reconstruction 10 | experiment_minimum_separation.pdf 11 | 12 | # Fig. 2C: Average error on the recorded speech samples 13 | experiment_error_box.pdf 14 | 15 | # Fig. 
2D: The experiment with 10 loudspeakers and 9 microphones 16 | experiment_9_mics_10_src.pdf 17 | -------------------------------------------------------------------------------- /figures/experiment_9_mics_10_src.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/figures/experiment_9_mics_10_src.pdf -------------------------------------------------------------------------------- /figures/experiment_9_mics_10_src.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/figures/experiment_9_mics_10_src.png -------------------------------------------------------------------------------- /figures/experiment_error_box.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/figures/experiment_error_box.pdf -------------------------------------------------------------------------------- /figures/experiment_error_box.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/figures/experiment_error_box.png -------------------------------------------------------------------------------- /figures/experiment_minimum_separation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/figures/experiment_minimum_separation.pdf -------------------------------------------------------------------------------- /figures/experiment_minimum_separation.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/figures/experiment_minimum_separation.png -------------------------------------------------------------------------------- /figures/experiment_snr_synthetic.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/figures/experiment_snr_synthetic.pdf -------------------------------------------------------------------------------- /figures/experiment_snr_synthetic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LCAV/FRIDA/ff5d51e498805b862c342dd216ccfffb22444b7f/figures/experiment_snr_synthetic.png -------------------------------------------------------------------------------- /make_all_figures.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This will run all the scripts to reproduce the figures of the paper 4 | # A fast Hadamard transform for signals sparse in the tranform domain 5 | # by Robin Scheibler, Saeid Haghighatshoar, and Martin Vetterli 6 | # 7 | # This bash file was written by Robin Scheibler, October 2016 8 | # License: MIT 9 | 10 | # Config 11 | ######## 12 | 13 | # Enable test mode. This will run a single loop of each 14 | # script. This can be used to test if the configuration 15 | # is correct 16 | ENABLE_TEST=0 17 | 18 | # Enable serial mode. This deactivate the use of parallel 19 | # workers. The code runs in a straight loop. 20 | ENABLE_SERIAL=0 21 | 22 | # This sets the number of nodes in the ipyparallel cluster 23 | # If no cluster is used, this can be set to zero to run 24 | # in serial mode (super slow though...) 25 | # This number can be set to the number of available threads of 26 | # your CPU minus 1. Usually, the number of threads is twice 27 | # the number of cores. 
28 | PARALLEL_WORKERS=0 29 | 30 | # Show help function 31 | #################### 32 | function show_help { 33 | echo "$1 [OPTS]" 34 | echo "Options:" 35 | echo " -t Runs a single loop only for test purpose" 36 | echo " -s Runs all the code in a simple for loop. No parallelism" 37 | echo " -n x Runs the loops in parallel using x workers. This option is ignored if -s is used" 38 | } 39 | 40 | # Process arguments 41 | ################### 42 | 43 | # A POSIX variable 44 | OPTIND=1 # Reset in case getopts has been used previously in the shell. 45 | 46 | while getopts "h?tsn:" opt; do 47 | case "$opt" in 48 | h|\?) 49 | show_help $0 50 | exit 0 51 | ;; 52 | t) ENABLE_TEST=1 53 | ;; 54 | s) ENABLE_SERIAL=1 55 | ;; 56 | n) PARALLEL_WORKERS=$OPTARG 57 | ;; 58 | esac 59 | done 60 | 61 | shift $((OPTIND-1)) 62 | 63 | [ "$1" = "--" ] && shift 64 | 65 | # Process SERIAL flag 66 | if [ $ENABLE_SERIAL -eq 1 ]; then 67 | PARALLEL_WORKERS=0 68 | fi 69 | 70 | # Check that all necessary packages are installed 71 | ################## 72 | 73 | python check_requirements.py 74 | if [ $? -ne 0 ]; then 75 | echo "Some dependency is not met. Please check you have the packages listed in requirements.txt installed." 76 | echo "This can be done by running: python ./check_requirements.py" 77 | exit 1 78 | fi 79 | 80 | # Run all the scripts 81 | ##################### 82 | 83 | # Prepare parallel processing 84 | if [ $PARALLEL_WORKERS -gt 0 ]; then 85 | echo "" 86 | echo "Starting ${PARALLEL_WORKERS} ipyparallel workers." 87 | echo "" 88 | ipcluster start -n ${PARALLEL_WORKERS} --daemonize 89 | echo "" 90 | echo "Wait for 30 seconds to give time to engines to start..." 
91 | echo "" 92 | sleep 30 93 | SERIAL_FLAG= 94 | else 95 | SERIAL_FLAG=-s 96 | echo "" 97 | echo "Running the scripts in serial mode (no parallelism)" 98 | echo "" 99 | fi 100 | 101 | # Process test flag 102 | if [ $ENABLE_TEST -eq 1 ]; then 103 | TEST_FLAG=-t 104 | echo "Running the script in testing mode" 105 | else 106 | TEST_FLAG= 107 | fi 108 | 109 | if [ $ENABLE_TEST -ne 1 ] && [ $PARALLEL_WORKERS -eq 0 ]; then 110 | echo "#### You are about to run a very long simulation without parallel processing ####" 111 | echo "" 112 | echo " You might want to take a look at the option -t for a quick test, or -n x to" 113 | echo " use parallel processing (requires ipyparallel module)." 114 | echo "" 115 | read -n 1 -p "Press any key to go ahead." 116 | echo "" 117 | fi 118 | 119 | # Make some folders 120 | mkdir -p figures 121 | mkdir -p data 122 | 123 | FLAGS="${SERIAL_FLAG} ${TEST_FLAG}" 124 | echo "Running with flags ${FLAGS}" 125 | 126 | # Run all the scripts and get the output data file name 127 | echo 'Processing experiment data...' 128 | FILE1=`python figure_doa_experiment.py ${FLAGS} | grep 'Saved data to file:' | awk '{ print $5 }'` 129 | echo 'Running Monte-Carlo SNR simulation...' 130 | FILE2=`python figure_doa_synthetic.py ${FLAGS} | grep 'Saved data to file:' | awk '{ print $5 }'` 131 | echo 'Running Monte-Carlos source resolution simulation...' 132 | FILE3=`python figure_doa_separation.py ${FLAGS} | grep 'Saved data to file:' | awk '{ print $5 }'` 133 | echo 'Processing experiment with more loudspeakers than microphones...' 134 | FILE4=`python figure_doa_9_mics_10_src.py | grep 'Saved data to file:' | awk '{ print $5 }'` 135 | 136 | echo "All processing done! The data was saved in files:" 137 | echo " ${FILE1}" 138 | echo " ${FILE2}" 139 | echo " ${FILE3}" 140 | echo " ${FILE4}" 141 | 142 | # Now produce all the plots 143 | echo 'Creating all figures...' 
144 | python figure_doa_experiment_plot.py -f $FILE1 145 | python figure_doa_synthetic_plot.py -f $FILE2 146 | python figure_doa_separation_plot.py -f $FILE3 147 | python figure_doa_9_mics_10_src_plot.py -f $FILE4 148 | 149 | if [ $PARALLEL_WORKERS -gt 0 ]; then 150 | echo 'Stopping parallel processing now.' 151 | ipcluster stop 152 | fi 153 | 154 | echo 'All done. See you soon...' 155 | 156 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyroomacoustics 2 | numpy 3 | scipy 4 | pandas 5 | ipyparallel==5.0.1 6 | matplotlib 7 | seaborn 8 | zmq 9 | joblib 10 | -------------------------------------------------------------------------------- /system_install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 2016, Robin Scheibler 4 | # This script was used to setup the environment for FRIDA computations 5 | # on a Ubuntu Xenial machine 6 | 7 | # Install anaconda locally 8 | wget https://repo.continuum.io/archive/Anaconda2-4.1.1-Linux-x86_64.sh 9 | bash Anaconda2-4.1.1-Linux-x86_64.sh -b -p /opt/anaconda2 -f 10 | echo 'export PATH=/opt/anaconda2/bin:$PATH' >> .bashrc 11 | . 
.bashrc 12 | 13 | # update conda 14 | conda update -y conda 15 | 16 | conda upgrade -y numpy scipy pandas 17 | conda install -y ipython ipyparallel 18 | conda install -y mkl accelerate iopro 19 | 20 | apt-get install libsndfile1 libsndfile1-dev libsamplerate0 libsamplerate0-dev git tmux 21 | pip install scikits.samplerate 22 | pip install scikits.audiolab 23 | pip install seaborn 24 | pip install zmq 25 | pip install joblib 26 | 27 | pip install git+https://github.com/LCAV/pyroomacoustics 28 | -------------------------------------------------------------------------------- /test_doa_recorded.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from scipy.io import wavfile 3 | import os, sys, getopt 4 | import json 5 | 6 | import pyroomacoustics as pra 7 | 8 | import doa 9 | from tools import * 10 | from experiment import arrays, calculate_speed_of_sound, select_bands 11 | 12 | if __name__ == '__main__': 13 | 14 | # parse arguments 15 | argv = sys.argv[1:] 16 | algo = None 17 | rec_file = None 18 | n_bands = None 19 | try: 20 | opts, args = getopt.getopt(argv, "ha:f:b:", ["algo=", "file=", "n_bands"]) 21 | except getopt.GetoptError: 22 | print('test_doa_recorded.py -a -f -b ') 23 | sys.exit(2) 24 | for opt, arg in opts: 25 | if opt == '-h': 26 | print('test_doa_recorded.py -a -f -b ') 27 | sys.exit() 28 | elif opt in ("-a", "--algo"): 29 | algo = int(arg) 30 | elif opt in ("-f", "--file"): 31 | rec_file = arg 32 | elif opt in ("-b", "--n_bands"): 33 | n_bands = int(arg) 34 | 35 | algo_dic = {1:'SRP', 2:'MUSIC', 3:'CSSM', 4:'WAVES', 5:'TOPS', 6:'FRI'} 36 | algo_name = algo_dic[algo] 37 | 38 | # We should make this the default structure 39 | # it can be applied by copying/downloading the data or creating a symbolic link 40 | exp_folder = './recordings/20160908/' 41 | 42 | # Get the speakers and microphones grounndtruth locations 43 | sys.path.append(exp_folder) 44 | from edm_to_positions import 
twitters 45 | 46 | array_str = 'pyramic' 47 | #array_str = 'compactsix' 48 | 49 | if array_str == 'pyramic': 50 | 51 | twitters.center('pyramic') 52 | 53 | R_flat_I = range(8, 16) + range(24, 32) + range(40, 48) 54 | mic_array = arrays['pyramic_tetrahedron'][:, R_flat_I].copy() 55 | mic_array += twitters[['pyramic']] 56 | 57 | rec_folder = exp_folder + 'data_pyramic/segmented/' 58 | 59 | elif array_str == 'compactsix': 60 | 61 | twitters.center('compactsix') 62 | 63 | R_flat_I = range(6) 64 | mic_array = arrays['compactsix_circular_1'][:, R_flat_I].copy() 65 | mic_array += twitters[['compactsix']] 66 | rec_folder = exp_folder + 'data_compactsix/segmented/' 67 | 68 | fs = 16000 69 | 70 | num_mic = mic_array.shape[1] # number of microphones 71 | K = rec_file.count('-') + 1 # Real number of sources 72 | K_est = K # Number of sources to estimate 73 | 74 | # Open the protocol json file 75 | with open(exp_folder + 'protocol.json') as fd: 76 | exp_data = json.load(fd) 77 | 78 | # These parameters could be extracted from a JSON file 79 | # Experiment related parameters 80 | temp = exp_data['conditions']['temperature'] 81 | hum = exp_data['conditions']['humidity'] 82 | c = calculate_speed_of_sound(temp, hum) 83 | # save parameters 84 | save_fig = False 85 | save_param = True 86 | fig_dir = './result/' 87 | 88 | # Check if the directory exists 89 | if save_fig and not os.path.exists(fig_dir): 90 | os.makedirs(fig_dir) 91 | 92 | # algorithm parameters 93 | stop_cri = 'max_iter' # can be 'mse' or 'max_iter' 94 | fft_size = 256 # number of FFT bins 95 | win_stft = np.hanning(fft_size) # stft window 96 | frame_shift_step = np.int(fft_size / 1.) 
97 | M = 20 # Maximum Fourier coefficient index (-M to M), K_est <= M <= num_mic*(num_mic - 1) / 2 98 | 99 | # ---------------------------- 100 | # Perform direction of arrival 101 | phi_plt = np.linspace(0, 2*np.pi, num=721, dtype=float, endpoint=False) 102 | 103 | # Choose the frequency range to use 104 | freq_range = { 105 | 'MUSIC': [2500., 4500.], 106 | 'SRP': [2500., 4500.], 107 | 'CSSM': [2500., 4500.], 108 | 'WAVES': [3000., 4000.], 109 | 'TOPS': [100., 4500.], 110 | 'FRI': [2500., 4500.], 111 | } 112 | 113 | # the samples are used to select the frequencies 114 | samples = ['experiment/samples/fq_sample{}.wav'.format(i) for i in range(K)] 115 | 116 | freq_hz, freq_bins = select_bands(samples, freq_range[algo_name], fs, fft_size, win_stft, n_bands) 117 | 118 | print('Using {} frequencies: '.format(freq_hz.shape[0])) 119 | print('Selected frequencies: {0} Hertz'.format(freq_bins / fft_size * fs)) 120 | 121 | 122 | # Import speech signal 123 | # ------------------------- 124 | if K == 1: 125 | filename = rec_folder + 'one_speaker/' + rec_file + '.wav' 126 | elif K == 2: 127 | filename = rec_folder + 'two_speakers/' + rec_file + '.wav' 128 | elif K == 3: 129 | filename = rec_folder + 'three_speakers/' + rec_file + '.wav' 130 | fs_file, rec_signals = wavfile.read(filename) 131 | fs_silence, rec_silence = wavfile.read(rec_folder + 'silence.wav') 132 | 133 | if fs_file != fs_silence: 134 | raise ValueError('Weird: fs of signals and silence are different...') 135 | 136 | # Resample the files if required 137 | if fs_file != fs: 138 | print 'Resampling signals' 139 | from scikits.samplerate import resample 140 | 141 | resampled_signals = [] 142 | resampled_silence = [] 143 | for i in R_flat_I: 144 | resampled_signals.append( 145 | pra.highpass( 146 | resample(rec_signals[:, i], fs / fs_file, 'sinc_best'), 147 | fs, 148 | fc=150. 
149 | ) 150 | ) 151 | resampled_silence.append( 152 | pra.highpass( 153 | resample(rec_silence[:, i], fs / fs_file, 'sinc_best'), 154 | fs, 155 | fc=150. 156 | ) 157 | ) 158 | speech_signals = np.array(resampled_signals, dtype=np.float).T 159 | silence = np.array(resampled_silence, dtype=np.float).T 160 | 161 | else: 162 | print('No need to resample signals') 163 | speech_signals = np.array(rec_signals[:, R_flat_I], dtype=np.float32) 164 | silence = np.array(rec_silence[:, R_flat_I], dtype=np.float32) 165 | 166 | # highpass filter at 150 167 | for s in speech_signals.T: 168 | s[:] = pra.highpass(s, fs, fc=150.) 169 | for s in silence.T: 170 | s[:] = pra.highpass(s, fs, fc=150.) 171 | 172 | # Normalize the amplitude 173 | n_factor = 0.95 / np.max(np.abs(speech_signals)) 174 | speech_signals *= n_factor 175 | silence *= n_factor 176 | 177 | # estimate noise floor 178 | y_noise_stft = [] 179 | for k in range(num_mic): 180 | y_stft = pra.stft(silence[:, k], fft_size, frame_shift_step, 181 | transform=rfft, win=win_stft).T / np.sqrt(fft_size) 182 | y_noise_stft.append(y_stft) 183 | y_noise_stft = np.array(y_noise_stft) 184 | noise_floor = np.mean(np.abs(y_noise_stft) ** 2) 185 | 186 | # estimate SNR in dB (on 1st microphone) 187 | noise_var = np.mean(np.abs(silence) ** 2) 188 | sig_var = np.mean(np.abs(speech_signals) ** 2) 189 | # rought estimate of SNR 190 | SNR = 10 * np.log10((sig_var - noise_var) / noise_var) 191 | print('Estimated SNR: ' + str(SNR)) 192 | 193 | # Compute DFT of snapshots 194 | # ------------------------- 195 | y_mic_stft = [] 196 | for k in range(num_mic): 197 | y_stft = pra.stft(speech_signals[:, k], fft_size, frame_shift_step, 198 | transform=rfft, win=win_stft).T / np.sqrt(fft_size) 199 | y_mic_stft.append(y_stft) 200 | y_mic_stft = np.array(y_mic_stft) 201 | 202 | energy_level = np.abs(y_mic_stft) ** 2 203 | 204 | # True direction of arrival 205 | # ------------------------- 206 | sources = rec_file.split('-') 207 | phi_ks = 
np.array([twitters.doa(array_str, sources[k])[0] for k in range(K)]) 208 | phi_ks[phi_ks < 0] = phi_ks[phi_ks < 0] + 2 * np.pi 209 | 210 | # create DOA object 211 | if algo == 1: 212 | algo_name = 'SRP-PHAT' 213 | d = doa.SRP(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 214 | theta=phi_plt) 215 | if algo == 2: 216 | algo_name = 'MUSIC' 217 | d = doa.MUSIC(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 218 | theta=phi_plt) 219 | elif algo == 3: 220 | algo_name = 'CSSM' 221 | d = doa.CSSM(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 222 | theta=phi_plt, num_iter=10) 223 | elif algo == 4: 224 | algo_name = 'WAVES' 225 | d = doa.WAVES(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 226 | theta=phi_plt, num_iter=10) 227 | elif algo == 5: 228 | algo_name = 'TOPS' 229 | d = doa.TOPS(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 230 | theta=phi_plt) 231 | elif algo == 6: 232 | algo_name = 'FRI' 233 | d = doa.FRI(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, c=c, 234 | theta=phi_plt, max_four=M, noise_floor=noise_floor, noise_margin=0.0) 235 | 236 | # perform localization 237 | print 'Applying ' + algo_name + '...' 
238 | # d.locate_sources(y_mic_stft, freq_bins=freq_bins) 239 | ''' 240 | if isinstance(d, doa.TOPS) or isinstance(d, doa.WAVES) or isinstance(d, doa.MUSIC) or isinstance(d, doa.CSSM): 241 | d.locate_sources(y_mic_stft, freq_range=freq_range) 242 | else: 243 | print 'using bins' 244 | d.locate_sources(y_mic_stft, freq_bins=freq_bins) 245 | ''' 246 | d.locate_sources(y_mic_stft, freq_bins=freq_bins) 247 | 248 | # print reconstruction results 249 | recon_err, sort_idx = polar_distance(phi_ks, d.phi_recon) 250 | np.set_printoptions(precision=3, formatter={'float': '{: 0.3f}'.format}) 251 | 252 | print('Reconstructed spherical coordinates (in degrees) and amplitudes:') 253 | if d.num_src > 1: 254 | #d.phi_recon = d.phi_recon[sort_idx[:,1]] 255 | print('Original azimuths : {0}'.format(np.degrees( 256 | phi_ks[sort_idx[:, 0]]))) 257 | #phi_ks))) 258 | print('Detected azimuths : {0}'.format(np.degrees( 259 | d.phi_recon[sort_idx[:, 1]]))) 260 | #d.phi_recon))) 261 | else: 262 | print('Original azimuths : {0}'.format(np.degrees(phi_ks))) 263 | print('Detected azimuths : {0}'.format(np.degrees(d.phi_recon))) 264 | 265 | if isinstance(d, doa.FRI): 266 | #d.alpha_recon = d.alpha_recon[:,sort_idx[:,1]] 267 | print d.alpha_recon.shape 268 | if K > 1: 269 | print('Reconstructed amplitudes : \n{0}\n'.format(d.alpha_recon.squeeze())) 270 | else: 271 | print('Reconstructed amplitudes : \n{0}\n'.format(d.alpha_recon.squeeze())) 272 | 273 | print('Reconstruction error : {0:.3e}'.format(np.degrees(recon_err))) 274 | 275 | # reset numpy print option 276 | np.set_printoptions(edgeitems=3, infstr='inf', 277 | linewidth=75, nanstr='nan', precision=8, 278 | suppress=False, threshold=1000, formatter=None) 279 | 280 | # plot results 281 | file_name = (fig_dir + 'polar_sources_{0}_numMic_{1}_' + 282 | '_locations' + '.pdf').format(repr(rec_file), repr(num_mic)) 283 | 284 | # plot response (for FRI one subband) 285 | d.polar_plt_dirac(phi_ks, file_name=file_name) 286 | 287 | plt.show() 288 | 
-------------------------------------------------------------------------------- /test_doa_simulated.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from scipy import linalg 4 | from scipy.io import wavfile 5 | import os, sys, getopt 6 | import time 7 | import matplotlib.pyplot as plt 8 | 9 | import pyroomacoustics as pra 10 | import doa 11 | 12 | from tools import * 13 | from experiment import arrays 14 | 15 | if __name__ == '__main__': 16 | 17 | # parse arguments 18 | argv = sys.argv[1:] 19 | algo = None 20 | num_src = None 21 | try: 22 | opts, args = getopt.getopt(argv,"ha:n:b:",["algo=","num_src=", "n_bands"]) 23 | except getopt.GetoptError: 24 | print 'test_doa_simulated.py -a -n -b ' 25 | sys.exit(2) 26 | for opt, arg in opts: 27 | if opt == '-h': 28 | print 'test_doa_simulated.py -a -n -b ' 29 | sys.exit() 30 | elif opt in ("-a", "--algo"): 31 | algo = int(arg) 32 | elif opt in ("-n", "--num_src"): 33 | num_src = int(arg) 34 | elif opt in ("-b", "--n_bands"): 35 | n_bands = int(arg) 36 | 37 | # file parameters 38 | save_fig = False 39 | save_param = True 40 | fig_dir = './result/' 41 | exp_dir = './experiment/' 42 | available_files = ['samples/fq_sample1.wav', 'samples/fq_sample2.wav'] 43 | speech_files = available_files[:num_src] 44 | 45 | # Check if the directory exists 46 | if save_fig and not os.path.exists(fig_dir): 47 | os.makedirs(fig_dir) 48 | 49 | # parameters setup 50 | fs = 16000 # sampling frequency in Hz 51 | SNR = 5 # SNR for the received signal at microphones in [dB] 52 | speed_sound = pra.constants.get('c') 53 | 54 | R_flat_I = range(8, 16) + range(24, 32) + range(40, 48) 55 | mic_array_coordinate = arrays['pyramic_tetrahedron'][:2,R_flat_I] 56 | num_mic = mic_array_coordinate.shape[1] 57 | 58 | K = len(speech_files) # Real number of sources 59 | K_est = K # Number of sources to estimate 60 | 61 | # algorithm parameters 62 | stop_cri = 
'max_iter' # can be 'mse' or 'max_iter' 63 | fft_size = 256 # number of FFT bins 64 | M = 13 # Maximum Fourier coefficient index (-M to M), K_est <= M <= num_mic*(num_mic - 1) / 2 65 | 66 | # Import all speech signals 67 | # ------------------------- 68 | speech_signals = [] 69 | for f in speech_files: 70 | filename = exp_dir + f 71 | r, s = wavfile.read(filename) 72 | if r != fs: 73 | raise ValueError('All speech samples should have correct sampling frequency %d' % (fs)) 74 | speech_signals.append(s) 75 | 76 | # Pad signals so that they all have the same number of samples 77 | signal_length = np.max([s.shape[0] for s in speech_signals]) 78 | for i, s in enumerate(speech_signals): 79 | speech_signals[i] = np.concatenate((s, np.zeros(signal_length - s.shape[0]))) 80 | speech_signals = np.array(speech_signals) 81 | 82 | # Adjust the SNR 83 | for s in speech_signals: 84 | # We normalize each signal so that \sum E[x_k**2] / E[ n**2 ] = SNR 85 | s /= np.std(s[np.abs(s) > 1e-2]) * K 86 | noise_power = 10 ** (-SNR * 0.1) 87 | 88 | # ---------------------------- 89 | # Generate all necessary signals 90 | 91 | # Generate Diracs at random 92 | alpha_ks, phi_ks, time_stamp = gen_diracs_param(K, positive_amp=True, log_normal_amp=False, semicircle=False, save_param=save_param) 93 | 94 | # load saved Dirac parameters 95 | ''' 96 | dirac_file_name = './data/polar_Dirac_' + '31-08_09_14' + '.npz' 97 | alpha_ks, phi_ks, time_stamp = load_dirac_param(dirac_file_name) 98 | phi_ks[1] = phi_ks[0] + 10. / 180. 
* np.pi 99 | ''' 100 | 101 | print('Dirac parameter tag: ' + time_stamp) 102 | 103 | # generate complex base-band signal received at microphones 104 | y_mic_stft, y_mic_stft_noiseless, speech_stft = \ 105 | gen_speech_at_mic_stft(phi_ks, speech_signals, mic_array_coordinate, noise_power, fs, fft_size=fft_size) 106 | 107 | # ---------------------------- 108 | # Perform direction of arrival 109 | phi_plt = np.linspace(0, 2*np.pi, num=360, dtype=float) 110 | freq_range = [100., 2000.] 111 | freq_bnd = [int(np.round(f/fs*fft_size)) for f in freq_range] 112 | freq_bins = np.arange(freq_bnd[0],freq_bnd[1]) 113 | fmin = min(freq_bins) 114 | 115 | # Subband selection (may need to avoid search in low and high frequencies if there is something like DC bias or unwanted noise) 116 | # bands_pwr = np.mean(np.mean(np.abs(y_mic_stft[:,fft_bins,:]) ** 2, axis=0), axis=1) 117 | bands_pwr = np.mean(np.mean(np.abs(y_mic_stft[:,freq_bins,:]) ** 2, axis=0), axis=1) 118 | freq_bins = np.argsort(bands_pwr)[-n_bands:] + fmin 119 | freq_hz = freq_bins*float(fs)/float(fft_size) 120 | 121 | freq_hz = np.linspace(freq_range[0], freq_range[1], n_bands) 122 | freq_bins = np.array([int(np.round(f / fs * fft_size)) for f in freq_hz]) 123 | 124 | print('Selected frequency bins: {0}'.format(freq_bins)) 125 | print('Selected frequencies: {0} Hertz'.format(freq_hz)) 126 | 127 | # create DOA object 128 | if algo == 1: 129 | algo_name = 'SRP-PHAT' 130 | d = doa.SRP(L=mic_array_coordinate, fs=fs, nfft=fft_size, 131 | num_src=K_est, theta=phi_plt) 132 | if algo == 2: 133 | algo_name = 'MUSIC' 134 | d = doa.MUSIC(L=mic_array_coordinate, fs=fs, nfft=fft_size, 135 | num_src=K_est,theta=phi_plt) 136 | elif algo == 3: 137 | algo_name = 'CSSM' 138 | d = doa.CSSM(L=mic_array_coordinate, fs=fs, nfft=fft_size, 139 | num_src=K_est, theta=phi_plt, num_iter=10) 140 | elif algo == 4: 141 | algo_name = 'WAVES' 142 | d = doa.WAVES(L=mic_array_coordinate, fs=fs, nfft=fft_size, 143 | num_src=K_est, theta=phi_plt, 
num_iter=10) 144 | elif algo == 5: 145 | algo_name = 'TOPS' 146 | d = doa.TOPS(L=mic_array_coordinate, fs=fs, nfft=fft_size, 147 | num_src=K_est, theta=phi_plt) 148 | elif algo == 6: 149 | algo_name = 'FRI' 150 | d = doa.FRI(L=mic_array_coordinate, fs=fs, nfft=fft_size, 151 | num_src=K_est, theta=phi_plt, max_four=M) 152 | 153 | # perform localization 154 | print 'Applying ' + algo_name + '...' 155 | # d.locate_sources(y_mic_stft, fft_bins=fft_bins) 156 | if isinstance(d, doa.TOPS): 157 | d.locate_sources(y_mic_stft, freq_range=freq_range) 158 | else: 159 | print 'using bins' 160 | d.locate_sources(y_mic_stft, freq_bins=freq_bins) 161 | 162 | 163 | print('SNR for microphone signals: {0}dB\n'.format(SNR)) 164 | 165 | # print reconstruction results 166 | recon_err, sort_idx = polar_distance(d.phi_recon, phi_ks) 167 | np.set_printoptions(precision=3, formatter={'float': '{: 0.3f}'.format}) 168 | print('Reconstructed spherical coordinates (in degrees) and amplitudes:') 169 | if d.num_src > 1: 170 | print('Original azimuths : {0}'.format(np.degrees(phi_ks[sort_idx[:, 1]]))) 171 | print('Reconstructed azimuths : {0}\n'.format(np.degrees(d.phi_recon[sort_idx[:, 0]]))) 172 | else: 173 | print('Original azimuths : {0}'.format(np.degrees(phi_ks))) 174 | print('Reconstructed azimuths : {0}\n'.format(np.degrees(d.phi_recon))) 175 | # print('Original amplitudes : \n{0}'.format(alpha_ks[sort_idx[:, 1]].squeeze())) 176 | # print('Reconstructed amplitudes : \n{0}\n'.format(np.real(d.alpha_recon[sort_idx[:, 0]].squeeze()))) 177 | print('Reconstruction error : {0:.3e}'.format(np.degrees(recon_err))) 178 | # reset numpy print option 179 | np.set_printoptions(edgeitems=3, infstr='inf', 180 | linewidth=75, nanstr='nan', precision=8, 181 | suppress=False, threshold=1000, formatter=None) 182 | 183 | # plot results 184 | file_name = (fig_dir + 'polar_K_{0}_numMic_{1}_' + 185 | 'noise_{2:.0f}dB_locations' + 186 | time_stamp + '.pdf').format(repr(K), repr(num_mic), SNR) 187 | 188 | # plot 
response (for FRI just one subband) 189 | if isinstance(d, doa.FRI): 190 | alpha_ks = np.array([np.mean(np.abs(s_loop) ** 2, axis=1) for s_loop in speech_stft])[:, d.freq_bins] 191 | d.polar_plt_dirac(phi_ks, np.mean(alpha_ks, axis=1), file_name=file_name) 192 | else: 193 | d.polar_plt_dirac(phi_ks, file_name=file_name) 194 | 195 | plt.show() 196 | -------------------------------------------------------------------------------- /test_doa_whitenoise.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import numpy as np 3 | from scipy import linalg 4 | from scipy.io import wavfile 5 | import os, sys, getopt 6 | import time 7 | import matplotlib.pyplot as plt 8 | 9 | import pyroomacoustics as pra 10 | 11 | import doa 12 | from tools import * 13 | from experiment import arrays, calculate_speed_of_sound 14 | 15 | 16 | if __name__ == '__main__': 17 | 18 | # parse arguments 19 | argv = sys.argv[1:] 20 | algo = None 21 | num_src = None 22 | n_bands = None 23 | try: 24 | opts, args = getopt.getopt(argv,"ha:n:b:",["algo=","num_src=", "n_bands"]) 25 | except getopt.GetoptError: 26 | print 'test_doa_whitenoise.py -a -n -b ' 27 | sys.exit(2) 28 | for opt, arg in opts: 29 | if opt == '-h': 30 | print 'test_doa_simulated.py -a -n -b ' 31 | sys.exit() 32 | elif opt in ("-a", "--algo"): 33 | algo = int(arg) 34 | elif opt in ("-n", "--num_src"): 35 | num_src = int(arg) 36 | elif opt in ("-b", "--n_bands"): 37 | n_bands = int(arg) 38 | 39 | # file parameters 40 | save_fig = False 41 | save_param = True 42 | fig_dir = './result/' 43 | 44 | # Check if the directory exists 45 | if save_fig and not os.path.exists(fig_dir): 46 | os.makedirs(fig_dir) 47 | 48 | # parameters setup 49 | fs = 16000 # sampling frequency in Hz 50 | SNR = 20 # SNR for the received signal at microphones in [dB] 51 | speed_sound = pra.constants.get('c') 52 | 53 | K = num_src # Real number of sources 54 | K_est = K # Number of sources to 
estimate 55 | 56 | # algorithm parameters 57 | stop_cri = 'max_iter' # can be 'mse' or 'max_iter' 58 | fft_size = 256 # number of FFT bins 59 | num_snapshot = 256 # number of snapshots used to estimate the covariance 60 | M = 14 # Maximum Fourier coefficient index (-M to M), K_est <= M <= num_mic*(num_mic - 1) / 2 61 | 62 | # ---------------------------- 63 | # Generate all necessary signals 64 | 65 | # Generate Diracs at random 66 | alpha_ks, phi_ks, time_stamp = gen_diracs_param(K, positive_amp=True, log_normal_amp=False, semicircle=False, save_param=save_param) 67 | alpha_ks = np.ones(K) 68 | 69 | print('Dirac parameter tag: ' + time_stamp) 70 | 71 | # select mic array 72 | exp_folder = './recordings/20160831/' 73 | array_str = 'pyramic' 74 | #array_str = 'compactsix' 75 | sys.path.append(exp_folder) 76 | from edm_to_positions import twitters 77 | if array_str == 'pyramic': 78 | twitters.center('pyramic') 79 | R_flat_I = range(8, 16) + range(24, 32) + range(40, 48) 80 | mic_array = arrays['pyramic_tetrahedron'][:, R_flat_I].copy() 81 | mic_array += twitters[['pyramic']] 82 | elif array_str == 'compactsix': 83 | twitters.center('compactsix') 84 | R_flat_I = range(6) 85 | mic_array = arrays['compactsix_circular_1'][:, R_flat_I].copy() 86 | mic_array += twitters[['compactsix']] 87 | mic_array = mic_array[:2,:] 88 | num_mic = mic_array.shape[1] # number of microphones 89 | 90 | # generate complex base-band signal received at microphones 91 | y_mic_stft, y_mic_stft_noiseless = \ 92 | gen_sig_at_mic_stft(phi_ks, alpha_ks, mic_array, SNR, 93 | fs, fft_size=fft_size, Ns=num_snapshot) 94 | 95 | # ---------------------------- 96 | # Perform direction of arrival 97 | phi_plt = np.linspace(0, 2*np.pi, num=300, dtype=float) 98 | freq_range = [100., 4500.] 99 | freq_range = [2500., 4500.] 
100 | 101 | freq_hz = np.linspace(freq_range[0], freq_range[1], n_bands) 102 | freq_bins = np.array([int(np.round(f/fs*fft_size)) for f in freq_hz]) 103 | 104 | # Subband selection (may need to avoid search in low and high frequencies if there is something like DC bias or unwanted noise) 105 | # bands_pwr = np.mean(np.mean(np.abs(y_mic_stft[:,freq_bins,:]) ** 2, axis=0), axis=1) 106 | ''' 107 | bands_pwr = np.mean(np.mean(np.abs(y_mic_stft[:,freq_bins,:]) ** 2, axis=0), axis=1) 108 | freq_bins = np.argsort(bands_pwr)[-n_bands:] + fmin 109 | freq_hz = freq_bins*float(fs)/float(fft_size) 110 | ''' 111 | 112 | print('Selected frequency bins: {0}'.format(freq_bins)) 113 | print('Selected frequencies: {0} Hertz'.format(freq_hz)) 114 | 115 | # create DOA object 116 | if algo == 1: 117 | algo_name = 'SRP-PHAT' 118 | d = doa.SRP(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, 119 | theta=phi_plt) 120 | if algo == 2: 121 | algo_name = 'MUSIC' 122 | d = doa.MUSIC(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, 123 | theta=phi_plt) 124 | elif algo == 3: 125 | algo_name = 'CSSM' 126 | d = doa.CSSM(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, 127 | theta=phi_plt, num_iter=10) 128 | elif algo == 4: 129 | algo_name = 'WAVES' 130 | d = doa.WAVES(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, 131 | theta=phi_plt, num_iter=10) 132 | elif algo == 5: 133 | algo_name = 'TOPS' 134 | d = doa.TOPS(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, 135 | theta=phi_plt) 136 | elif algo == 6: 137 | algo_name = 'FRI' 138 | d = doa.FRI(L=mic_array, fs=fs, nfft=fft_size, num_src=K_est, 139 | theta=phi_plt, max_four=M) 140 | 141 | # perform localization 142 | print 'Applying ' + algo_name + '...' 
143 | # d.locate_sources(y_mic_stft, freq_bins=freq_bins) 144 | if isinstance(d, doa.TOPS): 145 | d.locate_sources(y_mic_stft, freq_range=freq_range) 146 | else: 147 | print 'using bins' 148 | d.locate_sources(y_mic_stft, freq_bins=freq_bins) 149 | 150 | 151 | print('SNR for microphone signals: {0}dB\n'.format(SNR)) 152 | 153 | # print reconstruction results 154 | recon_err, sort_idx = polar_distance(d.phi_recon, phi_ks) 155 | np.set_printoptions(precision=3, formatter={'float': '{: 0.3f}'.format}) 156 | print('Reconstructed spherical coordinates (in degrees) and amplitudes:') 157 | if d.num_src > 1: 158 | print('Original azimuths : {0}'.format(np.degrees(phi_ks[sort_idx[:, 1]]))) 159 | print('Reconstructed azimuths : {0}\n'.format(np.degrees(d.phi_recon[sort_idx[:, 0]]))) 160 | else: 161 | print('Original azimuths : {0}'.format(np.degrees(phi_ks))) 162 | print('Reconstructed azimuths : {0}\n'.format(np.degrees(d.phi_recon))) 163 | # print('Original amplitudes : \n{0}'.format(alpha_ks[sort_idx[:, 1]].squeeze())) 164 | # print('Reconstructed amplitudes : \n{0}\n'.format(np.real(d.alpha_recon[sort_idx[:, 0]].squeeze()))) 165 | print('Reconstruction error : {0:.3e}'.format(np.degrees(recon_err))) 166 | # reset numpy print option 167 | np.set_printoptions(edgeitems=3, infstr='inf', 168 | linewidth=75, nanstr='nan', precision=8, 169 | suppress=False, threshold=1000, formatter=None) 170 | 171 | # plot results 172 | file_name = (fig_dir + 'polar_K_{0}_numMic_{1}_' + 173 | 'noise_{2:.0f}dB_locations' + 174 | time_stamp + '.pdf').format(repr(K), repr(num_mic), SNR) 175 | d.polar_plt_dirac(phi_ks, file_name=file_name) 176 | 177 | plt.show() 178 | -------------------------------------------------------------------------------- /tools/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = '1.0' 2 | 3 | # http://mikegrouchy.com/blog/2012/05/be-pythonic-__init__py.html 4 | 5 | import os,sys,inspect 6 | currentdir = 
os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))
# make the repo root importable so sibling packages (doa, experiment) resolve
parentdir = os.path.dirname(currentdir)
sys.path.insert(0,parentdir)

# re-export every public helper of the sub-modules at package level
from .generators import *
from .plotters import *
from .utils import *
from .mkl_fft import *

# --------------------------------------------------------------------- tools/dftidefs.py
# ctypes mirrors of the Intel MKL DFTI configuration constants, used by mkl_fft.py
import ctypes as _ctypes

# enum DFTI_CONFIG_PARAM from mkl_dfti.h

DFTI_FORWARD_DOMAIN = _ctypes.c_int(0)  # Domain for forward transform, no default
DFTI_DIMENSION = _ctypes.c_int(1)  # Dimension, no default
DFTI_LENGTHS = _ctypes.c_int(2)  # length(s) of transform, no default
DFTI_PRECISION = _ctypes.c_int(3)  # Precision of computation, no default
DFTI_FORWARD_SCALE = _ctypes.c_int(4)  # Scale factor for forward transform, default = 1.0
DFTI_BACKWARD_SCALE = _ctypes.c_int(5)  # Scale factor for backward transform, default = 1.0
DFTI_FORWARD_SIGN = _ctypes.c_int(6)  # Default for forward transform = DFTI_NEGATIVE
DFTI_NUMBER_OF_TRANSFORMS = _ctypes.c_int(7)  # Number of data sets to be transformed, default = 1
DFTI_COMPLEX_STORAGE = _ctypes.c_int(8)  # Representation for complex domain, default = DFTI_COMPLEX_COMPLEX
DFTI_REAL_STORAGE = _ctypes.c_int(9)  # Rep. for real domain, default = DFTI_REAL_REAL
DFTI_CONJUGATE_EVEN_STORAGE = _ctypes.c_int(10)  # Rep. for conjugate even domain, default = DFTI_COMPLEX_REAL
DFTI_PLACEMENT = _ctypes.c_int(11)  # Placement of result, default = DFTI_INPLACE
DFTI_INPUT_STRIDES = _ctypes.c_int(12)  # Stride information of input data, default = tightly
DFTI_OUTPUT_STRIDES = _ctypes.c_int(13)  # Stride information of output data, default = tightly
DFTI_INPUT_DISTANCE = _ctypes.c_int(14)  # Distance information of input data, default = 0
DFTI_OUTPUT_DISTANCE = _ctypes.c_int(15)  # Distance information of output data, default = 0
DFTI_INITIALIZATION_EFFORT = _ctypes.c_int(16)  # Effort spent in initialization, default = DFTI_MEDIUM
DFTI_WORKSPACE = _ctypes.c_int(17)  # Use of workspace during computation, default = DFTI_ALLOW
DFTI_ORDERING = _ctypes.c_int(18)  # Possible out of order computation, default = DFTI_ORDERED
DFTI_TRANSPOSE = _ctypes.c_int(19)  # Possible transposition of result, default = DFTI_NONE
DFTI_DESCRIPTOR_NAME = _ctypes.c_int(20)  # name of descriptor, default = string of zero length
DFTI_PACKED_FORMAT = _ctypes.c_int(21)  # packed format for real transform, default = DFTI_CCS_FORMAT

# below 4 parameters for get_value functions only
DFTI_COMMIT_STATUS = _ctypes.c_int(22)  # Whether descriptor has been committed
DFTI_VERSION = _ctypes.c_int(23)  # DFTI implementation version number
DFTI_FORWARD_ORDERING = _ctypes.c_int(24)  # The ordering of forward transform
DFTI_BACKWARD_ORDERING = _ctypes.c_int(25)  # The ordering of backward transform

# below for set_value and get_value functions
DFTI_NUMBER_OF_USER_THREADS = _ctypes.c_int(26)  # number of user's threads, default = 1

# DFTI options values
DFTI_COMMITTED = _ctypes.c_int(30)  # status - committed
DFTI_UNCOMMITTED = _ctypes.c_int(31)  # status - uncommitted
DFTI_COMPLEX = _ctypes.c_int(32)  # General domain
DFTI_REAL = _ctypes.c_int(33)  # Real domain
DFTI_CONJUGATE_EVEN = _ctypes.c_int(34)  # Conjugate even domain
DFTI_SINGLE = _ctypes.c_int(35)  # Single precision
DFTI_DOUBLE = _ctypes.c_int(36)  # Double precision
DFTI_NEGATIVE = _ctypes.c_int(37)  # -i, for setting definition of transform
DFTI_POSITIVE = _ctypes.c_int(38)  # +i, for setting definition of transform
DFTI_COMPLEX_COMPLEX = _ctypes.c_int(39)  # Representation method for domain
DFTI_COMPLEX_REAL = _ctypes.c_int(40)  # Representation method for domain
DFTI_REAL_COMPLEX = _ctypes.c_int(41)  # Representation method for domain
DFTI_REAL_REAL = _ctypes.c_int(42)  # Representation method for domain
DFTI_INPLACE = _ctypes.c_int(43)  # Result overwrites input
DFTI_NOT_INPLACE = _ctypes.c_int(44)  # Result placed differently than input
DFTI_LOW = _ctypes.c_int(45)  # A low setting
DFTI_MEDIUM = _ctypes.c_int(46)  # A medium setting
DFTI_HIGH = _ctypes.c_int(47)  # A high setting
DFTI_ORDERED = _ctypes.c_int(48)  # Data on forward and backward domain ordered
DFTI_BACKWARD_SCRAMBLED = _ctypes.c_int(49)  # Data on forward ordered and backward domain scrambled
DFTI_FORWARD_SCRAMBLED = _ctypes.c_int(50)  # Data on forward scrambled and backward domain ordered
DFTI_ALLOW = _ctypes.c_int(51)  # Allow certain request or usage
DFTI_AVOID = _ctypes.c_int(52)  # Avoid certain request or usage
DFTI_NONE = _ctypes.c_int(53)  # none certain request or usage
DFTI_CCS_FORMAT = _ctypes.c_int(54)  # ccs format for real DFT
DFTI_PACK_FORMAT = _ctypes.c_int(55)  # pack format for real DFT
DFTI_PERM_FORMAT = _ctypes.c_int(56)  # perm format for real DFT
DFTI_CCE_FORMAT = _ctypes.c_int(57)  # cce format for real DFT

# --------------------------------------------------------------------- tools/plotters.py
from __future__ import division

import numpy as np
import os
from scipy import linalg

# use a non-interactive backend when no display is available (e.g. headless server)
if os.environ.get('DISPLAY') is None:
    import matplotlib
    matplotlib.use('Agg')

from matplotlib import rcParams

# for latex rendering: extend PATH so the TeX binaries are found on OS X / MacPorts
os.environ['PATH'] = os.environ['PATH'] + ':/usr/texbin' + ':/opt/local/bin' + ':/Library/TeX/texbin/'
rcParams['text.usetex'] = True
rcParams['text.latex.unicode'] = True
rcParams['text.latex.preamble'] = [r"\usepackage{bm}"]

import matplotlib.pyplot as plt
from matplotlib import cm

from utils import polar_distance


def plt_planewave(y_mic_noiseless, y_mic_noisy, mic=0, save_fig=False, **kwargs):
    """
    plot received planewaves at microphones (real part on top, imaginary below)
    :param y_mic_noiseless: the noiseless planewave (num_mic x num_snapshots, complex)
    :param y_mic_noisy: the noisy planewave (same shape as y_mic_noiseless)
    :param mic: planewave at which microphone to plot
    :param save_fig: whether to save the figure as a PDF
    :param kwargs: optional input argument(s), include:
        SNR: SNR value (dB) shown in the title; computed from the two signals if absent
        file_name: output file name used when save_fig is True
    :return:
    """
    if 'SNR' in kwargs:
        SNR = kwargs['SNR']
    else:
        # empirical SNR: 20 log10(||signal|| / ||noise||)
        SNR = 20 * np.log10(linalg.norm(y_mic_noiseless[mic, :].flatten('F')) /
                            linalg.norm((y_mic_noisy[mic, :] - y_mic_noiseless[mic, :]).flatten('F')))
    plt.figure(figsize=(6, 3), dpi=90)
    ax1 = plt.axes([0.1, 0.53, 0.85, 0.32])
    plt.plot(np.real(y_mic_noiseless[mic, :]), color=[0, 0.447, 0.741],
             linestyle='-', linewidth=1.5, label='noiseless', alpha=0.6)
    plt.plot(np.real(y_mic_noisy[mic, :]), color=[0.850, 0.325, 0.098],
             linestyle='-', linewidth=1.5, label='noisy', alpha=0.6)

    plt.xlim([0, y_mic_noisy.shape[1] - 1])
    # plt.xlabel(r'time snapshot', fontsize=11)
    plt.ylabel(r'$\Re\{y(\omega, t)\}$', fontsize=11)

    ax1.yaxis.major.locator.set_params(nbins=5)
    plt.legend(framealpha=0.5, scatterpoints=1, loc=0,
               fontsize=9, ncol=2, handletextpad=.2,
               columnspacing=1.7, labelspacing=0.1)

    # NOTE(review): 'microphe' is a typo in the displayed title; it is a runtime
    # string, so it is deliberately left unchanged in this documentation pass
    plt.title(r'received planewaves at microphe {0} ($\mbox{{SNR}} = {1:.1f}$dB)'.format(repr(mic), SNR),
              fontsize=11)

    ax2 = plt.axes([0.1, 0.14, 0.85, 0.32])
    plt.plot(np.imag(y_mic_noiseless[mic, :]), color=[0, 0.447, 0.741],
             linestyle='-', linewidth=1.5, label='noiseless', alpha=0.6)
    plt.plot(np.imag(y_mic_noisy[mic, :]), color=[0.850, 0.325, 0.098],
             linestyle='-', linewidth=1.5, label='noisy', alpha=0.6)

    plt.xlim([0, y_mic_noisy.shape[1] - 1])
    plt.xlabel(r'time snapshot', fontsize=11)
    plt.ylabel(r'$\Im\{y(\omega, t)\}$', fontsize=11)

    ax2.yaxis.major.locator.set_params(nbins=5)

    if save_fig:
        if 'file_name' in kwargs:
            file_name = kwargs['file_name']
        else:
            file_name = 'planewave_mic{0}.pdf'.format(repr(mic))
        plt.savefig(file_name, format='pdf', dpi=300, transparent=True)


def polar_plt_diracs(phi_ref, phi_recon, alpha_ref, alpha_recon, num_mic, P, save_fig=False, **kwargs):
    """
    plot Diracs in the polar coordinate
    :param phi_ref: ground truth Dirac locations (azimuths)
    :param phi_recon: reconstructed Dirac locations (azimuths)
    :param alpha_ref: ground truth Dirac amplitudes
    :param alpha_recon: reconstructed Dirac amplitudes
    :param num_mic: number of microphones (displayed in the title only)
    :param P: PSNR in the visibility measurements (displayed in the title only)
    :param save_fig: whether save the figure or not
    :param kwargs: optional input argument(s), include:
        file_name: file name used to save figure
        dirty_img, phi_plt: when BOTH are given, the 'dirty image' curve is
            overlaid on the polar plot
    :return:
    """
    # average angular error between matched reference/reconstructed Diracs
    dist_recon = polar_distance(phi_ref, phi_recon)[0]
    if 'dirty_img' in kwargs and 'phi_plt' in kwargs:
        plt_dirty_img = True
        dirty_img = kwargs['dirty_img']
        phi_plt = kwargs['phi_plt']
    else:
        plt_dirty_img = False
    fig = plt.figure(figsize=(5, 4), dpi=90)
    ax = fig.add_subplot(111, projection='polar')
    K = phi_ref.size
    K_est = phi_recon.size

    # radial offset of 1 keeps zero-amplitude Diracs visible off the origin
    ax.scatter(phi_ref, 1 + alpha_ref, c=np.tile([0, 0.447, 0.741], (K, 1)), s=70,
               alpha=0.75, marker='^', linewidths=0, label='original')
    ax.scatter(phi_recon, 1 + alpha_recon, c=np.tile([0.850, 0.325, 0.098], (K_est, 1)), s=100,
               alpha=0.75, marker='*', linewidths=0, label='reconstruction')
    # stems from the unit circle up to each Dirac amplitude
    for k in xrange(K):
        ax.plot([phi_ref[k], phi_ref[k]], [1, 1 + alpha_ref[k]],
                linewidth=1.5, linestyle='-', color=[0, 0.447, 0.741], alpha=0.6)

    for k in xrange(K_est):
        ax.plot([phi_recon[k], phi_recon[k]], [1, 1 + alpha_recon[k]],
                linewidth=1.5, linestyle='-', color=[0.850, 0.325, 0.098], alpha=0.6)

    if plt_dirty_img:
        dirty_img = dirty_img.real
        # min_val only feeds the commented-out colour-mapped variant below
        min_val = dirty_img.min()
        max_val = dirty_img.max()
        # color_lines = cm.spectral_r((dirty_img - min_val) / (max_val - min_val))
        # ax.scatter(phi_plt, 1 + dirty_img, edgecolor='none', linewidths=0,
        #            c=color_lines, label='dirty image')  # 1 is for the offset
        ax.plot(phi_plt, 1 + dirty_img, linewidth=1, alpha=0.55,
                linestyle='-', color=[0.466, 0.674, 0.188], label='dirty image')

    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles=handles[:3], framealpha=0.5,
              scatterpoints=1, loc=8, fontsize=9,
              ncol=1, bbox_to_anchor=(0.9, -0.17),
              handletextpad=.2, columnspacing=1.7, labelspacing=0.1)
    title_str = r'$K={0}$, $\mbox{{\# of mic.}}={1}$, $\mbox{{SNR}}={2:.1f}$dB, average error={3:.1e}'
    ax.set_title(title_str.format(repr(K), repr(num_mic), P, dist_recon),
                 fontsize=11)
    ax.set_xlabel(r'azimuth $\bm{\varphi}$', fontsize=11)
    ax.set_xticks(np.linspace(0, 2 * np.pi, num=12, endpoint=False))
    ax.xaxis.set_label_coords(0.5, -0.11)
    ax.set_yticks(np.linspace(0, 1, 2))
    ax.xaxis.grid(b=True, color=[0.3, 0.3, 0.3], linestyle=':')
    ax.yaxis.grid(b=True, color=[0.3, 0.3, 0.3], linestyle='--')
    # leave head room above the tallest stem (and dirty image, when drawn)
    if plt_dirty_img:
        ax.set_ylim([0, 1.05 + np.max(np.append(np.concatenate((alpha_ref, alpha_recon)), max_val))])
    else:
        ax.set_ylim([0, 1.05 + np.max(np.concatenate((alpha_ref, alpha_recon)))])
    if save_fig:
        if 'file_name' in kwargs:
            file_name = kwargs['file_name']
        else:
            file_name = \
'polar_recon_dirac.pdf' 148 | plt.savefig(file_name, format='pdf', dpi=300, transparent=True) 149 | # plt.show() 150 | -------------------------------------------------------------------------------- /tools/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | 3 | import numpy as np 4 | 5 | def polar_error(x1, x2): 6 | 7 | tp = 2*np.pi 8 | e = np.minimum(np.mod(x1-x2, tp), np.mod(x2-x1, tp)) 9 | 10 | return e 11 | 12 | def nchoosek(list_in, k): 13 | ''' Produce all combinations of k elements from list_in ''' 14 | 15 | # Recursion ends 16 | if k == 1: 17 | return [[k] for k in list_in] 18 | 19 | # 20 | list_out = [] 21 | for i,element in enumerate(list_in): 22 | sub_list = nchoosek(list_in[i+1:], k-1) 23 | for l in sub_list: 24 | list_out.append([element] + l) 25 | 26 | return list_out 27 | 28 | def polar_distance(x1, x2): 29 | """ 30 | Given two arrays of numbers x1 and x2, pairs the cells that are the 31 | closest and provides the pairing matrix index: x1(index(1,:)) should be as 32 | close as possible to x2(index(2,:)). The function outputs the average of the 33 | absolute value of the differences abs(x1(index(1,:))-x2(index(2,:))). 
34 | :param x1: vector 1 35 | :param x2: vector 2 36 | :return: d: minimum distance between d 37 | index: the permutation matrix 38 | """ 39 | x1 = np.reshape(x1, (1, -1), order='F') 40 | x2 = np.reshape(x2, (1, -1), order='F') 41 | N1 = x1.size 42 | N2 = x2.size 43 | diffmat = np.arccos(np.cos(x1 - np.reshape(x2, (-1, 1), order='F'))) 44 | min_N1_N2 = np.min([N1, N2]) 45 | index = np.zeros((min_N1_N2, 2), dtype=int) 46 | if min_N1_N2 > 1: 47 | for k in xrange(min_N1_N2): 48 | d2 = np.min(diffmat, axis=0) 49 | index2 = np.argmin(diffmat, axis=0) 50 | index1 = np.argmin(d2) 51 | index2 = index2[index1] 52 | index[k, :] = [index1, index2] 53 | diffmat[index2, :] = float('inf') 54 | diffmat[:, index1] = float('inf') 55 | d = np.mean(np.arccos(np.cos(x1[:, index[:, 0]] - x2[:, index[:, 1]]))) 56 | else: 57 | d = np.min(diffmat) 58 | index = np.argmin(diffmat) 59 | if N1 == 1: 60 | index = np.array([1, index]) 61 | else: 62 | index = np.array([index, 1]) 63 | 64 | # sort to keep the order of the first vector 65 | if min_N1_N2 > 1: 66 | perm = np.argsort(index[:,0]) 67 | index = index[perm,:] 68 | 69 | return d, index 70 | 71 | 72 | def polar2cart(rho, phi): 73 | """ 74 | convert from polar to cartesian coordinates 75 | :param rho: radius 76 | :param phi: azimuth 77 | :return: 78 | """ 79 | x = rho * np.cos(phi) 80 | y = rho * np.sin(phi) 81 | return x, y 82 | 83 | 84 | def load_dirac_param(file_name): 85 | """ 86 | load stored Diracs' parameters 87 | :param file_name: the file name that the parameters are stored 88 | :return: 89 | """ 90 | stored_param = np.load(file_name) 91 | alpha_ks = stored_param['alpha_ks'] 92 | phi_ks = stored_param['phi_ks'] 93 | time_stamp = stored_param['time_stamp'].tostring() 94 | return alpha_ks, phi_ks, time_stamp 95 | 96 | 97 | def load_mic_array_param(file_name): 98 | """ 99 | load stored microphone array parameters 100 | :param file_name: file that stored these parameters 101 | :return: 102 | """ 103 | stored_param = np.load(file_name) 
104 | pos_mic_x = stored_param['pos_mic_x'] 105 | pos_mic_y = stored_param['pos_mic_y'] 106 | layout_time_stamp = stored_param['layout_time_stamp'].tostring() 107 | return pos_mic_x, pos_mic_y, layout_time_stamp 108 | --------------------------------------------------------------------------------