├── Python ├── __init__.py └── src │ ├── __init__.py │ ├── allpass.py │ ├── hrtf_widener.py │ ├── notebooks │ ├── transient_response.ipynb │ ├── stereo_widener.ipynb │ └── decorrelation_comparison.ipynb │ ├── onset_detector.py │ ├── plot.py │ ├── widener.py │ ├── interaural_cues.py │ └── utils.py ├── .gitignore ├── Source ├── BiquadCascade.h ├── BiquadFilter.h ├── Panner.cpp ├── DelayLine.cpp ├── Panner.h ├── LinkwitzCrossover.h ├── ButterworthFilter.h ├── VelvetNoise.h ├── BiquadCascade.cpp ├── OnsetDetector.h ├── DelayLine.h ├── AllpassBiquadCascade.h ├── BiquadFilter.cpp ├── TransientHandler.h ├── LeakyIntegrator.h ├── PluginEditor.h ├── ButterworthFilter.cpp ├── AllpassBiquadCascade.cpp ├── OnsetDetector.cpp ├── TransientHandler.cpp ├── VelvetNoise.cpp ├── LinkwitzCrossover.cpp ├── PluginProcessor.h ├── PluginEditor.cpp └── PluginProcessor.cpp ├── README.md ├── StereoWidener.jucer ├── LICENSE └── Resources └── opt_vn_filters.txt /Python/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Python/src/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Compiled Object files 5 | *.slo 6 | *.lo 7 | *.o 8 | *.obj 9 | 10 | # Precompiled Headers 11 | *.gch 12 | *.pch 13 | 14 | # Compiled Dynamic libraries 15 | *.so 16 | *.dylib 17 | *.dll 18 | 19 | # Fortran module files 20 | *.mod 21 | *.smod 22 | 23 | # Compiled Static libraries 24 | *.lai 25 | *.la 26 | *.a 27 | *.lib 28 | 29 | # Executables 30 | *.exe 31 | *.out 32 | *.app 33 | -------------------------------------------------------------------------------- /Source/BiquadCascade.h: 
-------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | BiquadCascade.h 5 | Created: 2 Oct 2023 7:59:25pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | #include "BiquadFilter.h" 14 | #include 15 | 16 | class BiquadCascade{ 17 | 18 | public: 19 | BiquadCascade(); 20 | ~BiquadCascade(); 21 | 22 | void initialize(int numBq, float sR, float** b, float** a); 23 | void update(float** b_new, float** a_new); 24 | float process(const float input); 25 | 26 | 27 | private: 28 | int numBiquads; 29 | float sampleRate; 30 | BiquadFilter* biquads; 31 | }; 32 | -------------------------------------------------------------------------------- /Source/BiquadFilter.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | BiquadFilter.h 5 | Created: 2 Oct 2023 7:40:33pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | #include 14 | 15 | 16 | class BiquadFilter{ 17 | //allpass biquad filter parameterised by pole radius and angle 18 | public: 19 | BiquadFilter(); 20 | ~BiquadFilter(); 21 | 22 | void initialize(float b0, float b1, float b2, float a0, float a1); 23 | void update(float b0, float b1, float b2, float a0, float a1); 24 | float process(const float input); 25 | 26 | 27 | private: 28 | const int order = 2; 29 | float* a; 30 | float* b; 31 | float* prevInput; 32 | float* prevOutput; 33 | 34 | }; 35 | -------------------------------------------------------------------------------- /Source/Panner.cpp: -------------------------------------------------------------------------------- 1 | 
/* 2 | ============================================================================== 3 | 4 | Panner.cpp 5 | Created: 1 Jun 2023 12:00:14pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "Panner.h" 12 | 13 | Panner::Panner(){} 14 | Panner::~Panner(){ 15 | delete [] output; 16 | } 17 | 18 | 19 | void Panner::initialize(){ 20 | angle = 0.f; 21 | width = 0.f; 22 | output = new float[numChans]; 23 | for (int i =0; i < numChans; i++){ 24 | output[i] = 0.0f; 25 | } 26 | } 27 | 28 | float Panner::process(const float* input){ 29 | output[0] = std::sin(angle) * input[0]; 30 | output[1] = std::cos(angle) * input[1]; 31 | return output[0] + output[1]; 32 | } 33 | 34 | void Panner::updateWidth(float newWidth){ 35 | width = newWidth; 36 | angle = (float) juce::jmap (width, 0.f, 1.0f, 0.f, PI/2.0f); 37 | } 38 | 39 | 40 | -------------------------------------------------------------------------------- /Source/DelayLine.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | DelayLine.cpp 5 | Created: 3 Jul 2021 9:27:30pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "DelayLine.h" 12 | 13 | 14 | DelayLine::DelayLine(){} 15 | DelayLine::~DelayLine(){} 16 | 17 | float DelayLine::velvetConvolver(int* taps, float* gains, int len){ 18 | float output = 0.0f; 19 | for (int i = 0; i < len; i++){ 20 | int indexAt = readPtr + taps[i]; 21 | if (indexAt >= maxDelay) 22 | indexAt -= maxDelay; 23 | output += gains[i] * delayBuffer[indexAt]; 24 | } 25 | return output; 26 | } 27 | 28 | void DelayLine::prepare(const int L, const float sampleRate){ 29 | 30 | length = L; //length of delay line in samples 31 | //initialize delay lines to prevent garbage memory values 32 | for (int i = 0; i < 
maxDelay; i++) 33 | delayBuffer[i] = 0.0f; 34 | } 35 | 36 | 37 | -------------------------------------------------------------------------------- /Source/Panner.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | Panner.h 5 | Created: 1 Jun 2023 12:00:14pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | class Panner{ 14 | public: 15 | Panner(); 16 | ~Panner(); 17 | 18 | void initialize(); 19 | float process(const float* input); 20 | void updateWidth(float newWidth); 21 | 22 | 23 | 24 | private: 25 | const float PI = std::acos(-1); //PI 26 | const int numChans = 2; //number of channels panned 27 | float angle; //a value between 0 and pi/2 rad that determines left and right gain weightings 28 | float width; //determines stereo width (0 - original width, 1 - max width) 29 | float* output; //2 channel output 30 | bool isAmpPreserveFlag = false; //amplitude or energetic calculations 31 | 32 | }; 33 | -------------------------------------------------------------------------------- /Source/LinkwitzCrossover.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | LinkwitzCrossover.h 5 | Created: 1 Jun 2023 6:18:50pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | #include "DelayLine.h" 14 | 15 | class LinkwitzCrossover{ 16 | /* 4th order Linkwitz Riley crossover filters */ 17 | public: 18 | LinkwitzCrossover(); 19 | ~LinkwitzCrossover(); 20 | 21 | void initialize(float sR, std::string type); 22 | float process(const float input); 23 | void update(float newCutoffFreq); 
24 | void setCoefficients(); 25 | 26 | 27 | private: 28 | const float PI = std::acos(-1); 29 | const int order = 2; //4th order IIR filter 30 | float sampleRate; //sample rate in Hz 31 | float cutoff = 500; //cutoff frequency in Hz 32 | float* numCoeffs; 33 | float* denCoeffs; 34 | float* prevInput; 35 | float* prevOutput; 36 | bool lowpass; //if filter is lowpass or highpass 37 | 38 | }; 39 | -------------------------------------------------------------------------------- /Source/ButterworthFilter.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | ButterworthFilter.h 5 | Created: 2 Oct 2023 8:29:46pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "BiquadCascade.h" 13 | 14 | class ButterworthFilter{ 15 | //Butterworth filter of 16th order 16 | public: 17 | ButterworthFilter(); 18 | ~ButterworthFilter(); 19 | 20 | void initialize(float sR, float prewarp_frequency, std::string filter_type); 21 | void update(float newCutoffFreq); 22 | void setCoefficients(); 23 | float process(const float input); 24 | 25 | 26 | private: 27 | const float PI = std::acos(-1); 28 | const int order = 8; //filter order 29 | bool lowpass; //type of filter, 'lowpass' or 'highpass' 30 | float numBiquads; 31 | float sample_rate; 32 | float cutoff_frequency = 500; //cutoff frequency in Hz 33 | float bilinear_warp_factor; 34 | BiquadCascade biquadCascade; //biquad cascade object 35 | float** a; float** b; //filter coefficient arrays 36 | }; 37 | -------------------------------------------------------------------------------- /Source/VelvetNoise.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | VelvetNoise.h 5 | Created: 6 May 2023 
3:52:01pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | #include "DelayLine.h" 14 | #include 15 | 16 | 17 | class VelvetNoise{ 18 | public: 19 | VelvetNoise(); 20 | ~VelvetNoise(); 21 | 22 | void initialize(float sR, float L, int gS, float targetDecaydB, bool logDistribution); 23 | void initialize_from_string(juce::String opt_vn_filter); 24 | float process(const float input); 25 | void update(int newGridSize); 26 | void setImpulseLocationValues(); 27 | float convertdBtoDecayRate(); 28 | 29 | 30 | private: 31 | int length; //total length of delay line (in samples) 32 | int seqLength; //length of impulse sequence 33 | int gridSize; //density of impulses 34 | int* impulsePositions; //positions of impulses in sequence 35 | float* impulseValues; //value at impulse positions 36 | float decaydB; //decay in dB of the sequence 37 | float sampleRate; //sampling rate in Hz 38 | bool logDistribution; // are the impulses concentrated at the start? 39 | DelayLine delayLine; //Delay line to do convolution with velvet sequence 40 | 41 | }; 42 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # StereoWidener 2 | Plugin to do stereo widening with decorrelation. 3 | 4 | Stereo widening widens the perception of the stereo image by boosting the surrounds and lowering the mid/side ratio. This plugin has controls for the stereo width in lower and higher frequencies, with a controllable cutoff frequency. 5 | The filterbank consists of a lowpass and highpass in cascade, which can be energy preserving (Butterworth, default), or amplitude preserving (Linkwitz-Riley). Two options for decorrelation are available -- efficient convolution with velvet noise filters (default), or a cascade of allpass filters with randomised phases. 
A transient handling block ensures that transients are not smeared by the decorrelating filters. 6 | 7 |

8 | Screen Shot 2025-05-10 at 12 22 00 PM 9 |

10 | 11 | ### For MacOS users 12 | Use the installer included with the release. The next time you restart your DAW the plugin should show up under the developer name **orchi**. 13 | 14 | ### Theory 15 | The details of this plugin are outlined in the paper An open source stereo widening plugin - Orchisama Das in Proc. of International Conference on Digital Audio Effects, DAFx 2024. 16 | -------------------------------------------------------------------------------- /Source/BiquadCascade.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | BiquadCascade.cpp 5 | Created: 2 Oct 2023 7:59:25pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "BiquadCascade.h" 12 | BiquadCascade::BiquadCascade(){} 13 | BiquadCascade::~BiquadCascade(){ 14 | delete [] biquads; 15 | } 16 | 17 | void BiquadCascade::initialize(int numBq, float sR, float** b, float** a){ 18 | sampleRate = sR; 19 | numBiquads = numBq; 20 | biquads = new BiquadFilter[numBiquads]; 21 | 22 | for(int i = 0; i < numBiquads; i++){ 23 | float a0 = a[i][0]; 24 | float a1 = a[i][1]; 25 | float b0 = b[i][0]; 26 | float b1 = b[i][1]; 27 | float b2 = b[i][2]; 28 | biquads[i].initialize(b0, b1, b2, a0, a1); 29 | } 30 | } 31 | 32 | void BiquadCascade::update(float** b_new, float** a_new){ 33 | for(int i = 0; i < numBiquads; i++){ 34 | float a0 = a_new[i][0]; 35 | float a1 = a_new[i][1]; 36 | float b0 = b_new[i][0]; 37 | float b1 = b_new[i][1]; 38 | float b2 = b_new[i][2]; 39 | biquads[i].update(b0, b1, b2, a0, a1); 40 | } 41 | } 42 | 43 | 44 | float BiquadCascade::process(const float input){ 45 | float curOutput = 0.0f; 46 | float curInput = input; 47 | for(int i = 0; i < numBiquads; i++){ 48 | curOutput = biquads[i].process(curInput); 49 | curInput = curOutput; 50 | } 51 | return curOutput; 52 | } 53 | 
-------------------------------------------------------------------------------- /Source/OnsetDetector.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | OnsetDetector.h 5 | Created: 15 May 2024 11:16:46am 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "LeakyIntegrator.h" 13 | 14 | class OnsetDetector{ 15 | public: 16 | //public variables 17 | bool onset_flag = false; 18 | bool offset_flag = false; 19 | 20 | //public methods 21 | OnsetDetector(); 22 | ~OnsetDetector(); 23 | void prepare(int bufferSize, float sampleRate); 24 | void process(float* input_buffer); 25 | inline bool check_local_peak(); 26 | inline bool check_direction(bool is_rising); 27 | inline bool check_onset(float cur_threshold, bool check_offset); 28 | float* get_signal_envelope(float* input_buffer); 29 | 30 | private: 31 | int buffer_size; 32 | float sample_rate; 33 | LeakyIntegrator leaky; 34 | enum{ 35 | attack_time_ms = 5, 36 | release_time_ms = 50, 37 | }; 38 | float threshold; //dynamic threshold for onset calculation 39 | float running_mean_env; //running mean of the signal envelope 40 | unsigned long num_samps = 0; //keeps track of number of samples in input signal 41 | float second_last_samp = 0.0; //last 3 samples of the signal envelope 42 | float last_samp = 0.0; 43 | float cur_samp = 0.0; 44 | float forget_factor = 0.0; //forget factor for threshold calculation 45 | 46 | }; 47 | -------------------------------------------------------------------------------- /Source/DelayLine.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | DelayLine.h 5 | Created: 3 Jul 2021 9:27:30pm 6 | Author: Orchisama Das 7 | 8 | 
============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | 14 | class DelayLine{ 15 | public: 16 | DelayLine(); 17 | ~DelayLine(); 18 | 19 | //function to set delay line length 20 | void prepare(const int L, const float sampleRate); 21 | 22 | //read from pointer 23 | inline float read() const noexcept { 24 | return delayBuffer[readPtr]; 25 | } 26 | 27 | /*velvet noise convolver with tapped delay line. 28 | position of samples specified by array called taps. 29 | gains is the array of the multipliers 30 | len is the length of the array taps */ 31 | float velvetConvolver(int* taps, float* gains, int len); 32 | 33 | //write a pointer 34 | inline void write(const float input) { 35 | 36 | delayBuffer[writePtr] = input; 37 | } 38 | 39 | //update pointers 40 | inline void update() { 41 | --writePtr; 42 | 43 | if (writePtr < 0) // wrap write pointer 44 | writePtr = maxDelay - 1; 45 | 46 | readPtr = writePtr + length; 47 | if (readPtr >= maxDelay) // wrap read pointer 48 | readPtr -= maxDelay; 49 | } 50 | 51 | private: 52 | enum 53 | { 54 | maxDelay = 32*8192, 55 | }; 56 | float delayBuffer[maxDelay]; 57 | int readPtr = 0, writePtr = 0, length; 58 | }; 59 | -------------------------------------------------------------------------------- /Source/AllpassBiquadCascade.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | BiquadCascade.h 5 | Created: 2 Jun 2023 9:18:50pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | #include "BiquadFilter.h" 14 | #include 15 | #include 16 | 17 | class AllpassBiquad : public BiquadFilter{ 18 | //allpass biquad filter parameterised by pole radius and angle 19 | public: 20 | AllpassBiquad(); 21 | 
~AllpassBiquad(); 22 | 23 | void initialize(float pole_radii, float pole_angle); 24 | float process(const float input); 25 | 26 | 27 | private: 28 | const float PI = std::acos(-1); //PI 29 | const int order = 2; 30 | std::complex I; //Imaginary number i 31 | 32 | }; 33 | 34 | //----------------------------------------------------------------- 35 | 36 | class AllpassBiquadCascade{ 37 | //allpass biquad filter cascade 38 | public: 39 | AllpassBiquadCascade(); 40 | ~AllpassBiquadCascade(); 41 | 42 | void initialize(int numBq, float sR, float maxGroupDelayMs); 43 | float warpPoleAngle(float pole_angle); 44 | float process(const float input); 45 | 46 | 47 | private: 48 | int numBiquads; 49 | float sampleRate; 50 | float warpFactor; //for ERB warping of pole angles 51 | const float PI = std::acos(-1); 52 | std::complex I; //Imaginary number i 53 | AllpassBiquad* biquads; 54 | }; 55 | -------------------------------------------------------------------------------- /Source/BiquadFilter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | BiquadFilter.cpp 5 | Created: 2 Oct 2023 7:40:33pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "BiquadFilter.h" 12 | 13 | BiquadFilter::BiquadFilter(){} 14 | BiquadFilter::~BiquadFilter(){ 15 | delete [] a; 16 | delete [] b; 17 | delete [] prevInput; 18 | delete [] prevOutput; 19 | } 20 | 21 | 22 | void BiquadFilter::initialize(float b0, float b1, float b2, float a0, float a1){ 23 | a = new float[order]; 24 | b = new float[order + 1]; 25 | prevInput = new float[order]; 26 | prevOutput = new float[order]; 27 | for (int i = 0; i < order; i ++){ 28 | prevInput[i] = 0.0f; 29 | prevOutput[i] = 0.0f; 30 | } 31 | a[0] = a0; a[1] = a1; 32 | b[0] = b0; b[1] = b1; b[2] = b2; 33 | } 34 | 35 | void BiquadFilter::update(float b0, 
float b1, float b2, float a0, float a1){ 36 | a[0] = a0; a[1] = a1; 37 | b[0] = b0; b[1] = b1; b[2] = b2; 38 | } 39 | 40 | float BiquadFilter::process(const float input){ 41 | float output = b[0] * input; 42 | for (int i = 0; i < order; i++){ 43 | output += (b[i+1] * prevInput[i]) - (a[i] * prevOutput[i]); 44 | } 45 | 46 | //update previous input and output buffer - right shift by 1 (start at order-1: prevInput/prevOutput hold 'order' elements, so index 'order' is out of bounds) 47 | for(int i = order - 1; i > 0; i--){ 48 | prevOutput[i] = prevOutput[i-1]; 49 | prevInput[i] = prevInput[i-1]; 50 | } 51 | prevInput[0] = input; 52 | prevOutput[0] = output; 53 | //std::cout << "Filter INPUT :" << input << ", Filter OUTPUT :" << output << std::endl; 54 | return output; 55 | } 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /Source/TransientHandler.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | TransientHandler.h 5 | Created: 15 May 2024 11:17:37am 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | #include "OnsetDetector.h" 14 | 15 | class TransientHandler{ 16 | public: 17 | TransientHandler(); 18 | ~TransientHandler(); 19 | 20 | inline int ms_to_frames(float time_ms){ 21 | return int(std::ceil(time_ms * 1e-3 * sample_rate / buffer_size)); 22 | } 23 | void prepare_xfade_windows(); 24 | void prepare(int bufferSize, float sampleRate); 25 | void apply_xfade(float* input1, float* input2); 26 | float* copy_buffer(float* input, float* output); 27 | float* process(float* input_buffer, float* widener_output_buffer); 28 | 29 | private: 30 | const float PI = std::acos(-1); 31 | int buffer_size; 32 | float sample_rate; 33 | //cross-fading parameters when onset is detected 34 | float* xfade_in_win; 35 | float* xfade_out_win; 36 | float* xfade_buffer; 37 | float*
output_buffer; 38 | 39 | bool prev_onset_flag = false; //was there an onset previously? 40 | //onset detector object 41 | OnsetDetector onset; 42 | // 43 | int hold_counter; //if an onset is detected, the flag will be true for a 44 | //minimum number of frames to prevent false offset detection 45 | int min_frames_hold; 46 | int inhibit_counter; // if an offset is detected, we will wait a 47 | // minimum number of frames to prevent false onset detection 48 | int min_frames_inhibit; 49 | //calculated with a buffer size of 256 samples 50 | enum{ 51 | min_ms_hold = 80, 52 | min_ms_inhibit = 50, 53 | }; 54 | }; 55 | -------------------------------------------------------------------------------- /Source/LeakyIntegrator.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | LeakyIntegrator.h 5 | Created: 15 May 2024 11:17:24am 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #pragma once 12 | #include "JuceHeader.h" 13 | 14 | class LeakyIntegrator{ 15 | public: 16 | LeakyIntegrator(){}; 17 | ~LeakyIntegrator(){ 18 | delete [] signal_env; 19 | }; 20 | 21 | inline float ms_to_samps(float time_ms){ 22 | return (time_ms * 1e-3 * sample_rate); 23 | } 24 | 25 | void prepare(int bufferSize, float sampleRate, float attack_time_ms, float release_time_ms){ 26 | buffer_size = bufferSize; 27 | sample_rate = sampleRate; 28 | tau_attack = ms_to_samps(attack_time_ms); 29 | tau_release = ms_to_samps(release_time_ms); 30 | signal_env = new float[buffer_size]; 31 | for(int i=0; i < buffer_size; i++){ 32 | signal_env[i] = 0.0f; 33 | } 34 | } 35 | 36 | //signal envelope calculation with a leaky integrator 37 | float* process(float* input_buffer){ 38 | float prev_env_samp; 39 | for (int i = 0; i< buffer_size; i++){ 40 | if (i == 0) 41 | prev_env_samp = signal_env[buffer_size-1]; 42 | 
else 43 | prev_env_samp = signal_env[i-1]; 44 | if (input_buffer[i] > prev_env_samp){ 45 | signal_env[i] = prev_env_samp + (1-std::exp(-1.0/tau_attack)) * (std::abs(input_buffer[i]) - prev_env_samp); 46 | } 47 | else{ 48 | signal_env[i] = prev_env_samp + (1-std::exp(-1.0/tau_release)) * (std::abs(input_buffer[i]) - prev_env_samp); 49 | } 50 | } 51 | return signal_env; 52 | } 53 | 54 | private: 55 | float sample_rate; 56 | int buffer_size; 57 | float tau_attack; 58 | float tau_release; 59 | float* signal_env; 60 | }; 61 | -------------------------------------------------------------------------------- /Source/PluginEditor.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | This file contains the basic framework code for a JUCE plugin editor. 5 | 6 | ============================================================================== 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include "PluginProcessor.h" 13 | 14 | //============================================================================== 15 | /** 16 | */ 17 | class StereoWidenerAudioProcessorEditor : public juce::AudioProcessorEditor 18 | { 19 | public: 20 | StereoWidenerAudioProcessorEditor (StereoWidenerAudioProcessor&, juce::AudioProcessorValueTreeState&); 21 | ~StereoWidenerAudioProcessorEditor() override; 22 | 23 | //============================================================================== 24 | void paint (juce::Graphics&) override; 25 | void resized() override; 26 | 27 | private: 28 | // This reference is provided as a quick way for your editor to 29 | // access the processor object that created it. 
30 | StereoWidenerAudioProcessor& audioProcessor; 31 | juce::AudioProcessorValueTreeState& valueTreeState; 32 | juce::Label widthLowerLabel; 33 | juce::Slider widthLowerSlider; 34 | juce::Label widthHigherLabel; 35 | juce::Slider widthHigherSlider; 36 | juce::Slider cutoffFrequencySlider; 37 | juce::Label cutoffFrequencyLabel; 38 | juce::ToggleButton isAmpPreserve; 39 | juce::Label isAmpPreserveLabel; 40 | juce::ToggleButton hasAllpassDecorrelation; 41 | juce::Label hasAllpassDecorrelationLabel; 42 | juce::ToggleButton handleTransients; 43 | juce::Label handleTransientsLabel; 44 | 45 | std::unique_ptr widthLowerAttach; 46 | std::unique_ptr widthHigherAttach; 47 | std::unique_ptr cutoffFrequencyAttach; 48 | std::unique_ptr isAmpPreserveAttach, hasAllpassDecorrelationAttach, handleTransientsAttach; 49 | 50 | 51 | JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (StereoWidenerAudioProcessorEditor) 52 | }; 53 | -------------------------------------------------------------------------------- /Source/ButterworthFilter.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | ButterworthFilter.cpp 5 | Created: 2 Oct 2023 8:29:46pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "ButterworthFilter.h" 12 | ButterworthFilter::ButterworthFilter(){} 13 | ButterworthFilter::~ButterworthFilter(){ 14 | for(int i = 0; i < numBiquads; i++){ 15 | delete [] a[i]; 16 | delete [] b[i]; 17 | } 18 | delete [] a; 19 | delete [] b; 20 | } 21 | 22 | void ButterworthFilter::initialize(float sR, float prewarp_frequency, std::string filter_type){ 23 | numBiquads = order / 2; 24 | sample_rate = sR; 25 | lowpass = (filter_type == "lowpass")? 
true : false; 26 | bilinear_warp_factor = 2 * PI * prewarp_frequency / std::tan(PI * prewarp_frequency/sample_rate); 27 | a = new float* [numBiquads]; 28 | b = new float* [numBiquads]; 29 | for(int i = 0; i < numBiquads; i++){ 30 | a[i] = new float [2]; 31 | b[i] = new float [3]; 32 | } 33 | setCoefficients(); 34 | biquadCascade.initialize(numBiquads, sample_rate, b, a); 35 | } 36 | 37 | void ButterworthFilter::update(float newCutoffFreq){ 38 | cutoff_frequency = newCutoffFreq; 39 | setCoefficients(); 40 | biquadCascade.update(b, a); 41 | } 42 | 43 | 44 | void ButterworthFilter::setCoefficients(){ 45 | float w_c = 2 * PI * cutoff_frequency; 46 | float frac = bilinear_warp_factor / w_c; 47 | if (!lowpass) 48 | frac = 1.0 / frac; 49 | float cos_term = 0.0f, denominator = 0.0f; 50 | 51 | for (int k = 0; k < numBiquads; k++){ 52 | cos_term = std::cos(PI* (2*(k+1) + order - 1) / (2 * order)); 53 | denominator = std::pow(frac, 2) - 2 * frac * cos_term + 1; 54 | if (lowpass) 55 | a[k][0] = -2.0 * (std::pow(frac, 2) - 1)/ denominator; 56 | else 57 | a[k][0] = 2.0 * (std::pow(frac, 2) - 1) / denominator; 58 | a[k][1] = (std::pow(frac, 2) + 2 * frac * cos_term + 1) / denominator; 59 | 60 | //normalise the numerator coefficients 61 | b[k][0] = 1.0 / denominator; 62 | b[k][1] = lowpass == true? 
2.0 / denominator: -2.0 / denominator; 63 | b[k][2] = 1.0 / denominator; 64 | 65 | //std::cout << "Butterworth coeffs for biquad #" << k+1 << ": " << a[k][0] << ", " << a[k][1] << std::endl; 66 | 67 | } 68 | } 69 | 70 | float ButterworthFilter::process(const float input){ 71 | float output = biquadCascade.process(input); 72 | //std::cout << "Input: " << input << ", Butterworth cascade output: " << output << std::endl; 73 | return output; 74 | } 75 | -------------------------------------------------------------------------------- /Source/AllpassBiquadCascade.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | BiquadCascade.cpp 5 | Created: 2 Jun 2023 9:18:50pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "AllpassBiquadCascade.h" 12 | 13 | AllpassBiquad::AllpassBiquad(){} 14 | AllpassBiquad::~AllpassBiquad(){} 15 | 16 | 17 | void AllpassBiquad::initialize(float pole_radii, float pole_angle){ 18 | I.real(0); I.imag(1); // complex number 0 + 1i 19 | std::complex pole = pole_radii * std::exp(I * pole_angle); 20 | float a0 = -2 * std::real(pole); 21 | float a1 = std::pow(std::abs(pole),2); 22 | float b0 = a1; 23 | float b1 = a0; 24 | float b2 = 1.0; 25 | BiquadFilter::initialize(b0, b1, b2, a0, a1); 26 | } 27 | 28 | float AllpassBiquad::process(const float input){ 29 | return BiquadFilter::process(input); 30 | } 31 | 32 | //------------------------------------------------------------------------------ 33 | 34 | AllpassBiquadCascade::AllpassBiquadCascade(){} 35 | AllpassBiquadCascade::~AllpassBiquadCascade(){ 36 | delete [] biquads; 37 | } 38 | 39 | float AllpassBiquadCascade::warpPoleAngle(float pole_angle){ 40 | std::complex pole_warped = std::exp(I * pole_angle); 41 | std::complex lambdam = std::log((warpFactor + pole_warped) / (1.0f + warpFactor 
* pole_warped)); 42 | return std::imag(lambdam); 43 | } 44 | 45 | 46 | void AllpassBiquadCascade::initialize(int numBq, float sR, float maxGroupDelayMs){ 47 | I.real(0); I.imag(1); // complex number 0 + 1i 48 | sampleRate = sR; 49 | numBiquads = numBq; 50 | biquads = new AllpassBiquad[numBiquads]; 51 | float maxGrpDel = (1.0 - (maxGroupDelayMs * 1e-3)) / (1.0 + (maxGroupDelayMs * 1e-3)); 52 | 53 | warpFactor = 0.7464 * std::sqrt(2.0 / PI * std::atan(0.1418 * sampleRate)) + 0.03237; 54 | 55 | //generate random pole radii and pole angle 56 | std::default_random_engine generator; 57 | //randomly diistributed between 0.5 and beta 58 | std::uniform_real_distribution distribution_radii(0.5, maxGrpDel); 59 | //randomly distriibuted between 0 and 2PI in ERB scale 60 | std::uniform_real_distribution distribution_angle(0, 2*PI); 61 | 62 | for(int i = 0; i < numBiquads; i++){ 63 | float radius = distribution_radii(generator); 64 | float angle = warpPoleAngle(distribution_angle(generator)); 65 | biquads[i].initialize(radius, angle); 66 | } 67 | } 68 | 69 | 70 | float AllpassBiquadCascade::process(const float input){ 71 | float curOutput = 0.0f; 72 | float curInput = input; 73 | for(int i = 0; i < numBiquads; i++){ 74 | curOutput = biquads[i].process(curInput); 75 | curInput = curOutput; 76 | } 77 | //std::cout << "Cascade INPUT :" << input << ", Cascade OUTPUT :" << curOutput << std::endl; 78 | return curOutput; 79 | } 80 | -------------------------------------------------------------------------------- /Python/src/allpass.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.typing as npt 3 | from typing import Union 4 | from scipy.signal import sosfilt 5 | from utils import ms_to_samps 6 | 7 | 8 | def warp_pole_angle(rho: float, pole_freq: Union[float, 9 | np.ndarray]) -> npt.NDArray: 10 | """ 11 | Warp pole angles according to warping factor rho 12 | Args: 13 | rho (float): warping factor, 0 < rho < 1 
will zoom in on lower frequencies. 14 | pole_freq (float, np.ndarray): pole frequencies in radians/sec 15 | Returns: 16 | np.ndarray: the pole warped angles 17 | """ 18 | poles_warped = np.exp(1j * pole_freq) 19 | lambdam = np.log((rho + poles_warped) / (1 + rho * poles_warped)) 20 | return np.imag(lambdam) 21 | 22 | 23 | def decorrelate_allpass_filters(fs: float, 24 | nbiquads: int = 250, 25 | max_grp_del_ms: float = 30.): 26 | """ 27 | Return cascaded allpass SOS sections with randomised phase to perform signal decorrelation 28 | Args: 29 | fs (float): sample rate in Hz 30 | nbiquds (int): number of AP biquad sections 31 | max_grp_del_ms (float): maximum group delay in each frequency band 32 | Returns: 33 | np.ndarray: 6 x num_biquads AP filter coefficients 34 | """ 35 | 36 | max_grp_del = (1.0 - max_grp_del_ms * 1e-3) / (1 + max_grp_del_ms * 1e-3) 37 | # each pole radius should give max group delay of 30ms 38 | ap_rad = np.random.uniform(high=max_grp_del, low=0.5, size=nbiquads) 39 | # uniformly distributed pole frequencies 40 | ap_pole_freq = np.random.uniform(low=0, high=2 * np.pi, size=nbiquads) 41 | 42 | # warp pole angles to ERB filterbank 43 | warp_factor = 0.7464 * np.sqrt( 44 | 2.0 / np.pi * np.arctan(0.1418 * fs)) + 0.03237 45 | ap_pole_freq_warped = warp_pole_angle(warp_factor, ap_pole_freq) 46 | 47 | # allpass filter biquad cascade 48 | poles = ap_rad * np.exp(1j * ap_pole_freq_warped) 49 | sos_sec = np.zeros((nbiquads, 6)) 50 | # numerator coefficients 51 | sos_sec[:, 0] = np.abs(poles)**2 52 | sos_sec[:, 1] = -2 * np.real(poles) 53 | sos_sec[:, 2] = np.ones(nbiquads) 54 | # denominator coefficients 55 | sos_sec[:, 3] = np.ones(nbiquads) 56 | sos_sec[:, 4] = -2 * np.real(poles) 57 | sos_sec[:, 5] = np.abs(poles)**2 58 | 59 | return sos_sec 60 | 61 | 62 | def get_allpass_impulse_response(sos_section: np.ndarray, fs: float, 63 | signal_length_ms: float): 64 | """Create an impulse response from the sos matrix using a 65 | cascade of biquad filters""" 66 
| signal_length_samps = ms_to_samps(signal_length_ms, fs) 67 | impulse = np.zeros((1, signal_length_samps), dtype=float) 68 | impulse[0] = 1.0 69 | impulse_response = sosfilt(sos_section, impulse, zi=None) 70 | return np.squeeze(impulse_response) 71 | 72 | 73 | def process_allpass(input_signal: np.ndarray, 74 | fs: float, 75 | num_biquads: int = 200, 76 | max_grp_del_ms: float = 30.0) -> np.ndarray: 77 | """ 78 | For an input stereo signal, pass both channels through 79 | cascade of allpass filters, and return the output 80 | """ 81 | _, num_channels = input_signal.shape 82 | if num_channels > 2: 83 | input_signal = input_signal.T 84 | num_channels = 2 85 | if num_channels != 2: 86 | raise RuntimeError("Input signal must be stereo!") 87 | 88 | output_signal = np.zeros_like(input_signal) 89 | sos_section = np.zeros((num_channels, num_biquads, 6)) 90 | 91 | for chan in range(num_channels): 92 | sos_section[chan, ...] = decorrelate_allpass_filters( 93 | fs, nbiquads=num_biquads, max_grp_del_ms=max_grp_del_ms) 94 | output_signal[:, chan] = sosfilt(sos_section[chan, ...], 95 | input_signal[:, chan], 96 | zi=None) 97 | 98 | return output_signal 99 | -------------------------------------------------------------------------------- /Source/OnsetDetector.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | OnsetDetector.cpp 5 | Created: 15 May 2024 11:16:46am 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "OnsetDetector.h" 12 | 13 | OnsetDetector::OnsetDetector(){} 14 | OnsetDetector::~OnsetDetector(){} 15 | 16 | 17 | void OnsetDetector::prepare(int bufferSize, float sampleRate){ 18 | buffer_size = bufferSize; 19 | sample_rate = sampleRate; 20 | //initialise leaky integrator 21 | leaky.prepare(bufferSize, sampleRate, attack_time_ms, release_time_ms); 22 | 
threshold = 0.0; 23 | //keeps track of the mean of the signal envelope 24 | running_mean_env = 0.0; 25 | } 26 | 27 | inline bool OnsetDetector::check_local_peak(){ 28 | if ((last_samp > second_last_samp) && (last_samp > cur_samp)) 29 | return true; 30 | else 31 | return false; 32 | } 33 | 34 | inline bool OnsetDetector::check_direction(bool is_rising){ 35 | //checks direction of signal. If is_rising is true, returns 36 | //true if direction is rising. If is_rising is false, returns 37 | //true if direction is falling. 38 | 39 | if (is_rising) 40 | return ((second_last_samp < last_samp) && (last_samp < cur_samp)); 41 | else 42 | return ((second_last_samp > last_samp) && (last_samp > cur_samp)); 43 | } 44 | 45 | float* OnsetDetector::get_signal_envelope(float* input_buffer){ 46 | return leaky.process(input_buffer); 47 | } 48 | 49 | 50 | inline bool OnsetDetector::check_onset(float cur_threshold, bool check_offset){ 51 | //checks if there is an onset or offset based on the current value of threshold 52 | //checks for offset if check_offset is true, else checks for onset 53 | if (!check_offset) 54 | return (check_direction(true) && (last_samp > cur_threshold)); 55 | else 56 | return (check_direction(false) && (last_samp < cur_threshold)); 57 | } 58 | 59 | void OnsetDetector::process(float* input_buffer){ 60 | //set flags to false initially 61 | onset_flag = false; 62 | offset_flag = false; 63 | //get signal envelope 64 | float* signal_env = this->get_signal_envelope(input_buffer); 65 | for (int i = 0; i < buffer_size; i++){ 66 | //update the values of the last 3 samples 67 | if (i == 0){ 68 | second_last_samp = last_samp; 69 | last_samp = cur_samp; 70 | cur_samp = signal_env[i]; 71 | } 72 | else if (i == 1){ 73 | second_last_samp = last_samp; 74 | last_samp = signal_env[i-1]; 75 | cur_samp = signal_env[i]; 76 | } 77 | else{ 78 | second_last_samp = signal_env[i-2]; 79 | last_samp = signal_env[i-1]; 80 | cur_samp = signal_env[i]; 81 | } 82 | 83 | //to prevent overflow 84 
| num_samps = num_samps < ULONG_MAX? num_samps:0; 85 | 86 | //calculate running mean of the signal envelope 87 | float scaling = 1.0/(++num_samps); 88 | running_mean_env = signal_env[i] * scaling + (1-scaling) * running_mean_env; 89 | 90 | // if a local peak is detected, update threshold to 2xrunning_mean, else 91 | // keep the last value. Do this with an exponential smoother. 92 | forget_factor = this->check_local_peak()?0.01:0.99; 93 | threshold = (1 - forget_factor) * 4.0 * running_mean_env + (forget_factor * threshold); 94 | 95 | //if onset or offset has already been detected, continue 96 | if (onset_flag || offset_flag){ 97 | continue; 98 | } 99 | //these flags are set only once per buffer, but the 100 | //rest of the calculations are carried out sample by sample 101 | else{ 102 | onset_flag = this->check_onset(threshold, false); 103 | offset_flag = this->check_onset(threshold, true); 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /Source/TransientHandler.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | TransientHandler.cpp 5 | Created: 15 May 2024 11:17:37am 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "TransientHandler.h" 12 | 13 | TransientHandler::TransientHandler(){} 14 | TransientHandler::~TransientHandler(){ 15 | delete [] xfade_in_win; 16 | delete [] xfade_out_win; 17 | delete [] xfade_buffer; 18 | delete [] output_buffer; 19 | } 20 | 21 | 22 | void TransientHandler::prepare_xfade_windows(){ 23 | xfade_in_win = new float[buffer_size]; 24 | xfade_out_win = new float[buffer_size]; 25 | xfade_buffer = new float[buffer_size]; 26 | output_buffer = new float[buffer_size]; 27 | 28 | for(int i = 0; i < buffer_size; i++){ 29 | //half hann windows 30 | float phase = 
static_cast(i) / (buffer_size - 1); 31 | xfade_in_win[i] = 0.5f * (1.0f - std::cos(PI * phase)); 32 | xfade_out_win[i] = 1.0f - xfade_in_win[i]; 33 | xfade_buffer[i] = 0.0f; 34 | output_buffer[i] = 0.0f; 35 | } 36 | } 37 | 38 | void TransientHandler::prepare(int bufferSize, float sampleRate){ 39 | buffer_size = bufferSize; 40 | sample_rate = sampleRate; 41 | hold_counter = 0; 42 | inhibit_counter = 0; 43 | min_frames_hold = ms_to_frames(min_ms_hold); 44 | min_frames_inhibit = ms_to_frames(min_ms_inhibit); 45 | //std::cout << min_frames_hold << ", " << min_frames_inhibit << std::endl; 46 | onset.prepare(buffer_size, sample_rate); 47 | this->prepare_xfade_windows(); 48 | } 49 | 50 | 51 | float* TransientHandler::copy_buffer(float* input, float *output){ 52 | for(int i = 0;i < buffer_size;i++) 53 | output[i] = input[i]; 54 | return output; 55 | } 56 | 57 | void TransientHandler::apply_xfade(float* input1, float* input2){ 58 | //cross-fades between two inputs by applying a fade-in to input1 59 | //and fade-out to input2. 60 | for(int i = 0; i < buffer_size; i++) 61 | xfade_buffer[i] = xfade_in_win[i] * input1[i] + xfade_out_win[i]*input2[i]; 62 | } 63 | 64 | 65 | float* TransientHandler::process(float* input_buffer, float* widener_output_buffer){ 66 | //cross-fade between the input buffer and stereo widener's output buffer 67 | //when a transient is detected. 
68 | //Also keep tabs on when the onset and offset flags can change with the 69 | //hold and inhibit counter 70 | onset.process(input_buffer); 71 | bool cur_onset_flag = onset.onset_flag; 72 | 73 | 74 | if (0 < hold_counter && hold_counter < min_frames_hold){ 75 | output_buffer = this->copy_buffer(input_buffer, output_buffer); 76 | hold_counter++; 77 | } 78 | 79 | else if (0 < inhibit_counter && inhibit_counter < min_frames_inhibit){ 80 | output_buffer = this->copy_buffer(widener_output_buffer, output_buffer); 81 | inhibit_counter++; 82 | } 83 | 84 | else{ 85 | if (cur_onset_flag){ 86 | //onset fade-in 87 | this->apply_xfade(input_buffer, widener_output_buffer); 88 | output_buffer = this->copy_buffer(xfade_buffer, output_buffer); 89 | inhibit_counter = 0; 90 | hold_counter = 1; 91 | //std::cout << "Onset detected" << std::endl; 92 | } 93 | else if((prev_onset_flag && onset.offset_flag) || hold_counter == min_frames_hold){ 94 | //offset fade-out, or switch from input to widener output after holding 95 | this->apply_xfade(widener_output_buffer, input_buffer); 96 | output_buffer = this->copy_buffer(xfade_buffer, output_buffer); 97 | hold_counter = 0; 98 | inhibit_counter = 1; 99 | //std::cout << "Offset detected" << std::endl; 100 | } 101 | else{ 102 | //otherwise 103 | output_buffer = this->copy_buffer(widener_output_buffer, output_buffer); 104 | hold_counter = 0; 105 | inhibit_counter = 0; 106 | } 107 | } 108 | prev_onset_flag = cur_onset_flag; 109 | return output_buffer; 110 | 111 | } 112 | -------------------------------------------------------------------------------- /Source/VelvetNoise.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | VelvetNoise.cpp 5 | Created: 6 May 2023 3:52:01pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include 
"VelvetNoise.h" 12 | 13 | VelvetNoise::VelvetNoise(){}; 14 | VelvetNoise::~VelvetNoise(){ 15 | delete [] impulsePositions; 16 | delete [] impulseValues; 17 | } 18 | 19 | void VelvetNoise::initialize_from_string(juce::String opt_vn_filter){ 20 | //since we don't know the size of these, we make them vectors 21 | std::vector tempImpulsePositions; 22 | std::vector tempImpulseValues; 23 | std::string nextNumber; 24 | 25 | //separate all characters in string by space 26 | juce::StringArray tokens; 27 | tokens.addTokens (opt_vn_filter, " "); 28 | 29 | for (int i=0; ilogDistribution = logDistribution; 58 | } 59 | 60 | void VelvetNoise::setImpulseLocationValues(){ 61 | float impulseSpacing = sampleRate / gridSize; 62 | float impulseEnergy = 0.0; 63 | 64 | seqLength = (int)std::floor(length / impulseSpacing); 65 | impulsePositions = new int[seqLength]; 66 | impulseValues = new float[seqLength]; 67 | 68 | //create random distributions between 0, 1 69 | std::default_random_engine generator1, generator2; 70 | std::uniform_real_distribution distribution(0.0,1.0); 71 | float runningSum = 0.f; 72 | float newImpulseSpacing = 0.f; 73 | 74 | for (int i = 0; i < seqLength; i++){ 75 | float r1 = distribution(generator1); 76 | float r2 = distribution(generator2); 77 | if (logDistribution){ 78 | newImpulseSpacing = (length / 100) * std::pow(10, 2*gridSize*i/seqLength); 79 | runningSum += newImpulseSpacing; 80 | impulsePositions[i] = std::round(r2 * (newImpulseSpacing - 1) + runningSum); 81 | } 82 | else{ 83 | impulsePositions[i] = std::round(i * impulseSpacing + r2 * (impulseSpacing - 1)); 84 | } 85 | int sign = 2 * std::round(r1) - 1; 86 | impulseValues[i] = sign * std::exp(-convertdBtoDecayRate() * i); 87 | impulseEnergy += std::pow(impulseValues[i], 2); 88 | } 89 | 90 | //normalise by sequence energy 91 | for (int i = 0; i < seqLength; i++){ 92 | impulseValues[i] /= std::sqrt(impulseEnergy); 93 | //std :: cout << "Impulse location " << impulsePositions[i] << std::endl; 94 | //std :: 
cout << "Impulse gain " << impulseValues[i] << std::endl; 95 | } 96 | 97 | } 98 | 99 | void VelvetNoise::update(int newGridSize){ 100 | gridSize = newGridSize; 101 | setImpulseLocationValues(); 102 | } 103 | 104 | float VelvetNoise::process(const float input){ 105 | delayLine.update(); 106 | delayLine.write(input); 107 | float output = delayLine.velvetConvolver(impulsePositions, impulseValues, seqLength); 108 | //std::cout << "VN output is " << output << std::endl; 109 | return output; 110 | } 111 | 112 | float VelvetNoise::convertdBtoDecayRate(){ 113 | return -std::log(std::pow(10, -decaydB/20))/ seqLength; 114 | } 115 | 116 | -------------------------------------------------------------------------------- /Source/LinkwitzCrossover.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | LinkwitzCrossover.cpp 5 | Created: 1 Jun 2023 6:18:50pm 6 | Author: Orchisama Das 7 | 8 | ============================================================================== 9 | */ 10 | 11 | #include "LinkwitzCrossover.h" 12 | 13 | LinkwitzCrossover::LinkwitzCrossover(){} 14 | LinkwitzCrossover::~LinkwitzCrossover(){ 15 | delete [] numCoeffs; 16 | delete [] denCoeffs; 17 | delete [] prevInput; 18 | delete [] prevOutput; 19 | } 20 | 21 | 22 | void LinkwitzCrossover::setCoefficients(){ 23 | if (order == 4){ 24 | //4th order filter is unstable 25 | float wc = 2*PI*cutoff; 26 | float wc2 = std::pow(wc, 2); 27 | float wc3 = std::pow(wc, 3); 28 | float wc4 = std::pow(wc, 4); 29 | 30 | float k = wc / std::tan(PI * cutoff/sampleRate); 31 | float k2 = std::pow(k, 2); 32 | float k3 = std::pow(k, 3); 33 | float k4 = std::pow(k, 4); 34 | float sq_tmp1 = std::sqrt(2) * wc3 * k; 35 | float sq_tmp2 = std::sqrt(2)* wc * k3; 36 | float a_tmp = 4 * wc2 * k2 + 2 * sq_tmp1 + k4 + 2 * sq_tmp2 + wc4; 37 | 38 | denCoeffs[0] = (4 * (wc4 + sq_tmp1 - k4 - sq_tmp2)) / a_tmp; 39 | 
denCoeffs[1] = (6 * wc4 - 8 * wc2 * k2 + 6 * k4)/ a_tmp; 40 | denCoeffs[2] = (4 *(wc4 - sq_tmp1 + sq_tmp2 - k4)) / a_tmp; 41 | denCoeffs[3] = (k4 - 2 * sq_tmp1 + wc4 - 2 * sq_tmp2+ 4 * wc2 * k2) / a_tmp; 42 | 43 | //================================================ 44 | // low-pass 45 | //================================================ 46 | if (lowpass){ 47 | numCoeffs[0] = wc4 / a_tmp; 48 | numCoeffs[1] = 4 * wc4 / a_tmp; 49 | numCoeffs[2] = 6 * wc4 / a_tmp; 50 | numCoeffs[3] = numCoeffs[1]; 51 | numCoeffs[4] = numCoeffs[0]; 52 | } 53 | //===================================================== 54 | // high-pass 55 | //===================================================== 56 | else{ 57 | numCoeffs[0] = k4 / a_tmp; 58 | numCoeffs[1] = -4 * k4 / a_tmp; 59 | numCoeffs[2] = 6 * k4 / a_tmp; 60 | numCoeffs[3] = numCoeffs[1]; 61 | numCoeffs[4] = numCoeffs[0]; 62 | } 63 | } 64 | else{ 65 | //2nd order is stable 66 | float fpi = PI*cutoff; 67 | float wc = 2 * fpi; 68 | float wc2 = wc * wc; 69 | float wc22 = 2 * wc2; 70 | float k = wc / std::tan(fpi / sampleRate); 71 | float k2 = k * k; 72 | float k22 = 2 * k2; 73 | float wck2 = 2 * wc * k; 74 | float tmpk = (k2 + wc2 + wck2); 75 | 76 | 77 | denCoeffs[0] = (-k22 + wc22) / tmpk; 78 | denCoeffs[1] = (-wck2 + k2 + wc2) / tmpk; 79 | //--------------- 80 | // low-pass 81 | //--------------- 82 | if (lowpass){ 83 | numCoeffs[0] = wc2 / tmpk; 84 | numCoeffs[1] = wc22 /tmpk; 85 | numCoeffs[2] = wc2 /tmpk; 86 | } 87 | else{ 88 | //---------------- 89 | // high-pass 90 | //---------------- 91 | numCoeffs[0] = k2 / tmpk; 92 | numCoeffs[1] = -k22 / tmpk; 93 | numCoeffs[2] = k2 / tmpk; 94 | } 95 | } 96 | 97 | } 98 | 99 | void LinkwitzCrossover::initialize(float sR, std::string type){ 100 | sampleRate = sR; 101 | numCoeffs = new float[order + 1]; 102 | denCoeffs = new float[order]; 103 | prevInput = new float[order]; 104 | prevOutput = new float[order]; 105 | for (int i = 0; i < order; i++){ 106 | numCoeffs[i] = 0.0; 107 | denCoeffs[i] = 
0.0; 108 | prevInput[i] = 0.0; 109 | prevOutput[i] = 0.0; 110 | } 111 | numCoeffs[order] = 0.0; 112 | lowpass = (type == "lowpass")? true : false; 113 | setCoefficients(); 114 | } 115 | 116 | 117 | void LinkwitzCrossover::update(float newCutoffFrequency){ 118 | cutoff = newCutoffFrequency; 119 | setCoefficients(); 120 | } 121 | 122 | float LinkwitzCrossover::process(const float input){ 123 | float output = numCoeffs[0] * input; 124 | for (int i = 0; i < order; i++){ 125 | output += (numCoeffs[i+1] * prevInput[i]) - (denCoeffs[i] * prevOutput[i]); 126 | } 127 | 128 | //update previous input and output buffer - right shift by 1 (arrays hold `order` samples, so top valid index is order-1; starting at `order` wrote one past the allocation) 129 | for(int i = order - 1; i > 0; i--){ 130 | prevOutput[i] = prevOutput[i-1]; 131 | prevInput[i] = prevInput[i-1]; 132 | } 133 | prevInput[0] = input; 134 | prevOutput[0] = output; 135 | 136 | //there is a 180 degree phase shift between lowpass and highpass 137 | if (lowpass) 138 | return output; 139 | else 140 | return -output; 141 | } 142 | -------------------------------------------------------------------------------- /Source/PluginProcessor.h: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | This file contains the basic framework code for a JUCE plugin processor.
5 | 6 | ============================================================================== 7 | */ 8 | 9 | #pragma once 10 | 11 | #include 12 | #include "VelvetNoise.h" 13 | #include "Panner.h" 14 | #include "LinkwitzCrossover.h" 15 | #include "ButterworthFilter.h" 16 | #include "AllpassBiquadCascade.h" 17 | #include "TransientHandler.h" 18 | //============================================================================== 19 | /** 20 | */ 21 | class StereoWidenerAudioProcessor : public juce::AudioProcessor 22 | #if JucePlugin_Enable_ARA 23 | , public juce::AudioProcessorARAExtension 24 | #endif 25 | { 26 | public: 27 | //============================================================================== 28 | StereoWidenerAudioProcessor(); 29 | ~StereoWidenerAudioProcessor() override; 30 | 31 | //============================================================================== 32 | void prepareToPlay (double sampleRate, int samplesPerBlock) override; 33 | void releaseResources() override; 34 | 35 | #ifndef JucePlugin_PreferredChannelConfigurations 36 | bool isBusesLayoutSupported (const BusesLayout& layouts) const override; 37 | #endif 38 | 39 | void processBlock (juce::AudioBuffer&, juce::MidiBuffer&) override; 40 | 41 | //============================================================================== 42 | juce::AudioProcessorEditor* createEditor() override; 43 | bool hasEditor() const override; 44 | 45 | //============================================================================== 46 | const juce::String getName() const override; 47 | 48 | bool acceptsMidi() const override; 49 | bool producesMidi() const override; 50 | bool isMidiEffect() const override; 51 | double getTailLengthSeconds() const override; 52 | 53 | //============================================================================== 54 | int getNumPrograms() override; 55 | int getCurrentProgram() override; 56 | void setCurrentProgram (int index) override; 57 | const juce::String getProgramName (int index) 
override; 58 | void changeProgramName (int index, const juce::String& newName) override; 59 | 60 | //============================================================================== 61 | void getStateInformation (juce::MemoryBlock& destData) override; 62 | void setStateInformation (const void* data, int sizeInBytes) override; 63 | inline float onePoleFilter(float input, float previous_output); 64 | juce::StringArray initialise_velvet_from_binary_file(); 65 | 66 | 67 | //Input parameters 68 | juce::AudioProcessorValueTreeState parameters; 69 | std::atomic* widthLower; //stereo width (0 - original, 100 - max widening) 70 | std::atomic* widthHigher; 71 | std::atomic* cutoffFrequency; //filterbank cutoff frequency 72 | std::atomic* isAmpPreserve; //calculations are amplitude or energy preserving 73 | std::atomic* hasAllpassDecorrelation; //what decorrelator to use - VN or AP 74 | std::atomic* handleTransients; //whether to have transient handline block 75 | const int numFreqBands = 2; 76 | 77 | private: 78 | //============================================================================== 79 | JUCE_DECLARE_NON_COPYABLE_WITH_LEAK_DETECTOR (StereoWidenerAudioProcessor) 80 | const int numChannels = getMainBusNumInputChannels(); 81 | const float PI = std::acos(-1); 82 | 83 | VelvetNoise* velvetSequence; 84 | AllpassBiquadCascade* allpassCascade; 85 | Panner* pan; 86 | LinkwitzCrossover** amp_preserve_filters; 87 | ButterworthFilter** energy_preserve_filters; 88 | TransientHandler* transient_handler; 89 | 90 | int density = 1000; 91 | float targetDecaydB = 10.; 92 | bool logDistribution = true; //whether to concentrate VN impulses at the beginning 93 | bool useOptVelvetFilters = false; //whether to use optimised VN filters 94 | float* pannerInputs; 95 | float* temp_output; 96 | float* gain_multiplier; 97 | float prevWidthLower, curWidthLower; 98 | float prevWidthHigher, curWidthHigher; 99 | float prevCutoffFreq, curCutoffFreq; 100 | float smooth_factor; //one pole filter for 
parameter update 101 | enum{ 102 | vnLenMs = 15, 103 | smoothingTimeMs = 10, 104 | maxGroupDelayMs = 15, 105 | numBiquads = 200, 106 | prewarpFreqHz = 1000, 107 | }; 108 | std::vector> inputData; 109 | std::vector> outputData; 110 | float** final_output; 111 | 112 | }; 113 | -------------------------------------------------------------------------------- /StereoWidener.jucer: -------------------------------------------------------------------------------- 1 | 2 | 3 | 7 | 8 | 9 | 11 | 12 | 13 | 15 | 17 | 19 | 20 | 22 | 23 | 25 | 27 | 28 | 29 | 31 | 33 | 35 | 36 | 38 | 40 | 41 | 42 | 43 | 44 | 45 | 47 | 49 | 51 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | -------------------------------------------------------------------------------- /Python/src/hrtf_widener.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray, ArrayLike 3 | from typing import Tuple 4 | from scipy.fft import rfftfreq, rfft, irfft, fftshift 5 | from scipy.signal import fftconvolve, oaconvolve 6 | from interaural_cues import HRTFParams, get_hrtf_from_spherical_head_model, convert_ipd_to_itd 7 | from utils import calculate_interchannel_cross_correlation_matrix, calculate_interchannel_coherence 8 | import matplotlib.pyplot as plt 9 | 10 | 11 | class HRTFStereoWidener(): 12 | """ 13 | Stereo widening with ITD modification. The distance between the two speakers 14 | is a controllable parameter. 
A spherical head model with nominal head radius is used 15 | """ 16 | 17 | def __init__(self, 18 | sample_rate: float, 19 | azimuth_range: Tuple[float, float], 20 | angular_res_deg: float = 2, 21 | num_freq_points: int = 2**9, 22 | num_time_samples: int = 2**10, 23 | head_radius: float = 0.075): 24 | """ 25 | Args: 26 | sample_rate (float): 27 | azimuth_range (ArrayLike): the range of azimuth angles in ascending order 28 | in which the stereo pair can lie. 29 | angular_res_deg (float): angular reslution of the azimuth values in degrees 30 | num_freq_points (int): number of frequency bins in HRTF 31 | num_time_samples (int): length of HRIRs in samples 32 | head_radius (float): head radius in metre for HRTF calculation 33 | """ 34 | self.sample_rate = sample_rate 35 | assert azimuth_range[0] <= azimuth_range[ 36 | 1], "Azimuth range should be ascending" 37 | self.azimuth_range = np.arange(azimuth_range[0], 38 | azimuth_range[1] + angular_res_deg, 39 | angular_res_deg) 40 | self.num_freq_points = num_freq_points 41 | self.num_time_samples = num_time_samples 42 | # angle between the head axis and the left speaker 43 | # (angle between head and right speaker is negative of this) 44 | self.speaker_angle_deg = 30.0 45 | # HRTF set derived from spherical head model 46 | self._hrtf_set = get_hrtf_from_spherical_head_model( 47 | self.azimuth_range, self.frequency_axis, self.num_time_samples, 48 | head_radius) 49 | 50 | @property 51 | def num_orientations(self): 52 | return len(self.azimuth_range) 53 | 54 | @property 55 | def frequency_axis(self): 56 | return rfftfreq(self.num_freq_points, d=1.0 / self.sample_rate) 57 | 58 | @property 59 | def left_ear_axis(self): 60 | return 0 61 | 62 | @property 63 | def right_ear_axis(self): 64 | return 1 65 | 66 | @property 67 | def num_inputs(self): 68 | return 2 69 | 70 | @property 71 | def num_outputs(self): 72 | return 2 73 | 74 | @property 75 | def hrtf_set(self): 76 | itd = convert_ipd_to_itd(self._hrtf_set.ipd, 77 | 
self.sample_rate, 78 | self.frequency_axis / (self.sample_rate / 2) * 79 | np.pi, 80 | wrapped_phase=False) 81 | self._hrtf_set.itd = itd 82 | return self._hrtf_set 83 | 84 | def update_speaker_angle(self, new_speaker_angle: float): 85 | assert self.azimuth_range[0] <= new_speaker_angle <= self.azimuth_range[ 86 | -1], f"Speaker angle cannot exceed {azimuth_range[-1]} degrees" 87 | self.speaker_angle_deg = new_speaker_angle 88 | 89 | def find_closest_doa(self) -> Tuple[int, int]: 90 | all_doas = self._hrtf_set.doa 91 | left_closest_idx = np.argmin(np.abs(all_doas - self.speaker_angle_deg)) 92 | right_closest_idx = np.argmin( 93 | np.abs(all_doas - (-self.speaker_angle_deg))) 94 | return (left_closest_idx, right_closest_idx) 95 | 96 | def process(self, input_signal: NDArray) -> NDArray: 97 | """Apply the right HRIRs to the direct and cross-talk path of the input signal""" 98 | 99 | # ensure input signal is of the right length 100 | _, num_channels = input_signal.shape 101 | if num_channels > self.num_inputs: 102 | input_signal = input_signal.T 103 | num_channels = self.num_inputs 104 | if num_channels != self.num_inputs: 105 | raise RuntimeError("Input signal must be stereo!") 106 | output_signal = np.zeros((input_signal.shape[0], self.num_outputs)) 107 | 108 | # find closest DOA 109 | closest_left_idx, closest_right_idx = self.find_closest_doa() 110 | # HRIRs for both ears corresponding to left speaker 111 | left_spk_hrir = self._hrtf_set.hrirs[closest_left_idx, ...] 112 | # HRIR for both ears corresponding to right speaker 113 | right_spk_hrir = self._hrtf_set.hrirs[closest_right_idx, ...] 
114 | # this array is num_time_samples x num_ears x num_speakers 115 | spk_hrirs = np.dstack([left_spk_hrir, right_spk_hrir]) 116 | 117 | for ear in range(self.num_outputs): 118 | for spk in range(self.num_inputs): 119 | # ear axis stays constant 120 | output_signal[:, ear] += oaconvolve( 121 | np.squeeze(input_signal[:, spk]), 122 | np.squeeze(spk_hrirs[:, ear, spk]), 123 | mode='same') * 0.5 124 | 125 | return output_signal 126 | 127 | def calculate_correlation_function(self, 128 | output_signal: np.ndarray) -> float: 129 | X = rfft(output_signal[:, 0], n=self.num_freq_points) 130 | Y = rfft(output_signal[:, 1], n=self.num_freq_points) 131 | corr_freq = (X * np.conj(Y)) / ( 132 | np.sqrt(np.sum(np.abs(X)) + np.sum(np.abs(Y)))) 133 | corr_func = fftshift(irfft(corr_freq, n=self.num_time_samples)) 134 | return corr_func 135 | 136 | @staticmethod 137 | def calculate_correlation(output_signal: np.ndarray) -> float: 138 | return calculate_interchannel_coherence(output_signal[:, 0], 139 | output_signal[:, 1], 140 | time_axis=0) 141 | 142 | def calculate_interchannel_coherence(self, output_signal: NDArray): 143 | icc_matrix, icc_freqs = calculate_interchannel_cross_correlation_matrix( 144 | output_signal, 145 | fs=self.sample_rate, 146 | num_channels=self.num_outputs, 147 | time_axis=0, 148 | channel_axis=-1, 149 | bands_per_octave=3, 150 | freq_range=(20, self.sample_rate / 2.0)) 151 | icc_vector = np.squeeze(icc_matrix[..., 0, 1]) 152 | return icc_vector, icc_freqs 153 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Creative Commons Legal Code 2 | 3 | CC0 1.0 Universal 4 | 5 | CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE 6 | LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN 7 | ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS 8 | INFORMATION ON AN "AS-IS" BASIS. 
CREATIVE COMMONS MAKES NO WARRANTIES 9 | REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS 10 | PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM 11 | THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED 12 | HEREUNDER. 13 | 14 | Statement of Purpose 15 | 16 | The laws of most jurisdictions throughout the world automatically confer 17 | exclusive Copyright and Related Rights (defined below) upon the creator 18 | and subsequent owner(s) (each and all, an "owner") of an original work of 19 | authorship and/or a database (each, a "Work"). 20 | 21 | Certain owners wish to permanently relinquish those rights to a Work for 22 | the purpose of contributing to a commons of creative, cultural and 23 | scientific works ("Commons") that the public can reliably and without fear 24 | of later claims of infringement build upon, modify, incorporate in other 25 | works, reuse and redistribute as freely as possible in any form whatsoever 26 | and for any purposes, including without limitation commercial purposes. 27 | These owners may contribute to the Commons to promote the ideal of a free 28 | culture and the further production of creative, cultural and scientific 29 | works, or to gain reputation or greater distribution for their Work in 30 | part through the use and efforts of others. 31 | 32 | For these and/or other purposes and motivations, and without any 33 | expectation of additional consideration or compensation, the person 34 | associating CC0 with a Work (the "Affirmer"), to the extent that he or she 35 | is an owner of Copyright and Related Rights in the Work, voluntarily 36 | elects to apply CC0 to the Work and publicly distribute the Work under its 37 | terms, with knowledge of his or her Copyright and Related Rights in the 38 | Work and the meaning and intended legal effect of CC0 on those rights. 39 | 40 | 1. Copyright and Related Rights. 
A Work made available under CC0 may be 41 | protected by copyright and related or neighboring rights ("Copyright and 42 | Related Rights"). Copyright and Related Rights include, but are not 43 | limited to, the following: 44 | 45 | i. the right to reproduce, adapt, distribute, perform, display, 46 | communicate, and translate a Work; 47 | ii. moral rights retained by the original author(s) and/or performer(s); 48 | iii. publicity and privacy rights pertaining to a person's image or 49 | likeness depicted in a Work; 50 | iv. rights protecting against unfair competition in regards to a Work, 51 | subject to the limitations in paragraph 4(a), below; 52 | v. rights protecting the extraction, dissemination, use and reuse of data 53 | in a Work; 54 | vi. database rights (such as those arising under Directive 96/9/EC of the 55 | European Parliament and of the Council of 11 March 1996 on the legal 56 | protection of databases, and under any national implementation 57 | thereof, including any amended or successor version of such 58 | directive); and 59 | vii. other similar, equivalent or corresponding rights throughout the 60 | world based on applicable law or treaty, and any national 61 | implementations thereof. 62 | 63 | 2. Waiver. 
To the greatest extent permitted by, but not in contravention 64 | of, applicable law, Affirmer hereby overtly, fully, permanently, 65 | irrevocably and unconditionally waives, abandons, and surrenders all of 66 | Affirmer's Copyright and Related Rights and associated claims and causes 67 | of action, whether now known or unknown (including existing as well as 68 | future claims and causes of action), in the Work (i) in all territories 69 | worldwide, (ii) for the maximum duration provided by applicable law or 70 | treaty (including future time extensions), (iii) in any current or future 71 | medium and for any number of copies, and (iv) for any purpose whatsoever, 72 | including without limitation commercial, advertising or promotional 73 | purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each 74 | member of the public at large and to the detriment of Affirmer's heirs and 75 | successors, fully intending that such Waiver shall not be subject to 76 | revocation, rescission, cancellation, termination, or any other legal or 77 | equitable action to disrupt the quiet enjoyment of the Work by the public 78 | as contemplated by Affirmer's express Statement of Purpose. 79 | 80 | 3. Public License Fallback. Should any part of the Waiver for any reason 81 | be judged legally invalid or ineffective under applicable law, then the 82 | Waiver shall be preserved to the maximum extent permitted taking into 83 | account Affirmer's express Statement of Purpose. 
In addition, to the 84 | extent the Waiver is so judged Affirmer hereby grants to each affected 85 | person a royalty-free, non transferable, non sublicensable, non exclusive, 86 | irrevocable and unconditional license to exercise Affirmer's Copyright and 87 | Related Rights in the Work (i) in all territories worldwide, (ii) for the 88 | maximum duration provided by applicable law or treaty (including future 89 | time extensions), (iii) in any current or future medium and for any number 90 | of copies, and (iv) for any purpose whatsoever, including without 91 | limitation commercial, advertising or promotional purposes (the 92 | "License"). The License shall be deemed effective as of the date CC0 was 93 | applied by Affirmer to the Work. Should any part of the License for any 94 | reason be judged legally invalid or ineffective under applicable law, such 95 | partial invalidity or ineffectiveness shall not invalidate the remainder 96 | of the License, and in such case Affirmer hereby affirms that he or she 97 | will not (i) exercise any of his or her remaining Copyright and Related 98 | Rights in the Work or (ii) assert any associated claims and causes of 99 | action with respect to the Work, in either case contrary to Affirmer's 100 | express Statement of Purpose. 101 | 102 | 4. Limitations and Disclaimers. 103 | 104 | a. No trademark or patent rights held by Affirmer are waived, abandoned, 105 | surrendered, licensed or otherwise affected by this document. 106 | b. Affirmer offers the Work as-is and makes no representations or 107 | warranties of any kind concerning the Work, express, implied, 108 | statutory or otherwise, including without limitation warranties of 109 | title, merchantability, fitness for a particular purpose, non 110 | infringement, or the absence of latent or other defects, accuracy, or 111 | the present or absence of errors, whether or not discoverable, all to 112 | the greatest extent permissible under applicable law. 113 | c. 
Affirmer disclaims responsibility for clearing rights of other persons 114 | that may apply to the Work or any use thereof, including without 115 | limitation any person's Copyright and Related Rights in the Work. 116 | Further, Affirmer disclaims responsibility for obtaining any necessary 117 | consents, permissions or other rights required for any use of the 118 | Work. 119 | d. Affirmer understands and acknowledges that Creative Commons is not a 120 | party to this document and has no duty or obligation with respect to 121 | this CC0 or use of the Work. 122 | -------------------------------------------------------------------------------- /Python/src/notebooks/transient_response.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "### Check the transient response of the stereo widener with percussive signals" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "id": "1", 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import numpy as np\n", 19 | "import soundfile as sf\n", 20 | "import os, sys\n", 21 | "from typing import Optional\n", 22 | "import matplotlib.pyplot as plt\n", 23 | "from scipy.signal import spectrogram\n", 24 | "from importlib import reload\n", 25 | "path = os.path.abspath('../.')\n", 26 | "sys.path.append(path)" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": null, 32 | "id": "2", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [ 36 | "from widener import *\n", 37 | "from onset_detector import OnsetDetector\n", 38 | "from utils import db, ms_to_samps\n", 39 | "from plot import *\n", 40 | "save_flag = True" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "id": "3", 46 | "metadata": {}, 47 | "source": [ 48 | "#### Helper function" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": 
"4", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "def plot_spectrogram(signal: np.ndarray, fs:float, title: str, nfft:int=2**10, axis:Optional[int]=None):\n", 59 | " if axis is None:\n", 60 | " ff, tt, Sxx = spectrogram(signal, fs=fs, window='hann', nfft=nfft)\n", 61 | " else:\n", 62 | " ff, tt, Sxx = spectrogram(signal, fs=fs, window='hann', nfft=nfft, axis=axis)\n", 63 | " Sxx_norm = Sxx/np.max(np.abs(Sxx))\n", 64 | " Sxx_db = db(np.abs(Sxx_norm))\n", 65 | " if Sxx.ndim == 3: \n", 66 | " fig, ax = plt.subplots(2,1)\n", 67 | " for num_sig in range(Sxx.shape[1]):\n", 68 | " ax[num_sig].imshow(np.squeeze(Sxx_db[:, num_sig, :]), \n", 69 | " cmap = 'jet', \n", 70 | " extent=[0,max(tt),0,max(ff)], \n", 71 | " interpolation='bilinear', \n", 72 | " aspect = 'auto', \n", 73 | " origin = 'lower')\n", 74 | " ax[num_sig].set_xlabel('Time (s)') \n", 75 | " ax[num_sig].set_ylabel('Frequency (Hz)')\n", 76 | " ax[num_sig].set_ylim([20,20000])\n", 77 | " ax[num_sig].grid(True)\n", 78 | " ax[0].set_title(title)\n", 79 | "\n", 80 | " else:\n", 81 | " fig, ax = plt.subplots()\n", 82 | " ax.imshow(np.squeeze(Sxx_db), \n", 83 | " cmap = 'jet', \n", 84 | " extent=[0,max(tt),0,max(ff)], \n", 85 | " interpolation='bilinear', \n", 86 | " aspect = 'auto', \n", 87 | " origin = 'lower')\n", 88 | " ax.set_title(title)\n", 89 | " ax.set_xlabel('Time (s)')\n", 90 | " ax.set_ylabel('Frequency (Hz)')\n", 91 | " ax.set_ylim([20,20000])\n", 92 | " ax.grid(True)\n", 93 | " return fig" 94 | ] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "id": "5", 99 | "metadata": {}, 100 | "source": [ 101 | "#### Check the onset detector" 102 | ] 103 | }, 104 | { 105 | "cell_type": "code", 106 | "execution_count": null, 107 | "id": "6", 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "input_signal, fs = sf.read('../../audio/lexicon_dry15.wav')\n", 112 | "trim_start_samp = int(ms_to_samps(1200, fs))\n", 113 | "trim_end_samp = int(ms_to_samps(3000, fs))\n", 114 | 
"trimmed_input_signal = input_signal[trim_start_samp : trim_end_samp, :]\n", 115 | "# plot spectrogram\n", 116 | "fig = plot_spectrogram(trimmed_input_signal[:,0], fs, 'Input signal', axis=0, nfft=2**9)\n", 117 | "if save_flag:\n", 118 | " fig.savefig('../../figures/transient_input_spec.png', dpi=300)" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": null, 124 | "id": "7", 125 | "metadata": {}, 126 | "outputs": [], 127 | "source": [ 128 | "onset_detect = OnsetDetector(fs, \n", 129 | " attack_time_ms=5.0, \n", 130 | " release_time_ms=20.0,\n", 131 | " min_onset_hold_ms=80.0,\n", 132 | " min_onset_sep_ms=20.0)\n", 133 | "onset_detect.process(input_signal, to_plot=True)" 134 | ] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "id": "8", 139 | "metadata": {}, 140 | "source": [ 141 | "#### Broadband widener with and without transient detection" 142 | ] 143 | }, 144 | { 145 | "cell_type": "code", 146 | "execution_count": null, 147 | "id": "9", 148 | "metadata": {}, 149 | "outputs": [], 150 | "source": [ 151 | "decorr_type = DecorrelationType.ALLPASS\n", 152 | "beta_init = np.pi/2\n", 153 | "stereo_widener = StereoWidenerBroadband(input_signal, fs, decorr_type, beta_init)\n", 154 | "output_signal = stereo_widener.process()\n", 155 | "\n", 156 | "# plot spectrogram\n", 157 | "trimmed_output_signal = output_signal[trim_start_samp : trim_end_samp, :]\n", 158 | "fig = plot_spectrogram(trimmed_output_signal[:,0], fs, 'Broadband widener output', axis=0, nfft=2**9)\n", 159 | "if save_flag:\n", 160 | " fig.savefig('../../figures/transient_output_spec_wo_detection.png', dpi=300)\n", 161 | " sf.write(f'../../audio/lexicon_broadband_beta={beta_init/(np.pi/2)}.wav', output_signal, fs)\n" 162 | ] 163 | }, 164 | { 165 | "cell_type": "code", 166 | "execution_count": null, 167 | "id": "10", 168 | "metadata": {}, 169 | "outputs": [], 170 | "source": [ 171 | "del stereo_widener\n", 172 | "onset_detection_params = {\n", 173 | " 'min_onset_hold_ms':80.0,\n", 
174 | " 'min_onset_sep_ms':20.0,\n", 175 | "}\n", 176 | "stereo_widener = StereoWidenerBroadband(input_signal, \n", 177 | " fs, \n", 178 | " decorr_type, \n", 179 | " beta_init, \n", 180 | " detect_transient=True, \n", 181 | " onset_detection_params=onset_detection_params)\n", 182 | "output_signal = stereo_widener.process()\n", 183 | "trimmed_output_signal = output_signal[trim_start_samp : trim_end_samp, :]\n", 184 | "\n", 185 | "# plot spectrogram\n", 186 | "fig = plot_spectrogram(trimmed_output_signal[:,0], fs, 'Broadband widener output', axis=0, nfft=2**9)\n", 187 | "if save_flag:\n", 188 | " fig.savefig('../../figures/transient_output_spec_with_detection.png', dpi=300)\n", 189 | " sf.write(f'../../audio/lexicon_broadband_transient_beta={beta_init/(np.pi/2)}.wav', output_signal, fs)" 190 | ] 191 | }, 192 | { 193 | "cell_type": "code", 194 | "execution_count": null, 195 | "id": "11", 196 | "metadata": {}, 197 | "outputs": [], 198 | "source": [ 199 | "from plot import time_plot\n", 200 | "plt.figure()\n", 201 | "plt.subplot(211)\n", 202 | "time_plot(input_signal, fs, db_scale=False)\n", 203 | "plt.subplot(212)\n", 204 | "time_plot(output_signal, fs, db_scale=False)" 205 | ] 206 | } 207 | ], 208 | "metadata": { 209 | "kernelspec": { 210 | "display_name": "Python 3 (ipykernel)", 211 | "language": "python", 212 | "name": "python3" 213 | }, 214 | "language_info": { 215 | "codemirror_mode": { 216 | "name": "ipython", 217 | "version": 3 218 | }, 219 | "file_extension": ".py", 220 | "mimetype": "text/x-python", 221 | "name": "python", 222 | "nbconvert_exporter": "python", 223 | "pygments_lexer": "ipython3", 224 | "version": "3.9.0" 225 | } 226 | }, 227 | "nbformat": 4, 228 | "nbformat_minor": 5 229 | } 230 | -------------------------------------------------------------------------------- /Source/PluginEditor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | 
============================================================================== 3 | 4 | This file contains the basic framework code for a JUCE plugin editor. 5 | 6 | ============================================================================== 7 | */ 8 | 9 | #include "PluginProcessor.h" 10 | #include "PluginEditor.h" 11 | 12 | //============================================================================== 13 | StereoWidenerAudioProcessorEditor::StereoWidenerAudioProcessorEditor (StereoWidenerAudioProcessor& p, juce::AudioProcessorValueTreeState& vts) 14 | : AudioProcessorEditor (&p), audioProcessor (p), valueTreeState (vts) 15 | { 16 | // Make sure that before the constructor has finished, you've set the 17 | // editor's size to whatever you need it to be. 18 | setSize (300,500); 19 | 20 | //add sliders and labels 21 | addAndMakeVisible(widthLowerSlider); 22 | widthLowerSlider.setSliderStyle(juce::Slider::SliderStyle::Rotary); 23 | widthLowerSlider.setRange (0, 100.0); 24 | widthLowerSlider.setValue(0.0); 25 | widthLowerAttach.reset (new juce::AudioProcessorValueTreeState::SliderAttachment (valueTreeState, "widthLower", widthLowerSlider)); 26 | 27 | 28 | addAndMakeVisible(widthLowerLabel); 29 | widthLowerLabel.setText("Lower frequency width", juce::dontSendNotification); 30 | widthLowerLabel.setFont(juce::Font("Times New Roman", 15.0f, juce::Font::plain)); 31 | widthLowerLabel.attachToComponent (&widthLowerSlider, false); 32 | 33 | 34 | //add sliders and labels 35 | addAndMakeVisible(widthHigherSlider); 36 | widthHigherSlider.setSliderStyle(juce::Slider::SliderStyle::Rotary); 37 | widthHigherSlider.setRange (0, 100.0); 38 | widthHigherSlider.setValue(0.0); 39 | widthHigherAttach.reset (new juce::AudioProcessorValueTreeState::SliderAttachment (valueTreeState, "widthHigher", widthHigherSlider)); 40 | 41 | 42 | addAndMakeVisible(widthHigherLabel); 43 | widthHigherLabel.setText("Higher frequency width", juce::dontSendNotification); 44 | 
widthHigherLabel.setFont(juce::Font("Times New Roman", 15.0f, juce::Font::plain)); 45 | widthHigherLabel.attachToComponent (&widthHigherSlider, false); 46 | 47 | //add sliders and labels 48 | addAndMakeVisible(cutoffFrequencySlider); 49 | cutoffFrequencySlider.setSliderStyle(juce::Slider::SliderStyle::Rotary); 50 | cutoffFrequencySlider.setRange (100.0, 4000.0); 51 | cutoffFrequencySlider.setValue(500.0); 52 | cutoffFrequencySlider.setSkewFactor(0.5); //this will ensure more focus on lower frequencies 53 | cutoffFrequencyAttach.reset (new juce::AudioProcessorValueTreeState::SliderAttachment (valueTreeState, "cutoffFrequency", cutoffFrequencySlider)); 54 | 55 | addAndMakeVisible(cutoffFrequencyLabel); 56 | cutoffFrequencyLabel.setText("Filter cutoff frequency", juce::dontSendNotification); 57 | cutoffFrequencyLabel.setFont(juce::Font("Times New Roman", 15.0f, juce::Font::plain)); 58 | cutoffFrequencyLabel.attachToComponent (&cutoffFrequencySlider, false); 59 | 60 | //add toggle button to switch between amplitude and energetic calculations 61 | addAndMakeVisible(isAmpPreserve); 62 | // [=] indicates a lambda function, it sets the parameterChangedCallback below 63 | isAmpPreserveAttach = std::make_unique(*vts.getParameter("isAmpPreserve"), [=] (float value) { 64 | bool isSelected = value == 1.0f; 65 | isAmpPreserve.setToggleState(isSelected, juce::sendNotificationSync); 66 | }); 67 | 68 | isAmpPreserve.onClick = [=] { 69 | //if toggle state is true, then 70 | if (isAmpPreserve.getToggleState()) 71 | isAmpPreserveAttach->setValueAsCompleteGesture(1.0f); 72 | else 73 | isAmpPreserveAttach->setValueAsCompleteGesture(0.0f); 74 | }; 75 | isAmpPreserveAttach->sendInitialUpdate(); 76 | 77 | //add labels 78 | addAndMakeVisible(isAmpPreserveLabel); 79 | isAmpPreserveLabel.setText ("Amplitude (on) / Energy preserve (off)", juce::dontSendNotification); 80 | isAmpPreserveLabel.setFont(juce::Font ("Times New Roman", 12.0f, juce::Font::plain)); 81 | 82 | // add toggle button for 
choosing decorrelator 83 | addAndMakeVisible(hasAllpassDecorrelation); 84 | // [=] indicates a lambda function, it sets the parameterChangedCallback below 85 | hasAllpassDecorrelationAttach = std::make_unique(*vts.getParameter("hasAllpassDecorrelation"), [=] (float value) { 86 | bool isSelected = value == 1.0f; 87 | hasAllpassDecorrelation.setToggleState(isSelected, juce::sendNotificationSync); 88 | }); 89 | 90 | hasAllpassDecorrelation.onClick = [=] { 91 | //if toggle state is true, then 92 | if (hasAllpassDecorrelation.getToggleState()) 93 | hasAllpassDecorrelationAttach->setValueAsCompleteGesture(1.0f); 94 | else 95 | hasAllpassDecorrelationAttach->setValueAsCompleteGesture(0.0f); 96 | }; 97 | hasAllpassDecorrelationAttach->sendInitialUpdate(); 98 | 99 | //add labels 100 | addAndMakeVisible(hasAllpassDecorrelationLabel); 101 | hasAllpassDecorrelationLabel.setText ("Allpass (on) / Velvet decorrelation (off)", juce::dontSendNotification); 102 | hasAllpassDecorrelationLabel.setFont(juce::Font ("Times New Roman", 12.0f, juce::Font::plain)); 103 | 104 | 105 | 106 | // add toggle button for choosing transient detector 107 | addAndMakeVisible(handleTransients); 108 | // [=] indicates a lambda function, it sets the parameterChangedCallback below 109 | handleTransientsAttach = std::make_unique(*vts.getParameter("handleTransients"), [=] (float value) { 110 | bool isSelected = value == 1.0f; 111 | handleTransients.setToggleState(isSelected, juce::sendNotificationSync); 112 | }); 113 | 114 | handleTransients.onClick = [=] { 115 | //if toggle state is true, then 116 | if (handleTransients.getToggleState()) 117 | handleTransientsAttach->setValueAsCompleteGesture(1.0f); 118 | else 119 | handleTransientsAttach->setValueAsCompleteGesture(0.0f); 120 | }; 121 | handleTransientsAttach->sendInitialUpdate(); 122 | 123 | //add labels 124 | addAndMakeVisible(handleTransientsLabel); 125 | handleTransientsLabel.setText ("Transient detection", juce::dontSendNotification); 126 | 
handleTransientsLabel.setFont(juce::Font ("Transient detection", 12.0f, juce::Font::plain)); 127 | handleTransientsLabel.setFont(juce::Font ("Times New Roman", 12.0f, juce::Font::plain)); 128 | } 129 | 130 | StereoWidenerAudioProcessorEditor::~StereoWidenerAudioProcessorEditor() 131 | { 132 | } 133 | 134 | //============================================================================== 135 | void StereoWidenerAudioProcessorEditor::paint (juce::Graphics& g) 136 | { 137 | // (Our component is opaque, so we must completely fill the background with a solid colour) 138 | g.fillAll (getLookAndFeel().findColour (juce::ResizableWindow::backgroundColourId)); 139 | 140 | g.setFont (juce::Font ("Times New Roman", 20.0f, juce::Font::bold)); 141 | g.setColour (juce::Colours::lightgrey); 142 | g.drawText ("StereoWidener", 150, 450, 180, 50, true); 143 | } 144 | 145 | void StereoWidenerAudioProcessorEditor::resized() 146 | { 147 | // This is generally where you'll want to lay out the positions of any 148 | // subcomponents in your editor.. 
149 | auto sliderLeft = 30; 150 | widthLowerSlider.setBounds (sliderLeft , 50, getWidth() - sliderLeft - 10, 80); 151 | widthHigherSlider.setBounds (sliderLeft , 150, getWidth() - sliderLeft - 10, 80); 152 | cutoffFrequencySlider.setBounds (sliderLeft , 250, getWidth() - sliderLeft - 10, 80); 153 | 154 | isAmpPreserve.setBounds (sliderLeft, 320, getWidth() - sliderLeft - 10, 50); 155 | isAmpPreserveLabel.setBounds(sliderLeft + 50, 340, getWidth() - sliderLeft - 10, 20); 156 | 157 | hasAllpassDecorrelation.setBounds (sliderLeft, 360, getWidth() - sliderLeft - 10, 50); 158 | hasAllpassDecorrelationLabel.setBounds(sliderLeft + 50, 380, getWidth() - sliderLeft - 10, 20); 159 | 160 | handleTransients.setBounds (sliderLeft, 400, getWidth() - sliderLeft - 10, 50); 161 | handleTransientsLabel.setBounds(sliderLeft + 50, 420, getWidth() - sliderLeft - 10, 20); 162 | } 163 | -------------------------------------------------------------------------------- /Python/src/onset_detector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import ArrayLike, NDArray 3 | from typing import List 4 | import matplotlib.pyplot as plt 5 | from utils import ms_to_samps 6 | 7 | _HIGH_EPS = 1e9 8 | 9 | 10 | class OnsetDetector(): 11 | """ 12 | Onset detector with a leaky integrator""" 13 | 14 | def __init__(self, 15 | fs: float, 16 | attack_time_ms: float = 5.0, 17 | release_time_ms: float = 20.0, 18 | min_onset_hold_ms: float = 80.0, 19 | min_onset_sep_ms: float = 50.0): 20 | """ 21 | Args: 22 | fs (float): sampling rate in Hz 23 | attack_time_ms (float): leaky integrator attack time 24 | release_time_ms (float): leaky integrator release time 25 | min_onset_hold_ms (float): minimum time to wait 26 | before an onset becomes an offset 27 | min_onset_sep_ms (float): minimum separation between two 28 | onsets in ms 29 | """ 30 | self.fs = fs 31 | self.leaky = LeakyIntegrator(fs, attack_time_ms, release_time_ms) 32 | 
self._onset_flag = [] 33 | self.min_onset_hold_samps = int(ms_to_samps(min_onset_hold_ms, 34 | self.fs)) 35 | self.min_onset_sep_samps = int(ms_to_samps(min_onset_sep_ms, self.fs)) 36 | self._threshold = None 37 | self._signal_env = None 38 | 39 | @property 40 | def signal_env(self) -> NDArray: 41 | return self._signal_env 42 | 43 | @property 44 | def threshold(self) -> NDArray: 45 | return self._threshold 46 | 47 | @property 48 | def running_sum_thres(self) -> float: 49 | return self._running_sum_thres 50 | 51 | @property 52 | def onset_flag(self) -> List[bool]: 53 | return self._onset_flag 54 | 55 | @staticmethod 56 | def check_local_peak(cur_samp: float, prev_samp: float, next_samp: float): 57 | """ 58 | Given the current, previous and next samples, check if the current sample 59 | is a local peak 60 | """ 61 | if cur_samp > prev_samp and cur_samp > next_samp: 62 | return True 63 | else: 64 | return False 65 | 66 | @staticmethod 67 | def check_direction(cur_samp: float, 68 | prev_samp: float, 69 | next_samp: float, 70 | is_rising: bool = True) -> bool: 71 | """ 72 | Check whether the signal envelope is rising or falling. 
73 | The flag `is_rising` is used to check for rising envelopes 74 | """ 75 | if is_rising: 76 | return True if cur_samp > prev_samp and cur_samp < next_samp else False 77 | else: 78 | return True if cur_samp < prev_samp and cur_samp > next_samp else False 79 | 80 | def process(self, input_signal: NDArray, to_plot: bool = False): 81 | """Given an input signal, find the location of onsets""" 82 | if (input_signal.ndim == 2 and input_signal.shape[1] > 1): 83 | input_signal = input_signal[:, 0] 84 | num_samp = len(input_signal) 85 | self._signal_env = self.leaky.process(input_signal) 86 | # onet flag is a list of bools 87 | self._onset_flag = [False for k in range(num_samp)] 88 | # threshold for onset calculation, calculated dynamically 89 | self._threshold = np.ones(num_samp) * _HIGH_EPS 90 | # running sum to calculate mean of the signal envelope 91 | self._running_sum_thres = 0.0 92 | hold_counter = 0 93 | inhibit_counter = 0 94 | 95 | for k in range(1, num_samp - 1): 96 | cur_samp = self._signal_env[k] 97 | prev_samp = self._signal_env[k - 1] 98 | next_samp = self._signal_env[k + 1] 99 | 100 | is_local_peak = self.check_local_peak(cur_samp, prev_samp, 101 | next_samp) 102 | 103 | # running sum of the signal envelope 104 | self._running_sum_thres += self.signal_env[k] 105 | 106 | # threshold is 1.4 * mean of envelope if there is a local peak 107 | self._threshold[k] = 2 * (self._running_sum_thres / 108 | k) if is_local_peak else self._threshold[ 109 | k - 1] 110 | 111 | # if an onset is detected, the flag will be true for a minimum number 112 | # of frames to prevent false offset detection 113 | if 0 < hold_counter < self.min_onset_hold_samps: 114 | hold_counter += 1 115 | self.onset_flag[k] = True 116 | continue 117 | # if an offset is detected, the flag will be false for a minimum number 118 | # of frames to prevent false onset detection 119 | elif 0 < inhibit_counter < self.min_onset_sep_samps: 120 | inhibit_counter += 1 121 | self.onset_flag[k] = False 122 | 
continue 123 | else: 124 | hold_counter = 0 125 | inhibit_counter = 0 126 | # if the signal is rising and the value is greater than the 127 | # mean of the thresholds so far 128 | if self.check_direction( 129 | cur_samp, prev_samp, next_samp, 130 | is_rising=True) and cur_samp > (self._threshold[k]): 131 | self._onset_flag[k] = True 132 | hold_counter += 1 133 | # if the signal is fallng and the value is lesser than the 134 | # mean of the thresholds so far 135 | elif self.check_direction( 136 | cur_samp, prev_samp, next_samp, 137 | is_rising=False) and cur_samp < (self._threshold[k]): 138 | self._onset_flag[k] = False 139 | inhibit_counter += 1 140 | 141 | if to_plot: 142 | ax = self.plot(input_signal) 143 | 144 | def plot(self, input_signal: NDArray): 145 | """Plot the input signal and the detected signal, threshold and onsets""" 146 | num_samp = len(input_signal) 147 | time_vector = np.arange(0, num_samp / self.fs, 1.0 / self.fs) 148 | onset_pos = np.zeros_like(time_vector) 149 | onset_idx = np.where(self._onset_flag)[0] 150 | onset_pos[onset_idx] = 1.0 151 | 152 | fig, ax = plt.subplots(figsize=(6, 4)) 153 | ax.plot(time_vector, input_signal, label='input signal') 154 | ax.plot(time_vector, self._signal_env, label='envelope') 155 | ax.plot(time_vector, self._threshold, label='threshold') 156 | ax.plot(time_vector, onset_pos, 'k--', label='onsets') 157 | ax.legend(loc='lower left') 158 | ax.set_ylim([-1.0, 1.0]) 159 | plt.show() 160 | 161 | return ax 162 | 163 | 164 | class LeakyIntegrator(): 165 | """Leaky integrator for signal envelope detection""" 166 | 167 | def __init__(self, 168 | fs: float, 169 | attack_time_ms: float = 5.0, 170 | release_time_ms: float = 50.0): 171 | self.fs = fs 172 | self.attack_time_ms = attack_time_ms 173 | self.release_time_ms = release_time_ms 174 | 175 | def process(self, input_signal: NDArray) -> NDArray: 176 | """Estimate the signal amplitude envelope with a leaky integrator. 
177 | 178 | Leaky integrator = a first-order IIR low-pass filter. 179 | 180 | Args: 181 | input_signal (npt.NDArray): The impulse response (should be 1-dimensional array or only the 1st column is taken) 182 | fs (float): Sample rate 183 | attack_time_ms (float): Integrator attack time in milliseconds, by default 5 184 | release_time_ms (float): Integrator release time in milliseconds, by default 50 185 | 186 | Returns: 187 | npt.NDArray: The envelope of the impulse response 188 | 189 | """ 190 | # find envelope with a leaky integrator 191 | if (input_signal.ndim == 2 and input_signal.shape[1] > 1): 192 | input_signal = input_signal[:, 0] 193 | 194 | tau_a = self.attack_time_ms * self.fs * 1e-3 195 | tau_r = self.release_time_ms * self.fs * 1e-3 196 | signal_length = len(input_signal) 197 | signal_env = np.zeros_like(input_signal) 198 | for n in range(1, signal_length): 199 | if input_signal[n] > signal_env[n - 1]: 200 | signal_env[n] = signal_env[n - 1] + ( 201 | 1 - np.exp(-1 / tau_a)) * (np.abs(input_signal[n]) - 202 | signal_env[n - 1]) 203 | else: 204 | signal_env[n] = signal_env[n - 1] + ( 205 | 1 - np.exp(-1 / tau_r)) * (np.abs(input_signal[n]) - 206 | signal_env[n - 1]) 207 | 208 | return signal_env 209 | -------------------------------------------------------------------------------- /Python/src/plot.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from collections import namedtuple 3 | from typing import Optional, List 4 | import matplotlib.pyplot as plt 5 | from matplotlib.axes import Axes 6 | from scipy import interpolate 7 | from scipy.signal import sosfreqz 8 | from utils import db, db_floor, hertz_to_erbscale, erbscale_to_hertz 9 | 10 | FreqTicks = namedtuple("FreqTicks", ["ticks", "labels"]) 11 | AudFreqTicks = FreqTicks( 12 | ticks=[20, 50, 100, 250, 500, 1e3, 2e3, 4e3, 8e3, 16e3], 13 | labels=[20, 50, 100, 250, 500, "1k", "2k", "4k", "8k", "16k"]) 14 | 15 | 16 | def erbspace(fmin: 
float, fmax: float, n: int) -> np.ndarray: 17 | """Generate a sequence of frequencies spaced evenly on the ERB-scale. 18 | 19 | Args: 20 | fmin (float): Minimum frequency (in Hertz) 21 | fmax (float): Maximum frequency (in Hertz) 22 | n (int): Number of points in the sequence. 23 | 24 | Returns: 25 | np.ndarray: The ERB-spaced sequence. 26 | """ 27 | ymin, ymax = hertz_to_erbscale(fmin), hertz_to_erbscale(fmax) 28 | y = erbscale_to_hertz(np.linspace(ymin, ymax, n)) 29 | y[0] = fmin 30 | y[-1] = fmax 31 | return y 32 | 33 | 34 | def semiaudplot( 35 | f: np.ndarray, 36 | y: np.ndarray, 37 | *args, 38 | n: int = 1024, 39 | marker: Optional[str] = None, 40 | linestyle: Optional[str] = None, 41 | ax: Optional[Axes] = None, 42 | interp: bool = False, 43 | **pyplot_kwargs, 44 | ) -> List[plt.Line2D]: 45 | """Plot a sequence with one axis spaced using an auditory (ERB) frequency scale. 46 | 47 | Args: 48 | f (np.ndarray): Array of frequencies (in Hertz). 49 | y (np.ndarray): Data to be plotted (same size as f) 50 | n (int): Number of points to use for interpolation on auditory frequency scale, by default 1024 51 | marker (str): The marker style to use, by default None 52 | ax (Axes): Optionally provide the axes on which to plot, by default None 53 | interp (bool): If true, spline interpolation is performed for the plotted line, by default False. 54 | pyplt_kwargs (**kwargs): Additional keyword arguments are passed to the 55 | matplotlib.pyplot plotting function. 56 | 57 | Returns: 58 | List[plt.Line2D]: List of plotted lines. 
59 | 60 | Raises: 61 | ValueError: y can only be up to two-dimensional 62 | """ 63 | if ax is None: 64 | ax = plt.gca() 65 | f, y = np.asarray(f), np.asarray(y) 66 | if np.ndim(y) == 1: 67 | y = y[np.newaxis] 68 | elif np.ndim(y) != 2: 69 | raise ValueError("Only 1- or 2-dimensional arrays can be plotted") 70 | elif y.shape[1] != f.shape[0] and y.shape[0] == f.shape[0]: 71 | y = y.T 72 | 73 | fn = erbspace(np.min(f), np.max(f), n) 74 | 75 | plots = [] 76 | kwargs_c = pyplot_kwargs.copy() 77 | for yi in y: 78 | if interp: 79 | tck = interpolate.splrep(f, yi) 80 | yy = interpolate.splev(fn, tck) 81 | plots.extend(ax.plot(fn, yy, *args, **pyplot_kwargs)) 82 | if marker: 83 | kwargs_c["color"] = plots[-1].get_c() 84 | plots.extend( 85 | ax.plot(f, 86 | yi, 87 | marker=marker, 88 | linestyle='', 89 | *args, 90 | **kwargs_c, 91 | label=None)) 92 | if linestyle: 93 | kwargs_c["color"] = plots[-1].get_c() 94 | plots.extend( 95 | ax.plot(f, 96 | yi, 97 | linestyle=linestyle, 98 | *args, 99 | **kwargs_c, 100 | label=None)) 101 | if marker and linestyle: 102 | kwargs_c["color"] = plots[-1].get_c() 103 | plots.extend( 104 | ax.plot(f, 105 | yi, 106 | marker=marker, 107 | linestyle=linestyle, 108 | *args, 109 | **kwargs_c, 110 | label=None)) 111 | else: 112 | if marker: 113 | pyplot_kwargs['marker'] = marker 114 | if linestyle: 115 | pyplot_kwargs['linestyle'] = linestyle 116 | plots.extend(ax.plot(f, yi, *args, **pyplot_kwargs)) 117 | 118 | ax.set_xscale("function", functions=(hertz_to_erbscale, erbscale_to_hertz)) 119 | ax.set_xticks(AudFreqTicks.ticks, AudFreqTicks.labels) 120 | ax.set_xlabel("Frequency (Hz)") 121 | return plots 122 | 123 | 124 | def plot_icc_matrix(icc_matrix: np.ndarray, 125 | freqs: np.ndarray, 126 | num_channels: int, 127 | chan_names: List[str], 128 | title: str = 'ICC', 129 | ax: Optional = None, 130 | ear: Optional[str] = None, 131 | room_name: Optional[str] = None, 132 | ylimits: np.array = np.array([0, 1.05])): 133 | """Plots interchannel 
correlation matrix""" 134 | 135 | if ax is None: 136 | fig, ax = plt.subplots(num_channels, num_channels, figsize=[8, 8]) 137 | if room_name is not None and ear is not None: 138 | fig.suptitle(f'{title} for {ear} ear in {room_name}') 139 | else: 140 | fig.suptitle(f'{title}') 141 | 142 | for i in range(num_channels): 143 | for j in range(num_channels): 144 | semiaudplot(freqs, icc_matrix[:, i, j], ax=ax[i, j]) 145 | # Hide X and Y axes label marks and ticks 146 | if i < num_channels - 1 or j != num_channels // 2: 147 | ax[i, j].xaxis.set_tick_params(labelbottom=False) 148 | ax[i, j].set_xticks([]) 149 | ax[i, j].set_xlabel('') 150 | 151 | if i == 0: 152 | ax[i, j].set_title(f'{chan_names[j]}') 153 | 154 | if j > 0: 155 | ax[i, j].yaxis.set_tick_params(labelleft=False) 156 | ax[i, j].set_yticks([]) 157 | ax[i, j].set_ylabel('') 158 | 159 | if j == num_channels - 1: 160 | ax[i, j].set_title(f'{chan_names[i]}', loc='right') 161 | 162 | ax[i, j].set_ylim(ylimits) 163 | 164 | return ax 165 | 166 | 167 | def time_plot( 168 | x: np.ndarray, 169 | fs: float, 170 | *args, 171 | db_scale: bool = False, 172 | timeaxis: int = 0, 173 | ax: Optional[Axes] = None, 174 | start_time: float = 0, 175 | **pyplt_kwargs, 176 | ) -> List[plt.Line2D]: 177 | """Plot a time-domain series, optionally on log-amplitude scale. 178 | 179 | Args: 180 | x (np.ndarray): The signal to plot. 181 | fs (float): Sample rate (Hertz) 182 | db_scale (bool, optional): Plot on log-amplitude scale, by default False 183 | timeaxis (int, optional): The axis of `x` to interpret as the time series, by default 0. 184 | ax (Axes, optional): Provide the axes on which to plot, by default None 185 | start_time (float, optional): The start time of the time axis (seconds). Defaults to 0. 186 | pyplt_kwargs (**kwargs): Additional keyword arguments are passed to the 187 | matplotlib.pyplot plotting function. 188 | 189 | Returns: 190 | List[plt.Line2D]: List of plotted lines. 
191 | 192 | Raises: 193 | ValueError: Checks dimensions of `x` and that `timeaxis` is in range. 194 | """ 195 | if x.ndim > 2: 196 | raise ValueError( 197 | f"x must have at most two dimensions, it has {x.ndim} dimensions") 198 | permitted_timeaxis = (1, -1, 0) 199 | if timeaxis not in permitted_timeaxis: 200 | raise ValueError( 201 | f"timeaxis should be one of {permitted_timeaxis}, got {timeaxis}") 202 | if timeaxis != 0: 203 | x = np.moveaxis(x, timeaxis, destination=0) 204 | length = x.shape[0] 205 | t = np.linspace(0, float(length - 1) / fs, length) + start_time 206 | if ax is None: 207 | ax = plt.gca() 208 | plots = [] 209 | kwargs_c = pyplt_kwargs.copy() 210 | if db_scale: 211 | plots.extend(ax.plot(t, db(db_floor(np.abs(x))), *args, **kwargs_c)) 212 | ax.set_ylabel("Amplitude (absolute, dB)") 213 | else: 214 | plots.extend(ax.plot(t, x, *args, **kwargs_c)) 215 | ax.set_ylabel("Amplitude (linear)") 216 | ax.set_xlabel("Time (s)") 217 | ax.grid(True) 218 | 219 | return plots 220 | 221 | 222 | def plot_filt_response(sos, worN=1024): 223 | """Plot SOS filter response, for worN number of bins in the frequency axis""" 224 | w, h = sosfreqz(sos, worN=worN) 225 | plt.subplot(2, 1, 1) 226 | plt.plot(w / np.pi, db(h)) 227 | plt.ylim(-75, 5) 228 | plt.grid(True) 229 | plt.yticks([0, -20, -40, -60]) 230 | plt.ylabel('Gain [dB]') 231 | plt.title('Frequency Response') 232 | plt.subplot(2, 1, 2) 233 | plt.plot(w / np.pi, np.angle(h)) 234 | plt.grid(True) 235 | plt.yticks([-np.pi, -0.5 * np.pi, 0, 0.5 * np.pi, np.pi], 236 | [r'$-\pi$', r'$-\pi/2$', '0', r'$\pi/2$', r'$\pi$']) 237 | plt.ylabel('Phase [rad]') 238 | plt.xlabel('Normalized frequency (1.0 = Nyquist)') 239 | -------------------------------------------------------------------------------- /Resources/opt_vn_filters.txt: -------------------------------------------------------------------------------- 1 | 1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 1.0580 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.5971 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4181 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.2201 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0441 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0358 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0774 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0320 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0071 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0097 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0019 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0012 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0032 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 2 | -1.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.9928 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.5530 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.4268 0.0000 0.0000 0.0000 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0525 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1981 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.1238 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0155 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0339 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0053 0.0000 0.0000 
0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0039 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0042 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0014 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0036 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 -0.0026 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 0.0000 
3 | -------------------------------------------------------------------------------- /Python/src/widener.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.typing as npt 3 | import pyfar as pf 4 | from scipy.signal import hann 5 | from typing import Tuple, Optional, Dict 6 | from abc import ABC 7 | from enum import Enum 8 | from pathlib import Path 9 | from velvet import process_velvet 10 | from allpass import process_allpass 11 | from onset_detector import OnsetDetector 12 | from utils import (calculate_interchannel_coherence, ms_to_samps, 13 | calculate_interchannel_cross_correlation_matrix, 14 | half_hann_fade) 15 | 16 | VN_PATH = Path('../../../Resources/init_vn_filters.txt') 17 | OPT_VN_PATH = Path('../../../Resources/opt_vn_filters.txt') 18 | 19 | 20 | class FilterbankType(Enum): 21 | AMP_PRESERVE = "amplitude-preserve" 22 | ENERGY_PRESERVE = "energy-preserve" 23 | 24 | 25 | class DecorrelationType(Enum): 26 | ALLPASS = "allpass" 27 | VELVET = "velvet" 28 | OPT_VELVET = "opt_velvet" 29 | 30 | 31 | class StereoWidener(ABC): 32 | """Parent stereo widener class""" 33 | 34 | def __init__(self, 35 | input_stereo: np.ndarray, 36 | fs: float, 37 | decorr_type: DecorrelationType, 38 | beta: float, 39 | detect_transient: bool = False, 40 | onset_detection_params: Optional[Dict] = None, 41 | xfade_win_len_ms: float = 1.0): 42 | """Args: 43 | input_stereo (ndarray) : input stereo signal 44 | fs (float) : sampling frequency 45 | decorr_type (Decorrelation type) : decorrelation type (allpass, velvet or opt_velvet) 46 | beta (between 0 and pi/2): crossfading factor (initial) 47 | detect_transient (bool): whether to add a transient detection block 48 | onset_detection_params (dict, optional): dictionary of parameters for onset detection 49 | xfade_win_len_ms (float): crossfading window used during transients 50 | """ 51 | 52 | self.input_signal = input_stereo 53 | self.fs = fs 54 | self.decorr_type = 
decorr_type 55 | self.beta = beta 56 | self.detect_transient = detect_transient 57 | 58 | _, self.num_channels = input_stereo.shape 59 | if self.num_channels > 2: 60 | self.input_signal = self.input_signal.T 61 | self.num_channels = 2 62 | if self.num_channels != 2: 63 | raise RuntimeError("Input signal must be stereo!") 64 | 65 | self.decorrelated_signal = self.decorrelate_input() 66 | if self.detect_transient: 67 | if onset_detection_params is not None: 68 | self.onset_detector = OnsetDetector( 69 | self.fs, 70 | min_onset_hold_ms=onset_detection_params[ 71 | 'min_onset_hold_ms'], 72 | min_onset_sep_ms=onset_detection_params['min_onset_sep_ms'] 73 | ) 74 | else: 75 | self.onset_detector = OnsetDetector(self.fs) 76 | self.xfade_win_len_samps = int( 77 | ms_to_samps(xfade_win_len_ms, self.fs)) 78 | self.xfade_in_win = half_hann_fade(self.xfade_win_len_samps, 79 | fade_out=False) 80 | self.xfade_out_win = half_hann_fade(self.xfade_win_len_samps, 81 | fade_out=True) 82 | 83 | def decorrelate_input(self) -> np.ndarray: 84 | if self.decorr_type == DecorrelationType.ALLPASS: 85 | decorrelated_signal = process_allpass(self.input_signal, 86 | self.fs, 87 | num_biquads=200) 88 | elif self.decorr_type == DecorrelationType.VELVET: 89 | decorrelated_signal = process_velvet(self.input_signal, self.fs, 90 | VN_PATH) 91 | elif self.decorr_type == DecorrelationType.OPT_VELVET: 92 | decorrelated_signal = process_velvet(self.input_signal, self.fs, 93 | OPT_VN_PATH) 94 | else: 95 | raise NotImplementedError("Other decorrelators are not available") 96 | return decorrelated_signal 97 | 98 | def get_onset_flag(self, input: npt.ArrayLike) -> np.ndarray: 99 | """Returns the onset locations and the signal envelope""" 100 | self.onset_detector.process(input) 101 | return self.onset_detector.onset_flag 102 | 103 | def process(self): 104 | pass 105 | 106 | 107 | class StereoWidenerBroadband(StereoWidener): 108 | """Broadband stereo widener class""" 109 | 110 | def __init__(self, 111 | 
input_stereo: np.ndarray, 112 | fs: float, 113 | decorr_type: DecorrelationType, 114 | beta: float, 115 | detect_transient: bool = False, 116 | onset_detection_params: Optional[Dict] = None, 117 | xfade_win_len_ms: float = 1.0): 118 | super().__init__(input_stereo, fs, decorr_type, beta, detect_transient, 119 | onset_detection_params, xfade_win_len_ms) 120 | 121 | def update_beta(self, new_beta: float): 122 | self.beta = new_beta 123 | 124 | def process(self) -> np.ndarray: 125 | stereo_output = np.zeros_like(self.input_signal) 126 | 127 | for chan in range(self.num_channels): 128 | stereo_output[:, chan] = np.cos( 129 | self.beta) * self.input_signal[:, chan] + np.sin( 130 | self.beta) * self.decorrelated_signal[:, chan] 131 | 132 | # if an onset is detected, the input signal is passed as it is 133 | if self.detect_transient: 134 | onset_flags = super().get_onset_flag(self.input_signal[:, 135 | chan]) 136 | onset_idx = np.where(onset_flags)[0] 137 | # all the onset positions should have the input signal 138 | stereo_output[onset_idx, chan] = self.input_signal[onset_idx, 139 | chan] 140 | 141 | # apply fade in and fade out windows before the onsets 142 | consecutive_onset_idx = self.consecutive_elements(onset_idx) 143 | consecutive_onset_start_idx = [ 144 | array[0] for array in consecutive_onset_idx 145 | ] 146 | 147 | for start_idx in consecutive_onset_start_idx: 148 | stereo_output[start_idx - 149 | self.xfade_win_len_samps:start_idx, 150 | chan] *= self.xfade_out_win 151 | stereo_output[ 152 | start_idx - self.xfade_win_len_samps:start_idx, 153 | chan] += self.xfade_in_win * self.input_signal[ 154 | start_idx - self.xfade_win_len_samps:start_idx, 155 | chan] 156 | 157 | return stereo_output 158 | 159 | @staticmethod 160 | def consecutive_elements(data: npt.NDArray, stepsize: int = 1): 161 | return np.split(data, np.where(np.diff(data) != stepsize)[0] + 1) 162 | 163 | @staticmethod 164 | def calculate_correlation(output_signal: np.ndarray) -> float: 165 | return 
calculate_interchannel_coherence(output_signal[:, 0], 166 | output_signal[:, 1], 167 | time_axis=0) 168 | 169 | 170 | class StereoWidenerFrequencyBased(StereoWidener): 171 | 172 | def __init__(self, input_stereo: np.ndarray, fs: float, 173 | filterbank_type: FilterbankType, 174 | decorr_type: DecorrelationType, beta: Tuple[float, float], 175 | cutoff_freq: float): 176 | """Frequency based stereo widener 177 | Args: 178 | input_stereo (ndarray): input stereo signal 179 | fs (float): sampling rate 180 | filterbank_type (Filterbank type): amplitude or energy preserving 181 | decorr_type (Decorrelation type): allpass, velvet or opt-velvet 182 | beta (Tuple(float, float)): cross-fading gain for low and high frequencies 183 | cutoff_freq (float): cutoff frequency of filterbank (Hz) 184 | """ 185 | 186 | super().__init__(input_stereo, fs, decorr_type, beta) 187 | self.filterbank_type = filterbank_type 188 | self.cutoff_freq = cutoff_freq 189 | self.get_filter_coefficients() 190 | 191 | def get_filter_coefficients(self): 192 | if self.filterbank_type == FilterbankType.AMP_PRESERVE: 193 | # Linkwitz Riley crossover filterbank 194 | filters = pf.dsp.filter.crossover(signal=None, 195 | N=4, 196 | frequency=self.cutoff_freq, 197 | sampling_rate=self.fs) 198 | self.lowpass_filter_coeffs = pf.classes.filter.FilterSOS( 199 | filters.coefficients[0, ...], self.fs) 200 | self.highpass_filter_coeffs = pf.classes.filter.FilterSOS( 201 | filters.coefficients[1, ...], self.fs) 202 | 203 | elif self.filterbank_Type == FilterbankType.ENERGY_PRESERVE: 204 | 205 | self.lowpass_filter_coeffs = pf.dsp.filter.butterworth( 206 | signal=None, 207 | N=16, 208 | frequency=self.cutoff_freq, 209 | btype='lowpass', 210 | sampling_rate=self.fs) 211 | 212 | self.highpass_filter_coeffs = pf.dsp.filter.butterworth( 213 | signal=None, 214 | N=16, 215 | frequency=self.cutoff_freq, 216 | btype='highpass', 217 | sampling_rate=self.fs) 218 | 219 | else: 220 | raise NotImplementedError( 221 | "Only 
Butterworth and LR crossover filters are available") 222 | 223 | def filter_in_subbands(self, signal: np.ndarray) -> np.ndarray: 224 | """Filter signal into two frequency bands""" 225 | pf_signal = pf.classes.audio.Signal(signal, self.fs) 226 | lowpass_signal = self.lowpass_filter_coeffs.process(pf_signal).time 227 | highpass_signal = self.highpass_filter_coeffs.process(pf_signal).time 228 | return np.vstack((lowpass_signal, highpass_signal)) 229 | 230 | def update_beta(self, new_beta: Tuple[float, float]): 231 | self.beta = new_beta 232 | 233 | def update_cutoff_frequency(self, new_cutoff_freq: float): 234 | self.cutoff_freq = new_cutoff_freq 235 | self.get_filter_coefficients() 236 | 237 | def process(self): 238 | stereo_output = np.zeros_like(self.input_signal) 239 | filtered_input = np.zeros( 240 | (2, self.input_signal.shape[0], self.num_channels)) 241 | filtered_decorr = np.zeros_like(filtered_input) 242 | 243 | for chan in range(self.num_channels): 244 | filtered_input[..., chan] = self.filter_in_subbands( 245 | self.input_signal[:, chan]) 246 | filtered_decorr[..., chan] = self.filter_in_subbands( 247 | self.decorrelated_signal[:, chan]) 248 | 249 | for k in range(self.num_channels): 250 | stereo_output[:, chan] += np.cos(self.beta[k]) * np.squeeze( 251 | filtered_input[k, :, chan]) + np.sin( 252 | self.beta[k]) * np.squeeze(filtered_decorr[k, :, chan]) 253 | 254 | return stereo_output 255 | 256 | def calculate_interchannel_coherence(self, output_signal: np.ndarray): 257 | icc_matrix, icc_freqs = calculate_interchannel_cross_correlation_matrix( 258 | output_signal, 259 | fs=self.fs, 260 | num_channels=self.num_channels, 261 | time_axis=0, 262 | channel_axis=-1, 263 | bands_per_octave=3, 264 | freq_range=(20, self.fs / 2.0)) 265 | icc_vector = np.squeeze(icc_matrix[..., 0, 1]) 266 | return icc_vector, icc_freqs 267 | -------------------------------------------------------------------------------- /Python/src/notebooks/stereo_widener.ipynb: 
-------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "id": "9d7a707a-7e88-4156-8cd9-c8b26effe2ba", 7 | "metadata": {}, 8 | "outputs": [], 9 | "source": [ 10 | "import numpy as np\n", 11 | "from pathlib import Path\n", 12 | "import soundfile as sf\n", 13 | "from scipy.signal import chirp, spectrogram\n", 14 | "import matplotlib.pyplot as plt\n", 15 | "import IPython\n", 16 | "from tqdm import tqdm\n", 17 | "\n", 18 | "import os, sys\n", 19 | "path = os.path.abspath('../.')\n", 20 | "sys.path.append(path)" 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": null, 26 | "id": "f5ab4ac3-0ea1-4e15-ad35-65c5322817d0", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from velvet import *\n", 31 | "from allpass import *\n", 32 | "from utils import ms_to_samps, db, estimate_onsets_log_threshold\n", 33 | "from widener import *\n", 34 | "from hrtf_widener import *\n", 35 | "from plot import *\n", 36 | "from interaural_cues import itd_maxiacc\n", 37 | "save_flag = False" 38 | ] 39 | }, 40 | { 41 | "cell_type": "markdown", 42 | "id": "1ecb3880-b0dc-4df2-ab30-360e8a234379", 43 | "metadata": {}, 44 | "source": [ 45 | "### Helper functions" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "id": "a552b90a-833b-4925-bbf9-6e59fdaa0a68", 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "def plot_spectrogram(signal: np.ndarray, fs:float, title: str, nfft:int=2**10, axis:Optional[int]=None):\n", 56 | " if axis is None:\n", 57 | " ff, tt, Sxx = spectrogram(signal, fs=fs, window='hann', nfft=nfft)\n", 58 | " else:\n", 59 | " ff, tt, Sxx = spectrogram(signal, fs=fs, window='hann', nfft=nfft, axis=axis)\n", 60 | " if Sxx.ndim == 3:\n", 61 | " fig, ax = plt.subplots(2,1)\n", 62 | " for num_sig in range(Sxx.shape[1]):\n", 63 | " ax[num_sig].pcolormesh(tt, ff, np.squeeze(Sxx[:,num_sig,:]), 
shading='gouraud')\n", 64 | " ax[num_sig].set_xlabel('t (sec)')\n", 65 | " ax[num_sig].set_ylabel('Frequency (Hz)')\n", 66 | " ax[num_sig].grid(True)\n", 67 | " ax[0].set_title(title)\n", 68 | "\n", 69 | " else:\n", 70 | " fig, ax = plt.subplots()\n", 71 | " plt.pcolormesh(tt, ff, Sxx, shading='gouraud')\n", 72 | " ax.set_title(title)\n", 73 | " ax.set_xlabel('t (sec)')\n", 74 | " ax.set_ylabel('Frequency (Hz)')\n", 75 | " ax.grid(True)\n", 76 | " plt.show()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "markdown", 81 | "id": "3d4883bd-6548-4e85-a0e7-703473832625", 82 | "metadata": {}, 83 | "source": [ 84 | "#### Create a chirp signal and duplicate it" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "id": "5a4e149d-b6ed-446a-a923-c62cf62780ff", 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "fs = 48000\n", 95 | "input_len_sec = 2.0\n", 96 | "time_vec = np.arange(0, input_len_sec, 1.0/fs)\n", 97 | "input_signal = chirp(time_vec, f0=20, t1=input_len_sec, f1=20000, method='logarithmic')\n", 98 | "input_signal_stereo = np.vstack((input_signal, input_signal))\n", 99 | "\n", 100 | "# plot, and listen to chirp\n", 101 | "plot_spectrogram(input_signal_stereo.T, fs, 'Input chirp', axis=0)\n", 102 | "sf.write('../../audio/input_chirp.wav', input_signal_stereo.T, fs)\n", 103 | "IPython.display.Audio('../../audio/input_chirp.wav')\n" 104 | ] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "id": "28030bec-1dc5-493c-af09-aba303b62f30", 109 | "metadata": {}, 110 | "source": [ 111 | "### Pass chirp through HRTF based stereo widener and plot the ICC as a function of speaker separation angle = $[0, \\pi/2]$\n", 112 | "\n", 113 | "There is no point plotting the ICC between the left and the right output channels here, as they will always be identical." 
114 | ] 115 | }, 116 | { 117 | "cell_type": "code", 118 | "execution_count": null, 119 | "id": "85afe55f-57ee-40ea-8c29-ac41258c8942", 120 | "metadata": {}, 121 | "outputs": [], 122 | "source": [ 123 | "num_freq_samples = 2**9\n", 124 | "num_time_samples = 2**9\n", 125 | "input_signal = np.zeros((int(0.5*fs), 2))\n", 126 | "input_signal[0,:] = np.ones(2)\n", 127 | "\n", 128 | "# input_signal, fs = sf.read('../../../Examples/dual_mono_original.mp3')\n", 129 | "save_flag = True\n", 130 | "\n", 131 | "hrtf_widener = HRTFStereoWidener(sample_rate=fs, \n", 132 | " azimuth_range=(-90,90), \n", 133 | " num_freq_points=num_freq_samples, \n", 134 | " num_time_samples=num_time_samples,\n", 135 | " head_radius=0.085)\n", 136 | "num_beta = 5\n", 137 | "beta = np.linspace(0, np.pi/2, num_beta)\n", 138 | "itds = np.zeros(num_beta)\n", 139 | "max_iacc = np.zeros(num_beta)\n", 140 | "bands_per_octave=3\n", 141 | "iac_freqs = pf.dsp.filter.fractional_octave_frequencies(num_fractions=bands_per_octave, \n", 142 | " frequency_range=(20, fs/2.0), \n", 143 | " return_cutoff=False)[0]\n", 144 | "num_iac_freqs = len(iac_freqs)\n", 145 | "hrtf_set = hrtf_widener.hrtf_set\n", 146 | "res_tup = np.array([itd_maxiacc(np.squeeze(hrtf_set.hrirs[k, ...]), fs, time_axis=0, ear_axis=1) \n", 147 | " for k in range (hrtf_widener.num_orientations)])\n", 148 | "itds_set = res_tup[:, 0]\n", 149 | "def find_closest_itd(all_itds,fs, all_doas, des_doa):\n", 150 | " closest_idx = np.argmin(np.abs(all_doas - des_doa))\n", 151 | " return all_itds[closest_idx] * fs\n", 152 | "\n", 153 | "fig, ax = plt.subplots(figsize=(6, 4))\n", 154 | "for k in tqdm(range(num_beta)):\n", 155 | " hrtf_widener.update_speaker_angle(np.degrees(beta[k]))\n", 156 | " output_signal = hrtf_widener.process(input_signal.copy())\n", 157 | " closest_itd = find_closest_itd(itds_set, fs, hrtf_set.doa, np.degrees(beta[k]))\n", 158 | " onset = estimate_onsets_log_threshold(output_signal[:, 0], axis=0)\n", 159 | " output_signal = 
np.roll(output_signal, -onset, axis=0) \n", 160 | "\n", 161 | " plt.subplot(num_beta,1,k+1)\n", 162 | " plt.plot(output_signal[:num_time_samples, :])\n", 163 | " plt.vlines([0, closest_itd], 0, 1, 'k', linestyles='dashed')\n", 164 | "\n", 165 | " # if save_flag:\n", 166 | " # sf.write(f'../../../Examples/dual_mono_hrtf_spk_angle={np.degrees(beta[k])}.wav', output_signal, fs)\n", 167 | "\n", 168 | "if save_flag:\n", 169 | " plt.savefig('../../figures/hrtf_stereo_widener_IRs.png', dpi=300)\n" 170 | ] 171 | }, 172 | { 173 | "cell_type": "markdown", 174 | "id": "e47b19ac-929d-4725-b633-2bd371f29502", 175 | "metadata": {}, 176 | "source": [ 177 | "### Get decorrelated signals with velvet noise filters" 178 | ] 179 | }, 180 | { 181 | "cell_type": "code", 182 | "execution_count": null, 183 | "id": "41d5df44-23a2-43b4-9e8c-605fcae2d257", 184 | "metadata": {}, 185 | "outputs": [], 186 | "source": [ 187 | "opt_vn_path = Path('../../../Resources/opt_vn_filters.txt')\n", 188 | "vn_output_signal = process_velvet(input_signal_stereo, fs, opt_vn_path)\n", 189 | "\n", 190 | "# plot and listen to output\n", 191 | "plot_spectrogram(vn_output_signal, fs, 'VN stereo chirps', axis=0)\n", 192 | "sf.write('../../audio/vn_chirp.wav', vn_output_signal, fs)\n", 193 | "IPython.display.Audio('../../audio/vn_chirp.wav')" 194 | ] 195 | }, 196 | { 197 | "cell_type": "markdown", 198 | "id": "885079ea-f08c-44c8-91c2-e84eadd5c6f5", 199 | "metadata": {}, 200 | "source": [ 201 | "### Get decorrelated signals with allpass filters" 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": null, 207 | "id": "3b757770-7ccd-49a6-9540-f890a3e9aaa2", 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "allpass_output_signal = process_allpass(input_signal_stereo, fs, num_biquads=200)\n", 212 | "plot_spectrogram(allpass_output_signal, fs, 'Allpass stereo chirps', axis=0)\n", 213 | "\n", 214 | "# plot and listen to output\n", 215 | "sf.write('../../audio/allpass_chirp.wav', 
allpass_output_signal, fs)\n", 216 | "IPython.display.Audio('../../audio/allpass_chirp.wav')" 217 | ] 218 | }, 219 | { 220 | "cell_type": "markdown", 221 | "id": "728552bf-e41b-4b83-9ef7-978e192635ff", 222 | "metadata": {}, 223 | "source": [ 224 | "### Pass through stereo widener for different values of beta\n" 225 | ] 226 | }, 227 | { 228 | "cell_type": "code", 229 | "execution_count": null, 230 | "id": "d653e884-36f4-4dba-884e-b5f6f68fef6c", 231 | "metadata": {}, 232 | "outputs": [], 233 | "source": [ 234 | "decorr_type = DecorrelationType.ALLPASS\n", 235 | "beta_init = 0\n", 236 | "stereo_widener = StereoWidenerBroadband(input_signal_stereo, fs, decorr_type, beta_init)\n", 237 | "\n", 238 | "num_beta = 100\n", 239 | "beta = np.linspace(0, np.pi/2, num_beta)\n", 240 | "decorr_correlation_coeffs = np.zeros(num_beta)\n", 241 | "for k in range(num_beta):\n", 242 | " stereo_widener.update_beta(beta[k])\n", 243 | " output_signal = stereo_widener.process()\n", 244 | " decorr_correlation_coeffs[k] = stereo_widener.calculate_correlation(output_signal)\n", 245 | "\n", 246 | "fig, ax = plt.subplots(figsize=(6, 4))\n", 247 | "ax.plot(beta / (np.pi / 2), decorr_correlation_coeffs)\n", 248 | "ax.set_ylim([0, 1])\n", 249 | "ax.set_ylabel('Correlation coefficient')\n", 250 | "ax.set_xlabel('Normalised angle')\n", 251 | "ax.set_xlim([0, 1])\n", 252 | "plt.grid()\n", 253 | "if save_flag:\n", 254 | " plt.savefig('../../figures/beta_vs_correlation.png', dpi=300)" 255 | ] 256 | }, 257 | { 258 | "cell_type": "markdown", 259 | "id": "fad649a1-af3a-40a5-8774-0870b6afb123", 260 | "metadata": {}, 261 | "source": [ 262 | "### Pass stereo widener through frequency based architecture and plot IC" 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "execution_count": null, 268 | "id": "40210022-759d-4285-94bd-b4f9c18e16ee", 269 | "metadata": {}, 270 | "outputs": [], 271 | "source": [ 272 | "beta = [(0, np.pi/2), (np.pi/2, 0)]\n", 273 | "num_beta = len(beta)\n", 274 | "filterbank_type = 
FilterbankType.AMP_PRESERVE\n", 275 | "cutoff_freq = 250\n", 276 | "bands_per_octave=3\n", 277 | "num_iter = 100\n", 278 | "iac_freqs = pf.dsp.filter.fractional_octave_frequencies(num_fractions=bands_per_octave, \n", 279 | " frequency_range=(20, fs/2.0), \n", 280 | " return_cutoff=False)[0]\n", 281 | "num_freqs = len(iac_freqs)\n", 282 | "icc_vector = np.zeros((num_iter, num_beta, num_freqs))\n", 283 | "\n", 284 | "for iter in tqdm(range(num_iter)):\n", 285 | " stereo_widener = StereoWidenerFrequencyBased(input_signal_stereo, fs, filterbank_type, decorr_type, (0,0), cutoff_freq)\n", 286 | " for k in range(num_beta):\n", 287 | " stereo_widener.update_beta(beta[k])\n", 288 | " output_signal = stereo_widener.process()\n", 289 | " icc_vector[iter, k, :], _ = stereo_widener.calculate_interchannel_coherence(output_signal)\n", 290 | " del stereo_widener" 291 | ] 292 | }, 293 | { 294 | "cell_type": "code", 295 | "execution_count": null, 296 | "id": "e226631d-6d02-4a82-bfd6-e2f9c3c2aa7a", 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [ 300 | "plt.rc('text', usetex=False)\n", 301 | "semiaudplot(iac_freqs, np.median(icc_vector, axis=0), marker='*', interp=False)\n", 302 | "plt.ylabel('Coherence')\n", 303 | "plt.ylim([0, 1.01])\n", 304 | "plt.vlines(cutoff_freq, 0, 1.01, colors= 'k', linestyles='dashed')\n", 305 | "plt.legend([\"beta_{low} = 0, beta_{high} = pi/2\", \"beta_{low} = pi/2, beta_{high} = 0\"], loc=\"upper right\")\n", 306 | "plt.grid()\n", 307 | "plt.savefig('../../figures/stereo_widener_iac.png', dpi=300)" 308 | ] 309 | } 310 | ], 311 | "metadata": { 312 | "kernelspec": { 313 | "display_name": "Python 3 (ipykernel)", 314 | "language": "python", 315 | "name": "python3" 316 | }, 317 | "language_info": { 318 | "codemirror_mode": { 319 | "name": "ipython", 320 | "version": 3 321 | }, 322 | "file_extension": ".py", 323 | "mimetype": "text/x-python", 324 | "name": "python", 325 | "nbconvert_exporter": "python", 326 | "pygments_lexer": "ipython3", 327 | 
"version": "3.9.0" 328 | } 329 | }, 330 | "nbformat": 4, 331 | "nbformat_minor": 5 332 | } 333 | -------------------------------------------------------------------------------- /Python/src/interaural_cues.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from numpy.typing import NDArray, ArrayLike 3 | from scipy.fft import irfft, fftshift 4 | import scipy.signal as sig 5 | from dataclasses import dataclass 6 | from typing import Optional, Tuple 7 | from utils import db, ms_to_samps, parabolic_peak_interp, signal_envelope_analytical, xcorr 8 | 9 | _EPS = np.finfo(float).eps 10 | 11 | 12 | @dataclass 13 | class HRTFParams: 14 | """A dataclass storing the HRTF, ILD and IPD corresponding to each DoA""" 15 | 16 | # number of points in the frequency axis 17 | num_freq_points: int 18 | # number of DoAs 19 | num_doas: int 20 | # length of HRIR in time domain 21 | num_samples: int 22 | # direction of arrival of the source 23 | doa: ArrayLike 24 | # frequencies where HRTF is calculated 25 | freqs: ArrayLike 26 | # hrtfs in freq domain, of size num_doas x num_freqs x 2 27 | hrtfs: NDArray 28 | # hrirs in the time domain 29 | hrirs: NDArray 30 | # interaural cues 31 | ild: NDArray 32 | ipd: NDArray 33 | itd: Optional[ArrayLike] = None 34 | 35 | 36 | def ild_from_hrtf(hrtfs: NDArray, 37 | ear_axis: int, 38 | right_ear_idx: int = -1, 39 | left_ear_idx: int = 0, 40 | in_db: bool = False) -> NDArray: 41 | """ 42 | Calculate interaural level difference for each frequency bin and DoA from the HRTF database 43 | Args: 44 | hrtfs (NDArray) : array of hrtfs, num_doas x num_freqs x 2 45 | ear_axis (int): ear axis (should be -1) 46 | right_ear_idx (int): index of right ear 47 | left_ear_idx (int): index of left ear 48 | in_db (bool): whether to return the ILD in dB 49 | Returns: 50 | NDArray: num_doas x num_freqs ILD (ear dimension squeezed) 51 | """ 52 | if ear_axis != -1: 53 | hrtfs = np.moveaxis(hrtfs, ear_axis, -1) 54 | 
ild = np.abs( 55 | hrtfs[..., right_ear_idx]) / (np.abs(hrtfs[..., left_ear_idx]) + _EPS) 56 | return db(ild) if in_db else ild 57 | 58 | 59 | def ipd_from_hrtf(hrtfs: NDArray, 60 | ear_axis: int, 61 | right_ear_idx: int = -1, 62 | left_ear_idx: int = 0, 63 | unwrap_phase: bool = False) -> NDArray: 64 | """ 65 | Calculate interaural phase difference for each frequency bin and DoA from the HRTF database 66 | Args: 67 | hrtfs (NDArray) : complex array of hrtfs, num_doas x num_freqs x 2 in the frequency domain 68 | ear_axis (int): ear axis (should be -1) 69 | right_ear_idx (int): index of right ear 70 | left_ear_idx (int): index of left ear 71 | unwrap_phase (bool): if true, unwrap phase before returning 72 | Returns: 73 | NDArray: num_doas x num_freqs IPD in radians (between -pi/2 and pi/2 if wrapped), ear dimension squeezed 74 | """ 75 | 76 | if ear_axis != -1: 77 | hrtfs = np.moveaxis(hrtfs, ear_axis, -1) 78 | ipd = np.angle(hrtfs[..., right_ear_idx] / hrtfs[..., left_ear_idx]) 79 | return np.unwrap(ipd) if unwrap_phase else ipd 80 | 81 | 82 | def convert_ipd_to_itd(ipd: NDArray, sample_rate: float, 83 | norm_frequency_axis: ArrayLike, 84 | wrapped_phase: bool) -> NDArray: 85 | """ 86 | Converts interaural phase difference to interaural time difference 87 | Args: 88 | ipd (NDArray) : Array of IPDa 89 | sample_rate (float) : sampling frequency 90 | norm_frequency_axis (ArrayLike): normalised positive frequency axis between (0, pi) 91 | wrapped_phase (bool): whether the IPD is wrapped between -pi/2 and pi/2 92 | """ 93 | if wrapped_phase: 94 | ipd = np.unwrap(ipd) 95 | return -ipd / (norm_frequency_axis * sample_rate + _EPS) 96 | 97 | 98 | def convert_itd_to_ipd(itd: NDArray, sample_rate: float, 99 | norm_frequency_axis: ArrayLike, 100 | wrap_phase: bool) -> NDArray: 101 | """ 102 | Converts interaural time difference to interaural phase difference 103 | Args: 104 | itd (NDArray) : Array of ITDs in seconds 105 | sample_rate (float) : sampling frequency 106 | 
norm_frequency_axis (ArrayLike): normalised positive frequency axis between (0, pi) 107 | wrap_phase (bool): whether to wrap the IPD between -pi/2 and pi/2 108 | """ 109 | ipd = -norm_frequency_axis * (itd * sample_rate + _EPS) 110 | if wrap_phase: 111 | return ((ipd + np.pi) % 2 * np.pi) - np.pi 112 | else: 113 | return ipd 114 | 115 | 116 | def get_hrtf_from_spherical_head_model( 117 | azimuth: ArrayLike, 118 | fft_freqs: NDArray, 119 | num_time_samples: int, 120 | head_radius: float = 0.075, 121 | speed_sound: float = 340, 122 | use_tanh_fit: bool = True) -> HRTFParams: 123 | """ 124 | Get ITD, ILD for a spherical head of specified radius at specified frequencies, 125 | see Romblom, D. and Bahu, H., “A Revision and Objective Evaluation of the 1-Pole 126 | 1-Zero Spherical Head Shadowing Filter,” in AES AVAR, 2018 127 | Args: 128 | azimuth (ArrayLike): the direction of arrival (azimuth only) of the sources in degrees 129 | fft_freqs(NDArray): frequencies corresponding to the fft bins in radians 130 | num_time_sample (int): length of the HRIR filters in the time domain 131 | head_radius (float): head radius in m 132 | speed_sound (float): speed of sound in air (m/s) 133 | use_tanh_fit (bool): whther to use hyperbolic tangent fit to get zero of head shadowing, or to use Brown-Duda 134 | Returns: 135 | HRTFParams : HRTF object containing the DoAs, HRTFs and ITD and ILD as a function 136 | of DoA and frequency 137 | 138 | """ 139 | 140 | def calculate_time_delay(head_radius: float, speed_sound: float, 141 | incidence_angle: ArrayLike) -> ArrayLike: 142 | 143 | time_delay = head_radius / speed_sound * np.where( 144 | np.abs(incidence_angle) < 90, -np.cos(np.radians(incidence_angle)), 145 | np.radians(np.abs(incidence_angle) - 90.0)) 146 | 147 | return time_delay 148 | 149 | # azimuth of the sources 150 | num_doas = len(azimuth) 151 | num_freqs = len(fft_freqs) 152 | num_ears = 2 153 | hrtfs = np.zeros((num_doas, num_freqs, num_ears), dtype=complex) 154 | hrirs = 
np.zeros((num_doas, num_time_samples, num_ears), dtype=np.float32) 155 | phase = np.zeros((num_doas, num_freqs, num_ears), dtype=float) 156 | fundamental_frequency = speed_sound / head_radius 157 | 158 | for k in range(num_ears): 159 | # incidence angle, depending on right or left ear 160 | incidence_angle = 90 - azimuth if k == 0 else 90 + azimuth 161 | time_delay = calculate_time_delay(head_radius, speed_sound, 162 | incidence_angle) 163 | phase[..., k] = 2 * np.pi * time_delay[:, np.newaxis] @ fft_freqs[ 164 | np.newaxis, :] 165 | if use_tanh_fit: 166 | zero_location_control = 1.15 - (0.85 * 167 | np.tanh(1.7 * 168 | (incidence_angle - 97.4))) 169 | else: 170 | beta_min = 0.1 171 | min_incidence_angle = 150 172 | zero_location_control = (1 + beta_min / 2.) + ( 173 | (1 - beta_min / 2.) * 174 | np.cos(incidence_angle / min_incidence_angle * 180)) 175 | 176 | # this is a row variable 177 | intermediate_var = 1j * 2 * np.pi * fft_freqs[np.newaxis, :] / ( 178 | 2 * fundamental_frequency) 179 | head_shadow_response = (1 + zero_location_control[:, np.newaxis] 180 | @ intermediate_var) / (1 + np.ones( 181 | (num_doas, 1)) @ intermediate_var) 182 | hrtfs[..., k] = head_shadow_response * np.exp(-1j * phase[..., k]) 183 | 184 | hrirs[..., k] = fftshift(irfft( 185 | hrtfs[..., k], 186 | n=num_time_samples, 187 | axis=1, 188 | ), 189 | axes=1) 190 | 191 | ild = ild_from_hrtf(hrtfs, ear_axis=-1) 192 | ipd = ipd_from_hrtf(hrtfs, ear_axis=-1, unwrap_phase=True) 193 | 194 | return HRTFParams(num_freqs, num_doas, num_time_samples, azimuth, 195 | fft_freqs, hrtfs, hrirs, ild, ipd) 196 | 197 | 198 | def itd_maxiacc( 199 | signal: np.ndarray, 200 | fs: float, 201 | time_axis: int = -1, 202 | ear_axis: int = -2, 203 | max_lag_ms: float = 1., 204 | calc_env: bool = True, 205 | lowpass_cutoff: Optional[float] = 3000., 206 | interp_peak: bool = True, 207 | ) -> Tuple[float, float]: 208 | """Estimate the interaural time difference using the maximum of the interaural cross-correlation 
function. 209 | Optionally, the incoming signal can be pre-processed with a low-pass filter and/or calculation of the signal 210 | envelope. This processing typically makes the estimation more robust and is enabled by default. 211 | 212 | There are lots of other ways of estimating ITD. In the `onsets` module, you can directly estimate the time-of- 213 | arrival in impulse responses and use the inter-aural difference of these as an ITD estimate. However, this IACC 214 | method should also work on binaural signals, not just impulse responses. 215 | 216 | Args: 217 | signal (np.ndarray): The signal to be analysed. Must be at least two-dimensional. 218 | fs (float): Sampling rate (Hertz) 219 | time_axis (int, optional): The time axis of signal. Defaults to -1. 220 | ear_axis (int, optional): The ear axis of signal (must have size 2). Defaults to -2. 221 | max_lag_ms (float, optional): The maximum lag (in milliseconds) over which to search for the IACC peak. Defaults to 1. 222 | calc_env (bool, optional): If True, compute the analytic amplitude envelope of the (optionally low-passed) signal before cross-correlation. Defaults to True. 223 | lowpass_cutoff (float, optional): If provided, the signal is low-pass filtered at this cutoff frequency (in 224 | Hertz) before further processing. Defaults to 3kHz. Setting to None will skip this stage. 225 | interp_peak (bool, optional): Optionally use parabolic interpolation to find the ITD to sub-sample accuracy. 226 | 227 | Raises: 228 | ValueError: If signal is not at least two-dimensional 229 | ValueError: If the ear_axis does not have size 2 230 | ValueError: If ear_axis and time_axis are equal. 231 | 232 | Returns: 233 | float: The delay (in seconds) corresponding to the maximum IACC. 234 | float: The maximum absolute value of the normalised IACC. 
235 | """ 236 | if signal.ndim < 2: 237 | raise ValueError("`signal` must be at least two-dimensional") 238 | if signal.shape[ear_axis] != 2: 239 | raise ValueError("The binaural signal must have size 2 in ear axis") 240 | if ear_axis == time_axis: 241 | raise ValueError("ear_axis and time_axis cannot be the same") 242 | 243 | max_lag = ms_to_samps(max_lag_ms, fs) 244 | 245 | # shuffle axes around so -2=ear, -1=time 246 | time_axis = np.remainder(time_axis, signal.ndim) 247 | ear_axis = np.remainder(ear_axis, signal.ndim) 248 | if time_axis != signal.ndim - 1: 249 | signal = np.moveaxis(signal, time_axis, -1) 250 | if time_axis < ear_axis: 251 | ear_axis -= 1 252 | if ear_axis != signal.ndim - 2: 253 | signal = np.moveaxis(signal, ear_axis, -2) 254 | 255 | # optional pre-processing steps 256 | if lowpass_cutoff is not None and lowpass_cutoff > 0: 257 | lp_sos = sig.butter(N=12, Wn=lowpass_cutoff / (fs / 2), output='sos') 258 | sig_lp = sig.sosfilt(lp_sos, signal, axis=-1) 259 | else: 260 | sig_lp = signal 261 | if calc_env: 262 | sig_env = signal_envelope_analytical(sig_lp, axis=-1) 263 | else: 264 | sig_env = sig_lp 265 | 266 | # iterate over other dimensions 267 | chan_shape = signal.shape[:-2] 268 | iacc = np.zeros(chan_shape, dtype=signal.dtype) 269 | itd = np.zeros(chan_shape, dtype=signal.dtype) 270 | for chan_ixs in np.ndindex(chan_shape): 271 | cc, lags = xcorr(sig_env[chan_ixs + (0, )], 272 | sig_env[chan_ixs + (1, )], 273 | max_lag=max_lag, 274 | norm=True) 275 | abs_cc = np.abs(cc) 276 | max_iacc_ix = np.argmax(abs_cc) 277 | if interp_peak and 0 < max_iacc_ix < lags.shape[0] - 2: 278 | # use parabolic interpolation to find sub-sample max iacc 279 | lag_interp, iacc[chan_ixs], _ = parabolic_peak_interp( 280 | abs_cc[max_iacc_ix - 1], abs_cc[max_iacc_ix], 281 | abs_cc[max_iacc_ix + 1]) 282 | # get time index using interpolated peak position 283 | if lag_interp < 0: 284 | itd[chan_ixs] = abs(lag_interp) * lags[max_iacc_ix - 1] + ( 285 | 1 + lag_interp) * 
lags[max_iacc_ix] 286 | else: 287 | itd[chan_ixs] = (1 - lag_interp) * lags[ 288 | max_iacc_ix] + lag_interp * lags[max_iacc_ix + 1] 289 | else: 290 | itd[chan_ixs] = lags[max_iacc_ix] 291 | iacc[chan_ixs] = abs_cc[max_iacc_ix] 292 | itd /= fs 293 | return itd, iacc 294 | -------------------------------------------------------------------------------- /Python/src/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.typing as npt 3 | from typing import Tuple, Optional, List, Union, cast 4 | import scipy.signal as sig 5 | import pyfar as pf 6 | 7 | EPS = np.finfo(float).eps 8 | # ERB means "Equivalent retangular band(-width)" 9 | # Constants: 10 | _ERB_L = 24.7 11 | _ERB_Q = 9.265 12 | 13 | def db2lin(x: npt.ArrayLike, /) -> npt.NDArray: 14 | """Convert from decibels to linear 15 | 16 | Args: 17 | x (ArrayLike): value(s) to be converted 18 | 19 | Returns: 20 | (ArrayLike): values converted to linear 21 | """ 22 | return np.power(10.0, x * 0.05) 23 | 24 | def db(x: npt.NDArray[float], /, *, is_squared: bool = False, allow_inf: bool = False) -> npt.NDArray[float]: 25 | """Converts values to decibels. 26 | 27 | Args: 28 | x (NDArray): value(s) to be converted to dB. 29 | is_squared (bool): Indicates whether `x` represents some power-like quantity (True) or some root-power-like 30 | quantity (False). Defaults to False, i.e. `x` is a root-power-like auqntity (e.g. Voltage, pressure, ...). 31 | allow_inf (bool): Whether infinitely small (or 0.0) values should be allowed, in which case `-np.inf` values 32 | may be returned. 33 | 34 | Returns: 35 | An array with the converted values, in dB. 36 | """ 37 | x = np.abs(x) 38 | if not allow_inf: 39 | x = np.maximum(x, EPS) 40 | factor = 10.0 if is_squared else 20.0 41 | return factor * np.log10(x) 42 | 43 | def db_floor(x: np.ndarray, floor: float = -120.0) -> np.ndarray: 44 | """Apply a lower floor to a set of values, i.e. 
any values lower than the floor value are set to that value. 45 | The floor values is specified in dB, whereas the values themselves are linear. 46 | Args: 47 | x (np.ndarray): Value array (linear) 48 | floor (float): Floor in dB, by default -120.0 49 | Returns: 50 | np.ndarray: Floor'd array 51 | """ 52 | min_lin = db2lin(floor) 53 | x_clip = np.maximum(x, min_lin) 54 | return x_clip 55 | 56 | def ms_to_samps(ms: npt.NDArray[float], /, fs: float) -> npt.NDArray[int]: 57 | """Calculate the nearest integer number of samples corresponding to the given time duration in milliseconds. 58 | 59 | Args: 60 | ms (NDArray): Duration, in milliseconds 61 | fs (float): Sample rate, in Hertz. 62 | 63 | Returns: 64 | An NDArray containing the nearest corresponding number of samples 65 | """ 66 | return np.round(ms * 1e-3 * fs).astype(int) 67 | 68 | def samps_to_ms(samps: npt.NDArray[int], /, fs: float) -> npt.NDArray[float]: 69 | """Calculate time duration in milliseconds corresponding to the given number of samples. 70 | 71 | Args: 72 | samps (ArrayLike): Durations in samples. 73 | fs (float): Sample rate, in Hertz. 74 | 75 | Returns: 76 | A NDArray ofcontaining the duration(s), in milliseconds. 77 | """ 78 | return 1e3 * np.array(samps) / fs 79 | 80 | def hertz_to_erbscale(frequency: Union[float, np.ndarray]) -> Union[float, np.ndarray]: 81 | """Convert frequency in Hertz to ERB-scale frequency. 82 | 83 | Equation 16 in Hohmann2002. 84 | 85 | Args: 86 | frequency (Union[float, np.ndarray]): The frequency value(s) in Hz. 87 | 88 | Returns: 89 | Union[float, np.ndarray]: The frequency value(s) on ERB-scale. 90 | """ 91 | return _ERB_Q * np.log(1 + frequency / (_ERB_L * _ERB_Q)) 92 | 93 | 94 | def erbscale_to_hertz(erb: Union[float, np.ndarray]) -> Union[float, np.ndarray]: 95 | """Convert frequency in ERB-scale to Hertz. 96 | 97 | Args: 98 | erb (Union[float, np.ndarray]): Frequency value(s) on the ERB-scale. 
99 | 100 | Returns: 101 | Union[float, np.ndarray]: Frequency values in Hz. 102 | """ 103 | return (np.exp(erb / _ERB_Q) - 1) * _ERB_L * _ERB_Q 104 | 105 | def half_hann_fade(length: int, fade_out: bool = False) -> npt.NDArray: 106 | """Generate a half Hann window for fading out or in a signal. 107 | 108 | Args: 109 | length (int): The length of the fade. 110 | fade_out (bool, optional): If True, fade out, if False, fade in. Defaults to False. 111 | 112 | Returns: 113 | npt.NDArray: The half Hann window signal (one-dimensional) 114 | """ 115 | n = np.linspace(start=0, stop=1, num=length) 116 | fade: npt.NDArray = 0.5 - 0.5 * np.cos(np.pi * (n + int(fade_out))) 117 | return fade 118 | 119 | def rms(irs: npt.NDArray[np.float_], axis: int = -1, remove_dc: bool = False) \ 120 | -> Union[np.float_, npt.NDArray[np.float_]]: 121 | """Calculate the root-mean-square value of a signal. 122 | 123 | Args: 124 | irs (npt.NDArray): The signal array 125 | axis (int, optional): The axis along which to make the calculation. Defaults to -1. 126 | remove_dc (bool, optional): Optionally remove the mean (DC) component of the signal. Defaults to False. 127 | 128 | Returns: 129 | npt.NDArray: The RMS value(s). One fewer dimension than `irs`. 130 | """ 131 | rms_irs: npt.NDArray 132 | if remove_dc: 133 | rms_irs = np.std(irs, axis=axis) 134 | else: 135 | rms_irs = np.sqrt(np.mean(np.square(irs), axis=axis)) 136 | return rms_irs 137 | 138 | def normalise_irs(irs: npt.NDArray[np.float_], 139 | fs: int, 140 | norm_db: float = -18, 141 | window_size: Tuple[float, float] = (5e-4, 1e-3)) -> Tuple[npt.NDArray[np.float_], float]: 142 | """Normalise an array of IRs to a given value in decibels. 143 | 144 | The RMS signal level around the peak value is calculated using a window. 145 | The mean across measurements is then used to adjust the level of the entire array. 146 | 147 | Args: 148 | irs (npt.NDArray): Impulse response data, with time in last axis. 
149 | fs (int): Sample rate in Hertz 150 | norm_db (float): The normalisation target value in decibels, by default -18 151 | window_size (tuple): of the Window size before and after peak value in seconds (float), 152 | By default (5e-4, 1e-3) i.e. 0.5ms before peak and 1ms after. 153 | 154 | Returns: 155 | tuple: of the normalised array and the gain applied in dB 156 | """ 157 | win_pre = int(window_size[0] * fs) 158 | win_post = int(window_size[1] * fs) 159 | win_len = win_pre + win_post 160 | win: npt.NDArray 161 | win = np.concatenate((half_hann_fade(win_pre), half_hann_fade(win_post, fade_out=True)), axis=-1) 162 | 163 | def win_peak(x: npt.NDArray) -> npt.NDArray: 164 | peak = np.argmax(x) 165 | start_ix = int(np.maximum(peak - win_pre, 0)) 166 | end_ix = start_ix + win_len 167 | if end_ix > irs.shape[-1]: 168 | raise RuntimeError("Window index exceeded length of array") 169 | return np.array(x[start_ix:end_ix] * win) 170 | 171 | irs_peaks = np.apply_along_axis(win_peak, axis=-1, arr=irs) 172 | mean_val = np.mean(rms(irs_peaks, axis=-1)) 173 | norm_lin = db2lin(norm_db) 174 | norm_val = norm_lin / mean_val 175 | gain_db = cast(float, db(norm_val)) 176 | norm_irs = irs * norm_val 177 | return norm_irs, gain_db 178 | 179 | def filter_in_subbands(input_signal: np.ndarray, 180 | fs: int, 181 | bands_per_octave: int = 3, 182 | freq_range=(20, 16000), 183 | filter_length: int = 4096) -> Tuple[npt.NDArray, npt.NDArray]: 184 | 185 | signal = pf.classes.audio.Signal(input_signal, fs) 186 | signal_subband, centre_frequencies = pf.dsp.filter.reconstructing_fractional_octave_bands(signal, 187 | bands_per_octave, freq_range, n_samples=filter_length) 188 | 189 | return signal_subband.time, centre_frequencies 190 | 191 | 192 | def calculate_interchannel_coherence(x: np.ndarray, y: np.ndarray, time_axis: int) -> npt.NDArray: 193 | return np.abs(np.sum(x * y, axis=time_axis)) / np.sqrt(np.sum(x**2, axis=time_axis) * np.sum(y**2, axis=time_axis)) 194 | 195 | 196 | def 
calculate_interchannel_cross_correlation_matrix(signals: np.ndarray, 197 | fs: int, 198 | num_channels: int, 199 | time_axis: int = -1, 200 | channel_axis: int = 0, 201 | return_single_coeff: bool = False, 202 | bands_per_octave: int = 3, 203 | freq_range=(20,16000)): 204 | """Returns a matrix of ICC values for each channel axis in signals""" 205 | if time_axis != -1: 206 | signals = np.moveaxis(signals, 0, 1) 207 | channel_axis = 0 208 | time_axis = -1 209 | 210 | # passthrough filterbank 211 | if not return_single_coeff: 212 | signals_subband, centre_frequencies = filter_in_subbands(signals, 213 | fs, 214 | bands_per_octave=bands_per_octave, 215 | freq_range=freq_range 216 | ) 217 | num_f = len(centre_frequencies) 218 | # make sure the channel axis is in the beginning 219 | signals_subband = np.moveaxis(signals_subband, 1, 0) 220 | 221 | icc_matrix = np.ones((num_f, num_channels, num_channels)) 222 | else: 223 | icc_matrix = np.ones((num_channels, num_channels)) 224 | 225 | for i in range(num_channels): 226 | for j in range(num_channels): 227 | if i == j: 228 | continue 229 | if return_single_coeff: 230 | icc_matrix[i, j] = calculate_interchannel_coherence(signals[i, :], signals[j, :], time_axis=time_axis) 231 | else: 232 | icc_matrix[:, i, j] = calculate_interchannel_coherence(signals_subband[i, :, :], 233 | signals_subband[j, :, :], 234 | time_axis=time_axis) 235 | if return_single_coeff: 236 | return icc_matrix 237 | else: 238 | return icc_matrix, centre_frequencies 239 | 240 | def signal_envelope_analytical(irs: npt.NDArray, axis: int = -1) -> np.ndarray: 241 | """Calculate amplitude envelope using Hilbert transform. 
242 | 243 | 244 | Args: 245 | irs (npt.NDArray): Impulse responses 246 | axis (int, optional): Time axis index, by default -1 247 | 248 | Returns: 249 | npt.NDArray: Envelope signals 250 | """ 251 | env: npt.NDArray 252 | env = np.abs(sig.hilbert(irs, axis=axis)) 253 | return env 254 | 255 | 256 | def parabolic_peak_interp(ym1: float, y0: float, yp1: float) -> Tuple[float, float, float]: 257 | """Quadratic interpolation of three adjacent samples to find a peak. 258 | 259 | A parabola is given by y(x) = a*(x-p)^2+b, where y(-1)=ym1, y(0)=y0, y(1)=yp1. 260 | 261 | https://ccrma.stanford.edu/~jos/sasp/Matlab_Parabolic_Peak_Interpolation.html 262 | 263 | Args: 264 | ym1 (float): Sample before the peak 265 | y0 (float): Peak value sample 266 | yp1 (float): Next sample following the peak 267 | 268 | Returns: 269 | p (float): peak location 270 | y (float): peak height 271 | a (float): half-curvature of parabolic fit through the points 272 | """ 273 | if ym1 < y0 <= yp1 or ym1 > y0 >= yp1: 274 | raise ValueError( 275 | f"y0 must be either the largest or the smallest of the three samples. Got: ym1={ym1}, y0={y0}, yp1={yp1}") 276 | p = (yp1 - ym1) / (2 * (2 * y0 - yp1 - ym1)) 277 | y = y0 - 0.25 * (ym1 - yp1) * p 278 | a = 0.5 * (ym1 - 2 * y0 + yp1) 279 | return p, y, a 280 | 281 | 282 | def xcorr(a: npt.NDArray, 283 | b: npt.NDArray, 284 | max_lag: Optional[int] = None, 285 | norm: bool = False) -> Tuple[npt.NDArray, npt.NDArray]: 286 | """Estimate the cross-correlation of two signals 287 | 288 | Args: 289 | a (npt.NDArray): First signal 290 | b (npt.NDArray): Second signal 291 | max_lag (int, optional): If provided, limit the delay/lag range to this many samples. Defaults to None. 292 | Cannot be greater than the longest length of the two signals. 293 | norm (bool, optional): If True, will do normalised cross-correlation. Defaults to False. 294 | Cannot be used if a and b have different lengths. 
295 | 296 | Raises: 297 | ValueError: a and/or b are not one-dimensional 298 | ValueError: norm cannot be True if a and b have different lenghts 299 | 300 | Returns: 301 | npt.NDArray: Cross-correlation function 302 | npt.NDArray: Corresponding delays from a to b (note: reversed perspective relative to 303 | scipy.signal.correlation_lags) 304 | """ 305 | if a.ndim > 1 or b.ndim > 1: 306 | raise ValueError("a and b must be one-dimensional arrays") 307 | 308 | max_lag_default = max(b.shape[0], a.shape[0]) - 1 309 | pad = b.shape[0] - a.shape[0] 310 | 311 | if max_lag is None: 312 | lag_range = max_lag_default 313 | else: 314 | lag_range = abs(max_lag) 315 | lag_range = min(max_lag, max_lag_default) 316 | 317 | # calculate normalization before zero padding (as this will affect the norms) 318 | if norm: 319 | if pad != 0: 320 | raise ValueError("a and b must have equal length for normalised cross-correlation") 321 | norm_val = np.linalg.norm(a) * np.linalg.norm(b) 322 | 323 | # zero pad to same length 324 | if pad > 0: 325 | a = np.pad(a, (0, abs(pad)), 'constant', constant_values=0) 326 | elif pad < 0: 327 | b = np.pad(b, (0, abs(pad)), 'constant', constant_values=0) 328 | 329 | cc = sig.correlate(a, b) 330 | 331 | if lag_range != max_lag_default: 332 | start_ix = max_lag_default - lag_range 333 | end_ix = max_lag_default + lag_range + 1 334 | cc = cc[start_ix:end_ix] 335 | 336 | if norm: 337 | cc /= norm_val 338 | 339 | lags = np.arange(lag_range, -lag_range - 1, -1) 340 | 341 | return cc, lags 342 | 343 | 344 | def estimate_onsets_log_threshold(irs: npt.NDArray[np.float32], 345 | thresh_db: float = -20, 346 | axis: int = -1) -> npt.NDArray[np.int_]: 347 | """Estimate onsets of impulse responses using a log-amplitude threshold below the peak value. 348 | 349 | Args: 350 | irs (npt.NDArray[np.float32]): Array of impulse responses. 351 | thresh_db (float): Threshold below the peak value in decibels at which the onset is detected. 
352 | axis (int, optional): The axis of the time series, by default -1 (last). 353 | 354 | Returns: 355 | npt.NDArray[np.int_]: Onset indices in samples (one less dimension than irs). 356 | """ 357 | 358 | def log_thresh_1d(log_amp_ir: npt.NDArray[np.float32]) -> int: 359 | peak_ix = np.argmax(log_amp_ir) 360 | peak = log_amp_ir[peak_ix] 361 | thresh = peak - abs(thresh_db) # forces negative dB 362 | above_thresh = np.nonzero(log_amp_ir[:peak_ix + 1] > thresh)[0] 363 | return above_thresh[0] if above_thresh.size > 0 else -1 364 | 365 | log_amp_irs = db(np.abs(irs)) 366 | return np.apply_along_axis(log_thresh_1d, axis, log_amp_irs) 367 | -------------------------------------------------------------------------------- /Source/PluginProcessor.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ============================================================================== 3 | 4 | This file contains the basic framework code for a JUCE plugin processor. 5 | 6 | ============================================================================== 7 | */ 8 | 9 | #include "PluginProcessor.h" 10 | #include "PluginEditor.h" 11 | 12 | //============================================================================== 13 | StereoWidenerAudioProcessor::StereoWidenerAudioProcessor() 14 | #ifndef JucePlugin_PreferredChannelConfigurations 15 | : AudioProcessor (BusesProperties() 16 | #if ! JucePlugin_IsMidiEffect 17 | #if ! 
JucePlugin_IsSynth 18 | .withInput ("Input", juce::AudioChannelSet::stereo(), true) 19 | #endif 20 | .withOutput ("Output", juce::AudioChannelSet::stereo(), true) 21 | #endif 22 | ), 23 | parameters(*this, nullptr, juce::Identifier ("StereoWidener"),{ 24 | std::make_unique 25 | (juce::ParameterID{"widthLower",1}, // parameterID and parameter version 26 | "Lower frequency width", // parameter name 27 | 0.0f, // minimum value 28 | 100.0f, // maximum value 29 | 0.0f), // initial value 30 | std::make_unique 31 | (juce::ParameterID{"widthHigher",1}, // parameterID 32 | "Higher frequency width", // parameter name 33 | 0.0f, // minimum value 34 | 100.0f, // maximum value 35 | 0.0f), 36 | std::make_unique 37 | (juce::ParameterID{"cutoffFrequency",1}, // parameterID 38 | "Filter cutoff frequency", // parameter name 39 | 100.0f, // minimum value 40 | 4000.0f, // maximum value 41 | 0.0f), 42 | std::make_unique 43 | (juce::ParameterID{"isAmpPreserve",1}, 44 | "Amplitude preserve", 45 | 0, 1, 0), 46 | std::make_unique 47 | (juce::ParameterID{"hasAllpassDecorrelation",1}, 48 | "Allpass decorrelation", 49 | 0, 1, 0), 50 | std::make_unique 51 | (juce::ParameterID{"handleTransients",1}, 52 | "Transient detection", 53 | 0, 1, 0), 54 | }) 55 | #endif 56 | { 57 | //set user defined parameters 58 | widthLower = parameters.getRawParameterValue("widthLower"); 59 | widthHigher = parameters.getRawParameterValue("widthHigher"); 60 | cutoffFrequency = parameters.getRawParameterValue("cutoffFrequency"); 61 | isAmpPreserve = parameters.getRawParameterValue("isAmpPreserve"); 62 | hasAllpassDecorrelation = parameters.getRawParameterValue("hasAllpassDecorrelation"); 63 | handleTransients = parameters.getRawParameterValue("handleTransients"); 64 | 65 | } 66 | 67 | StereoWidenerAudioProcessor::~StereoWidenerAudioProcessor(){} 68 | 69 | //============================================================================== 70 | const juce::String StereoWidenerAudioProcessor::getName() const 71 | { 72 | 
return JucePlugin_Name; 73 | } 74 | 75 | bool StereoWidenerAudioProcessor::acceptsMidi() const 76 | { 77 | #if JucePlugin_WantsMidiInput 78 | return true; 79 | #else 80 | return false; 81 | #endif 82 | } 83 | 84 | bool StereoWidenerAudioProcessor::producesMidi() const 85 | { 86 | #if JucePlugin_ProducesMidiOutput 87 | return true; 88 | #else 89 | return false; 90 | #endif 91 | } 92 | 93 | bool StereoWidenerAudioProcessor::isMidiEffect() const 94 | { 95 | #if JucePlugin_IsMidiEffect 96 | return true; 97 | #else 98 | return false; 99 | #endif 100 | } 101 | 102 | double StereoWidenerAudioProcessor::getTailLengthSeconds() const 103 | { 104 | return 0.0; 105 | } 106 | 107 | int StereoWidenerAudioProcessor::getNumPrograms() 108 | { 109 | return 1; // NB: some hosts don't cope very well if you tell them there are 0 programs, 110 | // so this should be at least 1, even if you're not really implementing programs. 111 | } 112 | 113 | int StereoWidenerAudioProcessor::getCurrentProgram() 114 | { 115 | return 0; 116 | } 117 | 118 | void StereoWidenerAudioProcessor::setCurrentProgram (int index) 119 | { 120 | } 121 | 122 | const juce::String StereoWidenerAudioProcessor::getProgramName (int index) 123 | { 124 | return {}; 125 | } 126 | 127 | void StereoWidenerAudioProcessor::changeProgramName (int index, const juce::String& newName) 128 | { 129 | } 130 | 131 | //read optimised VN file 132 | juce::StringArray StereoWidenerAudioProcessor::initialise_velvet_from_binary_file(){ 133 | auto input = juce::MemoryInputStream(BinaryData::opt_vn_filters_txt, BinaryData::opt_vn_filters_txtSize, false); 134 | //read the entire text file as a string 135 | const juce::String fileAsString = input.readEntireStreamAsString(); 136 | //break each line into a new string 137 | auto opt_velvet_arrays = juce::StringArray::fromLines(juce::StringRef(fileAsString)); 138 | return opt_velvet_arrays; 139 | } 140 | 141 | 142 | //============================================================================== 143 
| void StereoWidenerAudioProcessor::prepareToPlay (double sampleRate, int samplesPerBlock) 144 | { 145 | // Use this method as the place to do any pre-playback 146 | // initialisation that you need.. 147 | allpassCascade = new AllpassBiquadCascade[numChannels]; 148 | velvetSequence = new VelvetNoise[numChannels]; 149 | juce::StringArray opt_velvet_arrays = initialise_velvet_from_binary_file(); 150 | 151 | pan = new Panner[numFreqBands * numChannels]; 152 | amp_preserve_filters = new LinkwitzCrossover* [numFreqBands * numChannels]; 153 | energy_preserve_filters = new ButterworthFilter* [numFreqBands * numChannels]; 154 | gain_multiplier = new float[numFreqBands]; 155 | temp_output = new float[numFreqBands]; 156 | pannerInputs = new float[numChannels]; 157 | transient_handler = new TransientHandler[numChannels]; 158 | final_output = new float* [numChannels]; 159 | 160 | int count = 0; 161 | 162 | for(int k = 0; k < numChannels; k++){ 163 | //initialise transient handler 164 | if (handleTransients) 165 | transient_handler[k].prepare(samplesPerBlock, sampleRate); 166 | 167 | //initialise decorrelators 168 | allpassCascade[k].initialize(numBiquads, sampleRate, maxGroupDelayMs); 169 | 170 | if (useOptVelvetFilters){ 171 | velvetSequence[k].initialize_from_string(opt_velvet_arrays[k]); 172 | } 173 | else{ 174 | velvetSequence[k].initialize(sampleRate, vnLenMs, density, targetDecaydB, logDistribution); 175 | } 176 | 177 | //initialise panner inputs 178 | pannerInputs[k] = 0.f; 179 | //final output buffer 180 | final_output[k] = new float[samplesPerBlock]; 181 | 182 | for (int i = 0; i < numFreqBands; i++){ 183 | temp_output[i] = 0.0; 184 | gain_multiplier[i] = (i == 0) ? 
0.f : 1.f; 185 | 186 | //initialise panner 187 | pan[count].initialize(); 188 | amp_preserve_filters[count] = new LinkwitzCrossover[numChannels]; 189 | energy_preserve_filters[count] = new ButterworthFilter[numChannels]; 190 | 191 | //0, 2 contains lowpass filter and 1, 3 contains highpass filter 192 | for (int j = 0; j < numChannels; j++){ 193 | //initialise filters 194 | if (count % numChannels == 0){ 195 | amp_preserve_filters[count][j].initialize(sampleRate, "lowpass"); 196 | energy_preserve_filters[count][j].initialize(sampleRate, prewarpFreqHz, "lowpass"); 197 | } 198 | else{ 199 | amp_preserve_filters[count][j].initialize(sampleRate, "highpass"); 200 | energy_preserve_filters[count][j].initialize(sampleRate, prewarpFreqHz, "highpass"); 201 | } 202 | } 203 | count++; 204 | } 205 | } 206 | 207 | inputData = std::vector>(numChannels, std::vector(samplesPerBlock, 0.0f)); 208 | outputData = std::vector>(numChannels, std::vector(samplesPerBlock, 0.0f)); 209 | prevWidthLower = 0.f; 210 | curWidthLower = 0.f; 211 | prevWidthHigher = 0.0f; 212 | curWidthHigher = 0.f; 213 | prevCutoffFreq = 500.0f; 214 | smooth_factor = std::exp(-1.0f / (smoothingTimeMs * 0.001f * sampleRate)); 215 | 216 | } 217 | 218 | void StereoWidenerAudioProcessor::releaseResources() 219 | { 220 | // When playback stops, you can use this as an opportunity to free up any 221 | // spare memory, etc. 
222 | delete [] pannerInputs; 223 | delete [] temp_output; 224 | delete [] pan; 225 | delete [] gain_multiplier; 226 | delete [] allpassCascade; 227 | delete [] velvetSequence; 228 | delete [] transient_handler; 229 | 230 | for (int i = 0; i < numChannels * numFreqBands; i++){ 231 | delete [] amp_preserve_filters[i]; 232 | delete [] energy_preserve_filters[i]; 233 | } 234 | 235 | } 236 | 237 | #ifndef JucePlugin_PreferredChannelConfigurations 238 | bool StereoWidenerAudioProcessor::isBusesLayoutSupported (const BusesLayout& layouts) const 239 | { 240 | #if JucePlugin_IsMidiEffect 241 | juce::ignoreUnused (layouts); 242 | return true; 243 | #else 244 | // This is the place where you check if the layout is supported. 245 | // In this template code we only support mono or stereo. 246 | // Some plugin hosts, such as certain GarageBand versions, will only 247 | // load plugins that support stereo bus layouts. 248 | if (layouts.getMainOutputChannelSet() != juce::AudioChannelSet::mono() 249 | && layouts.getMainOutputChannelSet() != juce::AudioChannelSet::stereo()) 250 | return false; 251 | 252 | // This checks if the input layout matches the output layout 253 | #if ! 
JucePlugin_IsSynth 254 | if (layouts.getMainOutputChannelSet() != layouts.getMainInputChannelSet()) 255 | return false; 256 | #endif 257 | 258 | return true; 259 | #endif 260 | } 261 | #endif 262 | 263 | inline float StereoWidenerAudioProcessor::onePoleFilter(float input, float previous_output){ 264 | return (input * (1.0f-smooth_factor)) + (previous_output * smooth_factor); 265 | } 266 | 267 | 268 | void StereoWidenerAudioProcessor::processBlock (juce::AudioBuffer& buffer, juce::MidiBuffer& midiMessages) 269 | { 270 | juce::ScopedNoDenormals noDenormals; 271 | int count = 0; 272 | 273 | //update parameter 274 | //panners 0 and 2 have lowpassed signals 275 | //panners 1 and 3 have highpass signals 276 | 277 | //update lowpass width 278 | if (prevWidthLower != *widthLower) { 279 | curWidthLower = onePoleFilter(*widthLower, prevWidthLower); 280 | pan[0].updateWidth(curWidthLower/100.0); 281 | pan[2].updateWidth(curWidthLower/100.0); 282 | prevWidthLower = curWidthLower; 283 | } 284 | 285 | //update highpass width 286 | if (prevWidthHigher != *widthHigher){ 287 | curWidthHigher = onePoleFilter(*widthHigher, prevWidthHigher); 288 | pan[1].updateWidth(curWidthHigher/100.0); 289 | pan[3].updateWidth(curWidthHigher/100.0); 290 | prevWidthHigher = curWidthHigher; 291 | } 292 | 293 | 294 | //update filter cutoff frequency 295 | if (prevCutoffFreq != *cutoffFrequency){ 296 | curCutoffFreq = onePoleFilter(*cutoffFrequency, prevCutoffFreq); 297 | count = 0; 298 | for(int k = 0; k < numChannels; k++){ 299 | for (int i = 0; i < numFreqBands; i++){ 300 | for (int j = 0; j < numChannels; j++){ 301 | amp_preserve_filters[count][j].update(curCutoffFreq); 302 | energy_preserve_filters[count][j].update(curCutoffFreq); 303 | } 304 | count++; 305 | } 306 | } 307 | prevCutoffFreq = curCutoffFreq; 308 | } 309 | 310 | auto totalNumInputChannels = getTotalNumInputChannels(); 311 | auto totalNumOutputChannels = getTotalNumOutputChannels(); 312 | const int numSamples = buffer.getNumSamples(); 
313 | jassert(totalNumOutputChannels == totalNumInputChannels); 314 | 315 | // read input data into multidimensional array 316 | for(int chan = 0; chan < totalNumInputChannels; chan++){ 317 | const float* channelInData = buffer.getReadPointer(chan, 0); 318 | 319 | for (int i = 0; i < numSamples; i++){ 320 | inputData[chan][i] = channelInData[i]; 321 | } 322 | } 323 | 324 | //process input to get output 325 | for (int i = 0; i < numSamples; i++){ 326 | count = 0; 327 | for(int chan = 0; chan < totalNumOutputChannels; chan++){ 328 | float output = 0.0f; 329 | float decorr_output = 0.0f; 330 | 331 | //decorrelate input channel by convolving with VN sequence 332 | if (*hasAllpassDecorrelation) 333 | decorr_output = allpassCascade[chan].process(inputData[chan][i]); 334 | //or by passing through allpass cascade 335 | else 336 | decorr_output = velvetSequence[chan].process(inputData[chan][i]); 337 | 338 | //process in frequency bands 339 | for(int k = 0; k < numFreqBands; k++){ 340 | float filtered_input = 0.0f; 341 | float filtered_decorr_output = 0.0f; 342 | //pass input and decorrelation output through filterbank 343 | if (*isAmpPreserve){ 344 | filtered_input = amp_preserve_filters[k][chan].process(inputData[chan][i]); 345 | filtered_decorr_output = amp_preserve_filters[numFreqBands + k][chan].process(decorr_output); 346 | } 347 | else{ 348 | filtered_input = energy_preserve_filters[k][chan].process(inputData[chan][i]); 349 | filtered_decorr_output = energy_preserve_filters[numFreqBands + k][chan].process(decorr_output); 350 | } 351 | 352 | pannerInputs[0] = filtered_decorr_output; 353 | pannerInputs[1] = filtered_input; 354 | float panner_output = pan[count++].process(pannerInputs); 355 | output += panner_output; 356 | } 357 | outputData[chan][i] = output; 358 | if (! 
*handleTransients) 359 | buffer.setSample(chan, i, output); 360 | } 361 | } 362 | 363 | // transient handling logic 364 | if (*handleTransients){ 365 | for(int chan = 0; chan < totalNumOutputChannels; chan++){ 366 | final_output[chan] = transient_handler[chan].process(&inputData[chan][0], &outputData[chan][0]); 367 | for (int i = 0; i < numSamples; i++){ 368 | buffer.setSample(chan, i, final_output[chan][i]); 369 | } 370 | } 371 | } 372 | } 373 | 374 | 375 | //============================================================================== 376 | bool StereoWidenerAudioProcessor::hasEditor() const 377 | { 378 | return true; // (change this to false if you choose to not supply an editor) 379 | } 380 | 381 | juce::AudioProcessorEditor* StereoWidenerAudioProcessor::createEditor() 382 | { 383 | return new StereoWidenerAudioProcessorEditor (*this, parameters); 384 | } 385 | 386 | //============================================================================== 387 | void StereoWidenerAudioProcessor::getStateInformation (juce::MemoryBlock& destData) 388 | { 389 | // You should use this method to store your parameters in the memory block. 390 | // You could do that either as raw data, or use the XML or ValueTree classes 391 | // as intermediaries to make it easy to save and load complex data. 392 | 393 | auto state = parameters.copyState(); 394 | std::unique_ptr xml (state.createXml()); 395 | copyXmlToBinary (*xml, destData); 396 | } 397 | 398 | void StereoWidenerAudioProcessor::setStateInformation (const void* data, int sizeInBytes) 399 | { 400 | // You should use this method to restore your parameters from this memory block, 401 | // whose contents will have been created by the getStateInformation() call. 
402 | std::unique_ptr xmlState (getXmlFromBinary (data, sizeInBytes)); 403 | 404 | if (xmlState.get() != nullptr) 405 | if (xmlState->hasTagName (parameters.state.getType())) 406 | parameters.replaceState (juce::ValueTree::fromXml (*xmlState)); 407 | } 408 | 409 | //============================================================================== 410 | // This creates new instances of the plugin.. 411 | juce::AudioProcessor* JUCE_CALLTYPE createPluginFilter() 412 | { 413 | return new StereoWidenerAudioProcessor(); 414 | } 415 | -------------------------------------------------------------------------------- /Python/src/notebooks/decorrelation_comparison.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "0", 6 | "metadata": {}, 7 | "source": [ 8 | "

This notebook compares the magnitude response and interchannel coherence of 3 types of stereo decorrelation filters: allpass cascade, velvet noise, and optimised velvet noise\n",

" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "id": "1", 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import numpy as np\n", 20 | "from scipy.fft import rfft, rfftfreq\n", 21 | "from scipy.signal import sosfreqz\n", 22 | "from importlib import reload\n", 23 | "import matplotlib.pyplot as plt\n", 24 | "import os, sys\n", 25 | "from tqdm import tqdm\n", 26 | "from scipy.io import loadmat, savemat\n", 27 | "from typing import Optional\n", 28 | "path = os.path.abspath('../.')\n", 29 | "sys.path.append(path)" 30 | ] 31 | }, 32 | { 33 | "cell_type": "code", 34 | "execution_count": null, 35 | "id": "2", 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "from velvet import *\n", 40 | "from allpass import *\n", 41 | "from utils import ms_to_samps, db\n", 42 | "from plot import *\n", 43 | "from utils import calculate_interchannel_cross_correlation_matrix" 44 | ] 45 | }, 46 | { 47 | "cell_type": "markdown", 48 | "id": "3", 49 | "metadata": {}, 50 | "source": [ 51 | "### Helper functions" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "id": "4", 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "def get_ir_spectrum(ir:np.ndarray, fs: float):\n", 62 | " num_chan, ir_len = ir.shape\n", 63 | " fft_size = np.int32(2 ** np.ceil(np.log2(ir_len)))\n", 64 | " ir_spectrum = rfft(ir, fft_size, axis=-1)\n", 65 | " freq_axis = rfftfreq(fft_size, d = 1.0/fs)\n", 66 | " return ir_spectrum, freq_axis\n", 67 | "\n", 68 | "\n", 69 | "def plot_sos_response(sos:np.ndarray, fs: float, num_bins:int):\n", 70 | " if sos.ndim == 3:\n", 71 | " num_chan = sos.shape[0]\n", 72 | " freq_response = np.zeros((num_chan, num_bins), dtype = complex)\n", 73 | " for chan in range(num_chan):\n", 74 | " w, freq_response[chan, :] = sosfreqz(sos[chan, ...], worN=num_bins)\n", 75 | " else:\n", 76 | " w, freq_response = sosfreqz(sos, worN=num_bins)\n", 77 | " freqs = w / np.pi * fs\n", 78 | " return 
freq_response, freqs\n", 79 | "\n", 80 | "\n", 81 | "def pick_minimum_coherence_decorrelator(decorr_irs :float, \n", 82 | " fs: float, \n", 83 | " num_iters : int, \n", 84 | " num_channels: int = 2, \n", 85 | " bands_per_octave :int = 3,\n", 86 | " cost_fn_vals: Optional[np.ndarray] = None) -> int:\n", 87 | " iac_freqs = pf.dsp.filter.fractional_octave_frequencies(num_fractions=bands_per_octave, \n", 88 | " frequency_range=(20, sample_rate/2.0), \n", 89 | " return_cutoff=False)[0]\n", 90 | " num_freqs = len(iac_freqs)\n", 91 | " icc_matrix_all = np.zeros((num_iters, num_freqs))\n", 92 | " for num_ir in tqdm(range(num_iters)):\n", 93 | " icc_matrix, _ = calculate_interchannel_cross_correlation_matrix(np.squeeze(decorr_irs[num_ir, ...]),\n", 94 | " fs=fs,\n", 95 | " num_channels=num_channels,\n", 96 | " time_axis=-1,\n", 97 | " channel_axis=0,\n", 98 | " bands_per_octave=bands_per_octave,\n", 99 | " freq_range=(20, fs/2.0))\n", 100 | "\n", 101 | " icc_matrix_all[num_ir, :] = np.squeeze(icc_matrix[..., 0, 1])\n", 102 | "\n", 103 | " # weighted cost function taking into account min ICC and spectral flatness\n", 104 | " mean_icc = np.mean(np.abs(icc_matrix_all), axis=-1)\n", 105 | " if cost_fn_vals is None:\n", 106 | " return np.argmin(mean_icc)\n", 107 | " else:\n", 108 | " lamb = 0.5\n", 109 | " meu = 0.1\n", 110 | " net_cost_function = (1-lamb) * mean_icc + lamb * meu * np.sum(cost_fn_vals, axis=-1)\n", 111 | " return np.argmin(net_cost_function)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "id": "5", 117 | "metadata": {}, 118 | "source": [ 119 | "### Global parameters" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": null, 125 | "id": "6", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "sample_rate = 48000\n", 130 | "ir_length_ms = 15\n", 131 | "ir_length_samps = ms_to_samps(ir_length_ms, sample_rate)\n", 132 | "num_biquads = 200\n", 133 | "num_channels = 2\n", 134 | "num_iter = 100\n", 135 | 
"save_flag = True\n", 136 | "plot_flag = True if num_iter == 1 else False" 137 | ] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "id": "7", 142 | "metadata": {}, 143 | "source": [ 144 | "### Loop over stereo channels" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": null, 150 | "id": "8", 151 | "metadata": {}, 152 | "outputs": [], 153 | "source": [ 154 | "ap_ir = np.zeros((num_iter, num_channels, ir_length_samps))\n", 155 | "sos_section = np.zeros((num_iter, num_channels, num_biquads, 6))\n", 156 | "init_vn_sequence = np.zeros_like(ap_ir)\n", 157 | "opt_vn_sequence = np.zeros_like(ap_ir)\n", 158 | "cost_fn_val = np.zeros((num_iter, num_channels))" 159 | ] 160 | }, 161 | { 162 | "cell_type": "markdown", 163 | "id": "9", 164 | "metadata": {}, 165 | "source": [ 166 | "#### Create allpass biquad sequence" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": null, 172 | "id": "10", 173 | "metadata": {}, 174 | "outputs": [], 175 | "source": [ 176 | "for iter in tqdm(range(num_iter)):\n", 177 | " for chan in range(num_channels):\n", 178 | " sos_section[iter, chan, ...] 
= decorrelate_allpass_filters(sample_rate, nbiquads = num_biquads, max_grp_del_ms = 30)\n", 179 | " ap_ir[iter, chan, :] = get_allpass_impulse_response(sos_section[iter, chan, ...], sample_rate, ir_length_ms)\n", 180 | " if plot_flag:\n", 181 | " fig = plt.figure()\n", 182 | " plt.subplot(111)\n", 183 | " plt.plot(ap_ir[chan, :])\n", 184 | " plt.title('Allpass IR')" 185 | ] 186 | }, 187 | { 188 | "cell_type": "markdown", 189 | "id": "11", 190 | "metadata": {}, 191 | "source": [ 192 | "#### Create a VN sequence and optimise it iif the optimised files don't exist already" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "id": "12", 199 | "metadata": {}, 200 | "outputs": [], 201 | "source": [ 202 | "loadpath = \"../../../../Resources/decorrelation_ir_matrix.mat\"\n", 203 | "try:\n", 204 | " ir_dict = loadmat(loadpath)\n", 205 | " init_vn_sequence = ir_dict['velvet']\n", 206 | " opt_vn_sequence = ir_dict['opt-velvet']\n", 207 | " cost_fn_val = ir_dict['cost-func-vals']\n", 208 | "except: \n", 209 | " raise OSError(\"File not found\")\n", 210 | " \n", 211 | " for iter in tqdm(range(num_iter)):\n", 212 | " for chan in range(num_channels):\n", 213 | " # number of VN sequences\n", 214 | " num_seq = 1\n", 215 | " # 1 samples per millisecond\n", 216 | " num_samp_per_ms = 1\n", 217 | " grid_size = int(sample_rate / (num_samp_per_ms*1e3))\n", 218 | " ir_length_samps = ms_to_samps(ir_length_ms, sample_rate)\n", 219 | " decay_db = 60\n", 220 | " \n", 221 | " init_vn_seq = np.squeeze(generate_velvet_noise_sequence(num_seq, sample_rate, grid_size, ir_length_samps).T)\n", 222 | " impulse_locations = np.nonzero(init_vn_seq)[0]\n", 223 | " impulse_signs = np.sign(init_vn_seq[impulse_locations])\n", 224 | " num_impulses = len(impulse_locations)\n", 225 | " \n", 226 | " decay_slope = -np.log(np.power(10, -decay_db / 20)) / ir_length_samps\n", 227 | " init_vn_seq_with_decay = np.zeros_like(init_vn_seq)\n", 228 | " 
init_vn_seq_with_decay[impulse_locations] = np.exp(-impulse_locations * decay_slope) * impulse_signs\n", 229 | " \n", 230 | " \n", 231 | " #### Optimise the VN sequence\n", 232 | " \n", 233 | " num_samples_per_second = sample_rate / grid_size\n", 234 | " opt_vn_seq, opt_vn_params, cost_fn_val[iter, chan] = optimise_velvet_noise_sequence(init_vn_seq, \n", 235 | " sample_rate, \n", 236 | " num_samples_per_second, \n", 237 | " impulse_locations, \n", 238 | " decay_db,\n", 239 | " verbose=False)\n", 240 | " \n", 241 | " init_vn_sequence[iter, chan, :] = init_vn_seq_with_decay\n", 242 | " opt_vn_sequence[iter, chan, :] = opt_vn_seq\n", 243 | " \n", 244 | " #### Plot the original and optimised sequence\n", 245 | " if plot_flag:\n", 246 | " fig = plt.figure()\n", 247 | " plt.subplot(211)\n", 248 | " plt.stem(init_vn_sequence[chan, :])\n", 249 | " plt.ylim([-1, 1])\n", 250 | " plt.title('VN sequence')\n", 251 | " \n", 252 | " plt.subplot(212)\n", 253 | " plt.stem(opt_vn_sequence[chan, :])\n", 254 | " plt.title('Optimised VN sequence')\n", 255 | " plt.ylim([-1, 1])\n", 256 | " if plot_flag:\n", 257 | " fig.tight_layout(pad=1.0)\n" 258 | ] 259 | }, 260 | { 261 | "cell_type": "markdown", 262 | "id": "13", 263 | "metadata": {}, 264 | "source": [ 265 | "### For each decorrelator, find the IR pair that gives the minimum IC over all frequency bands" 266 | ] 267 | }, 268 | { 269 | "cell_type": "code", 270 | "execution_count": null, 271 | "id": "14", 272 | "metadata": {}, 273 | "outputs": [], 274 | "source": [ 275 | "all_irs = np.stack((ap_ir, init_vn_sequence, opt_vn_sequence), axis = -1)\n", 276 | "num_irs = np.shape(all_irs)[-1]\n", 277 | "min_ir_idx = dict()\n", 278 | "keys = ['allpass' ,'velvet' ,'opt-velvet']\n", 279 | "for k in range(num_irs):\n", 280 | " if keys[k] == 'opt-velvet':\n", 281 | " min_ir_idx[keys[k]] = pick_minimum_coherence_decorrelator(all_irs[...,-1], sample_rate, num_iter, cost_fn_vals=cost_fn_val)\n", 282 | " else: \n", 283 | " min_ir_idx[keys[k]] = 
pick_minimum_coherence_decorrelator(all_irs[...,-1], sample_rate, num_iter)\n", 284 | " " 285 | ] 286 | }, 287 | { 288 | "cell_type": "markdown", 289 | "id": "15", 290 | "metadata": {}, 291 | "source": [ 292 | "### Save VN and optimised VN sequence that gives minimum IC" 293 | ] 294 | }, 295 | { 296 | "cell_type": "code", 297 | "execution_count": null, 298 | "id": "16", 299 | "metadata": {}, 300 | "outputs": [], 301 | "source": [ 302 | "if save_flag:\n", 303 | " with open('../../../Resources/opt_vn_filters.txt','wb') as f:\n", 304 | " np.savetxt(f, np.squeeze(opt_vn_sequence[min_ir_idx['opt-velvet']]), fmt='%.4f')\n", 305 | " \n", 306 | " with open('../../../Resources/init_vn_filters.txt','wb') as f:\n", 307 | " np.savetxt(f, np.squeeze(init_vn_sequence[min_ir_idx['velvet']]), fmt='%.4f')" 308 | ] 309 | }, 310 | { 311 | "cell_type": "markdown", 312 | "id": "17", 313 | "metadata": {}, 314 | "source": [ 315 | "### Compare the magnitude spectrum of all 3" 316 | ] 317 | }, 318 | { 319 | "cell_type": "code", 320 | "execution_count": null, 321 | "id": "18", 322 | "metadata": {}, 323 | "outputs": [], 324 | "source": [ 325 | "ap_impulse_mag, ap_freq_axis = plot_sos_response(sos_section[min_ir_idx['allpass']], \n", 326 | " sample_rate, \n", 327 | " num_bins = np.int32(2 ** np.ceil(np.log2(ir_length_samps))))\n", 328 | "plt.figure(figsize=[6, 4])\n", 329 | "semiaudplot(ap_freq_axis, db(np.abs(ap_impulse_mag)))\n", 330 | "plt.title('Allpass cascade IR')\n", 331 | "plt.ylabel('Magnitude (dB)')\n", 332 | "plt.ylim([-60, 10])\n", 333 | "if save_flag:\n", 334 | " plt.savefig('../../figures/ap_mag_response.png', dpi=300)\n", 335 | "\n", 336 | "\n", 337 | "vn_impulse_mag, vn_freq_axis = get_ir_spectrum(init_vn_sequence[min_ir_idx['velvet']], sample_rate)\n", 338 | "plt.figure(figsize=[6, 4])\n", 339 | "semiaudplot(vn_freq_axis, db(np.abs(vn_impulse_mag)))\n", 340 | "plt.title('Velvet noise IR')\n", 341 | "plt.ylim([-60, 10])\n", 342 | "plt.ylabel('Magnitude (dB)')\n", 343 | "if 
save_flag:\n", 344 | " plt.savefig('../../figures/velvet_mag_response.png', dpi=300)\n", 345 | "\n", 346 | "\n", 347 | "opt_vn_impulse_mag, opt_vn_freq_axis = get_ir_spectrum(opt_vn_sequence[min_ir_idx['opt-velvet']], sample_rate)\n", 348 | "plt.figure(figsize=[6, 4])\n", 349 | "semiaudplot(opt_vn_freq_axis, db(np.abs(opt_vn_impulse_mag)))\n", 350 | "plt.title('Optimised velvet noise IR')\n", 351 | "plt.ylim([-60, 10])\n", 352 | "plt.ylabel('Magnitude (dB)')\n", 353 | "if save_flag:\n", 354 | " plt.savefig('../../figures/opt_velvet_response.png', dpi=300)\n", 355 | "\n", 356 | "# magnitude spectra on the same plot\n", 357 | "plt.figure(figsize=[6, 4])\n", 358 | "line0 = semiaudplot(ap_freq_axis, db(np.abs(ap_impulse_mag)))\n", 359 | "line1 = semiaudplot(opt_vn_freq_axis, db(np.abs(opt_vn_impulse_mag)), linestyle='--')\n", 360 | "plt.ylabel('Amplitude(dB)')\n", 361 | "plt.xlabel('Frequency (Hz)')\n", 362 | "plt.legend([line0[0], line1[0]], ['Allpass', 'Opt VN'])\n", 363 | "plt.ylim([-60, 10])\n", 364 | "plt.xlim([20, 16e3])\n", 365 | "if save_flag:\n", 366 | " plt.savefig('../../figures/comb_magnitude_response.png', dpi=300) \n" 367 | ] 368 | }, 369 | { 370 | "cell_type": "markdown", 371 | "id": "19", 372 | "metadata": {}, 373 | "source": [ 374 | "### Compare the interchannel coherence of all 3 - plot the median IC of all filters, and the IC of the best performing filters" 375 | ] 376 | }, 377 | { 378 | "cell_type": "code", 379 | "execution_count": null, 380 | "id": "20", 381 | "metadata": {}, 382 | "outputs": [], 383 | "source": [ 384 | "import pyfar as pf\n", 385 | "\n", 386 | "bands_per_octave=3\n", 387 | "iac_freqs = pf.dsp.filter.fractional_octave_frequencies(num_fractions=bands_per_octave, \n", 388 | " frequency_range=(20, sample_rate/2.0), \n", 389 | " return_cutoff=False)[0]\n", 390 | "num_freqs = len(iac_freqs)\n", 391 | "icc_vector_all = np.zeros((num_iter, num_freqs, num_irs))\n", 392 | "icc_vector_best = np.zeros((num_freqs, num_irs))\n", 393 | "\n", 394 
| "\n", 395 | "for iter in tqdm(range(num_iter)):\n", 396 | " for num_ir in range(num_irs):\n", 397 | " icc_matrix, _ = calculate_interchannel_cross_correlation_matrix(all_irs[iter, ..., num_ir],\n", 398 | " fs=sample_rate,\n", 399 | " num_channels=num_channels,\n", 400 | " time_axis=-1,\n", 401 | " channel_axis=0,\n", 402 | " bands_per_octave=bands_per_octave,\n", 403 | " freq_range=(20, sample_rate/2.0))\n", 404 | " icc_vector_all[iter,:, num_ir] = np.squeeze(icc_matrix[..., 0, 1])\n", 405 | " if iter == 0:\n", 406 | " icc_matrix_best, _ = calculate_interchannel_cross_correlation_matrix(all_irs[min_ir_idx[keys[num_ir]], ..., num_ir],\n", 407 | " fs=sample_rate,\n", 408 | " num_channels=num_channels,\n", 409 | " time_axis=-1,\n", 410 | " channel_axis=0,\n", 411 | " bands_per_octave=bands_per_octave,\n", 412 | " freq_range=(20, sample_rate/2.0))\n", 413 | " icc_vector_best[:, num_ir] = np.squeeze(icc_matrix[..., 0, 1])\n", 414 | "\n", 415 | " \n", 416 | "\n", 417 | " " 418 | ] 419 | }, 420 | { 421 | "cell_type": "code", 422 | "execution_count": null, 423 | "id": "21", 424 | "metadata": {}, 425 | "outputs": [], 426 | "source": [ 427 | "fig, axis = plt.subplots(figsize=(6, 4))\n", 428 | "titles = ['Allpass', 'Velvet', 'Optimised Velvet']\n", 429 | "semiaudplot(iac_freqs, np.median(icc_vector_all, axis=0), marker='*', interp=False)\n", 430 | "# semiaudplot(iac_freqs, icc_vector_best, marker='o', interp=False)\n", 431 | "plt.ylabel('Coherence')\n", 432 | "plt.ylim([0, 1.01])\n", 433 | "plt.legend(['allpass', 'velvet', 'opt velvet'])\n", 434 | "plt.grid()\n", 435 | "if save_flag:\n", 436 | " plt.savefig('../../figures/decorr_iac.png', dpi=300)" 437 | ] 438 | }, 439 | { 440 | "cell_type": "code", 441 | "execution_count": null, 442 | "id": "22", 443 | "metadata": {}, 444 | "outputs": [], 445 | "source": [ 446 | "ir_dict= {\"allpass\": ap_ir, \"velvet\":init_vn_sequence, \"opt-velvet\":opt_vn_sequence, \"cost-func-vals\" : cost_fn_val}\n", 447 | 
"savemat(\"../../../Resources/decorrelation_ir_matrix.mat\", ir_dict)" 448 | ] 449 | } 450 | ], 451 | "metadata": { 452 | "kernelspec": { 453 | "display_name": "Python 3 (ipykernel)", 454 | "language": "python", 455 | "name": "python3" 456 | }, 457 | "language_info": { 458 | "codemirror_mode": { 459 | "name": "ipython", 460 | "version": 3 461 | }, 462 | "file_extension": ".py", 463 | "mimetype": "text/x-python", 464 | "name": "python", 465 | "nbconvert_exporter": "python", 466 | "pygments_lexer": "ipython3", 467 | "version": "3.9.0" 468 | } 469 | }, 470 | "nbformat": 4, 471 | "nbformat_minor": 5 472 | } 473 | --------------------------------------------------------------------------------