├── .gitignore ├── README.md ├── pitch_detector.cpp ├── pitch_detector.h ├── pitch_detector.mm └── source ├── PitchMPM.h └── PitchYIN.h /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pitch Detector 2 | JUCE module for pitch estimation 3 | 4 | The PitchYIN class is based on the YIN implementation found in the [aubio](https://aubio.org) library 5 | 6 | The PitchMPM class is adapted from the McLeod Pitch Method implementation in https://github.com/sevagh/pitch-detection 7 | 8 | The updated PitchMPM class now computes the auto-correlation function with an FFT, using the AudioFFT library (via the module wrapper at https://github.com/adamski/audio_fft). The previous time-based version is kept in the `time-based` branch. 9 | 10 | ### Usage 11 | 12 | **NOTE:** `bufferSize` must be a power of 2! 13 | 14 | ```cpp 15 | // Class members 16 | PitchMPM pitchMPM { bufferSize }; // PitchMPM has no default constructor 17 | AudioSampleBuffer sampleBuffer; 18 | 19 | // Setup / prepare 20 | pitchMPM.setBufferSize (bufferSize); 21 | pitchMPM.setSampleRate (sampleRate); 22 | 23 | // Process 24 | float newPitch = pitchMPM.getPitch (sampleBuffer.getReadPointer (0)); 25 | ``` 26 | 27 | 28 | ### TODO 29 | - [ ] Separate the time-based method into another class that can be used as an alternative to the FFT-based method 30 | - [ ] Add an FFT-based YIN implementation (not a priority, MPM works well for my needs - PRs welcome) 31 | - [ ] Create a base (virtual) `Pitch` class and add implementations as subclasses. 32 | - [ ] Add other methods, e.g. Wavelet? 33 | - [ ] Remove the JUCE dependency from the implementations so that they can be used on embedded platforms, e.g. Arduino/Teensy. Will also need 'pluggable' FFT methods for this to work.
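### Example: calling PitchMPM from an AudioProcessor

A minimal sketch of where the calls above might live in a JUCE plugin. `PitchTrackerProcessor`, the `pitchMPM` member (constructed with an initial buffer size) and `lastPitchHz` are illustrative names, not part of this module.

```cpp
void PitchTrackerProcessor::prepareToPlay (double sampleRate, int samplesPerBlock)
{
    jassert (isPowerOfTwo (samplesPerBlock));   // bufferSize must be a power of 2
    pitchMPM.setSampleRate ((int) sampleRate);
    pitchMPM.setBufferSize (samplesPerBlock);
}

void PitchTrackerProcessor::processBlock (AudioBuffer<float>& buffer, MidiBuffer&)
{
    // Analyse channel 0; getPitch returns -1 when no pitch above the cutoff is found
    float pitch = pitchMPM.getPitch (buffer.getReadPointer (0));

    if (pitch > 0.0f)
        lastPitchHz = pitch;   // e.g. read by the editor/UI thread
}
```

This sketch assumes the host delivers fixed, power-of-two block sizes; if it does not, collect samples into a FIFO of the analysis size first, and consider running the analysis off the audio thread to keep the callback light.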
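### How the FFT-based autocorrelation works

For reference, the frequency-domain path in `PitchMPM` follows the standard Wiener–Khinchin approach: zero-pad the frame to twice its length, take a forward FFT, multiply each bin by its complex conjugate (giving the power spectrum), and inverse-FFT back to the lag domain. The sketch below restates that idea as a standalone function; `autocorrelate` is an illustrative name, and it assumes AudioFFT's header is available as `AudioFFT.h`. It is not an exact copy of the class internals.

```cpp
#include <vector>
#include "AudioFFT.h"

std::vector<float> autocorrelate (const float* samples, size_t numSamples)
{
    const size_t fftSize = 2 * numSamples;                // zero-padding avoids circular wrap-around
    std::vector<float> padded (samples, samples + numSamples);
    padded.resize (fftSize, 0.0f);

    std::vector<float> re (audiofft::AudioFFT::ComplexSize (fftSize));
    std::vector<float> im (audiofft::AudioFFT::ComplexSize (fftSize));
    std::vector<float> acf (fftSize);

    audiofft::AudioFFT fft;
    fft.init (fftSize);
    fft.fft (padded.data(), re.data(), im.data());

    // Power spectrum: X(k) * conj(X(k)) = |X(k)|^2, so the imaginary part becomes zero
    for (size_t i = 0; i < re.size(); ++i)
    {
        re[i] = re[i] * re[i] + im[i] * im[i];
        im[i] = 0.0f;
    }

    fft.ifft (acf.data(), re.data(), im.data());          // lags 0..numSamples-1 are the useful part
    return acf;
}
```

Note that this returns the raw autocorrelation; the MPM proper then normalises it into the NSDF before peak picking.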
34 | -------------------------------------------------------------------------------- /pitch_detector.cpp: -------------------------------------------------------------------------------- 1 | #include "pitch_detector.h" 2 | 3 | namespace adamski { 4 | 5 | } 6 | -------------------------------------------------------------------------------- /pitch_detector.h: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | BEGIN_JUCE_MODULE_DECLARATION 4 | ID: pitch_detector 5 | vendor: adamski 6 | version: 0.2.0 7 | name: Pitch Detector 8 | description: Pitch estimation methods 9 | website: http://www.github.com/adamski/pitch_detector 10 | license: MIT 11 | dependencies: juce_core, juce_audio_basics, audio_fft 12 | OSXFrameworks: 13 | iOSFrameworks: 14 | END_JUCE_MODULE_DECLARATION 15 | */ 16 | 17 | #pragma once 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | namespace adamski { 25 | 26 | using namespace juce; 27 | 28 | #include "source/PitchMPM.h" 29 | #include "source/PitchYIN.h" 30 | 31 | } 32 | -------------------------------------------------------------------------------- /pitch_detector.mm: -------------------------------------------------------------------------------- 1 | #include "pitch_detector.cpp" 2 | -------------------------------------------------------------------------------- /source/PitchMPM.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #if JUCE_IOS || JUCE_MAC // TODO: Move this to Projucer project? 4 | #define AUDIOFFT_APPLE_ACCELERATE 1 5 | #endif 6 | 7 | #define CUTOFF 0.93 //0.97 is default 8 | #define SMALL_CUTOFF 0.5 9 | #define LOWER_PITCH_CUTOFF 80 //hz 10 | 11 | /** 12 | * TODO: Provide switch between time-based and FFT based methods 13 | */ 14 | 15 | class PitchMPM 16 | { 17 | 18 | public: 19 | 20 | PitchMPM(size_t bufferSize) : PitchMPM(44100, bufferSize) {} 21 | 22 | PitchMPM(int sampleRate, size_t bufferSize) : bufferSize (bufferSize), 23 | sampleRate (sampleRate), 24 | fftSize (2 * bufferSize), // Needs to be a power of 2! 
25 | real (audiofft::AudioFFT::ComplexSize(fftSize)), 26 | imag (audiofft::AudioFFT::ComplexSize(fftSize)), 27 | output (fftSize) 28 | 29 | 30 | { 31 | //nsdf.insertMultiple(0, 0.0, bufferSize); 32 | 33 | } 34 | 35 | 36 | ~PitchMPM() 37 | { 38 | 39 | //nsdf.clear(); 40 | maxPositions.clear(); 41 | ampEstimates.clear(); 42 | periodEstimates.clear(); 43 | 44 | } 45 | 46 | float getPitch(const float *audioBuffer) 47 | { 48 | 49 | maxPositions.clearQuick(); 50 | periodEstimates.clearQuick(); 51 | ampEstimates.clearQuick(); 52 | 53 | //nsdfTimeDomain(audioBuffer); 54 | //nsdfFrequencyDomain(audioBuffer); 55 | 56 | if (audioBuffer == nullptr) 57 | { 58 | DBG ("audioBuffer NULL"); 59 | return 0.0f; 60 | } 61 | //nsdf = Array<float> (nsdfFrequencyDomain(audioBuffer).data()); 62 | std::vector<float> _nsdf = nsdfFrequencyDomain(audioBuffer); 63 | std::vector<int> max_positions = peak_picking(_nsdf); 64 | std::vector<std::pair<float, float>> estimates; 65 | 66 | //peakPicking(); 67 | 68 | float highestAmplitude = -FLT_MAX; 69 | 70 | for (auto tau : max_positions) 71 | { 72 | highestAmplitude = jmax(highestAmplitude, _nsdf[tau]); 73 | 74 | if (_nsdf[tau] > SMALL_CUTOFF) 75 | { 76 | auto x = parabolic_interpolation(_nsdf, tau); 77 | estimates.push_back(x); 78 | highestAmplitude = std::max(highestAmplitude, std::get<1>(x)); 79 | } 80 | } 81 | 82 | if (estimates.empty()) return -1; 83 | 84 | float actualCutoff = CUTOFF * highestAmplitude; 85 | float period = 0; 86 | 87 | for (auto estimate : estimates) 88 | { 89 | if (std::get<1>(estimate) >= actualCutoff) 90 | { 91 | period = std::get<0>(estimate); 92 | break; 93 | } 94 | } 95 | 96 | float pitchEstimate = (sampleRate / period); 97 | return (pitchEstimate > LOWER_PITCH_CUTOFF) ? pitchEstimate : -1; 98 | } 99 | 100 | void setSampleRate (int newSampleRate) 101 | { 102 | sampleRate = newSampleRate; 103 | } 104 | 105 | void setBufferSize (int newBufferSize) 106 | { 107 | bufferSize = newBufferSize; 108 | input.resize (bufferSize); 109 | fftSize = 2 * bufferSize; 110 | real.resize (audiofft::AudioFFT::ComplexSize(fftSize)); 111 | imag.resize (audiofft::AudioFFT::ComplexSize(fftSize)); 112 | output.resize (fftSize); 113 | } 114 | 115 | private: 116 | size_t bufferSize; 117 | 118 | audiofft::AudioFFT fft; 119 | size_t fftSize; 120 | std::vector<float> input; 121 | std::vector<float> real; 122 | std::vector<float> imag; 123 | std::vector<float> output; 124 | 125 | float sampleRate; 126 | 127 | float turningPointX, turningPointY; 128 | //Array<float> nsdf; 129 | 130 | Array<int> maxPositions; 131 | Array<float> periodEstimates; 132 | Array<float> ampEstimates; 133 | 134 | /* 135 | void parabolicInterpolation(int tau) 136 | { 137 | float nsdfa = nsdf.getUnchecked (tau - 1); 138 | float nsdfb = nsdf.getUnchecked (tau); 139 | float nsdfc = nsdf.getUnchecked (tau + 1); 140 | float bValue = tau; 141 | float bottom = nsdfc + nsdfa - 2 * nsdfb; 142 | if (bottom == 0.0) 143 | { 144 | turningPointX = bValue; 145 | turningPointY = nsdfb; 146 | } 147 | else 148 | { 149 | float delta = nsdfa - nsdfc; 150 | turningPointX = bValue + delta / (2 * bottom); 151 | turningPointY = nsdfb - delta * delta / (8 * bottom); 152 | } 153 | } 154 | 155 | void peakPicking() 156 | { 157 | 158 | int pos = 0; 159 | int curMaxPos = 0; 160 | float* nsdfPtr = nsdf.getRawDataPointer(); 161 | 162 | while (pos < (bufferSize - 1) / 3 && nsdfPtr[pos] > 0) { 163 | pos++; 164 | } 165 | 166 | while (pos < bufferSize - 1 && nsdfPtr[pos] <= 0.0) { 167 | pos++; 168 | } 169 | 170 | if (pos == 0) { 171 | pos = 1; 172 | } 173 | 174 | while (pos < bufferSize - 1) { 175 | if (nsdfPtr[pos] > nsdfPtr[pos - 1]
&& nsdfPtr[pos] >= nsdfPtr[pos + 1]) { 176 | if (curMaxPos == 0) { 177 | curMaxPos = pos; 178 | } else if (nsdfPtr[pos] > nsdfPtr[curMaxPos]) { 179 | curMaxPos = pos; 180 | } 181 | } 182 | pos++; 183 | if (pos < bufferSize - 1 && nsdfPtr[pos] <= 0) { 184 | if (curMaxPos > 0) { 185 | maxPositions.add (curMaxPos); 186 | curMaxPos = 0; 187 | } 188 | while (pos < bufferSize - 1 && nsdfPtr[pos] <= 0.0f) { 189 | pos++; 190 | } 191 | } 192 | } 193 | if (curMaxPos > 0) 194 | { 195 | maxPositions.add (curMaxPos); 196 | } 197 | } 198 | */ 199 | 200 | inline std::pair<float, float> parabolic_interpolation(std::vector<float> array, float x) 201 | { 202 | int x_adjusted; 203 | 204 | if (x < 1) { 205 | x_adjusted = (array[x] <= array[x+1]) ? x : x+1; 206 | } else if (x > signed(array.size())-1) { 207 | x_adjusted = (array[x] <= array[x-1]) ? x : x-1; 208 | } else { 209 | float den = array[x+1] + array[x-1] - 2 * array[x]; 210 | float delta = array[x-1] - array[x+1]; 211 | return (!den) ? std::make_pair(x, array[x]) : std::make_pair(x + delta / (2 * den), array[x] - delta*delta/(8*den)); 212 | } 213 | return std::make_pair(x_adjusted, array[x_adjusted]); 214 | } 215 | 216 | static std::vector<int> peak_picking(std::vector<float> nsdf) 217 | { 218 | std::vector<int> max_positions{}; 219 | int pos = 0; 220 | int curMaxPos = 0; 221 | ssize_t size = nsdf.size(); 222 | 223 | while (pos < (size - 1) / 3 && nsdf[pos] > 0) pos++; 224 | while (pos < size - 1 && nsdf[pos] <= 0.0) pos++; 225 | 226 | if (pos == 0) pos = 1; 227 | 228 | while (pos < size - 1) { 229 | if (nsdf[pos] > nsdf[pos - 1] && nsdf[pos] >= nsdf[pos + 1]) { 230 | if (curMaxPos == 0) { 231 | curMaxPos = pos; 232 | } else if (nsdf[pos] > nsdf[curMaxPos]) { 233 | curMaxPos = pos; 234 | } 235 | } 236 | pos++; 237 | if (pos < size - 1 && nsdf[pos] <= 0) { 238 | if (curMaxPos > 0) { 239 | max_positions.push_back(curMaxPos); 240 | curMaxPos = 0; 241 | } 242 | while (pos < size - 1 && nsdf[pos] <= 0.0) { 243 | pos++; 244 | } 245 | } 246 | } 247 | if (curMaxPos > 0) { 248 | max_positions.push_back(curMaxPos); 249 | } 250 | return max_positions; 251 | } 252 | 253 | /* 254 | void nsdfTimeDomain(const float *audioBuffer) 255 | { 256 | int tau; 257 | for (tau = 0; tau < bufferSize; tau++) { 258 | float acf = 0; 259 | float divisorM = 0; 260 | for (int i = 0; i < bufferSize - tau; i++) { 261 | acf += audioBuffer[i] * audioBuffer[i + tau]; 262 | divisorM += audioBuffer[i] * audioBuffer[i] + audioBuffer[i + tau] * audioBuffer[i + tau]; 263 | } 264 | nsdf.setUnchecked(tau, 2 * acf / divisorM); 265 | } 266 | } 267 | */ 268 | 269 | // FFT based methods 270 | std::vector<float> nsdfFrequencyDomain (const float *audioBuffer) 271 | { 272 | //std::vector<std::complex<float>> acf(size2); 273 | //std::vector<float> acf_real{}; 274 | 275 | real.resize (fftSize); 276 | imag.resize (fftSize); 277 | 278 | if (audioBuffer == nullptr) 279 | DBG ("audioBuffer NULL: nsdfFrequencyDomain"); 280 | 281 | std::vector<float> acf (autoCorrelation (audioBuffer)); 282 | 283 | /* 284 | for (auto it = acf.begin() + size2/2; it != acf.end(); ++it) 285 | acf_real.push_back((*it) / acf[size2 / 2]); 286 | */ 287 | 288 | /** This code is for interleaved data, above is not 289 | for (auto it = acf.begin() + size2/2; it != acf.end(); ++it) 290 | acf_real.push_back((*it)/acf[size2/2]); 291 | //acf_real.push_back((*it).real()/acf[size2/2].real()); 292 | ****************************************************/ 293 | 294 | // for (int i = size2/2; i < acf.size(); ++i) 295 | // nsdf.setUnchecked(i, acf[i] / acf[size2 / 2]); 296 | 297 | 298 | //return acf_real; 299 | return acf; 300
| } 301 | 302 | std::vector<float> autoCorrelation(const float *audioBuffer) 303 | { 304 | if (audioBuffer == nullptr) 305 | DBG ("audioBuffer NULL: autoCorrelation"); 306 | 307 | //AudioSampleBuffer paddedAudioBuffer (audioBuffer, 1, fftSize); 308 | std::vector<float> input (audioBuffer, audioBuffer + bufferSize); 309 | input.resize(fftSize, 0.0f); 310 | 311 | if (audioBuffer == nullptr) 312 | DBG ("audioBuffer NULL: autoCorrelation post resize"); 313 | 314 | if (input.data() == nullptr) 315 | DBG ("input.data() NULL: autoCorrelation post resize"); 316 | 317 | fft.init(fftSize); 318 | fft.fft(input.data(), real.data(), imag.data()); 319 | //fft.fft(audioBuffer, real.data(), imag.data()); 320 | 321 | // Complex Conjugate 322 | for (int i = 0; i < fftSize; ++i) 323 | { 324 | /** 325 | * std::complex method 326 | */ 327 | std::complex<float> complex(real[i], imag[i]); 328 | complex = complex * std::conj(complex); // No need to scale as AudioFFT does this already 329 | real[i] = complex.real(); 330 | imag[i] = complex.imag(); 331 | 332 | /** 333 | * calculate via real[i] * real[i] + imag[i] * imag[i]. 334 | * And if you really mean complex conjugation, just negate imag[i] 335 | */ 336 | 337 | //imag[i] *= -1; 338 | //real[i] = real[i] * real[i]; // + imag[i] * imag[i]; 339 | } 340 | 341 | fft.ifft(output.data(), real.data(), imag.data()); 342 | return output; 343 | } 344 | 345 | 346 | 347 | }; 348 | -------------------------------------------------------------------------------- /source/PitchYIN.h: -------------------------------------------------------------------------------- 1 | class PitchYIN 2 | { 3 | 4 | public: 5 | 6 | PitchYIN (int sampleRate, unsigned int bufferSize) : yin (1, bufferSize), bufferSize (bufferSize), sampleRate(sampleRate), tolerence (0.15), 7 | deltaWasNegative (false) 8 | { 9 | } 10 | 11 | PitchYIN (unsigned int bufferSize) : yin (1, bufferSize), bufferSize (bufferSize), sampleRate(44100), tolerence (0.15), 12 | deltaWasNegative (false) 13 | { 14 | } 15 | 16 | void setSampleRate(unsigned int newSampleRate) 17 | { 18 | sampleRate = newSampleRate; 19 | DBG ("sampleRate: " + String(sampleRate)); 20 | } 21 | 22 | /** Output the difference function */ 23 | void difference(AudioSampleBuffer& input) 24 | { 25 | float tmp; 26 | float *yinData = yin.getWritePointer(0); 27 | const float *inputData = input.getReadPointer(0); 28 | 29 | FloatVectorOperations::fill(yinData, 0.0, yin.getNumSamples()); 30 | 31 | for (int tau = 1; tau < yin.getNumSamples(); tau++) 32 | { 33 | for (int j = 0; j < yin.getNumSamples(); j++) 34 | { 35 | tmp = inputData[j] - inputData[j + tau]; 36 | yinData[tau] += (tmp * tmp); 37 | } 38 | } 39 | } 40 | 41 | /** cumulative mean normalized difference function */ 42 | void cumulativeMean () 43 | { 44 | float *yinData = yin.getWritePointer(0); 45 | float tmp = 0.; 46 | yinData[0] = 1.; 47 | //AUBIO_DBG("%f\t",yinData[0]); 48 | for (int tau = 1; tau < yin.getNumSamples(); tau++) 49 | { 50 | tmp += yinData[tau]; 51 | yinData[tau] *= tau / tmp; 52 | //AUBIO_DBG("%f\t",yinData[tau]); 53 | } 54 | //AUBIO_DBG("\n"); 55 | } 56 | 57 | int getPitch () 58 | { 59 | int tau = 0; 60 | float *yinData = yin.getWritePointer(0); 61 | do { 62 | if (yinData[tau] < 0.1) 63 | { 64 | while (yinData[tau + 1] < yinData[tau]) 65 | { 66 | tau++; 67 | } 68 | return tau; 69 | } 70 | tau++; 71 | } while (tau < yin.getNumSamples()); 72 | //AUBIO_DBG("No pitch found"); 73 | return 0; 74 | 75 | } 76 | 77 | 78 | /** Full YIN algorithm */ 79 | float calculatePitch (const float* inputData) noexcept 80 | { 81 | int
period; 82 | float delta = 0.0, runningSum = 0.0; 83 | float *yinData = yin.getWritePointer(0); 84 | //deltaWasNegative = false; 85 | 86 | //DBG ("calculatePitch"); 87 | 88 | yinData[0] = 1.0; 89 | for (int tau = 1; tau < yin.getNumSamples(); tau++) 90 | { 91 | yinData[tau] = 0.0; 92 | for (int j = 0; j < yin.getNumSamples(); j++) 93 | { 94 | delta = inputData[j] - inputData[j + tau]; 95 | yinData[tau] += (delta * delta); 96 | //if (delta < 0) deltaWasNegative = true; 97 | } 98 | runningSum += yinData[tau]; 99 | if (runningSum != 0) 100 | { 101 | yinData[tau] *= tau / runningSum; 102 | } 103 | else 104 | { 105 | yinData[tau] = 1.0; 106 | } 107 | period = tau - 3; 108 | if (tau > 4 && (yinData[period] < tolerence) && 109 | (yinData[period] < yinData[period + 1])) 110 | { 111 | DBG ("return early"); 112 | return quadraticPeakPosition (yin.getReadPointer(0), period); 113 | } 114 | } 115 | return quadraticPeakPosition (yin.getReadPointer(0), minElement (yin.getReadPointer(0))); 116 | } 117 | 118 | float getPitchInHz (const float* inputData) noexcept 119 | { 120 | float pitch = 0.0; 121 | //slideBlock (input); 122 | pitch = calculatePitch (inputData); 123 | log->writeToLog("pitch: " + String(pitch)); 124 | 125 | if (pitch > 0) 126 | { 127 | pitch = sampleRate / (pitch + 0.0); 128 | log->writeToLog ("pitchInHz: " + String(pitch)); 129 | } 130 | else 131 | { 132 | pitch = 0.0; 133 | } 134 | currentPitch = pitch; 135 | //if (deltaWasNegative) DBG("delta negative"); 136 | 137 | return pitch; 138 | } 139 | 140 | void setTolerence(float newTolerence) 141 | { 142 | tolerence = newTolerence; 143 | } 144 | 145 | private: 146 | AudioSampleBuffer yin; //, buf; 147 | //float* yinData; 148 | unsigned int bufferSize; 149 | float tolerence; //, confidence; 150 | unsigned int sampleRate; 151 | bool deltaWasNegative; 152 | float currentPitch; 153 | Logger *log; 154 | 155 | // /** adapter to stack ibuf new samples at the end of buf, and trim `buf` to `bufsize` */ 156 | // void slideBlock (AudioSampleBuffer& ibuf) 157 | // { 158 | // float *bufData = buf.getWritePointer(0); 159 | // const float *ibufData = ibuf.getReadPointer(0); 160 | // 161 | // unsigned int j = 0, overlapSize = 0; 162 | // overlapSize = buf.getNumSamples() - ibuf.getNumSamples(); 163 | // for (j = 0; j < overlapSize; j++) 164 | // { 165 | // bufData[j] = bufData[j + ibuf.getNumSamples()]; 166 | // } 167 | // for (j = 0; j < ibuf.getNumSamples(); j++) 168 | // { 169 | // bufData[j + overlapSize] = ibufData[j]; 170 | // } 171 | // } 172 | 173 | // Below functions should go in a seperate utilities class 174 | 175 | float quadraticPeakPosition (const float *data, unsigned int pos) noexcept 176 | { 177 | float s0, s1, s2; 178 | unsigned int x0, x2; 179 | if (pos == 0 || pos == bufferSize - 1) return pos; 180 | x0 = (pos < 1) ? pos : pos - 1; 181 | x2 = (pos + 1 < bufferSize) ? pos + 1 : pos; 182 | if (x0 == pos) return (data[pos] <= data[x2]) ? pos : x2; 183 | if (x2 == pos) return (data[pos] <= data[x0]) ? pos : x0; 184 | s0 = data[x0]; 185 | s1 = data[pos]; 186 | s2 = data[x2]; 187 | return pos + 0.5 * (s0 - s2 ) / (s0 - 2.* s1 + s2); 188 | } 189 | 190 | unsigned int minElement (const float *data) noexcept 191 | { 192 | #ifndef JUCE_USE_VDSP_FRAMEWORK 193 | unsigned int j, pos = 0; 194 | float tmp = data[0]; 195 | for (j = 0; j < bufferSize; j++) 196 | { 197 | pos = (tmp < data[j]) ? pos : j; 198 | tmp = (tmp < data[j]) ? 
tmp : data[j]; 199 | } 200 | #else 201 | float tmp = 0.0; 202 | unsigned int pos = 0; 203 | #if !DOUBLE_SAMPLES 204 | vDSP_minvi(data, 1, &tmp, (vDSP_Length *)&pos, bufferSize); 205 | #else 206 | vDSP_minviD(data, 1, &tmp, (vDSP_Length *)&pos, bufferSize); 207 | #endif 208 | #endif 209 | return pos; 210 | } 211 | }; 212 | --------------------------------------------------------------------------------