├── tests
│   ├── __init__.py
│   ├── test.py
│   ├── test_key_detection.py
│   └── gen_test_clips.py
├── chord_detection
│   ├── dsp
│   │   ├── __init__.py
│   │   ├── lowpass.py
│   │   ├── frame.py
│   │   └── wfir.py
│   ├── __init__.py
│   ├── multipitch.py
│   ├── chord_detect.py
│   ├── chromagram.py
│   ├── harmonic_energy.py
│   ├── prime_multif0.py
│   ├── periodicity.py
│   ├── esacf.py
│   └── iterative_f0.py
├── .gitignore
├── requirements.txt
├── .github
│   ├── piano_c_1.png
│   ├── piano_c_2.png
│   ├── piano_c_3.png
│   └── piano_c_4.png
├── pyproject.toml
├── LICENSE
└── README.md

/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/chord_detection/dsp/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
*.pyc
*.wav
*.egg-info
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
scipy
numpy
librosa
matplotlib
soundfile
peakutils
--------------------------------------------------------------------------------
/.github/piano_c_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sevagh/chord-detection/HEAD/.github/piano_c_1.png
--------------------------------------------------------------------------------
/.github/piano_c_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sevagh/chord-detection/HEAD/.github/piano_c_2.png
--------------------------------------------------------------------------------
/.github/piano_c_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sevagh/chord-detection/HEAD/.github/piano_c_3.png
--------------------------------------------------------------------------------
/.github/piano_c_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sevagh/chord-detection/HEAD/.github/piano_c_4.png
--------------------------------------------------------------------------------
/chord_detection/dsp/lowpass.py:
--------------------------------------------------------------------------------
import scipy
import numpy
import scipy.signal


def lowpass_filter(x: numpy.ndarray, fs: float, band: float) -> numpy.ndarray:
    b, a = scipy.signal.butter(2, [band / (fs / 2)], btype="low")
    return scipy.signal.lfilter(b, a, x)
--------------------------------------------------------------------------------
/chord_detection/__init__.py:
--------------------------------------------------------------------------------
from chord_detection.esacf import MultipitchESACF
from chord_detection.harmonic_energy import MultipitchHarmonicEnergy
from chord_detection.iterative_f0 import MultipitchIterativeF0
from chord_detection.prime_multif0 import MultipitchPrimeMultiF0

from chord_detection.multipitch import METHODS
from chord_detection.chromagram import detect_key
--------------------------------------------------------------------------------
/chord_detection/dsp/frame.py:
-------------------------------------------------------------------------------- 1 | import numpy 2 | import math 3 | 4 | 5 | def frame_cutter(x: numpy.ndarray, frame_size: int) -> numpy.ndarray: 6 | if len(x.shape) != 1: 7 | raise ValueError("Only 1D numpy ndarrays are supported") 8 | 9 | num_frames = float(x.shape[0]) / float(frame_size) 10 | num_frames = int(math.ceil(num_frames)) 11 | pad = int(num_frames * frame_size - x.shape[0]) 12 | x_pad = numpy.concatenate((x, numpy.zeros(pad))) 13 | for x_frame in numpy.split(x_pad, num_frames): 14 | yield x_frame 15 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "chord-detection" 7 | version = "0.0.1" 8 | description = "DSP algorithms for chord detection + key estimation" 9 | readme = { file = "README.md", content-type = "text/markdown" } 10 | authors = [ 11 | { name = "Sevag Hanssian (sevagh@protonmail.com)" }, 12 | ] 13 | license = { text = "MIT" } 14 | requires-python = ">=3" 15 | dependencies = [ 16 | "scipy", 17 | "numpy", 18 | "matplotlib", 19 | "librosa", 20 | "soundfile", 21 | "peakutils", 22 | ] 23 | 24 | [project.scripts] 25 | chord-detect = "chord_detection.chord_detect:main_cli" 26 | -------------------------------------------------------------------------------- /chord_detection/dsp/wfir.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import scipy.signal 4 | 5 | 6 | def _bark_warp_coef(fs): 7 | return ( 8 | 1.0674 * numpy.sqrt((2.0 / numpy.pi) * numpy.arctan(0.06583 * fs / 1000.0)) 9 | - 0.1916 10 | ) 11 | 12 | 13 | def _warped_remez_coefs(fs, order): 14 | l = 20 15 | r = min(20000, fs / 2 - 1) 16 | t = 1 17 | 18 | c = scipy.signal.remez( 19 | order + 1, [0, l - t, l, r, r + t, 0.5 * fs], [0, 1, 0], fs=fs 20 | ) 21 | return c.tolist() 22 | 23 | 24 | # see: https://sevagh.github.io/warped-linear-prediction/ 25 | def wfir(x: numpy.ndarray, fs: float, order: int) -> numpy.ndarray: 26 | a = _bark_warp_coef(fs) 27 | 28 | B = [-a.conjugate(), 1] 29 | A = [1, -a] 30 | ys = [0] * order 31 | 32 | ys[0] = scipy.signal.lfilter(B, A, x) 33 | for i in range(1, len(ys)): 34 | ys[i] = scipy.signal.lfilter(B, A, ys[i - 1]) 35 | 36 | c = _warped_remez_coefs(fs, order) 37 | 38 | x_hat = c[0] * x 39 | for i in range(order): 40 | x_hat += c[i + 1] * ys[i] 41 | 42 | r = x - x_hat 43 | return r 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Sevag Hanssian 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/tests/test.py:
--------------------------------------------------------------------------------
import unittest
from chord_detection import (
    MultipitchESACF,
    MultipitchIterativeF0,
    MultipitchHarmonicEnergy,
    MultipitchPrimeMultiF0,
)


TESTCASES = {
    "tests/test_1_note_Csharp3.wav": "010000000000",
    "tests/test_1_note_E4.wav": "000010000000",
    "tests/test_2_notes_E2_F3.wav": "000011000000",
    "tests/test_2_notes_G3_Asharp4.wav": "000000010010",
    "tests/test_3_notes_G2_B2_G#3.wav": "000000011001",
}


class TestChordDetection(unittest.TestCase):
    def test_all(self):
        # smoke test: run every method on every clip and print the computed
        # chromagram next to the expected one (no exact assertion -- the
        # methods are approximate)
        for test_clip, expected_result in TESTCASES.items():
            compute_objs = [
                MultipitchESACF(test_clip),
                MultipitchHarmonicEnergy(test_clip),
                MultipitchIterativeF0(test_clip),
                MultipitchPrimeMultiF0(test_clip),
            ]
            for c in compute_objs:
                ret = c.compute_pitches()
                print(
                    "{0}\n{1}\n{2} expected\n{3} actual\n".format(
                        c.display_name(), test_clip, expected_result, ret
                    )
                )
--------------------------------------------------------------------------------
/chord_detection/multipitch.py:
--------------------------------------------------------------------------------
from abc import ABCMeta, abstractmethod
from pathlib import Path
import librosa
from collections import OrderedDict

METHODS = OrderedDict()


class Multipitch(object):
    __metaclass__ = ABCMeta

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        method_num = cls.method_number()
        if method_num in METHODS.keys():
            raise ValueError(
                "Method number {0} already registered as {1} in {2}".format(
                    method_num, METHODS[method_num], METHODS
                )
            )
        METHODS[cls.method_number()] = cls

    @abstractmethod
    def __init__(self, audio_path):
        x, self.fs = librosa.load(audio_path)
        if len(x.shape) == 2 and x.shape[0] == 2:
            # librosa returns stereo as shape (2, n); average the two
            # channels down to a mono signal
            self.x = (x[0] + x[1]) / 2.0
        else:
            self.x = x
        self.clip_name = Path(audio_path).name

    @abstractmethod
    def compute_pitches(self):
        pass

    @staticmethod
    @abstractmethod
    def display_name():
        raise ValueError("unimplemented")

    @staticmethod
    @abstractmethod
    def method_number():
        raise ValueError("unimplemented")
--------------------------------------------------------------------------------
/tests/test_key_detection.py:
--------------------------------------------------------------------------------
import unittest
import numpy
from chord_detection import detect_key


TESTCASES = {
    "Cmaj": numpy.asarray(
        [
100.0, # C-E-G, c major pitches 13 | 0.0, 14 | 0.0, 15 | 0.0, 16 | 100.0, # E 17 | 0.0, 18 | 0.0, 19 | 100.0, # G 20 | 0.0, 21 | 0.0, 22 | 0.0, 23 | 0.0, 24 | ] 25 | ), 26 | "Cmin": numpy.asarray( 27 | [ 28 | 50.0, # C-D-Eb-G, c minor pitches 29 | 0.0, 30 | 50.0, # D 31 | 50.0, # D#/Eb 32 | 0.0, 33 | 0.0, 34 | 0.0, 35 | 10.0, # G 36 | 0.0, 37 | 0.0, 38 | 0.0, 39 | 0.0, 40 | ] 41 | ), 42 | "G#maj": numpy.asarray( 43 | [ 44 | 0.0, 45 | 10.0, # C# 46 | 0.0, 47 | 10.0, # D# 48 | 0.0, 49 | 0.0, 50 | 0.0, 51 | 0.0, 52 | 10.0, # G# 53 | 0.0, 54 | 10.0, # A# 55 | 0.0, 56 | ] 57 | ), 58 | } 59 | 60 | 61 | class TestKeyDetection(unittest.TestCase): 62 | def test_krumhansl_schmuckler_key_detection(self): 63 | for expected_key, X in TESTCASES.items(): 64 | self.assertEqual(detect_key(X), expected_key) 65 | -------------------------------------------------------------------------------- /tests/gen_test_clips.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | import numpy 5 | import os 6 | import librosa 7 | import soundfile 8 | import sys 9 | from tempfile import TemporaryDirectory 10 | 11 | 12 | def main(): 13 | dest = "tests/test_1_note_Csharp3.wav" 14 | tone = librosa.tone(138.59, sr=22050, length=44100) 15 | soundfile.write(dest, tone, 22050) 16 | print("Created {0} with note C#3".format(dest)) 17 | 18 | dest = "tests/test_1_note_E4.wav" 19 | tone = librosa.tone(329.63, sr=22050, length=44100) 20 | soundfile.write(dest, tone, 22050) 21 | print("Created {0} with note E4".format(dest)) 22 | 23 | dest = "tests/test_2_notes_E2_F3.wav" 24 | tone = numpy.zeros(44100) 25 | tone += librosa.tone(82.41, sr=22050, length=44100) 26 | tone += librosa.tone(174.61, sr=22050, length=44100) 27 | soundfile.write(dest, tone, 22050) 28 | print("Created {0} with notes E2, F3".format(dest)) 29 | 30 | dest = "tests/test_2_notes_G3_Asharp4.wav" 31 | tone = numpy.zeros(44100) 32 | tone += librosa.tone(196, sr=22050, length=44100) 33 | tone += librosa.tone(466.16, sr=22050, length=44100) 34 | soundfile.write(dest, tone, 22050) 35 | print("Created {0} with notes G3, A#4".format(dest)) 36 | 37 | dest = "tests/test_3_notes_G2_B2_G#3.wav" 38 | tone = numpy.zeros(44100) 39 | tone += librosa.tone(98, sr=22050, length=44100) 40 | tone += librosa.tone(123.47, sr=22050, length=44100) 41 | tone += librosa.tone(207.65, sr=22050, length=44100) 42 | soundfile.write(dest, tone, 22050) 43 | print("Created {0} with notes G2, B2, G#3".format(dest)) 44 | 45 | return 0 46 | 47 | 48 | if __name__ == "__main__": 49 | sys.exit(main()) 50 | -------------------------------------------------------------------------------- /chord_detection/chord_detect.py: -------------------------------------------------------------------------------- 1 | from chord_detection.esacf import MultipitchESACF 2 | from chord_detection.harmonic_energy import MultipitchHarmonicEnergy 3 | from chord_detection.iterative_f0 import MultipitchIterativeF0 4 | from chord_detection.prime_multif0 import MultipitchPrimeMultiF0 5 | from chord_detection.multipitch import METHODS 6 | 7 | import sys 8 | import argparse 9 | 10 | 11 | def main_cli(): 12 | method_nums = [k for k in METHODS.keys()] 13 | method_nums_help_string = "-1 = all, " 14 | for k in METHODS.keys(): 15 | method_nums_help_string += "{0} ({1}), ".format(k, METHODS[k].display_name()) 16 | 17 | method_nums_help_string = method_nums_help_string[:-2] # strip trailing ', ' 18 | 19 | parser = argparse.ArgumentParser( 20 | 
prog="chord-detection", 21 | description="Collection of chord-detection techniques", 22 | formatter_class=argparse.RawDescriptionHelpFormatter, 23 | ) 24 | 25 | parser.add_argument( 26 | "--key", 27 | action="store_true", 28 | help="estimate the key using the Krumhansl-Schmuckler key-finding algorithm", 29 | ) 30 | parser.add_argument( 31 | "--displayplots", 32 | type=int, 33 | help="display intermediate plots at specified frame with matplotlib", 34 | default=-1, 35 | ) 36 | parser.add_argument( 37 | "--method", 38 | type=int, 39 | help=method_nums_help_string, 40 | default=next(iter(METHODS.keys())), 41 | ) 42 | parser.add_argument("input_path", help="Path to WAV audio clip") 43 | args = parser.parse_args() 44 | 45 | compute_objs = [] 46 | 47 | if args.method == -1: 48 | for v in METHODS.values(): 49 | compute_objs.append(v(args.input_path)) 50 | else: 51 | try: 52 | compute_objs.append(METHODS[args.method](args.input_path)) 53 | except KeyError: 54 | raise ValueError("valid methods: {0}".format(method_nums_help_string)) 55 | 56 | for compute_obj in compute_objs: 57 | print( 58 | "{0} - {1}".format(compute_obj.method_number(), compute_obj.display_name()) 59 | ) 60 | chromagram = compute_obj.compute_pitches(args.displayplots) 61 | print(chromagram) 62 | if args.key: 63 | print(chromagram.key()) 64 | 65 | 66 | if __name__ == "__main__": 67 | main_cli() 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # chord-detection 2 | 3 | This repository is an MIT-licensed collection of multipitch/polyphonic instrument chord and key detection methods, implemented from academic papers using Python. 4 | 5 | ## Usage 6 | 7 | Pip install it from the git url: 8 | ``` 9 | $ pip install git+https://github.com/sevagh/chord-detection.git 10 | ``` 11 | 12 | This installs the library `chord_detection` and the command-line tool `chord-detect`: 13 | ``` 14 | $ chord-detect 15 | usage: chord-detection [-h] [--key] [--displayplots DISPLAYPLOTS] [--method METHOD] input_path 16 | chord-detection: error: the following arguments are required: input_path 17 | ``` 18 | 19 | Run tests: 20 | ``` 21 | $ python ./tests/gen_test_clips.py 22 | $ python -m unittest 23 | ``` 24 | 25 | ## Methods 26 | 27 | The results displayed come from a wav clip of a piano C major chord. The outputs are the summed chromagrams across all the frames, normalized to 9 (the biggest single-digit number), in a 12-digit chroma vector string. 28 | 29 | The expected notes of a C major chord are C E G, so the expected answer should resemble `C000E00G0000`, where `C > E, C > G`. The values of C, E, G are specific to the method being used, but it's some representation of the energy of the detected pitch, e.g. spectral energy, summed FFT. 30 | 31 | ### ESACF (Tolonen, Karjalainen) 32 | 33 | ``` 34 | reference: C000E00G0000 35 | computed: 900003001000, key: Cmaj 36 | ``` 37 | 38 | ![esacf](.github/piano_c_1.png) 39 | 40 | _T. Tolonen and M. Karjalainen, "A computationally efficient multipitch analysis model," in IEEE Transactions on Speech and Audio Processing, vol. 8, no. 6, pp. 708-716, Nov. 2000._ 41 | 42 | _V. Zenz and A. Rauber, "Automatic Chord Detection Incorporating Beat and Key Detection," 2007 IEEE International Conference on Signal Processing and Communications, Dubai, 2007, pp. 

### ESACF (Tolonen, Karjalainen)

```
reference: C000E00G0000
computed:  900003001000, key: Cmaj
```

![esacf](.github/piano_c_1.png)

_T. Tolonen and M. Karjalainen, "A computationally efficient multipitch analysis model," in IEEE Transactions on Speech and Audio Processing, vol. 8, no. 6, pp. 708-716, Nov. 2000._

_V. Zenz and A. Rauber, "Automatic Chord Detection Incorporating Beat and Key Detection," 2007 IEEE International Conference on Signal Processing and Communications, Dubai, 2007, pp. 1175-1178._

### Harmonic Energy (Stark, Plumbley)

```
reference: C000E00G0000
computed:  921111111111, key: Cmin
```

![harmeng](.github/piano_c_2.png)

_Adam M. Stark and Mark D. Plumbley, "Real-Time Chord Recognition for Live Performance," in Proceedings of the 2009 International Computer Music Conference (ICMC 2009), Montreal, Canada, 16-21 August 2009._

### Iterative F0 (Klapuri, Anssi)

```
reference: C000E00G0000
computed:  900301000000, key: Cmin
```

![iterativef0](.github/piano_c_3.png)

_Anssi Klapuri, "Multipitch Analysis of Polyphonic Music and Speech Signals Using an Auditory Model," IEEE Transactions on Audio, Speech, and Language Processing, vol. 16, no. 2, February 2008, p. 255._

_Anssi Klapuri, "Multiple Fundamental Frequency Estimation by Summing Harmonic Amplitudes," ISMIR 2006._

### Prime-multiF0 (Camacho, Kaver-Oreamuno)

```
reference: C000E00G0000
computed:  951000000002, key: Cmin
```

![primemultif0](.github/piano_c_4.png)

_A. Camacho and I. Kaver-Oreamuno, "A multipitch estimation algorithm based on fundamental frequencies and prime harmonics," Sound and Music Computing Conference 2013._
--------------------------------------------------------------------------------
/chord_detection/chromagram.py:
--------------------------------------------------------------------------------
from collections import OrderedDict
from collections.abc import Sequence
import numpy
import scipy.linalg
import scipy.stats


_note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]


class Chromagram(Sequence):
    def __init__(self):
        self.c = OrderedDict()
        for n in _note_names:
            self.c[n] = 0.0
        self.p = None
        super().__init__()

    def __getitem__(self, i):
        if type(i) == str:
            i = i.replace('♯', '#')  # librosa uses the unicode sharp sign
            return self.c[i]
        elif type(i) == int:
            return self.c[_note_names[i]]
        else:
            raise TypeError("index must be a note name or an integer")

    def __setitem__(self, i, item):
        if type(i) == str:
            self.c[i] = item
        elif type(i) == int:
            self.c[_note_names[i]] = item
        else:
            raise TypeError("index must be a note name or an integer")

    def __len__(self):
        return len(self.c)

    def __repr__(self):
        return self._pack()

    def __add__(self, other):
        # in-place accumulation; used as `overall_chromagram += chromagram`
        for k in self.c.keys():
            self.c[k] += other.c[k]
        return self

    def key(self):
        return detect_key(numpy.asarray([v for v in self.c.values()]))

    def _pack(self):
        nc = _normalize(self.c)

        pack = [0 for _ in range(12)]

        for i, v in enumerate(nc.values()):
            pack[i] = int(round(v))

        return "".join([str(p_) for p_ in pack])


def _normalize(c: OrderedDict):
    c_ = c.copy()

    chromagram_min = min(c_.values())
    if chromagram_min != 0.0:
        for k in c_.keys():
            c_[k] = round(c_[k] / chromagram_min, 3)

    chromagram_max = max(c_.values())
    if chromagram_max > 9.0:
        for k in c_.keys():
            c_[k] *= 9.0 / chromagram_max

    return c_
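

# Worked example of the normalize-and-pack scheme above (the raw energies are
# assumed, for illustration): {C: 45.0, E: 20.0, G: 10.0, all others: 0.0}.
# The minimum is 0.0, so the min-scaling step is skipped; the maximum 45.0
# exceeds 9.0, so every value is scaled by 9/45 = 0.2, giving C=9, E=4, G=2
# after rounding, and the packed string "900040020000".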


"""
attribution:
https://gist.github.com/bmcfee/1f66825cef2eb34c839b42dddbad49fd
https://github.com/bmcfee
"""


def detect_key(X):
    if X.shape[0] != 12:
        raise ValueError(
            "input must be a chroma vector, i.e. a numpy ndarray of shape (12,)"
        )
    # key_names = "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
    X = scipy.stats.zscore(X)

    # Coefficients from Krumhansl and Schmuckler
    # as reported here: http://rnhart.net/articles/key-finding/
    major = numpy.asarray(
        [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    )
    major = scipy.stats.zscore(major)

    minor = numpy.asarray(
        [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]
    )
    minor = scipy.stats.zscore(minor)

    # Generate all rotations of the major/minor profiles
    major = scipy.linalg.circulant(major)
    minor = scipy.linalg.circulant(minor)

    major = major.T.dot(X)
    minor = minor.T.dot(X)

    major_winner = int(numpy.argmax(major) + 0.5)
    minor_winner = int(numpy.argmax(minor) + 0.5)
    # essentia adds a 0.5? why
    # https://github.com/MTG/essentia/blob/master/src/algorithms/tonal/key.cpp#L370

    if major[major_winner] > minor[minor_winner]:
        return "{0}maj".format(_note_names[major_winner])
    elif major[major_winner] < minor[minor_winner]:
        return "{0}min".format(_note_names[minor_winner])
    else:
        if major_winner == minor_winner:
            return "{0}majmin".format(_note_names[major_winner])
        else:
            return "{0}maj OR {1}min".format(
                _note_names[major_winner], _note_names[minor_winner]
            )
--------------------------------------------------------------------------------
/chord_detection/harmonic_energy.py:
--------------------------------------------------------------------------------
import numpy
import scipy.signal
import librosa
import matplotlib.pyplot as plt
from chord_detection.multipitch import Multipitch
from chord_detection.chromagram import Chromagram
from chord_detection.dsp.frame import frame_cutter


class MultipitchHarmonicEnergy(Multipitch):
    def __init__(
        self, audio_path, frame_size=8192, num_harmonic=2, num_octave=2, num_bins=2
    ):
        super().__init__(audio_path)
        self.frame_size = frame_size
        self.num_harmonic = num_harmonic
        self.num_octave = num_octave
        self.num_bins = num_bins

    @staticmethod
    def display_name():
        return "Harmonic Energy (Stark, Plumbley)"

    @staticmethod
    def method_number():
        return 2

    def compute_pitches(self, display_plot_frame=-1):
        # first C = C3
        notes = librosa.cqt_frequencies(12, fmin=librosa.note_to_hz('C3'))

        divisor_ratio = (self.fs / 4.0) / self.frame_size
        self.dft_maxes = []

        overall_chromagram = Chromagram()

        for frame, x in enumerate(frame_cutter(self.x, self.frame_size)):
            chromagram = Chromagram()
            # window functions live in scipy.signal.windows (the old
            # scipy.signal.hamming alias is gone in newer SciPy)
            x = x * scipy.signal.windows.hamming(self.frame_size)
            x_dft = numpy.sqrt(numpy.absolute(numpy.fft.rfft(x)))
            for n in range(12):
                chroma_sum = 0.0
                for octave in range(1, self.num_octave + 1):
                    note_sum = 0.0
                    for harmonic in range(1, self.num_harmonic + 1):
                        x_dft_max = float("-inf")  # sentinel

                        k_prime = numpy.round(
                            (notes[n] * octave * harmonic) / divisor_ratio
                        )
                        k0 = int(k_prime - self.num_bins * harmonic)
                        k1 = int(k_prime + self.num_bins * harmonic)

                        best_ind = None
                        for k in range(k0, k1):
                            curr_ = x_dft[k]
                            if curr_ > x_dft_max:
                                x_dft_max = curr_
                                best_ind = k

64 | note_sum += x_dft_max * (1.0 / harmonic) 65 | self.dft_maxes.append((k0, best_ind, k1)) 66 | chroma_sum += note_sum 67 | chromagram[n] += chroma_sum 68 | 69 | overall_chromagram += chromagram 70 | 71 | if frame == display_plot_frame: 72 | _display_plots(self.clip_name, self.fs, self.frame_size, x_dft, self.x, x, self.dft_maxes) 73 | return overall_chromagram 74 | 75 | def _display_plots(clip_name, fs, frame_size, x_dft, x, x_frame, dft_maxes): 76 | pltlen = frame_size 77 | samples = numpy.arange(pltlen) 78 | dftlen = int(x_dft.shape[0] / 2) 79 | dft_samples = numpy.arange(dftlen) 80 | 81 | fig1, (ax1, ax2) = plt.subplots(2, 1) 82 | 83 | ax1.set_title("x[n] - {0}".format(clip_name)) 84 | ax1.set_xlabel("n (samples)") 85 | ax1.set_ylabel("amplitude") 86 | ax1.plot(samples, x[:pltlen], "b", alpha=0.3, linestyle=":", label="x[n]") 87 | ax1.plot( 88 | samples, 89 | x_frame[:pltlen], 90 | "r", 91 | alpha=0.4, 92 | linestyle="--", 93 | label="x[n], frame + ham", 94 | ) 95 | ax1.grid() 96 | ax1.legend(loc="upper right") 97 | 98 | ax2.set_title("X (DFT)") 99 | ax2.set_xlabel("fft bin") 100 | ax2.set_ylabel("magnitude") 101 | ax2.plot(dft_samples, x_dft[:dftlen], "b", alpha=0.5, label="X(n)") 102 | for i, dft_max in enumerate(dft_maxes): 103 | left, mid, right = dft_max 104 | ax2.plot(left, x_dft[:dftlen][left], "rx") 105 | ax2.plot(mid, x_dft[:dftlen][mid], "go") 106 | ax2.plot(right, x_dft[:dftlen][right], color="purple", marker="x") 107 | pitch = fs / mid 108 | note = librosa.hz_to_note(pitch, octave=False) 109 | pitch = round(pitch, 2) 110 | 111 | if (i % 17) == 0: 112 | # displaying too many of these clutters the graph 113 | ax2.text( 114 | mid, 1.2 * x_dft[:dftlen][mid], "{0}\n{1}".format(pitch, note) 115 | ) 116 | 117 | ax2.grid() 118 | ax2.legend(loc="upper right") 119 | 120 | plt.show() 121 | -------------------------------------------------------------------------------- /chord_detection/prime_multif0.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import math 3 | import random 4 | import scipy 5 | import scipy.signal 6 | import librosa 7 | import typing 8 | import peakutils 9 | from matplotlib import mlab 10 | import matplotlib.pyplot as plt 11 | from chord_detection.multipitch import Multipitch 12 | from chord_detection.chromagram import Chromagram 13 | from chord_detection.dsp.wfir import wfir 14 | from chord_detection.dsp.frame import frame_cutter 15 | from collections import OrderedDict 16 | 17 | 18 | class MultipitchPrimeMultiF0(Multipitch): 19 | def __init__( 20 | self, 21 | audio_path, 22 | num_harmonic=1, 23 | num_octave=2, 24 | harmonic_multiples_elim=5, 25 | harmonic_elim_runs=2, 26 | ): 27 | super().__init__(audio_path) 28 | self.num_harmonic = num_harmonic 29 | self.num_octave = num_octave 30 | self.harmonic_elim_runs = harmonic_elim_runs 31 | self.harmonic_multiples_elim = harmonic_multiples_elim 32 | 33 | @staticmethod 34 | def display_name(): 35 | return "Prime-multiF0 (Camacho, Kaver-Oreamuno)" 36 | 37 | @staticmethod 38 | def method_number(): 39 | return 4 40 | 41 | def compute_pitches(self, display_plot_frame=-1): 42 | overall_chromagram = Chromagram() 43 | 44 | # first C = C3 45 | notes = librosa.cqt_frequencies(12, fmin=librosa.note_to_hz('C3')) 46 | 47 | self.specgram_to_plot = [] 48 | 49 | for n in range(12): 50 | for octave in range(1, self.num_octave + 1): 51 | for harmonic in range(1, self.num_harmonic + 1): 52 | f_candidate = notes[n] * octave * harmonic 53 | window_size = int((8 / f_candidate) * 
self.fs) 54 | 55 | chromagram = Chromagram() 56 | for frame, x_t in enumerate(frame_cutter(self.x, window_size)): 57 | real_window_size = max(x_t.shape[0], window_size) 58 | window = numpy.hanning(real_window_size) 59 | s, f = mlab.magnitude_spectrum(x_t, Fs=self.fs, window=window) 60 | s = s[:int(s.shape[0]/2)] 61 | f = f[:int(f.shape[0]/2)] 62 | s[s < 0] = 0.0 # clip 63 | might_append_1 = s.copy() 64 | might_append_2 = [] 65 | 66 | for _ in range(self.harmonic_elim_runs): 67 | max_freq_idx = s.argmax(axis=0) 68 | max_f = f[max_freq_idx] 69 | try: 70 | note = librosa.hz_to_note(max_f, octave=False) 71 | chromagram[note] += s[max_freq_idx] 72 | might_append_2.append((max_freq_idx, max_f, note)) 73 | except (ValueError, OverflowError): 74 | continue 75 | eliminated = [] 76 | for harmonic_index_multiple in range( 77 | 1, self.harmonic_multiples_elim 78 | ): 79 | elim_freq = harmonic_index_multiple * max_f 80 | elim_index = numpy.where(f == elim_freq) 81 | s[elim_index] -= s[elim_index] 82 | might_append_3 = s.copy() 83 | 84 | if frame == display_plot_frame: 85 | # plot once and stop 86 | display_plot_frame = -1 87 | _display_plots(self.clip_name, self.x, ((might_append_1, might_append_2, might_append_3))) 88 | 89 | overall_chromagram += chromagram 90 | 91 | return overall_chromagram 92 | 93 | def _display_plots(clip_name, x, specgram_to_plot): 94 | fig1, (ax1, ax2) = plt.subplots(2, 1) 95 | 96 | ax1.set_title("x[n] - {0}".format(clip_name)) 97 | ax1.set_xlabel("n (samples)") 98 | ax1.set_ylabel("amplitude") 99 | ax1.plot(numpy.arange(x.shape[0]), x, "b", alpha=0.5, label="x[n]") 100 | ax1.grid() 101 | ax1.legend(loc="upper right") 102 | 103 | (s, notes, s_post) = specgram_to_plot 104 | 105 | ax2.set_title("S (specgram)".format(clip_name)) 106 | ax2.set_xlabel("frequency bins") 107 | ax2.set_ylabel("magnitude") 108 | ax2.plot(numpy.arange(s.shape[0]), s, "b", alpha=0.5, label="S") 109 | for (freq_idx, freq, note) in notes: 110 | ax2.plot(freq_idx, s[freq_idx], "ro") 111 | ax2.text(freq_idx, s[freq_idx], "{0}, {1}".format(round(freq, 2), note)) 112 | ax2.plot( 113 | numpy.arange(s_post.shape[0]), 114 | s_post, 115 | "g", 116 | alpha=0.5, 117 | label="S' (f0 candidates eliminated)", 118 | ) 119 | ax2.grid() 120 | ax2.legend(loc="upper right") 121 | 122 | plt.show() 123 | -------------------------------------------------------------------------------- /chord_detection/periodicity.py: -------------------------------------------------------------------------------- 1 | from chord_detection.chromagram import Chromagram 2 | import numpy 3 | import math 4 | import librosa 5 | 6 | 7 | _HAMMINGWINDOWNORM = [0.0011244659258033, 0.11559343551383, 0.42817348241183, 0.81822361914331, 1.0, 0.81822361914331, 0.42817348241183, 0.11559343551383, 0.0011244659258033] 8 | 9 | 10 | ''' 11 | borrowed heavily from https://github.com/BansMarbol/PolyPitch 12 | ''' 13 | 14 | class IterativeF0PeriodicityAnalysis(): 15 | def __init__( 16 | self, 17 | fs: float, 18 | window_size: int, 19 | max_voices=4, 20 | tau_min=1.0/2100.0, 21 | tau_max=1.0/40.0, 22 | tau_prec=0.0000001, 23 | Q=20, 24 | M=20, 25 | epsilon1=20, 26 | epsilon2=320, 27 | gamma=0.66, 28 | ): 29 | self.fs = fs 30 | self.window_size = window_size 31 | self.K = window_size/fs 32 | self.max_voices = max_voices 33 | self.tau_min = tau_min 34 | self.tau_max = tau_max 35 | self.tau_prec = tau_prec 36 | self.Q = Q 37 | self.M = M 38 | self.epsilon1 = epsilon1 39 | self.epsilon2 = epsilon2 40 | self.gamma = gamma # polyphony estimate 41 | 42 | self.voicesaliences = 
numpy.zeros(self.max_voices)
        self.voiceperiods = numpy.zeros(self.max_voices)
        self.smax = numpy.zeros(self.Q)
        self.tau_low = numpy.zeros(self.Q)
        self.tau_up = numpy.zeros(self.Q)

    def compute(self, Uk: numpy.ndarray):
        num_voices_detected = 0
        cancellation_weight = 1.0

        Ud = numpy.zeros(Uk.shape[0])
        Ur = numpy.array(Uk)

        # clear the arrays from the last run
        self.voicesaliences[:] = 0.0
        self.voiceperiods[:] = 0.0

        prevmixturescore = 0.0
        mixturescore = 0.0

        keepgoing = True

        while keepgoing:
            winningtau, bestsalience = self.min_search(Ur)
            self.voicesaliences[num_voices_detected] = bestsalience
            self.voiceperiods[num_voices_detected] = winningtau

            num_voices_detected += 1
            mixturescore += bestsalience

            testquantity = mixturescore/(math.pow(num_voices_detected, self.gamma))

            if num_voices_detected >= self.max_voices or testquantity <= prevmixturescore:
                keepgoing = False
            else:
                prevmixturescore = testquantity
                tau = winningtau
                topm = int(tau*(self.fs/self.window_size)*Uk.shape[0])

                srovertau = self.fs/tau
                weight = srovertau + self.epsilon1
                for m in range(1, topm):
                    partialK = m*self.K/tau + 0.5
                    if partialK < Uk.shape[0]:  # strict: partialK is used as an index below
                        Urweight = Ur[int(partialK)]
                        Urweight *= weight/(m*srovertau + self.epsilon2)

                        lowk = max(int(partialK-4), 0)
                        highk = min(int(partialK+4), Uk.shape[0]-1)  # clamp inside Ud

                        for j in range(lowk, highk+1):
                            hammingindexnow = int(j - partialK + 4)
                            val = _HAMMINGWINDOWNORM[hammingindexnow] * Urweight
                            Ud[j] += val

                for i in range(Uk.shape[0]):
                    diff = Uk[i] - cancellation_weight*Ud[i]
                    Ur[i] = max(diff, 0)

        if num_voices_detected > 0:
            num_voices_detected -= 1

        c = Chromagram()
        for i in range(self.voiceperiods.shape[0]):
            try:
                note = librosa.hz_to_note(self.fs/self.voiceperiods[i], octave=False)
                c[note] += self.voicesaliences[i]
            except OverflowError:
                continue

        return c, (self.voicesaliences.copy(), self.voiceperiods.copy())

    def min_search(self, Ur: numpy.ndarray):
        q = 0

        self.tau_low[0] = self.tau_min
        self.tau_up[0] = self.tau_max

        qbest = 0

        while (self.tau_up[qbest] - self.tau_low[qbest]) > self.tau_prec and q < self.Q-1:
            q += 1
            self.tau_low[q] = (self.tau_low[qbest] + self.tau_up[qbest])*0.5
            self.tau_up[q] = self.tau_up[qbest]
            self.tau_up[qbest] = self.tau_low[q]

            self.smax[q] = self.smax_fn(q, Ur)
            self.smax[qbest] = self.smax_fn(qbest, Ur)

            whichq = 0
            maxval = self.smax[0]

            for j in range(1, q+1):
                valnow = self.smax[j]
                if valnow > maxval:
                    maxval = valnow
                    whichq = j
            qbest = whichq

        winningtau = (self.tau_low[qbest] + self.tau_up[qbest])*0.5
        return winningtau, self.smax[qbest]

    def smax_fn(self, q: int, Ur: numpy.ndarray) -> float:
        tau = 0.5*(self.tau_low[q] + self.tau_up[q])
        deltatau = self.tau_up[q] - self.tau_low[q]

        salience = 0.0

        weight_numerator = self.fs/self.tau_low[q] + self.epsilon1
        def weight_denominator(m: int):
            return (m*self.fs/self.tau_up[q] + self.epsilon2)

        for m in range(1, self.M):
            lowk = int(m*self.K/(tau+0.5*deltatau) + 0.5)
            highk = int(m*self.K/(tau-0.5*deltatau) + 0.5)

            if lowk >= Ur.shape[0]:
                # partials beyond the spectrum contribute nothing
                break

            Umax = numpy.amax(Ur[lowk:highk+1])
            # the per-partial weight is weight_numerator / weight_denominator(m),
            # matching the cancellation weight in compute() above
            salience += Umax / weight_denominator(m)

        salience *= weight_numerator
        return salience
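

# Rough intuition for the numbers in smax_fn, under the default parameters
# (the concrete values are illustrative): with fs = 22050 and a candidate
# period tau near 1/440 s, fs/tau is on the order of 10^7, so epsilon1 and
# epsilon2 barely matter and the per-partial weight
# (fs/tau_low + epsilon1) / (m*fs/tau_up + epsilon2) is approximately 1/m --
# i.e. successive harmonics of a period candidate contribute with roughly
# harmonically decaying weight to its salience.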
--------------------------------------------------------------------------------
/chord_detection/esacf.py:
--------------------------------------------------------------------------------
import numpy
import scipy.signal
import librosa
import typing
import peakutils
import matplotlib.pyplot as plt
from chord_detection.multipitch import Multipitch
from chord_detection.chromagram import Chromagram
from chord_detection.dsp.wfir import wfir
from chord_detection.dsp.frame import frame_cutter
from chord_detection.dsp.lowpass import lowpass_filter


class MultipitchESACF(Multipitch):
    def __init__(
        self,
        audio_path,
        ham_ms=46.4,
        k=0.67,
        n_peaks_elim=6,
        peak_thresh=0.1,
        peak_min_dist=10,
    ):
        super().__init__(audio_path)
        self.ham_samples = int(self.fs * ham_ms / 1000.0)
        self.k = k
        self.n_peaks_elim = n_peaks_elim
        self.peak_thresh = peak_thresh
        self.peak_min_dist = peak_min_dist

    @staticmethod
    def display_name():
        return "ESACF (Tolonen, Karjalainen)"

    @staticmethod
    def method_number():
        return 1

    def compute_pitches(self, display_plot_frame=-1):
        overall_chromagram = Chromagram()

        for frame, x_frame in enumerate(frame_cutter(self.x, self.ham_samples)):
            x = wfir(x_frame, self.fs, 12)

            x_hi = _highpass_filter(x, self.fs)
            x_hi = numpy.clip(x_hi, 0, None)  # half-wave rectification
            x_hi = lowpass_filter(x_hi, self.fs, 1000)  # paper wants it

            x_lo = lowpass_filter(x, self.fs, 1000)

            x_sacf = _sacf([x_lo, x_hi], k=self.k)
            x_esacf, harmonic_elim_plots = _esacf(x_sacf, self.n_peaks_elim, True)

            peak_indices = peakutils.indexes(
                x_esacf, thres=self.peak_thresh, min_dist=self.peak_min_dist
            )

            peak_indices_interp = peakutils.interpolate(
                numpy.arange(x_esacf.shape[0]), x_esacf, ind=peak_indices
            )

            chromagram = Chromagram()
            for i, tau in enumerate(peak_indices_interp):
                pitch = self.fs / tau
                try:
                    note = librosa.hz_to_note(pitch, octave=False)
                    chromagram[note] += x_esacf[peak_indices[i]]
                except ValueError:
                    continue
            overall_chromagram += chromagram

            if frame == display_plot_frame:
                _display_plots(
                    self.clip_name,
                    self.fs,
                    self.ham_samples,
                    frame,
                    x,
                    x_lo,
                    x_hi,
                    x_sacf,
                    x_esacf,
                    harmonic_elim_plots,
                    peak_indices,
                    peak_indices_interp,
                )

        return overall_chromagram
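

# _sacf below computes a "generalized" autocorrelation: with k = 2 the
# FFT -> magnitude**k -> IFFT round trip is the textbook Wiener-Khinchin
# autocorrelation, while the default k = 0.67 compresses the spectrum's
# dynamic range before inverting, which sharpens the periodicity peaks.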


def _sacf(x_channels: typing.List[numpy.ndarray], k=None) -> numpy.ndarray:
    # k is the same as p (power) in the Klapuri paper, method 3
    if not k:
        k = 0.67

    shape = x_channels[0].shape[0]

    running_sum = numpy.zeros(shape)

    for xc in x_channels:
        running_sum += numpy.abs(numpy.fft.fft(xc)) ** k

    return numpy.real(numpy.fft.ifft(running_sum))[:int((shape-1)/2)]


def _esacf(
    x2: numpy.ndarray, n_peaks: int, ret_plots: bool
) -> typing.Tuple[numpy.ndarray, typing.List[numpy.ndarray]]:
    """
    enhance the SACF with the following procedure:
    clip to positive values, time-stretch by n_peaks,
    subtract the original
    """
    x2tmp = x2.copy()
    to_plot = []

    for timescale in range(2, n_peaks + 1):
        x2tmp = numpy.clip(x2tmp, 0, None)
        x2stretched = librosa.effects.time_stretch(x2tmp, rate=timescale).copy()

        x2stretched.resize(x2tmp.shape)
        if ret_plots:
            to_plot.append(x2stretched)
        x2tmp -= x2stretched
        x2tmp = numpy.clip(x2tmp, 0, None)

    return x2tmp, to_plot


def _highpass_filter(x: numpy.ndarray, fs: float) -> numpy.ndarray:
    b, a = scipy.signal.butter(2, [1000 / (fs / 2)], btype="high")
    return scipy.signal.lfilter(b, a, x)


def _display_plots(
    clip_name,
    fs,
    frame_size,
    frame,
    x,
    x_lo,
    x_hi,
    x_sacf,
    x_esacf,
    harmonic_elim_plots,
    peak_indices,
    peak_indices_interp,
):
    samples = numpy.arange(frame_size)

    fig1, (ax1, ax2) = plt.subplots(2, 1)

    ax1.set_title("{0} - x[n], frame {1}".format(clip_name, frame))
    ax1.set_xlabel("n (samples)")
    ax1.set_ylabel("amplitude")
    ax1.plot(samples, x, "b", alpha=0.5, label="x[n]")
    ax1.plot(samples, x_lo, "g", alpha=0.5, linestyle="--", label="x[n] lo")
    ax1.plot(samples, x_hi, "r", alpha=0.5, linestyle=":", label="x[n] hi")
    ax1.grid()
    ax1.legend(loc="upper right")

    ax2.set_title("SACF, ESACF")
    ax2.set_xlabel("n (samples)")
    ax2.set_ylabel("normalized amplitude")

    i = 0
    for i, h in enumerate(harmonic_elim_plots):
        h_norm = h / numpy.max(h)
        ax2.plot(
            samples,
            numpy.concatenate((h_norm, numpy.zeros(samples.shape[0] - h.shape[0]))),
            "C{0}".format(i),
            alpha=0.1,
            label="time stretch {0}".format(2 + i),
        )
    i += 1
    sacf_norm = x_sacf / numpy.max(x_sacf)
    ax2.plot(
        samples,
        numpy.concatenate(
            (sacf_norm, numpy.zeros(samples.shape[0] - sacf_norm.shape[0]))
        ),
        "C{0}".format(i),
        linestyle="--",
        alpha=0.5,
        label="sacf",
    )
    esacf_norm = x_esacf / numpy.max(x_esacf)
    i += 1
    ax2.plot(
        samples,
        numpy.concatenate(
            (esacf_norm, numpy.zeros(samples.shape[0] - esacf_norm.shape[0]))
        ),
        "C{0}".format(i),
        linestyle=":",
        alpha=0.5,
        label="esacf",
    )
    scatter_peaks = esacf_norm[peak_indices]
    for i, ind in enumerate(peak_indices_interp):
        pitch = round(fs / ind, 2)
        text = "{0}, {1}".format(pitch, librosa.hz_to_note(pitch, octave=False))
        x = peak_indices_interp[i]
        y = scatter_peaks[i]
        ax2.plot(x, y, "rx")
        ax2.text(x, y, text)

    ax2.grid()
    ax2.legend(loc="upper right")

    plt.show()
--------------------------------------------------------------------------------
/chord_detection/iterative_f0.py:
--------------------------------------------------------------------------------
import numpy
import math
import random
import typing
import scipy.signal
import librosa
import matplotlib.pyplot as plt
from chord_detection.multipitch import Multipitch
from chord_detection.chromagram import Chromagram
from chord_detection.dsp.wfir import wfir
from chord_detection.dsp.frame import frame_cutter
from chord_detection.dsp.lowpass import lowpass_filter
from chord_detection.periodicity import IterativeF0PeriodicityAnalysis
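

# Pipeline sketch (per the Klapuri auditory-model papers cited in the README):
# the signal is split into ~70 critical-band channels by the auditory
# filterbank below, each channel is level-compressed (warped FIR), full-wave
# rectified, and smoothed, then the per-channel spectra are summed into a
# bandwise summary spectrum Ut whose periodicity is analyzed by the iterative
# cancel-and-detect loop in IterativeF0PeriodicityAnalysis.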


class MultipitchIterativeF0(Multipitch):
    def __init__(
        self,
        audio_path,
        frame_size=8192,
        power=1.0,
        channels=70,
        zeta0=2.3,
        zeta1=0.39,
        peak_thresh=0.5,
        peak_min_dist=10,
        harmonic_multiples_elim=5,
    ):
        super().__init__(audio_path)
        self.frame_size = frame_size
        self.num_frames = math.ceil(self.x.shape[0] / self.frame_size)
        self.power = power
        self.channels = [
            229 * (10 ** ((zeta1 * c + zeta0) / 21.4) - 1) for c in range(channels)
        ]
        self.peak_thresh = peak_thresh
        self.peak_min_dist = peak_min_dist
        self.harmonic_multiples_elim = harmonic_multiples_elim
        self.periodicity_estimator = IterativeF0PeriodicityAnalysis(self.fs, self.frame_size)

    @staticmethod
    def display_name():
        return "Iterative F0 (Klapuri, Anssi)"

    @staticmethod
    def method_number():
        return 3

    def compute_pitches(self, display_plot_frame=-1):
        ycn = [None for _ in range(len(self.channels))]

        for i, fc in enumerate(self.channels):
            yc = _auditory_filterbank(self.x, self.fs, fc)
            yc = wfir(yc, self.fs, 12)  # dynamic level compression
            yc = numpy.abs(yc)  # full-wave rectification
            yc = (
                yc + lowpass_filter(yc, self.fs, fc)
            ) / 2.0  # sum with low-pass filtered version of self at center-channel frequency

            ycn[i] = yc

        Yct = [
            [None for _ in range(len(self.channels))] for _ in range(self.num_frames)
        ]
        Ut = [None for _ in range(self.num_frames)]

        for channel, fc in enumerate(self.channels):
            for frame, yct in enumerate(frame_cutter(ycn[channel], self.frame_size)):
                # hamming windowed and zero-padded to 2x length
                yct = yct * scipy.signal.windows.hamming(yct.shape[0])
                yct = numpy.concatenate((yct, numpy.zeros(yct.shape[0])))
                Yct[frame][channel] = yct.copy()

        shape = Yct[0][0].shape[0]
        for frame in range(self.num_frames):
            running_sum = numpy.zeros(shape)

            for channel, Yct_ in enumerate(Yct[frame]):
                running_sum += numpy.abs(numpy.fft.fft(Yct_)) ** self.power
            Ut[frame] = running_sum

        overall_chromagram = Chromagram()
        # periodicity estimate - iterative f0 cancellation/tau/salience loop
        for frame, Uk in enumerate(Ut):
            frame_chromagram, salience_plots = self.periodicity_estimator.compute(Uk)
            overall_chromagram += frame_chromagram

            if frame == display_plot_frame:
                _display_plots(self.clip_name, self.fs, self.frame_size, self.x, self.channels, ycn, Ut[frame], salience_plots)

        return overall_chromagram


def _display_plots(clip_name, fs, frame_size, x, channels, ytc, Ut, splots):
    fig1, (ax1, ax2) = plt.subplots(2, 1)

    ax1.set_title(r"x[n], $y_c$[n], normalized - {0}".format(clip_name))
    ax1.set_xlabel("n (samples)")
    ax1.set_ylabel("amplitude")
    ax1.plot(
        numpy.arange(frame_size),
        x[:frame_size]/numpy.max(x),
        "b",
        alpha=0.75,
        linestyle="--",
        label="x[n]",
    )

    for i, ch in enumerate(
        [random.randrange(0, len(channels)) for _ in range(6)]
    ):
        ax1.plot(
            numpy.arange(frame_size),
            ytc[ch][:frame_size]/numpy.max(ytc[ch][:frame_size]),
            color="C{0}".format(i),
            linestyle="--",
            alpha=0.5,
            label=r"$y_c$[n], $f_c$ = {0}".format(round(channels[ch], 2)),
        )

    ax1.grid()
    ax1.legend(loc="upper right")

    ax2.set_title("Ut, bandwise summary spectrum")
    ax2.set_xlabel("fft bin")
    ax2.set_ylabel("amplitude")
    ax2.plot(
        numpy.arange(frame_size/2-1024),
        Ut[1024:int(frame_size/2)],
        "b",
        alpha=0.75,
        linestyle="--",
        label="Ut",
    )

    max_ut = numpy.amax(Ut[1024:int(frame_size/2)])

    (saliences, periods) = splots

    tau = int(1/periods[0])

    ax2.plot(
        tau,
        max_ut/2,
        'rx',
        label=r'$s(\hat{\tau})$ = ' + str(round(saliences[0], 2))
    )

    pitch = fs / periods[0]
    note = librosa.hz_to_note(pitch, octave=False)
    pitch = round(pitch, 2)

    ax2.text(
        tau,
        1.1*(max_ut/2),
        '{0}, {1}'.format(pitch, note)
    )

    ax2.grid()
    ax2.legend(loc="upper right")

    plt.show()
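

# The filterbank below approximates one critical band of Klapuri's auditory
# model with two distinct second-order resonators, each applied twice -- a
# cascade of J = 4 stages centered at fc, where the -3/J dB per-stage design
# of A keeps the full cascade about 3 dB down at the band edges.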


def _auditory_filterbank(x, fs, fc):
    # note the argument order: sampling rate fs, then center frequency fc,
    # matching the call in compute_pitches above
    J = 4

    # bc3db = -3/J dB
    A = numpy.exp(-(3 / J) * numpy.pi / (fs * numpy.sqrt(2 ** (1 / J) - 1)))

    cos_theta1 = (1 + A * A) / (2 * A) * numpy.cos(2 * numpy.pi * fc / fs)
    cos_theta2 = (2 * A) / (1 + A * A) * numpy.cos(2 * numpy.pi * fc / fs)
    rho1 = (1 / 2) * (1 - A * A)
    rho2 = (1 - A * A) * numpy.sqrt(1 - cos_theta2 ** 2)

    resonator_1_b = [rho1, 0, -rho1]
    resonator_1_a = [1, -A * cos_theta1, A * A]

    resonator_2_b = [rho2]
    resonator_2_a = [1, -A * cos_theta2, A * A]

    x = scipy.signal.lfilter(resonator_1_b, resonator_1_a, x)
    x = scipy.signal.lfilter(resonator_1_b, resonator_1_a, x)
    x = scipy.signal.lfilter(resonator_2_b, resonator_2_a, x)
    x = scipy.signal.lfilter(resonator_2_b, resonator_2_a, x)

    return x


"""
I would've used the sacf() function from method 1 here, but the IFFT is
specific to that method, and we're using the weighted salience for
periodicity analysis here
"""


def _bandwise_summary_spectrum(
    x_channels: typing.List[numpy.ndarray], k=None
) -> numpy.ndarray:
    # k is the same as p (power) in the Klapuri paper
    if not k:
        k = 0.67

    shape = x_channels[0].shape[0]
    running_sum = numpy.zeros(shape)

    for xc in x_channels:
        running_sum += numpy.abs(numpy.fft.fft(xc)) ** k

    return running_sum[:int((shape-1)/2)]
--------------------------------------------------------------------------------