├── tests
│   ├── __init__.py
│   ├── test.py
│   ├── test_key_detection.py
│   └── gen_test_clips.py
├── chord_detection
│   ├── dsp
│   │   ├── __init__.py
│   │   ├── lowpass.py
│   │   ├── frame.py
│   │   └── wfir.py
│   ├── __init__.py
│   ├── multipitch.py
│   ├── chord_detect.py
│   ├── chromagram.py
│   ├── harmonic_energy.py
│   ├── prime_multif0.py
│   ├── periodicity.py
│   ├── esacf.py
│   └── iterative_f0.py
├── .gitignore
├── requirements.txt
├── .github
│   ├── piano_c_1.png
│   ├── piano_c_2.png
│   ├── piano_c_3.png
│   └── piano_c_4.png
├── pyproject.toml
├── LICENSE
└── README.md

/tests/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/chord_detection/dsp/__init__.py:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
*.pyc
*.wav
*.egg-info
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
scipy
numpy
librosa
matplotlib
soundfile
peakutils
--------------------------------------------------------------------------------
/.github/piano_c_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sevagh/chord-detection/HEAD/.github/piano_c_1.png
--------------------------------------------------------------------------------
/.github/piano_c_2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sevagh/chord-detection/HEAD/.github/piano_c_2.png
--------------------------------------------------------------------------------
/.github/piano_c_3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sevagh/chord-detection/HEAD/.github/piano_c_3.png
--------------------------------------------------------------------------------
/.github/piano_c_4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/sevagh/chord-detection/HEAD/.github/piano_c_4.png
--------------------------------------------------------------------------------
/chord_detection/dsp/lowpass.py:
--------------------------------------------------------------------------------
import scipy
import numpy
import scipy.signal


def lowpass_filter(x: numpy.ndarray, fs: float, band: float) -> numpy.ndarray:
    b, a = scipy.signal.butter(2, [band / (fs / 2)], btype="low")
    return scipy.signal.lfilter(b, a, x)
--------------------------------------------------------------------------------
/chord_detection/__init__.py:
--------------------------------------------------------------------------------
from chord_detection.esacf import MultipitchESACF
from chord_detection.harmonic_energy import MultipitchHarmonicEnergy
from chord_detection.iterative_f0 import MultipitchIterativeF0
from chord_detection.prime_multif0 import MultipitchPrimeMultiF0

from chord_detection.multipitch import METHODS
from chord_detection.chromagram import detect_key
--------------------------------------------------------------------------------
/chord_detection/dsp/frame.py:
-------------------------------------------------------------------------------- 1 | import numpy 2 | import math 3 | 4 | 5 | def frame_cutter(x: numpy.ndarray, frame_size: int) -> numpy.ndarray: 6 | if len(x.shape) != 1: 7 | raise ValueError("Only 1D numpy ndarrays are supported") 8 | 9 | num_frames = float(x.shape[0]) / float(frame_size) 10 | num_frames = int(math.ceil(num_frames)) 11 | pad = int(num_frames * frame_size - x.shape[0]) 12 | x_pad = numpy.concatenate((x, numpy.zeros(pad))) 13 | for x_frame in numpy.split(x_pad, num_frames): 14 | yield x_frame 15 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools>=61.0"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "chord-detection" 7 | version = "0.0.1" 8 | description = "DSP algorithms for chord detection + key estimation" 9 | readme = { file = "README.md", content-type = "text/markdown" } 10 | authors = [ 11 | { name = "Sevag Hanssian (sevagh@protonmail.com)" }, 12 | ] 13 | license = { text = "MIT" } 14 | requires-python = ">=3" 15 | dependencies = [ 16 | "scipy", 17 | "numpy", 18 | "matplotlib", 19 | "librosa", 20 | "soundfile", 21 | "peakutils", 22 | ] 23 | 24 | [project.scripts] 25 | chord-detect = "chord_detection.chord_detect:main_cli" 26 | -------------------------------------------------------------------------------- /chord_detection/dsp/wfir.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import scipy 3 | import scipy.signal 4 | 5 | 6 | def _bark_warp_coef(fs): 7 | return ( 8 | 1.0674 * numpy.sqrt((2.0 / numpy.pi) * numpy.arctan(0.06583 * fs / 1000.0)) 9 | - 0.1916 10 | ) 11 | 12 | 13 | def _warped_remez_coefs(fs, order): 14 | l = 20 15 | r = min(20000, fs / 2 - 1) 16 | t = 1 17 | 18 | c = scipy.signal.remez( 19 | order + 1, [0, l - t, l, r, r + t, 0.5 * fs], [0, 1, 0], fs=fs 20 | ) 21 | return c.tolist() 22 | 23 | 24 | # see: https://sevagh.github.io/warped-linear-prediction/ 25 | def wfir(x: numpy.ndarray, fs: float, order: int) -> numpy.ndarray: 26 | a = _bark_warp_coef(fs) 27 | 28 | B = [-a.conjugate(), 1] 29 | A = [1, -a] 30 | ys = [0] * order 31 | 32 | ys[0] = scipy.signal.lfilter(B, A, x) 33 | for i in range(1, len(ys)): 34 | ys[i] = scipy.signal.lfilter(B, A, ys[i - 1]) 35 | 36 | c = _warped_remez_coefs(fs, order) 37 | 38 | x_hat = c[0] * x 39 | for i in range(order): 40 | x_hat += c[i + 1] * ys[i] 41 | 42 | r = x - x_hat 43 | return r 44 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Sevag Hanssian 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/tests/test.py:
--------------------------------------------------------------------------------
import unittest
from chord_detection import (
    MultipitchESACF,
    MultipitchIterativeF0,
    MultipitchHarmonicEnergy,
    MultipitchPrimeMultiF0,
)


TESTCASES = {
    "tests/test_1_note_Csharp3.wav": "010000000000",
    "tests/test_1_note_E4.wav": "000010000000",
    "tests/test_2_notes_E2_F3.wav": "000011000000",
    "tests/test_2_notes_G3_Asharp4.wav": "000000010010",
    "tests/test_3_notes_G2_B2_G#3.wav": "000000011001",
}


class TestChordDetection(unittest.TestCase):
    def test_all(self):
        # smoke test: run every method on every clip and print the computed
        # chromagram next to the expected one (no exact assertion -- the
        # methods are approximate)
        for test_clip, expected_result in TESTCASES.items():
            compute_objs = [
                MultipitchESACF(test_clip),
                MultipitchHarmonicEnergy(test_clip),
                MultipitchIterativeF0(test_clip),
                MultipitchPrimeMultiF0(test_clip),
            ]
            for c in compute_objs:
                ret = c.compute_pitches()
                print(
                    "{0}\n{1}\n{2} expected\n{3} actual\n".format(
                        c.display_name(), test_clip, expected_result, ret
                    )
                )
--------------------------------------------------------------------------------
/chord_detection/multipitch.py:
--------------------------------------------------------------------------------
from abc import ABCMeta, abstractmethod
from pathlib import Path
import librosa
from collections import OrderedDict

METHODS = OrderedDict()


class Multipitch(object):
    __metaclass__ = ABCMeta

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        method_num = cls.method_number()
        if method_num in METHODS.keys():
            raise ValueError(
                "Method number {0} already registered as {1} in {2}".format(
                    method_num, METHODS[method_num], METHODS
                )
            )
        METHODS[cls.method_number()] = cls

    @abstractmethod
    def __init__(self, audio_path):
        x, self.fs = librosa.load(audio_path)
        if len(x.shape) == 2 and x.shape[0] == 2:
            # librosa returns stereo as shape (2, n); average the two
            # channels down to a mono signal
            self.x = (x[0] + x[1]) / 2.0
        else:
            self.x = x
        self.clip_name = Path(audio_path).name

    @abstractmethod
    def compute_pitches(self):
        pass

    @staticmethod
    @abstractmethod
    def display_name():
        raise ValueError("unimplemented")

    @staticmethod
    @abstractmethod
    def method_number():
        raise ValueError("unimplemented")
--------------------------------------------------------------------------------
/tests/test_key_detection.py:
--------------------------------------------------------------------------------
import unittest
import numpy
from chord_detection import detect_key


TESTCASES = {
    "Cmaj": numpy.asarray(
        [
100.0, # C-E-G, c major pitches 13 | 0.0, 14 | 0.0, 15 | 0.0, 16 | 100.0, # E 17 | 0.0, 18 | 0.0, 19 | 100.0, # G 20 | 0.0, 21 | 0.0, 22 | 0.0, 23 | 0.0, 24 | ] 25 | ), 26 | "Cmin": numpy.asarray( 27 | [ 28 | 50.0, # C-D-Eb-G, c minor pitches 29 | 0.0, 30 | 50.0, # D 31 | 50.0, # D#/Eb 32 | 0.0, 33 | 0.0, 34 | 0.0, 35 | 10.0, # G 36 | 0.0, 37 | 0.0, 38 | 0.0, 39 | 0.0, 40 | ] 41 | ), 42 | "G#maj": numpy.asarray( 43 | [ 44 | 0.0, 45 | 10.0, # C# 46 | 0.0, 47 | 10.0, # D# 48 | 0.0, 49 | 0.0, 50 | 0.0, 51 | 0.0, 52 | 10.0, # G# 53 | 0.0, 54 | 10.0, # A# 55 | 0.0, 56 | ] 57 | ), 58 | } 59 | 60 | 61 | class TestKeyDetection(unittest.TestCase): 62 | def test_krumhansl_schmuckler_key_detection(self): 63 | for expected_key, X in TESTCASES.items(): 64 | self.assertEqual(detect_key(X), expected_key) 65 | -------------------------------------------------------------------------------- /tests/gen_test_clips.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | import unittest 4 | import numpy 5 | import os 6 | import librosa 7 | import soundfile 8 | import sys 9 | from tempfile import TemporaryDirectory 10 | 11 | 12 | def main(): 13 | dest = "tests/test_1_note_Csharp3.wav" 14 | tone = librosa.tone(138.59, sr=22050, length=44100) 15 | soundfile.write(dest, tone, 22050) 16 | print("Created {0} with note C#3".format(dest)) 17 | 18 | dest = "tests/test_1_note_E4.wav" 19 | tone = librosa.tone(329.63, sr=22050, length=44100) 20 | soundfile.write(dest, tone, 22050) 21 | print("Created {0} with note E4".format(dest)) 22 | 23 | dest = "tests/test_2_notes_E2_F3.wav" 24 | tone = numpy.zeros(44100) 25 | tone += librosa.tone(82.41, sr=22050, length=44100) 26 | tone += librosa.tone(174.61, sr=22050, length=44100) 27 | soundfile.write(dest, tone, 22050) 28 | print("Created {0} with notes E2, F3".format(dest)) 29 | 30 | dest = "tests/test_2_notes_G3_Asharp4.wav" 31 | tone = numpy.zeros(44100) 32 | tone += librosa.tone(196, sr=22050, length=44100) 33 | tone += librosa.tone(466.16, sr=22050, length=44100) 34 | soundfile.write(dest, tone, 22050) 35 | print("Created {0} with notes G3, A#4".format(dest)) 36 | 37 | dest = "tests/test_3_notes_G2_B2_G#3.wav" 38 | tone = numpy.zeros(44100) 39 | tone += librosa.tone(98, sr=22050, length=44100) 40 | tone += librosa.tone(123.47, sr=22050, length=44100) 41 | tone += librosa.tone(207.65, sr=22050, length=44100) 42 | soundfile.write(dest, tone, 22050) 43 | print("Created {0} with notes G2, B2, G#3".format(dest)) 44 | 45 | return 0 46 | 47 | 48 | if __name__ == "__main__": 49 | sys.exit(main()) 50 | -------------------------------------------------------------------------------- /chord_detection/chord_detect.py: -------------------------------------------------------------------------------- 1 | from chord_detection.esacf import MultipitchESACF 2 | from chord_detection.harmonic_energy import MultipitchHarmonicEnergy 3 | from chord_detection.iterative_f0 import MultipitchIterativeF0 4 | from chord_detection.prime_multif0 import MultipitchPrimeMultiF0 5 | from chord_detection.multipitch import METHODS 6 | 7 | import sys 8 | import argparse 9 | 10 | 11 | def main_cli(): 12 | method_nums = [k for k in METHODS.keys()] 13 | method_nums_help_string = "-1 = all, " 14 | for k in METHODS.keys(): 15 | method_nums_help_string += "{0} ({1}), ".format(k, METHODS[k].display_name()) 16 | 17 | method_nums_help_string = method_nums_help_string[:-2] # strip trailing ', ' 18 | 19 | parser = argparse.ArgumentParser( 20 | 
prog="chord-detection", 21 | description="Collection of chord-detection techniques", 22 | formatter_class=argparse.RawDescriptionHelpFormatter, 23 | ) 24 | 25 | parser.add_argument( 26 | "--key", 27 | action="store_true", 28 | help="estimate the key using the Krumhansl-Schmuckler key-finding algorithm", 29 | ) 30 | parser.add_argument( 31 | "--displayplots", 32 | type=int, 33 | help="display intermediate plots at specified frame with matplotlib", 34 | default=-1, 35 | ) 36 | parser.add_argument( 37 | "--method", 38 | type=int, 39 | help=method_nums_help_string, 40 | default=next(iter(METHODS.keys())), 41 | ) 42 | parser.add_argument("input_path", help="Path to WAV audio clip") 43 | args = parser.parse_args() 44 | 45 | compute_objs = [] 46 | 47 | if args.method == -1: 48 | for v in METHODS.values(): 49 | compute_objs.append(v(args.input_path)) 50 | else: 51 | try: 52 | compute_objs.append(METHODS[args.method](args.input_path)) 53 | except KeyError: 54 | raise ValueError("valid methods: {0}".format(method_nums_help_string)) 55 | 56 | for compute_obj in compute_objs: 57 | print( 58 | "{0} - {1}".format(compute_obj.method_number(), compute_obj.display_name()) 59 | ) 60 | chromagram = compute_obj.compute_pitches(args.displayplots) 61 | print(chromagram) 62 | if args.key: 63 | print(chromagram.key()) 64 | 65 | 66 | if __name__ == "__main__": 67 | main_cli() 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # chord-detection 2 | 3 | This repository is an MIT-licensed collection of multipitch/polyphonic instrument chord and key detection methods, implemented from academic papers using Python. 4 | 5 | ## Usage 6 | 7 | Pip install it from the git url: 8 | ``` 9 | $ pip install git+https://github.com/sevagh/chord-detection.git 10 | ``` 11 | 12 | This installs the library `chord_detection` and the command-line tool `chord-detect`: 13 | ``` 14 | $ chord-detect 15 | usage: chord-detection [-h] [--key] [--displayplots DISPLAYPLOTS] [--method METHOD] input_path 16 | chord-detection: error: the following arguments are required: input_path 17 | ``` 18 | 19 | Run tests: 20 | ``` 21 | $ python ./tests/gen_test_clips.py 22 | $ python -m unittest 23 | ``` 24 | 25 | ## Methods 26 | 27 | The results displayed come from a wav clip of a piano C major chord. The outputs are the summed chromagrams across all the frames, normalized to 9 (the biggest single-digit number), in a 12-digit chroma vector string. 28 | 29 | The expected notes of a C major chord are C E G, so the expected answer should resemble `C000E00G0000`, where `C > E, C > G`. The values of C, E, G are specific to the method being used, but it's some representation of the energy of the detected pitch, e.g. spectral energy, summed FFT. 30 | 31 | ### ESACF (Tolonen, Karjalainen) 32 | 33 | ``` 34 | reference: C000E00G0000 35 | computed: 900003001000, key: Cmaj 36 | ``` 37 | 38 | ![esacf](.github/piano_c_1.png) 39 | 40 | _T. Tolonen and M. Karjalainen, "A computationally efficient multipitch analysis model," in IEEE Transactions on Speech and Audio Processing, vol. 8, no. 6, pp. 708-716, Nov. 2000._ 41 | 42 | _V. Zenz and A. Rauber, "Automatic Chord Detection Incorporating Beat and Key Detection," 2007 IEEE International Conference on Signal Processing and Communications, Dubai, 2007, pp. 

### ESACF (Tolonen, Karjalainen)

```
reference: C000E00G0000
computed:  900003001000, key: Cmaj
```

![esacf](.github/piano_c_1.png)

_T. Tolonen and M. Karjalainen, "A computationally efficient multipitch analysis model," in IEEE Transactions on Speech and Audio Processing, vol. 8, no. 6, pp. 708-716, Nov. 2000._

_V. Zenz and A. Rauber, "Automatic Chord Detection Incorporating Beat and Key Detection," 2007 IEEE International Conference on Signal Processing and Communications, Dubai, 2007, pp. 1175-1178._

### Harmonic Energy (Stark, Plumbley)

```
reference: C000E00G0000
computed:  921111111111, key: Cmin
```

![harmeng](.github/piano_c_2.png)

_Adam M. Stark and Mark D. Plumbley, "Real-Time Chord Recognition for Live Performance," in Proceedings of the 2009 International Computer Music Conference (ICMC 2009), Montreal, Canada, 16-21 August 2009._

### Iterative F0 (Klapuri, Anssi)

```
reference: C000E00G0000
computed:  900301000000, key: Cmin
```

![iterativef0](.github/piano_c_3.png)

_Anssi Klapuri, "Multipitch Analysis of Polyphonic Music and Speech Signals Using an Auditory Model," IEEE Transactions on Audio, Speech, and Language Processing, vol. 16, no. 2, February 2008, p. 255._

_Anssi Klapuri, "Multiple Fundamental Frequency Estimation by Summing Harmonic Amplitudes," ISMIR 2006._

### Prime-multiF0 (Camacho, Kaver-Oreamuno)

```
reference: C000E00G0000
computed:  951000000002, key: Cmin
```

![primemultif0](.github/piano_c_4.png)

_A. Camacho and I. Kaver-Oreamuno, "A multipitch estimation algorithm based on fundamental frequencies and prime harmonics," Sound and Music Computing Conference 2013._
--------------------------------------------------------------------------------
/chord_detection/chromagram.py:
--------------------------------------------------------------------------------
from collections import OrderedDict
from collections.abc import Sequence
import numpy
import scipy.linalg
import scipy.stats


_note_names = ["C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"]


class Chromagram(Sequence):
    def __init__(self):
        self.c = OrderedDict()
        for n in _note_names:
            self.c[n] = 0.0
        self.p = None
        super().__init__()

    def __getitem__(self, i):
        if type(i) == str:
            i = i.replace('♯', '#')  # librosa uses the unicode sharp sign
            return self.c[i]
        elif type(i) == int:
            return self.c[_note_names[i]]
        else:
            raise TypeError("index must be a note name or an integer")

    def __setitem__(self, i, item):
        if type(i) == str:
            self.c[i] = item
        elif type(i) == int:
            self.c[_note_names[i]] = item
        else:
            raise TypeError("index must be a note name or an integer")

    def __len__(self):
        return len(self.c)

    def __repr__(self):
        return self._pack()

    def __add__(self, other):
        # in-place accumulation; used as `overall_chromagram += chromagram`
        for k in self.c.keys():
            self.c[k] += other.c[k]
        return self

    def key(self):
        return detect_key(numpy.asarray([v for v in self.c.values()]))

    def _pack(self):
        nc = _normalize(self.c)

        pack = [0 for _ in range(12)]

        for i, v in enumerate(nc.values()):
            pack[i] = int(round(v))

        return "".join([str(p_) for p_ in pack])


def _normalize(c: OrderedDict):
    c_ = c.copy()

    chromagram_min = min(c_.values())
    if chromagram_min != 0.0:
        for k in c_.keys():
            c_[k] = round(c_[k] / chromagram_min, 3)

    chromagram_max = max(c_.values())
    if chromagram_max > 9.0:
        for k in c_.keys():
            c_[k] *= 9.0 / chromagram_max

    return c_
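

# Worked example of the normalize-and-pack scheme above (the raw energies are
# assumed, for illustration): {C: 45.0, E: 20.0, G: 10.0, all others: 0.0}.
# The minimum is 0.0, so the min-scaling step is skipped; the maximum 45.0
# exceeds 9.0, so every value is scaled by 9/45 = 0.2, giving C=9, E=4, G=2
# after rounding, and the packed string "900040020000".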


"""
attribution:
https://gist.github.com/bmcfee/1f66825cef2eb34c839b42dddbad49fd
https://github.com/bmcfee
"""


def detect_key(X):
    if X.shape[0] != 12:
        raise ValueError(
            "input must be a chroma vector, i.e. a numpy ndarray of shape (12,)"
        )
    # key_names = "A", "A#", "B", "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#"
    X = scipy.stats.zscore(X)

    # Coefficients from Krumhansl and Schmuckler
    # as reported here: http://rnhart.net/articles/key-finding/
    major = numpy.asarray(
        [6.35, 2.23, 3.48, 2.33, 4.38, 4.09, 2.52, 5.19, 2.39, 3.66, 2.29, 2.88]
    )
    major = scipy.stats.zscore(major)

    minor = numpy.asarray(
        [6.33, 2.68, 3.52, 5.38, 2.60, 3.53, 2.54, 4.75, 3.98, 2.69, 3.34, 3.17]
    )
    minor = scipy.stats.zscore(minor)

    # Generate all rotations of the major/minor profiles
    major = scipy.linalg.circulant(major)
    minor = scipy.linalg.circulant(minor)

    major = major.T.dot(X)
    minor = minor.T.dot(X)

    major_winner = int(numpy.argmax(major) + 0.5)
    minor_winner = int(numpy.argmax(minor) + 0.5)
    # essentia adds a 0.5? why
    # https://github.com/MTG/essentia/blob/master/src/algorithms/tonal/key.cpp#L370

    if major[major_winner] > minor[minor_winner]:
        return "{0}maj".format(_note_names[major_winner])
    elif major[major_winner] < minor[minor_winner]:
        return "{0}min".format(_note_names[minor_winner])
    else:
        if major_winner == minor_winner:
            return "{0}majmin".format(_note_names[major_winner])
        else:
            return "{0}maj OR {1}min".format(
                _note_names[major_winner], _note_names[minor_winner]
            )
--------------------------------------------------------------------------------
/chord_detection/harmonic_energy.py:
--------------------------------------------------------------------------------
import numpy
import scipy.signal
import librosa
import matplotlib.pyplot as plt
from chord_detection.multipitch import Multipitch
from chord_detection.chromagram import Chromagram
from chord_detection.dsp.frame import frame_cutter


class MultipitchHarmonicEnergy(Multipitch):
    def __init__(
        self, audio_path, frame_size=8192, num_harmonic=2, num_octave=2, num_bins=2
    ):
        super().__init__(audio_path)
        self.frame_size = frame_size
        self.num_harmonic = num_harmonic
        self.num_octave = num_octave
        self.num_bins = num_bins

    @staticmethod
    def display_name():
        return "Harmonic Energy (Stark, Plumbley)"

    @staticmethod
    def method_number():
        return 2

    def compute_pitches(self, display_plot_frame=-1):
        # first C = C3
        notes = librosa.cqt_frequencies(12, fmin=librosa.note_to_hz('C3'))

        divisor_ratio = (self.fs / 4.0) / self.frame_size
        self.dft_maxes = []

        overall_chromagram = Chromagram()

        for frame, x in enumerate(frame_cutter(self.x, self.frame_size)):
            chromagram = Chromagram()
            # window functions live in scipy.signal.windows (the old
            # scipy.signal.hamming alias is gone in newer SciPy)
            x = x * scipy.signal.windows.hamming(self.frame_size)
            x_dft = numpy.sqrt(numpy.absolute(numpy.fft.rfft(x)))
            for n in range(12):
                chroma_sum = 0.0
                for octave in range(1, self.num_octave + 1):
                    note_sum = 0.0
                    for harmonic in range(1, self.num_harmonic + 1):
                        x_dft_max = float("-inf")  # sentinel

                        k_prime = numpy.round(
                            (notes[n] * octave * harmonic) / divisor_ratio
                        )
                        k0 = int(k_prime - self.num_bins * harmonic)
                        k1 = int(k_prime + self.num_bins * harmonic)

                        best_ind = None
                        for k in range(k0, k1):
                            curr_ = x_dft[k]
                            if curr_ > x_dft_max:
                                x_dft_max = curr_
                                best_ind = k

64 | note_sum += x_dft_max * (1.0 / harmonic) 65 | self.dft_maxes.append((k0, best_ind, k1)) 66 | chroma_sum += note_sum 67 | chromagram[n] += chroma_sum 68 | 69 | overall_chromagram += chromagram 70 | 71 | if frame == display_plot_frame: 72 | _display_plots(self.clip_name, self.fs, self.frame_size, x_dft, self.x, x, self.dft_maxes) 73 | return overall_chromagram 74 | 75 | def _display_plots(clip_name, fs, frame_size, x_dft, x, x_frame, dft_maxes): 76 | pltlen = frame_size 77 | samples = numpy.arange(pltlen) 78 | dftlen = int(x_dft.shape[0] / 2) 79 | dft_samples = numpy.arange(dftlen) 80 | 81 | fig1, (ax1, ax2) = plt.subplots(2, 1) 82 | 83 | ax1.set_title("x[n] - {0}".format(clip_name)) 84 | ax1.set_xlabel("n (samples)") 85 | ax1.set_ylabel("amplitude") 86 | ax1.plot(samples, x[:pltlen], "b", alpha=0.3, linestyle=":", label="x[n]") 87 | ax1.plot( 88 | samples, 89 | x_frame[:pltlen], 90 | "r", 91 | alpha=0.4, 92 | linestyle="--", 93 | label="x[n], frame + ham", 94 | ) 95 | ax1.grid() 96 | ax1.legend(loc="upper right") 97 | 98 | ax2.set_title("X (DFT)") 99 | ax2.set_xlabel("fft bin") 100 | ax2.set_ylabel("magnitude") 101 | ax2.plot(dft_samples, x_dft[:dftlen], "b", alpha=0.5, label="X(n)") 102 | for i, dft_max in enumerate(dft_maxes): 103 | left, mid, right = dft_max 104 | ax2.plot(left, x_dft[:dftlen][left], "rx") 105 | ax2.plot(mid, x_dft[:dftlen][mid], "go") 106 | ax2.plot(right, x_dft[:dftlen][right], color="purple", marker="x") 107 | pitch = fs / mid 108 | note = librosa.hz_to_note(pitch, octave=False) 109 | pitch = round(pitch, 2) 110 | 111 | if (i % 17) == 0: 112 | # displaying too many of these clutters the graph 113 | ax2.text( 114 | mid, 1.2 * x_dft[:dftlen][mid], "{0}\n{1}".format(pitch, note) 115 | ) 116 | 117 | ax2.grid() 118 | ax2.legend(loc="upper right") 119 | 120 | plt.show() 121 | -------------------------------------------------------------------------------- /chord_detection/prime_multif0.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | import math 3 | import random 4 | import scipy 5 | import scipy.signal 6 | import librosa 7 | import typing 8 | import peakutils 9 | from matplotlib import mlab 10 | import matplotlib.pyplot as plt 11 | from chord_detection.multipitch import Multipitch 12 | from chord_detection.chromagram import Chromagram 13 | from chord_detection.dsp.wfir import wfir 14 | from chord_detection.dsp.frame import frame_cutter 15 | from collections import OrderedDict 16 | 17 | 18 | class MultipitchPrimeMultiF0(Multipitch): 19 | def __init__( 20 | self, 21 | audio_path, 22 | num_harmonic=1, 23 | num_octave=2, 24 | harmonic_multiples_elim=5, 25 | harmonic_elim_runs=2, 26 | ): 27 | super().__init__(audio_path) 28 | self.num_harmonic = num_harmonic 29 | self.num_octave = num_octave 30 | self.harmonic_elim_runs = harmonic_elim_runs 31 | self.harmonic_multiples_elim = harmonic_multiples_elim 32 | 33 | @staticmethod 34 | def display_name(): 35 | return "Prime-multiF0 (Camacho, Kaver-Oreamuno)" 36 | 37 | @staticmethod 38 | def method_number(): 39 | return 4 40 | 41 | def compute_pitches(self, display_plot_frame=-1): 42 | overall_chromagram = Chromagram() 43 | 44 | # first C = C3 45 | notes = librosa.cqt_frequencies(12, fmin=librosa.note_to_hz('C3')) 46 | 47 | self.specgram_to_plot = [] 48 | 49 | for n in range(12): 50 | for octave in range(1, self.num_octave + 1): 51 | for harmonic in range(1, self.num_harmonic + 1): 52 | f_candidate = notes[n] * octave * harmonic 53 | window_size = int((8 / f_candidate) * 
self.fs) 54 | 55 | chromagram = Chromagram() 56 | for frame, x_t in enumerate(frame_cutter(self.x, window_size)): 57 | real_window_size = max(x_t.shape[0], window_size) 58 | window = numpy.hanning(real_window_size) 59 | s, f = mlab.magnitude_spectrum(x_t, Fs=self.fs, window=window) 60 | s = s[:int(s.shape[0]/2)] 61 | f = f[:int(f.shape[0]/2)] 62 | s[s < 0] = 0.0 # clip 63 | might_append_1 = s.copy() 64 | might_append_2 = [] 65 | 66 | for _ in range(self.harmonic_elim_runs): 67 | max_freq_idx = s.argmax(axis=0) 68 | max_f = f[max_freq_idx] 69 | try: 70 | note = librosa.hz_to_note(max_f, octave=False) 71 | chromagram[note] += s[max_freq_idx] 72 | might_append_2.append((max_freq_idx, max_f, note)) 73 | except (ValueError, OverflowError): 74 | continue 75 | eliminated = [] 76 | for harmonic_index_multiple in range( 77 | 1, self.harmonic_multiples_elim 78 | ): 79 | elim_freq = harmonic_index_multiple * max_f 80 | elim_index = numpy.where(f == elim_freq) 81 | s[elim_index] -= s[elim_index] 82 | might_append_3 = s.copy() 83 | 84 | if frame == display_plot_frame: 85 | # plot once and stop 86 | display_plot_frame = -1 87 | _display_plots(self.clip_name, self.x, ((might_append_1, might_append_2, might_append_3))) 88 | 89 | overall_chromagram += chromagram 90 | 91 | return overall_chromagram 92 | 93 | def _display_plots(clip_name, x, specgram_to_plot): 94 | fig1, (ax1, ax2) = plt.subplots(2, 1) 95 | 96 | ax1.set_title("x[n] - {0}".format(clip_name)) 97 | ax1.set_xlabel("n (samples)") 98 | ax1.set_ylabel("amplitude") 99 | ax1.plot(numpy.arange(x.shape[0]), x, "b", alpha=0.5, label="x[n]") 100 | ax1.grid() 101 | ax1.legend(loc="upper right") 102 | 103 | (s, notes, s_post) = specgram_to_plot 104 | 105 | ax2.set_title("S (specgram)".format(clip_name)) 106 | ax2.set_xlabel("frequency bins") 107 | ax2.set_ylabel("magnitude") 108 | ax2.plot(numpy.arange(s.shape[0]), s, "b", alpha=0.5, label="S") 109 | for (freq_idx, freq, note) in notes: 110 | ax2.plot(freq_idx, s[freq_idx], "ro") 111 | ax2.text(freq_idx, s[freq_idx], "{0}, {1}".format(round(freq, 2), note)) 112 | ax2.plot( 113 | numpy.arange(s_post.shape[0]), 114 | s_post, 115 | "g", 116 | alpha=0.5, 117 | label="S' (f0 candidates eliminated)", 118 | ) 119 | ax2.grid() 120 | ax2.legend(loc="upper right") 121 | 122 | plt.show() 123 | -------------------------------------------------------------------------------- /chord_detection/periodicity.py: -------------------------------------------------------------------------------- 1 | from chord_detection.chromagram import Chromagram 2 | import numpy 3 | import math 4 | import librosa 5 | 6 | 7 | _HAMMINGWINDOWNORM = [0.0011244659258033, 0.11559343551383, 0.42817348241183, 0.81822361914331, 1.0, 0.81822361914331, 0.42817348241183, 0.11559343551383, 0.0011244659258033] 8 | 9 | 10 | ''' 11 | borrowed heavily from https://github.com/BansMarbol/PolyPitch 12 | ''' 13 | 14 | class IterativeF0PeriodicityAnalysis(): 15 | def __init__( 16 | self, 17 | fs: float, 18 | window_size: int, 19 | max_voices=4, 20 | tau_min=1.0/2100.0, 21 | tau_max=1.0/40.0, 22 | tau_prec=0.0000001, 23 | Q=20, 24 | M=20, 25 | epsilon1=20, 26 | epsilon2=320, 27 | gamma=0.66, 28 | ): 29 | self.fs = fs 30 | self.window_size = window_size 31 | self.K = window_size/fs 32 | self.max_voices = max_voices 33 | self.tau_min = tau_min 34 | self.tau_max = tau_max 35 | self.tau_prec = tau_prec 36 | self.Q = Q 37 | self.M = M 38 | self.epsilon1 = epsilon1 39 | self.epsilon2 = epsilon2 40 | self.gamma = gamma # polyphony estimate 41 | 42 | self.voicesaliences = 
numpy.zeros(self.max_voices)
        self.voiceperiods = numpy.zeros(self.max_voices)
        self.smax = numpy.zeros(self.Q)
        self.tau_low = numpy.zeros(self.Q)
        self.tau_up = numpy.zeros(self.Q)

    def compute(self, Uk: numpy.ndarray):
        num_voices_detected = 0
        cancellation_weight = 1.0

        Ud = numpy.zeros(Uk.shape[0])
        Ur = numpy.array(Uk)

        # clear the arrays from the last run
        self.voicesaliences[:] = 0.0
        self.voiceperiods[:] = 0.0

        prevmixturescore = 0.0
        mixturescore = 0.0

        keepgoing = True

        while keepgoing:
            winningtau, bestsalience = self.min_search(Ur)
            self.voicesaliences[num_voices_detected] = bestsalience
            self.voiceperiods[num_voices_detected] = winningtau

            num_voices_detected += 1
            mixturescore += bestsalience

            testquantity = mixturescore/(math.pow(num_voices_detected, self.gamma))

            if num_voices_detected >= self.max_voices or testquantity <= prevmixturescore:
                keepgoing = False
            else:
                prevmixturescore = testquantity
                tau = winningtau
                topm = int(tau*(self.fs/self.window_size)*Uk.shape[0])

                srovertau = self.fs/tau
                weight = srovertau + self.epsilon1
                for m in range(1, topm):
                    partialK = m*self.K/tau + 0.5
                    if partialK < Uk.shape[0]:  # strict: partialK is used as an index below
                        Urweight = Ur[int(partialK)]
                        Urweight *= weight/(m*srovertau + self.epsilon2)

                        lowk = max(int(partialK-4), 0)
                        highk = min(int(partialK+4), Uk.shape[0]-1)  # clamp inside Ud

                        for j in range(lowk, highk+1):
                            hammingindexnow = int(j - partialK + 4)
                            val = _HAMMINGWINDOWNORM[hammingindexnow] * Urweight
                            Ud[j] += val

                for i in range(Uk.shape[0]):
                    diff = Uk[i] - cancellation_weight*Ud[i]
                    Ur[i] = max(diff, 0)

        if num_voices_detected > 0:
            num_voices_detected -= 1

        c = Chromagram()
        for i in range(self.voiceperiods.shape[0]):
            try:
                note = librosa.hz_to_note(self.fs/self.voiceperiods[i], octave=False)
                c[note] += self.voicesaliences[i]
            except OverflowError:
                continue

        return c, (self.voicesaliences.copy(), self.voiceperiods.copy())

    def min_search(self, Ur: numpy.ndarray):
        q = 0

        self.tau_low[0] = self.tau_min
        self.tau_up[0] = self.tau_max

        qbest = 0

        while (self.tau_up[qbest] - self.tau_low[qbest]) > self.tau_prec and q < self.Q-1:
            q += 1
            self.tau_low[q] = (self.tau_low[qbest] + self.tau_up[qbest])*0.5
            self.tau_up[q] = self.tau_up[qbest]
            self.tau_up[qbest] = self.tau_low[q]

            self.smax[q] = self.smax_fn(q, Ur)
            self.smax[qbest] = self.smax_fn(qbest, Ur)

            whichq = 0
            maxval = self.smax[0]

            for j in range(1, q+1):
                valnow = self.smax[j]
                if valnow > maxval:
                    maxval = valnow
                    whichq = j
            qbest = whichq

        winningtau = (self.tau_low[qbest] + self.tau_up[qbest])*0.5
        return winningtau, self.smax[qbest]

    def smax_fn(self, q: int, Ur: numpy.ndarray) -> float:
        tau = 0.5*(self.tau_low[q] + self.tau_up[q])
        deltatau = self.tau_up[q] - self.tau_low[q]

        salience = 0.0

        weight_numerator = self.fs/self.tau_low[q] + self.epsilon1
        def weight_denominator(m: int):
            return (m*self.fs/self.tau_up[q] + self.epsilon2)

        for m in range(1, self.M):
            lowk = int(m*self.K/(tau+0.5*deltatau) + 0.5)
            highk = int(m*self.K/(tau-0.5*deltatau) + 0.5)

            if lowk >= Ur.shape[0]:
                # partials beyond the spectrum contribute nothing
                break

            Umax = numpy.amax(Ur[lowk:highk+1])
            # the per-partial weight is weight_numerator / weight_denominator(m),
            # matching the cancellation weight in compute() above
            salience += Umax / weight_denominator(m)

        salience *= weight_numerator
        return salience
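

# Rough intuition for the numbers in smax_fn, under the default parameters
# (the concrete values are illustrative): with fs = 22050 and a candidate
# period tau near 1/440 s, fs/tau is on the order of 10^7, so epsilon1 and
# epsilon2 barely matter and the per-partial weight
# (fs/tau_low + epsilon1) / (m*fs/tau_up + epsilon2) is approximately 1/m --
# i.e. successive harmonics of a period candidate contribute with roughly
# harmonically decaying weight to its salience.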
--------------------------------------------------------------------------------
/chord_detection/esacf.py:
--------------------------------------------------------------------------------
import numpy
import scipy.signal
import librosa
import typing
import peakutils
import matplotlib.pyplot as plt
from chord_detection.multipitch import Multipitch
from chord_detection.chromagram import Chromagram
from chord_detection.dsp.wfir import wfir
from chord_detection.dsp.frame import frame_cutter
from chord_detection.dsp.lowpass import lowpass_filter


class MultipitchESACF(Multipitch):
    def __init__(
        self,
        audio_path,
        ham_ms=46.4,
        k=0.67,
        n_peaks_elim=6,
        peak_thresh=0.1,
        peak_min_dist=10,
    ):
        super().__init__(audio_path)
        self.ham_samples = int(self.fs * ham_ms / 1000.0)
        self.k = k
        self.n_peaks_elim = n_peaks_elim
        self.peak_thresh = peak_thresh
        self.peak_min_dist = peak_min_dist

    @staticmethod
    def display_name():
        return "ESACF (Tolonen, Karjalainen)"

    @staticmethod
    def method_number():
        return 1

    def compute_pitches(self, display_plot_frame=-1):
        overall_chromagram = Chromagram()

        for frame, x_frame in enumerate(frame_cutter(self.x, self.ham_samples)):
            x = wfir(x_frame, self.fs, 12)

            x_hi = _highpass_filter(x, self.fs)
            x_hi = numpy.clip(x_hi, 0, None)  # half-wave rectification
            x_hi = lowpass_filter(x_hi, self.fs, 1000)  # paper wants it

            x_lo = lowpass_filter(x, self.fs, 1000)

            x_sacf = _sacf([x_lo, x_hi], k=self.k)
            x_esacf, harmonic_elim_plots = _esacf(x_sacf, self.n_peaks_elim, True)

            peak_indices = peakutils.indexes(
                x_esacf, thres=self.peak_thresh, min_dist=self.peak_min_dist
            )

            peak_indices_interp = peakutils.interpolate(
                numpy.arange(x_esacf.shape[0]), x_esacf, ind=peak_indices
            )

            chromagram = Chromagram()
            for i, tau in enumerate(peak_indices_interp):
                pitch = self.fs / tau
                try:
                    note = librosa.hz_to_note(pitch, octave=False)
                    chromagram[note] += x_esacf[peak_indices[i]]
                except ValueError:
                    continue
            overall_chromagram += chromagram

            if frame == display_plot_frame:
                _display_plots(
                    self.clip_name,
                    self.fs,
                    self.ham_samples,
                    frame,
                    x,
                    x_lo,
                    x_hi,
                    x_sacf,
                    x_esacf,
                    harmonic_elim_plots,
                    peak_indices,
                    peak_indices_interp,
                )

        return overall_chromagram
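

# _sacf below computes a "generalized" autocorrelation: with k = 2 the
# FFT -> magnitude**k -> IFFT round trip is the textbook Wiener-Khinchin
# autocorrelation, while the default k = 0.67 compresses the spectrum's
# dynamic range before inverting, which sharpens the periodicity peaks.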


def _sacf(x_channels: typing.List[numpy.ndarray], k=None) -> numpy.ndarray:
    # k is the same as p (power) in the Klapuri paper, method 3
    if not k:
        k = 0.67

    shape = x_channels[0].shape[0]

    running_sum = numpy.zeros(shape)

    for xc in x_channels:
        running_sum += numpy.abs(numpy.fft.fft(xc)) ** k

    return numpy.real(numpy.fft.ifft(running_sum))[:int((shape-1)/2)]


def _esacf(
    x2: numpy.ndarray, n_peaks: int, ret_plots: bool
) -> typing.Tuple[numpy.ndarray, typing.List[numpy.ndarray]]:
    """
    enhance the SACF with the following procedure:
    clip to positive values, time-stretch by n_peaks,
    subtract the original
    """
    x2tmp = x2.copy()
    to_plot = []

    for timescale in range(2, n_peaks + 1):
        x2tmp = numpy.clip(x2tmp, 0, None)
        x2stretched = librosa.effects.time_stretch(x2tmp, rate=timescale).copy()

        x2stretched.resize(x2tmp.shape)
        if ret_plots:
            to_plot.append(x2stretched)
        x2tmp -= x2stretched
        x2tmp = numpy.clip(x2tmp, 0, None)

    return x2tmp, to_plot


def _highpass_filter(x: numpy.ndarray, fs: float) -> numpy.ndarray:
    b, a = scipy.signal.butter(2, [1000 / (fs / 2)], btype="high")
    return scipy.signal.lfilter(b, a, x)


def _display_plots(
    clip_name,
    fs,
    frame_size,
    frame,
    x,
    x_lo,
    x_hi,
    x_sacf,
    x_esacf,
    harmonic_elim_plots,
    peak_indices,
    peak_indices_interp,
):
    samples = numpy.arange(frame_size)

    fig1, (ax1, ax2) = plt.subplots(2, 1)

    ax1.set_title("{0} - x[n], frame {1}".format(clip_name, frame))
    ax1.set_xlabel("n (samples)")
    ax1.set_ylabel("amplitude")
    ax1.plot(samples, x, "b", alpha=0.5, label="x[n]")
    ax1.plot(samples, x_lo, "g", alpha=0.5, linestyle="--", label="x[n] lo")
    ax1.plot(samples, x_hi, "r", alpha=0.5, linestyle=":", label="x[n] hi")
    ax1.grid()
    ax1.legend(loc="upper right")

    ax2.set_title("SACF, ESACF")
    ax2.set_xlabel("n (samples)")
    ax2.set_ylabel("normalized amplitude")

    i = 0
    for i, h in enumerate(harmonic_elim_plots):
        h_norm = h / numpy.max(h)
        ax2.plot(
            samples,
            numpy.concatenate((h_norm, numpy.zeros(samples.shape[0] - h.shape[0]))),
            "C{0}".format(i),
            alpha=0.1,
            label="time stretch {0}".format(2 + i),
        )
    i += 1
    sacf_norm = x_sacf / numpy.max(x_sacf)
    ax2.plot(
        samples,
        numpy.concatenate(
            (sacf_norm, numpy.zeros(samples.shape[0] - sacf_norm.shape[0]))
        ),
        "C{0}".format(i),
        linestyle="--",
        alpha=0.5,
        label="sacf",
    )
    esacf_norm = x_esacf / numpy.max(x_esacf)
    i += 1
    ax2.plot(
        samples,
        numpy.concatenate(
            (esacf_norm, numpy.zeros(samples.shape[0] - esacf_norm.shape[0]))
        ),
        "C{0}".format(i),
        linestyle=":",
        alpha=0.5,
        label="esacf",
    )
    scatter_peaks = esacf_norm[peak_indices]
    for i, ind in enumerate(peak_indices_interp):
        pitch = round(fs / ind, 2)
        text = "{0}, {1}".format(pitch, librosa.hz_to_note(pitch, octave=False))
        x = peak_indices_interp[i]
        y = scatter_peaks[i]
        ax2.plot(x, y, "rx")
        ax2.text(x, y, text)

    ax2.grid()
    ax2.legend(loc="upper right")

    plt.show()
--------------------------------------------------------------------------------
/chord_detection/iterative_f0.py:
--------------------------------------------------------------------------------
import numpy
import math
import random
import typing
import scipy.signal
import librosa
import matplotlib.pyplot as plt
from chord_detection.multipitch import Multipitch
from chord_detection.chromagram import Chromagram
from chord_detection.dsp.wfir import wfir
from chord_detection.dsp.frame import frame_cutter
from chord_detection.dsp.lowpass import lowpass_filter
from chord_detection.periodicity import IterativeF0PeriodicityAnalysis
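

# Pipeline sketch (per the Klapuri auditory-model papers cited in the README):
# the signal is split into ~70 critical-band channels by the auditory
# filterbank below, each channel is level-compressed (warped FIR), full-wave
# rectified, and smoothed, then the per-channel spectra are summed into a
# bandwise summary spectrum Ut whose periodicity is analyzed by the iterative
# cancel-and-detect loop in IterativeF0PeriodicityAnalysis.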


class MultipitchIterativeF0(Multipitch):
    def __init__(
        self,
        audio_path,
        frame_size=8192,
        power=1.0,
        channels=70,
        zeta0=2.3,
        zeta1=0.39,
        peak_thresh=0.5,
        peak_min_dist=10,
        harmonic_multiples_elim=5,
    ):
        super().__init__(audio_path)
        self.frame_size = frame_size
        self.num_frames = math.ceil(self.x.shape[0] / self.frame_size)
        self.power = power
        self.channels = [
            229 * (10 ** ((zeta1 * c + zeta0) / 21.4) - 1) for c in range(channels)
        ]
        self.peak_thresh = peak_thresh
        self.peak_min_dist = peak_min_dist
        self.harmonic_multiples_elim = harmonic_multiples_elim
        self.periodicity_estimator = IterativeF0PeriodicityAnalysis(self.fs, self.frame_size)

    @staticmethod
    def display_name():
        return "Iterative F0 (Klapuri, Anssi)"

    @staticmethod
    def method_number():
        return 3

    def compute_pitches(self, display_plot_frame=-1):
        ycn = [None for _ in range(len(self.channels))]

        for i, fc in enumerate(self.channels):
            yc = _auditory_filterbank(self.x, self.fs, fc)
            yc = wfir(yc, self.fs, 12)  # dynamic level compression
            yc = numpy.abs(yc)  # full-wave rectification
            yc = (
                yc + lowpass_filter(yc, self.fs, fc)
            ) / 2.0  # sum with low-pass filtered version of self at center-channel frequency

            ycn[i] = yc

        Yct = [
            [None for _ in range(len(self.channels))] for _ in range(self.num_frames)
        ]
        Ut = [None for _ in range(self.num_frames)]

        for channel, fc in enumerate(self.channels):
            for frame, yct in enumerate(frame_cutter(ycn[channel], self.frame_size)):
                # hamming windowed and zero-padded to 2x length
                yct = yct * scipy.signal.windows.hamming(yct.shape[0])
                yct = numpy.concatenate((yct, numpy.zeros(yct.shape[0])))
                Yct[frame][channel] = yct.copy()

        shape = Yct[0][0].shape[0]
        for frame in range(self.num_frames):
            running_sum = numpy.zeros(shape)

            for channel, Yct_ in enumerate(Yct[frame]):
                running_sum += numpy.abs(numpy.fft.fft(Yct_)) ** self.power
            Ut[frame] = running_sum

        overall_chromagram = Chromagram()
        # periodicity estimate - iterative f0 cancellation/tau/salience loop
        for frame, Uk in enumerate(Ut):
            frame_chromagram, salience_plots = self.periodicity_estimator.compute(Uk)
            overall_chromagram += frame_chromagram

            if frame == display_plot_frame:
                _display_plots(self.clip_name, self.fs, self.frame_size, self.x, self.channels, ycn, Ut[frame], salience_plots)

        return overall_chromagram


def _display_plots(clip_name, fs, frame_size, x, channels, ytc, Ut, splots):
    fig1, (ax1, ax2) = plt.subplots(2, 1)

    ax1.set_title(r"x[n], $y_c$[n], normalized - {0}".format(clip_name))
    ax1.set_xlabel("n (samples)")
    ax1.set_ylabel("amplitude")
    ax1.plot(
        numpy.arange(frame_size),
        x[:frame_size]/numpy.max(x),
        "b",
        alpha=0.75,
        linestyle="--",
        label="x[n]",
    )

    for i, ch in enumerate(
        [random.randrange(0, len(channels)) for _ in range(6)]
    ):
        ax1.plot(
            numpy.arange(frame_size),
            ytc[ch][:frame_size]/numpy.max(ytc[ch][:frame_size]),
            color="C{0}".format(i),
            linestyle="--",
            alpha=0.5,
            label=r"$y_c$[n], $f_c$ = {0}".format(round(channels[ch], 2)),
        )

    ax1.grid()
    ax1.legend(loc="upper right")

    ax2.set_title("Ut, bandwise summary spectrum")
    ax2.set_xlabel("fft bin")
    ax2.set_ylabel("amplitude")
    ax2.plot(
        numpy.arange(frame_size/2-1024),
        Ut[1024:int(frame_size/2)],
        "b",
        alpha=0.75,
        linestyle="--",
        label="Ut",
    )

    max_ut = numpy.amax(Ut[1024:int(frame_size/2)])

    (saliences, periods) = splots

    tau = int(1/periods[0])

    ax2.plot(
        tau,
        max_ut/2,
        'rx',
        label=r'$s(\hat{\tau})$ = ' + str(round(saliences[0], 2))
    )

    pitch = fs / periods[0]
    note = librosa.hz_to_note(pitch, octave=False)
    pitch = round(pitch, 2)

    ax2.text(
        tau,
        1.1*(max_ut/2),
        '{0}, {1}'.format(pitch, note)
    )

    ax2.grid()
    ax2.legend(loc="upper right")

    plt.show()
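

# The filterbank below approximates one critical band of Klapuri's auditory
# model with two distinct second-order resonators, each applied twice -- a
# cascade of J = 4 stages centered at fc, where the -3/J dB per-stage design
# of A keeps the full cascade about 3 dB down at the band edges.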


def _auditory_filterbank(x, fs, fc):
    # note the argument order: sampling rate fs, then center frequency fc,
    # matching the call in compute_pitches above
    J = 4

    # bc3db = -3/J dB
    A = numpy.exp(-(3 / J) * numpy.pi / (fs * numpy.sqrt(2 ** (1 / J) - 1)))

    cos_theta1 = (1 + A * A) / (2 * A) * numpy.cos(2 * numpy.pi * fc / fs)
    cos_theta2 = (2 * A) / (1 + A * A) * numpy.cos(2 * numpy.pi * fc / fs)
    rho1 = (1 / 2) * (1 - A * A)
    rho2 = (1 - A * A) * numpy.sqrt(1 - cos_theta2 ** 2)

    resonator_1_b = [rho1, 0, -rho1]
    resonator_1_a = [1, -A * cos_theta1, A * A]

    resonator_2_b = [rho2]
    resonator_2_a = [1, -A * cos_theta2, A * A]

    x = scipy.signal.lfilter(resonator_1_b, resonator_1_a, x)
    x = scipy.signal.lfilter(resonator_1_b, resonator_1_a, x)
    x = scipy.signal.lfilter(resonator_2_b, resonator_2_a, x)
    x = scipy.signal.lfilter(resonator_2_b, resonator_2_a, x)

    return x


"""
I would've used the sacf() function from method 1 here, but the IFFT is
specific to that method, and we're using the weighted salience for
periodicity analysis here
"""


def _bandwise_summary_spectrum(
    x_channels: typing.List[numpy.ndarray], k=None
) -> numpy.ndarray:
    # k is the same as p (power) in the Klapuri paper
    if not k:
        k = 0.67

    shape = x_channels[0].shape[0]
    running_sum = numpy.zeros(shape)

    for xc in x_channels:
        running_sum += numpy.abs(numpy.fft.fft(xc)) ** k

    return running_sum[:int((shape-1)/2)]
--------------------------------------------------------------------------------