├── .gitignore ├── LICENSE ├── README.md ├── chordrec ├── __init__.py ├── augmenters.py ├── chroma.py ├── classify.py ├── data.py ├── experiment.py ├── features.py ├── models │ ├── __init__.py │ ├── avg_gap_feature.py │ ├── blocks.py │ ├── chroma_dnn.py │ ├── crf.py │ ├── dnn.py │ └── rnn.py ├── targets.py └── test.py ├── experiments ├── feature_cache │ └── README ├── ismir2016 │ ├── chroma.yaml │ ├── chroma_wlog.yaml │ ├── data │ ├── deep_chroma.yaml │ ├── feature_cache │ ├── logfiltspec.yaml │ └── run.sh ├── madmom2016 │ ├── README.md │ ├── chord_feature_convnet.yaml │ ├── create_crf_init_params.py │ ├── create_madmom_convnet_model.py │ ├── create_madmom_crf_model.py │ ├── create_madmom_deep_chroma_model.py │ ├── crf_chord_rec.yaml │ └── deep_chroma.yaml └── mlsp2016 │ ├── README.md │ ├── convnet.yaml │ ├── create_crf_init_params.py │ ├── crf.yaml │ ├── feature_cache │ └── to_madmom_crf.py └── tools ├── evaluate.py ├── extract_perfect_chroma.py └── post_process.py /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | 55 | # Sphinx documentation 56 | docs/_build/ 57 | 58 | # PyBuilder 59 | target/ 60 | ### Vim template 61 | [._]*.s[a-w][a-z] 62 | [._]s[a-w][a-z] 63 | *.un~ 64 | Session.vim 65 | .netrwhist 66 | *~ 67 | ### IPythonNotebook template 68 | # Temporary data 69 | .ipynb_checkpoints/ 70 | ### JetBrains template 71 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio 72 | 73 | *.iml 74 | 75 | ## Directory-based project format: 76 | .idea/ 77 | # if you remove the above rule, at least ignore the following: 78 | 79 | # User-specific stuff: 80 | # .idea/workspace.xml 81 | # .idea/tasks.xml 82 | # .idea/dictionaries 83 | 84 | # Sensitive or high-churn files: 85 | # .idea/dataSources.ids 86 | # .idea/dataSources.xml 87 | # .idea/sqlDataSources.xml 88 | # .idea/dynamic.xml 89 | # .idea/uiDesigner.xml 90 | 91 | # Gradle: 92 | # .idea/gradle.xml 93 | # .idea/libraries 94 | 95 | # Mongo Explorer plugin: 96 | # .idea/mongoSettings.xml 97 | 98 | ## File-based project format: 99 | *.ipr 100 | *.iws 101 | 102 | ## Plugin-specific files: 103 | 104 | # IntelliJ 105 | /out/ 106 | 107 | # mpeltonen/sbt-idea plugin 108 | .idea_modules/ 109 | 110 | # JIRA plugin 111 | atlassian-ide-plugin.xml 112 | 113 | # Crashlytics plugin (for Android Studio and IntelliJ) 114 | com_crashlytics_export_strings.xml 115 | crashlytics.properties 116 | crashlytics-build.properties 117 | 118 | # Created by .ignore support plugin (hsz.mobi) 119 | 120 | # Own ignores 121 | experiments/data 122 | notes/ 123 | 
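# keep the feature cache directory tracked via its README, but ignore any cached features in it: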
experiments/feature_cache/* 124 | !experiments/feature_cache/README -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Filip Korzeniowski 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # chordrec 2 | 3 | This is the code I use for my chord recognition experiments. 4 | 5 | ## Requirements & Installation 6 | 7 | I assume a standard "scientific Python" environment with NumPy, SciPy, etc. 8 | Additionally, the following libraries are required: 9 | 10 | - [Theano](https://github.com/Theano/Theano) 11 | - [Lasagne](https://github.com/Lasagne/Lasagne) 12 | - [dmgr](https://github.com/fdlm/dmgr) 13 | - [nn](https://github.com/fdlm/nn) 14 | - [Spaghetti](https://github.com/fdlm/Spaghetti) 15 | - [madmom](https://github.com/CPJKU/madmom)* 16 | - [librosa](https://github.com/librosa/librosa) (version 0.4.1)* 17 | - [mir_eval](https://github.com/craffel/mir_eval)* 18 | - [pyyaml](https://bitbucket.org/xi/pyyaml)* 19 | - [sacred](https://github.com/IDSIA/sacred)* 20 | 21 | Packages marked with a * can be installed using `pip`; the others are either 22 | not available on PyPI or are best installed from source. If I missed any 23 | dependency, please let me know. 24 | 25 | Once you have all libraries installed, clone this repository and add its path 26 | to the `$PYTHONPATH` environment variable. 27 | 28 | ## Data Setup 29 | 30 | Different experiments might require different datasets to be present (you can 31 | find detailed information on the sites describing the experiments on my 32 | [website](http://fdlm.github.io)). The directory structure for each dataset, 33 | however, is the same. 34 | 35 | Put all datasets into respective subdirectories under 36 | `chordrec/experiments/data`. Each dataset has to contain three types of data: 37 | audio files in `.flac` format, corresponding chord annotations in lab format 38 | with the file extension `.chords`, and the cross-validation split definitions. 39 | Audio and annotation files can be organised in a directory hierarchy, but do 40 | not need to be; the programs will look for any `.flac` and `.chords` files in all 41 | directories recursively. However, the split definition 42 | files must be in a `splits` sub-directory in each dataset directory (e.g. 43 | `beatles/splits`). File names of audio and annotation files must correspond to 44 | the names given in the split definition files. 45 |
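For reference, a `.chords` annotation file is a plain-text lab file with one
`start end label` triple per line, with times given in seconds. The excerpt
below is made up purely for illustration:

```
0.000000    2.612267    N
2.612267    11.459070   E
11.459070   12.921927   A
```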
46 | The `data` directory including some example datasets should look like this; 47 | the internal structure of the `queen`, `robbie_williams`, `rwc` and `zweieck` 48 | directories follows that of `beatles`: 49 | 50 | ``` 51 | experiments 52 | +-- data 53 | +-- beatles 54 | +-- *.flac 55 | +-- *.chords 56 | +-- splits 57 | +-- 8-fold_cv_album_distributed_*.fold 58 | +-- queen 59 | +-- robbie_williams 60 | +-- rwc 61 | +-- zweieck 62 | ``` 63 | 64 | Refer to the websites for each individual experiment for more information on 65 | the data and how to obtain it. 66 | 67 | ## Experiments 68 | 69 | The `experiments` sub-directory contains scripts and configurations to 70 | reproduce the results of all my papers on chord recognition (plus some more). 71 | Since neural networks are initialised randomly, and I usually do not save the 72 | seed, the results might differ slightly from the ones in the papers. 73 | 74 | - `experiments/ismir2016`: Reproduces the final results for all features 75 | compared in the following paper: 76 | 77 | F. Korzeniowski and G. Widmer. ["Feature Learning for Chord Recognition: The 78 | Deep Chroma Extractor"](https://drive.google.com/open?id=0B0gBhdh1fIPKZUwtdnJpeDBjdlk). In *Proceedings of the 17th International Society 79 | for Music Information Retrieval Conference (ISMIR 2016)*, New York, USA. 80 | 81 | See [here](http://fdlm.github.io/post/deepchroma) for more 82 | information on the model and the necessary data. 83 | 84 | - `experiments/madmom2016`: Configurations to train the chord recognition 85 | models of the [madmom](https://github.com/CPJKU/madmom) audio processing 86 | library. 87 | 88 | - `experiments/mlsp2016`: Reproduces the results of the chord recognition 89 | system presented in the following paper: 90 | 91 | F. Korzeniowski and G. Widmer. ["A Fully Convolutional Deep Auditory Model 92 | for Musical Chord Recognition"](https://drive.google.com/open?id=0B0gBhdh1fIPKNXE5Z3VpQ2pjcE0). 93 | In *Proceedings of the IEEE International Workshop on Machine Learning for 94 | Signal Processing (MLSP 2016)*, Salerno, Italy, 2016. 95 | 96 | See [here](http://fdlm.github.io/post/auditorymodel) for more 97 | information on the model and the necessary data. 98 | -------------------------------------------------------------------------------- /chordrec/__init__.py: -------------------------------------------------------------------------------- 1 | from . import (augmenters, chroma, classify, data, experiment, features, 2 | targets, test) 3 | -------------------------------------------------------------------------------- /chordrec/augmenters.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from scipy.ndimage import shift 3 | import random 4 | from targets import one_hot 5 | 6 | 7 | class SemitoneShift(object): 8 | 9 | def __init__(self, p, max_shift, bins_per_semitone, 10 | target_type='chords_maj_min'): 11 | """ 12 | Augmenter that shifts a spectrum with logarithmically spaced 13 | frequency bins by whole semitones.
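Shifting is done by rolling the data along the frequency axis; the chord (or chroma) targets are adapted to match the shifted roots.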
14 | 15 | :param p: percentage of data to be shifted 16 | :param max_shift: maximum number of semitones to shift 17 | :param bins_per_semitone: number of spectrogram bins per semitone 18 | :param target_type: specifies target type 19 | """ 20 | self.p = p 21 | self.max_shift = max_shift 22 | self.bins_per_semitone = bins_per_semitone 23 | 24 | if target_type == 'chords_maj_min': 25 | self.adapt_targets = self._adapt_targets_chords_maj_min 26 | elif target_type == 'chroma': 27 | self.adapt_targets = self._adapt_targets_chroma 28 | 29 | def _adapt_targets_chords_maj_min(self, targets, shifts): 30 | chord_classes = targets.argmax(-1) 31 | no_chord_class = targets.shape[-1] - 1 32 | no_chords = (chord_classes == no_chord_class) 33 | chord_roots = chord_classes % 12 34 | chord_majmin = chord_classes / 12 35 | 36 | new_chord_roots = (chord_roots + shifts) % 12 37 | new_chord_classes = new_chord_roots + chord_majmin * 12 38 | new_chord_classes[no_chords] = no_chord_class 39 | new_targets = one_hot(new_chord_classes, no_chord_class + 1) 40 | return new_targets 41 | 42 | def _adapt_targets_chroma(self, targets, shifts): 43 | new_targets = np.empty_like(targets) 44 | for i in range(len(targets)): 45 | new_targets[i] = np.roll(targets[i], shifts[i], axis=-1) 46 | return new_targets 47 | 48 | def __call__(self, batch_iterator): 49 | """ 50 | :param batch_iterator: data iterator that yields the data to be 51 | augmented 52 | :return: augmented data/target pairs 53 | """ 54 | 55 | for data, targets in batch_iterator: 56 | batch_size = len(data) 57 | 58 | shifts = np.random.randint(-self.max_shift, 59 | self.max_shift + 1, batch_size) 60 | 61 | # zero out shifts for 1-p percentage 62 | no_shift = random.sample(range(batch_size), 63 | int(batch_size * (1 - self.p))) 64 | shifts[no_shift] = 0 65 | 66 | new_targets = self.adapt_targets(targets, shifts) 67 | 68 | 69 | new_data = np.empty_like(data) 70 | for i in range(batch_size): 71 | # TODO: remove data from upper and lower parts that got 72 | # rolled (?) 
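# a shift of k semitones moves the spectrum by k * bins_per_semitone bins; np.roll wraps the shifted-out bins around at the edges, hence the TODO above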
73 | new_data[i] = np.roll( 74 | data[i], shifts[i] * self.bins_per_semitone, axis=-1) 75 | 76 | yield new_data, new_targets 77 | 78 | 79 | class Detuning(object): 80 | 81 | def __init__(self, p, max_shift, bins_per_semitone): 82 | """ 83 | Augmenter that shifts a spectrogram with logarithmically spaced 84 | frequency bins by up to half a semitone. 85 | :param p: percentage of data to be shifted 86 | :param max_shift: maximum fraction of a semitone to shift (< 0.5) 87 | :param bins_per_semitone: number of spectrogram bins per semitone 88 | """ 89 | if max_shift >= 0.5: 90 | raise ValueError('Detuning only works up to half a semitone!') 91 | self.p = p 92 | self.max_shift = max_shift 93 | self.bins_per_semitone = bins_per_semitone 94 | 95 | def __call__(self, batch_iterator): 96 | """ 97 | :param batch_iterator: data iterator that yields the data to be 98 | augmented 99 | :return: augmented data/target pairs 100 | """ 101 | for data, targets in batch_iterator: 102 | batch_size = len(data) 103 | 104 | shifts = np.random.rand(batch_size) * 2 * self.max_shift - \ 105 | self.max_shift 106 | 107 | # zero out shifts for 1-p percentage 108 | no_shift = random.sample(range(batch_size), 109 | int(batch_size * (1 - self.p))) 110 | shifts[no_shift] = 0 111 | 112 | new_data = np.empty_like(data) 113 | for i in range(batch_size): 114 | new_data[i] = shift( 115 | data[i], (shifts[i] * self.bins_per_semitone, 0)) 116 | 117 | yield new_data, targets 118 | 119 | 120 | def create_augmenters(augmentation): 121 | return [globals()[name](**params) 122 | for name, params in augmentation.iteritems()] 123 | 124 | 125 | def add_sacred_config(ex): 126 | ex.add_named_config( 127 | 'augmentation', 128 | augmentation=dict( 129 | SemitoneShift=dict( 130 | p=1.0, 131 | max_shift=4, 132 | bins_per_semitone=2 133 | ), 134 | Detuning=dict( 135 | p=1.0, 136 | max_shift=0.4, 137 | bins_per_semitone=2 138 | ) 139 | ) 140 | ) 141 | -------------------------------------------------------------------------------- /chordrec/chroma.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import numpy as np 5 | import yaml 6 | 7 | from nn.utils import Colors 8 | 9 | import data 10 | import dmgr 11 | import features 12 | import nn 13 | import targets 14 | import test 15 | from experiment import TempDir, create_optimiser, setup 16 | from models import chroma_dnn 17 | 18 | 19 | def compute_chroma(process_fn, agg_dataset, dest_dir, batch_size, 20 | extension='.features.npy'): 21 | if not os.path.exists(dest_dir): 22 | os.makedirs(dest_dir) 23 | 24 | chroma_files = [] 25 | 26 | for ds_idx in range(agg_dataset.n_datasources): 27 | ds = agg_dataset.datasource(ds_idx) 28 | 29 | chromas = [] 30 | 31 | for data, _ in dmgr.iterators.iterate_batches(ds, batch_size, 32 | randomise=False, 33 | expand=False): 34 | chromas.append(process_fn(data)) 35 | 36 | chromas = np.concatenate(chromas) 37 | chroma_file = os.path.join(dest_dir, ds.name + extension) 38 | np.save(chroma_file, chromas) 39 | chroma_files.append(chroma_file) 40 | 41 | return chroma_files 42 | 43 | 44 | # Initialise Sacred experiment 45 | ex = setup('Deep Chroma Extractor') 46 | 47 | 48 | # Standard config 49 | @ex.config 50 | def _cfg(): 51 | observations = 'results' 52 | feature_extractor = None 53 | target = None 54 | chroma_network = None 55 | optimiser = None 56 | training = None 57 | regularisation = None 58 | testing = None 59 | augmentation = None 60 | 61 | 62 | # add models 63 |
chroma_dnn.add_sacred_config(ex) 64 | 65 | 66 | @ex.automain 67 | def main(datasource, feature_extractor, target, chroma_network, 68 | optimiser, training, regularisation, augmentation, testing): 69 | 70 | err = False 71 | if chroma_network is None: 72 | print(Colors.red('ERROR: Specify a chroma extractor!')) 73 | err = True 74 | if feature_extractor is None: 75 | print(Colors.red('ERROR: Specify a feature extractor!')) 76 | err = True 77 | if target is None: 78 | print(Colors.red('ERROR: Specify a target!')) 79 | err = True 83 | if err: 84 | return 1 85 | 86 | # intermediate target is always chroma vectors 87 | target_chroma = targets.ChromaTarget( 88 | feature_extractor['params']['fps']) 89 | 90 | target_chords = targets.create_target( 91 | feature_extractor['params']['fps'], 92 | target 93 | ) 94 | 95 | if not isinstance(datasource['test_fold'], list): 96 | datasource['test_fold'] = [datasource['test_fold']] 97 | 98 | if not isinstance(datasource['val_fold'], list): 99 | datasource['val_fold'] = [datasource['val_fold']] 100 | 101 | # if no validation folds are specified, always use the 102 | # 'None' and determine validation fold automatically 103 | if datasource['val_fold'][0] is None: 104 | datasource['val_fold'] *= len(datasource['test_fold']) 105 | 106 | if len(datasource['test_fold']) != len(datasource['val_fold']): 107 | print(Colors.red('ERROR: Need same number of validation and ' 108 | 'test folds')) 109 | return 1 110 | 111 | all_pred_files = [] 112 | all_gt_files = [] 113 | 114 | print(Colors.magenta('\nStarting experiment ' + ex.observers[0].hash())) 115 | 116 | with TempDir() as exp_dir: 117 | for test_fold, val_fold in zip(datasource['test_fold'], 118 | datasource['val_fold']): 119 | print('') 120 | print(Colors.yellow( 121 | '=' * 20 + ' FOLD {} '.format(test_fold) + '=' * 20)) 122 | # Load data sets 123 | print(Colors.red('\nLoading data...\n')) 124 | 125 | feature_ext = features.create_extractor(feature_extractor, 126 | test_fold) 127 | train_set, val_set, test_set, gt_files = data.create_datasources( 128 | dataset_names=datasource['datasets'], 129 | preprocessors=datasource['preprocessors'], 130 | compute_features=feature_ext, 131 | compute_targets=target_chroma, 132 | context_size=datasource['context_size'], 133 | test_fold=test_fold, 134 | val_fold=val_fold, 135 | cached=datasource['cached'] 136 | ) 137 | 138 | if testing['test_on_val']: 139 | test_set = val_set 140 | 141 | print(Colors.blue('Train Set:')) 142 | print('\t', train_set) 143 | 144 | print(Colors.blue('Validation Set:')) 145 | print('\t', val_set) 146 | 147 | print(Colors.blue('Test Set:')) 148 | print('\t', test_set) 149 | print('') 150 | 151 | # build network 152 | print(Colors.red('Building network...\n')) 153 | 154 | model_type = globals()[chroma_network['model']['type']] 155 | mdl = model_type.build_model(in_shape=train_set.dshape, 156 | out_size_chroma=train_set.tshape[0], 157 | out_size=target_chords.num_classes, 158 | model=chroma_network['model']) 159 | 160 | chroma_neural_net = mdl['chroma_network'] 161 | chord_neural_net = mdl['chord_network'] 162 | input_var = mdl['input_var'] 163 | chroma_target_var = mdl['chroma_target_var'] 164 | chord_target_var = mdl['chord_target_var'] 165 | chroma_loss_fn = mdl['chroma_loss_fn'] 166 | chord_loss_fn = mdl['chord_loss_fn'] 167 | 168 | chroma_opt, chroma_lrs = create_optimiser(chroma_network['optimiser']) 169 | chord_opt, chord_lrs =
create_optimiser(optimiser) 170 | 171 | chroma_train_fn = nn.compile_train_fn( 172 | chroma_neural_net, input_var, chroma_target_var, 173 | loss_fn=chroma_loss_fn, opt_fn=chroma_opt, 174 | **chroma_network['regularisation'] 175 | ) 176 | 177 | chroma_test_fn = nn.compile_test_func( 178 | chroma_neural_net, input_var, chroma_target_var, 179 | loss_fn=chroma_loss_fn, 180 | **chroma_network['regularisation'] 181 | ) 182 | 183 | chroma_process_fn = nn.compile_process_func( 184 | chroma_neural_net, input_var 185 | ) 186 | 187 | chord_train_fn = nn.compile_train_fn( 188 | chord_neural_net, input_var, chord_target_var, 189 | loss_fn=chord_loss_fn, opt_fn=chord_opt, tags={'chord': True}, 190 | **regularisation 191 | ) 192 | 193 | chord_test_fn = nn.compile_test_func( 194 | chord_neural_net, input_var, chord_target_var, 195 | loss_fn=chord_loss_fn, tags={'chord': True}, 196 | **regularisation 197 | ) 198 | 199 | chord_process_fn = nn.compile_process_func( 200 | chord_neural_net, input_var 201 | ) 202 | 203 | print(Colors.blue('Chroma Network:')) 204 | print(nn.to_string(chroma_neural_net)) 205 | print('') 206 | 207 | print(Colors.blue('Chords Network:')) 208 | print(nn.to_string(chord_neural_net)) 209 | print('') 210 | 211 | print(Colors.red('Starting training chroma network...\n')) 212 | 213 | chroma_training = chroma_network['training'] 214 | chroma_train_batches, chroma_validation_batches = \ 215 | model_type.create_iterators(train_set, val_set, 216 | chroma_training, augmentation) 217 | crm_train_losses, crm_val_losses, _, crm_val_accs = nn.train( 218 | network=chroma_neural_net, 219 | train_fn=chroma_train_fn, train_batches=chroma_train_batches, 220 | test_fn=chroma_test_fn, 221 | validation_batches=chroma_validation_batches, 222 | threads=10, callbacks=[chroma_lrs] if chroma_lrs else [], 223 | num_epochs=chroma_training['num_epochs'], 224 | early_stop=chroma_training['early_stop'], 225 | early_stop_acc=chroma_training['early_stop_acc'], 226 | acc_func=nn.nn.elemwise_acc 227 | ) 228 | 229 | # we need to create a new dataset with a new target (chords) 230 | del train_set 231 | del val_set 232 | del test_set 233 | del gt_files 234 | 235 | train_set, val_set, test_set, gt_files = data.create_datasources( 236 | dataset_names=datasource['datasets'], 237 | preprocessors=datasource['preprocessors'], 238 | compute_features=feature_ext, 239 | compute_targets=target_chords, 240 | context_size=datasource['context_size'], 241 | test_fold=test_fold, 242 | val_fold=val_fold, 243 | cached=datasource['cached'] 244 | ) 245 | 246 | if testing['test_on_val']: 247 | test_set = val_set 248 | 249 | print(Colors.blue('Train Set:')) 250 | print('\t', train_set) 251 | 252 | print(Colors.blue('Validation Set:')) 253 | print('\t', val_set) 254 | 255 | print(Colors.blue('Test Set:')) 256 | print('\t', test_set) 257 | print('') 258 | 259 | print(Colors.red('Starting training chord network...\n')) 260 | 261 | chord_train_batches, chord_validation_batches = \ 262 | model_type.create_iterators(train_set, val_set, training, 263 | augmentation) 264 | 265 | crd_train_losses, crd_val_losses, _, crd_val_accs = nn.train( 266 | network=chord_neural_net, 267 | train_fn=chord_train_fn, train_batches=chord_train_batches, 268 | test_fn=chord_test_fn, 269 | validation_batches=chord_validation_batches, 270 | threads=10, callbacks=[chord_lrs] if chord_lrs else [], 271 | num_epochs=training['num_epochs'], 272 | early_stop=training['early_stop'], 273 | early_stop_acc=training['early_stop_acc'], 274 | ) 275 | 276 | 
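# both networks are trained at this point; what follows saves the parameters, predicts chord labels on the test set and stores the computed chroma vectors as artifacts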
print(Colors.red('\nStarting testing...\n')) 277 | 278 | param_file = os.path.join( 279 | exp_dir, 'params_fold_{}.pkl'.format(test_fold)) 280 | nn.save_params(chord_neural_net, param_file) 281 | ex.add_artifact(param_file) 282 | 283 | pred_files = test.compute_labeling( 284 | chord_process_fn, target_chords, test_set, dest_dir=exp_dir, 285 | use_mask=False, batch_size=testing['batch_size'] 286 | ) 287 | 288 | # compute chroma vectors for the test set 289 | # TODO: replace this with experiment.compute_features 290 | for cf in compute_chroma(chroma_process_fn, test_set, 291 | batch_size=training['batch_size'], 292 | dest_dir=exp_dir): 293 | ex.add_artifact(cf) 294 | 295 | test_gt_files = dmgr.files.match_files( 296 | pred_files, test.PREDICTION_EXT, gt_files, data.GT_EXT 297 | ) 298 | 299 | all_pred_files += pred_files 300 | all_gt_files += test_gt_files 301 | 302 | print(Colors.blue('Results:')) 303 | scores = test.compute_average_scores(test_gt_files, pred_files) 304 | test.print_scores(scores) 305 | result_file = os.path.join( 306 | exp_dir, 'results_fold_{}.yaml'.format(test_fold)) 307 | yaml.dump(dict(scores=scores, 308 | chord_train_losses=map(float, crd_train_losses), 309 | chord_val_losses=map(float, crd_val_losses), 310 | chord_val_accs=map(float, crd_val_accs), 311 | chroma_train_losses=map(float, crm_train_losses), 312 | chroma_val_losses=map(float, crm_val_losses), 313 | chroma_val_accs=map(float, crm_val_accs)), 314 | open(result_file, 'w')) 315 | ex.add_artifact(result_file) 316 | 317 | # close all files 318 | del train_set 319 | del val_set 320 | del test_set 321 | del gt_files 322 | 323 | # if there is something to aggregate 324 | if len(datasource['test_fold']) > 1: 325 | print(Colors.yellow('\nAggregated Results:\n')) 326 | scores = test.compute_average_scores(all_gt_files, all_pred_files) 327 | test.print_scores(scores) 328 | result_file = os.path.join(exp_dir, 'results.yaml') 329 | yaml.dump(dict(scores=scores), open(result_file, 'w')) 330 | ex.add_artifact(result_file) 331 | 332 | for pf in all_pred_files: 333 | ex.add_artifact(pf) 334 | 335 | print(Colors.magenta('Stopping experiment ' + ex.observers[0].hash())) 336 | -------------------------------------------------------------------------------- /chordrec/classify.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | 5 | import yaml 6 | 7 | import data 8 | import dmgr 9 | import features 10 | import nn 11 | import targets 12 | import test 13 | 14 | from nn.utils import Colors 15 | from models import dnn, avg_gap_feature, crf, rnn 16 | from experiment import TempDir, create_optimiser, setup, compute_features 17 | 18 | # Initialise Sacred experiment 19 | ex = setup('Classify Chords') 20 | 21 | 22 | # Standard config 23 | @ex.config 24 | def _cfg(): 25 | observations = 'results' 26 | feature_extractor = None 27 | target = None 28 | model = None 29 | optimiser = None 30 | training = None 31 | regularisation = None 32 | testing = None 33 | augmentation = None 34 | 35 | 36 | # add models 37 | dnn.add_sacred_config(ex) 38 | avg_gap_feature.add_sacred_config(ex) 39 | crf.add_sacred_config(ex) 40 | rnn.add_sacred_config(ex) 41 | 42 | 43 | # add general configs 44 | @ex.named_config 45 | def learn_rate_schedule(): 46 | optimiser = dict( 47 | schedule=dict( 48 | interval=10, 49 | factor=0.5 50 | ) 51 | ) 52 | 53 | 54 | @ex.automain 55 | def main(_log, datasource, feature_extractor, target, model, optimiser, 56 | training, 
regularisation, augmentation, testing): 57 | 58 | err = False 59 | if model is None or not model or 'type' not in model: 60 | _log.error(Colors.red('Specify a model!')) 61 | err = True 62 | if feature_extractor is None: 63 | _log.error(Colors.red('Specify a feature extractor!')) 64 | err = True 65 | if target is None: 66 | _log.error(Colors.red('Specify a target!')) 67 | err = True 68 | if err: 69 | return 1 70 | 71 | target_computer = targets.create_target( 72 | feature_extractor['params']['fps'], 73 | target 74 | ) 75 | 76 | if not isinstance(datasource['test_fold'], list): 77 | datasource['test_fold'] = [datasource['test_fold']] 78 | 79 | if not isinstance(datasource['val_fold'], list): 80 | datasource['val_fold'] = [datasource['val_fold']] 81 | 82 | # if no validation folds are specified, always use the 83 | # 'None' and determine validation fold automatically 84 | if datasource['val_fold'][0] is None: 85 | datasource['val_fold'] *= len(datasource['test_fold']) 86 | 87 | if len(datasource['test_fold']) != len(datasource['val_fold']): 88 | _log.error(Colors.red('Need same number of validation and test folds')) 89 | return 1 90 | 91 | all_pred_files = [] 92 | all_gt_files = [] 93 | 94 | print(Colors.magenta('\nStarting experiment ' + ex.observers[0].hash())) 95 | 96 | with TempDir() as exp_dir: 97 | for test_fold, val_fold in zip(datasource['test_fold'], 98 | datasource['val_fold']): 99 | print('') 100 | print(Colors.yellow( 101 | '=' * 20 + ' FOLD {} '.format(test_fold) + '=' * 20)) 102 | # Load data sets 103 | print(Colors.red('\nLoading data...\n')) 104 | 105 | train_set, val_set, test_set, gt_files = data.create_datasources( 106 | dataset_names=datasource['datasets'], 107 | preprocessors=datasource['preprocessors'], 108 | compute_features=features.create_extractor(feature_extractor, 109 | test_fold), 110 | compute_targets=target_computer, 111 | context_size=datasource['context_size'], 112 | test_fold=test_fold, 113 | val_fold=val_fold, 114 | cached=datasource['cached'], 115 | ) 116 | 117 | if testing['test_on_val']: 118 | test_set = val_set 119 | 120 | print(Colors.blue('Train Set:')) 121 | print('\t', train_set) 122 | print(Colors.blue('Validation Set:')) 123 | print('\t', val_set) 124 | print(Colors.blue('Test Set:')) 125 | print('\t', test_set) 126 | print('') 127 | 128 | # build network 129 | print(Colors.red('Building network...\n')) 130 | 131 | model_type = globals()[model['type']] 132 | mdl = model_type.build_model(in_shape=train_set.dshape, 133 | out_size=train_set.tshape[0], 134 | model=model) 135 | 136 | # mandatory parts of the model 137 | neural_net = mdl['network'] 138 | input_var = mdl['input_var'] 139 | target_var = mdl['target_var'] 140 | loss_fn = mdl['loss_fn'] 141 | 142 | # optional parts 143 | mask_var = mdl.get('mask_var') 144 | feature_out = mdl.get('feature_out') 145 | 146 | train_batches, validation_batches = model_type.create_iterators( 147 | train_set, val_set, training, augmentation 148 | ) 149 | 150 | opt, lrs = create_optimiser(optimiser) 151 | 152 | train_fn = nn.compile_train_fn( 153 | neural_net, input_var, target_var, 154 | loss_fn=loss_fn, opt_fn=opt, mask_var=mask_var, 155 | **regularisation 156 | ) 157 | 158 | test_fn = nn.compile_test_func( 159 | neural_net, input_var, target_var, 160 | loss_fn=loss_fn, mask_var=mask_var, 161 | **regularisation 162 | ) 163 | 164 | process_fn = nn.compile_process_func( 165 | neural_net, input_var, mask_var=mask_var) 166 | 167 | if feature_out is not None: 168 | feature_fn = nn.compile_process_func( 169 | 
feature_out, input_var, mask_var=mask_var 170 | ) 171 | else: 172 | feature_fn = None 173 | 174 | print(Colors.blue('Neural Network:')) 175 | print(nn.to_string(neural_net)) 176 | print('') 177 | 178 | if 'param_file' in training: 179 | nn.load_params(neural_net, 180 | training['param_file'].format(test_fold)) 181 | train_losses = [] 182 | val_losses = [] 183 | val_accs = [] 184 | else: 185 | if 'init_file' in training: 186 | print('initialising') 187 | nn.load_params(neural_net, 188 | training['init_file'].format(test_fold)) 189 | print(Colors.red('Starting training...\n')) 190 | train_losses, val_losses, _, val_accs = nn.train( 191 | network=neural_net, 192 | train_fn=train_fn, train_batches=train_batches, 193 | test_fn=test_fn, validation_batches=validation_batches, 194 | threads=10, callbacks=[lrs] if lrs else [], 195 | num_epochs=training['num_epochs'], 196 | early_stop=training['early_stop'], 197 | early_stop_acc=training['early_stop_acc'] 198 | ) 199 | param_file = os.path.join( 200 | exp_dir, 'params_fold_{}.pkl'.format(test_fold)) 201 | nn.save_params(neural_net, param_file) 202 | ex.add_artifact(param_file) 203 | 204 | print(Colors.red('\nStarting testing...\n')) 205 | 206 | if feature_fn is not None: 207 | dest_dir = os.path.join(exp_dir, 208 | 'features_fold_{}'.format(test_fold)) 209 | compute_features( 210 | feature_fn, train_set, batch_size=testing['batch_size'], 211 | dest_dir=dest_dir, extension='.features.npy', 212 | use_mask=mask_var is not None) 213 | compute_features( 214 | feature_fn, val_set, batch_size=testing['batch_size'], 215 | dest_dir=dest_dir, extension='.features.npy', 216 | use_mask=mask_var is not None) 217 | compute_features( 218 | feature_fn, test_set, batch_size=testing['batch_size'], 219 | dest_dir=dest_dir, extension='.features.npy', 220 | use_mask=mask_var is not None) 221 | ex.add_artifact(dest_dir) 222 | 223 | pred_files = test.compute_labeling( 224 | process_fn, target_computer, test_set, dest_dir=exp_dir, 225 | use_mask=mask_var is not None, batch_size=testing['batch_size'] 226 | ) 227 | 228 | test_gt_files = dmgr.files.match_files( 229 | pred_files, test.PREDICTION_EXT, gt_files, data.GT_EXT 230 | ) 231 | 232 | all_pred_files += pred_files 233 | all_gt_files += test_gt_files 234 | 235 | print(Colors.blue('Results:')) 236 | scores = test.compute_average_scores(test_gt_files, pred_files) 237 | test.print_scores(scores) 238 | result_file = os.path.join( 239 | exp_dir, 'results_fold_{}.yaml'.format(test_fold)) 240 | yaml.dump(dict(scores=scores, 241 | train_losses=map(float, train_losses), 242 | val_losses=map(float, val_losses), 243 | val_accs=map(float, val_accs)), 244 | open(result_file, 'w')) 245 | ex.add_artifact(result_file) 246 | 247 | # delete datasets so disk space is free 248 | del train_set 249 | del val_set 250 | del test_set 251 | 252 | # if there is something to aggregate 253 | if len(datasource['test_fold']) > 1: 254 | print(Colors.yellow('\nAggregated Results:\n')) 255 | scores = test.compute_average_scores(all_gt_files, all_pred_files) 256 | test.print_scores(scores) 257 | result_file = os.path.join(exp_dir, 'results.yaml') 258 | yaml.dump(dict(scores=scores), open(result_file, 'w')) 259 | ex.add_artifact(result_file) 260 | 261 | for pf in all_pred_files: 262 | ex.add_artifact(pf) 263 | 264 | print(Colors.magenta('Stopping experiment ' + ex.observers[0].hash())) 265 | -------------------------------------------------------------------------------- /chordrec/data.py: 
-------------------------------------------------------------------------------- 1 | from operator import eq 2 | import os 3 | import dmgr 4 | 5 | DATA_DIR = 'data' 6 | CACHE_DIR = 'feature_cache' 7 | SRC_EXT = '.flac' 8 | GT_EXT = '.chords' 9 | 10 | 11 | def combine_files(*args): 12 | """ 13 | Combines file dictionaries as returned by the methods of Dataset. 14 | :param args: file dictionaries 15 | :return: combined file dictionaries 16 | """ 17 | if len(args) < 1: 18 | raise ValueError('Pass at least one argument!') 19 | 20 | # make sure all elements contain the same number of splits 21 | if len(set(len(a) for a in args)) > 1: 22 | raise ValueError('Arguments must contain the same number of splits!') 23 | 24 | combined = [{'feat': [], 'targ': []} for _ in range(len(args[0]))] 25 | 26 | for fs in args: 27 | for s in range(len(combined)): 28 | for t in combined[s]: 29 | combined[s][t] += fs[s][t] 30 | 31 | return combined 32 | 33 | 34 | DATASET_DEFS = { 35 | 'beatles': { 36 | 'data_dir': 'beatles', 37 | 'split_filename': '8-fold_cv_album_distributed_{}.fold' 38 | }, 39 | 'queen': { 40 | 'data_dir': 'queen', 41 | 'split_filename': '8-fold_cv_random_{}.fold' 42 | }, 43 | 'zweieck': { 44 | 'data_dir': 'zweieck', 45 | 'split_filename': '8-fold_cv_random_{}.fold' 46 | }, 47 | 'robbie_williams': { 48 | 'data_dir': 'robbie_williams', 49 | 'split_filename': '8-fold_cv_random_{}.fold' 50 | }, 51 | 'rwc': { 52 | 'data_dir': 'rwc', 53 | 'split_filename': '8-fold_cv_random_{}.fold' 54 | }, 55 | 'billboard': { 56 | 'data_dir': os.path.join('mcgill-billboard', 'unique'), 57 | 'split_filename': '8-fold_cv_random_{}.fold' 58 | } 59 | } 60 | 61 | 62 | def load_dataset(name, data_dir, feature_cache_dir, 63 | compute_features, compute_targets): 64 | 65 | assert name in DATASET_DEFS.keys(), 'Unknown dataset {}'.format(name) 66 | 67 | data_dir = os.path.join(data_dir, DATASET_DEFS[name]['data_dir']) 68 | split_filename = os.path.join(data_dir, 'splits', 69 | DATASET_DEFS[name]['split_filename']) 70 | 71 | return dmgr.Dataset( 72 | data_dir, 73 | os.path.join(feature_cache_dir, name), 74 | [split_filename.format(f) for f in range(8)], 75 | source_ext=SRC_EXT, 76 | gt_ext=GT_EXT, 77 | compute_features=compute_features, 78 | compute_targets=compute_targets, 79 | ) 80 | 81 | 82 | def create_preprocessors(preproc_defs): 83 | preprocessors = [] 84 | for pp in preproc_defs: 85 | preprocessors.append( 86 | getattr(dmgr.preprocessing, pp['name'])(**pp['params'])) 87 | return preprocessors 88 | 89 | 90 | def create_datasources(dataset_names, preprocessors, 91 | compute_features, compute_targets, context_size, 92 | data_dir=DATA_DIR, feature_cache_dir=CACHE_DIR, 93 | test_fold=0, val_fold=None, 94 | **kwargs): 95 | 96 | if test_fold is not None and val_fold is None: 97 | val_fold = test_fold - 1 98 | 99 | preprocessors = create_preprocessors(preprocessors) 100 | 101 | if context_size > 0: 102 | data_source_type = dmgr.datasources.ContextDataSource 103 | kwargs['context_size'] = context_size 104 | else: 105 | data_source_type = dmgr.datasources.DataSource 106 | 107 | # load all datasets 108 | datasets = [load_dataset(name, data_dir, feature_cache_dir, 109 | compute_features, compute_targets) 110 | for name in dataset_names] 111 | 112 | if test_fold is not None: 113 | files = combine_files(*[ds.fold_split(val_fold, test_fold) 114 | for ds in datasets]) 115 | else: 116 | # use a single split with all files; train, validation and test set will then be the same 117 | files = combine_files(*[[ds.all_files()] 118 | for ds in datasets]) 119 | 120 |
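# wrap the combined file lists in data sources (context data sources if a context size was given), passing the preprocessors on to dmgr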
ds = dmgr.datasources.get_datasources( 121 | files, preprocessors=preprocessors, data_source_type=data_source_type, 122 | **kwargs 123 | ) 124 | 125 | if len(ds) == 3: 126 | train, val, test = ds 127 | elif len(ds) == 1: 128 | train = ds[0] 129 | val = ds[0] 130 | test = ds[0] 131 | else: 132 | raise RuntimeError('Got {} datasources,' 133 | ' expected 1 or 3.'.format(len(ds))) 134 | 135 | return train, val, test, sum((ds.gt_files for ds in datasets), []) 136 | 137 | 138 | def add_sacred_config(ex): 139 | ex.add_config( 140 | datasource=dict( 141 | datasets=['beatles', 'queen', 'zweieck', 'robbie_williams', 'rwc'], 142 | context_size=0, 143 | preprocessors=[], 144 | # fold 6 overestimates the score, but has highest correlation 145 | # with the total score 146 | test_fold=6, 147 | val_fold=None, 148 | cached=True 149 | ) 150 | ) 151 | -------------------------------------------------------------------------------- /chordrec/experiment.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import yaml 4 | import pickle 5 | import shutil 6 | import hashlib 7 | import tempfile 8 | import sys 9 | from functools import partial 10 | from sacred import Experiment 11 | from sacred.observers import RunObserver 12 | import lasagne as lnn 13 | import theano 14 | import numpy as np 15 | 16 | import nn 17 | import dmgr 18 | from nn.utils import Colors 19 | 20 | import data 21 | import features 22 | import targets 23 | import augmenters 24 | 25 | 26 | class TempDir: 27 | """ 28 | Creates a temporary directory to save stuff to 29 | """ 30 | def __enter__(self): 31 | self._tmp_dir_path = tempfile.mkdtemp() 32 | return self._tmp_dir_path 33 | 34 | def __exit__(self, type, value, traceback): 35 | shutil.rmtree(self._tmp_dir_path) 36 | 37 | 38 | def compute_features(process_fn, agg_dataset, dest_dir, use_mask, 39 | batch_size, extension): 40 | if not os.path.exists(dest_dir): 41 | os.makedirs(dest_dir) 42 | else: 43 | if not os.path.isdir(dest_dir): 44 | print(Colors.red('Destination path exists but is not a directory!'), 45 | file=sys.stderr) 46 | return 47 | 48 | iterate_batches = dmgr.iterators.iterate_batches 49 | 50 | feature_files = [] 51 | 52 | for ds_idx in range(agg_dataset.n_datasources): 53 | ds = agg_dataset.datasource(ds_idx) 54 | 55 | feats = [] 56 | for data, _ in iterate_batches(ds, batch_size or ds.n_data, 57 | randomise=False, expand=False): 58 | if use_mask: 59 | data = data[np.newaxis, :] 60 | mask = np.ones(data.shape[:2], dtype=np.float32) 61 | 62 | f = process_fn(data, mask)[0] 63 | else: 64 | f = process_fn(data) 65 | feats.append(f) 66 | 67 | feats = np.concatenate(feats) 68 | feat_file = os.path.join(dest_dir, ds.name + extension) 69 | np.save(feat_file, feats) 70 | feature_files.append(feat_file) 71 | 72 | return feature_files 73 | 74 | 75 | def create_optimiser(optimiser): 76 | """ 77 | Creates a function that returns an optimiser and (optional) a learn 78 | rate schedule 79 | """ 80 | 81 | if optimiser['schedule'] is not None: 82 | # if we have a learn rate schedule, create a theano shared 83 | # variable and a corresponding update 84 | lr = theano.shared(np.float32(optimiser['params']['learning_rate'])) 85 | 86 | # create a copy of the optimiser config dict so we do not change 87 | # it 88 | from copy import deepcopy 89 | optimiser = deepcopy(optimiser) 90 | optimiser['params']['learning_rate'] = lr 91 | lrs = nn.LearnRateSchedule(learning_rate=lr, **optimiser['schedule']) 92 | else: 93 | lrs 
= None 94 | 95 | return partial(getattr(lnn.updates, optimiser['name']), 96 | **optimiser['params']), lrs 97 | 98 | 99 | def rhash(d): 100 | """ 101 | Computes the recursive hash of a dictionary 102 | :param d: dictionary to hash 103 | :return: hash of dictionary 104 | """ 105 | m = hashlib.sha1() 106 | 107 | if isinstance(d, dict): 108 | for _, value in sorted(d.items(), key=lambda (k, v): k): 109 | m.update(rhash(value)) 110 | else: 111 | m.update(str(d)) 112 | 113 | return m.hexdigest() 114 | 115 | 116 | def fhash(filename): 117 | """ 118 | Computes the hash of a file 119 | :param filename: file to hash 120 | :return: hash value of file 121 | """ 122 | md5 = hashlib.md5() 123 | with open(filename, 'rb') as f: 124 | # this needs an empty *byte* string b'' as a sentinel value 125 | for chunk in iter(lambda: f.read(128 * md5.block_size), b''): 126 | md5.update(chunk) 127 | return md5.hexdigest() 128 | 129 | 130 | class PickleAndSymlinkObserver(RunObserver): 131 | 132 | def __init__(self): 133 | self.config = None 134 | self.run = None 135 | self._hash = None 136 | 137 | def started_event(self, ex_info, host_info, start_time, config, comment): 138 | self.config = config 139 | 140 | # remember the *exact* configuration used for this run 141 | config_file = os.path.join(self.config_path(), 'config.yaml') 142 | with open(config_file, 'w') as f: 143 | f.write(yaml.dump(self.config)) 144 | 145 | self.run = { 146 | 'ex_info': ex_info, 147 | 'host_info': host_info, 148 | 'start_time': start_time, 149 | 'comment': comment 150 | } 151 | 152 | def hash(self): 153 | if self._hash is None: 154 | self._hash = rhash(self.config) 155 | 156 | return self._hash 157 | 158 | def config_path(self): 159 | if self.config is None: 160 | raise RuntimeError('tried to get a path without a configuration!') 161 | 162 | config_save_path = os.path.join(self.config['observations'], 163 | self.hash()) 164 | 165 | if not os.path.exists(config_save_path): 166 | os.makedirs(os.path.join(config_save_path, 'resources')) 167 | os.makedirs(os.path.join(config_save_path, 'artifacts')) 168 | return config_save_path 169 | 170 | def heartbeat_event(self, info, captured_out, beat_time): 171 | self.run['info'] = info 172 | self.run['captured_out'] = captured_out 173 | self.run['beat_time'] = beat_time 174 | 175 | def completed_event(self, stop_time, result): 176 | run_file = os.path.join(self.config_path(), 'completed.pkl') 177 | with open(run_file, 'w') as f: 178 | pickle.dump(self.run, f) 179 | 180 | def interrupted_event(self, interrupt_time): 181 | self.run['interrupt_time'] = interrupt_time 182 | interrupted_file = os.path.join(self.config_path(), 'interrupted.pkl') 183 | with open(interrupted_file, 'w') as f: 184 | pickle.dump(self.run, f) 185 | 186 | def failed_event(self, fail_time, fail_trace): 187 | self.run['fail_time'] = fail_time 188 | self.run['fail_trace'] = fail_trace 189 | 190 | fail_file = os.path.join(self.config_path(), 'failed.pkl') 191 | with open(fail_file, 'w') as f: 192 | pickle.dump(self.run, f) 193 | 194 | fail_file = os.path.join(self.config_path(), 'failed_trace.txt') 195 | with open(fail_file, 'w') as f: 196 | f.write(''.join(fail_trace)) 197 | 198 | def resource_event(self, filename): 199 | """ 200 | link a used file (this is where we could have distributed storage)...
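The link is named after the file's hash, so each resource is linked at most once.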
201 | """ 202 | linkname = os.path.join(self.config_path(), 'resources', 203 | fhash(filename)) 204 | if not os.path.exists(linkname): 205 | os.symlink(filename, linkname) 206 | 207 | def artifact_event(self, filename): 208 | """ 209 | move an artifact from a temporary space to the actual observations 210 | directory for this run 211 | """ 212 | newname = os.path.join(self.config_path(), 'artifacts', 213 | os.path.basename(filename)) 214 | shutil.move(filename, newname) 215 | 216 | def get_artifact_path(self, path): 217 | return os.path.join(self.config_path(), 'artifacts', path) 218 | 219 | 220 | class ParamSaver: 221 | 222 | def __init__(self, ex, net, tmp_dir): 223 | self.ex = ex 224 | self.tmp_dir = tmp_dir 225 | self.net = net 226 | 227 | def __call__(self, epoch): 228 | fn = os.path.join(self.tmp_dir, 'params_{}.pkl'.format(epoch)) 229 | self.net.save_parameters(fn) 230 | self.ex.add_artifact(fn) 231 | 232 | 233 | def setup(name): 234 | ex = Experiment(name) 235 | ex.observers.append(PickleAndSymlinkObserver()) 236 | data.add_sacred_config(ex) 237 | features.add_sacred_config(ex) 238 | targets.add_sacred_config(ex) 239 | augmenters.add_sacred_config(ex) 240 | return ex 241 | 242 | 243 | -------------------------------------------------------------------------------- /chordrec/features.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import madmom as mm 3 | import pickle 4 | 5 | 6 | class ConstantQ: 7 | 8 | def __init__(self, num_bands, fmin, num_octaves, fps, align, log_div, 9 | sample_rate=44100, fold=None): 10 | 11 | self.fps = fps 12 | self.num_bands = num_bands 13 | self.align = align 14 | self.fmin = fmin 15 | self.num_octaves = num_octaves 16 | self.log_div = log_div 17 | 18 | self.sample_rate = sample_rate 19 | 20 | from yaafelib import FeaturePlan, Engine 21 | 22 | fp = FeaturePlan(sample_rate=sample_rate) 23 | 24 | cqt_config = " ".join(['cqt: CQT', 25 | 'CQTAlign={}'.format(align), 26 | 'CQTBinsPerOctave={}'.format(num_bands), 27 | 'CQTMinFreq={}'.format(fmin), 28 | 'CQTNbOctaves={}'.format(num_octaves), 29 | 'stepSize={}'.format(sample_rate / fps) 30 | ]) 31 | 32 | fp.addFeature(cqt_config) 33 | 34 | df = fp.getDataFlow() 35 | self.engine = Engine() 36 | self.engine.load(df) 37 | 38 | @property 39 | def name(self): 40 | return 'cqt_fps={}_num-bands={}_align={}_fmin={}_num_oct={}'\ 41 | '_logdiv={}'.format(self.fps, self.num_bands, self.align, 42 | self.fmin, self.num_octaves, self.log_div) 43 | 44 | def __call__(self, audio_file): 45 | 46 | audio = mm.audio.signal.Signal(audio_file, 47 | sample_rate=self.sample_rate, 48 | num_channels=1).astype(np.float64) 49 | 50 | cqt = self.engine.processAudio(audio.reshape((1, -1)))['cqt'] 51 | # compensate for different padding in madmom vs. 
yaafe and convert 52 | # to float32 53 | cqt = np.vstack((cqt, np.zeros(cqt.shape[1:]))).astype(np.float32) 54 | 55 | if self.log_div: 56 | return np.log(cqt / self.log_div + 1) 57 | else: 58 | return cqt 59 | 60 | 61 | class LogFiltSpec: 62 | 63 | def __init__(self, frame_sizes, num_bands, fmin, fmax, fps, unique_filters, 64 | sample_rate=44100, fold=None): 65 | 66 | self.frame_sizes = frame_sizes 67 | self.num_bands = num_bands 68 | self.fmax = fmax 69 | self.fmin = fmin 70 | self.fps = fps 71 | self.unique_filters = unique_filters 72 | self.sample_rate = sample_rate 73 | 74 | @property 75 | def name(self): 76 | return 'lfs_fps={}_num-bands={}_fmin={}_fmax={}_frame_sizes=[{}]'.format( 77 | self.fps, self.num_bands, self.fmin, self.fmax, 78 | '-'.join(map(str, self.frame_sizes)) 79 | ) + ('_uf' if self.unique_filters else '') 80 | 81 | def __call__(self, audio_file): 82 | # do not resample because ffmpeg/avconv creates terrible sampling 83 | # artifacts 84 | specs = [ 85 | mm.audio.spectrogram.LogarithmicFilteredSpectrogram( 86 | audio_file, num_channels=1, sample_rate=self.sample_rate, 87 | fps=self.fps, frame_size=ffts, 88 | num_bands=self.num_bands, fmin=self.fmin, fmax=self.fmax, 89 | unique_filters=self.unique_filters) 90 | for ffts in self.frame_sizes 91 | ] 92 | 93 | return np.hstack(specs).astype(np.float32) 94 | 95 | 96 | class Chroma: 97 | 98 | def __init__(self, frame_size, fmax, fps, oct_width, center_note, log_eta, 99 | sample_rate=44100, fold=None): 100 | self.fps = fps 101 | self.fmax = fmax 102 | self.sample_rate = sample_rate 103 | self.oct_width = oct_width 104 | self.center_note = center_note 105 | self.frame_size = frame_size 106 | self.log_eta = log_eta 107 | 108 | # parameters are based on Cho and Bello, 2014. 109 | import librosa 110 | ctroct = (librosa.hz_to_octs(librosa.note_to_hz(center_note)) 111 | if center_note is not None else None) 112 | 113 | self.filterbank = librosa.filters.chroma( 114 | sr=sample_rate, n_fft=frame_size, octwidth=oct_width, 115 | ctroct=ctroct).T[:-1] 116 | 117 | # mask out everything above fmax 118 | from bottleneck import move_mean 119 | m = np.fft.fftfreq( 120 | frame_size, 1. / sample_rate)[:frame_size / 2] < fmax 121 | mask_smooth = move_mean(m, window=10, min_count=1) 122 | self.filterbank *= mask_smooth[:, np.newaxis] 123 | 124 | @property 125 | def name(self): 126 | if self.oct_width is not None: 127 | gauss_str = '_octwidth={:g}_cnote={}'.format(self.oct_width, 128 | self.center_note) 129 | else: 130 | gauss_str = '' 131 | 132 | if self.log_eta is not None: 133 | log_str = '_log={}'.format(self.log_eta) 134 | else: 135 | log_str = '' 136 | 137 | return 'chroma_fps={}_fmax={}_frame_size={}'.format( 138 | self.fps, self.fmax, self.frame_size) + gauss_str + log_str 139 | 140 | def __call__(self, audio_file): 141 | spec = mm.audio.spectrogram.Spectrogram( 142 | audio_file, num_channels=1, sample_rate=self.sample_rate, 143 | fps=self.fps, frame_size=4096, 144 | ) 145 | 146 | if self.log_eta is not None: 147 | spec = np.log(self.log_eta * spec / spec.max() + 1) 148 | 149 | chroma = np.dot(spec, self.filterbank) 150 | norm = np.sqrt(np.sum(chroma ** 2, axis=1)) 151 | norm[norm < 1e-20] = 1. 152 | return (chroma / norm[:, np.newaxis]).astype(np.float32) 153 | 154 | 155 | class ChromaCq: 156 | 157 | def __init__(self, fps, win_center, win_width, log_eta, 158 | sample_rate=44100, fold=None): 159 | """ 160 | Computes Chromas from a constant q transform. 
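If a window centre is given, the CQT bins are weighted with a Gaussian window before being folded into a chromagram.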
161 | :param fps: frames per second 162 | :param win_center: midi number of window center note 163 | :param win_width: width of weighting window 164 | :param log_eta: scaling parameter for log 165 | :param sample_rate: sample rate of the audio 166 | """ 167 | self.fps = fps 168 | self.sample_rate = sample_rate 169 | self.num_bins = 84 170 | self.log_eta = log_eta 171 | 172 | if win_center is None: 173 | self.win = None 174 | self.win_center = None 175 | self.win_width = None 176 | else: 177 | # cq spec starts at C1, which is midi pitch 24. the zeroth bin thus 178 | # corresponds to midi note 24, and we have to adjust win_center 179 | self.win_center = float(win_center - 24) 180 | self.win_width = float(win_width) 181 | self.win = np.exp( 182 | -0.5 * ((self.win_center - np.arange(self.num_bins)) / 183 | self.win_width) ** 2 184 | ) 185 | 186 | @property 187 | def name(self): 188 | if self.win is not None: 189 | win_str = '_winc={}_winw={}'.format(self.win_center, 190 | self.win_width) 191 | else: 192 | win_str = '' 193 | 194 | log_str = '_log_eta={}'.format(self.log_eta) if self.log_eta else '' 195 | return 'chroma_cq_fps={}'.format(self.fps) + win_str + log_str 196 | 197 | def __call__(self, audio_file): 198 | import librosa 199 | y = mm.audio.signal.Signal(audio_file, num_channels=1, 200 | sample_rate=self.sample_rate) 201 | 202 | cq = librosa.core.cqt(y, sr=y.sample_rate, tuning=0, 203 | fmin=mm.audio.filters.midi2hz(24), 204 | n_bins=self.num_bins, 205 | hop_length=int(self.sample_rate / self.fps)) 206 | 207 | if self.log_eta is not None: 208 | cq = np.log(self.log_eta * cq / cq.max() + 1) 209 | 210 | if self.win is not None: 211 | cq *= self.win[:, np.newaxis] 212 | 213 | return librosa.feature.chroma_cqt(y=None, C=cq, tuning=0, 214 | norm=2).T.astype(np.float32) 215 | 216 | 217 | class HarmonicPitchClassProfile: 218 | 219 | def __init__(self, fps, frame_size, fmax, num_bands, 220 | sample_rate=44100, fold=None): 221 | self.fps = fps 222 | self.frame_size = frame_size 223 | self.fmax = fmax 224 | self.sample_rate = sample_rate 225 | self.num_bands = num_bands 226 | 227 | @property 228 | def name(self): 229 | return 'hpcp_fps={}_fmax={}_nbands={}_frame_size={}'.format( 230 | self.fps, self.fmax, self.num_bands, self.frame_size 231 | ) 232 | 233 | def __call__(self, audio_file): 234 | from madmom.audio import chroma 235 | 236 | hpcp = chroma.HarmonicPitchClassProfile( 237 | audio_file, fps=self.fps, fmax=self.fmax, 238 | num_classes=self.num_bands, sample_rate=self.sample_rate 239 | ) 240 | 241 | norm = np.sqrt(np.sum(hpcp ** 2, axis=1)) 242 | norm[norm < 1e-20] = 1. 243 | return (hpcp / norm[:, np.newaxis]).astype(np.float32) 244 | 245 | 246 | class DeepChroma: 247 | 248 | def __init__(self, fps, fmin=65, fmax=2100, unique_filters=True, 249 | models=None, sample_rate=44100, fold=None): 250 | assert fps == 10, 'Cannot handle fps different from 10 yet.' 
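# the bundled madmom models expect spectrograms at 10 fps, so other frame rates would need retrained models, hence the assertion above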
251 | from madmom.audio.chroma import DeepChromaProcessor 252 | from hashlib import sha1 253 | self.fps = fps 254 | self.fmin = fmin 255 | self.fmax = fmax 256 | self.unique_filters = unique_filters 257 | self.dcp = DeepChromaProcessor( 258 | fmin=fmin, fmax=fmax, unique_filters=unique_filters, models=models 259 | ) 260 | self.model_hash = sha1(pickle.dumps(self.dcp)).hexdigest() 261 | 262 | @property 263 | def name(self): 264 | return 'deepchroma_fps={}_fmin={}_fmax={}_uf={}_mdlhsh={}'.format( 265 | self.fps, self.fmin, self.fmax, self.unique_filters, 266 | self.model_hash 267 | ) 268 | 269 | def __call__(self, audio_file): 270 | return self.dcp(audio_file) 271 | 272 | 273 | class PrecomputedFeature: 274 | 275 | def __init__(self, name, fps, fold): 276 | self._name = name 277 | self.fps = fps 278 | self.fold = fold 279 | 280 | @property 281 | def name(self): 282 | return self._name.format(fps=self.fps, fold=self.fold) 283 | 284 | def __call__(self, audio_file): 285 | raise NotImplementedError( 286 | 'Cannot compute features for {}. ' 287 | 'This feature is only precomputed!'.format(audio_file)) 288 | 289 | 290 | def add_sacred_config(ex): 291 | ex.add_named_config( 292 | 'constant_q', 293 | feature_extractor=dict( 294 | name='ConstantQ', 295 | params=dict( 296 | fps=10, 297 | num_bands=24, 298 | fmin=30, 299 | num_octaves=8, 300 | log_div=500., 301 | align='c' 302 | ) 303 | ) 304 | ) 305 | 306 | ex.add_named_config( 307 | 'log_filt_spec', 308 | feature_extractor=dict( 309 | name='LogFiltSpec', 310 | params=dict( 311 | fps=10, 312 | frame_sizes=[8192], 313 | num_bands=24, 314 | fmin=65, 315 | fmax=2100, 316 | unique_filters=True, 317 | ) 318 | ) 319 | ) 320 | 321 | ex.add_named_config( 322 | 'chroma_clp', 323 | feature_extractor=dict( 324 | name='PrecomputedFeature', 325 | params=dict( 326 | name='chroma_clp_fps={fps}', 327 | fps=10, 328 | ) 329 | ) 330 | ) 331 | 332 | ex.add_named_config( 333 | 'perfect_chroma', 334 | feature_extractor=dict( 335 | name='PrecomputedFeature', 336 | params=dict( 337 | name='perfect_chroma_fps={fps}', 338 | fps=10 339 | ) 340 | ) 341 | ) 342 | 343 | ex.add_named_config( 344 | 'gap_feature', 345 | feature_extractor=dict( 346 | name='PrecomputedFeature', 347 | params=dict( 348 | name='gap_feature/features_fold_{fold}', 349 | fps=10, 350 | ) 351 | ) 352 | ) 353 | 354 | ex.add_named_config( 355 | 'deep_chroma_pc', 356 | feature_extractor=dict( 357 | name='PrecomputedFeature', 358 | params=dict( 359 | name='deep_chroma_pc', 360 | fps=10 361 | ) 362 | ) 363 | ) 364 | 365 | ex.add_named_config( 366 | 'deep_chroma', 367 | feature_extractor=dict( 368 | name='DeepChroma', 369 | params=dict( 370 | fps=10 371 | ) 372 | ) 373 | ) 374 | 375 | ex.add_named_config( 376 | 'hpcp', 377 | feature_extractor=dict( 378 | name='HarmonicPitchClassProfile', 379 | params=dict( 380 | fps=10, 381 | frame_size=8192, 382 | fmax=5500, 383 | num_bands=36, 384 | ) 385 | ) 386 | ) 387 | 388 | ex.add_named_config( 389 | 'chroma_hpcp', 390 | feature_extractor=dict( 391 | name='HarmonicPitchClassProfile', 392 | params=dict( 393 | fps=10, 394 | frame_size=8192, 395 | fmax=5500, 396 | num_bands=12, 397 | ) 398 | ) 399 | ) 400 | 401 | ex.add_named_config( 402 | 'chroma', 403 | feature_extractor=dict( 404 | name='Chroma', 405 | params=dict( 406 | fps=10, 407 | frame_size=4096, 408 | fmax=5500, 409 | oct_width=None, 410 | center_note=None, 411 | log_eta=None 412 | ) 413 | ) 414 | ) 415 | 416 | ex.add_named_config( 417 | 'chroma_w_log', 418 | feature_extractor=dict( 419 | name='Chroma', 420 | 
params=dict( 421 | fps=10, 422 | frame_size=4096, 423 | fmax=5500, 424 | oct_width=15./12, 425 | center_note='C4', 426 | log_eta=1000 427 | ) 428 | ) 429 | ) 430 | 431 | ex.add_named_config( 432 | 'chroma_cq', 433 | feature_extractor=dict( 434 | name='ChromaCq', 435 | params=dict( 436 | fps=9.98641304347826086957, 437 | win_center=None, 438 | win_width=None, 439 | log_eta=None 440 | ) 441 | ) 442 | ) 443 | 444 | ex.add_named_config( 445 | 'chroma_cq_w_log', 446 | feature_extractor=dict( 447 | name='ChromaCq', 448 | params=dict( 449 | fps=9.98641304347826086957, 450 | # parameters taken from Cho's paper 451 | win_center=60, 452 | win_width=15, 453 | log_eta=1000 454 | ) 455 | ) 456 | ) 457 | 458 | 459 | def create_extractor(config, fold): 460 | return globals()[config['name']](fold=fold, **config['params']) 461 | -------------------------------------------------------------------------------- /chordrec/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fdlm/chordrec/1acb97e5efdd9474e7abfe4b741f94b5452499d5/chordrec/models/__init__.py -------------------------------------------------------------------------------- /chordrec/models/avg_gap_feature.py: -------------------------------------------------------------------------------- 1 | from dnn import * 2 | 3 | 4 | def build_model(in_shape, out_size, model): 5 | network, input_var, target_var = build_net(in_shape, out_size, model) 6 | 7 | # this goes back to the nonlinearity layer of the penultimate conv layer 8 | # (after batchnorm!) 9 | feature_layer = network 10 | for _ in range(7): 11 | feature_layer = feature_layer.input_layer 12 | 13 | # average the feature maps of this conv layer 14 | feature_out = lnn.layers.get_output(feature_layer, deterministic=True) 15 | feature_out = tt.mean(feature_out, axis=(2, 3)) 16 | 17 | return dict(network=network, input_var=input_var, target_var=target_var, 18 | loss_fn=categorical_crossentropy, feature_out=feature_out) 19 | 20 | 21 | def add_sacred_config(ex): 22 | # ======================================================= conv net with gap 23 | 24 | ex.add_named_config( 25 | name='gap_feature_extractor', 26 | datasource=dict( 27 | context_size=7, 28 | ), 29 | model=dict( 30 | type='avg_gap_feature', 31 | conv=dict( 32 | conv1=dict( 33 | num_layers=4, 34 | num_filters=32, 35 | filter_size=(3, 3), 36 | pool_size=(1, 2), 37 | dropout=0.5, 38 | pad='same', 39 | batch_norm=True, 40 | ), 41 | conv2=dict( 42 | num_layers=2, 43 | num_filters=64, 44 | filter_size=(3, 3), 45 | pool_size=(1, 2), 46 | dropout=0.5, 47 | pad='valid', 48 | batch_norm=True, 49 | ), 50 | conv3=dict( 51 | num_layers=1, 52 | num_filters=128, 53 | filter_size=(9, 12), 54 | pool_size=None, 55 | dropout=0.5, 56 | pad='valid', 57 | batch_norm=True 58 | ) 59 | ), 60 | gap=dict( 61 | batch_norm=True, 62 | gap_nonlinearity='linear', 63 | ), 64 | out_nonlinearity='softmax' 65 | ), 66 | optimiser=dict( 67 | name='adam', 68 | params=dict( 69 | learning_rate=0.001 70 | ), 71 | schedule=None 72 | ), 73 | training=dict( 74 | num_epochs=500, 75 | early_stop=5, 76 | early_stop_acc=True, 77 | batch_size=512, 78 | ), 79 | regularisation=dict( 80 | l2=1e-7, 81 | l1=0 82 | ), 83 | testing=dict( 84 | test_on_val=False, 85 | batch_size=512 86 | ) 87 | ) 88 | 89 | ex.add_named_config( 90 | name='gap_feature_extractor_mm_2016', 91 | datasource=dict( 92 | context_size=11, 93 | ), 94 | model=dict( 95 | type='avg_gap_feature', 96 | conv=dict( 97 | conv1=dict( 98 | num_layers=4, 99 |
num_filters=32, 100 | filter_size=(3, 3), 101 | pool_size=(1, 2), 102 | dropout=0.5, 103 | pad='valid', 104 | batch_norm=True, 105 | ), 106 | conv2=dict( 107 | num_layers=2, 108 | num_filters=64, 109 | filter_size=(3, 3), 110 | pool_size=(1, 2), 111 | dropout=0.5, 112 | pad='valid', 113 | batch_norm=True, 114 | ), 115 | conv3=dict( 116 | num_layers=1, 117 | num_filters=128, 118 | filter_size=(9, 12), 119 | pool_size=None, 120 | dropout=0.5, 121 | pad='valid', 122 | batch_norm=True 123 | ) 124 | ), 125 | gap=dict( 126 | batch_norm=True, 127 | gap_nonlinearity='linear', 128 | ), 129 | out_nonlinearity='softmax' 130 | ), 131 | optimiser=dict( 132 | name='adam', 133 | params=dict( 134 | learning_rate=0.001 135 | ), 136 | schedule=None 137 | ), 138 | training=dict( 139 | num_epochs=500, 140 | early_stop=5, 141 | early_stop_acc=True, 142 | batch_size=512, 143 | ), 144 | regularisation=dict( 145 | l2=1e-7, 146 | l1=0 147 | ), 148 | testing=dict( 149 | test_on_val=False, 150 | batch_size=512 151 | ) 152 | ) 153 | -------------------------------------------------------------------------------- /chordrec/models/blocks.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import lasagne as lnn 3 | 4 | 5 | def conv(network, batch_norm, num_layers, num_filters, filter_size, pad, 6 | pool_size, dropout): 7 | for k in range(num_layers): 8 | network = lnn.layers.Conv2DLayer( 9 | network, num_filters=num_filters, 10 | filter_size=filter_size, 11 | W=lnn.init.Orthogonal(gain=np.sqrt(2 / (1 + .1 ** 2))), 12 | pad=pad, 13 | nonlinearity=lnn.nonlinearities.rectify, 14 | name='Conv_{}'.format(k)) 15 | if batch_norm: 16 | network = lnn.layers.batch_norm(network) 17 | 18 | if pool_size: 19 | network = lnn.layers.MaxPool2DLayer(network, pool_size=pool_size, 20 | name='Pool') 21 | if dropout > 0.0: 22 | network = lnn.layers.DropoutLayer(network, p=dropout) 23 | 24 | return network 25 | 26 | 27 | def gap(network, out_size, batch_norm, 28 | gap_nonlinearity, out_nonlinearity): 29 | 30 | gap_nonlinearity = getattr(lnn.nonlinearities, gap_nonlinearity) 31 | out_nonlinearity = getattr(lnn.nonlinearities, out_nonlinearity) 32 | 33 | # output classification layer 34 | network = lnn.layers.Conv2DLayer( 35 | network, num_filters=out_size, filter_size=1, 36 | nonlinearity=gap_nonlinearity, name='Output_Conv') 37 | if batch_norm: 38 | network = lnn.layers.batch_norm(network) 39 | 40 | network = lnn.layers.Pool2DLayer( 41 | network, pool_size=network.output_shape[-2:], ignore_border=False, 42 | mode='average_exc_pad', name='GlobalAveragePool') 43 | network = lnn.layers.FlattenLayer(network, name='Flatten') 44 | 45 | network = lnn.layers.NonlinearityLayer( 46 | network, nonlinearity=out_nonlinearity, name='output') 47 | 48 | return network 49 | 50 | 51 | def dense(network, batch_norm, nonlinearity, num_layers, num_units, 52 | dropout): 53 | 54 | nl = getattr(lnn.nonlinearities, nonlinearity) 55 | 56 | for i in range(num_layers): 57 | network = lnn.layers.DenseLayer( 58 | network, num_units=num_units, nonlinearity=nl, 59 | name='fc-{}'.format(i) 60 | ) 61 | if batch_norm: 62 | network = lnn.layers.batch_norm(network) 63 | if dropout > 0.0: 64 | network = lnn.layers.DropoutLayer(network, p=dropout) 65 | 66 | return network 67 | 68 | 69 | def recurrent(network, mask_in, num_rec_units, num_layers, dropout, 70 | bidirectional, nonlinearity): 71 | 72 | if nonlinearity != 'LSTM': 73 | nl = getattr(lnn.nonlinearities, nonlinearity) 74 | 75 | def add_layer(prev_layer, **kwargs): 
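# plain (vanilla) recurrent layer; the orthogonal hidden-to-hidden
# initialisation with gain sqrt(2)/2 is meant to keep the recurrence
# stable under the rectify nonlinearity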
76 | return lnn.layers.RecurrentLayer( 77 | prev_layer, num_units=num_rec_units, mask_input=mask_in, 78 | nonlinearity=nl, 79 | W_in_to_hid=lnn.init.GlorotUniform(), 80 | W_hid_to_hid=lnn.init.Orthogonal(gain=np.sqrt(2) / 2), 81 | **kwargs) 82 | 83 | else: 84 | def add_layer(prev_layer, **kwargs): 85 | return lnn.layers.LSTMLayer( 86 | prev_layer, num_units=num_rec_units, mask_input=mask_in, 87 | **kwargs 88 | ) 89 | 90 | fwd = network 91 | for i in range(num_layers): 92 | fwd = add_layer(fwd, name='rec_fwd_{}'.format(i)) 93 | if dropout > 0.: 94 | fwd = lnn.layers.DropoutLayer(fwd, p=dropout) 95 | 96 | if not bidirectional: 97 | return fwd 98 | 99 | bck = network 100 | for i in range(num_layers): 101 | bck = add_layer(bck, name='rec_bck_{}'.format(i), backwards=True) 102 | if dropout > 0: 103 | bck = lnn.layers.DropoutLayer(bck, p=dropout) 104 | 105 | # combine the forward and backward recurrent layers... 106 | network = lnn.layers.ConcatLayer([fwd, bck], name='fwd + bck', axis=-1) 107 | return network 108 | 109 | -------------------------------------------------------------------------------- /chordrec/models/chroma_dnn.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as tt 2 | import lasagne as lnn 3 | 4 | from . import dnn 5 | 6 | 7 | def compute_loss(prediction, target): 8 | # need to clip predictions for numerical stability 9 | eps = 1e-7 10 | pred_clip = tt.clip(prediction, eps, 1.-eps) 11 | return lnn.objectives.binary_crossentropy(pred_clip, target).mean() 12 | 13 | 14 | def build_net(in_shape, out_size_chroma, out_size, model): 15 | # first, stack the dnn chroma extractor 16 | chroma_network, input_var, crm_target_var = dnn.build_net( 17 | in_shape, out_size_chroma, model 18 | ) 19 | 20 | # then, add the logistic regression chord classifier 21 | crd_target_var = tt.matrix('target_output', dtype='float32') 22 | 23 | chord_network = lnn.layers.DenseLayer( 24 | chroma_network, name='chords', num_units=out_size, 25 | nonlinearity=lnn.nonlinearities.softmax) 26 | 27 | # tag chord classification parameters so we can distinguish them later 28 | for p in chord_network.get_params(): 29 | chord_network.params[p].add('chord') 30 | 31 | return (chroma_network, chord_network, 32 | input_var, crm_target_var, crd_target_var) 33 | 34 | 35 | def build_model(in_shape, out_size_chroma, out_size, model): 36 | (crm, crd, inv, crmv, crdv) = build_net(in_shape, out_size_chroma, 37 | out_size, model) 38 | return dict(chroma_network=crm, chord_network=crd, 39 | input_var=inv, chroma_target_var=crmv, chord_target_var=crdv, 40 | chroma_loss_fn=compute_loss, 41 | chord_loss_fn=dnn.categorical_crossentropy) 42 | 43 | 44 | create_iterators = dnn.create_iterators 45 | 46 | 47 | def add_sacred_config(ex): 48 | 49 | # =============================================================== dense net 50 | 51 | ex.add_named_config( 52 | name='dense_net', 53 | datasource=dict( 54 | context_size=7, 55 | ), 56 | chroma_network=dict( 57 | model=dict( 58 | type='chroma_dnn', 59 | dense=dict( 60 | num_layers=3, 61 | num_units=512, 62 | dropout=0.5, 63 | nonlinearity='rectify', 64 | batch_norm=False, 65 | ), 66 | out_nonlinearity='sigmoid' 67 | ), 68 | optimiser=dict( 69 | name='adam', 70 | params=dict( 71 | learning_rate=0.0001 72 | ), 73 | schedule=None 74 | ), 75 | training=dict( 76 | iterator='BatchIterator', 77 | batch_size=512, 78 | num_epochs=500, 79 | early_stop=20, 80 | early_stop_acc=False, 81 | ), 82 | regularisation=dict( 83 | l2=1e-4, 84 | l1=0.0, 85 |
), 86 | ), 87 | optimiser=dict( 88 | name='adam', 89 | params=dict( 90 | learning_rate=0.001 91 | ), 92 | schedule=None 93 | ), 94 | training=dict( 95 | iterator='BatchIterator', 96 | batch_size=512, 97 | num_epochs=500, 98 | early_stop=20, 99 | early_stop_acc=True 100 | ), 101 | regularisation=dict( 102 | l2=1e-4, 103 | l1=0.0, 104 | ), 105 | testing=dict( 106 | test_on_val=False, 107 | batch_size=None 108 | ) 109 | ) 110 | 111 | -------------------------------------------------------------------------------- /chordrec/models/crf.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as tt 2 | 3 | import dmgr 4 | import lasagne as lnn 5 | import spaghetti as spg 6 | 7 | from .. import augmenters 8 | 9 | 10 | class CrfLoss: 11 | 12 | def __init__(self, crf): 13 | self.crf = crf 14 | 15 | def __call__(self, prediction, target, mask): 16 | loss = spg.objectives.neg_log_likelihood(self.crf, target, mask) 17 | loss /= mask.sum(axis=1) # normalise to sequence length 18 | return lnn.objectives.aggregate(loss, mode='mean') 19 | 20 | 21 | def build_net(in_shape, out_size, model): 22 | # input variables 23 | input_var = (tt.tensor4('input', dtype='float32') 24 | if len(in_shape) > 1 else 25 | tt.tensor3('input', dtype='float32')) 26 | target_var = tt.tensor3('target_output', dtype='float32') 27 | mask_var = tt.matrix('mask_input', dtype='float32') 28 | 29 | # stack more layers 30 | network = lnn.layers.InputLayer( 31 | name='input', shape=(None, None) + in_shape, 32 | input_var=input_var 33 | ) 34 | 35 | mask_in = lnn.layers.InputLayer(name='mask', 36 | input_var=mask_var, 37 | shape=(None, None)) 38 | 39 | network = spg.layers.CrfLayer( 40 | network, mask_input=mask_in, num_states=out_size, name='CRF') 41 | 42 | return network, input_var, target_var, mask_var 43 | 44 | 45 | def build_model(in_shape, out_size, model): 46 | network, input_var, target_var, mask_var = build_net(in_shape, out_size, 47 | model) 48 | loss_fn = CrfLoss(network) 49 | return dict(network=network, input_var=input_var, target_var=target_var, 50 | mask_var=mask_var, loss_fn=loss_fn) 51 | 52 | 53 | def create_iterators(train_set, val_set, training, augmentation): 54 | train_batches = dmgr.iterators.SequenceIterator( 55 | train_set, training['batch_size'], randomise=True, 56 | expand=True, max_seq_len=training['max_seq_len'] 57 | ) 58 | 59 | val_batches = dmgr.iterators.SequenceIterator( 60 | val_set, training['batch_size'], randomise=False, 61 | expand=False 62 | ) 63 | 64 | if augmentation is not None: 65 | train_batches = dmgr.iterators.AugmentedIterator( 66 | train_batches, *augmenters.create_augmenters(augmentation) 67 | ) 68 | 69 | return train_batches, val_batches 70 | 71 | 72 | def add_sacred_config(ex): 73 | ex.add_named_config( 74 | name='crf', 75 | datasource=dict( 76 | context_size=0, 77 | ), 78 | model=dict( 79 | type='crf' 80 | ), 81 | optimiser=dict( 82 | name='adam', 83 | params=dict( 84 | learning_rate=0.01 85 | ), 86 | schedule=None 87 | ), 88 | training=dict( 89 | batch_size=32, 90 | max_seq_len=1024, 91 | num_epochs=500, 92 | early_stop=20, 93 | early_stop_acc=True, 94 | ), 95 | regularisation=dict( 96 | l1=1e-4, 97 | l2=0.0, 98 | ), 99 | testing=dict( 100 | test_on_val=False, 101 | batch_size=None, 102 | ) 103 | ) 104 | -------------------------------------------------------------------------------- /chordrec/models/dnn.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as tt 2 | import lasagne 
as lnn 3 | 4 | import dmgr 5 | 6 | from .. import augmenters 7 | from . import blocks 8 | 9 | 10 | def categorical_crossentropy(prediction, target): 11 | # need to clip predictions for numerical stability 12 | eps = 1e-7 13 | pred_clip = tt.clip(prediction, eps, 1.-eps) 14 | return lnn.objectives.categorical_crossentropy(pred_clip, target).mean() 15 | 16 | 17 | def categorical_mse(predictions, targets): 18 | """ Mean squared error on class targets """ 19 | return tt.mean( 20 | (1.0 - predictions[tt.arange(targets.shape[0]), targets]) ** 2) 21 | 22 | 23 | def build_net(in_shape, out_size, model): 24 | # input variables 25 | input_var = (tt.tensor3('input', dtype='float32') 26 | if len(in_shape) > 1 else 27 | tt.matrix('input', dtype='float32')) 28 | target_var = tt.matrix('target_output', dtype='float32') 29 | 30 | # stack more layers 31 | network = lnn.layers.InputLayer( 32 | name='input', shape=(None,) + in_shape, input_var=input_var) 33 | 34 | if 'conv' in model and model['conv']: 35 | # reshape to 1 "color" channel 36 | network = lnn.layers.reshape( 37 | network, shape=(-1, 1) + in_shape, name='reshape') 38 | 39 | for c in sorted(model['conv'].keys()): 40 | network = blocks.conv(network, **model['conv'][c]) 41 | 42 | # no more output layer if gap is already there! 43 | if 'gap' in model and model['gap']: 44 | network = blocks.gap(network, out_size=out_size, 45 | out_nonlinearity=model['out_nonlinearity'], 46 | **model['gap']) 47 | else: 48 | if 'dense' in model and model['dense']: 49 | network = blocks.dense(network, **model['dense']) 50 | 51 | # output layer 52 | out_nl = getattr(lnn.nonlinearities, model['out_nonlinearity']) 53 | network = lnn.layers.DenseLayer( 54 | network, name='output', num_units=out_size, 55 | nonlinearity=out_nl) 56 | 57 | return network, input_var, target_var 58 | 59 | 60 | def train_iterator(train_set, training): 61 | it = training.get('iterator', 'BatchIterator') 62 | 63 | if it == 'BatchIterator': 64 | return dmgr.iterators.BatchIterator( 65 | train_set, training['batch_size'], randomise=True, 66 | expand=True 67 | ) 68 | elif it == 'ClassBalancedIterator': 69 | return dmgr.iterators.UniformClassIterator( 70 | train_set, training['batch_size'] 71 | ) 72 | else: 73 | raise ValueError('Unknown Batch Iterator: {}'.format(it)) 74 | 75 | 76 | def build_model(in_shape, out_size, model): 77 | network, input_var, target_var = build_net(in_shape, out_size, model) 78 | return dict(network=network, input_var=input_var, target_var=target_var, 79 | loss_fn=categorical_crossentropy) 80 | 81 | 82 | def create_iterators(train_set, val_set, training, augmentation): 83 | train_batches = train_iterator(train_set, training) 84 | val_batches = dmgr.iterators.BatchIterator( 85 | val_set, training['batch_size'], randomise=False, expand=True 86 | ) 87 | 88 | if augmentation is not None: 89 | train_batches = dmgr.iterators.AugmentedIterator( 90 | train_batches, *augmenters.create_augmenters(augmentation) 91 | ) 92 | 93 | return train_batches, val_batches 94 | 95 | 96 | def add_sacred_config(ex): 97 | 98 | # =============================================================== dense net 99 | 100 | ex.add_named_config( 101 | name='dense_net', 102 | datasource=dict( 103 | context_size=7, 104 | ), 105 | model=dict( 106 | type='dnn', 107 | dense=dict( 108 | num_layers=3, 109 | num_units=512, 110 | nonlinearity='rectify', 111 | batch_norm=False, 112 | dropout=0.5, 113 | ), 114 | out_nonlinearity='softmax' 115 | ), 116 | optimiser=dict( 117 | name='adam', 118 | params=dict( 119 | 
learning_rate=0.0001 120 | ), 121 | schedule=None 122 | ), 123 | training=dict( 124 | iterator='BatchIterator', 125 | batch_size=512, 126 | num_epochs=500, 127 | early_stop=20, 128 | early_stop_acc=True, 129 | ), 130 | regularisation=dict( 131 | l2=1e-4, 132 | l1=0.0, 133 | ), 134 | testing=dict( 135 | test_on_val=False, 136 | batch_size=None 137 | ) 138 | ) 139 | 140 | # ================================================================ conv net 141 | 142 | ex.add_named_config( 143 | name='conv_net', 144 | datasource=dict( 145 | context_size=7, 146 | ), 147 | model=dict( 148 | type='dnn', 149 | conv=dict( 150 | conv1=dict( 151 | num_layers=4, 152 | num_filters=32, 153 | filter_size=(3, 3), 154 | pool_size=(1, 2), 155 | dropout=0.5, 156 | pad='same', 157 | batch_norm=True, 158 | ), 159 | conv2=dict( 160 | num_layers=2, 161 | num_filters=64, 162 | filter_size=(3, 3), 163 | pool_size=(1, 2), 164 | dropout=0.5, 165 | pad='valid', 166 | batch_norm=True, 167 | ), 168 | conv3=dict( 169 | num_layers=1, 170 | num_filters=128, 171 | filter_size=(9, 12), 172 | pool_size=None, 173 | dropout=0.5, 174 | pad='valid', 175 | batch_norm=True 176 | ) 177 | ), 178 | out_nonlinearity='softmax' 179 | ), 180 | optimiser=dict( 181 | name='adam', 182 | params=dict( 183 | learning_rate=0.001 184 | ), 185 | schedule=None 186 | ), 187 | training=dict( 188 | num_epochs=500, 189 | early_stop=5, 190 | early_stop_acc=True, 191 | batch_size=512, 192 | ), 193 | regularisation=dict( 194 | l2=1e-7, 195 | l1=0 196 | ), 197 | testing=dict( 198 | test_on_val=False, 199 | batch_size=512 200 | ) 201 | ) 202 | 203 | @ex.named_config 204 | def dense_classifier(): 205 | model = dict( 206 | dense=dict( 207 | num_layers=1, 208 | num_units=512, 209 | dropout=0.5, 210 | nonlinearity='rectify', 211 | batch_norm=False 212 | ) 213 | ) 214 | 215 | @ex.named_config 216 | def gap_classifier(): 217 | model = dict( 218 | gap=dict( 219 | batch_norm=True, 220 | gap_nonlinearity='linear', 221 | ) 222 | ) 223 | 224 | 225 | 226 | 227 | -------------------------------------------------------------------------------- /chordrec/models/rnn.py: -------------------------------------------------------------------------------- 1 | import theano.tensor as tt 2 | 3 | import dmgr 4 | import lasagne as lnn 5 | 6 | from .. import augmenters 7 | from . import blocks 8 | 9 | 10 | def compute_loss(prediction, target, mask): 11 | # need to clip predictions for numerical stability 12 | eps = 1e-7 13 | pred_clip = tt.clip(prediction, eps, 1. 
- eps) 14 | loss = lnn.objectives.categorical_crossentropy(pred_clip, target) 15 | return lnn.objectives.aggregate(loss, mask, mode='normalized_sum') 16 | 17 | 18 | def build_net(in_shape, out_size, model): 19 | # input variables 20 | input_var = tt.tensor3('input', dtype='float32') 21 | target_var = tt.tensor3('target_output', dtype='float32') 22 | mask_var = tt.matrix('mask_input', dtype='float32') 23 | 24 | # stack more layers 25 | network = lnn.layers.InputLayer( 26 | name='input', shape=(None, None) + in_shape, 27 | input_var=input_var 28 | ) 29 | 30 | true_batch_size, true_seq_len, _ = input_var.shape 31 | 32 | mask_in = lnn.layers.InputLayer(name='mask', 33 | input_var=mask_var, 34 | shape=(None, None)) 35 | 36 | network = blocks.recurrent(network, mask_in, **model['recurrent']) 37 | 38 | # In order to connect a recurrent layer to a dense layer, we need to 39 | # flatten the first two dimensions (our "sample dimensions"); this will 40 | # cause each time step of each sequence to be processed independently 41 | network = lnn.layers.ReshapeLayer( 42 | network, (-1, lnn.layers.get_output_shape(network)[-1]), 43 | name='reshape to single') 44 | 45 | network = lnn.layers.DenseLayer( 46 | network, num_units=out_size, nonlinearity=lnn.nonlinearities.softmax, 47 | name='output') 48 | 49 | # To reshape back to our original shape, we can use the symbolic shape 50 | # variables we retrieved above. 51 | network = lnn.layers.ReshapeLayer( 52 | network, (true_batch_size, true_seq_len, out_size), 53 | name='output-reshape') 54 | 55 | return network, input_var, target_var, mask_var 56 | 57 | 58 | def build_model(in_shape, out_size, model): 59 | network, input_var, target_var, mask_var = build_net(in_shape, out_size, 60 | model) 61 | return dict(network=network, input_var=input_var, target_var=target_var, 62 | mask_var=mask_var, loss_fn=compute_loss) 63 | 64 | 65 | def create_iterators(train_set, val_set, training, augmentation): 66 | train_batches = dmgr.iterators.SequenceIterator( 67 | train_set, training['batch_size'], randomise=True, 68 | expand=True, max_seq_len=training['max_seq_len'] 69 | ) 70 | 71 | val_batches = dmgr.iterators.SequenceIterator( 72 | val_set, training['batch_size'], randomise=False, 73 | expand=False 74 | ) 75 | 76 | if augmentation is not None: 77 | train_batches = dmgr.iterators.AugmentedIterator( 78 | train_batches, *augmenters.create_augmenters(augmentation) 79 | ) 80 | 81 | return train_batches, val_batches 82 | 83 | 84 | def add_sacred_config(ex): 85 | ex.add_named_config( 86 | name='recurrent', 87 | model=dict( 88 | type='rnn', 89 | recurrent=dict( 90 | num_rec_units=128, 91 | num_layers=3, 92 | dropout=0.3, 93 | bidirectional=True, 94 | nonlinearity='rectify' 95 | ) 96 | ), 97 | optimiser=dict( 98 | name='adam', 99 | params=dict( 100 | learning_rate=0.0001 101 | ), 102 | schedule=None 103 | ), 104 | training=dict( 105 | iterator='BatchIterator', 106 | batch_size=8, 107 | max_seq_len=64, 108 | num_epochs=1000, 109 | early_stop=20, 110 | early_stop_acc=True, 111 | ), 112 | regularisation=dict( 113 | l1=0.0, 114 | l2=1e-4, 115 | ), 116 | testing=dict( 117 | test_on_val=False 118 | ) 119 | ) 120 | 121 | @ex.named_config 122 | def lstm(): 123 | net = dict( 124 | nonlinearity='LSTM', 125 | num_rec_units=64, 126 | ) 127 | -------------------------------------------------------------------------------- /chordrec/targets.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import string 3 | import mir_eval 4 | 5 | 6 | 
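# a small usage sketch (illustrative values):
#   one_hot(np.array([1, 0]), num_classes=3)
#   -> array([[0., 1., 0.],
#             [1., 0., 0.]], dtype=float32)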
def one_hot(class_ids, num_classes): 7 | """ 8 | Create one-hot encoding of class ids 9 | :param class_ids: ids of classes to map 10 | :param num_classes: number of classes 11 | :return: one-hot encoding of class ids 12 | """ 13 | oh = np.zeros((len(class_ids), num_classes), dtype=np.float32) 14 | oh[np.arange(len(class_ids)), class_ids] = 1 15 | 16 | # make sure one-hot encoding corresponds to class ids 17 | assert (oh.argmax(axis=1) == class_ids).all() 18 | # make sure there is only one id set per vector 19 | assert (oh.sum(axis=1) == 1).all() 20 | 21 | return oh 22 | 23 | 24 | class IntervalAnnotationTarget(object): 25 | 26 | def __init__(self, fps, num_classes): 27 | self.fps = fps 28 | self.num_classes = num_classes 29 | 30 | def _annotations_to_targets(self, annotations): 31 | """ 32 | Class ID of 'no chord' should always be last! 33 | :param annotations: 34 | :return: 35 | """ 36 | raise NotImplementedError('Implement this.') 37 | 38 | def _targets_to_annotations(self, targets): 39 | raise NotImplementedError('Implement this.') 40 | 41 | def _dummy_target(self): 42 | raise NotImplementedError('Implement this.') 43 | 44 | def __call__(self, target_file, num_frames=None): 45 | """ 46 | Creates one-hot encodings from an annotation file. 47 | 48 | :param target_file: file containing time annotations 49 | :param num_frames: number of frames in the audio file. if None, 50 | estimated from the end of the last annotation 51 | :return: one-hot ground truth per frame 52 | """ 53 | ann = np.loadtxt(target_file, 54 | comments=None, 55 | dtype=[('start', np.float), 56 | ('end', np.float), 57 | # assumes chord descriptions are 58 | # shorter than 50 characters 59 | ('label', 'S50')]) 60 | 61 | if num_frames is None: 62 | num_frames = np.ceil(ann['end'][-1] * self.fps) 63 | 64 | # we will add a dummy class at the beginning and at the end, because 65 | # some annotations do not cover the complete audio file: they may 66 | # start late, end early, or be slightly misaligned at the end 67 | targets = np.vstack((self._dummy_target(), 68 | self._annotations_to_targets(ann['label']), 69 | self._dummy_target())) 70 | 71 | # add the times for the dummy events 72 | start = np.hstack(([-np.inf], ann['start'], ann['end'][-1])) 73 | end = np.hstack((ann['start'][0], ann['end'], [np.inf])) 74 | 75 | # next, we have to assign each frame a target. first, compute the 76 | # frame times 77 | frame_times = np.arange(num_frames, dtype=np.float) / self.fps 78 | 79 | # IMPORTANT: round everything to milliseconds to prevent errors caused 80 | # by floating point hell. Ideally, we would round everything to 81 | # possible *frame times*, but it is easier this way.
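# (illustrative) with fps=10, frame 25 lies at 2.5 s; an annotation
# boundary stored as 2.4999999999 s must still compare equal to that
# frame time, which the rounding below ensures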
82 | start = np.round(start, decimals=3) 83 | end = np.round(end, decimals=3) 84 | frame_times = np.round(frame_times, decimals=3) 85 | 86 | target_per_frame = ((start <= frame_times[:, np.newaxis]) & 87 | (frame_times[:, np.newaxis] < end)) 88 | 89 | # make sure each frame is assigned to only one target vector 90 | assert (target_per_frame.sum(axis=1) == 1).all() 91 | 92 | # create the one hot vectors per frame 93 | return targets[np.nonzero(target_per_frame)[1]].astype(np.float32) 94 | 95 | def write_chord_predictions(self, filename, predictions): 96 | with open(filename, 'w') as f: 97 | f.writelines(['{:.3f}\t{:.3f}\t{}\n'.format(*p) 98 | for p in self._targets_to_annotations(predictions)]) 99 | 100 | 101 | class ChordsMajMin(IntervalAnnotationTarget): 102 | 103 | def __init__(self, fps): 104 | # 25 classes - 12 minor, 12 major, one "No Chord" 105 | super(ChordsMajMin, self).__init__(fps, 25) 106 | 107 | @property 108 | def name(self): 109 | return 'chords_majmin_fps={}'.format(self.fps) 110 | 111 | def _dummy_target(self): 112 | dt = np.zeros(self.num_classes, dtype=np.float32) 113 | dt[-1] = 1 114 | return dt 115 | 116 | def _annotations_to_targets(self, labels): 117 | """ 118 | Maps chord annotations to 25 classes (12 major, 12 minor, 1 no chord) 119 | 120 | :param labels: chord labels 121 | :return: one-hot encoding of class id per annotation 122 | """ 123 | # first, create chord/class mapping. root note 'A' has id 0, increasing 124 | # with each semitone. we have duplicate mappings for flat and sharp 125 | # notes, just to be sure. 126 | natural = zip(string.uppercase[:7], [0, 2, 3, 5, 7, 8, 10]) 127 | sharp = map(lambda v: (v[0] + '#', (v[1] + 1) % 12), natural) 128 | flat = map(lambda v: (v[0] + 'b', (v[1] - 1) % 12), natural) 129 | 130 | # 'no chord' is coded as 'N'. The class ID of 'N' is 24, after all 131 | # major and minor chords. Sometimes there is also an 'X' annotation, 132 | # meaning that the chord cannot be properly determined on beat-level 133 | # (too much going on in the audio). We will treat this also as 134 | # 'no chord' 135 | root_note_map = dict(natural + sharp + flat + [('N', 24), ('X', 24)]) 136 | 137 | # then, we load the annotations, map the chords to class ids, and 138 | # finally map class ids to a one-hot encoding. first, map the root 139 | # notes. 140 | chord_root_notes = [c.split(':')[0].split('/')[0] for c in labels] 141 | chord_root_note_ids = np.array([root_note_map[crn] 142 | for crn in chord_root_notes]) 143 | 144 | # then, map the chords to major and minor.
chords with a 145 | # minor third as their first interval are considered minor chords, 146 | # the rest are major chords, following MIREX, as stated in 147 | # Taemin Cho, Juan Bello: "On the relative importance of Individual 148 | # Components of Chord Recognition Systems" 149 | 150 | chord_type = [c.split(':')[1] if ':' in c else '' for c in labels] 151 | 152 | # we will shift the class ids for all minor notes by 12 153 | # (num major chords) 154 | chord_type_shift = np.array( 155 | map(lambda x: 12 if 'min' in x or 'dim' in x else 0, chord_type) 156 | ) 157 | 158 | # now we can compute the final chord class id 159 | return one_hot(chord_root_note_ids + chord_type_shift, 160 | self.num_classes) 161 | 162 | def _targets_to_annotations(self, targets): 163 | natural = zip([0, 2, 3, 5, 7, 8, 10], string.uppercase[:7]) 164 | sharp = map(lambda v: ((v[0] + 1) % 12, v[1] + '#'), natural) 165 | 166 | semitone_to_label = dict(sharp + natural) 167 | 168 | def pred_to_label(pred): 169 | if pred == 24: 170 | return 'N' 171 | return '{}:{}'.format(semitone_to_label[pred % 12], 172 | 'maj' if pred < 12 else 'min') 173 | 174 | spf = 1. / self.fps 175 | labels = [(i * spf, pred_to_label(p)) for i, p in enumerate(targets)] 176 | 177 | # join same consecutive predictions 178 | prev_label = (None, None) 179 | uniq_labels = [] 180 | 181 | for label in labels: 182 | if label[1] != prev_label[1]: 183 | uniq_labels.append(label) 184 | prev_label = label 185 | 186 | # end time of last label is one frame duration after 187 | # the last prediction time 188 | start_times, chord_labels = zip(*uniq_labels) 189 | end_times = start_times[1:] + (labels[-1][0] + spf,) 190 | 191 | return zip(start_times, end_times, chord_labels) 192 | 193 | 194 | class ChordsRoot(IntervalAnnotationTarget): 195 | 196 | def __init__(self, fps): 197 | # 13 classes - 12 semitones and "no chord" 198 | super(ChordsRoot, self).__init__(fps, 13) 199 | 200 | @property 201 | def name(self): 202 | return 'chords_root_fps={}'.format(self.fps) 203 | 204 | def _dummy_target(self): 205 | dt = np.zeros(self.num_classes, dtype=np.float32) 206 | dt[-1] = 1 207 | return dt 208 | 209 | def _annotations_to_targets(self, labels): 210 | """ 211 | Maps chord annotations to 13 classes (12 root tones, 1 no chord) 212 | 213 | :param labels: chord label 214 | :return: class id per annotation 215 | """ 216 | # first, create chord/class mapping. root note 'A' has id 0, increasing 217 | # with each semitone. we have duplicate mappings for flat and sharp 218 | # notes, just to be sure. 219 | natural = zip(string.uppercase[:7], [0, 2, 3, 5, 7, 8, 10]) 220 | sharp = map(lambda v: (v[0] + '#', (v[1] + 1) % 12), natural) 221 | flat = map(lambda v: (v[0] + 'b', (v[1] - 1) % 12), natural) 222 | 223 | # 'no chord' is coded as 'N'. The class ID of 'N' is 12, after all 224 | # root notes. Sometimes there is also an 'X' annotation, 225 | # meaning that the chord cannot be properly determined on beat-level 226 | # (too much going on in the audio). We will treat this also as 227 | # 'no chord' 228 | root_note_map = dict(natural + sharp + flat + [('N', 12), ('X', 12)]) 229 | 230 | # then, we load the annotations, map the chords to class ids, and 231 | # finally map class ids to a one-hot encoding. first, map the root 232 | # notes.
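# (illustrative) e.g. 'Db:maj7/5' -> root note 'Db' -> class id 4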
233 | chord_root_notes = [c.split(':')[0].split('/')[0] for c in labels] 234 | chord_root_note_ids = np.array([root_note_map[crn] 235 | for crn in chord_root_notes]) 236 | 237 | return one_hot(chord_root_note_ids, self.num_classes) 238 | 239 | def _targets_to_annotations(self, targets): 240 | natural = zip([0, 2, 3, 5, 7, 8, 10], string.uppercase[:7]) 241 | sharp = map(lambda v: ((v[0] + 1) % 12, v[1] + '#'), natural) 242 | 243 | semitone_to_label = dict(sharp + natural + [(12, 'N')]) 244 | spf = 1. / self.fps 245 | labels = [(i * spf, semitone_to_label[p]) 246 | for i, p in enumerate(targets)] 247 | 248 | # join same consecutive predictions 249 | prev_label = (None, None) 250 | uniq_labels = [] 251 | 252 | for label in labels: 253 | if label[1] != prev_label[1]: 254 | uniq_labels.append(label) 255 | prev_label = label 256 | 257 | # end time of last label is one frame duration after 258 | # the last prediction time 259 | start_times, chord_labels = zip(*uniq_labels) 260 | end_times = start_times[1:] + (labels[-1][0] + spf,) 261 | 262 | return zip(start_times, end_times, chord_labels) 263 | 264 | 265 | class ChordsMajMinSevenths(IntervalAnnotationTarget): 266 | 267 | def __init__(self, fps): 268 | # 73 classes - maj, 7, maj7, min, min7, minmaj7 with 12 each, 1 no chord 269 | super(ChordsMajMinSevenths, self).__init__(fps, 73) 270 | 271 | @property 272 | def name(self): 273 | return 'chords_majminsevenths_fps={}'.format(self.fps) 274 | 275 | def _dummy_target(self): 276 | dt = np.zeros(self.num_classes, dtype=np.float32) 277 | dt[-1] = 1 278 | return dt 279 | 280 | def _annotations_to_targets(self, labels): 281 | root, semis, _ = mir_eval.chord.encode_many(labels, True) 282 | class_ids = root.copy() 283 | 284 | # 'no chord' is last class 285 | class_ids[class_ids == -1] = self.num_classes - 1 286 | 287 | # minor chords start at idx 36 288 | class_ids[semis[:, 3] == 1] += 36 289 | 290 | # seventh shift 291 | seventh = semis[:, 10] == 1 292 | maj_seventh = semis[:, 11] == 1 293 | 294 | # this weirdness is necessary because of a B:sus4(b7)/7 annotation 295 | # in the RWC corpus... 296 | maj_seventh &= ~seventh 297 | assert (seventh & maj_seventh).sum() == 0 298 | 299 | class_ids[seventh] += 12 300 | class_ids[maj_seventh] += 24 301 | 302 | return one_hot(class_ids, self.num_classes) 303 | 304 | def _targets_to_annotations(self, targets): 305 | natural = zip([0, 2, 3, 5, 7, 8, 10], string.uppercase[:7]) 306 | sharp = map(lambda v: ((v[0] + 1) % 12, v[1] + '#'), natural) 307 | roots = {(a - 3) % 12: b for a, b in dict(sharp + natural).iteritems()} 308 | ext = ['maj', '7', 'maj7', 'min', 'min7', 'minmaj7'] 309 | 310 | def pred_to_label(pred): 311 | if pred == self.num_classes - 1: 312 | return 'N' 313 | 314 | return '{root}:{ext}'.format( 315 | root=roots[pred % 12], 316 | ext=ext[pred / 12] 317 | ) 318 | 319 | spf = 1.
/ self.fps 320 | labels = [(i * spf, pred_to_label(p)) for i, p in enumerate(targets)] 321 | 322 | # join same consecutive predictions 323 | prev_label = (None, None) 324 | uniq_labels = [] 325 | 326 | for label in labels: 327 | if label[1] != prev_label[1]: 328 | uniq_labels.append(label) 329 | prev_label = label 330 | 331 | # end time of last label is one frame duration after 332 | # the last prediction time 333 | start_times, chord_labels = zip(*uniq_labels) 334 | end_times = start_times[1:] + (labels[-1][0] + spf,) 335 | 336 | return zip(start_times, end_times, chord_labels) 337 | 338 | 339 | class ChromaTarget(IntervalAnnotationTarget): 340 | 341 | def __init__(self, fps): 342 | # vector of 12 semitones 343 | super(ChromaTarget, self).__init__(fps, 12) 344 | 345 | @property 346 | def name(self): 347 | return 'chroma_target_fps={}'.format(self.fps) 348 | 349 | def _dummy_target(self): 350 | return mir_eval.chord.NO_CHORD_ENCODED[1] 351 | 352 | def _annotations_to_targets(self, labels): 353 | roots, bitmaps, _ = mir_eval.chord.encode_many(labels) 354 | chromas = mir_eval.chord.rotate_bitmaps_to_roots(bitmaps, roots) 355 | return chromas 356 | 357 | def _targets_to_annotations(self, targets): 358 | raise RuntimeError('Does not work with this target.') 359 | 360 | 361 | def add_sacred_config(ex): 362 | ex.add_named_config( 363 | 'chords_maj_min', 364 | target=dict( 365 | name='ChordsMajMin', 366 | params=dict() 367 | ) 368 | ) 369 | ex.add_named_config( 370 | 'chords_root', 371 | target=dict( 372 | name='ChordsRoot', 373 | params=dict() 374 | ) 375 | ) 376 | ex.add_named_config( 377 | 'chords_maj_min_sevenths', 378 | target=dict( 379 | name='ChordsMajMinSevenths', 380 | params=dict() 381 | ) 382 | ) 383 | 384 | 385 | def create_target(fps, config): 386 | return globals()[config['name']](fps=fps, **config['params']) 387 | 388 | -------------------------------------------------------------------------------- /chordrec/test.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import sys 3 | import os 4 | import numpy as np 5 | 6 | from dmgr.iterators import iterate_batches 7 | from nn.utils import Colors 8 | 9 | 10 | PREDICTION_EXT = '.chords.txt' 11 | 12 | 13 | def compute_labeling(process_fn, target, agg_dataset, dest_dir, use_mask, 14 | batch_size=None, extension='.chords.txt'): 15 | """ 16 | Computes and saves the labels for each datasource in an aggregated 17 | datasource. 18 | :param process_fn: theano function that gives the nn's output 19 | :param target: target computer 20 | :param agg_dataset: aggregated datasource.
21 | :param dest_dir: where to store predicted chord labels 22 | :param use_mask: if the network is an rnn 23 | :param batch_size: Batch size if each datasource is to be processed batch-wise 24 | :param extension: file extension of the resulting files 25 | :return: list of files containing the predictions 26 | """ 27 | if not os.path.exists(dest_dir): 28 | os.makedirs(dest_dir) 29 | else: 30 | if not os.path.isdir(dest_dir): 31 | print(Colors.red('Destination path exists but is not a directory!'), 32 | file=sys.stderr) 33 | return 34 | 35 | pred_files = [] 36 | 37 | for ds_idx in range(agg_dataset.n_datasources): 38 | ds = agg_dataset.datasource(ds_idx) 39 | 40 | pred = [] 41 | for data, _ in iterate_batches(ds, batch_size or ds.n_data, 42 | randomise=False, expand=False): 43 | if use_mask: 44 | data = data[np.newaxis, :] 45 | mask = np.ones(data.shape[:2], dtype=np.float32) 46 | 47 | p = process_fn(data, mask)[0] 48 | else: 49 | p = process_fn(data) 50 | 51 | pred.append(p.argmax(axis=1)) 52 | 53 | pred = np.concatenate(pred) 54 | 55 | pred_file = os.path.join(dest_dir, ds.name + extension) 56 | target.write_chord_predictions(pred_file, pred) 57 | pred_files.append(pred_file) 58 | 59 | return pred_files 60 | 61 | 62 | def compute_scores(annotation_files, prediction_files): 63 | assert len(annotation_files) == len(prediction_files) 64 | assert len(annotation_files) > 0 65 | import mir_eval 66 | 67 | scores = [] 68 | total_length = 0. 69 | 70 | for af, pf in zip(annotation_files, prediction_files): 71 | ann_int, ann_lab = mir_eval.io.load_labeled_intervals(af) 72 | pred_int, pred_lab = mir_eval.io.load_labeled_intervals(pf) 73 | 74 | # we assume that the end-time of the last annotated label is the 75 | # length of the song 76 | song_length = ann_int[-1][1] 77 | total_length += song_length 78 | 79 | scores.append( 80 | (pf, song_length, 81 | mir_eval.chord.evaluate(ann_int, ann_lab, pred_int, pred_lab)) 82 | ) 83 | 84 | return scores, total_length 85 | 86 | 87 | def average_scores(scores, total_length): 88 | # initialise the average score with all metrics and values 0. 89 | avg_score = {metric: 0. for metric in scores[0][-1]} 90 | 91 | for _, length, score in scores: 92 | weight = length / total_length 93 | for metric in score: 94 | avg_score[metric] += float(weight * score[metric]) 95 | 96 | return avg_score 97 | 98 | 99 | def compute_average_scores(annotation_files, prediction_files): 100 | # first, compute all individual scores 101 | scores, total_length = compute_scores(annotation_files, prediction_files) 102 | return average_scores(scores, total_length) 103 | 104 | 105 | def print_scores(scores): 106 | for name, val in scores.iteritems(): 107 | label = '\t{}:'.format(name).ljust(16) 108 | print(label + '{:.3f}'.format(val)) 109 | -------------------------------------------------------------------------------- /experiments/feature_cache/README: -------------------------------------------------------------------------------- 1 | This directory will contain cached features. 
2 | -------------------------------------------------------------------------------- /experiments/ismir2016/chroma.yaml: -------------------------------------------------------------------------------- 1 | augmentation: null 2 | datasource: 3 | cached: true 4 | context_size: 13 5 | datasets: [beatles, zweieck, queen, rwc, robbie_williams] 6 | preprocessors: [] 7 | test_fold: [0, 1, 2, 3, 4, 5, 6, 7] 8 | val_fold: null 9 | feature_extractor: 10 | name: ChromaCq 11 | params: {fps: 9.986413043478262, log_eta: null, win_center: null, win_width: null} 12 | model: 13 | type: 'dnn' 14 | dense: {batch_norm: false, dropout: 0.5, nonlinearity: 'rectify', 15 | num_layers: 0, num_units: 0} 16 | out_nonlinearity: 'softmax' 17 | observations: 'results/chroma_test' 18 | optimiser: 19 | name: 'adam' 20 | params: {learning_rate: 0.001} 21 | schedule: null 22 | regularisation: {l1: 0.0, l2: 0.0001} 23 | # this is the seed of the best achieved result seed: 288170960 24 | target: 25 | name: ChordsMajMin 26 | params: {} 27 | testing: {batch_size: null, test_on_val: false} 28 | training: {batch_size: 512, early_stop: 20, early_stop_acc: true, 29 | iterator: 'BatchIterator', num_epochs: 500} 30 | -------------------------------------------------------------------------------- /experiments/ismir2016/chroma_wlog.yaml: -------------------------------------------------------------------------------- 1 | augmentation: null 2 | datasource: 3 | cached: true 4 | context_size: 15 5 | datasets: [beatles, zweieck, queen, rwc, robbie_williams] 6 | preprocessors: [] 7 | test_fold: [0, 1, 2, 3, 4, 5, 6, 7] 8 | val_fold: null 9 | feature_extractor: 10 | name: ChromaCq 11 | params: {fps: 9.986413043478262, log_eta: 1000, win_center: 60, win_width: 15} 12 | model: 13 | type: 'dnn' 14 | dense: {batch_norm: false, dropout: 0.5, nonlinearity: 'rectify', 15 | num_layers: 0, num_units: 0} 16 | out_nonlinearity: 'softmax' 17 | observations: 'results/chroma_wlog_test' 18 | optimiser: 19 | name: 'adam' 20 | params: {learning_rate: 0.001} 21 | schedule: null 22 | regularisation: {l1: 0.0, l2: 0.0001} 23 | # this is the seed of the best achieved result seed: 906228973 24 | target: 25 | name: ChordsMajMin 26 | params: {} 27 | testing: {batch_size: null, test_on_val: false} 28 | training: {batch_size: 512, early_stop: 20, early_stop_acc: true, 29 | iterator: 'BatchIterator', num_epochs: 500} 30 | -------------------------------------------------------------------------------- /experiments/ismir2016/data: -------------------------------------------------------------------------------- 1 | ../data -------------------------------------------------------------------------------- /experiments/ismir2016/deep_chroma.yaml: -------------------------------------------------------------------------------- 1 | augmentation: null 2 | chroma_network: 3 | model: 4 | type: 'chroma_dnn' 5 | dense: {batch_norm: false, dropout: 0.5, nonlinearity: 'rectify', 6 | num_layers: 3, num_units: 512} 7 | out_nonlinearity: 'sigmoid' 8 | optimiser: 9 | name: 'adam' 10 | params: {learning_rate: 0.0001} 11 | schedule: null 12 | regularisation: {l1: 0.0, l2: 0.0001} 13 | training: {batch_size: 512, early_stop: 20, early_stop_acc: false, 14 | iterator: 'BatchIterator', num_epochs: 500} 15 | datasource: 16 | cached: true 17 | context_size: 7 18 | datasets: [beatles, zweieck, queen, rwc, robbie_williams] 19 | preprocessors: [] 20 | test_fold: [0, 1, 2, 3, 4, 5, 6, 7] 21 | val_fold: null 22 | feature_extractor: 23 | name: LogFiltSpec 24 | params: 25 | fmax: 5500 26 | fmin: 30 
27 | fps: 10 28 | frame_sizes: [8192] 29 | num_bands: 24 30 | unique_filters: false 31 | observations: 'results/deep_chroma_test' 32 | optimiser: 33 | name: 'adam' 34 | params: {learning_rate: 0.001} 35 | schedule: null 36 | # this is the seed of the best achieved result. seed: 13436906 37 | target: 38 | name: ChordsMajMin 39 | params: {} 40 | regularisation: {l1: 0.0, l2: 0.0001} 41 | testing: {batch_size: 512, test_on_val: false} 42 | training: {batch_size: 512, early_stop: 20, early_stop_acc: true, 43 | iterator: 'BatchIterator', num_epochs: 500} 44 | -------------------------------------------------------------------------------- /experiments/ismir2016/feature_cache: -------------------------------------------------------------------------------- 1 | ../feature_cache -------------------------------------------------------------------------------- /experiments/ismir2016/logfiltspec.yaml: -------------------------------------------------------------------------------- 1 | augmentation: null 2 | datasource: 3 | cached: true 4 | context_size: 5 5 | datasets: [beatles, zweieck, queen, rwc, robbie_williams] 6 | preprocessors: [] 7 | test_fold: [0, 1, 2, 3, 4, 5, 6, 7] 8 | val_fold: null 9 | feature_extractor: 10 | name: LogFiltSpec 11 | params: 12 | fmax: 5500 13 | fmin: 30 14 | fps: 10 15 | frame_sizes: [8192] 16 | num_bands: 24 17 | unique_filters: false 18 | model: 19 | type: 'dnn' 20 | dense: {batch_norm: false, dropout: 0.5, nonlinearity: 'rectify', 21 | num_layers: 0, num_units: 0} 22 | out_nonlinearity: 'softmax' 23 | observations: 'results/logfiltspec_test' 24 | optimiser: 25 | name: 'adam' 26 | params: {learning_rate: 0.001} 27 | schedule: null 28 | regularisation: {l1: 0.0, l2: 0.0001} 29 | # this is the seed that achieved the best results seed: 55835954 30 | target: 31 | name: ChordsMajMin 32 | params: {} 33 | testing: {batch_size: null, test_on_val: false} 34 | training: {batch_size: 512, early_stop: 20, early_stop_acc: true, 35 | iterator: 'BatchIterator', num_epochs: 500} 36 | -------------------------------------------------------------------------------- /experiments/ismir2016/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # echo on 4 | set -x 5 | 6 | # number of runs to perform of each experiment 7 | N_RUNS=10 8 | 9 | # run chord classification ... 10 | # ... with deep chroma extractor 11 | for i in `seq $N_RUNS` 12 | do 13 | python -m chordrec.chroma with deep_chroma.yaml 14 | done 15 | 16 | # ... with simple chromas 17 | for i in `seq $N_RUNS` 18 | do 19 | python -m chordrec.classify with chroma.yaml 20 | done 21 | 22 | # ... with weighted, logarithmised chromas 23 | for i in `seq $N_RUNS` 24 | do 25 | python -m chordrec.classify with chroma_wlog.yaml 26 | done 27 | 28 | # ... with logarithmic filtered spectrogram 29 | for i in `seq $N_RUNS` 30 | do 31 | python -m chordrec.classify with logfiltspec.yaml 32 | done 33 | -------------------------------------------------------------------------------- /experiments/madmom2016/README.md: -------------------------------------------------------------------------------- 1 | # Training Models for the `madmom` Audio Processing Framework 2 | 3 | The following text will guide you through the process of training 4 | chord-recognition related models of `madmom`. 5 | 6 | ## Deep Chroma Extractor 7 | 8 | To train the Deep Chroma Extractor model, simply run 9 | 10 | $ python -m chordrec.chroma with deep_chroma.yaml 11 | 12 | and note the experiment ID (``). 
After training has finished, you can 13 | convert the learned model to a madmom-compatible model by running 14 | 15 | $ ./create_madmom_deep_chroma_model.py results//artifacts/params_fold_None.pkl \ 16 | chroma_dnn.pkl 17 | 18 | This will create a file "chroma_dnn.pkl" which contains the madmom 19 | neural network model. 20 | 21 | ## Deep Chroma Chord Recogniser 22 | 23 | Before training the deep chroma chord recogniser, make sure to train the 24 | deep chroma extractor and note its experiment ID. The trained chord recogniser 25 | will work best with this chroma extractor. 26 | 27 | To train the Deep Chroma Chord Recogniser, run 28 | 29 | $ python -m chordrec.classify with crf_chord_rec.yaml \ 30 | feature_extractor.params.name='../../results//artifacts' 31 | 32 | and note the experiment ID (``). Then, convert the learned model 33 | to a madmom-compatible one using 34 | 35 | $ ./create_madmom_crf_model.py results//params_fold_None.pkl \ 36 | chords_dccrf.pkl 37 | 38 | This will create a file "chords_dccrf.pkl" which contains the madmom CRF model 39 | for chord recognition. 40 | 41 | ## ConvNet Chord Recogniser 42 | 43 | The ConvNet Chord Recogniser consists of a) the feature extraction ConvNet 44 | and b) a CRF for decoding the chord sequence. First, you need to train 45 | the ConvNet: 46 | 47 | $ python -m chordrec.classify with chord_feature_convnet.yaml 48 | 49 | Note the experiment id (``). Then, create the parameter 50 | initialisation file for the CRF, 51 | 52 | $ ./create_crf_init_params.py results//artifacts/params_fold_None.pkl \ 53 | crf_init_params.pkl 54 | 55 | and train the CRF for chord sequence decoding: 56 | 57 | $ python -m chordrec.classify with crf_chord_rec.yaml \ 58 | feature_extractor.params.name='../../results//artifacts/features_fold_None' \ 59 | training.init_file='crf_init_params.pkl' 60 | 61 | Also note the corresponding experiment id (``). Then, convert the 62 | learned models to madmom models: 63 | 64 | $ ./create_madmom_convnet_model.py results//artifacts/params_fold_None.pkl \ 65 | chords_cnnfeat.pkl 66 | $ ./create_madmom_crf_model.py results//artifacts/params_fold_None.pkl \ 67 | chords_cnncrf.pkl 68 | 69 | This will create two files (`chords_cnnfeat.pkl` and `chords_cnncrf.pkl`) which 70 | contain the CNN feature extraction model and the CRF chord recognition model 71 | respectively.
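As a quick sanity check of the converted models, the extractor and the CRF can be chained directly in Python. The following is only a sketch (it is not part of this repository): it assumes the file names produced above, a placeholder audio file `some_song.flac`, and that the pickled CRF offers madmom's usual `process` method for decoding.

    import pickle
    from madmom.audio.chroma import DeepChromaProcessor

    # deep chroma extractor using the converted model from above;
    # fmin/fmax/unique_filters mirror deep_chroma.yaml
    dcp = DeepChromaProcessor(fmin=65, fmax=2100, unique_filters=True,
                              models=['chroma_dnn.pkl'])
    with open('chords_dccrf.pkl', 'rb') as f:
        crf = pickle.load(f)

    chroma = dcp('some_song.flac')   # (num_frames, 12) deep chroma vectors
    chord_ids = crf.process(chroma)  # most likely chord class per frame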
-------------------------------------------------------------------------------- /experiments/madmom2016/chord_feature_convnet.yaml: -------------------------------------------------------------------------------- 1 | augmentation: 2 | Detuning: {bins_per_semitone: 2, max_shift: 0.4, p: 1.0} 3 | SemitoneShift: {bins_per_semitone: 2, max_shift: 4, p: 1.0} 4 | datasource: 5 | cached: false 6 | context_size: 11 7 | datasets: [beatles, queen, zweieck, robbie_williams, rwc, billboard] 8 | preprocessors: [] 9 | test_fold: null 10 | val_fold: null 11 | feature_extractor: 12 | name: LogFiltSpec 13 | params: 14 | fmax: 2600 15 | fmin: 60 16 | fps: 10 17 | frame_sizes: [8192] 18 | num_bands: 24 19 | unique_filters: true 20 | model: 21 | conv: 22 | conv1: 23 | batch_norm: true 24 | dropout: 0.5 25 | filter_size: [3, 3] 26 | num_filters: 32 27 | num_layers: 4 28 | pad: valid 29 | pool_size: [1, 2] 30 | conv2: 31 | batch_norm: true 32 | dropout: 0.5 33 | filter_size: [3, 3] 34 | num_filters: 64 35 | num_layers: 2 36 | pad: valid 37 | pool_size: [1, 2] 38 | conv3: 39 | batch_norm: true 40 | dropout: 0.5 41 | filter_size: [9, 12] 42 | num_filters: 128 43 | num_layers: 1 44 | pad: valid 45 | pool_size: null 46 | gap: {batch_norm: true, gap_nonlinearity: linear} 47 | out_nonlinearity: softmax 48 | type: avg_gap_feature 49 | observations: results 50 | optimiser: 51 | name: adam 52 | params: {learning_rate: 0.001} 53 | schedule: null 54 | regularisation: {l1: 0, l2: 1.0e-07} 55 | target: 56 | name: ChordsMajMin 57 | params: {} 58 | testing: {batch_size: 512, test_on_val: false} 59 | training: {batch_size: 512, early_stop: 5, early_stop_acc: true, num_epochs: 500} 60 | -------------------------------------------------------------------------------- /experiments/madmom2016/create_crf_init_params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | from docopt import docopt 4 | 5 | USAGE = """ 6 | create_crf_init_params.py - creates initial crf parameters from a learned 7 | gap convnet. 8 | 9 | Usage: 10 | create_crf_init_params.py 11 | 12 | Arguments: 13 | pickle file containing the learned convnet parameters 14 | file where the initial crf parameters should be stored 15 | """ 16 | 17 | args = docopt(USAGE) 18 | 19 | params = pickle.load(open(args[''])) 20 | conv, beta, gamma, mean, inv_std = params[-5:] 21 | 22 | c = (beta - mean * gamma * inv_std) 23 | W = (conv.reshape(conv.shape[:2]) * gamma[:, np.newaxis] * 24 | inv_std[:, np.newaxis]).T 25 | pi = np.zeros_like(c) 26 | tau = np.zeros_like(c) 27 | A = np.zeros((len(beta), len(beta))) 28 | 29 | pickle.dump([pi.astype(np.float32), 30 | tau.astype(np.float32), 31 | c.astype(np.float32), 32 | A.astype(np.float32), 33 | W.astype(np.float32)], open(args[''], 'w')) 34 | -------------------------------------------------------------------------------- /experiments/madmom2016/create_madmom_convnet_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import madmom as mm 4 | import numpy as np 5 | import pickle 6 | from docopt import docopt 7 | from madmom.ml.nn.layers import (ConvolutionalLayer, 8 | BatchNormLayer, MaxPoolLayer) 9 | from madmom.ml.nn.activations import relu 10 | 11 | USAGE = """ 12 | create_madmom_convnet_model.py - creates madmom convnet models for chord rec. 
13 | 14 | Usage: 15 | create_madmom_convnet_model.py 16 | 17 | Arguments: 18 | source lasagne model file name 19 | destination madmom model file name 20 | """ 21 | 22 | args = docopt(USAGE) 23 | 24 | 25 | def conv_block(p, n_layers): 26 | layers = [] 27 | for i in range(n_layers): 28 | layers.append(ConvolutionalLayer(p[0].transpose(1, 0, 2, 3), 29 | np.array([0]))) 30 | layers.append(BatchNormLayer(*p[1:5], activation_fn=relu)) 31 | del p[:5] 32 | return layers 33 | 34 | p = pickle.load(open(args[''])) 35 | 36 | layers = [] 37 | layers += conv_block(p, 4) 38 | layers.append(MaxPoolLayer((1, 2))) 39 | layers += conv_block(p, 2) 40 | layers.append(MaxPoolLayer((1, 2))) 41 | layers += conv_block(p, 1) 42 | 43 | nn = mm.ml.nn.NeuralNetwork(layers) 44 | nn.dump(args['']) 45 | -------------------------------------------------------------------------------- /experiments/madmom2016/create_madmom_crf_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import madmom as mm 4 | import pickle 5 | from docopt import docopt 6 | 7 | USAGE = """ 8 | create_madmom_crf_model.py - creates madmom CRF models. 9 | 10 | Usage: 11 | create_madmom_crf_model.py 12 | 13 | Arguments: 14 | source spaghetti model file 15 | destination madmom model file 16 | """ 17 | 18 | args = docopt(USAGE) 19 | 20 | pi, tau, c, A, W = pickle.load(open(args[''])) 21 | crf = mm.ml.crf.ConditionalRandomField(pi, tau, c, A, W) 22 | pickle.dump(crf, open(args[''], 'wb')) 23 | -------------------------------------------------------------------------------- /experiments/madmom2016/create_madmom_deep_chroma_model.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import madmom as mm 4 | import pickle 5 | from docopt import docopt 6 | 7 | USAGE = """ 8 | create_madmom_deep_chroma_model.py - creates madmom models for the 9 | DeepChromaProcessor.
10 | 11 | Usage: 12 | create_madmom_deep_chroma_model.py 13 | 14 | Arguments: 15 | source lasagne model 16 | destination madmom model 17 | """ 18 | 19 | args = docopt(USAGE) 20 | 21 | p = pickle.load(open(args[''])) 22 | nn = mm.ml.nn.NeuralNetwork([ 23 | mm.ml.nn.layers.FeedForwardLayer( 24 | p[i], p[i+1], 25 | # relu layers, but last layer is sigmoid 26 | mm.ml.nn.activations.relu if i < len(p) - 4 else 27 | mm.ml.nn.activations.sigmoid 28 | ) 29 | for i in range(0, len(p) - 2, 2) 30 | ]) 31 | nn.dump(args['']) 32 | -------------------------------------------------------------------------------- /experiments/madmom2016/crf_chord_rec.yaml: -------------------------------------------------------------------------------- 1 | augmentation: null 2 | datasource: 3 | cached: false 4 | context_size: 0 5 | datasets: [beatles, queen, zweieck, robbie_williams, rwc, billboard] 6 | preprocessors: [] 7 | test_fold: null 8 | val_fold: null 9 | feature_extractor: 10 | name: PrecomputedFeature 11 | params: 12 | fps: 10 13 | name: 'substitute this on the command line according to README.md' 14 | model: {type: crf} 15 | observations: 'results' 16 | optimiser: 17 | name: adam 18 | params: {learning_rate: 0.01} 19 | schedule: null 20 | regularisation: {l1: 0.0001, l2: 0.0} 21 | target: 22 | name: ChordsMajMin 23 | params: {} 24 | testing: {batch_size: null, test_on_val: false} 25 | training: {batch_size: 32, early_stop: 20, early_stop_acc: true, max_seq_len: 1024, 26 | num_epochs: 500} -------------------------------------------------------------------------------- /experiments/madmom2016/deep_chroma.yaml: -------------------------------------------------------------------------------- 1 | augmentation: null 2 | chroma_network: 3 | model: 4 | dense: {batch_norm: false, dropout: 0.5, nonlinearity: rectify, num_layers: 3, 5 | num_units: 256} 6 | out_nonlinearity: sigmoid 7 | type: chroma_dnn 8 | optimiser: 9 | name: adam 10 | params: {learning_rate: 0.0001} 11 | schedule: null 12 | regularisation: {l1: 0.0, l2: 0.0001} 13 | training: {batch_size: 512, early_stop: 20, early_stop_acc: false, iterator: BatchIterator, 14 | num_epochs: 500} 15 | datasource: 16 | cached: true 17 | context_size: 7 18 | datasets: [beatles, zweieck, queen, rwc, robbie_williams, billboard] 19 | preprocessors: [] 20 | test_fold: null 21 | val_fold: null 22 | feature_extractor: 23 | name: LogFiltSpec 24 | params: 25 | fmax: 2100 26 | fmin: 65 27 | fps: 10 28 | frame_sizes: [8192] 29 | num_bands: 24 30 | unique_filters: true 31 | observations: results 32 | optimiser: 33 | name: adam 34 | params: {learning_rate: 0.001} 35 | schedule: null 36 | regularisation: {l1: 0.0, l2: 0.0001} 37 | target: 38 | name: ChordsMajMin 39 | params: {} 40 | testing: {batch_size: 512, test_on_val: false} 41 | training: {batch_size: 512, early_stop: 20, early_stop_acc: true, iterator: BatchIterator, 42 | num_epochs: 500} 43 | -------------------------------------------------------------------------------- /experiments/mlsp2016/README.md: -------------------------------------------------------------------------------- 1 | # Running the experiment for the MLSP 2016 paper 2 | 3 | The experiment consists of two steps. First, we train the feature extraction 4 | CNN. Second, we train the conditional random field that decodes chord 5 | sequences. 6 | 7 | ## CNN feature extractor 8 | 9 | To train the convnet, simply run 10 | 11 | $ python -m chordrec.classify with convnet.yaml 12 | 13 | and note the experiment id (``). 
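The CRF initialisation in the next step folds the ConvNet's batch-normalised linear output stage into the CRF's unary potentials. A rough sketch of the algebra (added for clarity; `W` stands for the output weights and `gamma`, `beta`, `mean`, `inv_std` for the batch-norm parameters):

    # batch norm applied to a linear map,
    #   y = gamma * (W.dot(x) - mean) * inv_std + beta,
    # is itself linear, so it folds into y = W_folded.dot(x) + c with:
    W_folded = W * (gamma * inv_std)[:, None]
    c = beta - mean * gamma * inv_std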
14 | 15 | ## CRF chord decoder 16 | 17 | First, create the CRF parameter initialisation files for each fold. We 18 | will save those into a subdirectory `crf_init_params`: 19 | 20 | $ ./create_crf_init_params.py results//artifacts crf_init_params 21 | 22 | and train the CRF for chord sequence decoding: 23 | 24 | $ python -m chordrec.classify with crf.yaml \ 25 | feature_extractor.params.name='../../results//artifacts/features_fold_{fold}' \ 26 | training.init_file='crf_init_params/crf_init_params_{}.pkl' 27 | -------------------------------------------------------------------------------- /experiments/mlsp2016/convnet.yaml: -------------------------------------------------------------------------------- 1 | augmentation: 2 | Detuning: {bins_per_semitone: 2, max_shift: 0.4, p: 1.0} 3 | SemitoneShift: {bins_per_semitone: 2, max_shift: 4, p: 1.0} 4 | datasource: 5 | cached: true 6 | context_size: 7 7 | datasets: [beatles, queen, zweieck, robbie_williams, rwc] 8 | preprocessors: [] 9 | test_fold: [0, 1, 2, 3, 4, 5, 6, 7] 10 | val_fold: null 11 | feature_extractor: 12 | name: LogFiltSpec 13 | params: 14 | fmax: 2100 15 | fmin: 65 16 | fps: 10 17 | frame_sizes: [8192] 18 | num_bands: 24 19 | unique_filters: true 20 | model: 21 | conv: 22 | conv1: 23 | batch_norm: true 24 | dropout: 0.5 25 | filter_size: [3, 3] 26 | num_filters: 32 27 | num_layers: 4 28 | pad: same 29 | pool_size: [1, 2] 30 | conv2: 31 | batch_norm: true 32 | dropout: 0.5 33 | filter_size: [3, 3] 34 | num_filters: 64 35 | num_layers: 2 36 | pad: valid 37 | pool_size: [1, 2] 38 | conv3: 39 | batch_norm: true 40 | dropout: 0.5 41 | filter_size: [9, 12] 42 | num_filters: 128 43 | num_layers: 1 44 | pad: valid 45 | pool_size: null 46 | out_nonlinearity: softmax 47 | type: dnn 48 | observations: 'results' 49 | optimiser: 50 | name: adam 51 | params: {learning_rate: 0.001} 52 | schedule: null 53 | regularisation: {l1: 0, l2: 1.0e-07} 54 | target: 55 | name: ChordsMajMin 56 | params: {} 57 | testing: {batch_size: 512, test_on_val: false} 58 | training: {batch_size: 512, early_stop: 5, early_stop_acc: true, num_epochs: 500} 59 | -------------------------------------------------------------------------------- /experiments/mlsp2016/create_crf_init_params.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle 3 | import os 4 | from docopt import docopt 5 | from glob import glob 6 | from os.path import join, exists 7 | 8 | 9 | USAGE = """ 10 | create_crf_init_params.py - creates initial crf parameters from a learned 11 | gap convnet. 
12 | 13 | Usage: 14 | create_crf_init_params.py <param_dir> <dst_dir> 15 | 16 | Arguments: 17 | <param_dir>  directory containing the CNN parameter files for each fold 18 | <dst_dir>  directory where to store the initial CRF parameters 19 | """ 20 | 21 | args = docopt(USAGE) 22 | param_files = sorted(glob(join(args['<param_dir>'], 'params*.pkl')))  # sort so fold indices line up with the file names 23 | 24 | if not exists(args['<dst_dir>']): 25 | os.makedirs(args['<dst_dir>']) 26 | 27 | for fold, pfile in enumerate(param_files): 28 | params = pickle.load(open(pfile)) 29 | conv, beta, gamma, mean, inv_std = params[-5:]  # final 1x1 conv weights and batch-norm parameters 30 | 31 | c = (beta - mean * gamma * inv_std)  # fold batch norm into the bias ... 32 | W = (conv.reshape(conv.shape[:2]) * gamma[:, np.newaxis] * 33 | inv_std[:, np.newaxis]).T  # ... and into the observation weights 34 | pi = np.zeros_like(c)  # initial potentials 35 | tau = np.zeros_like(c)  # final potentials 36 | A = np.zeros((len(beta), len(beta)))  # transition potentials 37 | 38 | dst_file = join(args['<dst_dir>'], 'crf_init_params_{}.pkl'.format(fold)) 39 | 40 | pickle.dump([pi.astype(np.float32), 41 | tau.astype(np.float32), 42 | c.astype(np.float32), 43 | A.astype(np.float32), 44 | W.astype(np.float32)], open(dst_file, 'w')) 45 | -------------------------------------------------------------------------------- /experiments/mlsp2016/crf.yaml: -------------------------------------------------------------------------------- 1 | augmentation: null 2 | datasource: 3 | cached: true 4 | context_size: 0 5 | datasets: [beatles, queen, zweieck, robbie_williams, rwc] 6 | preprocessors: [] 7 | test_fold: null 8 | val_fold: null 9 | feature_extractor: 10 | name: PrecomputedFeature 11 | params: 12 | fps: 10 13 | name: 'substitute this on the command line according to README.md' 14 | model: {type: crf} 15 | observations: 'results' 16 | optimiser: 17 | name: adam 18 | params: {learning_rate: 0.01} 19 | schedule: null 20 | regularisation: {l1: 0.0001, l2: 0.0} 21 | target: 22 | name: ChordsMajMin 23 | params: {} 24 | testing: {batch_size: null, test_on_val: false} 25 | training: {batch_size: 32, early_stop: 20, early_stop_acc: true, max_seq_len: 1024, 26 | num_epochs: 500} -------------------------------------------------------------------------------- /experiments/mlsp2016/feature_cache: -------------------------------------------------------------------------------- 1 | ../feature_cache -------------------------------------------------------------------------------- /experiments/mlsp2016/to_madmom_crf.py: -------------------------------------------------------------------------------- 1 | import madmom as mm 2 | import pickle 3 | from glob import glob 4 | from docopt import docopt 5 | from os.path import join 6 | 7 | USAGE = """ 8 | to_madmom_crf.py - creates a madmom CRF that predicts chords from deep chroma 9 | vectors. 10 | 11 | Usage: 12 | to_madmom_crf.py <param_dir> [<dst_fmt>] 13 | 14 | Arguments: 15 | <param_dir>  directory containing the parameter files 16 | <dst_fmt>  name format for destination files.
'{}' will be replaced 17 | with the model number [default: crf_dc_{}.pkl] 18 | """ 19 | 20 | args = docopt(USAGE) 21 | 22 | args['<dst_fmt>'] = args['<dst_fmt>'] or 'crf_dc_{}.pkl' 23 | 24 | param_files = sorted(glob(join(args['<param_dir>'], 'params*.pkl')))  # sort so model numbers are deterministic 25 | 26 | for nid, f in enumerate(param_files): 27 | p = pickle.load(open(f)) 28 | crf = mm.ml.crf.ConditionalRandomField( 29 | initial=p[0], final=p[1], bias=p[2], transition=p[3], observation=p[4] 30 | ) 31 | crf.dump(args['<dst_fmt>'].format(nid + 1)) 32 | -------------------------------------------------------------------------------- /tools/evaluate.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fnmatch 3 | from docopt import docopt 4 | 5 | import dmgr 6 | 7 | from chordrec import test 8 | 9 | 10 | USAGE = """ 11 | evaluate.py 12 | 13 | Usage: 14 | evaluate.py [-i IND_RES_FILE] [-o TOT_RES_FILE] FILES... 15 | 16 | Arguments: 17 | FILES annotation or prediction files 18 | 19 | Options: 20 | -i IND_RES_FILE file where to store individual results 21 | -o TOT_RES_FILE file where to store total results 22 | """ 23 | 24 | 25 | def main(): 26 | args = docopt(USAGE) 27 | 28 | ann_files = fnmatch.filter(args['FILES'], '*.chords') 29 | 30 | pred_files = dmgr.files.match_files( 31 | ann_files, '.chords', 32 | args['FILES'], '.chords.txt' 33 | ) 34 | 35 | test.print_scores(test.compute_average_scores(ann_files, pred_files)) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /tools/extract_perfect_chroma.py: -------------------------------------------------------------------------------- 1 | """ 2 | extract_perfect_chroma.py 3 | 4 | Computes "perfect" chroma vectors based on the ground truth chord 5 | annotations of a file. 6 | 7 | Usage: 8 | extract_perfect_chroma.py [options] <fps> <dirs>... 9 | 10 | Arguments: 11 | <fps>  frames per second 12 | <dirs>  directories containing ground truth and audio files. 13 | audio files are needed for song length 14 | 15 | Options: 16 | -o=<dst_dir>  where to put the resulting chromas 17 | [default: ./feature_cache] 18 | """ 19 | 20 | from os.path import splitext, basename, join 21 | import numpy as np 22 | from itertools import chain, izip 23 | from docopt import docopt 24 | import madmom as mm 25 | import mir_eval 26 | 27 | from dmgr.files import find, match_files 28 | 29 | 30 | def to_chroma(intervals, labels, num_frames, fps): 31 | roots, bitmaps, _ = mir_eval.chord.encode_many(labels) 32 | chromas = mir_eval.chord.rotate_bitmaps_to_roots(bitmaps, roots) 33 | starts = intervals[:, 0] 34 | ends = intervals[:, 1] 35 | 36 | # add dummy events 37 | starts = np.hstack(([-np.inf], starts, ends[-1])) 38 | ends = np.hstack((starts[1], ends, [np.inf])) 39 | chromas = np.vstack((np.zeros(12), chromas, np.zeros(12))) 40 | 41 | # Finally, we create the chroma vectors per frame! 42 | frame_times = np.arange(num_frames, dtype=np.float) / fps 43 | 44 | # IMPORTANT: round everything to milliseconds to prevent errors caused 45 | # by floating point hell. Ideally, we would round everything to 46 | # possible *frame times*, but it is easier this way.
47 | starts = np.round(starts, decimals=3) 48 | ends = np.round(ends, decimals=3) 49 | frame_times = np.round(frame_times, decimals=3) 50 | 51 | target_per_frame = ((starts <= frame_times[:, np.newaxis]) & 52 | (frame_times[:, np.newaxis] < ends)) 53 | 54 | # make sure each frame is assigned to only one target vector 55 | assert (target_per_frame.sum(axis=1) == 1).all() 56 | 57 | # create the one hot vectors per frame 58 | return chromas[np.nonzero(target_per_frame)[1]].astype(np.float32) 59 | 60 | 61 | def main(): 62 | args = docopt(__doc__) 63 | 64 | chord_files = list(chain.from_iterable( 65 | find(d, '*.chords') for d in args['<dirs>'])) 66 | audio_files = list(chain.from_iterable( 67 | find(d, '*.flac') for d in args['<dirs>'])) 68 | 69 | if len(chord_files) != len(audio_files): 70 | print 'ERROR: {} chord files, but {} audio files'.format( 71 | len(chord_files), len(audio_files)) 72 | 73 | audio_files = match_files(chord_files, audio_files, '.chords', '.flac') 74 | 75 | for cf, af in izip(chord_files, audio_files): 76 | sig = mm.audio.signal.FramedSignal(af, fps=float(args['<fps>'])) 77 | intervals, labels = mir_eval.io.load_labeled_intervals(cf) 78 | 79 | chromas = to_chroma(intervals, labels, sig.num_frames, 80 | float(args['<fps>'])) 81 | 82 | chroma_file = splitext(basename(cf))[0] + '.features.npy' 83 | np.save(join(args['-o'], chroma_file), chromas) 84 | 85 | 86 | if __name__ == '__main__': 87 | main() 88 | -------------------------------------------------------------------------------- /tools/post_process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import dmgr 3 | from chordrec import test  # tools/ has no test module; import from the chordrec package 4 | import os 5 | import shutil 6 | import fnmatch 7 | import scipy.stats 8 | from chordrec.targets import ChordsMajMin 9 | from docopt import docopt 10 | from chordrec.experiment import TempDir 11 | from itertools import tee, izip 12 | 13 | USAGE = """ 14 | Post-processes chord prediction files. 15 | 16 | Usage: 17 | post_process.py [options] <files>...
18 | 19 | Options: 20 | --fps=<fps>  work with this number of frames per second [default: 10] 21 | --win_length=<win_length>  length in seconds of the post-processing filter 22 | [default: 1.0] 23 | --beats  use beat-based majority vote 24 | --out_dir=<out_dir>  where to put the post-processed results 25 | """ 26 | 27 | 28 | def pairwise(iterable): 29 | a, b = tee(iterable) 30 | next(b, None) 31 | return izip(a, b) 32 | 33 | 34 | def majority_vote(targets, win_size): 35 | context_size = (win_size - 1) / 2 36 | t_wins = dmgr.datasources.segment_axis(targets, frame_size=win_size) 37 | middle = scipy.stats.mode(t_wins, axis=1)[0][:, 0] 38 | start = np.hstack([scipy.stats.mode(targets[:i + 1])[0] 39 | for i in range(context_size)]) 40 | end = np.hstack([scipy.stats.mode(targets[i:])[0] 41 | for i in range(-context_size, 0)]) 42 | return np.hstack((start, middle, end)) 43 | 44 | 45 | def majority_vote_beats(targets, beats): 46 | if len(beats) == 0: 47 | return targets 48 | pp_targets = np.zeros_like(targets) 49 | beats = np.concatenate(([0], beats, [None])) 50 | for start, end in pairwise(beats): 51 | pp_targets[start:end] = scipy.stats.mode(targets[start:end])[0] 52 | return pp_targets 53 | 54 | 55 | def main(): 56 | args = docopt(USAGE) 57 | 58 | fps = float(args['--fps']) 59 | win_size = int(float(args['--win_length']) * fps) 60 | if win_size % 2 == 0: 61 | win_size += 1  # make sure the window is centred on a frame 62 | 63 | out_dir = args['--out_dir'] 64 | 65 | files = args['<files>'] 66 | ann_files = fnmatch.filter(files, '*.chords') 67 | pred_files = dmgr.files.match_files(ann_files, '.chords', 68 | files, '.chords.txt') 69 | 70 | if args['--beats']: 71 | beat_files = dmgr.files.match_files(ann_files, files, 72 | '.chords', '.beats') 73 | else: 74 | beat_files = None 75 | 76 | pre_filter_scores = test.compute_average_scores(ann_files, pred_files) 77 | print "Pre-Filter scores:" 78 | test.print_scores(pre_filter_scores) 79 | 80 | with TempDir() as tmpdir: 81 | target = ChordsMajMin(fps) 82 | pp_pred_files = [] 83 | for i, pf in enumerate(pred_files): 84 | name = os.path.basename(pf) 85 | targets = target(pf).argmax(axis=1) 86 | 87 | if not args['--beats']: 88 | pp_targets = majority_vote(targets, win_size) 89 | else: 90 | beats = np.round(np.loadtxt(beat_files[i], usecols=[0]) * fps).astype(int)  # beat times -> integer frame indices 91 | pp_targets = majority_vote_beats(targets, beats) 92 | 93 | target.write_chord_predictions( 94 | os.path.join(tmpdir, name), 95 | pp_targets 96 | ) 97 | pp_pred_files.append(os.path.join(tmpdir, name)) 98 | 99 | post_filter_scores = test.compute_average_scores(ann_files, 100 | pp_pred_files) 101 | print "Post-Filter scores:" 102 | test.print_scores(post_filter_scores) 103 | 104 | if out_dir is not None: 105 | if not os.path.exists(out_dir): 106 | os.makedirs(out_dir) 107 | for f in pp_pred_files: 108 | shutil.move(f, os.path.join(out_dir, os.path.basename(f))) 109 | 110 | 111 | if __name__ == '__main__': 112 | main() 113 | --------------------------------------------------------------------------------
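A closing note on `create_crf_init_params.py` above: it initialises the CRF's observation model by folding the convnet's final 1x1 convolution and its batch normalisation into a single affine map, with weights `W' = diag(gamma * inv_std) W` and bias `c = beta - mean * gamma * inv_std`. The following minimal NumPy sketch verifies that identity; all shapes and variable names are illustrative and not taken from the repository:

    import numpy as np

    rng = np.random.RandomState(0)
    num_feat, num_classes = 128, 25  # illustrative sizes only

    # folded 1x1-conv weights (one row per class) and a feature vector
    W = rng.randn(num_classes, num_feat)
    x = rng.randn(num_feat)

    # batch-norm statistics and learned parameters
    mean = rng.randn(num_classes)
    inv_std = rng.rand(num_classes) + 0.5
    gamma = rng.randn(num_classes)
    beta = rng.randn(num_classes)

    # batch normalisation applied to the conv output ...
    bn_out = gamma * (W.dot(x) - mean) * inv_std + beta

    # ... equals a single affine map, as used for the CRF observations
    W_folded = W * (gamma * inv_std)[:, np.newaxis]
    c = beta - mean * gamma * inv_std
    assert np.allclose(bn_out, W_folded.dot(x) + c)

For reference, `tools/post_process.py` and `tools/evaluate.py` both take a mixed list of annotation (`.chords`) and prediction (`.chords.txt`) files, so a typical invocation might look like the following (all paths are hypothetical):

    $ python tools/post_process.py --win_length=1.5 --out_dir=pp_results \
          data/beatles/*.chords results/*.chords.txt
    $ python tools/evaluate.py data/beatles/*.chords pp_results/*.chords.txt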