├── .gitignore ├── README.md ├── onset_evaluation.py └── onset_program.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | 3 | # Packages 4 | *.egg 5 | *.egg-info 6 | dist 7 | build 8 | eggs 9 | parts 10 | bin 11 | var 12 | sdist 13 | develop-eggs 14 | .installed.cfg 15 | 16 | # Installer logs 17 | pip-log.txt 18 | 19 | # Unit test / coverage reports 20 | .coverage 21 | .tox 22 | 23 | #Translations 24 | *.mo 25 | 26 | #Mr Developer 27 | .mr.developer.cfg 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | onset_detection 2 | =============== 3 | 4 | Python implementation of the most common spectral-based onset detection algorithms. 5 | 6 | Usage 7 | ----- 8 | `onset_program.py input.wav` processes the audio file and writes the detected onsets to a file named input.onsets.txt. 9 | This file is suitable for evaluation with the `onset_evaluation.py` script, which expects pairs of files: detections with the extension `.onsets.txt` and the corresponding ground-truth annotations ending in `.onsets`. 10 | 11 | Please see the `-h` option of either script for a more detailed description. 12 | 13 | Requirements 14 | ------------ 15 | * Python 2.7, or 2.6 with the argparse module 16 | * Numpy 17 | * Scipy 18 | 19 | -------------------------------------------------------------------------------- /onset_evaluation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | Copyright (c) 2012 Sebastian Böck 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 | 27 | """ 28 | 29 | """ 30 | This software was released together with the paper 31 | 32 | "Evaluating the Online Capabilities of Onset Detection Methods" 33 | by Sebastian Böck, Florian Krebs and Markus Schedl 34 | in Proceedings of the 13th International Society for Music Information 35 | Retrieval Conference (ISMIR), 2012 36 | 37 | Please note that it is not tuned in any way for speed/memory efficiency. However, 38 | it can be used to compare other onset detection algorithms with the method 39 | described in our paper.
All results given in the paper were evaluated with this 40 | code and obtained with the provided onset_program.py script. 41 | 42 | If you use this software, please cite the above paper. 43 | 44 | Please send any comments, enhancements, errata, etc. to the main author. 45 | 46 | """ 47 | 48 | import numpy as np # needed only for mean and std.dev in Counter.print_errors() 49 | 50 | 51 | class Counter(object): 52 | """ 53 | Simple class for counting errors. 54 | 55 | """ 56 | def __init__(self): 57 | """ 58 | Creates a new Counter object instance. 59 | 60 | """ 61 | # for simple events like onsets or beats 62 | self.num = 0 # number of targets 63 | self.tp = 0 # number of true positives 64 | self.fp = 0 # number of false positives 65 | self.fn = 0 # number of false negatives 66 | self.dev = [] # array for deviations 67 | 68 | # for adding 2 Counters 69 | def __add__(self, other): 70 | if isinstance(other, Counter): 71 | self.num += other.num 72 | self.tp += other.tp 73 | self.fp += other.fp 74 | self.fn += other.fn 75 | self.dev.extend(other.dev) 76 | return self 77 | else: 78 | return NotImplemented 79 | 80 | @property 81 | def precision(self): 82 | """Precision.""" 83 | try: 84 | return self.tp / float(self.tp + self.fp) 85 | except ZeroDivisionError: 86 | return 0. 87 | 88 | @property 89 | def recall(self): 90 | """Recall.""" 91 | try: 92 | return self.tp / float(self.tp + self.fn) 93 | except ZeroDivisionError: 94 | return 0. 95 | 96 | @property 97 | def fmeasure(self): 98 | """F-measure.""" 99 | try: 100 | return 2. * self.precision * self.recall / (self.precision + self.recall) 101 | except ZeroDivisionError: 102 | return 0. 103 | 104 | @property 105 | def accuracy(self): 106 | """Accuracy.""" 107 | try: 108 | return self.tp / float(self.fp + self.fn + self.tp) 109 | except ZeroDivisionError: 110 | return 0. 111 | 112 | @property 113 | def true_positive_rate(self): 114 | """True positive rate.""" 115 | try: 116 | return self.tp / float(self.num) 117 | except ZeroDivisionError: 118 | return 0. 119 | 120 | @property 121 | def false_positive_rate(self): 122 | """False positive rate.""" 123 | try: 124 | return self.fp / float(self.fp + self.tp) 125 | except ZeroDivisionError: 126 | return 0. 127 | 128 | @property 129 | def false_negative_rate(self): 130 | """False negative rate.""" 131 | try: 132 | return self.fn / float(self.fn + self.tp) 133 | except ZeroDivisionError: 134 | return 0. 135 | 136 | def print_errors(self, tex=False): 137 | """ 138 | Print errors. 139 | 140 | param: tex: output format to be used in .tex files [default=False] 141 | 142 | """ 143 | # print the errors 144 | print ' targets: %5d correct: %5d fp: %4d fn: %4d p=%.3f r=%.3f f=%.3f' % (self.num, self.tp, self.fp, self.fn, self.precision, self.recall, self.fmeasure) 145 | print ' tp: %.1f%% fp: %.1f%% acc: %.1f%% mean: %.1f ms std: %.1f ms' % (self.true_positive_rate * 100., self.false_positive_rate * 100., self.accuracy * 100., np.mean(self.dev) * 1000., np.std(self.dev) * 1000.) 146 | if tex: 147 | print "%i events & Precision & Recall & F-measure & True Positves & False Positives & Accuracy & Delay\\\\" % (self.num) 148 | print "tex & %.3f & %.3f & %.3f & %.3f & %.3f & %.3f %.1f\$\\pm\$%.1f\\,ms\\\\" % (self.precision, self.recall, self.fmeasure, self.true_positive_rate, self.false_positive_rate, self.accuracy, np.mean(self.dev) * 1000., np.std(self.dev) * 1000.) 149 | 150 | 151 | def load_events(filename): 152 | """ 153 | Load a list of events from file. 
154 | 155 | param: filename: name of the file 156 | 157 | """ 158 | # array for events 159 | events = [] 160 | # try to read in the onsets from the file 161 | with open(filename, 'rb') as f: 162 | # read in each line of the file 163 | for line in f: 164 | # append the event (1st column) to the list, ignore the rest 165 | events.append(float(line.split()[0])) 166 | # return 167 | return events 168 | 169 | 170 | def combine_events(events, delta): 171 | """ 172 | Combine all events within a certain range. 173 | 174 | param: events: list of events [in seconds] 175 | param: delta: combination length [in seconds] 176 | return: list of combined events 177 | 178 | """ 179 | # sort the events 180 | events.sort() 181 | events_length = len(events) 182 | events_index = 0 183 | # array for combined events 184 | comb = [] 185 | # iterate over all events 186 | while events_index < events_length - 1: 187 | # get the first event 188 | first = events[events_index] 189 | # always increase the events index 190 | events_index += 1 191 | # get the second event 192 | second = events[events_index] 193 | # combine the two events? 194 | if second - first <= delta: 195 | # two events within the combination window, combine them and replace 196 | # the second event in the original list with the mean of the events 197 | events[events_index] = (first + second) / 2. 198 | else: 199 | # the two events can not be combined, 200 | # store the first event in the new list 201 | comb.append(first) 202 | # always append the last element of the list 203 | comb.append(events[-1]) 204 | # return the combined onsets 205 | return comb 206 | 207 | 208 | def count_errors(detections, targets, window, delay=0): 209 | """ 210 | Count the errors for the given detections and targets. 211 | 212 | param: detections: a list of events [in seconds] 213 | param: targets: a list of events [in seconds] 214 | param: window: detection window [in seconds] 215 | param: delay: add delay to all detections [in seconds, default=0] 216 | return: a Counter object instance 217 | 218 | """ 219 | # sort the detections and targets 220 | detections.sort() 221 | targets.sort() 222 | # counter for evaluation 223 | counter = Counter() 224 | counter.num = len(targets) 225 | # evaluate 226 | det_length = len(detections) 227 | tar_length = len(targets) 228 | det_index = 0 229 | tar_index = 0 230 | while det_index < det_length and tar_index < tar_length: 231 | # TODO: right now the first detection is compared to the first target 232 | # but we should compare the closets to get correct mean/std.dev values 233 | # besides that the evaluation is correct 234 | # fetch the first detection 235 | det = detections[det_index] 236 | # fetch the first target 237 | tar = targets[tar_index] 238 | # shift with delay 239 | if abs(det + delay - tar) <= window: 240 | # TP detection 241 | counter.tp += 1 242 | # save the deviation 243 | counter.dev.append(det + delay - tar) 244 | # increase the detection and target index 245 | det_index += 1 246 | tar_index += 1 247 | elif det + delay < tar: 248 | # FP detection 249 | counter.fp += 1 250 | # increase the detection index 251 | det_index += 1 252 | # do not increase the target index 253 | elif det + delay > tar: 254 | # we missed a target, thus FN 255 | counter.fn += 1 256 | # do not increase the detection index 257 | # increase the target index 258 | tar_index += 1 259 | # the remaining detections are FP 260 | counter.fp += det_length - det_index 261 | # the remaining targets are FN 262 | counter.fn += tar_length - tar_index 263 | assert 
counter.tp == counter.num - counter.fn, "too stupid to count correctly" 264 | # return the counter 265 | return counter 266 | 267 | 268 | def main(): 269 | import os.path 270 | import argparse 271 | import glob 272 | import fnmatch 273 | 274 | # define parser 275 | p = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=""" 276 | If invoked without any parameters the script evaluates pairs of files 277 | with the targets (.onsets) and detection (.onsets.txt) as simple text 278 | files with one onset timestamp per line according to the rules given in 279 | 280 | "Evaluating the Online Capabilities of Onset Detection Methods" 281 | by Sebastian Böck, Florian Krebs and Markus Schedl 282 | in Proceedings of the 13th International Society for 283 | Music Information Retrieval Conference (ISMIR 2012) 284 | 285 | """) 286 | p.add_argument('files', metavar='files', nargs='+', help='path or files to be evaluated (list of files being filtered according to -d and -t arguments)') 287 | p.add_argument('-v', dest='verbose', action='store_true', help='be verbose') 288 | # extensions used for evaluation 289 | p.add_argument('-d', dest='detections', action='store', default='.onsets.txt', help='extensions of the detections [default: .onsets.txt]') 290 | p.add_argument('-t', dest='targets', action='store', default='.onsets', help='extensions of the targets [default: .onsets]') 291 | # parameters for evaluation 292 | p.add_argument('-w', dest='window', action='store', default=50, type=float, help='evaluation window [in milliseconds]') 293 | p.add_argument('-c', dest='combine', action='store', default=30, type=float, help='combine target events within this range [in milliseconds]') 294 | p.add_argument('--delay', action='store', default=0., type=float, help='add given delay to all detections [in milliseconds]') 295 | p.add_argument('--tex', action='store_true', help='format errors for use is .tex files') 296 | # version 297 | p.add_argument('--version', action='version', version='%(prog)s 1.0 (2012-10-01)') 298 | # parse the arguments 299 | args = p.parse_args() 300 | 301 | # convert the detection, combine, and delay values to seconds 302 | args.window /= 2000. # also halve the size of the detection window 303 | args.combine /= 1000. 304 | args.delay /= 1000. 
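# note: count_errors() accepts a detection if abs(detection + delay - target) <= window,
# i.e. `window` is the half-width of the tolerance window around each target; dividing
# the -w value (the full window size in milliseconds) by 2000 therefore converts it to
# seconds and halves it in one step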
305 | 306 | # determine the files to process 307 | files = [] 308 | for f in args.files: 309 | # check what we have (file/path) 310 | if os.path.isdir(f): 311 | # use all files in the given path 312 | files = glob.glob(f + '/*') 313 | else: 314 | # file was given, append to list 315 | files.append(f) 316 | # sort files 317 | files.sort() 318 | 319 | # TODO: find a better way to determine the corresponding detection/target files from a given list/path of files 320 | # filter target files 321 | tar_files = fnmatch.filter(files, "*%s" % args.targets) 322 | # filter detection files 323 | det_files = fnmatch.filter(files, "*%s" % args.detections) 324 | # must be the same number 325 | assert len(tar_files) == len(det_files), "different number of targets (%i) and detections (%i)" % (len(tar_files), len(det_files)) 326 | 327 | # sum counter for all files 328 | sum_counter = Counter() 329 | # evaluate all files 330 | for i in range(len(det_files)): 331 | detections = load_events(det_files[i]) 332 | targets = load_events(tar_files[i]) 333 | if args.combine > 0: 334 | targets = combine_events(targets, args.combine) 335 | counter = count_errors(detections, targets, args.window, args.delay) 336 | # print stats for each file 337 | if args.verbose: 338 | print det_files[i] 339 | counter.print_errors(args.tex) 340 | # add to sum counter 341 | sum_counter += counter 342 | # print summary 343 | print 'summary for %i files; detection window %.1f ms (+- %.1f ms)' % (len(det_files), args.window * 2000, args.window * 1000) 344 | sum_counter.print_errors(args.tex) 345 | 346 | if __name__ == '__main__': 347 | main() 348 | -------------------------------------------------------------------------------- /onset_program.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | """ 4 | Copyright (c) 2012, 2013 Sebastian Böck 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, this 11 | list of conditions and the following disclaimer. 12 | 2. Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 17 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 18 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 19 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 20 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 21 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 23 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
26 | 27 | """ 28 | 29 | """ 30 | Please note that this program released together with the paper 31 | 32 | "Evaluating the Online Capabilities of Onset Detection Methods" 33 | by Sebastian Böck, Florian Krebs and Markus Schedl 34 | in Proceedings of the 13th International Society for Music Information 35 | Retrieval Conference (ISMIR), 2012 36 | 37 | is not tuned in any way for speed/memory efficiency. However, it can be used 38 | to compare other onset detection algorithms with the method described in our 39 | paper. All results given in the paper were obtained with this code and 40 | evaluated with the provided onset_evaluation.py script. 41 | 42 | If you use this software, please cite the above paper. 43 | 44 | Please send any comments, enhancements, errata, etc. to the main author. 45 | 46 | """ 47 | 48 | import numpy as np 49 | import scipy.fftpack as fft 50 | from scipy.io import wavfile 51 | 52 | 53 | class Filter(object): 54 | """ 55 | Filter Class. 56 | 57 | """ 58 | def __init__(self, ffts, fs, bands=12, fmin=27.5, fmax=16000, equal=False): 59 | """ 60 | Creates a new Filter object instance. 61 | 62 | :param ffts: number of FFT coefficients 63 | :param fs: sample rate of the audio file 64 | :param bands: number of filter bands [default=12] 65 | :param fmin: the minimum frequency [in Hz, default=27.5] 66 | :param fmax: the maximum frequency [in Hz, default=16000] 67 | :param equal: normalize each band to equal energy [default=False] 68 | 69 | """ 70 | # samplerate 71 | self.fs = fs 72 | # reduce fmax if necessary 73 | if fmax > fs / 2: 74 | fmax = fs / 2 75 | # get a list of frequencies 76 | frequencies = self.frequencies(bands, fmin, fmax) 77 | # conversion factor for mapping of frequencies to spectrogram bins 78 | factor = (fs / 2.0) / ffts 79 | # map the frequencies to the spectrogram bins 80 | frequencies = np.round(np.asarray(frequencies) / factor).astype(int) 81 | # only keep unique bins 82 | frequencies = np.unique(frequencies) 83 | # filter out all frequencies outside the valid range 84 | frequencies = [f for f in frequencies if f < ffts] 85 | # number of bands 86 | bands = len(frequencies) - 2 87 | assert bands >= 3, "cannot create filterbank with less than 3 frequencies" 88 | # init the filter matrix with size: ffts x filter bands 89 | self.filterbank = np.zeros([ffts, bands], dtype=np.float) 90 | # process all bands 91 | for band in range(bands): 92 | # edge & center frequencies 93 | start, mid, stop = frequencies[band:band + 3] 94 | # create a triangular filter 95 | self.filterbank[start:stop, band] = self.triang(start, mid, stop, equal) 96 | 97 | @staticmethod 98 | def frequencies(bands, fmin, fmax, a=440): 99 | """ 100 | Returns a list of frequencies aligned on a logarithmic scale. 101 | 102 | :param bands: number of filter bands per octave 103 | :param fmin: the minimum frequency [in Hz] 104 | :param fmax: the maximum frequency [in Hz] 105 | :param a: frequency of A0 [in Hz, default=440] 106 | :return a list of frequencies 107 | Using 12 bands per octave and a=440 corresponding to the MIDI notes. 
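For example, with bands=12 the factor between neighbouring frequencies is 2 ** (1. / 12) ≈ 1.0595 (one semitone), so the returned list contains ..., 415.30, 440, 466.16, ... Hz.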
108 | 109 | """ 110 | # factor 2 frequencies are apart 111 | factor = 2.0 ** (1.0 / bands) 112 | # start with A0 113 | freq = a 114 | frequencies = [freq] 115 | # go upwards till fmax 116 | while freq <= fmax: 117 | # multiply once more, since the included frequency is a frequency 118 | # which is only used as the right corner of a (triangular) filter 119 | freq *= factor 120 | frequencies.append(freq) 121 | # restart with a and go downwards till fmin 122 | freq = a 123 | while freq >= fmin: 124 | # divide once more, since the included frequency is a frequency 125 | # which is only used as the left corner of a (triangular) filter 126 | freq /= factor 127 | frequencies.append(freq) 128 | # sort frequencies 129 | frequencies.sort() 130 | # return the list 131 | return frequencies 132 | 133 | @staticmethod 134 | def triang(start, mid, stop, equal=False): 135 | """ 136 | Calculates a triangular window of the given size. 137 | 138 | :param start: starting bin (with value 0, included in the returned filter) 139 | :param mid: center bin (of height 1, unless norm is True) 140 | :param stop: end bin (with value 0, not included in the returned filter) 141 | :param equal: normalize the area of the filter to 1 [default=False] 142 | :return a triangular shaped filter 143 | 144 | """ 145 | # height of the filter 146 | height = 1. 147 | # normalize the height 148 | if equal: 149 | height = 2. / (stop - start) 150 | # init the filter 151 | triang_filter = np.empty(stop - start) 152 | # rising edge 153 | triang_filter[:mid - start] = np.linspace(0, height, (mid - start), endpoint=False) 154 | # falling edge 155 | triang_filter[mid - start:] = np.linspace(height, 0, (stop - mid), endpoint=False) 156 | # return 157 | return triang_filter 158 | 159 | 160 | class Wav(object): 161 | """ 162 | Wav Class is a simple wrapper around scipy.io.wavfile. 163 | 164 | """ 165 | def __init__(self, filename): 166 | """ 167 | Creates a new Wav object instance of the given file. 168 | 169 | :param filename: name of the .wav file 170 | 171 | """ 172 | # read in the audio 173 | self.samplerate, self.audio = wavfile.read(filename) 174 | # scale the audio values to the range -1...1 depending on the audio type 175 | self.audio = self.audio / float(np.iinfo(self.audio.dtype).max) 176 | # set the length 177 | self.samples = np.shape(self.audio)[0] 178 | # set the number of channels 179 | try: 180 | # multi channel files 181 | self.channels = np.shape(self.audio)[1] 182 | except IndexError: 183 | # catch mono files 184 | self.channels = 1 185 | 186 | def attenuate(self, attenuation): 187 | """ 188 | Attenuate the audio signal. 189 | 190 | :param attenuation: attenuation level given in dB 191 | 192 | """ 193 | self.audio /= np.power(np.sqrt(10.), attenuation / 10.) 194 | 195 | def downmix(self): 196 | """ 197 | Down-mix the audio signal to mono. 198 | 199 | """ 200 | if self.channels > 1: 201 | self.audio = np.sum(self.audio, -1) / self.channels 202 | 203 | def normalize(self): 204 | """ 205 | Normalize the audio signal. 206 | 207 | """ 208 | self.audio /= np.max(self.audio) 209 | 210 | 211 | class Spectrogram(object): 212 | """ 213 | Spectrogram Class. 214 | 215 | """ 216 | def __init__(self, wav, window_size=2048, fps=200, online=True, phase=True): 217 | """ 218 | Creates a new Spectrogram object instance and performs a STFT on the given audio. 
219 | 220 | :param wav: a Wav object 221 | :param window_size: is the size for the window in samples [default=2048] 222 | :param fps: is the desired frame rate [default=200] 223 | :param online: work in online mode (i.e. use only past audio information) [default=True] 224 | :param phase: include phase information [default=True] 225 | 226 | """ 227 | # init some variables 228 | self.wav = wav 229 | self.fps = fps 230 | # derive some variables 231 | self.hop_size = float(self.wav.samplerate) / float(self.fps) # use floats so that seeking works properly 232 | self.frames = int(self.wav.samples / self.hop_size) 233 | self.ffts = int(window_size / 2) 234 | self.bins = int(window_size / 2) # initial number equal to ffts, can change if filters are used 235 | # init STFT matrix 236 | self.stft = np.empty([self.frames, self.ffts], np.complex) 237 | # create windowing function 238 | self.window = np.hanning(window_size) 239 | # step through all frames 240 | for frame in range(self.frames): 241 | # seek to the right position in the audio signal 242 | if online: 243 | # step back a complete window_size after moving forward 1 hop_size 244 | # so that the current position is at the stop of the window 245 | seek = int((frame + 1) * self.hop_size - window_size) 246 | else: 247 | # step back half of the window_size so that the frame represents the centre of the window 248 | seek = int(frame * self.hop_size - window_size / 2) 249 | # read in the right portion of the audio 250 | if seek >= self.wav.samples: 251 | # stop of file reached 252 | break 253 | elif seek + window_size >= self.wav.samples: 254 | # stop behind the actual audio stop, append zeros accordingly 255 | zeros = np.zeros(seek + window_size - self.wav.samples) 256 | signal = self.wav.audio[seek:] 257 | signal = np.append(signal, zeros) 258 | elif seek < 0: 259 | # start before the actual audio start, pad with zeros accordingly 260 | zeros = np.zeros(-seek) 261 | signal = self.wav.audio[0:seek + window_size] 262 | signal = np.append(zeros, signal) 263 | else: 264 | # normal read operation 265 | signal = self.wav.audio[seek:seek + window_size] 266 | # multiply the signal with the window function 267 | signal = signal * self.window 268 | # only shift and perform complex DFT if needed 269 | if phase: 270 | # circular shift the signal (needed for correct phase) 271 | signal = fft.fftshift(signal) 272 | # perform DFT 273 | self.stft[frame] = fft.fft(signal, window_size)[:self.ffts] 274 | # next frame 275 | # magnitude spectrogram 276 | self.spec = np.abs(self.stft) 277 | # phase 278 | if phase: 279 | self.phase = np.arctan2(np.imag(self.stft), np.real(self.stft)) 280 | 281 | # pre-processing stuff 282 | def aw(self, floor=5, relaxation=10): 283 | """ 284 | Perform adaptive whitening on the magnitude spectrogram. 285 | 286 | :param floor: floor value [default=5] 287 | :param relaxation: relaxation time in seconds [default=10] 288 | 289 | "Adaptive Whitening For Improved Real-time Audio Onset Detection" 290 | Dan Stowell and Mark Plumbley 291 | Proceedings of the International Computer Music Conference (ICMC), 2007 292 | 293 | """ 294 | mem_coeff = 10.0 ** (-6. 
* relaxation / self.fps) 295 | P = np.zeros_like(self.spec) 296 | # iterate over all frames 297 | for f in range(self.frames): 298 | spec_floor = np.maximum(self.spec[f], floor) 299 | if f > 0: 300 | P[f] = np.maximum(spec_floor, mem_coeff * P[f - 1]) 301 | else: 302 | P[f] = spec_floor 303 | # adjust spec 304 | self.spec /= P 305 | 306 | def filter(self, filterbank=None): 307 | """ 308 | Filter the magnitude spectrogram with a filterbank. 309 | 310 | :param filterbank: Filter object which includes the filterbank [default=None] 311 | 312 | If no filter is given a standard one will be created. 313 | 314 | """ 315 | if filterbank is None: 316 | # construct a standard filterbank 317 | filterbank = Filter(ffts=self.ffts, fs=self.wav.samplerate).filterbank 318 | # filter the magnitude spectrogram with the filterbank 319 | self.spec = np.dot(self.spec, filterbank) 320 | # adjust the number of bins 321 | self.bins = np.shape(filterbank)[1] 322 | 323 | def log(self, mul=20, add=1): 324 | """ 325 | Take the logarithm of the magnitude spectrogram. 326 | 327 | :param mul: multiply the magnitude spectrogram with given value [default=20] 328 | :param add: add the given value to the magnitude spectrogram [default=1] 329 | 330 | """ 331 | if add <= 0: 332 | raise ValueError("a positive value must be added before taking the logarithm") 333 | self.spec = np.log10(mul * self.spec + add) 334 | 335 | 336 | class SpectralODF(object): 337 | """ 338 | The SpectralODF class implements most of the common onset detection function 339 | based on the magnitude or phase information of a spectrogram. 340 | 341 | """ 342 | def __init__(self, spectrogram, ratio=0.22, frames=None): 343 | """ 344 | Creates a new ODF object instance. 345 | 346 | :param spectrogram: the spectrogram on which the detections functions operate 347 | :param ratio: calculate the difference to the frame which has the given magnitude ratio [default=0.22] 348 | :param frames: calculate the difference to the N-th previous frame [default=None] 349 | 350 | """ 351 | self.s = spectrogram 352 | # determine the number off diff frames 353 | if frames is None: 354 | # get the first sample with a higher magnitude than given ratio 355 | sample = np.argmax(self.s.window > ratio) 356 | diff_samples = self.s.window.size / 2 - sample 357 | # convert to frames 358 | frames = int(round(diff_samples / self.s.hop_size)) 359 | # set the minimum to 1 360 | if frames < 1: 361 | frames = 1 362 | self.diff_frames = frames 363 | 364 | @staticmethod 365 | def wraptopi(angle): 366 | """ 367 | Wrap the phase information to the range -π...π. 368 | 369 | """ 370 | return np.mod(angle + np.pi, 2.0 * np.pi) - np.pi 371 | 372 | def diff(self, spec, pos=False, diff_frames=None): 373 | """ 374 | Calculates the difference on the magnitude spectrogram. 375 | 376 | :param spec: the magnitude spectrogram 377 | :param pos: only keep positive values [default=False] 378 | :param diff_frames: calculate the difference to the N-th previous frame [default=None] 379 | 380 | """ 381 | diff = np.zeros_like(spec) 382 | if diff_frames is None: 383 | diff_frames = self.diff_frames 384 | # calculate the diff 385 | diff[diff_frames:] = spec[diff_frames:] - spec[0:-diff_frames] 386 | if pos: 387 | diff = diff * (diff > 0) 388 | return diff 389 | 390 | # Onset Detection Functions 391 | def hfc(self): 392 | """ 393 | High Frequency Content. 
394 | 395 | "Computer Modeling of Sound for Transformation and Synthesis of Musical Signals" 396 | Paul Masri 397 | PhD thesis, University of Bristol, 1996 398 | 399 | """ 400 | # HFC weights the magnitude spectrogram by the bin number, thus emphasising high frequencies 401 | return np.mean(self.s.spec * np.arange(self.s.bins), axis=1) 402 | 403 | def sd(self): 404 | """ 405 | Spectral Diff. 406 | 407 | "A hybrid approach to musical note onset detection" 408 | Chris Duxbury, Mark Sandler and Matthew Davis 409 | Proceedings of the 5th International Conference on Digital Audio Effects (DAFx-02), 2002. 410 | 411 | """ 412 | # Spectral diff is the sum of all squared positive 1st order differences 413 | return np.sum(self.diff(self.s.spec, pos=True) ** 2, axis=1) 414 | 415 | def sf(self): 416 | """ 417 | Spectral Flux. 418 | 419 | "Computer Modeling of Sound for Transformation and Synthesis of Musical Signals" 420 | Paul Masri 421 | PhD thesis, University of Bristol, 1996 422 | 423 | """ 424 | # Spectral flux is the sum of all positive 1st order differences 425 | return np.sum(self.diff(self.s.spec, pos=True), axis=1) 426 | 427 | def mkl(self, epsilon=0.000001): 428 | """ 429 | Modified Kullback-Leibler. 430 | 431 | :param epsilon: add epsilon to avoid division by 0 [default=0.000001] 432 | 433 | we use the implenmentation presented in: 434 | "Automatic Annotation of Musical Audio for Interactive Applications" 435 | Paul Brossier 436 | PhD thesis, Queen Mary University of London, 2006 437 | 438 | instead of the original work: 439 | "Onset Detection in Musical Audio Signals" 440 | Stephen Hainsworth and Malcolm Macleod 441 | Proceedings of the International Computer Music Conference (ICMC), 2003 442 | 443 | """ 444 | if epsilon <= 0: 445 | raise ValueError("a positive value must be added before division") 446 | mkl = np.zeros_like(self.s.spec) 447 | mkl[1:] = self.s.spec[1:] / (self.s.spec[0:-1] + epsilon) 448 | # note: the original MKL uses sum instead of mean, but the range of mean is much more suitable 449 | return np.mean(np.log(1 + mkl), axis=1) 450 | 451 | def _pd(self): 452 | """ 453 | Helper method used by pd() & wpd(). 454 | 455 | """ 456 | pd = np.zeros_like(self.s.phase) 457 | # instantaneous frequency is given by the first difference ψ′(n, k) = ψ(n, k) − ψ(n − 1, k) 458 | # change in instantaneous frequency is given by the second order difference ψ′′(n, k) = ψ′(n, k) − ψ′(n − 1, k) 459 | pd[2:] = self.s.phase[2:] - 2 * self.s.phase[1:-1] + self.s.phase[:-2] 460 | # map to the range -pi..pi 461 | return self.wraptopi(pd) 462 | 463 | def pd(self): 464 | """ 465 | Phase Deviation. 466 | 467 | "On the use of phase and energy for musical onset detection in the complex domain" 468 | Juan Pablo Bello, Chris Duxbury, Matthew Davies and Mark Sandler 469 | IEEE Signal Processing Letters, Volume 11, Number 6, 2004 470 | 471 | """ 472 | # take the mean of the absolute changes in instantaneous frequency 473 | return np.mean(np.abs(self._pd()), axis=1) 474 | 475 | def wpd(self): 476 | """ 477 | Weighted Phase Deviation. 478 | 479 | "Onset Detection Revisited" 480 | Simon Dixon 481 | Proceedings of the 9th International Conference on Digital Audio Effects (DAFx), 2006 482 | 483 | """ 484 | # make sure the spectrogram is not filtered before 485 | assert np.shape(self.s.phase) == np.shape(self.s.spec) 486 | # wpd = spec * pd 487 | return np.mean(np.abs(self._pd() * self.s.spec), axis=1) 488 | 489 | def nwpd(self, epsilon=0.000001): 490 | """ 491 | Normalized Weighted Phase Deviation. 
492 | 493 | :param epsilon: add epsilon to avoid division by 0 [default=0.000001] 494 | 495 | "Onset Detection Revisited" 496 | Simon Dixon 497 | Proceedings of the 9th International Conference on Digital Audio Effects (DAFx), 2006 498 | 499 | """ 500 | if epsilon <= 0: 501 | raise ValueError("a positive value must be added before division") 502 | # normalize WPD by the sum of the spectrogram (add a small amount so that we don't divide by 0) 503 | return self.wpd() / np.add(np.mean(self.s.spec, axis=1), epsilon) 504 | 505 | def _cd(self): 506 | """ 507 | Helper method used by cd() & rcd(). 508 | 509 | we use the simple implementation presented in: 510 | "Onset Detection Revisited" 511 | Simon Dixon 512 | Proceedings of the 9th International Conference on Digital Audio Effects (DAFx), 2006 513 | 514 | """ 515 | assert np.shape(self.s.phase) == np.shape(self.s.spec) # make sure the spectrogram is not filtered before 516 | # expected spectrogram 517 | cd_target = np.zeros_like(self.s.phase) 518 | # assume constant phase change 519 | cd_target[1:] = 2 * self.s.phase[1:] - self.s.phase[:-1] 520 | # add magnitude 521 | cd_target = self.s.spec * np.exp(1j * cd_target) 522 | # complex spectrogram 523 | # note: construct new instead of using self.stft, because pre-processing could have been applied 524 | cd = self.s.spec * np.exp(1j * self.s.phase) 525 | # subtract the target values 526 | cd[1:] -= cd_target[:-1] 527 | return cd 528 | 529 | def cd(self): 530 | """ 531 | Complex Domain. 532 | 533 | "On the use of phase and energy for musical onset detection in the complex domain" 534 | Juan Pablo Bello, Chris Duxbury, Matthew Davies and Mark Sandler 535 | IEEE Signal Processing Letters, Volume 11, Number 6, 2004 536 | 537 | """ 538 | # take the sum of the absolute changes 539 | return np.sum(np.abs(self._cd()), axis=1) 540 | 541 | def rcd(self): 542 | """ 543 | Rectified Complex Domain. 544 | 545 | "Onset Detection Revisited" 546 | Simon Dixon 547 | Proceedings of the 9th International Conference on Digital Audio Effects (DAFx), 2006 548 | 549 | """ 550 | # rectified complex domain 551 | rcd = self._cd() 552 | # only keep values where the magnitude rises 553 | rcd[1:] = rcd[1:] * (self.s.spec[1:] > self.s.spec[:-1]) 554 | # take the sum of the absolute changes 555 | return np.sum(np.abs(rcd), axis=1) 556 | 557 | 558 | class Onsets(object): 559 | """ 560 | Onset Class. 561 | 562 | """ 563 | def __init__(self, activations, fps, online=True): 564 | """ 565 | Creates a new Onset object instance with the given activations of the 566 | ODF (OnsetDetectionFunction). The activations can be read in from a file. 567 | 568 | :param activations: an array containing the activations of the ODF 569 | :param fps: frame rate of the activations 570 | :param online: work in online mode (i.e. use only past information) [default=True] 571 | 572 | """ 573 | self.activations = None # activations of the ODF 574 | self.fps = fps # framerate of the activation function 575 | self.online = online # online peak-picking 576 | self.detections = [] # list of detected onsets (in seconds) 577 | # set / load activations 578 | if isinstance(activations, np.ndarray): 579 | # activations are given as an array 580 | self.activations = activations 581 | else: 582 | # read in the activations from a file 583 | self.load(activations) 584 | 585 | def detect(self, threshold, combine=30, pre_avg=100, pre_max=30, post_avg=30, post_max=70, delay=0): 586 | """ 587 | Detects the onsets. 
588 | 589 | :param threshold: threshold for peak-picking 590 | :param combine: only report 1 onset within N milliseconds [default=30] 591 | :param pre_avg: use N milliseconds past information for moving average [default=100] 592 | :param pre_max: use N milliseconds past information for moving maximum [default=30] 593 | :param post_avg: use N milliseconds future information for moving average [default=30] 594 | :param post_max: use N milliseconds future information for moving maximum [default=70] 595 | :param delay: report the onset N milliseconds delayed [default=0] 596 | 597 | In online mode, post_avg and post_max are set to 0. 598 | 599 | Implements the peak-picking method described in: 600 | 601 | "Evaluating the Online Capabilities of Onset Detection Methods" 602 | Sebastian Böck, Florian Krebs and Markus Schedl 603 | Proceedings of the 13th International Society for Music Information Retrieval Conference (ISMIR), 2012 604 | 605 | """ 606 | import scipy.ndimage as sim 607 | # online mode? 608 | if self.online: 609 | post_max = 0 610 | post_avg = 0 611 | # convert timing information to frames 612 | pre_avg = int(round(self.fps * pre_avg / 1000.)) 613 | pre_max = int(round(self.fps * pre_max / 1000.)) 614 | post_max = int(round(self.fps * post_max / 1000.)) 615 | post_avg = int(round(self.fps * post_avg / 1000.)) 616 | # convert to seconds 617 | combine /= 1000. 618 | delay /= 1000. 619 | # init detections 620 | self.detections = [] 621 | # moving maximum 622 | max_length = pre_max + post_max + 1 623 | max_origin = int(np.floor((pre_max - post_max) / 2)) 624 | mov_max = sim.filters.maximum_filter1d(self.activations, max_length, mode='constant', origin=max_origin) 625 | # moving average 626 | avg_length = pre_avg + post_avg + 1 627 | avg_origin = int(np.floor((pre_avg - post_avg) / 2)) 628 | mov_avg = sim.filters.uniform_filter1d(self.activations, avg_length, mode='constant', origin=avg_origin) 629 | # detections are activations equal to the moving maximum 630 | detections = self.activations * (self.activations == mov_max) 631 | # detections must be greater than or equal to the moving average + threshold 632 | detections = detections * (detections >= mov_avg + threshold) 633 | # convert detected onsets to a list of timestamps 634 | last_onset = 0 635 | for i in np.nonzero(detections)[0]: 636 | onset = float(i) / float(self.fps) + delay 637 | # only report an onset if none was reported in the last N milliseconds 638 | if onset > last_onset + combine: 639 | self.detections.append(onset) 640 | # save last reported onset 641 | last_onset = onset 642 | 643 | def write(self, filename): 644 | """ 645 | Write the detected onsets to the given file. 646 | 647 | :param filename: the target file name 648 | 649 | Only useful if detect() was invoked before. 650 | 651 | """ 652 | with open(filename, 'w') as f: 653 | for pos in self.detections: 654 | f.write(str(pos) + '\n') 655 | 656 | def save(self, filename): 657 | """ 658 | Save the onset activations to the given file. 659 | 660 | :param filename: the target file name 661 | 662 | """ 663 | self.activations.tofile(filename) 664 | 665 | def load(self, filename): 666 | """ 667 | Load the onset activations from the given file.
668 | 669 | :param filename: the target file name 670 | 671 | """ 672 | self.activations = np.fromfile(filename) 673 | 674 | 675 | def parser(): 676 | import argparse 677 | # define parser 678 | p = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, description=""" 679 | If invoked without any parameters, the software detects all onsets in 680 | the given files in online mode according to the method proposed in: 681 | 682 | "Evaluating the Online Capabilities of Onset Detection Methods" 683 | by Sebastian Böck, Florian Krebs and Markus Schedl 684 | in Proceedings of the 13th International Society for 685 | Music Information Retrieval Conference (ISMIR), 2012 686 | 687 | """) 688 | # general options 689 | p.add_argument('files', metavar='files', nargs='+', help='files to be processed') 690 | p.add_argument('-v', dest='verbose', action='store_true', help='be verbose') 691 | p.add_argument('-s', dest='save', action='store_true', default=False, help='save the activations of the onset detection functions') 692 | p.add_argument('-l', dest='load', action='store_true', default=False, help='load the activations of the onset detection functions') 693 | # online / offline mode 694 | p.add_argument('--offline', dest='online', action='store_false', default=True, help='operate in offline mode') 695 | # wav options 696 | wav_opts = p.add_argument_group('audio arguments') 697 | wav_opts.add_argument('--norm', action='store_true', default=None, help='normalize the audio [switches to offline mode]') 698 | wav_opts.add_argument('--att', action='store', type=float, default=None, help='attenuate the audio by ATT dB') 699 | # spectrogram options 700 | spec_opts = p.add_argument_group('spectrogram arguments') 701 | spec_opts.add_argument('--fps', action='store', default=200, type=int, help='frames per second') 702 | spec_opts.add_argument('--window', action='store', type=int, default=2048, help='DFT window length') 703 | spec_opts.add_argument('--ratio', action='store', type=float, default=0.22, help='window magnitude ratio to calc number of diff frames') 704 | spec_opts.add_argument('--frames', action='store', type=int, default=None, help='diff frames') 705 | # pre-processing 706 | pre_opts = p.add_argument_group('pre-processing arguments') 707 | # aw 708 | pre_opts.add_argument('--aw', action='store_true', default=False, help='apply adaptive whitening') 709 | pre_opts.add_argument('--floor', action='store', type=float, default=5.0, help='floor value for adaptive whitening [default=5.0]') 710 | pre_opts.add_argument('--relax', action='store', type=float, default=10.0, help='relaxation time for adaptive whitening [default=10.0]') 711 | # filter 712 | pre_opts.add_argument('--filter', action='store_true', default=None, help='filter the magnitude spectrogram with a filterbank') 713 | pre_opts.add_argument('--fmin', action='store', default=27.5, type=float, help='minimum frequency of filter in Hz [default=27.5]') 714 | pre_opts.add_argument('--fmax', action='store', default=16000, type=float, help='maximum frequency of filter in Hz [default=16000]') 715 | pre_opts.add_argument('--bands', action='store', type=int, default=12, help='number of bands per octave') 716 | pre_opts.add_argument('--equal', action='store_true', default=False, help='equalize triangular windows to have equal area') 717 | # logarithm 718 | pre_opts.add_argument('--log', action='store_true', default=None, help='logarithmic magnitude') 719 | pre_opts.add_argument('--mul', action='store', default=1, type=float, 
help='multiplier (before taking the log) [default=1]') 720 | pre_opts.add_argument('--add', action='store', default=1, type=float, help='value added (before taking the log) [default=1]') 721 | # onset detection 722 | onset_opts = p.add_argument_group('onset detection arguments') 723 | onset_opts.add_argument('-o', dest='odf', action='append', default=[], help='use this onset detection function (can be used multiple times) [hfc,sd,sf,mkl,pd,wpd,nwpd,cd,rcd,all]') 724 | onset_opts.add_argument('-t', dest='threshold', action='store', type=float, default=2.5, help='detection threshold') 725 | onset_opts.add_argument('--combine', action='store', type=float, default=30, help='combine onsets within N miliseconds [default=30]') 726 | onset_opts.add_argument('--pre_avg', action='store', type=float, default=100, help='build average over N previous miliseconds [default=100]') 727 | onset_opts.add_argument('--pre_max', action='store', type=float, default=30, help='search maximum over N previous miliseconds [default=30]') 728 | onset_opts.add_argument('--post_avg', action='store', type=float, default=70, help='build average over N following miliseconds [default=70]') 729 | onset_opts.add_argument('--post_max', action='store', type=float, default=30, help='search maximum over N following miliseconds [default=30]') 730 | onset_opts.add_argument('--delay', action='store', type=float, default=0, help='report the onsets N miliseconds delayed [default=0]') 731 | # version 732 | p.add_argument('--version', action='version', version='%(prog)s 1.04 (2013-02-27)') 733 | # parse arguments 734 | args = p.parse_args() 735 | 736 | # list of offered ODFs 737 | methods = ['hfc', 'sd', 'sf', 'mkl', 'pd', 'wpd', 'nwpd', 'cd', 'rcd'] 738 | # use default values if no ODF is given 739 | if args.odf == []: 740 | args.odf = ['sf'] 741 | if args.log is None: 742 | args.log = True 743 | if args.filter is None: 744 | args.filter = True 745 | # use all onset detection functions 746 | if 'all' in args.odf: 747 | args.odf = methods 748 | # remove not implemented/mistyped methods 749 | args.odf = list(set(args.odf) & set(methods)) 750 | assert args.odf, 'at least one onset detection function must be given' 751 | # check if we need the STFT phase information 752 | if set(args.odf) & set(['pd', 'wpd', 'nwpd', 'cd', 'rcd']): 753 | args.phase = True 754 | else: 755 | args.phase = False 756 | 757 | # print arguments 758 | if args.verbose: 759 | print args 760 | 761 | # return args 762 | return args 763 | 764 | 765 | def main(): 766 | import os.path 767 | import glob 768 | import fnmatch 769 | 770 | # parse arguments 771 | args = parser() 772 | 773 | # determine the files to process 774 | files = [] 775 | for f in args.files: 776 | # check what we have (file/path) 777 | if os.path.isdir(f): 778 | # use all files in the given path 779 | files = glob.glob(f + '/*.wav') 780 | else: 781 | # file was given, append to list 782 | files.append(f) 783 | 784 | # only process .wav files 785 | files = fnmatch.filter(files, '*.wav') 786 | files.sort() 787 | 788 | # init filterbank 789 | filt = None 790 | 791 | # process the files 792 | for f in files: 793 | if args.verbose: 794 | print f 795 | 796 | # use the name of the file without the extension 797 | filename = os.path.splitext(f)[0] 798 | 799 | # do the processing stuff unless the activations are loaded from file 800 | if not args.load: 801 | # open the wav file 802 | w = Wav(f) 803 | # normalize audio 804 | if args.norm: 805 | w.normalize() 806 | args.online = False # switch to offline mode 807 
| # downmix to mono 808 | if w.channels > 1: 809 | w.downmix() 810 | # attenuate signal 811 | if args.att: 812 | w.attenuate(args.att) 813 | 814 | # spectrogram 815 | s = Spectrogram(w, args.window, args.fps, args.online, args.phase) 816 | # adaptive whitening 817 | if args.aw: 818 | s.aw(args.floor, args.relax) 819 | # filter 820 | if args.filter: 821 | # (re-)create filterbank if the samplerate of the audio changes 822 | if (filt is None) or (filt.fs != w.samplerate): 823 | filt = Filter(args.window / 2, w.samplerate, args.bands, args.fmin, args.fmax, args.equal) 824 | # filter the spectrogram 825 | s.filter(filt.filterbank) 826 | # log 827 | if args.log: 828 | s.log(args.mul, args.add) 829 | 830 | # process all onset detection functions 831 | for odf in args.odf: 832 | # load the activations from file 833 | if args.load: 834 | o = Onsets("%s.onsets.%s" % (filename, odf), args.fps, args.online) 835 | pass 836 | else: 837 | # use the spectrogram to create an SpectralODF object 838 | sodf = SpectralODF(s, args.ratio, args.frames) 839 | # perform detection function on the object 840 | act = getattr(sodf, odf)() 841 | # create an Onset object with the returned activations 842 | o = Onsets(act, args.fps, args.online) 843 | if args.save: 844 | # save the raw ODF activations 845 | o.save("%s.onsets.%s" % (filename, odf)) 846 | # do not proceed with onset detection 847 | continue 848 | # detect the onsets 849 | o.detect(args.threshold, args.combine, args.pre_avg, args.pre_max, args.post_avg, args.post_max, args.delay) 850 | # write the onsets to a file 851 | if len(args.odf) > 1: 852 | # include the ODF name 853 | o.write("%s.onsets.%s.txt" % (filename, odf)) 854 | else: 855 | o.write("%s.onsets.txt" % (filename)) 856 | if args.verbose: 857 | print 'detections:', o.detections 858 | # continue with next onset detection function 859 | # continue with next file 860 | 861 | if __name__ == '__main__': 862 | main() 863 | --------------------------------------------------------------------------------
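Usage sketch (not part of the original repository): a minimal example of driving the classes in onset_program.py directly, mirroring what its main() does with the default settings (filtered, logarithmic magnitude spectrogram; spectral flux; online peak-picking). 'input.wav' is a placeholder file name and the parameter values are the script's CLI defaults.

    from onset_program import Wav, Spectrogram, SpectralODF, Onsets

    w = Wav('input.wav')                    # audio, scaled to the range -1...1
    w.downmix()                             # down-mix to mono (no-op for mono files)
    s = Spectrogram(w, window_size=2048, fps=200, online=True, phase=False)
    s.filter()                              # default log-frequency filterbank
    s.log(mul=1, add=1)                     # logarithmic magnitude
    act = SpectralODF(s).sf()               # spectral flux activations
    o = Onsets(act, fps=200, online=True)
    o.detect(2.5, combine=30, pre_avg=100, pre_max=30)  # threshold 2.5, timings in ms
    o.write('input.onsets.txt')             # one onset timestamp (in seconds) per line

The resulting input.onsets.txt can then be evaluated against a matching ground-truth file (input.onsets) with onset_evaluation.py, as described in the README.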