├── .gitignore ├── .gitlab-ci.yml ├── Bergmann-Raffel ├── .gitignore ├── download.sh ├── download7digital.py ├── lists │ └── songlist.txt ├── plot.py ├── process.py ├── report.md └── requirements.txt ├── Hysneli-Rafii ├── code │ ├── .gitkeep │ ├── bss_eval.m │ ├── repet.m │ └── repetSem.m ├── dataset │ ├── .gitkeep │ └── download_install.bat ├── output │ ├── .gitkeep │ ├── Music-SDR evaluation.fig │ └── Voice-SDR evaluation.fig └── reportREPET.md ├── Instructions.md ├── Makefile ├── Pali-Boeck ├── .gitignore ├── code │ ├── .gitkeep │ ├── readme_code.md │ ├── runSuperFlux.py │ └── runSuperFlux_online.py ├── dataset │ ├── .gitkeep │ ├── modalexport │ ├── readFiles.py │ ├── readme_dataset.md │ └── wavFiles │ │ └── .gitkeep ├── download.bat ├── getwav.bat ├── output │ ├── .gitignore │ ├── annotations │ │ └── .gitkeep │ ├── evaluation.py │ ├── evaluation_online.py │ ├── outputEval.txt │ └── outputEvalOnline.txt ├── report.md ├── requirements.txt ├── run.bat └── run_online.bat ├── README.md ├── Reck-Font ├── Tempo estimation results.py ├── data_preprocessing.sh ├── download.sh ├── evaluation.sh ├── report.md ├── requirements.txt └── run_analysis.sh ├── Rosenzweig-Jiang ├── .gitignore ├── code │ ├── .gitkeep │ └── getFileNames.m ├── dataset │ └── .gitkeep ├── output │ ├── .gitkeep │ ├── TbFigure4.fig │ └── TbFigure4.png ├── report.md ├── step01_download.m └── step02_reproduceTbFigure4.m └── Ssenyonga-Huang ├── .gitignore ├── code ├── .gitkeep ├── presentation_demo.m └── rpca_mask_script.m ├── download.sh ├── output ├── .gitkeep └── figures │ ├── .gitkeep │ ├── fig3.png │ ├── fig4.png │ └── fig5.png └── report.md /.gitignore: -------------------------------------------------------------------------------- 1 | #####=== General Files ===##### 2 | 3 | *.zip 4 | *.gz 5 | *.7z 6 | *.tar 7 | *.tgz 8 | *.tar.gz 9 | *.rar 10 | *.wav 11 | *.mp3 12 | *.mp4 13 | *.mat 14 | *.h5 15 | *.hdf5 16 | 17 | #####=== OSX ===##### 18 | .DS_Store 19 | .AppleDouble 20 | .LSOverride 21 | 22 | # 
Icon must end with two \r 23 | Icon 24 | 25 | # Thumbnails 26 | ._* 27 | 28 | # Files that might appear in the root of a volume 29 | .DocumentRevisions-V100 30 | .fseventsd 31 | .Spotlight-V100 32 | .TemporaryItems 33 | .Trashes 34 | .VolumeIcon.icns 35 | 36 | # Directories potentially created on remote AFP share 37 | .AppleDB 38 | .AppleDesktop 39 | Network Trash Folder 40 | Temporary Items 41 | .apdisk 42 | 43 | 44 | #####=== Windows ===##### 45 | # Windows image file caches 46 | Thumbs.db 47 | ehthumbs.db 48 | 49 | # Folder config file 50 | Desktop.ini 51 | 52 | # Recycle Bin used on file shares 53 | $RECYCLE.BIN/ 54 | 55 | # Windows Installer files 56 | *.cab 57 | *.msi 58 | *.msm 59 | *.msp 60 | 61 | # Windows shortcuts 62 | *.lnk 63 | 64 | #####=== Linux ===##### 65 | *~ 66 | 67 | # KDE directory preferences 68 | .directory 69 | 70 | # Linux trash folder which might appear on any partition or disk 71 | .Trash-* 72 | 73 | 74 | #####=== C ===##### 75 | 76 | # Object files 77 | *.o 78 | *.ko 79 | *.obj 80 | *.elf 81 | 82 | # Precompiled Headers 83 | *.gch 84 | *.pch 85 | 86 | # Libraries 87 | *.lib 88 | *.a 89 | *.la 90 | *.lo 91 | 92 | # Shared objects (inc. 
Windows DLLs) 93 | *.dll 94 | *.so 95 | *.so.* 96 | *.dylib 97 | 98 | # Executables 99 | *.exe 100 | *.out 101 | *.app 102 | *.i*86 103 | *.x86_64 104 | *.hex 105 | 106 | # Debug files 107 | *.dSYM/ 108 | 109 | #####=== C++ ===##### 110 | 111 | # Compiled Object files 112 | *.slo 113 | *.lo 114 | *.o 115 | *.obj 116 | 117 | # Precompiled Headers 118 | *.gch 119 | *.pch 120 | 121 | # Compiled Dynamic libraries 122 | *.so 123 | *.dylib 124 | *.dll 125 | 126 | # Fortran module files 127 | *.mod 128 | 129 | # Compiled Static libraries 130 | *.lai 131 | *.la 132 | *.a 133 | *.lib 134 | 135 | # Executables 136 | *.exe 137 | *.out 138 | *.app 139 | 140 | #####=== Matlab ===##### 141 | ##--------------------------------------------------- 142 | ## Remove autosaves generated by the Matlab editor 143 | ## We have git for backups! 144 | ##--------------------------------------------------- 145 | 146 | # Windows default autosave extension 147 | *.asv 148 | 149 | # OSX / *nix default autosave extension 150 | *.m~ 151 | 152 | # Compiled MEX binaries (all platforms) 153 | *.mex* 154 | 155 | # Simulink Code Generation 156 | slprj/ 157 | 158 | #####=== Python ===##### 159 | 160 | # Byte-compiled / optimized / DLL files 161 | __pycache__/ 162 | *.py[cod] 163 | *$py.class 164 | 165 | # C extensions 166 | *.so 167 | 168 | # Distribution / packaging 169 | .Python 170 | env/ 171 | build/ 172 | develop-eggs/ 173 | dist/ 174 | downloads/ 175 | eggs/ 176 | .eggs/ 177 | lib/ 178 | lib64/ 179 | parts/ 180 | sdist/ 181 | var/ 182 | *.egg-info/ 183 | .installed.cfg 184 | *.egg 185 | 186 | # PyInstaller 187 | # Usually these files are written by a python script from a template 188 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
189 | *.manifest 190 | *.spec 191 | 192 | # Installer logs 193 | pip-log.txt 194 | pip-delete-this-directory.txt 195 | 196 | # Unit test / coverage reports 197 | htmlcov/ 198 | .tox/ 199 | .coverage 200 | .coverage.* 201 | .cache 202 | nosetests.xml 203 | coverage.xml 204 | *,cover 205 | 206 | # Translations 207 | *.mo 208 | *.pot 209 | 210 | # Django stuff: 211 | *.log 212 | 213 | # Sphinx documentation 214 | docs/_build/ 215 | 216 | # PyBuilder 217 | target/ 218 | *.lab 219 | *.m 220 | *.pdf 221 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: audiolabs/latex 2 | 3 | test: 4 | script: 5 | - apt-get update -yy 6 | - apt-get install -yy pandoc 7 | - make all 8 | artifacts: 9 | name: "$CI_PROJECT_NAME" 10 | paths: 11 | - "*.pdf" 12 | -------------------------------------------------------------------------------- /Bergmann-Raffel/.gitignore: -------------------------------------------------------------------------------- 1 | midi-dataset/ 2 | output/ 3 | data/ 4 | 5 | #####=== General Files ===##### 6 | 7 | *.zip 8 | *.gz 9 | *.7z 10 | *.tar 11 | *.tgz 12 | *.tar.gz 13 | *.rar 14 | *.wav 15 | *.mp3 16 | *.mp4 17 | *.mat 18 | *.h5 19 | *.hdf5 20 | 21 | 22 | 23 | #####=== OSX ===##### 24 | .DS_Store 25 | .AppleDouble 26 | .LSOverride 27 | 28 | # Icon must end with two \r 29 | Icon 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | 50 | #####=== Windows ===##### 51 | # Windows image file caches 52 | Thumbs.db 53 | ehthumbs.db 54 | 55 | # Folder config file 56 | Desktop.ini 57 | 58 | # Recycle Bin used on 
file shares 59 | $RECYCLE.BIN/ 60 | 61 | # Windows Installer files 62 | *.cab 63 | *.msi 64 | *.msm 65 | *.msp 66 | 67 | # Windows shortcuts 68 | *.lnk 69 | 70 | #####=== Linux ===##### 71 | *~ 72 | 73 | # KDE directory preferences 74 | .directory 75 | 76 | # Linux trash folder which might appear on any partition or disk 77 | .Trash-* 78 | 79 | 80 | #####=== C ===##### 81 | 82 | # Object files 83 | *.o 84 | *.ko 85 | *.obj 86 | *.elf 87 | 88 | # Precompiled Headers 89 | *.gch 90 | *.pch 91 | 92 | # Libraries 93 | *.lib 94 | *.a 95 | *.la 96 | *.lo 97 | 98 | # Shared objects (inc. Windows DLLs) 99 | *.dll 100 | *.so 101 | *.so.* 102 | *.dylib 103 | 104 | # Executables 105 | *.exe 106 | *.out 107 | *.app 108 | *.i*86 109 | *.x86_64 110 | *.hex 111 | 112 | # Debug files 113 | *.dSYM/ 114 | 115 | #####=== C++ ===##### 116 | 117 | # Compiled Object files 118 | *.slo 119 | *.lo 120 | *.o 121 | *.obj 122 | 123 | # Precompiled Headers 124 | *.gch 125 | *.pch 126 | 127 | # Compiled Dynamic libraries 128 | *.so 129 | *.dylib 130 | *.dll 131 | 132 | # Fortran module files 133 | *.mod 134 | 135 | # Compiled Static libraries 136 | *.lai 137 | *.la 138 | *.a 139 | *.lib 140 | 141 | # Executables 142 | *.exe 143 | *.out 144 | *.app 145 | 146 | #####=== Matlab ===##### 147 | ##--------------------------------------------------- 148 | ## Remove autosaves generated by the Matlab editor 149 | ## We have git for backups! 
150 | ##--------------------------------------------------- 151 | 152 | # Windows default autosave extension 153 | *.asv 154 | 155 | # OSX / *nix default autosave extension 156 | *.m~ 157 | 158 | # Compiled MEX binaries (all platforms) 159 | *.mex* 160 | 161 | # Simulink Code Generation 162 | slprj/ 163 | 164 | #####=== Python ===##### 165 | 166 | # Byte-compiled / optimized / DLL files 167 | __pycache__/ 168 | *.py[cod] 169 | *$py.class 170 | 171 | # C extensions 172 | *.so 173 | 174 | # Distribution / packaging 175 | .Python 176 | env/ 177 | build/ 178 | develop-eggs/ 179 | dist/ 180 | downloads/ 181 | eggs/ 182 | .eggs/ 183 | lib/ 184 | lib64/ 185 | parts/ 186 | sdist/ 187 | var/ 188 | *.egg-info/ 189 | .installed.cfg 190 | *.egg 191 | 192 | # PyInstaller 193 | # Usually these files are written by a python script from a template 194 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 195 | *.manifest 196 | *.spec 197 | 198 | # Installer logs 199 | pip-log.txt 200 | pip-delete-this-directory.txt 201 | 202 | # Unit test / coverage reports 203 | htmlcov/ 204 | .tox/ 205 | .coverage 206 | .coverage.* 207 | .cache 208 | nosetests.xml 209 | coverage.xml 210 | *,cover 211 | 212 | # Translations 213 | *.mo 214 | *.pot 215 | 216 | # Django stuff: 217 | *.log 218 | 219 | # Sphinx documentation 220 | docs/_build/ 221 | 222 | # PyBuilder 223 | target/ 224 | -------------------------------------------------------------------------------- /Bergmann-Raffel/download.sh: -------------------------------------------------------------------------------- 1 | mkdir -p data/mp3 2 | mkdir -p data/mid 3 | 4 | wget "http://hog.ee.columbia.edu/craffel/lmd/clean_midi.tar.gz" -P "data/mid" 5 | 6 | cd data/mid 7 | 8 | tar --wildcards -xzf clean_midi.tar.gz "clean_midi/Michael Jackson/"* 9 | tar --wildcards -xzf clean_midi.tar.gz "clean_midi/AC DC/"* 10 | tar --wildcards -xzf clean_midi.tar.gz "clean_midi/Billy Idol/"* 11 | 12 | cd "clean_midi/Michael Jackson" 13 
| rename "s/^/Michael Jackson /" * 14 | cd "../AC DC" 15 | rename "s/^/AC DC /" * 16 | cd "../Billy Idol" 17 | rename "s/^/Billy Idol /" * 18 | cd ../../.. 19 | 20 | mv "mid/clean_midi/Michael Jackson/"* mid 21 | mv "mid/clean_midi/AC DC/"* mid 22 | mv "mid/clean_midi/Billy Idol/"* mid 23 | 24 | rm mid/clean_midi.tar.gz 25 | rm mid/*.*.mid 26 | rm -r mid/clean_midi 27 | 28 | cd .. 29 | -------------------------------------------------------------------------------- /Bergmann-Raffel/download7digital.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import requests 4 | from requests_oauthlib import OAuth1 5 | 6 | 7 | def search_trackid(search, key, secret): 8 | url = 'https://api.7digital.com/1.2/track/search' 9 | auth = OAuth1(key, secret, signature_type='query') 10 | 11 | params = { 12 | 'q': search, 13 | 'country': 'ww', 14 | } 15 | 16 | headers = { 17 | 'accept': 'application/json', 18 | } 19 | 20 | try: 21 | r = requests.get(url, auth=auth, params=params, headers=headers) 22 | data = r.json() 23 | except ValueError: 24 | print r.content 25 | raise 26 | 27 | if data['status'] != 'ok': 28 | return None 29 | 30 | return data['searchResults']['searchResult'][0]['track']['id'] 31 | 32 | 33 | def get_file(trackid, handle, key, secret): 34 | url = "http://previews.7digital.com/clip/%s" % trackid 35 | auth = OAuth1(key, secret, signature_type='query') 36 | 37 | params = { 38 | 'country': 'ww', 39 | } 40 | 41 | r = requests.get(url, auth=auth, params=params, stream=True) 42 | 43 | if not r.ok: 44 | raise RuntimeError("Download of %s failed" % trackid) 45 | 46 | for block in r.iter_content(1024): 47 | handle.write(block) 48 | 49 | 50 | if __name__ == '__main__': 51 | parser = argparse.ArgumentParser(description='Download 7digital previews for a list of songs.') 52 | parser.add_argument( 53 | '--key', 54 | help='7digital key, optional', 55 | nargs='?', 56 | default=os.environ.get('DIGITAL7_KEY'), 57 | 
) 58 | parser.add_argument( 59 | '--secret', 60 | help='7digital secret, optional', 61 | nargs='?', 62 | default=os.environ.get('DIGITAL7_SECRET'), 63 | ) 64 | 65 | args = parser.parse_args() 66 | with open("lists/songlist.txt", "r") as songlist: 67 | songs = songlist.read().splitlines() 68 | for song in songs: 69 | trackid = search_trackid(song, key=args.key, secret=args.secret) 70 | title = song.replace(" ", "_").lower() 71 | outfile = 'data/mp3/%s.mp3' % title 72 | with open(outfile, 'wb') as f: 73 | get_file(trackid, f, key=args.key, secret=args.secret) 74 | print outfile 75 | -------------------------------------------------------------------------------- /Bergmann-Raffel/lists/songlist.txt: -------------------------------------------------------------------------------- 1 | AC DC Back In Black 2 | AC DC Dirty Deeds Done Dirt Cheap 3 | AC DC For Those About To Rock (We Salute You) 4 | AC DC Highway To Hell 5 | AC DC Thunderstruck 6 | AC DC TNT 7 | AC DC Whole Lotta Rosie 8 | AC DC Who Made Who 9 | AC DC You Shook Me All Night Long 10 | Billy Idol Catch My Fall 11 | Billy Idol Dancing With Myself 12 | Billy Idol (Do Not) Stand in the Shadows 13 | Billy Idol Eyes Without A Face 14 | Billy Idol Hot in the City 15 | Billy Idol Mony Mony 16 | Billy Idol Rebel Yell 17 | Billy Idol White Wedding 18 | Michael Jackson Beat It 19 | Michael Jackson Black or White 20 | Michael Jackson Don't Stop 'Til You Get Enough 21 | Michael Jackson Heal The World 22 | Michael Jackson I Just Can't Stop Loving You 23 | Michael Jackson Liberian Girl 24 | Michael Jackson Man in the Mirror 25 | Michael Jackson Off the Wall 26 | Michael Jackson Remember the Time 27 | Michael Jackson Rock With You 28 | Michael Jackson She's Out Of My Life 29 | Michael Jackson Smooth Criminal 30 | Michael Jackson The Girl Is Mine 31 | Michael Jackson The Lady in My Life 32 | Michael Jackson The Way You Make Me Feel 33 | Michael Jackson Thriller 34 | Michael Jackson Wanna Be Startin' Somethin' 35 | 36 | 
-------------------------------------------------------------------------------- /Bergmann-Raffel/plot.py: -------------------------------------------------------------------------------- 1 | ''' 2 | plot cqts 3 | ''' 4 | 5 | import os 6 | import sys 7 | sys.path.append('midi-dataset') 8 | import deepdish 9 | import librosa 10 | import feature_extraction 11 | import matplotlib.pyplot as plt 12 | 13 | def plot_mp3_cqt(filename): 14 | sr = feature_extraction.AUDIO_FS 15 | # load mp3 cqt from h5 file 16 | cqt_filename = os.path.join('data', 'mp3cqts', '%s.h5' % filename) 17 | cqt = deepdish.io.load(cqt_filename) 18 | plt.figure(figsize=(8,5)) 19 | librosa.display.specshow(cqt, sr=sr, x_axis='frames', 20 | y_axis='cqt_note', fmin = librosa.core.midi_to_hz(feature_extraction.NOTE_START), 21 | hop_length = feature_extraction.AUDIO_HOP) 22 | plt.title('CQT of mp3 file for %s' % filename) 23 | plt.tight_layout() 24 | plt.savefig(os.path.join('output', '%s_mp3.png' % filename)) 25 | plt.clf() 26 | 27 | def plot_mid_cqt(filename): 28 | sr = feature_extraction.MIDI_FS 29 | # load mid cqt from h5 file 30 | cqt_filename = os.path.join('data', 'midcqts', '%s.h5' % filename) 31 | cqt = deepdish.io.load(cqt_filename) 32 | plt.figure(figsize=(25,5)) 33 | librosa.display.specshow(cqt, sr=sr, x_axis='frames', 34 | y_axis='cqt_note', fmin = librosa.core.midi_to_hz(feature_extraction.NOTE_START), 35 | hop_length = feature_extraction.MIDI_HOP) 36 | plt.title('CQT of midi file for %s' % filename) 37 | plt.tight_layout() 38 | plt.savefig(os.path.join('output', '%s_mid.png' % filename)) 39 | plt.clf() 40 | 41 | def plot_dist_matrix(filename): 42 | mat_filename = os.path.join('data', 'distance_matrices', '%s.h5' % filename) 43 | dist_mat = deepdish.io.load(mat_filename) 44 | plt.figure(figsize=(25,8)) 45 | plt.matshow(dist_mat) 46 | plt.title('CQT Distance Matrix %s' % filename) 47 | plt.savefig(os.path.join('output', '%s_distance.png' % filename)) 48 | plt.clf() 49 | 50 | 51 | if __name__ 
== '__main__': 52 | with open('lists/songlist.txt') as songlist: 53 | songs = songlist.read().splitlines() 54 | filenames = [song.replace(" ", "_").lower() for song in songs] 55 | for filename in filenames: 56 | plot_mp3_cqt(filename) 57 | for song in songs: 58 | plot_mid_cqt(song) 59 | for filename in filenames: 60 | plot_dist_matrix(filename) -------------------------------------------------------------------------------- /Bergmann-Raffel/process.py: -------------------------------------------------------------------------------- 1 | ''' 2 | create cqts for mp3 files 3 | ''' 4 | 5 | import os 6 | import sys 7 | sys.path.append("midi-dataset") 8 | import feature_extraction 9 | import pretty_midi 10 | import librosa 11 | import deepdish 12 | import scipy 13 | 14 | def process_mp3(filename): 15 | mp3_filename = os.path.join('data', 'mp3', filename) 16 | sr = feature_extraction.AUDIO_FS 17 | #load the mp3 file 18 | audio_data, _ = librosa.load(mp3_filename, sr=sr) 19 | cqt = feature_extraction.audio_cqt(audio_data) 20 | cqt = librosa.logamplitude(cqt, ref_power=cqt.max()) 21 | cqt = librosa.util.normalize(cqt, norm=2., axis=0) 22 | #create the output folder if it doesn't exist 23 | if not os.path.exists(os.path.join('data','mp3cqts')): 24 | os.makedirs(os.path.join('data','mp3cqts')) 25 | output_filename = os.path.join('data','mp3cqts',filename.replace('.mp3', '.h5')) 26 | deepdish.io.save(output_filename, cqt) 27 | return cqt 28 | 29 | def process_mid(filename): 30 | mid_filename = os.path.join('data', 'mid', filename) 31 | mid_data = pretty_midi.PrettyMIDI(mid_filename) 32 | cqt = feature_extraction.midi_cqt(mid_data) 33 | cqt = librosa.logamplitude(cqt, ref_power=cqt.max()) 34 | cqt = librosa.util.normalize(cqt, norm=2., axis=0) 35 | if not os.path.exists(os.path.join('data','midcqts')): 36 | os.makedirs(os.path.join('data','midcqts')) 37 | output_filename = os.path.join('data','midcqts',filename.replace('.mid', '.h5')) 38 | deepdish.io.save(output_filename, cqt) 
39 | return cqt 40 | 41 | def distance_matrix(mp3cqt, midcqt, title): 42 | dist_matrix = scipy.spatial.distance.cdist(mp3cqt.T, midcqt.T, 'cosine') 43 | if not os.path.exists(os.path.join('data','distance_matrices')): 44 | os.makedirs(os.path.join('data','distance_matrices')) 45 | output_filename = os.path.join('data','distance_matrices', "%s.h5" % title) 46 | deepdish.io.save(output_filename, dist_matrix) 47 | 48 | 49 | if __name__ == '__main__': 50 | with open('lists/songlist.txt') as songlist: 51 | songs = songlist.read().splitlines() 52 | for song in songs: 53 | filename = song.replace(" ", "_").lower() 54 | mp3_cqt = process_mp3('%s.mp3' % filename) 55 | mid_cqt = process_mid('%s.mid' % song) 56 | distance_matrix(mp3_cqt, mid_cqt, filename) 57 | -------------------------------------------------------------------------------- /Bergmann-Raffel/report.md: -------------------------------------------------------------------------------- 1 | # "Large-Scale Content-Based Matching of MIDI and Audio Files" 2 | 3 | - Paper author: Colin Raffel 4 | - Report author: Michael Bergmann 5 | 6 | ## Usage guide 7 | 8 | ### Setup 9 | 10 | The required environment is described in the requirements.txt 11 | 12 | To setup the environment, install the given Software and run: 13 | 14 | pip install numpy 15 | pip install -r requirements.txt 16 | git clone https://github.com/bergmann-fau/midi-dataset 17 | 18 | ### Download Dataset and preprocess data 19 | 20 | Please apply for a [7Digital API key](https://api-signup.7digital.com/) and set it like: 21 | 22 | export DIGITAL7_KEY=yourkey 23 | export DIGITAL7_SECRET=yoursecret 24 | 25 | Run the following commands to download the dataset: 26 | 27 | ./download.sh 28 | python download7digital.py 29 | 30 | ### Process results 31 | 32 | To process the MIDI and mp3 files run: 33 | 34 | python process.py 35 | 36 | To create the plots run: 37 | 38 | python plot.py 39 | 40 | The plots are now placed in the output folder. 
For every Song there are 3 plots: One shows the Constant-Q-Transform of the audio from the mp3 file, one shows the CQT from the MIDI file and one shows the distance matrix for the two CQTs. 41 | 42 | 43 | ## Evaluation 44 | 45 | The Evaluation of the reproducability is based on "Reproducible Research in Signal Processing" by P. Vandewalle. Since only figures 2(a), 2(b) and 2(e) are reproduced in this work, only the required steps to create those figure were analyzed: 46 | - Acquisition of the dataset 47 | - Computation of Constant-Q-Transforms and distance matrizes 48 | - Plotting the CQTs and distance-matrizes 49 | Further steps like the hashing of the CQTs and training of the convolutional networks were not looked at. 50 | 51 | 52 | ### Dataset 53 | 54 | The acquisition of the dataset is the main problem in reproducing the results of the paper. For the evaluation a set of MIDI files is required, as well as audio files corresponding to the Million Song Database. 55 | The MIDI files are made available by the author online. 56 | The mp3 files have to be aquired from 7digital, an onlineshop for music. The [7digital API](http://docs.7digital.com/) allows the download of 30-60 second preview clips of their songs. 57 | 58 | The main problems: 59 | - The [code](https://github.com/tbertinmahieux/MSongsDB/blob/master/Tasks_Demos/Preview7digital/get_preview_url.py#L181) provided on the Million Song Database website to download preview clips from the 7digital API is outdated, as the API call does not utilize OAuth, which was added by 7digital and the API has to be accessed in another way. 60 | - The 7digital API only allows 4000 requests per day for free users which means for a million songs with 2 requests per song, downloading the dataset takes 500 days. 61 | 62 | 63 | ### Code 64 | 65 | The entire code for reproducing the end results of the paper is available online in a [github repository](https://github.com/craffel/midi-dataset). 
Sadly the code for generating the figures is not part of the repository. As a result some modifications and additions to the code were neccessary to aquire the plots. 66 | 67 | The main problems: 68 | - It is hard to extract the required data from the code since many code parts are not designed very flexible. For Example taken from [the authors repository](https://github.com/craffel/midi-dataset/blob/master/feature_extraction.py#L87) at line 87: 69 | 70 | def audio_cqt(audio_data): 71 | # Some computations 72 | return post_process_cqt(audio_gram) 73 | 74 | This processes the audio data and post-processes the resulting Constant-Q-Transform whenever audio_cqt is called. For the plotting of the CQT the signal before post-processing is required, therefore the following change was needed: 75 | 76 | def audio_cqt(audio_data): 77 | # Some computations 78 | return audio_gram 79 | 80 | and then calling the function like: 81 | 82 | post_process_cqt(audio_cqt(audio_data)) 83 | 84 | - In some scripts function calls are made on all the datasets, without checking if the dataset is available. This makes applying the given code to a smaller subset unneccessarily complicated. 85 | 86 | While working on the paper, an other issue, which might affect further analysis of the paper became apparent. The code is sometimes very specific towards the authors test environment. For Example: 87 | * Parallelization is done for a fixed number of processor cores. This complicates the reproduction of the results on a different setup. See [here](https://github.com/craffel/midi-dataset/blob/master/scripts/create_msd_cqts.py#L55). 88 | * The python code executes bash commands and can therefore only be executed on Unix Systems. See [here](https://github.com/craffel/midi-dataset/blob/master/feature_extraction.py#L46). 
89 | 90 | 91 | ### Results 92 | 93 | Comparing Figure 2(a) and 2(b) from the Paper with the CQT plots for "Billy Idol - Dancing with myself" in the output folder one can see that the CQTs general form looks similar. The audio sample is taken from a different part of the song and the time axis is represented in frames not beats. This is due to the fact that in this work no beat information was retrieved. 94 | 95 | When comparing the CQT distance matrix for the same song with Figure 2(e) similar effects can be observed. The general form of the CQT looks similar, the axis are again scaled differently. 96 | 97 | While the results appear very similar to the paper, the differences show that a full reconstruction was not achieved, due to the aforementioned problems with the implementation. 98 | 99 | 100 | ### Conlusion 101 | 102 | > 1: The results cannot seem to be reproduced by an independent 103 | researcher. 104 | 105 | This is based on the fact that acquiring the dataset in a reasonable time is almost impossible using the given free tools. 
106 | -------------------------------------------------------------------------------- /Bergmann-Raffel/requirements.txt: -------------------------------------------------------------------------------- 1 | # OS: Ubuntu 16.04.1 2 | # Python 2.7.12 3 | 4 | # Required software, install with apt-get 5 | # 6 | # fluidsynth 7 | # ffmpeg 8 | # python-tk 9 | 10 | # Required Python packages 11 | numpy 12 | scipy 13 | librosa==0.4.3 14 | pretty_midi 15 | deepdish 16 | msgpack-python 17 | msgpack-numpy 18 | requests 19 | requests-oauthlib 20 | -------------------------------------------------------------------------------- /Hysneli-Rafii/code/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/code/.gitkeep -------------------------------------------------------------------------------- /Hysneli-Rafii/code/bss_eval.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/code/bss_eval.m -------------------------------------------------------------------------------- /Hysneli-Rafii/code/repet.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/code/repet.m -------------------------------------------------------------------------------- /Hysneli-Rafii/code/repetSem.m: -------------------------------------------------------------------------------- 1 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % "Repeating Pattern Extraction Technique (REPET): a simple method for voice/source separation" 3 | % 4 | % Authors: Zafar Rafii & Bryan Pardo 5 | % Programmed by: Arjola Hysneli 6 | % Audio Processing Seminar WS 2016/2017 7 | % 
FAU Erlangen-Nuernberg/AudioLabs Erlangen 8 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 9 | clear all; close all; clc 10 | 11 | 12 | % get paths to all files 13 | currentpath= pwd; 14 | cd ../ 15 | cd dataset 16 | srcFiles = dir('./MIR-1K/Wavfile/*.wav'); 17 | 18 | for i = 1 : size(srcFiles,1) 19 | filename{i} = strcat('./MIR-1K/Wavfile/',srcFiles(i).name); 20 | [soundCell{i},fs{i}] = audioread(filename{i}); 21 | end 22 | 23 | 24 | cd ../ 25 | cd code 26 | for i = 1 : size(srcFiles,1) 27 | %% Case 1: voice and music have same original level 28 | 29 | % Cell creation 30 | ce(i).Mtx= deal(soundCell{1,i}); %read all the sound names from the cell to a matrix(that is inside a struct) 31 | ce(i).sound_mix= ce(i).Mtx(:,1) +ce(i).Mtx(:,2); 32 | ce(i).soundEst = repet(ce(i).sound_mix,fs{i}); %take the repeating background using REPET 33 | ce(i).frgEst=ce(i).sound_mix-ce(i).soundEst; 34 | 35 | % Evaluation of parameters 36 | 37 | [SDR{i},SIR{i},SAR{i},perm{i}]=bss_eval( ce(i).soundEst',ce(i).Mtx(:,1)'); %using Bss_eval %background 38 | [SDRfrg{i},SIRfrg{i},SARfrg{i},permfrg{i}]=bss_eval( ce(i).frgEst',ce(i).Mtx(:,2)'); 39 | 40 | % Calculation of power 41 | 42 | pw(i).pw1= (1/length(ce(i).Mtx(:,1)))*sum(abs(ce(i).Mtx(:,1)).^2); %power of channel 1 43 | pw(i).pw2= (1/length(ce(i).Mtx(:,2)))*sum(abs(ce(i).Mtx(:,2)).^2); %power of channel 2 44 | pw(i).pw_dB=pow2db(ceil(pw(i).pw1/pw(i).pw2)); %power ratio 45 | 46 | if range(pw(i).pw_dB)==0 %check if all the values in the vector are the same(for every song) 47 | powVoMU =pw(1).pw_dB; %takes just one value 48 | end 49 | 50 | 51 | %% Case 2: Music is louder 52 | 53 | % Cell creation 54 | ceMu(i).Mtx= deal(soundCell{1,i}); 55 | ceMu(i).sound_IncMu=ceMu(i).Mtx(:,1) * 10^(5/20); %music channel increased with 5 db (v/m=-5) 56 | ceMu(i).sound_mix= ceMu(i).sound_IncMu +ceMu(i).Mtx(:,2); 57 | ceMu(i).soundEst = repet(ceMu(i).sound_mix,fs{i}); %take the repeating background 58 | 
ceMu(i).frgEst=ceMu(i).sound_mix-ceMu(i).soundEst; %the estimated foreground/voice 59 | 60 | % Evaluation of parameters 61 | [SDRmu{i},SIRmu{i},SARmu{i},permmu{i}]=bss_eval( ceMu(i).soundEst',ceMu(i).sound_IncMu'); %background 62 | [SDRmufrg{i},SIRmufrg{i},SARmufrg{i},permmufrg{i}]=bss_eval( ceMu(i).frgEst',ceMu(i).Mtx(:,2)'); 63 | 64 | % Calculation of power 65 | pwMu(i).pw2= (1/length(ceMu(i).Mtx(:,2)))*sum(abs(ceMu(i).Mtx(:,2)).^2); 66 | pwMu(i).pw1= (1/length(ceMu(i).sound_IncMu))*sum(abs(ceMu(i).sound_IncMu).^2); 67 | pwMu(i).pw_dB=pow2db(pwMu(i).pw2/ pwMu(i).pw1); 68 | 69 | if range(pwMu(i).pw_dB)==0 70 | music=pwMu(1).pw_dB; 71 | end 72 | 73 | %% Case 2: Voice is louder(v/m=5) 74 | 75 | % Cell creation 76 | ceVo(i).Mtx= deal(soundCell{1,i}); 77 | ceVo(i).sound_IncVo=ceVo(i).Mtx(:,2) * 10^(5/20); %Voice channel increased with 5 db (v/m=5) 78 | ceVo(i).sound_mix= ceVo(i).sound_IncVo +ceVo(i).Mtx(:,1); 79 | ceVo(i).soundEst = repet(ceVo(i).sound_mix,fs{i}); %take the repeating background 80 | ceVo(i).frgEst=ceVo(i).sound_mix-ceVo(i).soundEst; %the estimated foreground/voice 81 | 82 | % Evaluation of parameters 83 | [SDRvo{i},SIRvo{i},SARvo{i},permvo{i}]= bss_eval( ceVo(i).soundEst',ceVo(i).sound_IncVo'); %background 84 | [SDRvofrg{i},SIRvofrg{i},SARvofrg{i},permvofrg{i}]= bss_eval(ceMu(i).frgEst',ceMu(i).Mtx(:,2)'); 85 | 86 | % Calculation of power 87 | pwVo(i).pw1= (1/length(ceVo(i).Mtx(:,1)))*sum(abs(ceVo(i).Mtx(:,1)).^2); 88 | pwVo(i).pw2= (1/length(ceVo(i).sound_IncVo))*sum(abs(ceVo(i).sound_IncVo).^2); 89 | pwVo(i).pw=pwVo(i).pw2/ pwVo(i).pw1; 90 | pwVo(i).pw_dB=pow2db(pwVo(i).pw); 91 | 92 | if range(pwVo(i).pw_dB)==0 93 | voice=pwVo(1).pw_dB; 94 | end 95 | 96 | end 97 | 98 | SDRcel=[SDR ;SDRmu ;SDRvo]; %concatinate all teh SDR taken from the 3 cases above 99 | SDRcelfrg=[SDRfrg; SDRmufrg; SDRvofrg]; 100 | SDRmtx=cell2mat(SDRcel); %converts the cell structure to matrix 101 | SDRmtxfrg=cell2mat(SDRcelfrg); 102 | powVec= [voice powVoMu music]; % power vector 
103 | 104 | 105 | % ploting 106 | figure (1) 107 | fig1=boxplot(SDRmtx',powVec); 108 | xlabel('Power ratios in dB'); 109 | ylabel('SDR evaluation/Music case') 110 | title('Music-SDR evaluation') 111 | 112 | 113 | figure (2) 114 | fig2=boxplot(SDRmtxfrg',powVec); 115 | xlabel('Power ratios in dB'); 116 | ylabel('SDR evaluation/Voice case') 117 | title('Voice-SDR evaluation') 118 | 119 | 120 | % saving the figures 121 | hgsave('Music-SDR evaluation.fig'); 122 | hgsave('Voice-SDR evaluation.fig'); 123 | 124 | -------------------------------------------------------------------------------- /Hysneli-Rafii/dataset/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/dataset/.gitkeep -------------------------------------------------------------------------------- /Hysneli-Rafii/dataset/download_install.bat: -------------------------------------------------------------------------------- 1 | set "mypath=%cd%" 2 | curl -O http://mirlab.org/dataset/public/MIR-1K.rar 3 | curl -O https://www.netzmechanik.de/dl/4/winrar-x64-540d.exe 4 | %cd%\winrar-x64-540d.exe 5 | cd %PROGRAMFILES% 6 | "%cd%\WinRAR\UnRAR.exe" x %mypath%\MIR-1K.rar %mypath% -------------------------------------------------------------------------------- /Hysneli-Rafii/output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/output/.gitkeep -------------------------------------------------------------------------------- /Hysneli-Rafii/output/Music-SDR evaluation.fig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/output/Music-SDR evaluation.fig 
-------------------------------------------------------------------------------- /Hysneli-Rafii/output/Voice-SDR evaluation.fig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/output/Voice-SDR evaluation.fig -------------------------------------------------------------------------------- /Hysneli-Rafii/reportREPET.md: -------------------------------------------------------------------------------- 1 | # Title "Repeating Pattern Extraction Technique (REPET): a simple method for voice/source separation" 2 | 3 | - Paper author: Zafar Rafii, Bryan Pardo 4 | - Report author: Arjola Hysneli 5 | 6 | ## Requirements 7 | Recommended: 8 | * MATLAB 2015a 9 | * Windows 7 10 | 11 | ## Download tools/dataset and process results 12 | 13 | 1.Execute 'download_install.bat' 14 | 15 | * The file is found in the 'dataset' folder. 16 | * The script downloads WinRAR(used to extract the dataset needed) and extracts the audio files used (MIR-1K set). 17 | 18 | 2.Execute repetSem.m 19 | 20 | * The REPET method is used to separate the repeating background(music) from a non-repeating foreground. 21 | * Further 'bss_eval' toolbox is used to measure the performance in source separation. 22 | * This will reproduce the SDR values for three voice-to-music ratios(-5,0,5). 23 | 24 | 25 | 26 | ## Reproducional research study 27 | Reference on: "Reproducible Research in Signal Processing - What, why, and how" by Vandewalle et al. (https://infoscience.epfl.ch/record/136640/files/VandewalleKV09.pdf). 28 | According to the evaluation steps considered in the paper, in the following part the reproduction level of 'REPET, simple method for sound/music separation' is explained. 29 | 30 | ### Reproducibility of the algorithm 31 | - Is the algorithm described in sufficient detail? 32 | - 1 Point: Yes 33 | - Are exact parameter values given? 
34 | - 1 Point: Yes (the voice-to-music mixing ratio) 35 | - Is there a block diagram? 36 | - 0 Points: No 37 | - Is there a pseudocode? 38 | - 0 Points: No 39 | - Are there proofs for all the theorems? 40 | - 0 Point: There is no theorem to be proved, but every step is explained mathematically in detail. 41 | - Is the algorithm compared to other algorithms? 42 | - 1 Points: Yes, it is compared to 1. Hsu et al. 43 | 2. Durrieu et al. 44 | 3. Durrieu + highpass filtering 45 | 4. REPET + highpass filtering 46 | 47 | ### Reproducibility of the code 48 | - Are implementation details (programming language, platform, compiler flags, etc.) given? 49 | - 1 Points: programming language: MATLAB 50 | - Is the code available online? 51 | - 0.5 Points: There is no code available for the reproduction of the figure (just the REPET code and the evaluation toolbox) 52 | 53 | 54 | ### Reproducibility of the data 55 | - Is there an explanation of what the data represents? 56 | - 1 Point: Yes (also the results of the comparison with the different methods are given) 57 | - Is the size of the data set acceptable? 58 | - 0.5 Point: In my opinion it is too big (1000 songs) 59 | - Is the data set available online? 60 | - 1 Points: Yes, it is easy to access (MIR-1K)! 61 | 62 | ### Summary 63 | 64 | **General remarks on the reproducibility:** 65 | - Executing bss_eval generates SDR values as output. In the paper, the results for the competing method 1 are given in terms of GNSDR --> this means Fig.2 cannot be reproduced 66 | - The dataset used is in .rar format. (Since the MATLAB toolbox doesn't contain an unrar function, it is difficult to create a download.m file to automatically uncompress the sounds used.) 67 | - The codes of the competing methods used are not provided.
68 | - Boxplot command used to plot the graphs is not available without the Statistics and Machine Learning Toolbox 69 | - Both codes(REPET & bss_eval) provided were reproducable (but since the number of songs used is high, it requires more time to execute the codes) 70 | 71 | 72 | 73 | **Degree of reproducability:** 74 | 75 | **3** - The results can be reproduced by an independent researcher, requiring considerable effort. 76 | -------------------------------------------------------------------------------- /Instructions.md: -------------------------------------------------------------------------------- 1 | # Reproducible Audio Research Seminar 2016 2 | 3 | ## Paper Selection 4 | 5 | * [List of possible papers](https://github.com/faroit/reproducible-audio-research) 6 | 7 | ## Deliverables 8 | 9 | To successfully pass the seminar we split the seminar sub tasks and several small deliverables: 10 | 11 | ### 1. Get familiar with reproducible research 12 | 13 | [Read this paper about reproducible research in Signal Processing](https://infoscience.epfl.ch/record/136640). This will give you an introduction why reproducibility is important. Also you will be introduced into what are papers with a high degree of reproducibility. When you reproduce your paper you will be asked to judge on the papers reproducibility: 14 | 15 | ##### Six Degrees of Reproducibility 16 | 1. Cannot be reproduced. 17 | 2. Cannot seem to be reproduced. 18 | 3. Could be reproduced, requiring extreme effort. 19 | 4. Can be reproduced, requiring considerable effort. 20 | 5. Can be easily reproduced with at most 15 minutes of user effort, requiring some proprietary source packages. 21 | 6. Can be easily reproduced with at most 15 min of user effort, requiring only standard, freely available tools 22 | 23 | 24 | ### 2. 
Reproduce the paper and submit documentation via [pull request](https://help.github.com/articles/about-pull-requests/) 25 | 26 | * **Deadline for submitting a pull request**: 14.12.2016 27 | * **Final Deadline (updates of PRs are allowed)**: 21.12.2016 28 | * We will merge all pull requests on 21.12.2016. Late submissions are not possible. 29 | 30 | After you get appointed to a paper, read the paper carefully and get back to the supervisors for questions about the technical content of the paper. When you are familiar with the topic, you can start with the actual reproducibility report. To make it easer for you and for us, we would encourage you to split this task into several subtask where each is (ideally) self-contained. 31 | 32 | Each of these sub-tasks should therefore itself be submitted in reproducible manner. This means that your contributions should contain documentation and code how to easily reproduce the papers results in a short amount of time. 33 | 34 | ### 2.1. Document and set up computing environment 35 | 36 | Regardless of the programming languages and operating systems you are going to use to reproduce the paper, it is important to document your machine setup and thus allow other people to recreate your environment. 37 | 38 | E.g. if you use python: 39 | 40 | * State the version of python that you have used 41 | * State your package requirements (create [`requirements.txt`](https://pip.pypa.io/en/stable/user_guide/#requirements-files) or [`environment.yml`](http://conda.pydata.org/docs/using/envs.html#share-an-environment) if you use [Anaconda](https://anaconda.org)). 42 | 43 | E.g. if you use Matlab, make sure that you document the [include paths](https://de.mathworks.com/help/matlab/matlab_env/what-is-the-matlab-search-path.html). 44 | 45 | Your environment may change during development, e.g. when you install new packages. This is normal and should be reflected in `requirements.txt`. 46 | 47 | ### 2.2. 
Aggregate the dataset 48 | 49 | In most cases the methods will be evaluated on publicly available data. These datasets are downloadable from the internet. Since only few methods actually deal with raw audio, this sub-task should make sure that the data is read an preprocessed in manner that allows the actually algorithm to easily process the data. The output of this sub-task could be a script that 50 | 51 | * **automatically fetches** the datasets using tools like `wget` ,`cURL` 52 | * **preprocesses** the data. This could include: file operations like renaming, unzipping. 53 | * **parsing Metadata** 54 | * **computes features** 55 | * **writes out** the dataset in a format that allows for fast reading and writing (`hdf5`, `mat`, ...) 56 | 57 | ### 2.3. Process the dataset 58 | 59 | After you have preprocessed the data we want to process the data by using the methods described in the paper. 60 | 61 | The output of this sub-task could be a script that: 62 | 63 | * **loads** the pre-processed data 64 | * **applies the methods/algorithms** described the paper on the loaded dataset 65 | * **writes out** the results so that they can easily evaluated in the next step. Be aware to write out relevant metadata as well as the processed audio data, depending on paper method. 66 | 67 | ### 2.4. Evaluate the processed data 68 | 69 | This subtask is the most important one. The goal should be to reproduce the main results figure of the paper being reproduced. To archive this goal: 70 | 71 | * **load the processed data and/or metadata** 72 | * Compute the evaluation metrics as described in the paper 73 | * Write out the aggregated metrics/scores 74 | * Plot the results 75 | 76 | ### 3. Presentation 77 | 78 | The presentations should consists of three parts: 79 | 80 | 1. Describe the paper and give a short outline of the research field, including the possible applications. 81 | 2. Describe the methods you have used to reproduce the paper. 82 | 3. 
Evaluate the degree of reproducibility. 83 | 84 | ## Submitting Seminar Deliverables 85 | 86 | For the reproducible research seminar we are handling deliverables through [GitHub](https://github.com/) (which means it will be necessary that you all have an account) If you don't have experience with git/GitHub, don't worry, you can do everything from your browser; see below for a tutorial. 87 | If you are comfortable with using git, please follow these steps to create a submission: 88 | 89 | 1. Create a fork of this repository by clicking on the "Fork" button at the top of this page. 90 | 2. In your fork, add content to the deliverables folder by copying the template from the template directory and rename it to match your paper and name. The folder should be named `(Your-last-name)-(last-name-of-first-author-of-paper-being-reproduced)` 91 | 3. Create a pull request to merge your fork. Make the title of the pull request to the subtask you want to propose. 92 | 4. We will review the pull request (be aware that we will only review deliverables that were pushed before the deadline; You can make update revisions of your deliverable before the deadline by updating your pull request. 93 | 94 | Even if you have no experience with git or the command line you can upload your submissions through the github website (look for the upload button). If you have any issues, please don't hesitate to email us. 
95 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | 3 | all: Bergmann-Raffel.pdf Hysneli-Rafii.pdf Pali-Boeck.pdf Reck-Font.pdf Rosenzweig-Jiang.pdf Ssenyonga-Huang.pdf 4 | 5 | %.pdf: %/report.md 6 | pandoc $(basename $@)/report.md -V geometry:margin=1in -o $@ 7 | 8 | Hysneli-Rafii.pdf: Hysneli-Rafii/reportREPET.md 9 | pandoc $(basename $@)/reportREPET.md -V geometry:margin=1in -o $@ 10 | -------------------------------------------------------------------------------- /Pali-Boeck/.gitignore: -------------------------------------------------------------------------------- 1 | wavFiles/* 2 | !wavFiles/.gitkeep 3 | annotations/* 4 | !annotations/.gitkeep 5 | dataset/ 6 | onset_db/ 7 | code/SuperFlux/ -------------------------------------------------------------------------------- /Pali-Boeck/code/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Pali-Boeck/code/.gitkeep -------------------------------------------------------------------------------- /Pali-Boeck/code/readme_code.md: -------------------------------------------------------------------------------- 1 | ##Short explanation of runSuperFlux.py file 2 | 3 | ##### This script is needed to run the SuperFlux.py (main script of the paper) 4 | 5 | ##### As prerequisites: 6 | - the .wav files must be saved to /dataset/wavFiles (instructions on dataset folder) 7 | 8 | - readFiles.py must be executed (as described on report.md) 9 | 10 | ##### After the SuperFlux is executed -> the onsets for each .wav file is saved on output/resultOnsets folder 11 | ##### Each output file has the extension: .superflux.txt -------------------------------------------------------------------------------- /Pali-Boeck/code/runSuperFlux.py: 
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 11 13:09:18 2016

@author: OrnelaP
"""

"""Run SuperFlux.py on every wav file listed in dataset/fileList.txt and
collect the resulting .superflux.txt onset files in output/resultOnsets."""

import os
import os.path
import shutil
import subprocess

# Project root, resolved relative to the parent directory as before.
my_path = os.path.abspath(os.path.join(os.pardir, 'Pali-Boeck'))

# Folder that will receive the onset files produced by SuperFlux.
newPath = os.path.join(my_path, 'output/resultOnsets')

if not os.path.exists(newPath):
    os.makedirs(newPath)

readPath = os.path.join(my_path, 'dataset/fileList.txt')
filesPath = os.path.join(my_path, 'dataset/wavFiles')

# Context manager guarantees the list file is closed even if reading fails
# (the original only closed it after the whole SuperFlux loop finished).
with open(readPath, 'r') as wavFiles:
    inFlux = wavFiles.read().split()

for name in inFlux:
    # Pass the arguments as a list instead of a shell=True string:
    # robust against spaces in file names and avoids handing arbitrary
    # file-list content to the shell.
    subprocess.call(['python', 'code/SuperFlux/SuperFlux.py', '-s',
                     os.path.join(filesPath, name)])

# Clean up: delete the activation files, move the onset files to the
# output folder.  endswith() without slice arguments is equivalent to the
# original endswith(suffix, 0, len(f)).
for f in os.listdir(filesPath):
    if f.endswith(".act"):
        os.remove(os.path.join(filesPath, f))
    elif f.endswith(".superflux.txt"):
        shutil.move(os.path.join(filesPath, f), newPath)
filesPath = os.path.join(my_path, 'dataset/wavFiles') 24 | 25 | wavFiles = open(readPath, 'r') 26 | 27 | inFlux = wavFiles.read().split() 28 | 29 | for i in inFlux: 30 | call('python code/SuperFlux/SuperFlux.py -s --online ' + os.path.join(filesPath, i), shell = True) 31 | 32 | wavFiles.close() 33 | 34 | files = os.listdir(filesPath) 35 | for f in files: 36 | if f.endswith(".act", 0, len(f)): 37 | #print(f) 38 | os.remove(os.path.join(filesPath, f)) 39 | if f.endswith(".superflux.txt", 0, len(f)): 40 | #print(f) 41 | shutil.move((os.path.join(filesPath, f)), newPath) -------------------------------------------------------------------------------- /Pali-Boeck/dataset/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Pali-Boeck/dataset/.gitkeep -------------------------------------------------------------------------------- /Pali-Boeck/dataset/modalexport: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Copy audio files from a modal database to a given output directory. 5 | 6 | An optional audio file name may be given in order to only extract a single 7 | audio file. If omitted, all files will be extracted. 8 | 9 | For each audio file, the metadata will also be extracted to a YAML file in 10 | the same directory, called file_name.yaml. 
def _extract_file(audio_db, output_dir, file_name):
    """Write one entry of *audio_db* to *output_dir* as a 16-bit WAV file.

    audio_db   -- opened modal database; an h5py.File-like mapping from file
                  name to an audio dataset carrying HDF5 attributes
    output_dir -- directory the WAV file is written into
    file_name  -- key of the entry to extract; reused as the output file name

    The entry's attributes are also converted to plain Python types so they
    could be serialised with PyYAML, but the actual YAML dump is commented
    out below, so `metadata` is currently computed and discarded.
    """
    # Sampling rate is stored as an HDF5 attribute on the dataset.
    sampling_rate = int(audio_db[file_name].attrs['sampling_rate'])

    # Scale to the int16 range before writing.
    # NOTE(review): assumes the stored samples are floats in [-1, 1] -- TODO
    # confirm against the database writer (32768.0 * 1.0 overflows int16 by 1).
    audio = np.asarray(np.array(audio_db[file_name]) * 32768.0, np.int16)
    write(os.path.join(output_dir, file_name), sampling_rate, audio)

    # Copy all attributes into a plain dict.
    metadata = dict(audio_db[file_name].attrs)

    # convert numpy types as the yaml module doesn't know how to serialise them
    for item in metadata:
        if type(metadata[item]) == np.ndarray:
            # The first element's type decides the conversion for the whole
            # array (arrays are assumed homogeneous).
            if issubclass(metadata[item][0].__class__, int):
                metadata[item] = [int(i) for i in metadata[item]]
            elif issubclass(metadata[item][0].__class__, float):
                metadata[item] = [float(i) for i in metadata[item]]
            elif issubclass(metadata[item][0].__class__, np.string_):
                metadata[item] = [str(i) for i in metadata[item]]
        elif issubclass(metadata[item].__class__, int):
            metadata[item] = int(metadata[item])
        elif issubclass(metadata[item].__class__, float):
            metadata[item] = float(metadata[item])
        elif issubclass(metadata[item].__class__, np.string_):
            # np.string_ is a deprecated alias of np.bytes_ (removed in
            # NumPy 2.0); this code targets an older NumPy.
            metadata[item] = str(metadata[item])

    # YAML export deliberately disabled by the author.
    #metadata_file = os.path.join(output_dir, file_name + '.yaml')
    #with open(metadata_file, 'w') as f:
    #f.write(yaml.safe_dump(metadata))