├── .gitignore ├── .gitlab-ci.yml ├── Bergmann-Raffel ├── .gitignore ├── download.sh ├── download7digital.py ├── lists │ └── songlist.txt ├── plot.py ├── process.py ├── report.md └── requirements.txt ├── Hysneli-Rafii ├── code │ ├── .gitkeep │ ├── bss_eval.m │ ├── repet.m │ └── repetSem.m ├── dataset │ ├── .gitkeep │ └── download_install.bat ├── output │ ├── .gitkeep │ ├── Music-SDR evaluation.fig │ └── Voice-SDR evaluation.fig └── reportREPET.md ├── Instructions.md ├── Makefile ├── Pali-Boeck ├── .gitignore ├── code │ ├── .gitkeep │ ├── readme_code.md │ ├── runSuperFlux.py │ └── runSuperFlux_online.py ├── dataset │ ├── .gitkeep │ ├── modalexport │ ├── readFiles.py │ ├── readme_dataset.md │ └── wavFiles │ │ └── .gitkeep ├── download.bat ├── getwav.bat ├── output │ ├── .gitignore │ ├── annotations │ │ └── .gitkeep │ ├── evaluation.py │ ├── evaluation_online.py │ ├── outputEval.txt │ └── outputEvalOnline.txt ├── report.md ├── requirements.txt ├── run.bat └── run_online.bat ├── README.md ├── Reck-Font ├── Tempo estimation results.py ├── data_preprocessing.sh ├── download.sh ├── evaluation.sh ├── report.md ├── requirements.txt └── run_analysis.sh ├── Rosenzweig-Jiang ├── .gitignore ├── code │ ├── .gitkeep │ └── getFileNames.m ├── dataset │ └── .gitkeep ├── output │ ├── .gitkeep │ ├── TbFigure4.fig │ └── TbFigure4.png ├── report.md ├── step01_download.m └── step02_reproduceTbFigure4.m └── Ssenyonga-Huang ├── .gitignore ├── code ├── .gitkeep ├── presentation_demo.m └── rpca_mask_script.m ├── download.sh ├── output ├── .gitkeep └── figures │ ├── .gitkeep │ ├── fig3.png │ ├── fig4.png │ └── fig5.png └── report.md /.gitignore: -------------------------------------------------------------------------------- 1 | #####=== General Files ===##### 2 | 3 | *.zip 4 | *.gz 5 | *.7z 6 | *.tar 7 | *.tgz 8 | *.tar.gz 9 | *.rar 10 | *.wav 11 | *.mp3 12 | *.mp4 13 | *.mat 14 | *.h5 15 | *.hdf5 16 | 17 | #####=== OSX ===##### 18 | .DS_Store 19 | .AppleDouble 20 | .LSOverride 21 | 22 | # 
Icon must end with two \r 23 | Icon 24 | 25 | # Thumbnails 26 | ._* 27 | 28 | # Files that might appear in the root of a volume 29 | .DocumentRevisions-V100 30 | .fseventsd 31 | .Spotlight-V100 32 | .TemporaryItems 33 | .Trashes 34 | .VolumeIcon.icns 35 | 36 | # Directories potentially created on remote AFP share 37 | .AppleDB 38 | .AppleDesktop 39 | Network Trash Folder 40 | Temporary Items 41 | .apdisk 42 | 43 | 44 | #####=== Windows ===##### 45 | # Windows image file caches 46 | Thumbs.db 47 | ehthumbs.db 48 | 49 | # Folder config file 50 | Desktop.ini 51 | 52 | # Recycle Bin used on file shares 53 | $RECYCLE.BIN/ 54 | 55 | # Windows Installer files 56 | *.cab 57 | *.msi 58 | *.msm 59 | *.msp 60 | 61 | # Windows shortcuts 62 | *.lnk 63 | 64 | #####=== Linux ===##### 65 | *~ 66 | 67 | # KDE directory preferences 68 | .directory 69 | 70 | # Linux trash folder which might appear on any partition or disk 71 | .Trash-* 72 | 73 | 74 | #####=== C ===##### 75 | 76 | # Object files 77 | *.o 78 | *.ko 79 | *.obj 80 | *.elf 81 | 82 | # Precompiled Headers 83 | *.gch 84 | *.pch 85 | 86 | # Libraries 87 | *.lib 88 | *.a 89 | *.la 90 | *.lo 91 | 92 | # Shared objects (inc. 
Windows DLLs) 93 | *.dll 94 | *.so 95 | *.so.* 96 | *.dylib 97 | 98 | # Executables 99 | *.exe 100 | *.out 101 | *.app 102 | *.i*86 103 | *.x86_64 104 | *.hex 105 | 106 | # Debug files 107 | *.dSYM/ 108 | 109 | #####=== C++ ===##### 110 | 111 | # Compiled Object files 112 | *.slo 113 | *.lo 114 | *.o 115 | *.obj 116 | 117 | # Precompiled Headers 118 | *.gch 119 | *.pch 120 | 121 | # Compiled Dynamic libraries 122 | *.so 123 | *.dylib 124 | *.dll 125 | 126 | # Fortran module files 127 | *.mod 128 | 129 | # Compiled Static libraries 130 | *.lai 131 | *.la 132 | *.a 133 | *.lib 134 | 135 | # Executables 136 | *.exe 137 | *.out 138 | *.app 139 | 140 | #####=== Matlab ===##### 141 | ##--------------------------------------------------- 142 | ## Remove autosaves generated by the Matlab editor 143 | ## We have git for backups! 144 | ##--------------------------------------------------- 145 | 146 | # Windows default autosave extension 147 | *.asv 148 | 149 | # OSX / *nix default autosave extension 150 | *.m~ 151 | 152 | # Compiled MEX binaries (all platforms) 153 | *.mex* 154 | 155 | # Simulink Code Generation 156 | slprj/ 157 | 158 | #####=== Python ===##### 159 | 160 | # Byte-compiled / optimized / DLL files 161 | __pycache__/ 162 | *.py[cod] 163 | *$py.class 164 | 165 | # C extensions 166 | *.so 167 | 168 | # Distribution / packaging 169 | .Python 170 | env/ 171 | build/ 172 | develop-eggs/ 173 | dist/ 174 | downloads/ 175 | eggs/ 176 | .eggs/ 177 | lib/ 178 | lib64/ 179 | parts/ 180 | sdist/ 181 | var/ 182 | *.egg-info/ 183 | .installed.cfg 184 | *.egg 185 | 186 | # PyInstaller 187 | # Usually these files are written by a python script from a template 188 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
189 | *.manifest 190 | *.spec 191 | 192 | # Installer logs 193 | pip-log.txt 194 | pip-delete-this-directory.txt 195 | 196 | # Unit test / coverage reports 197 | htmlcov/ 198 | .tox/ 199 | .coverage 200 | .coverage.* 201 | .cache 202 | nosetests.xml 203 | coverage.xml 204 | *,cover 205 | 206 | # Translations 207 | *.mo 208 | *.pot 209 | 210 | # Django stuff: 211 | *.log 212 | 213 | # Sphinx documentation 214 | docs/_build/ 215 | 216 | # PyBuilder 217 | target/ 218 | *.lab 219 | *.m 220 | *.pdf 221 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | image: audiolabs/latex 2 | 3 | test: 4 | script: 5 | - apt-get update -yy 6 | - apt-get install -yy pandoc 7 | - make all 8 | artifacts: 9 | name: "$CI_PROJECT_NAME" 10 | paths: 11 | - "*.pdf" 12 | -------------------------------------------------------------------------------- /Bergmann-Raffel/.gitignore: -------------------------------------------------------------------------------- 1 | midi-dataset/ 2 | output/ 3 | data/ 4 | 5 | #####=== General Files ===##### 6 | 7 | *.zip 8 | *.gz 9 | *.7z 10 | *.tar 11 | *.tgz 12 | *.tar.gz 13 | *.rar 14 | *.wav 15 | *.mp3 16 | *.mp4 17 | *.mat 18 | *.h5 19 | *.hdf5 20 | 21 | 22 | 23 | #####=== OSX ===##### 24 | .DS_Store 25 | .AppleDouble 26 | .LSOverride 27 | 28 | # Icon must end with two \r 29 | Icon 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear in the root of a volume 35 | .DocumentRevisions-V100 36 | .fseventsd 37 | .Spotlight-V100 38 | .TemporaryItems 39 | .Trashes 40 | .VolumeIcon.icns 41 | 42 | # Directories potentially created on remote AFP share 43 | .AppleDB 44 | .AppleDesktop 45 | Network Trash Folder 46 | Temporary Items 47 | .apdisk 48 | 49 | 50 | #####=== Windows ===##### 51 | # Windows image file caches 52 | Thumbs.db 53 | ehthumbs.db 54 | 55 | # Folder config file 56 | Desktop.ini 57 | 58 | # Recycle Bin used on 
file shares 59 | $RECYCLE.BIN/ 60 | 61 | # Windows Installer files 62 | *.cab 63 | *.msi 64 | *.msm 65 | *.msp 66 | 67 | # Windows shortcuts 68 | *.lnk 69 | 70 | #####=== Linux ===##### 71 | *~ 72 | 73 | # KDE directory preferences 74 | .directory 75 | 76 | # Linux trash folder which might appear on any partition or disk 77 | .Trash-* 78 | 79 | 80 | #####=== C ===##### 81 | 82 | # Object files 83 | *.o 84 | *.ko 85 | *.obj 86 | *.elf 87 | 88 | # Precompiled Headers 89 | *.gch 90 | *.pch 91 | 92 | # Libraries 93 | *.lib 94 | *.a 95 | *.la 96 | *.lo 97 | 98 | # Shared objects (inc. Windows DLLs) 99 | *.dll 100 | *.so 101 | *.so.* 102 | *.dylib 103 | 104 | # Executables 105 | *.exe 106 | *.out 107 | *.app 108 | *.i*86 109 | *.x86_64 110 | *.hex 111 | 112 | # Debug files 113 | *.dSYM/ 114 | 115 | #####=== C++ ===##### 116 | 117 | # Compiled Object files 118 | *.slo 119 | *.lo 120 | *.o 121 | *.obj 122 | 123 | # Precompiled Headers 124 | *.gch 125 | *.pch 126 | 127 | # Compiled Dynamic libraries 128 | *.so 129 | *.dylib 130 | *.dll 131 | 132 | # Fortran module files 133 | *.mod 134 | 135 | # Compiled Static libraries 136 | *.lai 137 | *.la 138 | *.a 139 | *.lib 140 | 141 | # Executables 142 | *.exe 143 | *.out 144 | *.app 145 | 146 | #####=== Matlab ===##### 147 | ##--------------------------------------------------- 148 | ## Remove autosaves generated by the Matlab editor 149 | ## We have git for backups! 
150 | ##--------------------------------------------------- 151 | 152 | # Windows default autosave extension 153 | *.asv 154 | 155 | # OSX / *nix default autosave extension 156 | *.m~ 157 | 158 | # Compiled MEX binaries (all platforms) 159 | *.mex* 160 | 161 | # Simulink Code Generation 162 | slprj/ 163 | 164 | #####=== Python ===##### 165 | 166 | # Byte-compiled / optimized / DLL files 167 | __pycache__/ 168 | *.py[cod] 169 | *$py.class 170 | 171 | # C extensions 172 | *.so 173 | 174 | # Distribution / packaging 175 | .Python 176 | env/ 177 | build/ 178 | develop-eggs/ 179 | dist/ 180 | downloads/ 181 | eggs/ 182 | .eggs/ 183 | lib/ 184 | lib64/ 185 | parts/ 186 | sdist/ 187 | var/ 188 | *.egg-info/ 189 | .installed.cfg 190 | *.egg 191 | 192 | # PyInstaller 193 | # Usually these files are written by a python script from a template 194 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 195 | *.manifest 196 | *.spec 197 | 198 | # Installer logs 199 | pip-log.txt 200 | pip-delete-this-directory.txt 201 | 202 | # Unit test / coverage reports 203 | htmlcov/ 204 | .tox/ 205 | .coverage 206 | .coverage.* 207 | .cache 208 | nosetests.xml 209 | coverage.xml 210 | *,cover 211 | 212 | # Translations 213 | *.mo 214 | *.pot 215 | 216 | # Django stuff: 217 | *.log 218 | 219 | # Sphinx documentation 220 | docs/_build/ 221 | 222 | # PyBuilder 223 | target/ 224 | -------------------------------------------------------------------------------- /Bergmann-Raffel/download.sh: -------------------------------------------------------------------------------- 1 | mkdir -p data/mp3 2 | mkdir -p data/mid 3 | 4 | wget "http://hog.ee.columbia.edu/craffel/lmd/clean_midi.tar.gz" -P "data/mid" 5 | 6 | cd data/mid 7 | 8 | tar --wildcards -xzf clean_midi.tar.gz "clean_midi/Michael Jackson/"* 9 | tar --wildcards -xzf clean_midi.tar.gz "clean_midi/AC DC/"* 10 | tar --wildcards -xzf clean_midi.tar.gz "clean_midi/Billy Idol/"* 11 | 12 | cd "clean_midi/Michael Jackson" 13 
| rename "s/^/Michael Jackson /" * 14 | cd "../AC DC" 15 | rename "s/^/AC DC /" * 16 | cd "../Billy Idol" 17 | rename "s/^/Billy Idol /" * 18 | cd ../../.. 19 | 20 | mv "mid/clean_midi/Michael Jackson/"* mid 21 | mv "mid/clean_midi/AC DC/"* mid 22 | mv "mid/clean_midi/Billy Idol/"* mid 23 | 24 | rm mid/clean_midi.tar.gz 25 | rm mid/*.*.mid 26 | rm -r mid/clean_midi 27 | 28 | cd .. 29 | -------------------------------------------------------------------------------- /Bergmann-Raffel/download7digital.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import requests 4 | from requests_oauthlib import OAuth1 5 | 6 | 7 | def search_trackid(search, key, secret): 8 | url = 'https://api.7digital.com/1.2/track/search' 9 | auth = OAuth1(key, secret, signature_type='query') 10 | 11 | params = { 12 | 'q': search, 13 | 'country': 'ww', 14 | } 15 | 16 | headers = { 17 | 'accept': 'application/json', 18 | } 19 | 20 | try: 21 | r = requests.get(url, auth=auth, params=params, headers=headers) 22 | data = r.json() 23 | except ValueError: 24 | print r.content 25 | raise 26 | 27 | if data['status'] != 'ok': 28 | return None 29 | 30 | return data['searchResults']['searchResult'][0]['track']['id'] 31 | 32 | 33 | def get_file(trackid, handle, key, secret): 34 | url = "http://previews.7digital.com/clip/%s" % trackid 35 | auth = OAuth1(key, secret, signature_type='query') 36 | 37 | params = { 38 | 'country': 'ww', 39 | } 40 | 41 | r = requests.get(url, auth=auth, params=params, stream=True) 42 | 43 | if not r.ok: 44 | raise RuntimeError("Download of %s failed" % trackid) 45 | 46 | for block in r.iter_content(1024): 47 | handle.write(block) 48 | 49 | 50 | if __name__ == '__main__': 51 | parser = argparse.ArgumentParser(description='Download 7digital previews for a list of songs.') 52 | parser.add_argument( 53 | '--key', 54 | help='7digital key, optional', 55 | nargs='?', 56 | default=os.environ.get('DIGITAL7_KEY'), 57 | 
) 58 | parser.add_argument( 59 | '--secret', 60 | help='7digital secret, optional', 61 | nargs='?', 62 | default=os.environ.get('DIGITAL7_SECRET'), 63 | ) 64 | 65 | args = parser.parse_args() 66 | with open("lists/songlist.txt", "r") as songlist: 67 | songs = songlist.read().splitlines() 68 | for song in songs: 69 | trackid = search_trackid(song, key=args.key, secret=args.secret) 70 | title = song.replace(" ", "_").lower() 71 | outfile = 'data/mp3/%s.mp3' % title 72 | with open(outfile, 'wb') as f: 73 | get_file(trackid, f, key=args.key, secret=args.secret) 74 | print outfile 75 | -------------------------------------------------------------------------------- /Bergmann-Raffel/lists/songlist.txt: -------------------------------------------------------------------------------- 1 | AC DC Back In Black 2 | AC DC Dirty Deeds Done Dirt Cheap 3 | AC DC For Those About To Rock (We Salute You) 4 | AC DC Highway To Hell 5 | AC DC Thunderstruck 6 | AC DC TNT 7 | AC DC Whole Lotta Rosie 8 | AC DC Who Made Who 9 | AC DC You Shook Me All Night Long 10 | Billy Idol Catch My Fall 11 | Billy Idol Dancing With Myself 12 | Billy Idol (Do Not) Stand in the Shadows 13 | Billy Idol Eyes Without A Face 14 | Billy Idol Hot in the City 15 | Billy Idol Mony Mony 16 | Billy Idol Rebel Yell 17 | Billy Idol White Wedding 18 | Michael Jackson Beat It 19 | Michael Jackson Black or White 20 | Michael Jackson Don't Stop 'Til You Get Enough 21 | Michael Jackson Heal The World 22 | Michael Jackson I Just Can't Stop Loving You 23 | Michael Jackson Liberian Girl 24 | Michael Jackson Man in the Mirror 25 | Michael Jackson Off the Wall 26 | Michael Jackson Remember the Time 27 | Michael Jackson Rock With You 28 | Michael Jackson She's Out Of My Life 29 | Michael Jackson Smooth Criminal 30 | Michael Jackson The Girl Is Mine 31 | Michael Jackson The Lady in My Life 32 | Michael Jackson The Way You Make Me Feel 33 | Michael Jackson Thriller 34 | Michael Jackson Wanna Be Startin' Somethin' 35 | 36 | 
-------------------------------------------------------------------------------- /Bergmann-Raffel/plot.py: -------------------------------------------------------------------------------- 1 | ''' 2 | plot cqts 3 | ''' 4 | 5 | import os 6 | import sys 7 | sys.path.append('midi-dataset') 8 | import deepdish 9 | import librosa 10 | import feature_extraction 11 | import matplotlib.pyplot as plt 12 | 13 | def plot_mp3_cqt(filename): 14 | sr = feature_extraction.AUDIO_FS 15 | # load mp3 cqt from h5 file 16 | cqt_filename = os.path.join('data', 'mp3cqts', '%s.h5' % filename) 17 | cqt = deepdish.io.load(cqt_filename) 18 | plt.figure(figsize=(8,5)) 19 | librosa.display.specshow(cqt, sr=sr, x_axis='frames', 20 | y_axis='cqt_note', fmin = librosa.core.midi_to_hz(feature_extraction.NOTE_START), 21 | hop_length = feature_extraction.AUDIO_HOP) 22 | plt.title('CQT of mp3 file for %s' % filename) 23 | plt.tight_layout() 24 | plt.savefig(os.path.join('output', '%s_mp3.png' % filename)) 25 | plt.clf() 26 | 27 | def plot_mid_cqt(filename): 28 | sr = feature_extraction.MIDI_FS 29 | # load mid cqt from h5 file 30 | cqt_filename = os.path.join('data', 'midcqts', '%s.h5' % filename) 31 | cqt = deepdish.io.load(cqt_filename) 32 | plt.figure(figsize=(25,5)) 33 | librosa.display.specshow(cqt, sr=sr, x_axis='frames', 34 | y_axis='cqt_note', fmin = librosa.core.midi_to_hz(feature_extraction.NOTE_START), 35 | hop_length = feature_extraction.MIDI_HOP) 36 | plt.title('CQT of midi file for %s' % filename) 37 | plt.tight_layout() 38 | plt.savefig(os.path.join('output', '%s_mid.png' % filename)) 39 | plt.clf() 40 | 41 | def plot_dist_matrix(filename): 42 | mat_filename = os.path.join('data', 'distance_matrices', '%s.h5' % filename) 43 | dist_mat = deepdish.io.load(mat_filename) 44 | plt.figure(figsize=(25,8)) 45 | plt.matshow(dist_mat) 46 | plt.title('CQT Distance Matrix %s' % filename) 47 | plt.savefig(os.path.join('output', '%s_distance.png' % filename)) 48 | plt.clf() 49 | 50 | 51 | if __name__ 
== '__main__': 52 | with open('lists/songlist.txt') as songlist: 53 | songs = songlist.read().splitlines() 54 | filenames = [song.replace(" ", "_").lower() for song in songs] 55 | for filename in filenames: 56 | plot_mp3_cqt(filename) 57 | for song in songs: 58 | plot_mid_cqt(song) 59 | for filename in filenames: 60 | plot_dist_matrix(filename) -------------------------------------------------------------------------------- /Bergmann-Raffel/process.py: -------------------------------------------------------------------------------- 1 | ''' 2 | create cqts for mp3 files 3 | ''' 4 | 5 | import os 6 | import sys 7 | sys.path.append("midi-dataset") 8 | import feature_extraction 9 | import pretty_midi 10 | import librosa 11 | import deepdish 12 | import scipy 13 | 14 | def process_mp3(filename): 15 | mp3_filename = os.path.join('data', 'mp3', filename) 16 | sr = feature_extraction.AUDIO_FS 17 | #load the mp3 file 18 | audio_data, _ = librosa.load(mp3_filename, sr=sr) 19 | cqt = feature_extraction.audio_cqt(audio_data) 20 | cqt = librosa.logamplitude(cqt, ref_power=cqt.max()) 21 | cqt = librosa.util.normalize(cqt, norm=2., axis=0) 22 | #create the output folder if it doesn't exist 23 | if not os.path.exists(os.path.join('data','mp3cqts')): 24 | os.makedirs(os.path.join('data','mp3cqts')) 25 | output_filename = os.path.join('data','mp3cqts',filename.replace('.mp3', '.h5')) 26 | deepdish.io.save(output_filename, cqt) 27 | return cqt 28 | 29 | def process_mid(filename): 30 | mid_filename = os.path.join('data', 'mid', filename) 31 | mid_data = pretty_midi.PrettyMIDI(mid_filename) 32 | cqt = feature_extraction.midi_cqt(mid_data) 33 | cqt = librosa.logamplitude(cqt, ref_power=cqt.max()) 34 | cqt = librosa.util.normalize(cqt, norm=2., axis=0) 35 | if not os.path.exists(os.path.join('data','midcqts')): 36 | os.makedirs(os.path.join('data','midcqts')) 37 | output_filename = os.path.join('data','midcqts',filename.replace('.mid', '.h5')) 38 | deepdish.io.save(output_filename, cqt) 
39 | return cqt 40 | 41 | def distance_matrix(mp3cqt, midcqt, title): 42 | dist_matrix = scipy.spatial.distance.cdist(mp3cqt.T, midcqt.T, 'cosine') 43 | if not os.path.exists(os.path.join('data','distance_matrices')): 44 | os.makedirs(os.path.join('data','distance_matrices')) 45 | output_filename = os.path.join('data','distance_matrices', "%s.h5" % title) 46 | deepdish.io.save(output_filename, dist_matrix) 47 | 48 | 49 | if __name__ == '__main__': 50 | with open('lists/songlist.txt') as songlist: 51 | songs = songlist.read().splitlines() 52 | for song in songs: 53 | filename = song.replace(" ", "_").lower() 54 | mp3_cqt = process_mp3('%s.mp3' % filename) 55 | mid_cqt = process_mid('%s.mid' % song) 56 | distance_matrix(mp3_cqt, mid_cqt, filename) 57 | -------------------------------------------------------------------------------- /Bergmann-Raffel/report.md: -------------------------------------------------------------------------------- 1 | # "Large-Scale Content-Based Matching of MIDI and Audio Files" 2 | 3 | - Paper author: Colin Raffel 4 | - Report author: Michael Bergmann 5 | 6 | ## Usage guide 7 | 8 | ### Setup 9 | 10 | The required environment is described in the requirements.txt 11 | 12 | To setup the environment, install the given Software and run: 13 | 14 | pip install numpy 15 | pip install -r requirements.txt 16 | git clone https://github.com/bergmann-fau/midi-dataset 17 | 18 | ### Download Dataset and preprocess data 19 | 20 | Please apply for a [7Digital API key](https://api-signup.7digital.com/) and set it like: 21 | 22 | export DIGITAL7_KEY=yourkey 23 | export DIGITAL7_SECRET=yoursecret 24 | 25 | Run the following commands to download the dataset: 26 | 27 | ./download.sh 28 | python download7digital.py 29 | 30 | ### Process results 31 | 32 | To process the MIDI and mp3 files run: 33 | 34 | python process.py 35 | 36 | To create the plots run: 37 | 38 | python plot.py 39 | 40 | The plots are now placed in the output folder. 
For every Song there are 3 plots: One shows the Constant-Q-Transform of the audio from the mp3 file, one shows the CQT from the MIDI file and one shows the distance matrix for the two CQTs. 41 | 42 | 43 | ## Evaluation 44 | 45 | The Evaluation of the reproducability is based on "Reproducible Research in Signal Processing" by P. Vandewalle. Since only figures 2(a), 2(b) and 2(e) are reproduced in this work, only the required steps to create those figure were analyzed: 46 | - Acquisition of the dataset 47 | - Computation of Constant-Q-Transforms and distance matrizes 48 | - Plotting the CQTs and distance-matrizes 49 | Further steps like the hashing of the CQTs and training of the convolutional networks were not looked at. 50 | 51 | 52 | ### Dataset 53 | 54 | The acquisition of the dataset is the main problem in reproducing the results of the paper. For the evaluation a set of MIDI files is required, as well as audio files corresponding to the Million Song Database. 55 | The MIDI files are made available by the author online. 56 | The mp3 files have to be aquired from 7digital, an onlineshop for music. The [7digital API](http://docs.7digital.com/) allows the download of 30-60 second preview clips of their songs. 57 | 58 | The main problems: 59 | - The [code](https://github.com/tbertinmahieux/MSongsDB/blob/master/Tasks_Demos/Preview7digital/get_preview_url.py#L181) provided on the Million Song Database website to download preview clips from the 7digital API is outdated, as the API call does not utilize OAuth, which was added by 7digital and the API has to be accessed in another way. 60 | - The 7digital API only allows 4000 requests per day for free users which means for a million songs with 2 requests per song, downloading the dataset takes 500 days. 61 | 62 | 63 | ### Code 64 | 65 | The entire code for reproducing the end results of the paper is available online in a [github repository](https://github.com/craffel/midi-dataset). 
Sadly the code for generating the figures is not part of the repository. As a result some modifications and additions to the code were neccessary to aquire the plots. 66 | 67 | The main problems: 68 | - It is hard to extract the required data from the code since many code parts are not designed very flexible. For Example taken from [the authors repository](https://github.com/craffel/midi-dataset/blob/master/feature_extraction.py#L87) at line 87: 69 | 70 | def audio_cqt(audio_data): 71 | # Some computations 72 | return post_process_cqt(audio_gram) 73 | 74 | This processes the audio data and post-processes the resulting Constant-Q-Transform whenever audio_cqt is called. For the plotting of the CQT the signal before post-processing is required, therefore the following change was needed: 75 | 76 | def audio_cqt(audio_data): 77 | # Some computations 78 | return audio_gram 79 | 80 | and then calling the function like: 81 | 82 | post_process_cqt(audio_cqt(audio_data)) 83 | 84 | - In some scripts function calls are made on all the datasets, without checking if the dataset is available. This makes applying the given code to a smaller subset unneccessarily complicated. 85 | 86 | While working on the paper, an other issue, which might affect further analysis of the paper became apparent. The code is sometimes very specific towards the authors test environment. For Example: 87 | * Parallelization is done for a fixed number of processor cores. This complicates the reproduction of the results on a different setup. See [here](https://github.com/craffel/midi-dataset/blob/master/scripts/create_msd_cqts.py#L55). 88 | * The python code executes bash commands and can therefore only be executed on Unix Systems. See [here](https://github.com/craffel/midi-dataset/blob/master/feature_extraction.py#L46). 
89 | 90 | 91 | ### Results 92 | 93 | Comparing Figure 2(a) and 2(b) from the Paper with the CQT plots for "Billy Idol - Dancing with myself" in the output folder one can see that the CQTs general form looks similar. The audio sample is taken from a different part of the song and the time axis is represented in frames not beats. This is due to the fact that in this work no beat information was retrieved. 94 | 95 | When comparing the CQT distance matrix for the same song with Figure 2(e) similar effects can be observed. The general form of the CQT looks similar, the axis are again scaled differently. 96 | 97 | While the results appear very similar to the paper, the differences show that a full reconstruction was not achieved, due to the aforementioned problems with the implementation. 98 | 99 | 100 | ### Conlusion 101 | 102 | > 1: The results cannot seem to be reproduced by an independent 103 | researcher. 104 | 105 | This is based on the fact that acquiring the dataset in a reasonable time is almost impossible using the given free tools. 
106 | -------------------------------------------------------------------------------- /Bergmann-Raffel/requirements.txt: -------------------------------------------------------------------------------- 1 | # OS: Ubuntu 16.04.1 2 | # Python 2.7.12 3 | 4 | # Required software, install with apt-get 5 | # 6 | # fluidsynth 7 | # ffmpeg 8 | # python-tk 9 | 10 | # Required Python packages 11 | numpy 12 | scipy 13 | librosa==0.4.3 14 | pretty_midi 15 | deepdish 16 | msgpack-python 17 | msgpack-numpy 18 | requests 19 | requests-oauthlib 20 | -------------------------------------------------------------------------------- /Hysneli-Rafii/code/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/code/.gitkeep -------------------------------------------------------------------------------- /Hysneli-Rafii/code/bss_eval.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/code/bss_eval.m -------------------------------------------------------------------------------- /Hysneli-Rafii/code/repet.m: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/code/repet.m -------------------------------------------------------------------------------- /Hysneli-Rafii/code/repetSem.m: -------------------------------------------------------------------------------- 1 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 2 | % "Repeating Pattern Extraction Technique (REPET): a simple method for voice/source separation" 3 | % 4 | % Authors: Zafar Rafii & Bryan Pardo 5 | % Programmed by: Arjola Hysneli 6 | % Audio Processing Seminar WS 2016/2017 7 | % 
FAU Erlangen-Nuernberg/AudioLabs Erlangen 8 | % %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% 9 | clear all; close all; clc 10 | 11 | 12 | % get paths to all files 13 | currentpath= pwd; 14 | cd ../ 15 | cd dataset 16 | srcFiles = dir('./MIR-1K/Wavfile/*.wav'); 17 | 18 | for i = 1 : size(srcFiles,1) 19 | filename{i} = strcat('./MIR-1K/Wavfile/',srcFiles(i).name); 20 | [soundCell{i},fs{i}] = audioread(filename{i}); 21 | end 22 | 23 | 24 | cd ../ 25 | cd code 26 | for i = 1 : size(srcFiles,1) 27 | %% Case 1: voice and music have same original level 28 | 29 | % Cell creation 30 | ce(i).Mtx= deal(soundCell{1,i}); %read all the sound names from the cell to a matrix(that is inside a struct) 31 | ce(i).sound_mix= ce(i).Mtx(:,1) +ce(i).Mtx(:,2); 32 | ce(i).soundEst = repet(ce(i).sound_mix,fs{i}); %take the repeating background using REPET 33 | ce(i).frgEst=ce(i).sound_mix-ce(i).soundEst; 34 | 35 | % Evaluation of parameters 36 | 37 | [SDR{i},SIR{i},SAR{i},perm{i}]=bss_eval( ce(i).soundEst',ce(i).Mtx(:,1)'); %using Bss_eval %background 38 | [SDRfrg{i},SIRfrg{i},SARfrg{i},permfrg{i}]=bss_eval( ce(i).frgEst',ce(i).Mtx(:,2)'); 39 | 40 | % Calculation of power 41 | 42 | pw(i).pw1= (1/length(ce(i).Mtx(:,1)))*sum(abs(ce(i).Mtx(:,1)).^2); %power of channel 1 43 | pw(i).pw2= (1/length(ce(i).Mtx(:,2)))*sum(abs(ce(i).Mtx(:,2)).^2); %power of channel 2 44 | pw(i).pw_dB=pow2db(ceil(pw(i).pw1/pw(i).pw2)); %power ratio 45 | 46 | if range(pw(i).pw_dB)==0 %check if all the values in the vector are the same(for every song) 47 | powVoMU =pw(1).pw_dB; %takes just one value 48 | end 49 | 50 | 51 | %% Case 2: Music is louder 52 | 53 | % Cell creation 54 | ceMu(i).Mtx= deal(soundCell{1,i}); 55 | ceMu(i).sound_IncMu=ceMu(i).Mtx(:,1) * 10^(5/20); %music channel increased with 5 db (v/m=-5) 56 | ceMu(i).sound_mix= ceMu(i).sound_IncMu +ceMu(i).Mtx(:,2); 57 | ceMu(i).soundEst = repet(ceMu(i).sound_mix,fs{i}); %take the repeating background 58 | 
ceMu(i).frgEst=ceMu(i).sound_mix-ceMu(i).soundEst; %the estimated foreground/voice 59 | 60 | % Evaluation of parameters 61 | [SDRmu{i},SIRmu{i},SARmu{i},permmu{i}]=bss_eval( ceMu(i).soundEst',ceMu(i).sound_IncMu'); %background 62 | [SDRmufrg{i},SIRmufrg{i},SARmufrg{i},permmufrg{i}]=bss_eval( ceMu(i).frgEst',ceMu(i).Mtx(:,2)'); 63 | 64 | % Calculation of power 65 | pwMu(i).pw2= (1/length(ceMu(i).Mtx(:,2)))*sum(abs(ceMu(i).Mtx(:,2)).^2); 66 | pwMu(i).pw1= (1/length(ceMu(i).sound_IncMu))*sum(abs(ceMu(i).sound_IncMu).^2); 67 | pwMu(i).pw_dB=pow2db(pwMu(i).pw2/ pwMu(i).pw1); 68 | 69 | if range(pwMu(i).pw_dB)==0 70 | music=pwMu(1).pw_dB; 71 | end 72 | 73 | %% Case 2: Voice is louder(v/m=5) 74 | 75 | % Cell creation 76 | ceVo(i).Mtx= deal(soundCell{1,i}); 77 | ceVo(i).sound_IncVo=ceVo(i).Mtx(:,2) * 10^(5/20); %Voice channel increased with 5 db (v/m=5) 78 | ceVo(i).sound_mix= ceVo(i).sound_IncVo +ceVo(i).Mtx(:,1); 79 | ceVo(i).soundEst = repet(ceVo(i).sound_mix,fs{i}); %take the repeating background 80 | ceVo(i).frgEst=ceVo(i).sound_mix-ceVo(i).soundEst; %the estimated foreground/voice 81 | 82 | % Evaluation of parameters 83 | [SDRvo{i},SIRvo{i},SARvo{i},permvo{i}]= bss_eval( ceVo(i).soundEst',ceVo(i).sound_IncVo'); %background 84 | [SDRvofrg{i},SIRvofrg{i},SARvofrg{i},permvofrg{i}]= bss_eval(ceMu(i).frgEst',ceMu(i).Mtx(:,2)'); 85 | 86 | % Calculation of power 87 | pwVo(i).pw1= (1/length(ceVo(i).Mtx(:,1)))*sum(abs(ceVo(i).Mtx(:,1)).^2); 88 | pwVo(i).pw2= (1/length(ceVo(i).sound_IncVo))*sum(abs(ceVo(i).sound_IncVo).^2); 89 | pwVo(i).pw=pwVo(i).pw2/ pwVo(i).pw1; 90 | pwVo(i).pw_dB=pow2db(pwVo(i).pw); 91 | 92 | if range(pwVo(i).pw_dB)==0 93 | voice=pwVo(1).pw_dB; 94 | end 95 | 96 | end 97 | 98 | SDRcel=[SDR ;SDRmu ;SDRvo]; %concatinate all teh SDR taken from the 3 cases above 99 | SDRcelfrg=[SDRfrg; SDRmufrg; SDRvofrg]; 100 | SDRmtx=cell2mat(SDRcel); %converts the cell structure to matrix 101 | SDRmtxfrg=cell2mat(SDRcelfrg); 102 | powVec= [voice powVoMu music]; % power vector 
103 | 104 | 105 | % ploting 106 | figure (1) 107 | fig1=boxplot(SDRmtx',powVec); 108 | xlabel('Power ratios in dB'); 109 | ylabel('SDR evaluation/Music case') 110 | title('Music-SDR evaluation') 111 | 112 | 113 | figure (2) 114 | fig2=boxplot(SDRmtxfrg',powVec); 115 | xlabel('Power ratios in dB'); 116 | ylabel('SDR evaluation/Voice case') 117 | title('Voice-SDR evaluation') 118 | 119 | 120 | % saving the figures 121 | hgsave('Music-SDR evaluation.fig'); 122 | hgsave('Voice-SDR evaluation.fig'); 123 | 124 | -------------------------------------------------------------------------------- /Hysneli-Rafii/dataset/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/dataset/.gitkeep -------------------------------------------------------------------------------- /Hysneli-Rafii/dataset/download_install.bat: -------------------------------------------------------------------------------- 1 | set "mypath=%cd%" 2 | curl -O http://mirlab.org/dataset/public/MIR-1K.rar 3 | curl -O https://www.netzmechanik.de/dl/4/winrar-x64-540d.exe 4 | %cd%\winrar-x64-540d.exe 5 | cd %PROGRAMFILES% 6 | "%cd%\WinRAR\UnRAR.exe" x %mypath%\MIR-1K.rar %mypath% -------------------------------------------------------------------------------- /Hysneli-Rafii/output/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/output/.gitkeep -------------------------------------------------------------------------------- /Hysneli-Rafii/output/Music-SDR evaluation.fig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/output/Music-SDR evaluation.fig 
-------------------------------------------------------------------------------- /Hysneli-Rafii/output/Voice-SDR evaluation.fig: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Hysneli-Rafii/output/Voice-SDR evaluation.fig -------------------------------------------------------------------------------- /Hysneli-Rafii/reportREPET.md: -------------------------------------------------------------------------------- 1 | # Title "Repeating Pattern Extraction Technique (REPET): a simple method for voice/source separation" 2 | 3 | - Paper author: Zafar Rafii, Bryan Pardo 4 | - Report author: Arjola Hysneli 5 | 6 | ## Requirements 7 | Recommended: 8 | * MATLAB 2015a 9 | * Windows 7 10 | 11 | ## Download tools/dataset and process results 12 | 13 | 1.Execute 'download_install.bat' 14 | 15 | * The file is found in the 'dataset' folder. 16 | * The script downloads WinRAR(used to extract the dataset needed) and extracts the audio files used (MIR-1K set). 17 | 18 | 2.Execute repetSem.m 19 | 20 | * The REPET method is used to separate the repeating background(music) from a non-repeating foreground. 21 | * Further 'bss_eval' toolbox is used to measure the performance in source separation. 22 | * This will reproduce the SDR values for three voice-to-music ratios(-5,0,5). 23 | 24 | 25 | 26 | ## Reproducional research study 27 | Reference on: "Reproducible Research in Signal Processing - What, why, and how" by Vandewalle et al. (https://infoscience.epfl.ch/record/136640/files/VandewalleKV09.pdf). 28 | According to the evaluation steps considered in the paper, in the following part the reproduction level of 'REPET, simple method for sound/music separation' is explained. 29 | 30 | ### Reproducibility of the algorithm 31 | - Is the algorithm described in sufficient detail? 32 | - 1 Point: Yes 33 | - Are exact parameter values given? 
34 | - 1 Point: Yes (the voice-to-music mixing ratio) 35 | - Is there a block diagram? 36 | - 0 Points: No 37 | - Is there a pseudocode? 38 | - 0 Points: No 39 | - Are there proofs for all the theorems? 40 | - 0 Point: There is no theorem to be proved, but every step is explained mathematically in detail. 41 | - Is the algorithm compared to other algorithms? 42 | - 1 Points: Yes, it is compared to 1. Hsu et al. 43 | 2. Durrieu et al. 44 | 3. Durrieu + highpass filtering 45 | 4. REPET + highpass filtering 46 | 47 | ### Reproducibility of the code 48 | - Are implementation details (programming language, platform, compiler flags, etc.) given? 49 | - 1 Points: programming language: MATLAB 50 | - Is the code available online? 51 | - 0.5 Points: There is no code available for the reproduction of the figure (just the REPET code and the evaluation toolbox) 52 | 53 | 54 | ### Reproducibility of the data 55 | - Is there an explanation of what the data represents? 56 | - 1 Point: Yes (also the results of the comparison with the different methods are given) 57 | - Is the size of the data set acceptable? 58 | - 0.5 Point: In my opinion it is too big (1000 songs) 59 | - Is the data set available online? 60 | - 1 Points: Yes, it is easy to access (MIR-1K)! 61 | 62 | ### Summary 63 | 64 | **General remarks on the reproducibility:** 65 | - Executing bss_eval generates SDR values as output. In the paper, the results for the competing method 1 are given in terms of GNSDR --> this means Fig.2 cannot be reproduced 66 | - The dataset used is in .rar format. (Since the MATLAB toolbox doesn't contain an unrar function, it is difficult to create a download.m file to automatically uncompress the sounds used.) 67 | - The codes of the competing methods used are not provided.
68 | - Boxplot command used to plot the graphs is not available without the Statistics and Machine Learning Toolbox 69 | - Both codes(REPET & bss_eval) provided were reproducable (but since the number of songs used is high, it requires more time to execute the codes) 70 | 71 | 72 | 73 | **Degree of reproducability:** 74 | 75 | **3** - The results can be reproduced by an independent researcher, requiring considerable effort. 76 | -------------------------------------------------------------------------------- /Instructions.md: -------------------------------------------------------------------------------- 1 | # Reproducible Audio Research Seminar 2016 2 | 3 | ## Paper Selection 4 | 5 | * [List of possible papers](https://github.com/faroit/reproducible-audio-research) 6 | 7 | ## Deliverables 8 | 9 | To successfully pass the seminar we split the seminar sub tasks and several small deliverables: 10 | 11 | ### 1. Get familiar with reproducible research 12 | 13 | [Read this paper about reproducible research in Signal Processing](https://infoscience.epfl.ch/record/136640). This will give you an introduction why reproducibility is important. Also you will be introduced into what are papers with a high degree of reproducibility. When you reproduce your paper you will be asked to judge on the papers reproducibility: 14 | 15 | ##### Six Degrees of Reproducibility 16 | 1. Cannot be reproduced. 17 | 2. Cannot seem to be reproduced. 18 | 3. Could be reproduced, requiring extreme effort. 19 | 4. Can be reproduced, requiring considerable effort. 20 | 5. Can be easily reproduced with at most 15 minutes of user effort, requiring some proprietary source packages. 21 | 6. Can be easily reproduced with at most 15 min of user effort, requiring only standard, freely available tools 22 | 23 | 24 | ### 2. 
Reproduce the paper and submit documentation via [pull request](https://help.github.com/articles/about-pull-requests/) 25 | 26 | * **Deadline for submitting a pull request**: 14.12.2016 27 | * **Final Deadline (updates of PRs are allowed)**: 21.12.2016 28 | * We will merge all pull requests on 21.12.2016. Late submissions are not possible. 29 | 30 | After you get appointed to a paper, read the paper carefully and get back to the supervisors for questions about the technical content of the paper. When you are familiar with the topic, you can start with the actual reproducibility report. To make it easer for you and for us, we would encourage you to split this task into several subtask where each is (ideally) self-contained. 31 | 32 | Each of these sub-tasks should therefore itself be submitted in reproducible manner. This means that your contributions should contain documentation and code how to easily reproduce the papers results in a short amount of time. 33 | 34 | ### 2.1. Document and set up computing environment 35 | 36 | Regardless of the programming languages and operating systems you are going to use to reproduce the paper, it is important to document your machine setup and thus allow other people to recreate your environment. 37 | 38 | E.g. if you use python: 39 | 40 | * State the version of python that you have used 41 | * State your package requirements (create [`requirements.txt`](https://pip.pypa.io/en/stable/user_guide/#requirements-files) or [`environment.yml`](http://conda.pydata.org/docs/using/envs.html#share-an-environment) if you use [Anaconda](https://anaconda.org)). 42 | 43 | E.g. if you use Matlab, make sure that you document the [include paths](https://de.mathworks.com/help/matlab/matlab_env/what-is-the-matlab-search-path.html). 44 | 45 | Your environment may change during development, e.g. when you install new packages. This is normal and should be reflected in `requirements.txt`. 46 | 47 | ### 2.2. 
Aggregate the dataset 48 | 49 | In most cases the methods will be evaluated on publicly available data. These datasets are downloadable from the internet. Since only few methods actually deal with raw audio, this sub-task should make sure that the data is read an preprocessed in manner that allows the actually algorithm to easily process the data. The output of this sub-task could be a script that 50 | 51 | * **automatically fetches** the datasets using tools like `wget` ,`cURL` 52 | * **preprocesses** the data. This could include: file operations like renaming, unzipping. 53 | * **parsing Metadata** 54 | * **computes features** 55 | * **writes out** the dataset in a format that allows for fast reading and writing (`hdf5`, `mat`, ...) 56 | 57 | ### 2.3. Process the dataset 58 | 59 | After you have preprocessed the data we want to process the data by using the methods described in the paper. 60 | 61 | The output of this sub-task could be a script that: 62 | 63 | * **loads** the pre-processed data 64 | * **applies the methods/algorithms** described the paper on the loaded dataset 65 | * **writes out** the results so that they can easily evaluated in the next step. Be aware to write out relevant metadata as well as the processed audio data, depending on paper method. 66 | 67 | ### 2.4. Evaluate the processed data 68 | 69 | This subtask is the most important one. The goal should be to reproduce the main results figure of the paper being reproduced. To archive this goal: 70 | 71 | * **load the processed data and/or metadata** 72 | * Compute the evaluation metrics as described in the paper 73 | * Write out the aggregated metrics/scores 74 | * Plot the results 75 | 76 | ### 3. Presentation 77 | 78 | The presentations should consists of three parts: 79 | 80 | 1. Describe the paper and give a short outline of the research field, including the possible applications. 81 | 2. Describe the methods you have used to reproduce the paper. 82 | 3. 
Evaluate the degree of reproducibility. 83 | 84 | ## Submitting Seminar Deliverables 85 | 86 | For the reproducible research seminar we are handling deliverables through [GitHub](https://github.com/) (which means it will be necessary that you all have an account) If you don't have experience with git/GitHub, don't worry, you can do everything from your browser; see below for a tutorial. 87 | If you are comfortable with using git, please follow these steps to create a submission: 88 | 89 | 1. Create a fork of this repository by clicking on the "Fork" button at the top of this page. 90 | 2. In your fork, add content to the deliverables folder by copying the template from the template directory and rename it to match your paper and name. The folder should be named `(Your-last-name)-(last-name-of-first-author-of-paper-being-reproduced)` 91 | 3. Create a pull request to merge your fork. Make the title of the pull request to the subtask you want to propose. 92 | 4. We will review the pull request (be aware that we will only review deliverables that were pushed before the deadline; You can make update revisions of your deliverable before the deadline by updating your pull request. 93 | 94 | Even if you have no experience with git or the command line you can upload your submissions through the github website (look for the upload button). If you have any issues, please don't hesitate to email us. 
95 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | 3 | all: Bergmann-Raffel.pdf Hysneli-Rafii.pdf Pali-Boeck.pdf Reck-Font.pdf Rosenzweig-Jiang.pdf Ssenyonga-Huang.pdf 4 | 5 | %.pdf: %/report.md 6 | pandoc $(basename $@)/report.md -V geometry:margin=1in -o $@ 7 | 8 | Hysneli-Rafii.pdf: Hysneli-Rafii/reportREPET.md 9 | pandoc $(basename $@)/reportREPET.md -V geometry:margin=1in -o $@ 10 | -------------------------------------------------------------------------------- /Pali-Boeck/.gitignore: -------------------------------------------------------------------------------- 1 | wavFiles/* 2 | !wavFiles/.gitkeep 3 | annotations/* 4 | !annotations/.gitkeep 5 | dataset/ 6 | onset_db/ 7 | code/SuperFlux/ -------------------------------------------------------------------------------- /Pali-Boeck/code/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Pali-Boeck/code/.gitkeep -------------------------------------------------------------------------------- /Pali-Boeck/code/readme_code.md: -------------------------------------------------------------------------------- 1 | ##Short explanation of runSuperFlux.py file 2 | 3 | ##### This script is needed to run the SuperFlux.py (main script of the paper) 4 | 5 | ##### As prerequisites: 6 | - the .wav files must be saved to /dataset/wavFiles (instructions on dataset folder) 7 | 8 | - readFiles.py must be executed (as described on report.md) 9 | 10 | ##### After the SuperFlux is executed -> the onsets for each .wav file is saved on output/resultOnsets folder 11 | ##### Each output file has the extension: .superflux.txt -------------------------------------------------------------------------------- /Pali-Boeck/code/runSuperFlux.py: 
# -*- coding: utf-8 -*-
"""
Created on Sun Dec 11 13:09:18 2016

@author: OrnelaP
"""

"""Run SuperFlux.py on every wav file listed in dataset/fileList.txt and
collect the resulting .superflux.txt onset files in output/resultOnsets."""

import os
import os.path
import shutil
import subprocess

# Project root, resolved relative to the parent directory as before.
my_path = os.path.abspath(os.path.join(os.pardir, 'Pali-Boeck'))

# Folder that will receive the onset files produced by SuperFlux.
newPath = os.path.join(my_path, 'output/resultOnsets')

if not os.path.exists(newPath):
    os.makedirs(newPath)

readPath = os.path.join(my_path, 'dataset/fileList.txt')
filesPath = os.path.join(my_path, 'dataset/wavFiles')

# Context manager guarantees the list file is closed even if reading fails
# (the original only closed it after the whole SuperFlux loop finished).
with open(readPath, 'r') as wavFiles:
    inFlux = wavFiles.read().split()

for name in inFlux:
    # Pass the arguments as a list instead of a shell=True string:
    # robust against spaces in file names and avoids handing arbitrary
    # file-list content to the shell.
    subprocess.call(['python', 'code/SuperFlux/SuperFlux.py', '-s',
                     os.path.join(filesPath, name)])

# Clean up: delete the activation files, move the onset files to the
# output folder.  endswith() without slice arguments is equivalent to the
# original endswith(suffix, 0, len(f)).
for f in os.listdir(filesPath):
    if f.endswith(".act"):
        os.remove(os.path.join(filesPath, f))
    elif f.endswith(".superflux.txt"):
        shutil.move(os.path.join(filesPath, f), newPath)
filesPath = os.path.join(my_path, 'dataset/wavFiles') 24 | 25 | wavFiles = open(readPath, 'r') 26 | 27 | inFlux = wavFiles.read().split() 28 | 29 | for i in inFlux: 30 | call('python code/SuperFlux/SuperFlux.py -s --online ' + os.path.join(filesPath, i), shell = True) 31 | 32 | wavFiles.close() 33 | 34 | files = os.listdir(filesPath) 35 | for f in files: 36 | if f.endswith(".act", 0, len(f)): 37 | #print(f) 38 | os.remove(os.path.join(filesPath, f)) 39 | if f.endswith(".superflux.txt", 0, len(f)): 40 | #print(f) 41 | shutil.move((os.path.join(filesPath, f)), newPath) -------------------------------------------------------------------------------- /Pali-Boeck/dataset/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/audiolabs/APSRR-2016/fb9cb5b2e2d18555808d24df7f64233cee5e5d36/Pali-Boeck/dataset/.gitkeep -------------------------------------------------------------------------------- /Pali-Boeck/dataset/modalexport: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | ''' 4 | Copy audio files from a modal database to a given output directory. 5 | 6 | An optional audio file name may be given in order to only extract a single 7 | audio file. If omitted, all files will be extracted. 8 | 9 | For each audio file, the metadata will also be extracted to a YAML file in 10 | the same directory, called file_name.yaml. 
def _extract_file(audio_db, output_dir, file_name):
    """Write one entry of *audio_db* to *output_dir* as a 16-bit WAV file.

    audio_db   -- opened modal database; an h5py.File-like mapping from file
                  name to an audio dataset carrying HDF5 attributes
    output_dir -- directory the WAV file is written into
    file_name  -- key of the entry to extract; reused as the output file name

    The entry's attributes are also converted to plain Python types so they
    could be serialised with PyYAML, but the actual YAML dump is commented
    out below, so `metadata` is currently computed and discarded.
    """
    # Sampling rate is stored as an HDF5 attribute on the dataset.
    sampling_rate = int(audio_db[file_name].attrs['sampling_rate'])

    # Scale to the int16 range before writing.
    # NOTE(review): assumes the stored samples are floats in [-1, 1] -- TODO
    # confirm against the database writer (32768.0 * 1.0 overflows int16 by 1).
    audio = np.asarray(np.array(audio_db[file_name]) * 32768.0, np.int16)
    write(os.path.join(output_dir, file_name), sampling_rate, audio)

    # Copy all attributes into a plain dict.
    metadata = dict(audio_db[file_name].attrs)

    # convert numpy types as the yaml module doesn't know how to serialise them
    for item in metadata:
        if type(metadata[item]) == np.ndarray:
            # The first element's type decides the conversion for the whole
            # array (arrays are assumed homogeneous).
            if issubclass(metadata[item][0].__class__, int):
                metadata[item] = [int(i) for i in metadata[item]]
            elif issubclass(metadata[item][0].__class__, float):
                metadata[item] = [float(i) for i in metadata[item]]
            elif issubclass(metadata[item][0].__class__, np.string_):
                metadata[item] = [str(i) for i in metadata[item]]
        elif issubclass(metadata[item].__class__, int):
            metadata[item] = int(metadata[item])
        elif issubclass(metadata[item].__class__, float):
            metadata[item] = float(metadata[item])
        elif issubclass(metadata[item].__class__, np.string_):
            # np.string_ is a deprecated alias of np.bytes_ (removed in
            # NumPy 2.0); this code targets an older NumPy.
            metadata[item] = str(metadata[item])

    # YAML export deliberately disabled by the author.
    #metadata_file = os.path.join(output_dir, file_name + '.yaml')
    #with open(metadata_file, 'w') as f:
    #f.write(yaml.safe_dump(metadata))