├── .dockerignore
├── .gitignore
├── .pre-commit-config.yaml
├── LICENSE
├── README.md
├── docker
│   ├── Dockerfile
│   ├── build.sh
│   ├── env.sh
│   ├── kill.sh
│   ├── notebook.sh
│   ├── run.sh
│   └── shell.sh
├── jukemir
│   ├── __init__.py
│   ├── assets.py
│   ├── assets
│   │   ├── debug.json
│   │   ├── emomusic.json
│   │   ├── giantsteps.json
│   │   ├── gtzan.json
│   │   ├── magnatagatune.json
│   │   └── precomputed.json
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── cache.py
│   │   └── test.py
│   ├── probe
│   │   ├── __init__.py
│   │   ├── aggregate.py
│   │   ├── execute.py
│   │   └── test.py
│   ├── utils.py
│   └── utils_test.py
├── metadata
│   ├── emomusic.json.gz
│   ├── giantsteps_clips.json.gz
│   ├── gtzan_ff.json.gz
│   └── magnatagatune.json.gz
├── pyproject.toml
├── representations
│   ├── build.sh
│   ├── choi.dockerfile
│   ├── choi
│   │   └── main.py
│   ├── chroma.dockerfile
│   ├── clmr.dockerfile
│   ├── clmr
│   │   └── main.py
│   ├── env.sh
│   ├── handcrafted
│   │   └── main.py
│   ├── jukebox.dockerfile
│   ├── jukebox
│   │   ├── main.py
│   │   └── make_models.py.patch
│   ├── mfcc.dockerfile
│   ├── musicnn.dockerfile
│   ├── musicnn
│   │   └── main.py
│   ├── push.sh
│   └── run.sh
├── reproduce
│   ├── 0_docker.sh
│   ├── 123_precomputed.sh
│   ├── 1_download.sh
│   ├── 2_process.sh
│   ├── 3_extract.py
│   ├── 3_extract.sh
│   ├── 4_grid_config.py
│   ├── 4_grid_config.sh
│   ├── 5_grid_train_codalab.sh
│   ├── 5_grid_train_serial.py
│   ├── 5_grid_train_serial.sh
│   └── 6_evaluate.sh
└── setup.py

/.dockerignore:
--------------------------------------------------------------------------------
1 | cache/
2 | jukemir/
3 | jukemir.egg-info/
4 | representations/
5 | notebooks/
6 | venv/
7 | old/
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | jukemir.egg-info/
2 | __pycache__/
3 | notebooks/
4 | venv/
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | repos:
2 | -   repo: https://github.com/PyCQA/isort
3 |     rev: 5.7.0
4 |     hooks:
5 |     -   id: isort
6 |         language_version: python3
7 | -   repo: https://github.com/ambv/black
8 |     rev: 20.8b1
9 |     hooks:
10 |     -   id: black
11 |         language_version: python3
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2021 Rodrigo Castellon and Chris Donahue
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Jukebox for MIR transfer learning 2 | 3 | This repository contains code for our paper [_Codified audio language modeling learns useful representations for music information retrieval_](https://arxiv.org/abs/2107.05677) (Castellon et al. 2021), which demonstrates that [OpenAI's Jukebox](https://openai.com/blog/jukebox/) (Dhariwal et al. 2020) provides rich representations for music transfer learning. 4 | 5 | This README is divided into three standalone sections. The [first section](#simple-example-of-inference-with-jukebox-in-colab), providing a Colab notebook that allows you to perform inference on the full Jukebox model for free, is optimized for simplicity and ease-of-use. The [second section](#simple-example-of-using-jukebox-for-transfer-learning) requires some setting up, but provides an end-to-end example of genre detection using representations from Jukebox. The [third section](#reproducing-results-from-our-paper) is optimized for reproducibility and provides step-by-step instructions for reproducing the results from our paper. 6 | 7 | In addition, we provide a Python library for extracting representations from Jukebox in just a couple lines of code. Check it out [here](https://github.com/rodrigo-castellon/jukemirlib)! 8 | 9 | ## Simple example of inference with Jukebox in Colab 10 | 11 | Despite Jukebox's large size, it can be made to run on a single Tesla T4 GPU on Colab for free. Check out the [notebook here](https://colab.research.google.com/drive/1x4qt_-SLrSSRzOuz7dkuxTK-o4fSoH9n?usp=sharing). In the notebook, you should be able to use the API to extract representations in a couple lines of code: 12 | 13 | ```python 14 | audio = load_audio_from_file(fname, offset=0.0, duration=25) 15 | 16 | representations = get_acts_from_audio(audio=audio, 17 | layers=[36], 18 | meanpool=True) 19 | 20 | print(f"Got representations {representations}") 21 | print(f"Its shape is {representations[36].shape}") 22 | ``` 23 | 24 | Simply pass the audio, layer numbers you're interested in, whether you want to mean-pool, and you'll get representations from Jukebox out. We accomplish these memory savings by [initializing the model with the meta device](https://huggingface.co/docs/accelerate/v0.11.0/en/big_modeling), letting you perform end-to-end inference on any commodity GPU. All in all, the notebook provides certain benefits over the `jukemir` codebase, including: 25 | - a clean API for extracting representations in a customizable manner 26 | - compute savings when performing inference on short audio clips 27 | - memory-efficient model initialization, enabling extracting representations from later layers in one step 28 | 29 | ## Simple example of using Jukebox for transfer learning 30 | 31 | NOTE: the original hosting link for GTZAN appears to be dead, but you can try downloading it from [The Internet Archive](https://web.archive.org/web/20211207012050/http://opihi.cs.uvic.ca/sound/genres.tar.gz) instead. 32 | 33 | This section provides a quick demonstration of using Jukebox for transfer learning on the GTZAN genre detection dataset (Tzanetakis and Cook 2002). 34 | 35 | Our codebase uses Docker to simplify the process of extracting representations from Jukebox for new audio files. If you do not already have Docker on your machine, please follow [these instructions](https://docs.docker.com/get-docker/) to install it. 
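Because the original GTZAN host is down, mirrors of `genres.tar.gz` can vary. After downloading the archive (next step), you can optionally check its integrity against the SHA-256 checksum that this repository pins in [`jukemir/assets/gtzan.json`](jukemir/assets/gtzan.json):

```sh
# Optional integrity check; expected digest taken from jukemir/assets/gtzan.json
echo "24347e0223d2ba798e0a558c4c172d9d4a19c00bb7963fe055d183dadb4ef2c6  genres.tar.gz" | sha256sum -c -
```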
36 | 37 | Once docker is installed, run the following to download and extract the dataset: 38 | 39 | ```sh 40 | wget http://opihi.cs.uvic.ca/sound/genres.tar.gz 41 | tar xvfz genres.tar.gz 42 | ``` 43 | 44 | Next, we will extract representations from Jukebox. This will require a system with at least 30GB of RAM and a GPU with at least 12GB VRAM. Feature extraction will take a few hours (though it can be parallelized). If your system does not meet these requirements, you can alternatively [download the pre-computed features](https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/gtzan-jukebox.tar.gz). If your system does meet the requirements, run: 45 | 46 | ```sh 47 | for GENRE in blues classical country disco hiphop jazz metal pop reggae rock 48 | do 49 | echo $GENRE 50 | docker run \ 51 | -it \ 52 | --rm \ 53 | -v $(pwd)/genres/$GENRE:/input \ 54 | -v $(pwd)/features:/output \ 55 | jukemir/representations_jukebox 56 | done 57 | ``` 58 | 59 | This pre-built Docker container ([`jukemir/representations_jukebox`](https://hub.docker.com/repository/docker/jukemir/representations_jukebox)) automatically extracts representations for all files in its input folder (`/input`) and saves a standard numpy array for each file in its output folder (`/output`). You can use Docker's `-v` directive to link input and output folders on your host machine to these directories (see above for an example). 60 | 61 | Once you've extracted representations from Jukebox, you can train simple classifiers on top of them for different MIR tasks. For example, the following Python script trains and evaluates a simple SVM classifier on GTZAN: 62 | 63 | ```py 64 | import glob 65 | import os 66 | import random 67 | 68 | import numpy as np 69 | from sklearn.svm import SVC 70 | from sklearn.model_selection import cross_val_score 71 | from sklearn.pipeline import make_pipeline 72 | from sklearn.preprocessing import StandardScaler 73 | 74 | # Find numpy paths (and randomize to remove label ordering) 75 | npy_paths = sorted(glob.glob('features/*.npy')) 76 | assert len(npy_paths) == 1000 77 | random.seed(0) 78 | random.shuffle(npy_paths) 79 | 80 | # Load data 81 | X = np.array([np.load(p) for p in npy_paths]) 82 | y = np.array([os.path.split(p)[1].split('.')[0] for p in npy_paths]) 83 | 84 | # Run cross-validation 85 | clf = make_pipeline(StandardScaler(), SVC()) 86 | scores = cross_val_score(clf, X, y, cv=10) 87 | print('{:.1f} +- {:.1f}'.format(np.mean(scores) * 100, np.std(scores) * 100)) 88 | ``` 89 | 90 | This should print out `92.5 +- 2.9`, though results may differ slightly due to minor numerical differences. Note that, for simplicity, the above code performs the _traditional_ evaluation for GTZAN which uses 10-fold cross validation. In our paper, we evaluate on the "fault-filtered" GTZAN split from Kereliuk et al. 2015. 91 | 92 | ## Reproducing results from our paper 93 | 94 | This section provides step-by-step instructions for reproducing all results from our paper. All code is executed within [pre-built Docker containers](https://hub.docker.com/orgs/jukemir/repositories) to increase reproducibility. 95 | 96 | ### Setting up Docker and cache 97 | 98 | If you do not already have Docker on your machine, please follow [these instructions](https://docs.docker.com/get-docker/) to install it. 99 | 100 | To initialize the default cache directory, **run `mkdir ~/.jukemir`**. If you would prefer to use a different directory, set the appropriate environment variable: `export JUKEMIR_CACHE_DIR=`. 
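For example, to keep the cache on a larger disk (the path below is purely illustrative):

```sh
# Illustrative: relocate the jukemir cache to a roomier volume
mkdir -p /data/jukemir
export JUKEMIR_CACHE_DIR=/data/jukemir
```

The `jukemir` library reads this environment variable to locate the cache (see `jukemir/__init__.py`).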
101 | 
102 | To launch our pre-built Docker container in the background, **navigate to the [`reproduce`](reproduce/) folder and run `./0_docker.sh`**. You may kill and remove this container at any time by typing `docker kill jukemir`.
103 | 
104 | ### Download pre-computed representations (skipping steps 1-3)
105 | 
106 | Unless you are probing a new dataset or comparing a new representation on an existing dataset, you can **skip steps 1-3 by downloading the pre-computed representations via `./123_precomputed.sh`**. If you need to selectively download particular pre-computed representations, see [`jukemir/assets/precomputed.json`](jukemir/assets/precomputed.json) for relevant URLs.
107 | 
108 | ### (Step 1) Downloading the datasets
109 | 
110 | Once Docker is running, **run [`./1_download.sh`](reproduce/1_download.sh)** to download all of the raw dataset assets from their respective sources. Note that this downloads about 8GB of files and may take quite a long time depending on the speed of your network connection.
111 | 
112 | The resultant files will be downloaded to the `datasets` subdirectory of your cache directory (`~/.jukemir/cache/datasets` by default).
113 | 
114 | ### (Step 2) Processing the datasets
115 | 
116 | Once the raw assets have been downloaded, **run [`./2_process.sh`](reproduce/2_process.sh)** to process them into a standard format (parsing all metadata and decoding MP3s to 16-bit PCM WAV files). Note that this script will also take several hours to run and will produce about 50GB of WAV files.
117 | 
118 | The resultant files will be saved to the `processed` subdirectory of your cache directory (`~/.jukemir/cache/processed` by default).
119 | 
120 | ### (Step 3) Extracting representations
121 | 
122 | Next, we need to extract representations for each WAV file in the processed datasets. Note that this process is resource-intensive (especially for certain representations). Alternatively, you can [download pre-computed representations](#download-pre-computed-representations-skipping-steps-1-3).
123 | 
124 | Each representation from our paper (`chroma`, `mfcc`, `choi`, `musicnn`, `clmr`, `jukebox`) has been packaged into a pre-built Docker container with a common API. The basic "type signature" of each container takes a folder of WAV files as input and returns a folder of NumPy arrays containing the corresponding representations. For example, if you have a folder in your current working directory called `mywavs/`, you can extract representations from Jukebox via the following command, which will create a folder of NumPy arrays called `mywavs_jukebox/`:
125 | 
126 | ```sh
127 | docker run \
128 |     -it \
129 |     --rm \
130 |     -v $(pwd)/mywavs:/input \
131 |     -v $(pwd)/mywavs_jukebox:/output \
132 |     jukemir/representations_jukebox
133 | ```
134 | 
135 | Note that each container also takes two optional arguments as input, `--batch_size` and `--batch_idx`, which can be used to compute representations for a subset (batch) of the input WAV file directory. This is useful for parallelizing computation across several workers.
136 | 
137 | Because `choi` uses batchnorm during inference, its representations will differ if `--batch_size` is changed from its default value of `256` (not recommended). All other representations are invariant to batch size. Note that `musicnn`, `clmr`, and `jukebox` should be run on a machine with a GPU.
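For instance, here is a sketch of how these flags could be combined to parallelize extraction on a single host. It assumes that `--batch_idx` selects the `--batch_size`-sized slice of the (sorted) input directory, that the flags are forwarded as extra `docker run` arguments, and that `mywavs/` holds at most `NUM_BATCHES * 256` files (`-it` is dropped because the runs are backgrounded):

```sh
# Sketch: extract representations in NUM_BATCHES concurrent slices of 256 files
NUM_BATCHES=4
for IDX in $(seq 0 $((NUM_BATCHES - 1)))
do
  docker run \
    --rm \
    -v $(pwd)/mywavs:/input \
    -v $(pwd)/mywavs_jukebox:/output \
    jukemir/representations_jukebox \
    --batch_size 256 --batch_idx $IDX &
done
wait
```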
138 | 
139 | A Python script at [`reproduce/3_extract.py`](reproduce/3_extract.py) will generate all of the Docker commands needed to re-extract all representations for all datasets (see [`reproduce/3_extract.sh`](reproduce/3_extract.sh) for output). We highly recommend executing these commands in parallel in your own computing environment, as running them one at a time will take a long time.
140 | 
141 | ### (Steps 4/5) Configuring and running probing experiments
142 | 
143 | Individual probing experiments are defined using a `jukemir.probe.ProbeExperimentConfig` and executed via `jukemir.probe.execute_probe_experiment`. For example, assuming you've followed the previous steps, you can train a linear probe on `gtzan_ff` using features from `jukebox` via the following script:
144 | 
145 | ```py
146 | from jukemir.probe import ProbeExperimentConfig, execute_probe_experiment
147 | 
148 | cfg = ProbeExperimentConfig(dataset="gtzan_ff", representation="jukebox")
149 | execute_probe_experiment(cfg)
150 | ```
151 | 
152 | To generate config files for the grid searches described in our paper, **run `./4_grid_config.sh`**. The resultant files will be saved to the `probes` subdirectory of your cache directory (`~/.jukemir/cache/probes` by default).
153 | 
154 | To run all probing experiments one after another, **run `./5_grid_train_serial.sh`**. This will take several days to run to completion. The resultant files will be saved to the `probes` subdirectory of your cache directory (`~/.jukemir/cache/probes` by default).
155 | 
156 | We highly recommend parallelizing this computation in your own environment. For example, you can run this computation in parallel on the [Codalab platform](https://worksheets.codalab.org/worksheets/0x7c5afa6f88bd4ff29fec75035332a583) by `pip`-installing the [`codalab`](https://pypi.org/project/codalab/) package and **running `./5_grid_train_codalab.sh`**.
157 | 
158 | ### (Step 6) Evaluating test performance
159 | 
160 | Once all the probes are done training, **run `./6_evaluate.sh`** to find the runs with the best validation scores for each dataset/representation and compute test performance.
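Finally, if you only need a subset of the paper's results, a minimal serial sweep can be written directly against the two functions shown in steps 4/5, using their default hyperparameters rather than the full grid search (the dataset and representation names below are the ones used throughout this repository):

```py
from jukemir.probe import ProbeExperimentConfig, execute_probe_experiment

# Minimal sweep with default probe hyperparameters; the paper's full grids
# are generated by 4_grid_config.sh instead
for dataset in ["gtzan_ff", "giantsteps_clips", "magnatagatune", "emomusic"]:
    for representation in ["chroma", "mfcc", "choi", "musicnn", "clmr", "jukebox"]:
        cfg = ProbeExperimentConfig(dataset=dataset, representation=representation)
        execute_probe_experiment(cfg)
```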
161 | 162 | ## Citation 163 | 164 | If you use this codebase in your work, please consider citing our paper: 165 | 166 | ``` 167 | @inproceedings{castellon2021calm, 168 | title={Codified audio language modeling learns useful representations for music information retrieval}, 169 | author={Castellon, Rodrigo and Donahue, Chris and Liang, Percy}, 170 | booktitle={ISMIR}, 171 | year={2021} 172 | } 173 | ``` 174 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Import Ubuntu/CUDA/cuDNN 2 | FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 3 | 4 | # Configure basic environment 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | RUN apt-get update --fix-missing 7 | RUN apt-get install -y --no-install-recommends build-essential 8 | SHELL ["/bin/bash", "-c"] 9 | 10 | # Install FFmpeg 11 | RUN apt-get install -y --no-install-recommends software-properties-common; add-apt-repository ppa:jonathonf/ffmpeg-4; apt-get update; apt-get install -y --no-install-recommends ffmpeg 12 | 13 | # Install Python 14 | ENV LANG C.UTF-8 15 | RUN apt-get install -y --no-install-recommends python3 python3-dev 16 | RUN ln -s $(which python3) /usr/local/bin/python 17 | 18 | # Install pip 19 | RUN apt-get install -y python3-pip 20 | RUN python -m pip install --no-cache-dir --upgrade "pip<20.3" setuptools 21 | 22 | # Install learning frameworks 23 | RUN python -m pip install --no-cache-dir torch==1.8.0+cu101 -f https://nelsonliu.me/files/pytorch/whl/torch_stable.html 24 | RUN python -m pip install --no-cache-dir scikit-learn==0.24.1 25 | 26 | # Install Jupyter 27 | RUN python -m pip install --no-cache-dir jupyter matplotlib 28 | 29 | # Install other Python deps 30 | RUN python -m pip install --no-cache-dir librosa 31 | RUN python -m pip install --no-cache-dir tqdm 32 | RUN apt-get install -y --no-install-recommends fluidsynth; python -m pip install --no-cache-dir pyfluidsynth 33 | RUN python -m pip install --no-cache-dir pretty_midi 34 | RUN python -m pip install --no-cache-dir mir_eval 35 | RUN python -m pip install --no-cache-dir wandb 36 | 37 | # Create workdir and cache 38 | ARG WORKDIR=/jukemir 39 | RUN mkdir -p $WORKDIR 40 | WORKDIR $WORKDIR 41 | RUN mkdir cache 42 | ENV JUKEMIR_CACHE_DIR $WORKDIR/cache 43 | 44 | # Install library placeholder 45 | COPY setup.py setup.py 46 | RUN mkdir jukemir 47 | RUN python -m pip install --no-cache-dir -e . 48 | 49 | # Create additional folders 50 | RUN mkdir notebooks 51 | RUN mkdir scripts 52 | RUN mkdir tests 53 | 54 | # Create user 55 | RUN useradd -ms /bin/bash jukemir 56 | RUN chmod -R ugo+rw $WORKDIR 57 | RUN chown -R jukemir $WORKDIR 58 | USER jukemir 59 | -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- 1 | source env.sh 2 | 3 | pushd .. 4 | docker build -t ${DOCKER_NAMESPACE}/${DOCKER_TAG} -f docker/Dockerfile . 
5 | popd 6 | -------------------------------------------------------------------------------- /docker/env.sh: -------------------------------------------------------------------------------- 1 | DOCKER_NAMESPACE=jukemir 2 | DOCKER_TAG=lib 3 | DOCKER_NAME=jukemir 4 | -------------------------------------------------------------------------------- /docker/kill.sh: -------------------------------------------------------------------------------- 1 | source env.sh 2 | 3 | docker kill ${DOCKER_NAME} 4 | -------------------------------------------------------------------------------- /docker/notebook.sh: -------------------------------------------------------------------------------- 1 | source env.sh 2 | 3 | docker exec \ 4 | -it \ 5 | ${DOCKER_NAME} \ 6 | jupyter notebook \ 7 | --ip=0.0.0.0 \ 8 | --port 8888 \ 9 | --no-browser \ 10 | --allow-root \ 11 | --notebook-dir=/jukemir/notebooks 12 | -------------------------------------------------------------------------------- /docker/run.sh: -------------------------------------------------------------------------------- 1 | source env.sh 2 | 3 | pushd .. 4 | set -e 5 | HOST_CACHE=$(python -c "from jukemir import CACHE_DIR; print(CACHE_DIR)") 6 | echo $HOST_CACHE 7 | popd 8 | 9 | DOCKER_CPUS=$(python3 -c "import os; cpus=os.sched_getaffinity(0); print(','.join(map(str,cpus)))") 10 | DOCKER_GPUS=$(nvidia-smi -L | python3 -c "import sys; print(','.join([l.strip().split()[-1][:-1] for l in list(sys.stdin)]))") 11 | DOCKER_CPU_ARG="--cpuset-cpus ${DOCKER_CPUS}" 12 | DOCKER_GPU_ARG="--gpus device=${DOCKER_GPUS}" 13 | 14 | docker run \ 15 | -it \ 16 | --rm \ 17 | -d \ 18 | ${DOCKER_CPU_ARG} \ 19 | ${DOCKER_GPU_ARG} \ 20 | --name ${DOCKER_NAME} \ 21 | -u $(id -u):$(id -g) \ 22 | -v $HOST_CACHE:/jukemir/cache \ 23 | -v $(pwd)/../jukemir:/jukemir/jukemir \ 24 | -v $(pwd)/../notebooks:/jukemir/notebooks \ 25 | -v $(pwd)/../scripts:/jukemir/scripts \ 26 | -v $(pwd)/../tests:/jukemir/tests \ 27 | -v ~/.local:/.local \ 28 | -p 8888:8888 \ 29 | ${DOCKER_NAMESPACE}/${DOCKER_TAG} \ 30 | bash 31 | -------------------------------------------------------------------------------- /docker/shell.sh: -------------------------------------------------------------------------------- 1 | source env.sh 2 | 3 | docker exec -it ${DOCKER_NAME} bash 4 | -------------------------------------------------------------------------------- /jukemir/__init__.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | from os import environ as os_env 3 | 4 | LIB_DIR = pathlib.Path(__file__).resolve().parent 5 | 6 | if "JUKEMIR_CACHE_DIR" in os_env: 7 | CACHE_DIR = pathlib.Path(os_env["JUKEMIR_CACHE_DIR"]) 8 | else: 9 | CACHE_DIR = pathlib.Path(pathlib.Path.home(), ".jukemir") 10 | CACHE_DIR = CACHE_DIR.resolve() 11 | 12 | CACHE_DATASETS_DIR = pathlib.Path(CACHE_DIR, "processed") 13 | CACHE_REPRESENTATIONS_DIR = pathlib.Path(CACHE_DIR, "representations") 14 | CACHE_PROBES_DIR = pathlib.Path(CACHE_DIR, "probes") 15 | 16 | # NOTE: This changes the test discovery pattern from "test*.py" (default) to "*test.py". 
17 | def load_tests(loader, standard_tests, pattern):
18 |     package_tests = loader.discover(start_dir=LIB_DIR, pattern="*test.py")
19 |     standard_tests.addTests(package_tests)
20 |     return standard_tests
21 | 
--------------------------------------------------------------------------------
/jukemir/assets.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import pathlib
4 | import urllib.request
5 | 
6 | from . import CACHE_DIR, LIB_DIR
7 | from .utils import compute_checksum
8 | 
9 | _DEFAULT_CHUNK_SIZE = 4096
10 | 
11 | # Parse asset specification JSON
12 | _ASSET_PATHS = set()
13 | _ASSETS = {}
14 | for json_path in sorted(pathlib.Path(LIB_DIR, "assets").rglob("*.json")):
15 |     with open(json_path, "r") as f:
16 |         d = json.load(f)
17 |     for tag, asset in d.items():
18 |         if tag != tag.upper():
19 |             raise AssertionError("Tags should be uppercase")
20 |         if "checksum" not in asset:
21 |             raise AssertionError("Missing checksum")
22 |         try:
23 |             asset["path_rel"] = pathlib.PurePosixPath(asset["path_rel"].strip())
24 |         except Exception:
25 |             raise AssertionError("Invalid path")
26 |         if asset["path_rel"] in _ASSET_PATHS:
27 |             raise AssertionError("Duplicate path")
28 |         _ASSET_PATHS.add(asset["path_rel"])
29 |         asset["path"] = pathlib.Path(CACHE_DIR, asset["path_rel"]).resolve()
30 |     _ASSETS.update(d)
31 | 
32 | 
33 | def _download(url, dest_path, chunk_size=_DEFAULT_CHUNK_SIZE):
34 |     with open(dest_path, "wb") as f:
35 |         if "drive.google.com" in url:
36 |             raise NotImplementedError()
37 |         else:
38 |             r = urllib.request.urlopen(url)
39 |             while True:
40 |                 chunk = r.read(chunk_size)
41 |                 if not chunk:
42 |                     break
43 |                 f.write(chunk)
44 | 
45 | 
46 | def retrieve_and_or_verify_asset(
47 |     tag, delete_wrong=False, chunk_size=_DEFAULT_CHUNK_SIZE
48 | ):
49 |     """Attempts to acquire and/or verify existence of a tagged asset in the cache.
50 | 
51 |     Returns
52 |     -------
53 |     str
54 |         Absolute file path for asset, if verified.
55 | 
56 |     Raises
57 |     ------
58 |     :class:`ValueError`
59 |         Invalid asset tag.
60 |     :class:`Exception`
61 |         Asset could not be verified.
62 | """ 63 | # Retrieve asset 64 | if tag not in _ASSETS: 65 | raise ValueError() 66 | asset = _ASSETS[tag] 67 | path = asset["path"] 68 | checksum = asset["checksum"] 69 | logging.info(f"Verifying asset: {tag}") 70 | logging.info(f"Asset location: {path}") 71 | 72 | # Create parent directory 73 | if not path.parent.is_dir(): 74 | logging.info(f"Creating parent: {path.parent}") 75 | path.parent.mkdir(parents=True) 76 | 77 | def verify(): 78 | assert path.is_file() 79 | if checksum is not None: 80 | if len(checksum) == 32: 81 | algorithm = "md5" 82 | elif len(checksum) == 40: 83 | algorithm = "sha1" 84 | elif len(checksum) == 64: 85 | algorithm = "sha256" 86 | else: 87 | raise Exception("Unknown checksum algorithm") 88 | computed = compute_checksum( 89 | path, algorithm=algorithm, chunk_size=chunk_size 90 | ) 91 | if computed != checksum: 92 | raise Exception(f"File {path} has wrong checksum.") 93 | 94 | # Delete incorrect files 95 | already_verified = False 96 | if delete_wrong and path.is_file(): 97 | try: 98 | verify() 99 | already_verified = True 100 | except Exception: 101 | logging.warning(f"Deleting file with bad checksum: {path}") 102 | path.unlink() 103 | 104 | # Attempt to download 105 | if not path.is_file(): 106 | url = asset.get("url") 107 | if url is None: 108 | raise Exception("File is missing and cannot be downloaded") 109 | logging.info(f"Downloading from: {url}") 110 | try: 111 | _download(url, path) 112 | except Exception as e: 113 | if path.is_file(): 114 | path.unlink() 115 | raise Exception(f"Download failed: {e}") 116 | assert path.is_file() 117 | 118 | # Ensure file integrity 119 | if not already_verified: 120 | verify() 121 | logging.info(f"Verified!") 122 | 123 | return path 124 | 125 | 126 | if __name__ == "__main__": 127 | import multiprocessing 128 | from argparse import ArgumentParser 129 | 130 | parser = ArgumentParser() 131 | 132 | parser.add_argument("startswith", nargs="?") 133 | parser.add_argument("--delete_wrong", action="store_true", dest="delete_wrong") 134 | parser.add_argument("--num_parallel", "-n", type=int) 135 | 136 | parser.set_defaults(startswith=None, num_parallel=1, delete_wrong=False) 137 | 138 | args = parser.parse_args() 139 | 140 | logging.basicConfig(level=logging.INFO) 141 | 142 | tags = list(_ASSETS.keys()) 143 | if args.startswith is not None: 144 | tags = [t for t in tags if t.startswith(args.startswith.strip().upper())] 145 | 146 | def task(t): 147 | logging.info("-" * 80) 148 | try: 149 | retrieve_and_or_verify_asset(t, delete_wrong=args.delete_wrong) 150 | except Exception as e: 151 | logging.error(e) 152 | raise e 153 | 154 | with multiprocessing.Pool(args.num_parallel) as p: 155 | p.map(task, tags) 156 | -------------------------------------------------------------------------------- /jukemir/assets/debug.json: -------------------------------------------------------------------------------- 1 | { 2 | "DEBUG_MP3": { 3 | "path_rel": "datasets/debug/test1_22050.mp3", 4 | "url": "https://github.com/librosa/librosa/raw/main/tests/data/test1_22050.mp3", 5 | "checksum": "a2084c37c252a1de25586549ef0ab5c512f8e91a2c072d5a8941817d949996b2" 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /jukemir/assets/emomusic.json: -------------------------------------------------------------------------------- 1 | { 2 | "EMOMUSIC_CLIPS": { 3 | "path_rel": "datasets/emomusic/clips_45sec.tar.gz", 4 | "url": "http://cvml.unige.ch/databases/emoMusic/clips_45sec.tar.gz", 5 | "checksum": 
"3994a5d5fefc27f9540d1a69cd7f1294c68e6cb311ce49765021e2e9c7a1057b" 6 | }, 7 | "EMOMUSIC_ANNOTATIONS": { 8 | "path_rel": "datasets/emomusic/annotations.tar.gz", 9 | "url": "http://cvml.unige.ch/databases/emoMusic/annotations.tar.gz", 10 | "checksum": "e0cb0b06e1b0ce7f04b1572fc410937699b7fad9dae67ddaa745f06cda4b0b33" 11 | }, 12 | "EMOMUSIC_MANUAL": { 13 | "path_rel": "datasets/emomusic/dataset_manual.pdf", 14 | "url": "http://cvml.unige.ch/databases/emoMusic/dataset_manual.pdf", 15 | "checksum": "2bfc7f7cc58ac22283f54593ed5104a8210972fa182c1aca1da478e0dae77d44" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /jukemir/assets/gtzan.json: -------------------------------------------------------------------------------- 1 | { 2 | "GTZAN": { 3 | "path_rel": "datasets/gtzan/genres.tar.gz", 4 | "url": "http://opihi.cs.uvic.ca/sound/genres.tar.gz", 5 | "checksum": "24347e0223d2ba798e0a558c4c172d9d4a19c00bb7963fe055d183dadb4ef2c6" 6 | }, 7 | "GTZAN_TRAIN": { 8 | "path_rel": "datasets/gtzan/train_filtered.txt", 9 | "url": "https://raw.githubusercontent.com/coreyker/dnn-mgr/bdad579ea6cb37b665ea6019fe1026a6ce20cbc7/gtzan/train_filtered.txt", 10 | "checksum": "2b04966cef1486f1a7767b74271533fa23e220388db804a1937835e1fc4fa3fa" 11 | }, 12 | "GTZAN_VALID": { 13 | "path_rel": "datasets/gtzan/valid_filtered.txt", 14 | "url": "https://raw.githubusercontent.com/coreyker/dnn-mgr/bdad579ea6cb37b665ea6019fe1026a6ce20cbc7/gtzan/valid_filtered.txt", 15 | "checksum": "f8fbee7c52397a0e0f3a91dc54fedecb2e27f5bdc9a9daeab8cb10d077e26956" 16 | }, 17 | "GTZAN_TEST": { 18 | "path_rel": "datasets/gtzan/test_filtered.txt", 19 | "url": "https://raw.githubusercontent.com/coreyker/dnn-mgr/bdad579ea6cb37b665ea6019fe1026a6ce20cbc7/gtzan/test_filtered.txt", 20 | "checksum": "09cafa6a89ce0a27f30ed250d56ecb4d03c22fffb5a693f96c7ae655ee02ac72" 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /jukemir/assets/magnatagatune.json: -------------------------------------------------------------------------------- 1 | { 2 | "MAGNATAGATUNE_MP3_001": { 3 | "path_rel": "datasets/magnatagatune/mp3.zip.001", 4 | "url": "http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.001", 5 | "checksum": "f857fe185968773058cc71662c2ef5d8f2d4b7338e3c122cfd52f82dcb9760b9" 6 | }, 7 | "MAGNATAGATUNE_MP3_002": { 8 | "path_rel": "datasets/magnatagatune/mp3.zip.002", 9 | "url": "http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.002", 10 | "checksum": "fc2e1ec441755556ed1398b1808f1b08b6034372f8bc27394510c0c58cdb52ce" 11 | }, 12 | "MAGNATAGATUNE_MP3_003": { 13 | "path_rel": "datasets/magnatagatune/mp3.zip.003", 14 | "url": "http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3.zip.003", 15 | "checksum": "83a689824c17e82f6eb81cdbc4e4ca239a4cfc1fb41f1a5c80b861caec90450f" 16 | }, 17 | "MAGNATAGATUNE_CLIP_METADATA": { 18 | "path_rel": "datasets/magnatagatune/clip_info_final.csv", 19 | "url": "http://mi.soi.city.ac.uk/datasets/magnatagatune/clip_info_final.csv", 20 | "checksum": "cb6108a10d3a91f0bfd7d2fbec2382559d15f20c8d28093f14e12162a47a3e78" 21 | }, 22 | "MAGNATAGATUNE_ANNOTATIONS": { 23 | "path_rel": "datasets/magnatagatune/annotations_final.csv", 24 | "url": "http://mi.soi.city.ac.uk/datasets/magnatagatune/annotations_final.csv", 25 | "checksum": "99b94e95825d4fa576386a5367b9c96e0ec636e9f9c240aceb492f4728f68c3c" 26 | }, 27 | "MAGNATAGATUNE_CLIP_METADATA_MYSQL": { 28 | "path_rel": "datasets/magnatagatune/clip_info_final.sql.zip", 29 | "url": 
"http://mi.soi.city.ac.uk/datasets/magnatagatune/clip_info_final.sql.zip", 30 | "checksum": "06ee2496fbc517929979f1e83a1ee7aa4b38ff2f277111c48dbe12eb3b9d7956" 31 | }, 32 | "MAGNATAGATUNE_AUDIO_FEATURES": { 33 | "path_rel": "datasets/magnatagatune/mp3_echonest_xml.zip", 34 | "url": "http://mi.soi.city.ac.uk/datasets/magnatagatune/mp3_echonest_xml.zip", 35 | "checksum": "80e973e0c433e733ed3dcab9ff2e73d1ae3842b84a3ef348d53a9e88aaaa64d4" 36 | }, 37 | "MAGNATAGATUNE_SIMILARITY": { 38 | "path_rel": "datasets/magnatagatune/comparisons_final.csv", 39 | "url": "http://mi.soi.city.ac.uk/datasets/magnatagatune/comparisons_final.csv", 40 | "checksum": "cf210e087ed5b3f3f8b164626e1d2857cf0ba9ae66bd9229bafe042889107a98" 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /jukemir/assets/precomputed.json: -------------------------------------------------------------------------------- 1 | { 2 | "PRECOMPUTED-EMOMUSIC-CHOI": { 3 | "path_rel": "representations/emomusic-choi.tar.gz", 4 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/emomusic-choi.tar.gz", 5 | "checksum": "17d23d7211bd8513bcc6ed4d62cf819389030a3cc786c39e93c3e37df19fcfdc" 6 | }, 7 | "PRECOMPUTED-EMOMUSIC-CHROMA": { 8 | "path_rel": "representations/emomusic-chroma.tar.gz", 9 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/emomusic-chroma.tar.gz", 10 | "checksum": "cbe6faaa56309450103888442ab144d0f2fbc43f35fd0a1ed807bf27d6b6df5a" 11 | }, 12 | "PRECOMPUTED-EMOMUSIC-CLMR": { 13 | "path_rel": "representations/emomusic-clmr.tar.gz", 14 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/emomusic-clmr.tar.gz", 15 | "checksum": "5b4d19dba824b8fe7f44025657cfd441fabd12c091aab0119d2f9b885f7acc2c" 16 | }, 17 | "PRECOMPUTED-EMOMUSIC-JUKEBOX": { 18 | "path_rel": "representations/emomusic-jukebox.tar.gz", 19 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/emomusic-jukebox.tar.gz", 20 | "checksum": "bff3a148eb15c60308629e3a1e2593b9d34324bb9404a49f98c44854ea3aeb5e" 21 | }, 22 | "PRECOMPUTED-EMOMUSIC-MFCC": { 23 | "path_rel": "representations/emomusic-mfcc.tar.gz", 24 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/emomusic-mfcc.tar.gz", 25 | "checksum": "c0d6a45f5d0279e4f296921c2fd64425a1b9bfd9baee751c8809a67a484d5589" 26 | }, 27 | "PRECOMPUTED-EMOMUSIC-MUSICNN": { 28 | "path_rel": "representations/emomusic-musicnn.tar.gz", 29 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/emomusic-musicnn.tar.gz", 30 | "checksum": "80e09ec228b5a032f813937a59b7bc99775b92af225e42af428cd395ac9b57c9" 31 | }, 32 | "PRECOMPUTED-GIANTSTEPS_CLIPS-CHOI": { 33 | "path_rel": "representations/giantsteps_clips-choi.tar.gz", 34 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/giantsteps_clips-choi.tar.gz", 35 | "checksum": "ad5286f1eedd978720d20ceadff281ec284a030d217a2f5be011ebf7b7265b49" 36 | }, 37 | "PRECOMPUTED-GIANTSTEPS_CLIPS-CHROMA": { 38 | "path_rel": "representations/giantsteps_clips-chroma.tar.gz", 39 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/giantsteps_clips-chroma.tar.gz", 40 | "checksum": "2928549df97d4c594a7be14b8b40b68c31120b42eb0c208860ca113ab6d2aae1" 41 | }, 42 | "PRECOMPUTED-GIANTSTEPS_CLIPS-CLMR": { 43 | "path_rel": "representations/giantsteps_clips-clmr.tar.gz", 44 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/giantsteps_clips-clmr.tar.gz", 45 | "checksum": "152e7d33c96a531ee606181690010878c386c079901b6484e1d666813d5ed340" 46 | }, 47 | 
"PRECOMPUTED-GIANTSTEPS_CLIPS-JUKEBOX": { 48 | "path_rel": "representations/giantsteps_clips-jukebox.tar.gz", 49 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/giantsteps_clips-jukebox.tar.gz", 50 | "checksum": "ac25a9c900865482ad1cb5b7d8d8208935491c7134690f8bd530dc660898bee2" 51 | }, 52 | "PRECOMPUTED-GIANTSTEPS_CLIPS-MFCC": { 53 | "path_rel": "representations/giantsteps_clips-mfcc.tar.gz", 54 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/giantsteps_clips-mfcc.tar.gz", 55 | "checksum": "61d6df264266ba03d27517dba0cbae50904a2d093f9b6c7cc675220311734156" 56 | }, 57 | "PRECOMPUTED-GIANTSTEPS_CLIPS-MUSICNN": { 58 | "path_rel": "representations/giantsteps_clips-musicnn.tar.gz", 59 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/giantsteps_clips-musicnn.tar.gz", 60 | "checksum": "a837b0a4030df83fd46ae5ed9f4871f6c4fe3d3cfbed2b951b3225a893b98fe6" 61 | }, 62 | "PRECOMPUTED-GTZAN_FF-CHOI": { 63 | "path_rel": "representations/gtzan_ff-choi.tar.gz", 64 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/gtzan_ff-choi.tar.gz", 65 | "checksum": "7b6ee4d2cf368a5d5006a4fd1af3f6176bae5d1cf665fac06c8cd0ffb0529d4e" 66 | }, 67 | "PRECOMPUTED-GTZAN_FF-CHROMA": { 68 | "path_rel": "representations/gtzan_ff-chroma.tar.gz", 69 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/gtzan_ff-chroma.tar.gz", 70 | "checksum": "2a95d7ba03f8b4a500b1a2e38f9449069b449fc68a2ca13a35a94edf2d5f5e8e" 71 | }, 72 | "PRECOMPUTED-GTZAN_FF-CLMR": { 73 | "path_rel": "representations/gtzan_ff-clmr.tar.gz", 74 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/gtzan_ff-clmr.tar.gz", 75 | "checksum": "fe4de733f91c94a1fe2900ee989602c0463a29a8ad3276c21b077a1bff32fa81" 76 | }, 77 | "PRECOMPUTED-GTZAN_FF-JUKEBOX": { 78 | "path_rel": "representations/gtzan_ff-jukebox.tar.gz", 79 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/gtzan_ff-jukebox.tar.gz", 80 | "checksum": "d3d619b907f84069faada1fd81edede6518e11bbb90c3d3f75785b9c48f472c8" 81 | }, 82 | "PRECOMPUTED-GTZAN_FF-MFCC": { 83 | "path_rel": "representations/gtzan_ff-mfcc.tar.gz", 84 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/gtzan_ff-mfcc.tar.gz", 85 | "checksum": "13d8fe30ae20cec7dc91363a8f2b55a5f6c1b60091d2349eefdf33e217b66169" 86 | }, 87 | "PRECOMPUTED-GTZAN_FF-MUSICNN": { 88 | "path_rel": "representations/gtzan_ff-musicnn.tar.gz", 89 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/gtzan_ff-musicnn.tar.gz", 90 | "checksum": "3a72e7dfca03ab4aa5eaa6a875d7962225ea1f61f715a1335cf98aee493ce0ff" 91 | }, 92 | "PRECOMPUTED-MAGNATAGATUNE-CHOI": { 93 | "path_rel": "representations/magnatagatune-choi.tar.gz", 94 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/magnatagatune-choi.tar.gz", 95 | "checksum": "5c7c72f2ec12ad609e6cc31ecf06872e6f6d82d492a40ec578f44fc6c6ddf078" 96 | }, 97 | "PRECOMPUTED-MAGNATAGATUNE-CHROMA": { 98 | "path_rel": "representations/magnatagatune-chroma.tar.gz", 99 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/magnatagatune-chroma.tar.gz", 100 | "checksum": "4725c13fd25b9b3fdbd59f73c0637b882486a340283a80ab8649a9a64d0a3313" 101 | }, 102 | "PRECOMPUTED-MAGNATAGATUNE-CLMR": { 103 | "path_rel": "representations/magnatagatune-clmr.tar.gz", 104 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/magnatagatune-clmr.tar.gz", 105 | "checksum": "a69ad2f3f462915f00519cdc160dde94f46b9cc190f0391525957f2547e04e67" 106 | }, 107 | 
"PRECOMPUTED-MAGNATAGATUNE-JUKEBOX": { 108 | "path_rel": "representations/magnatagatune-jukebox.tar.gz", 109 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/magnatagatune-jukebox.tar.gz", 110 | "checksum": "2547b4d5c6b85bad1de12c6280c704e1183b6740c07316d0f08325b95f966d40" 111 | }, 112 | "PRECOMPUTED-MAGNATAGATUNE-MFCC": { 113 | "path_rel": "representations/magnatagatune-mfcc.tar.gz", 114 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/magnatagatune-mfcc.tar.gz", 115 | "checksum": "3d2f95a221478873be34d5f00ede38ce50b1707d322838831aeff8f0c00800d5" 116 | }, 117 | "PRECOMPUTED-MAGNATAGATUNE-MUSICNN": { 118 | "path_rel": "representations/magnatagatune-musicnn.tar.gz", 119 | "url": "https://nlp.stanford.edu/data/cdonahue/jukemir/precomputed/magnatagatune-musicnn.tar.gz", 120 | "checksum": "3b87e35f5774e30ff6d8932f9a7e41cfe07b8d7542866fddd70660a8869b8643" 121 | } 122 | } -------------------------------------------------------------------------------- /jukemir/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import csv 3 | import gzip 4 | import json 5 | import pathlib 6 | import shutil 7 | import tempfile 8 | from collections import Counter, defaultdict 9 | 10 | import numpy as np 11 | from scipy.io.wavfile import read as wavread 12 | from tqdm import tqdm 13 | 14 | from ..assets import retrieve_and_or_verify_asset 15 | from ..utils import compute_checksum, run_cmd_sync 16 | 17 | 18 | def _iter_gtzan(metadata_only=False, posthoc_splits=False): 19 | # Parse splits 20 | if posthoc_splits: 21 | did_to_split = {} 22 | for split_name in ["train", "valid", "test"]: 23 | split_path = retrieve_and_or_verify_asset(f"GTZAN_{split_name.upper()}") 24 | with open(split_path, "r") as f: 25 | split = f.read().strip().splitlines() 26 | for p in split: 27 | p = pathlib.PurePath(p) 28 | did = p.stem 29 | assert did not in did_to_split 30 | did_to_split[did] = split_name 31 | 32 | # Parse audio 33 | uids = set() 34 | with tempfile.TemporaryDirectory() as d: 35 | d = pathlib.Path(d) 36 | asset_path = retrieve_and_or_verify_asset("GTZAN") 37 | shutil.unpack_archive(str(asset_path), d) 38 | 39 | # Find dataset entries 40 | genre_to_dids = defaultdict(list) 41 | for p in sorted(d.rglob("*.wav")): 42 | genre = p.parts[-2] 43 | did = p.stem 44 | genre_to_dids[genre].append(did) 45 | 46 | # Compute metadata 47 | for genre, dids in genre_to_dids.items(): 48 | for did in dids: 49 | uid = compute_checksum(did.encode("utf-8")) 50 | assert uid not in uids 51 | uids.add(uid) 52 | 53 | metadata = { 54 | "y": genre, 55 | "extra": {"id": did, "uid": uid}, 56 | } 57 | 58 | if posthoc_splits: 59 | if did not in did_to_split: 60 | continue 61 | split = did_to_split[did] 62 | metadata["split"] = split 63 | 64 | result = (uid, metadata) 65 | if not metadata_only: 66 | audio_path = pathlib.Path(d, "genres", genre, f"{did}.wav") 67 | result += (audio_path,) 68 | yield result 69 | 70 | 71 | def iter_gtzan(metadata_only=False): 72 | return _iter_gtzan(metadata_only=metadata_only) 73 | 74 | 75 | def iter_gtzan_ff(metadata_only=False): 76 | return _iter_gtzan(metadata_only=metadata_only, posthoc_splits=True) 77 | 78 | 79 | def iter_magnatagatune(metadata_only=False, filter_empty=False): 80 | audio_uid_to_uids = defaultdict(list) 81 | uid_to_metadata = {} 82 | with open(retrieve_and_or_verify_asset("MAGNATAGATUNE_CLIP_METADATA"), "r") as f: 83 | for row in csv.DictReader(f, delimiter="\t"): 84 | uid = 
str(int(row["clip_id"])).zfill(5) 85 | audio_uid = (row["url"].strip(), row["track_number"].strip()) 86 | assert all(len(a) > 0 for a in audio_uid) 87 | audio_uid_to_uids[audio_uid].append(uid) 88 | assert uid not in uid_to_metadata 89 | uid_to_metadata[uid] = dict(row) 90 | audio_uid_to_uids = { 91 | k: sorted(v, key=lambda uid: int(uid_to_metadata[uid]["segmentStart"])) 92 | for k, v in audio_uid_to_uids.items() 93 | } 94 | 95 | tag_counts = Counter() 96 | uid_to_tags = {} 97 | with open(retrieve_and_or_verify_asset("MAGNATAGATUNE_ANNOTATIONS"), "r") as f: 98 | for row in csv.DictReader(f, delimiter="\t"): 99 | uid = str(int(row["clip_id"])).zfill(5) 100 | assert uid_to_metadata[uid]["mp3_path"] == row["mp3_path"] 101 | tags = [ 102 | k 103 | for k, v in row.items() 104 | if k not in ["clip_id", "mp3_path"] and v == "1" 105 | ] 106 | uid_to_tags[uid] = tags 107 | for t in tags: 108 | tag_counts[t] += 1 109 | 110 | top_50_tags = sorted(list(tag_counts.items()), key=lambda x: -x[1])[:50] 111 | top_50_tags = [t for t, c in top_50_tags] 112 | top_50_tags = set(top_50_tags) 113 | assert len(top_50_tags) == 50 114 | 115 | with tempfile.TemporaryDirectory() as d: 116 | if not metadata_only: 117 | z_path = pathlib.Path(d, "mp3.zip") 118 | with open(z_path, "wb") as z: 119 | for i in range(1, 4): 120 | path = retrieve_and_or_verify_asset( 121 | f"MAGNATAGATUNE_MP3_{str(i).zfill(3)}" 122 | ) 123 | with open(path, "rb") as f: 124 | z.write(f.read()) 125 | shutil.unpack_archive(str(z_path), d) 126 | 127 | for uid, metadata in uid_to_metadata.items(): 128 | # Get labels 129 | tags = uid_to_tags.get(uid) 130 | 131 | # Compute clip info 132 | audio_uid = (metadata["url"].strip(), metadata["track_number"].strip()) 133 | audio_uids = audio_uid_to_uids[audio_uid] 134 | audio_duration = max( 135 | float(uid_to_metadata[clip_uid]["segmentEnd"]) 136 | for clip_uid in audio_uids 137 | ) 138 | 139 | # Find MP3 path 140 | if uid in ["35644", "55753", "57881"]: 141 | # NOTE: These UIDs point to corrupt MP3s 142 | mp3_path = "" 143 | else: 144 | mp3_path = metadata["mp3_path"].strip() 145 | 146 | # Determine split 147 | if len(mp3_path) == 0: 148 | split = None 149 | else: 150 | folder = mp3_path.split("/")[0] 151 | assert len(folder) == 1 and folder in "0123456789abcdef" 152 | if folder in "0123456789ab": 153 | split = "train" 154 | elif folder == "c": 155 | split = "valid" 156 | else: 157 | assert folder in "def" 158 | split = "test" 159 | 160 | result = ( 161 | uid, 162 | { 163 | "split": split, 164 | "clip": { 165 | "audio_uid": audio_uid, 166 | "audio_duration": audio_duration, 167 | "clip_idx": audio_uids.index(uid), 168 | "clip_offset": float(metadata["segmentStart"]), 169 | }, 170 | "y": None 171 | if tags is None 172 | else [t for t in tags if t in top_50_tags], 173 | "y_all": tags, 174 | "extra": metadata, 175 | }, 176 | ) 177 | if filter_empty: 178 | tags = result[1]["y"] 179 | if tags is None or len(tags) == 0: 180 | result[1]["split"] = None 181 | if not metadata_only: 182 | if len(mp3_path) == 0: 183 | mp3_path = None 184 | else: 185 | mp3_path = pathlib.Path(d, metadata["mp3_path"]) 186 | assert mp3_path.stat().st_size > 0 187 | result = result + (mp3_path,) 188 | yield result 189 | 190 | 191 | def iter_emomusic(metadata_only=False): 192 | with tempfile.TemporaryDirectory() as d: 193 | d = pathlib.Path(d) 194 | annotations_path = retrieve_and_or_verify_asset("EMOMUSIC_ANNOTATIONS") 195 | shutil.unpack_archive(str(annotations_path), d) 196 | 197 | def parse_minsec(s): 198 | s = s.split(".") 199 | t 
= float(s[0]) * 60 200 | if len(s) > 1: 201 | assert len(s) == 2 202 | if len(s[1]) == 1: 203 | s[1] += "0" 204 | t += float(s[1]) 205 | return t 206 | 207 | audio_uids = set() 208 | uid_to_metadata = {} 209 | for stem in [ 210 | "songs_info", 211 | "static_annotations", 212 | "valence_cont_average", 213 | "valence_cont_std", 214 | "arousal_cont_average", 215 | "arousal_cont_std", 216 | ]: 217 | with open(pathlib.Path(d, f"{stem}.csv"), "r") as f: 218 | for row in csv.DictReader(f): 219 | row = {k: v.strip() for k, v in row.items()} 220 | uid = str(int(row["song_id"])).zfill(4) 221 | if stem == "songs_info": 222 | assert uid not in uid_to_metadata 223 | audio_uid = (row["Artist"], row["Song title"]) 224 | # NOTE: Only one clip per song in this dataset 225 | assert audio_uid not in audio_uids 226 | audio_uids.add(audio_uid) 227 | clip_start = parse_minsec(row["start of the segment (min.sec)"]) 228 | clip_end = parse_minsec(row["end of the segment (min.sec)"]) 229 | clip_dur = clip_end - clip_start 230 | assert clip_dur == 45.0 231 | uid_to_metadata[uid] = { 232 | "split": "test" 233 | if row["Mediaeval 2013 set"] == "evaluation" 234 | else "train", 235 | "clip": { 236 | "audio_uid": audio_uid, 237 | "audio_duration": clip_end, 238 | "clip_idx": 0, 239 | "clip_offset": clip_start, 240 | }, 241 | "y": None, 242 | "extra": {}, 243 | } 244 | else: 245 | assert uid in uid_to_metadata 246 | uid_to_metadata[uid]["extra"][stem] = row 247 | if stem == "static_annotations": 248 | uid_to_metadata[uid]["y"] = [ 249 | float(row["mean_arousal"]), 250 | float(row["mean_valence"]), 251 | ] 252 | 253 | # Normalize 254 | arousals = [ 255 | metadata["y"][0] 256 | for metadata in uid_to_metadata.values() 257 | if metadata["split"] == "train" 258 | ] 259 | valences = [ 260 | metadata["y"][1] 261 | for metadata in uid_to_metadata.values() 262 | if metadata["split"] == "train" 263 | ] 264 | arousal_mean = np.mean(arousals) 265 | arousal_std = np.std(arousals) 266 | valence_mean = np.mean(valences) 267 | valence_std = np.std(valences) 268 | for metadata in uid_to_metadata.values(): 269 | metadata["y"] = [ 270 | (metadata["y"][0] - arousal_mean) / arousal_std, 271 | (metadata["y"][1] - valence_mean) / valence_std, 272 | ] 273 | 274 | ratios = ["train"] * 8 + ["valid"] * 2 275 | for uid, metadata in uid_to_metadata.items(): 276 | if metadata["split"] == "train": 277 | artist = metadata["extra"]["songs_info"]["Artist"] 278 | artist = "".join( 279 | [ 280 | c 281 | for c in artist.lower() 282 | if (ord(c) < 128 and (c.isalpha() or c.isspace())) 283 | ] 284 | ) 285 | artist = " ".join(artist.split()) 286 | artist_id = int( 287 | compute_checksum(artist.encode("utf-8"), algorithm="sha1"), 16 288 | ) 289 | split = ratios[artist_id % len(ratios)] 290 | metadata["split"] = split 291 | 292 | if not metadata_only: 293 | clips_path = retrieve_and_or_verify_asset("EMOMUSIC_CLIPS") 294 | shutil.unpack_archive(str(clips_path), d) 295 | 296 | for uid, metadata in uid_to_metadata.items(): 297 | # Yield result 298 | result = (uid, metadata) 299 | if not metadata_only: 300 | mp3_path = pathlib.Path(d, "clips_45seconds", f"{int(uid)}.mp3") 301 | result = result + (mp3_path,) 302 | yield result 303 | 304 | 305 | def _iter_giantsteps(metadata_only=False, clip_duration=None): 306 | with tempfile.TemporaryDirectory() as d: 307 | d = pathlib.Path(d) 308 | uids = set() 309 | for split_name in ["train", "test"]: 310 | if split_name == "train": 311 | asset_path = retrieve_and_or_verify_asset( 312 | "GIANTSTEPS_MTG_KEY_ANNOTATIONS" 313 | ) 
314 | code = pathlib.Path( 315 | d, 316 | f"giantsteps-mtg-key-dataset-fd7b8c584f7bd6d720d170c325a6d42c9bf75a6b", 317 | ) 318 | mp3_asset_template = "GIANTSTEPS_MTG_KEY_{}" 319 | else: 320 | asset_path = retrieve_and_or_verify_asset("GIANTSTEPS_KEY_ANNOTATIONS") 321 | code = pathlib.Path( 322 | d, 323 | f"giantsteps-key-dataset-c8cb8aad2cb53f165be51ea099d0dc75c64a844f", 324 | ) 325 | mp3_asset_template = "GIANTSTEPS_KEY_{}" 326 | 327 | shutil.unpack_archive(str(asset_path), d) 328 | 329 | if split_name == "train": 330 | did_to_metadata = {} 331 | with open( 332 | pathlib.Path(code, "annotations", "beatport_metadata.txt"), "r" 333 | ) as f: 334 | for row in csv.DictReader(f, delimiter="\t"): 335 | did_to_metadata[int(row["ID"])] = row 336 | 337 | # NOTE: This seemingly-arbitrary split induces target of 80/20 338 | ratios = (["train"] * 100) + (["valid"] * 16) 339 | artist_to_split = {} 340 | for metadata in did_to_metadata.values(): 341 | artists = [a.strip() for a in metadata["ARTIST"].strip().split(",")] 342 | artist_ids = [ 343 | int( 344 | compute_checksum( 345 | a.lower().encode("utf-8"), algorithm="sha1" 346 | ), 347 | 16, 348 | ) 349 | for a in artists 350 | ] 351 | artist_splits = [ratios[i % len(ratios)] for i in artist_ids] 352 | for artist, split in zip(artists, artist_splits): 353 | artist_to_split[artist] = split 354 | 355 | # All collaborators of valid artists are valid (run twice for two-hop) 356 | for _ in range(2): 357 | for metadata in did_to_metadata.values(): 358 | artists = [ 359 | a.strip() for a in metadata["ARTIST"].strip().split(",") 360 | ] 361 | artist_splits = [artist_to_split[a] for a in artists] 362 | if "valid" in artist_splits: 363 | for a in artists: 364 | artist_to_split[a] = "valid" 365 | 366 | did_to_annotations = {} 367 | with open( 368 | pathlib.Path(code, "annotations", "annotations.txt"), "r" 369 | ) as f: 370 | for row in csv.DictReader(f, delimiter="\t"): 371 | did_to_annotations[int(row["ID"])] = row 372 | 373 | for path in pathlib.Path(code, "md5").glob("*.md5"): 374 | did = int(path.stem.split(".")[0]) 375 | uid = str(did).zfill(7) 376 | assert uid not in uids 377 | uids.add(uid) 378 | 379 | extra = {"id": did} 380 | if split_name == "train": 381 | extra["beatport_metadata"] = did_to_metadata[did] 382 | extra["annotations"] = did_to_annotations[did] 383 | 384 | for annotation in [ 385 | "genre", 386 | "key", 387 | "jams", 388 | "giantsteps.genre", 389 | "giantsteps.key", 390 | ]: 391 | if annotation == "jams" and split_name == "train": 392 | continue 393 | if "." 
in annotation: 394 | adir, aext = annotation.split(".") 395 | else: 396 | adir = annotation 397 | aext = annotation 398 | path = pathlib.Path(code, "annotations", adir, f"{did}.LOFI.{aext}") 399 | with open(path, "r") as f: 400 | contents = f.read() 401 | if annotation == "jams": 402 | contents = json.loads(contents) 403 | extra[annotation] = contents 404 | 405 | if split_name == "train": 406 | # NOTE: Skips low-confidence as in (Korzeniowski and Widmer 2018) 407 | if int(extra["annotations"]["C"]) != 2: 408 | continue 409 | # NOTE: Skips multiple keys as in (Korzeniowski and Widmer 2018) 410 | if "/" in extra["annotations"]["MANUAL KEY"]: 411 | continue 412 | tonic, scale = extra["annotations"]["MANUAL KEY"].split() 413 | assert extra["key"].startswith(" ".join((tonic.lower(), scale))) 414 | enharmonic = { 415 | "C#": "Db", 416 | "D#": "Eb", 417 | "F#": "Gb", 418 | "G#": "Ab", 419 | "A#": "Bb", 420 | } 421 | tonic = enharmonic.get(tonic, tonic) 422 | 423 | artists = [ 424 | a.strip() 425 | for a in extra["beatport_metadata"]["ARTIST"].strip().split(",") 426 | ] 427 | artist_splits = [artist_to_split[a] for a in artists] 428 | assert len(set(artist_splits)) == 1 429 | induced_split = artist_splits[0] 430 | else: 431 | tonic, scale = extra["key"].split() 432 | induced_split = "test" 433 | y = " ".join((tonic, scale)) 434 | 435 | metadata = {"split": induced_split, "y": y, "extra": extra} 436 | 437 | mp3_asset_tag = mp3_asset_template.format(did) 438 | 439 | if clip_duration is not None: 440 | mp3_path = retrieve_and_or_verify_asset(mp3_asset_tag) 441 | status, stdout, stderr = run_cmd_sync( 442 | f"ffprobe -i {mp3_path} -show_entries format=duration", 443 | timeout=60, 444 | ) 445 | assert status == 0 446 | duration = float(stdout.strip().splitlines()[1].split("=")[1]) 447 | metadata["clip"] = { 448 | "audio_uid": uid, 449 | "audio_duration": duration, 450 | "clip_idx": None, 451 | "clip_offset": None, 452 | } 453 | for clip_idx, clip_offset in enumerate( 454 | np.arange(0, duration, clip_duration) 455 | ): 456 | this_clip_duration = min(duration - clip_offset, clip_duration) 457 | assert ( 458 | this_clip_duration >= 0 459 | and this_clip_duration <= clip_duration 460 | ) 461 | if this_clip_duration < clip_duration: 462 | continue 463 | clip_uid = f"{uid}-{clip_idx}" 464 | clip_metadata = copy.deepcopy(metadata) 465 | clip_metadata["clip"]["clip_idx"] = clip_idx 466 | clip_metadata["clip"]["clip_offset"] = clip_offset 467 | result = ( 468 | clip_uid, 469 | clip_metadata, 470 | mp3_path, 471 | clip_offset, 472 | clip_duration, 473 | ) 474 | yield result[: 2 if metadata_only else 5] 475 | else: 476 | result = (uid, metadata) 477 | if not metadata_only: 478 | mp3_path = retrieve_and_or_verify_asset(mp3_asset_tag) 479 | result += (mp3_path,) 480 | yield result 481 | 482 | 483 | def iter_giantsteps(metadata_only=False): 484 | return _iter_giantsteps(metadata_only=metadata_only, clip_duration=None) 485 | 486 | 487 | def iter_giantsteps_clips(metadata_only=False): 488 | return _iter_giantsteps(metadata_only=metadata_only, clip_duration=30.0) 489 | 490 | 491 | def iter_debug(metadata_only=False): 492 | yield ("debug", {"y": "guitar"}, retrieve_and_or_verify_asset("DEBUG_MP3")) 493 | 494 | 495 | def write_dataset_json(dataset, path): 496 | path = pathlib.Path(path) 497 | if path.suffix == ".gz": 498 | open_fn = lambda: gzip.open(path, "wt", encoding="utf-8") 499 | else: 500 | open_fn = lambda: open(path, "w") 501 | with open_fn() as f: 502 | json.dump(dataset, f, indent=2, sort_keys=True) 503 | 504 
|
505 | def read_dataset_json(path):
506 |     path = pathlib.Path(path)
507 |     if path.suffix == ".gz":
508 |         open_fn = lambda: gzip.open(path, "rt", encoding="utf-8")
509 |     else:
510 |         open_fn = lambda: open(path, "r")
511 |     with open_fn() as f:
512 |         return json.load(f)
513 | 
514 | 
515 | def cache_dataset(dataset_iter, out_dir, progress_bar=True):
516 |     out_dir = pathlib.Path(out_dir)
517 |     out_dir.mkdir(parents=True, exist_ok=True)
518 |     metadata_path = pathlib.Path(out_dir, "meta.json")
519 |     audio_dir = pathlib.Path(out_dir, "wav")
520 | 
521 |     all_metadata = {}
522 |     _tqdm = tqdm if progress_bar else lambda x: x
523 |     for example in _tqdm(dataset_iter):
524 |         if len(example) == 2:
525 |             uid, metadata = example
526 |         elif len(example) >= 3:
527 |             if len(example) == 3:
528 |                 uid, metadata, src_audio_path = example
529 |                 clip_offset = None
530 |             elif len(example) == 5:
531 |                 uid, metadata, src_audio_path, clip_offset, clip_duration = example
532 |             else:
533 |                 raise ValueError("Bad iterator")
534 |             if src_audio_path is not None:
535 |                 dest_audio_path = pathlib.Path(audio_dir, f"{uid}.wav")
536 |                 dest_audio_path.parent.mkdir(exist_ok=True)
537 |                 clip_args = (
538 |                     ""
539 |                     if clip_offset is None
540 |                     else f"-ss {clip_offset} -t {clip_duration}"
541 |                 )
542 |                 status, stdout, stderr = run_cmd_sync(
543 |                     f"ffmpeg -y -i {src_audio_path} {clip_args} -ac 1 -bitexact {dest_audio_path}",
544 |                     timeout=60,
545 |                 )
546 |                 try:
547 |                     sr, audio = wavread(dest_audio_path)
548 |                     assert audio.ndim == 1
549 |                     assert audio.shape[0] > 0
550 |                     if "clip" in metadata:
551 |                         metadata["clip"]["clip_duration"] = audio.shape[0] / sr
552 |                 except Exception:
553 |                     raise Exception(f"Could not convert source audio to wav:\n{stderr}")
554 |         else:
555 |             raise ValueError("Bad iterator")
556 | 
557 |         if uid in all_metadata:
558 |             raise ValueError("Duplicate UID in iterator")
559 |         all_metadata[uid] = metadata
560 | 
561 |     write_dataset_json(all_metadata, metadata_path)
562 | 
--------------------------------------------------------------------------------
/jukemir/datasets/cache.py:
--------------------------------------------------------------------------------
1 | if __name__ == "__main__":
2 |     import logging
3 |     import pathlib
4 |     import sys
5 | 
6 |     from .. import CACHE_DATASETS_DIR
7 |     from . import (
8 |         cache_dataset,
9 |         iter_debug,
10 |         iter_emomusic,
11 |         iter_giantsteps_clips,
12 |         iter_gtzan_ff,
13 |         iter_magnatagatune,
14 |     )
15 | 
16 |     logging.basicConfig(level=logging.INFO)
17 | 
18 |     dataset = sys.argv[1]
19 |     dataset_dir = pathlib.Path(CACHE_DATASETS_DIR, dataset)
20 |     logging.info(f"Caching {dataset} to {dataset_dir}")
21 |     cache_dataset(
22 |         eval(f"iter_{dataset}")(metadata_only="audio" not in sys.argv), dataset_dir
23 |     )
24 | 
--------------------------------------------------------------------------------
/jukemir/datasets/test.py:
--------------------------------------------------------------------------------
1 | import os
2 | import pathlib
3 | import tempfile
4 | import unittest
5 | from collections import Counter, defaultdict
6 | 
7 | import numpy as np
8 | from scipy.io.wavfile import write as wavwrite
9 | 
10 | from . 
import ( 11 | cache_dataset, 12 | iter_emomusic, 13 | iter_giantsteps, 14 | iter_giantsteps_clips, 15 | iter_gtzan, 16 | iter_gtzan_ff, 17 | iter_magnatagatune, 18 | read_dataset_json, 19 | write_dataset_json, 20 | ) 21 | 22 | 23 | class Test(unittest.TestCase): 24 | def test_io(self): 25 | d = {"foo": "bar" * 1000} 26 | 27 | with tempfile.NamedTemporaryFile() as f: 28 | write_dataset_json(d, f.name) 29 | self.assertEqual(os.path.getsize(f.name), 3015) 30 | d_hat = read_dataset_json(f.name) 31 | self.assertEqual(d, d_hat) 32 | 33 | with tempfile.NamedTemporaryFile(suffix=".gz") as f: 34 | write_dataset_json(d, f.name) 35 | self.assertEqual(os.path.getsize(f.name), 76) 36 | d_hat = read_dataset_json(f.name) 37 | self.assertEqual(d, d_hat) 38 | 39 | def test_iter_gtzan(self): 40 | d = {uid: metadata for uid, metadata in iter_gtzan(metadata_only=True)} 41 | self.assertEqual(len(d), 1000) 42 | label_counts = Counter() 43 | for uid, attrs in d.items(): 44 | label_counts[attrs["y"]] += 1 45 | self.assertEqual(len(label_counts), 10) 46 | self.assertTrue(all(v == 100 for v in label_counts.values())) 47 | 48 | def test_iter_gtzan_ff(self): 49 | d = {uid: metadata for uid, metadata in iter_gtzan_ff(metadata_only=True)} 50 | self.assertEqual(len(d), 930) 51 | label_counts = Counter() 52 | split_counts = Counter() 53 | for uid, attrs in d.items(): 54 | label_counts[attrs["y"]] += 1 55 | split_counts[attrs["split"]] += 1 56 | self.assertEqual(len(split_counts), 3) 57 | self.assertEqual(split_counts["train"], 443) 58 | self.assertEqual(split_counts["valid"], 197) 59 | self.assertEqual(split_counts["test"], 290) 60 | self.assertEqual(len(label_counts), 10) 61 | self.assertTrue(all(v >= 84 for v in label_counts.values())) 62 | 63 | def test_iter_magnatagatune(self): 64 | d = {uid: metadata for uid, metadata in iter_magnatagatune(metadata_only=True)} 65 | self.assertEqual(len(d), 31382) 66 | tag_counts_top50 = Counter() 67 | tag_counts = Counter() 68 | clip_idx_counts = Counter() 69 | split_counts = Counter() 70 | for uid, attrs in d.items(): 71 | split_counts[attrs["split"]] += 1 72 | clip_idx_counts[attrs["clip"]["clip_idx"]] += 1 73 | if attrs["y"] is not None: 74 | for tag in attrs["y"]: 75 | tag_counts_top50[tag] += 1 76 | for tag in attrs["y_all"]: 77 | tag_counts[tag] += 1 78 | self.assertEqual(split_counts[None], 5522) 79 | self.assertEqual(split_counts["train"], 18706) 80 | self.assertEqual(split_counts["valid"], 1825) 81 | self.assertEqual(split_counts["test"], 5329) 82 | self.assertEqual(clip_idx_counts[0], 6670) 83 | self.assertEqual(clip_idx_counts[74], 1) 84 | self.assertEqual(len(tag_counts_top50), 50) 85 | self.assertEqual(len(tag_counts), 188) 86 | # NOTE: List from https://github.com/keunwoochoi/magnatagatune-list 87 | keunwoo_list = """guitar, classical, slow, techno, strings, drums, electronic, rock, fast, piano, ambient, beat, violin, vocal, synth, female, indian, opera, male, singing, vocals, no vocals, harpsichord, loud, quiet, flute, woman, male vocal, no vocal, pop, soft, sitar, solo, man, classic, choir, voice, new age, dance, male voice, female vocal, beats, harp, cello, no voice, weird, country, metal, female voice, choral""" 88 | self.assertEqual( 89 | frozenset(tag_counts_top50.keys()), 90 | frozenset(keunwoo_list.strip().split(", ")), 91 | ) 92 | self.assertTrue(all(v >= 23 for v in tag_counts.values())) 93 | self.assertTrue(all(v >= 490 for v in tag_counts_top50.values())) 94 | 95 | def test_iter_emomusic(self): 96 | d = {uid: metadata for uid, metadata in 
iter_emomusic(metadata_only=True)} 97 | self.assertEqual(len(d), 744) 98 | arousals = [] 99 | valences = [] 100 | split_counts = Counter() 101 | for uid, attrs in d.items(): 102 | split_counts[attrs["split"]] += 1 103 | arousal, valence = attrs["y"] 104 | arousals.append(arousal) 105 | valences.append(valence) 106 | self.assertEqual(split_counts["train"], 504) 107 | self.assertEqual(split_counts["valid"], 115) 108 | self.assertEqual(split_counts["test"], 125) 109 | self.assertAlmostEqual(np.min(arousals), -2.277, places=3) 110 | self.assertAlmostEqual(np.max(arousals), 2.629, places=3) 111 | self.assertAlmostEqual(np.mean(arousals), 0.022, places=3) 112 | self.assertAlmostEqual(np.std(arousals), 0.992, places=3) 113 | self.assertAlmostEqual(np.min(valences), -2.689, places=3) 114 | self.assertAlmostEqual(np.max(valences), 2.492, places=3) 115 | self.assertAlmostEqual(np.mean(valences), 0.026, places=3) 116 | self.assertAlmostEqual(np.std(valences), 0.992, places=3) 117 | """ 118 | # Before normalization 119 | self.assertAlmostEqual(np.min(valences), 1.6, places=3) 120 | self.assertAlmostEqual(np.max(valences), 8.1, places=3) 121 | self.assertAlmostEqual(np.mean(valences), 5.006, places=3) 122 | self.assertAlmostEqual(np.std(valences), 1.245, places=3) 123 | self.assertAlmostEqual(np.min(arousals), 1.6, places=3) 124 | self.assertAlmostEqual(np.max(arousals), 8.4, places=3) 125 | self.assertAlmostEqual(np.mean(arousals), 4.786, places=3) 126 | self.assertAlmostEqual(np.std(arousals), 1.375, places=3) 127 | """ 128 | 129 | def test_iter_giantsteps(self): 130 | d = {uid: metadata for uid, metadata in iter_giantsteps(metadata_only=True)} 131 | # NOTE: (Korzeniowski and Widmer 2018) says this should be 604+1077=1681 132 | self.assertEqual(len(d), 1763) 133 | split_key_counts = defaultdict(Counter) 134 | key_counts = Counter() 135 | tonic_counts = Counter() 136 | scale_counts = Counter() 137 | for uid, attrs in d.items(): 138 | split_key_counts[attrs["split"]][attrs["y"]] += 1 139 | key_counts[attrs["y"]] += 1 140 | tonic, scale = attrs["y"].split() 141 | tonic_counts[tonic] += 1 142 | scale_counts[scale] += 1 143 | self.assertEqual(len(split_key_counts["train"]), 24) 144 | self.assertEqual(len(split_key_counts["test"]), 24) 145 | self.assertEqual(sum(split_key_counts["train"].values()), 923) 146 | self.assertEqual(sum(split_key_counts["valid"].values()), 236) 147 | self.assertEqual(sum(split_key_counts["test"].values()), 604) 148 | self.assertEqual(len(key_counts), 24) 149 | self.assertEqual(key_counts["F minor"], 176) 150 | self.assertEqual(key_counts["B major"], 27) 151 | self.assertEqual(len(scale_counts), 2) 152 | self.assertEqual(scale_counts["minor"], 1293) 153 | self.assertEqual(scale_counts["major"], 470) 154 | self.assertEqual(len(tonic_counts), 12) 155 | self.assertEqual(tonic_counts["F"], 214) 156 | self.assertEqual(tonic_counts["Db"], 140) 157 | 158 | def test_iter_giantsteps_clips(self): 159 | d = { 160 | uid: metadata for uid, metadata in iter_giantsteps_clips(metadata_only=True) 161 | } 162 | self.assertEqual(len(d), 7035) 163 | 164 | def test_cache_dataset(self): 165 | def iter_fake_dataset(metadata_only): 166 | for v in [0, 1]: 167 | uid = v 168 | metadata = {"y": v} 169 | if metadata_only: 170 | yield uid, metadata 171 | else: 172 | with tempfile.NamedTemporaryFile() as f: 173 | wavwrite( 174 | f.name, 44100, np.full((44100, 2), v, dtype=np.float32) 175 | ) 176 | yield uid, metadata, f.name 177 | 178 | for metadata_only in [False, True]: 179 | with 
tempfile.TemporaryDirectory() as d: 180 | cache_dataset(iter_fake_dataset(metadata_only), d, progress_bar=False) 181 | self.assertEqual( 182 | len(read_dataset_json(pathlib.Path(d, "meta.json"))), 2 183 | ) 184 | if not metadata_only: 185 | self.assertTrue(pathlib.Path(d, "wav", "0.wav").is_file()) 186 | self.assertTrue(pathlib.Path(d, "wav", "1.wav").is_file()) 187 | -------------------------------------------------------------------------------- /jukemir/probe/__init__.py: -------------------------------------------------------------------------------- 1 | import itertools 2 | import json 3 | import logging 4 | import math 5 | import pathlib 6 | import pickle 7 | import random 8 | import tempfile 9 | from collections import OrderedDict, defaultdict 10 | 11 | import mir_eval 12 | import numpy as np 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | from scipy.stats import mode as scipy_mode 17 | from sklearn.metrics import average_precision_score, r2_score, roc_auc_score 18 | from sklearn.preprocessing import StandardScaler 19 | 20 | from .. import CACHE_DATASETS_DIR, CACHE_PROBES_DIR, CACHE_REPRESENTATIONS_DIR 21 | from ..utils import compute_checksum 22 | 23 | DATASET_TO_ATTRS = { 24 | "test": { 25 | "num_outputs": 4, 26 | "output_type": "multiclass", 27 | "labels": ["0", "1", "2", "3"], 28 | }, 29 | "gtzan_ff": { 30 | "num_outputs": 10, 31 | "output_type": "multiclass", 32 | "labels": """blues, classical, country, disco, hiphop, jazz, metal, pop, reggae, rock""".split( 33 | ", " 34 | ), 35 | }, 36 | "giantsteps_clips": { 37 | "num_outputs": 24, 38 | "output_type": "multiclass", 39 | "labels": """C major, Db major, D major, Eb major, E major, F major, Gb major, G major, Ab major, A major, Bb major, B major, C minor, Db minor, D minor, Eb minor, E minor, F minor, Gb minor, G minor, Ab minor, A minor, Bb minor, B minor""".split( 40 | ", " 41 | ), 42 | }, 43 | "magnatagatune": { 44 | "num_outputs": 50, 45 | "output_type": "multilabel", 46 | "labels": """guitar, classical, slow, techno, strings, drums, electronic, rock, fast, piano, ambient, beat, violin, vocal, synth, female, indian, opera, male, singing, vocals, no vocals, harpsichord, loud, quiet, flute, woman, male vocal, no vocal, pop, soft, sitar, solo, man, classic, choir, voice, new age, dance, male voice, female vocal, beats, harp, cello, no voice, weird, country, metal, female voice, choral""".split( 47 | ", " 48 | ), 49 | }, 50 | "emomusic": { 51 | "num_outputs": 2, 52 | "output_type": "regression", 53 | "labels": None, 54 | }, 55 | } 56 | 57 | 58 | class SimpleMLP(nn.Module): 59 | def __init__( 60 | self, 61 | num_features, 62 | hidden_layer_sizes, 63 | num_outputs, 64 | dropout_input=True, 65 | dropout_p=0.5, 66 | ): 67 | super().__init__() 68 | d = num_features 69 | self.num_layers = len(hidden_layer_sizes) 70 | for i, ld in enumerate(hidden_layer_sizes): 71 | setattr(self, f"hidden_{i}", nn.Linear(d, ld)) 72 | d = ld 73 | self.output = nn.Linear(d, num_outputs) 74 | self.dropout = nn.Dropout(p=dropout_p) 75 | 76 | def forward(self, x): 77 | x = self.dropout(x) 78 | for i in range(self.num_layers): 79 | x = getattr(self, f"hidden_{i}")(x) 80 | x = F.relu(x) 81 | x = self.dropout(x) 82 | return self.output(x) 83 | 84 | 85 | class ProbeExperimentConfig(dict): 86 | _DEFAULTS = { 87 | "dataset": None, 88 | "representation": None, 89 | "data_standardization": True, 90 | "hidden_layer_sizes": [], 91 | "batch_size": 64, 92 | "learning_rate": 1e-3, 93 | "dropout_input": True, 94 | "dropout_p": 0.5, 
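Note that `SimpleMLP` above collapses to a single `nn.Linear` whenever `hidden_layer_sizes` is empty, which is the linear-probe configuration. A short shape-check sketch (the 4800-dim feature size is an assumption for illustration, not taken from this file):

```python
# Illustrative only: a linear probe over assumed 4800-dim features,
# emitting one logit per GTZAN genre via the SimpleMLP defined above.
import torch

probe = SimpleMLP(
    num_features=4800,
    hidden_layer_sizes=[],  # no hidden layers => a single linear map
    num_outputs=DATASET_TO_ATTRS["gtzan_ff"]["num_outputs"],
)
probe.eval()  # disable input dropout for a deterministic shape check
with torch.no_grad():
    logits = probe(torch.randn(8, 4800))
print(logits.shape)  # torch.Size([8, 10])
```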
95 | "l2_weight_decay": None, 96 | "max_num_epochs": None, 97 | "early_stopping_metric": "primary", 98 | "early_stopping": True, 99 | "early_stopping_eval_frequency": 8, 100 | "early_stopping_boredom": 256, 101 | "seed": 0, 102 | } 103 | _REQUIRED = ["dataset", "representation"] 104 | 105 | def __init__(self, *args, **kwargs): 106 | kwargs = dict(*args, **kwargs) 107 | for field in self._REQUIRED: 108 | if field not in kwargs: 109 | raise ValueError(f"Required field {field} missing") 110 | for field in kwargs.keys(): 111 | if field not in self._DEFAULTS: 112 | raise ValueError(f"Unknown field {field} specified") 113 | 114 | for field, value in self._DEFAULTS.items(): 115 | if field in kwargs: 116 | value = kwargs[field] 117 | self[field] = value 118 | 119 | try: 120 | json.dumps(self) 121 | except: 122 | raise ValueError("All values must be JSON-serializable") 123 | 124 | def uid(self): 125 | return compute_checksum( 126 | json.dumps(self, indent=2, sort_keys=True).encode("utf-8"), algorithm="sha1" 127 | ) 128 | 129 | 130 | class ProbeExperiment: 131 | def __init__( 132 | self, 133 | cfg, 134 | pretrained_scaler=None, 135 | pretrained_probe=None, 136 | summarize_frequency=8, 137 | datasets_root_dir=None, 138 | representations_root_dir=None, 139 | ): 140 | if not cfg["early_stopping"] and cfg["max_num_epochs"] is None: 141 | raise ValueError("No termination criteria specified") 142 | if datasets_root_dir is None: 143 | datasets_root_dir = CACHE_DATASETS_DIR 144 | if representations_root_dir is None: 145 | representations_root_dir = CACHE_REPRESENTATIONS_DIR 146 | 147 | self.cfg = cfg 148 | self.scaler = pretrained_scaler 149 | self.probe = pretrained_probe 150 | self.summarize_frequency = summarize_frequency 151 | self.datasets_root_dir = datasets_root_dir 152 | self.representations_root_dir = representations_root_dir 153 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 154 | if self.probe is not None: 155 | self.probe.to(self.device) 156 | 157 | def load_data(self): 158 | # Load data from disk 159 | logging.info("Loading data") 160 | with open( 161 | pathlib.Path(self.datasets_root_dir, self.cfg["dataset"], "meta.json"), "r" 162 | ) as f: 163 | data = json.load(f) 164 | for uid in data.keys(): 165 | if data[uid]["split"] not in ["train", "valid", "test"]: 166 | continue 167 | data[uid]["x"] = np.load( 168 | pathlib.Path( 169 | self.representations_root_dir, 170 | self.cfg["dataset"], 171 | self.cfg["representation"], 172 | f"{uid}.npy", 173 | ) 174 | ) 175 | 176 | # Organize data 177 | self.split_to_uids = {"train": [], "valid": [], "test": []} 178 | self.split_to_X = {} 179 | self.split_to_y = {} 180 | for uid, attrs in data.items(): 181 | if data[uid]["split"] not in ["train", "valid", "test"]: 182 | continue 183 | self.split_to_uids[attrs["split"]].append(uid) 184 | self.split_to_uids = {k: sorted(v) for k, v in self.split_to_uids.items()} 185 | for split in ["train", "valid", "test"]: 186 | if self.cfg["seed"] is not None: 187 | random.seed(self.cfg["seed"]) 188 | random.shuffle(self.split_to_uids[split]) 189 | for split, uids in self.split_to_uids.items(): 190 | if len(uids) == 0: 191 | raise Exception("Empty split") 192 | X = np.array([data[uid]["x"] for uid in uids], dtype=np.float32) 193 | y = [data[uid]["y"] for uid in uids] 194 | self.split_to_X[split] = X 195 | self.split_to_y[split] = y 196 | 197 | if len(set(X.shape[1] for X in self.split_to_X.values())) != 1: 198 | raise Exception() 199 | 200 | def raw_labels_to_targets(self, y): 201 | output_type 
= DATASET_TO_ATTRS[self.cfg["dataset"]]["output_type"] 202 | 203 | if output_type != "regression": 204 | id_to_label = DATASET_TO_ATTRS[self.cfg["dataset"]]["labels"] 205 | label_to_id = {v: k for k, v in enumerate(id_to_label)} 206 | assert len(id_to_label) == len(label_to_id) 207 | 208 | num_outputs = DATASET_TO_ATTRS[self.cfg["dataset"]]["num_outputs"] 209 | if output_type == "multiclass": 210 | targets = np.array([label_to_id[yi] for yi in y], dtype=np.int64) 211 | elif output_type == "multilabel": 212 | targets = np.zeros((len(y), num_outputs), dtype=np.uint8) 213 | for i, tags in enumerate(y): 214 | for t in tags: 215 | targets[i, label_to_id[t]] = 1 216 | elif output_type == "regression": 217 | targets = np.array(y, dtype=np.float32) 218 | else: 219 | raise NotImplementedError() 220 | 221 | return targets 222 | 223 | def compute_loss(self, logits, y): 224 | output_type = DATASET_TO_ATTRS[self.cfg["dataset"]]["output_type"] 225 | if output_type == "multiclass": 226 | loss = F.cross_entropy(logits, y, reduction="mean") 227 | elif output_type == "multilabel": 228 | loss = F.binary_cross_entropy_with_logits( 229 | logits, y.float(), reduction="mean" 230 | ) 231 | elif output_type == "regression": 232 | # TODO: Optimize R2 directly? https://stackoverflow.com/questions/65840698/how-to-make-r2-score-in-nn-lstm-pytorch 233 | loss = F.mse_loss(logits, y, reduction="mean") 234 | else: 235 | raise NotImplementedError() 236 | 237 | return loss 238 | 239 | def train(self, wandb=False): 240 | if wandb: 241 | import wandb as wandb_lib 242 | 243 | wandb_lib.init( 244 | project="jukemir", 245 | name=f"{self.cfg['dataset']}-{self.cfg['representation']}-{self.cfg.uid()}", 246 | reinit=True, 247 | ) 248 | wandb_lib.config.update(self.cfg) 249 | 250 | # Set seed 251 | if self.cfg["seed"] is not None: 252 | random.seed(self.cfg["seed"]) 253 | torch.manual_seed(self.cfg["seed"]) 254 | torch.cuda.manual_seed_all(self.cfg["seed"]) 255 | 256 | # Create model 257 | logging.info("Creating model") 258 | self.probe = SimpleMLP( 259 | self.split_to_X["train"].shape[1], 260 | self.cfg["hidden_layer_sizes"], 261 | DATASET_TO_ATTRS[self.cfg["dataset"]]["num_outputs"], 262 | dropout_input=self.cfg["dropout_input"], 263 | dropout_p=self.cfg["dropout_p"], 264 | ) 265 | self.probe.to(self.device) 266 | self.probe.train() 267 | 268 | # Create optimizer 269 | optimizer = torch.optim.Adam( 270 | self.probe.parameters(), 271 | lr=self.cfg["learning_rate"], 272 | weight_decay=0 273 | if self.cfg["l2_weight_decay"] is None 274 | else self.cfg["l2_weight_decay"], 275 | ) 276 | 277 | # Retrieve dataset 278 | X_train = self.split_to_X["train"] 279 | y_train = self.raw_labels_to_targets(self.split_to_y["train"]) 280 | 281 | # Fit scaler 282 | self.scaler = StandardScaler( 283 | with_mean=self.cfg["data_standardization"], 284 | with_std=self.cfg["data_standardization"], 285 | ) 286 | self.scaler.fit(X_train) 287 | 288 | # Train model 289 | step = 0 290 | early_stopping_best_score = float("-inf") 291 | early_stopping_boredom = 0 292 | early_stopping_state_dict = None 293 | while True: 294 | # Check if exceeded max num epochs 295 | epoch = (step * self.cfg["batch_size"]) / X_train.shape[0] 296 | if ( 297 | self.cfg["max_num_epochs"] is not None 298 | and epoch > self.cfg["max_num_epochs"] 299 | ): 300 | break 301 | 302 | # Evaluate for early stopping 303 | if ( 304 | self.cfg["early_stopping"] 305 | and step % self.cfg["early_stopping_eval_frequency"] == 0 306 | ): 307 | if early_stopping_boredom >= 
self.cfg["early_stopping_boredom"]: 308 | if early_stopping_state_dict is not None: 309 | self.probe.load_state_dict(early_stopping_state_dict) 310 | break 311 | with torch.no_grad(): 312 | self.probe.eval() 313 | metrics = self.eval("valid") 314 | if self.cfg["early_stopping_metric"].startswith("-"): 315 | score = -1 * metrics[self.cfg["early_stopping_metric"][1:]] 316 | else: 317 | score = metrics[self.cfg["early_stopping_metric"]] 318 | self.probe.train() 319 | logging.info(f"eval,{step},{score}") 320 | if wandb: 321 | metrics.update( 322 | { 323 | "epoch": epoch, 324 | "early_stopping_score": score, 325 | "early_stopping_best_score": early_stopping_best_score, 326 | "early_stopping_boredom": early_stopping_boredom, 327 | } 328 | ) 329 | wandb_lib.log(metrics, step=step) 330 | if math.isnan(score): 331 | raise Exception("NaN score") 332 | if score > early_stopping_best_score: 333 | early_stopping_best_score = score 334 | early_stopping_boredom = 0 335 | # NOTE: This is just an ignorant way to copy the state dict 336 | # TODO: Reduce ignorance? 337 | with tempfile.NamedTemporaryFile(suffix=".pt") as f: 338 | torch.save(self.probe.state_dict(), f.name) 339 | early_stopping_state_dict = torch.load(f.name) 340 | else: 341 | early_stopping_boredom += 1 342 | 343 | # Create batch 344 | idxs = random.sample( 345 | list(range(X_train.shape[0])), 346 | min(self.cfg["batch_size"], X_train.shape[0]), 347 | ) 348 | X, y = X_train[idxs], y_train[idxs] 349 | X = self.scaler.transform(X) 350 | X = torch.tensor(X, dtype=torch.float32, device=self.device) 351 | y = torch.tensor(y, device=self.device) 352 | 353 | # Update 354 | optimizer.zero_grad() 355 | loss = self.compute_loss(self.probe(X), y) 356 | loss.backward() 357 | optimizer.step() 358 | step += 1 359 | 360 | # Summarize 361 | if step % self.summarize_frequency == 0: 362 | loss = loss.item() 363 | logging.debug(f"train,{step},{loss}") 364 | if wandb: 365 | wandb_lib.log({"train_loss": loss}, step=step) 366 | 367 | def eval_logits(self, X): 368 | X = self.scaler.transform(X) 369 | with torch.no_grad(): 370 | self.probe.eval() 371 | logits = [] 372 | for i in range(0, X.shape[0], self.cfg["batch_size"]): 373 | X_batch = torch.tensor( 374 | X[i : i + self.cfg["batch_size"]], 375 | dtype=torch.float32, 376 | device=self.device, 377 | ) 378 | logits.append(self.probe(X_batch)) 379 | logits = torch.cat(logits, dim=0) 380 | return logits 381 | 382 | def eval(self, uids_or_split_name, X=None, y=None): 383 | if type(uids_or_split_name) == str: 384 | split_name = uids_or_split_name 385 | uids = self.split_to_uids[split_name] 386 | X = self.split_to_X[split_name] 387 | y = self.split_to_y[split_name] 388 | else: 389 | uids = uids_or_split_name 390 | y = self.raw_labels_to_targets(y) 391 | 392 | metrics = {} 393 | primary_metric_name = None 394 | 395 | # Compute logits / task-specific loss 396 | with torch.no_grad(): 397 | self.probe.eval() 398 | logits = self.eval_logits(X) 399 | y_tensor = torch.tensor(y, device=self.device) 400 | metrics["loss"] = self.compute_loss(logits, y_tensor).item() 401 | logits = logits.cpu().numpy() 402 | 403 | # Copute task-specific metrics 404 | if self.cfg["dataset"] in ["test", "gtzan_ff"]: 405 | primary_metric_name = "accuracy" 406 | y_preds = np.argmax(logits, axis=1) 407 | y_correct = y_preds == y 408 | metrics["accuracy"] = y_correct.astype(np.float32).mean() 409 | elif self.cfg["dataset"] == "giantsteps_clips": 410 | primary_metric_name = "score" 411 | 412 | # Get clip probabilities 413 | clip_logits = logits 414 | 
clip_labels = y 415 | clip_preds = np.argmax(logits, axis=1) 416 | with torch.no_grad(): 417 | clip_probs = ( 418 | F.softmax(torch.tensor(logits, device=self.device), dim=-1) 419 | .cpu() 420 | .numpy() 421 | ) 422 | 423 | # Aggregate songs 424 | song_uid_to_clip_idxs = defaultdict(list) 425 | song_uid_to_label = {} 426 | for clip_idx, (clip_uid, label) in enumerate(zip(uids, y)): 427 | song_uid, _ = clip_uid.split("-") 428 | song_uid_to_clip_idxs[song_uid].append(clip_idx) 429 | if song_uid in song_uid_to_label: 430 | assert song_uid_to_label[song_uid] == label 431 | song_uid_to_label[song_uid] = label 432 | song_uids = sorted(song_uid_to_clip_idxs.keys()) 433 | song_labels = np.array( 434 | [song_uid_to_label[song_uid] for song_uid in song_uids] 435 | ) 436 | 437 | # Ensemble predictions 438 | ensemble_strategy_to_song_preds = defaultdict(list) 439 | for song_uid in song_uids: 440 | clip_idxs = song_uid_to_clip_idxs[song_uid] 441 | 442 | song_clip_logits = clip_logits[clip_idxs] 443 | song_clip_preds = clip_preds[clip_idxs] 444 | song_clip_probs = clip_probs[clip_idxs] 445 | ensemble_strategy_to_song_preds["vote"].append( 446 | scipy_mode(song_clip_preds).mode[0] 447 | ) 448 | ensemble_strategy_to_song_preds["max"].append( 449 | song_clip_logits.max(axis=0).argmax() 450 | ) 451 | ensemble_strategy_to_song_preds["gmean"].append( 452 | song_clip_logits.mean(axis=0).argmax() 453 | ) 454 | ensemble_strategy_to_song_preds["mean"].append( 455 | song_clip_probs.mean(axis=0).argmax() 456 | ) 457 | 458 | def _compute_accuracy_and_scores(preds, labels): 459 | id_to_label = DATASET_TO_ATTRS["giantsteps_clips"]["labels"] 460 | assert preds.shape == labels.shape 461 | correct = preds == labels 462 | accuracy = correct.astype(np.float32).mean() 463 | scores = [ 464 | mir_eval.key.weighted_score( 465 | id_to_label[ref_key], id_to_label[est_key] 466 | ) 467 | for ref_key, est_key in zip(labels, preds) 468 | ] 469 | return accuracy, np.mean(scores) 470 | 471 | # Compute all metrics 472 | comparisons = [ 473 | ( 474 | "clip", 475 | np.argmax(clip_probs, axis=1), 476 | clip_labels, 477 | ) 478 | ] 479 | comparisons += [ 480 | (f"ensemble_{strategy_name}", np.array(strategy_preds), song_labels) 481 | for strategy_name, strategy_preds in ensemble_strategy_to_song_preds.items() 482 | ] 483 | for prefix, preds, labels in comparisons: 484 | accuracy, score = _compute_accuracy_and_scores(preds, labels) 485 | metrics[f"{prefix}_accuracy"] = accuracy 486 | metrics[f"{prefix}_score"] = score 487 | 488 | # Find best ensemble strategy 489 | if uids_or_split_name == "valid": 490 | validation_metrics = metrics 491 | else: 492 | validation_metrics = self.eval("valid") 493 | best_strategy_name = None 494 | best_score = float("-inf") 495 | for strategy_name in ensemble_strategy_to_song_preds.keys(): 496 | score = validation_metrics[f"ensemble_{strategy_name}_score"] 497 | if score > best_score: 498 | best_strategy_name = strategy_name 499 | best_score = score 500 | metrics[f"accuracy"] = metrics[f"ensemble_{best_strategy_name}_accuracy"] 501 | metrics[f"score"] = metrics[f"ensemble_{best_strategy_name}_score"] 502 | 503 | elif self.cfg["dataset"] == "magnatagatune": 504 | primary_metric_name = "auc_roc" 505 | with torch.no_grad(): 506 | y_probs = ( 507 | torch.sigmoid(torch.tensor(logits, device=self.device)) 508 | .cpu() 509 | .numpy() 510 | ) 511 | metrics["auc_roc"] = roc_auc_score(y, y_probs, average="macro") 512 | metrics["ap"] = average_precision_score(y, y_probs, average="macro") 513 | elif self.cfg["dataset"] == 
"emomusic": 514 | primary_metric_name = "r2" 515 | metrics["r2"] = r2_score(y, logits) 516 | metrics["arousal_r2"] = r2_score(y[:, 0], logits[:, 0]) 517 | metrics["valence_r2"] = r2_score(y[:, 1], logits[:, 1]) 518 | else: 519 | raise NotImplementedError() 520 | 521 | # Convert to simple Python types 522 | for k, v in metrics.items(): 523 | if isinstance(v, (np.ndarray, np.generic)): 524 | metrics[k] = v.tolist() 525 | 526 | assert "primary" not in metrics 527 | metrics["primary"] = metrics[primary_metric_name] 528 | return metrics 529 | 530 | def save(self, root_dir=None): 531 | if root_dir is None: 532 | root_dir = CACHE_PROBES_DIR 533 | uid = self.cfg.uid() 534 | model_dir = pathlib.Path( 535 | root_dir, self.cfg["dataset"], self.cfg["representation"], uid 536 | ) 537 | model_dir.mkdir(parents=True, exist_ok=True) 538 | with open(pathlib.Path(model_dir, f"cfg.json"), "w") as f: 539 | f.write(json.dumps(self.cfg, indent=2, sort_keys=True)) 540 | with open(pathlib.Path(model_dir, f"scaler.pkl"), "wb") as f: 541 | pickle.dump(self.scaler, f) 542 | torch.save(self.probe.state_dict(), pathlib.Path(model_dir, f"probe.pt")) 543 | with open(pathlib.Path(model_dir, f"metrics.json"), "w") as f: 544 | f.write(json.dumps(self.eval("valid"), indent=2, sort_keys=True)) 545 | 546 | @classmethod 547 | def load(cls, uid, root_dir=CACHE_PROBES_DIR, **kwargs): 548 | model_dir = [d for d in pathlib.Path(root_dir).rglob(f"{uid}*") if d.is_dir()] 549 | if len(model_dir) < 1: 550 | raise ValueError("Could not find model directory") 551 | model_dir = model_dir[0] 552 | with open(pathlib.Path(model_dir, f"cfg.json"), "r") as f: 553 | cfg = json.load(f) 554 | with open(pathlib.Path(model_dir, f"scaler.pkl"), "rb") as f: 555 | scaler = pickle.load(f) 556 | state_dict = torch.load( 557 | pathlib.Path(model_dir, f"probe.pt"), 558 | map_location=torch.device("cuda" if torch.cuda.is_available() else "cpu"), 559 | ) 560 | if len(cfg["hidden_layer_sizes"]) > 0: 561 | input_layer = "hidden_0.weight" 562 | else: 563 | input_layer = "output.weight" 564 | probe = SimpleMLP( 565 | state_dict[input_layer].shape[1], 566 | cfg["hidden_layer_sizes"], 567 | DATASET_TO_ATTRS[cfg["dataset"]]["num_outputs"], 568 | dropout_input=cfg["dropout_input"], 569 | dropout_p=cfg["dropout_p"], 570 | ) 571 | probe.load_state_dict(state_dict) 572 | return cls(cfg, pretrained_scaler=scaler, pretrained_probe=probe, **kwargs) 573 | 574 | 575 | def execute_probe_experiment( 576 | cfg, 577 | wandb=False, 578 | output_root_dir=None, 579 | datasets_root_dir=None, 580 | representations_root_dir=None, 581 | ): 582 | exp = ProbeExperiment( 583 | cfg, 584 | datasets_root_dir=datasets_root_dir, 585 | representations_root_dir=representations_root_dir, 586 | ) 587 | exp.load_data() 588 | exp.train(wandb=wandb) 589 | exp.save(root_dir=output_root_dir) 590 | return exp 591 | 592 | 593 | def grid_cfgs(dataset, representation, grid): 594 | if not isinstance(grid, dict): 595 | raise TypeError() 596 | grid = OrderedDict(grid) 597 | cfgs = [] 598 | if len(grid) > 0: 599 | for combination in itertools.product(*grid.values()): 600 | cfgs.append( 601 | ProbeExperimentConfig( 602 | dataset=dataset, 603 | representation=representation, 604 | **{k: v for k, v in zip(grid.keys(), combination)}, 605 | ) 606 | ) 607 | return cfgs 608 | 609 | 610 | _PAPER_GRID = { 611 | "data_standardization": [False, True], 612 | "hidden_layer_sizes": [[], [512]], 613 | "batch_size": [64, 256], 614 | "learning_rate": [1e-5, 1e-4, 1e-3], 615 | "dropout_p": [0.25, 0.5, 0.75], 616 | 
"l2_weight_decay": [None, 1e-4, 1e-3], 617 | } 618 | 619 | 620 | def paper_grid_cfgs(dataset, representation): 621 | return grid_cfgs(dataset, representation, _PAPER_GRID) 622 | -------------------------------------------------------------------------------- /jukemir/probe/aggregate.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import json 3 | import pathlib 4 | from argparse import ArgumentParser 5 | 6 | from .. import CACHE_PROBES_DIR 7 | from . import ProbeExperiment, paper_grid_cfgs 8 | 9 | parser = ArgumentParser() 10 | parser.add_argument("dataset", type=str) 11 | parser.add_argument("representation", type=str) 12 | parser.add_argument("--output_metrics_path", type=str) 13 | parser.add_argument("--metric", type=str) 14 | parser.add_argument("--evaluate", type=str) 15 | parser.add_argument("--expected_num_runs", type=int) 16 | parser.add_argument("--probes_root_dir", type=str) 17 | parser.add_argument("--datasets_root_dir", type=str) 18 | parser.add_argument("--representations_root_dir", type=str) 19 | 20 | parser.set_defaults( 21 | output_metrics_path=None, 22 | metric="primary", 23 | evaluate="valid", 24 | expected_num_runs=None, 25 | probes_root_dir=None, 26 | datasets_root_dir=None, 27 | representations_root_dir=None, 28 | ) 29 | 30 | args = parser.parse_args() 31 | 32 | # Find all runs 33 | probes_root_dir = pathlib.Path( 34 | CACHE_PROBES_DIR if args.probes_root_dir is None else args.probes_root_dir, 35 | args.dataset, 36 | args.representation, 37 | ) 38 | metrics_paths = list(probes_root_dir.rglob("metrics.json")) 39 | if ( 40 | args.expected_num_runs is not None 41 | and len(metrics_paths) != args.expected_num_runs 42 | ): 43 | raise Exception( 44 | f"Expected {args.expected_num_runs} runs but found {len(metrics_paths)}" 45 | ) 46 | 47 | # Find best run 48 | best_uid = None 49 | best_metrics = None 50 | for metrics_path in metrics_paths: 51 | uid = metrics_path.parent.stem 52 | with open(metrics_path, "r") as f: 53 | metrics = json.load(f) 54 | if best_metrics is None or metrics[args.metric] > best_metrics[args.metric]: 55 | best_uid = uid 56 | best_metrics = metrics 57 | print("best uid") 58 | print(best_uid) 59 | print("valid") 60 | print(best_metrics) 61 | 62 | # Compute performance 63 | exp = ProbeExperiment.load(best_uid, root_dir=probes_root_dir) 64 | exp.load_data() 65 | metrics = exp.eval(args.evaluate) 66 | metrics = {k: v for k, v in sorted(metrics.items(), key=lambda x: x[0])} 67 | if args.output_metrics_path is not None: 68 | with open(args.output_metrics_path, "w") as f: 69 | f.write(json.dumps(metrics, indent=2, sort_keys=True)) 70 | print(args.evaluate) 71 | print(metrics) 72 | -------------------------------------------------------------------------------- /jukemir/probe/execute.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import json 3 | import logging 4 | from argparse import ArgumentParser 5 | 6 | from . 
import ProbeExperiment, ProbeExperimentConfig, execute_probe_experiment 7 | 8 | parser = ArgumentParser() 9 | parser.add_argument("cfg_json_path", type=str) 10 | parser.add_argument("--output_root_dir", type=str) 11 | parser.add_argument("--datasets_root_dir", type=str) 12 | parser.add_argument("--representations_root_dir", type=str) 13 | parser.add_argument("--wandb", action="store_true") 14 | 15 | parser.set_defaults( 16 | wandb=False, 17 | output_root_dir=None, 18 | datasets_root_dir=None, 19 | representations_root_dir=None, 20 | ) 21 | 22 | args = parser.parse_args() 23 | 24 | with open(args.cfg_json_path, "r") as f: 25 | cfg = ProbeExperimentConfig(json.load(f)) 26 | execute_probe_experiment( 27 | cfg, 28 | wandb=args.wandb, 29 | output_root_dir=args.output_root_dir, 30 | datasets_root_dir=args.datasets_root_dir, 31 | representations_root_dir=args.representations_root_dir, 32 | ) 33 | -------------------------------------------------------------------------------- /jukemir/probe/test.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pathlib 3 | import random 4 | import tempfile 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from . import ( 10 | ProbeExperiment, 11 | ProbeExperimentConfig, 12 | SimpleMLP, 13 | execute_probe_experiment, 14 | grid_cfgs, 15 | paper_grid_cfgs, 16 | ) 17 | 18 | 19 | class Test(unittest.TestCase): 20 | def test_simple_mlp(self): 21 | mlp = SimpleMLP(100, [50], 10) 22 | num_parameters = sum(p.numel() for p in mlp.parameters()) 23 | self.assertEqual(num_parameters, 100 * 50 + 50 + 50 * 10 + 10) 24 | 25 | def test_probe_experiment_config(self): 26 | cfg = ProbeExperimentConfig(dataset="debug", representation="debug") 27 | self.assertEqual(cfg.uid(), "931d09133314c180443615815249a7415da09c86") 28 | with self.assertRaisesRegex(ValueError, "Required field"): 29 | ProbeExperimentConfig() 30 | with self.assertRaisesRegex(ValueError, "Unknown field"): 31 | ProbeExperimentConfig(dataset="debug", representation="debug", foo="bar") 32 | with self.assertRaisesRegex(ValueError, "All values must be JSON-serializable"): 33 | 34 | class NotSerializable: 35 | pass 36 | 37 | ProbeExperimentConfig( 38 | dataset="debug", representation="debug", batch_size=NotSerializable() 39 | ) 40 | 41 | def test_probe_experiment(self): 42 | cfg = ProbeExperimentConfig( 43 | dataset="test", representation="test", max_num_epochs=100, dropout_p=0 44 | ) 45 | with tempfile.TemporaryDirectory() as ddir, tempfile.TemporaryDirectory() as rdir, tempfile.TemporaryDirectory() as odir: 46 | ddir = pathlib.Path(ddir) 47 | rdir = pathlib.Path(rdir) 48 | odir = pathlib.Path(odir) 49 | 50 | pathlib.Path(ddir, "test").mkdir() 51 | pathlib.Path(rdir, "test", "test").mkdir(parents=True) 52 | 53 | # Create fake dataset 54 | meta = {} 55 | random.seed(0) 56 | np.random.seed(0) 57 | ratio = ["train"] * 8 + ["valid"] + ["test"] 58 | for i in range(1000): 59 | y = np.random.randint(0, 4) 60 | x = np.zeros(4, dtype=np.float32) 61 | x[y] = 1 62 | meta[str(i)] = {"y": str(y), "split": ratio[i % len(ratio)]} 63 | np.save(pathlib.Path(rdir, "test", "test", f"{i}.npy"), x) 64 | with open(pathlib.Path(ddir, "test", "meta.json"), "w") as f: 65 | f.write(json.dumps(meta)) 66 | 67 | # Train 68 | exp = execute_probe_experiment( 69 | cfg, 70 | output_root_dir=odir, 71 | datasets_root_dir=ddir, 72 | representations_root_dir=rdir, 73 | ) 74 | 75 | # Evaluate 76 | results = exp.eval("test") 77 | self.assertEqual(results["accuracy"], 1) 78 | 
self.assertLess(abs(results["loss"] - 0.09166), 0.01) 79 | 80 | # Load saved model 81 | uid = exp.cfg.uid() 82 | exp_hat = ProbeExperiment.load( 83 | uid, 84 | root_dir=odir, 85 | datasets_root_dir=ddir, 86 | representations_root_dir=rdir, 87 | ) 88 | exp_hat.load_data() 89 | results_hat = exp_hat.eval("test") 90 | self.assertEqual(results, results_hat) 91 | 92 | def test_grid_cfgs(self): 93 | cfgs = grid_cfgs("test", "test", {}) 94 | self.assertEqual(cfgs, []) 95 | cfgs = grid_cfgs("test", "test", {"batch_size": [10]}) 96 | self.assertEqual(len(cfgs), 1) 97 | self.assertEqual(cfgs[0]["batch_size"], 10) 98 | cfgs = grid_cfgs( 99 | "test", "test", {"batch_size": [10, 20], "learning_rate": [1, 2]} 100 | ) 101 | self.assertEqual(len(cfgs), 4) 102 | self.assertEqual(cfgs[-1]["batch_size"], 20) 103 | self.assertEqual(cfgs[-1]["learning_rate"], 2) 104 | cfgs = paper_grid_cfgs("test", "test") 105 | self.assertEqual(len(cfgs), 216) 106 | -------------------------------------------------------------------------------- /jukemir/utils.py: -------------------------------------------------------------------------------- 1 | """Common utilities""" 2 | 3 | import base64 4 | import gzip 5 | import hashlib 6 | import shlex 7 | import subprocess 8 | 9 | import numpy as np 10 | 11 | 12 | def run_cmd_sync(cmd, cwd=None, interactive=False, timeout=None): 13 | """Runs a console command synchronously and returns the results. 14 | 15 | Parameters 16 | ---------- 17 | cmd : str 18 | The command to execute. 19 | cwd : :class:`pathlib.Path`, optional 20 | The working directory in which to execute the command. 21 | interactive : bool, optional 22 | If set, run command interactively and pipe all output to console. 23 | timeout : float, optional 24 | If specified, kills process and throws error after this many seconds. 25 | 26 | Returns 27 | ------- 28 | int 29 | Process exit status code. 30 | str, optional 31 | Standard output (if not in interactive mode). 32 | str, optional 33 | Standard error (if not in interactive mode). 34 | 35 | Raises 36 | ------ 37 | :class:`ValueError` 38 | Empty command. 39 | :class:`NotADirectoryError` 40 | Specified working directory is not a directory. 41 | :class:`subprocess.TimeoutExpired` 42 | Specified timeout expired. 43 | """ 44 | if cmd is None or len(cmd.strip()) == 0: 45 | raise ValueError() 46 | 47 | kwargs = {} 48 | if not interactive: 49 | kwargs["stdout"] = subprocess.PIPE 50 | kwargs["stderr"] = subprocess.PIPE 51 | 52 | p = subprocess.Popen(shlex.split(cmd), cwd=cwd, **kwargs) 53 | try: 54 | p_res = p.communicate(timeout=timeout) 55 | except subprocess.TimeoutExpired as e: 56 | p.kill() 57 | p.wait() 58 | raise e 59 | 60 | result = p.returncode 61 | 62 | if not interactive: 63 | stdout, stderr = [s.decode("utf-8").strip() for s in p_res] 64 | result = (result, stdout, stderr) 65 | 66 | return result 67 | 68 | 69 | def compute_checksum(path_or_bytes, algorithm="sha256", gunzip=False, chunk_size=4096): 70 | """Computes checksum of target path. 71 | 72 | Parameters 73 | ---------- 74 | path_or_bytes : :class:`pathlib.Path` or bytes 75 | Location or bytes of file to compute checksum for. 76 | algorithm : str, optional 77 | Hash algorithm (from :func:`hashlib.algorithms_available`); default ``sha256``. 78 | gunzip : bool, optional 79 | If true, decompress before computing checksum. 80 | chunk_size : int, optional 81 | Chunk size for iterating through file. 82 | 83 | Raises 84 | ------ 85 | :class:`FileNotFoundError` 86 | Unknown path. 
87 | :class:`IsADirectoryError` 88 | Path is a directory. 89 | :class:`ValueError` 90 | Unknown algorithm. 91 | 92 | Returns 93 | ------- 94 | str 95 | Hex representation of checksum. 96 | """ 97 | if algorithm not in hashlib.algorithms_guaranteed or algorithm.startswith("shake"): 98 | raise ValueError("Unknown algorithm") 99 | computed = hashlib.new(algorithm) 100 | if isinstance(path_or_bytes, bytes): 101 | computed.update(path_or_bytes) 102 | else: 103 | open_fn = gzip.open if gunzip else open 104 | with open_fn(path_or_bytes, "rb") as f: 105 | while True: 106 | data = f.read(chunk_size) 107 | if not data: 108 | break 109 | computed.update(data) 110 | return computed.hexdigest() 111 | 112 | 113 | def encode_base64(b): 114 | """Encode bytes as JSON-friendly base64 string.""" 115 | return base64.b64encode(b).decode("ascii") 116 | 117 | 118 | def decode_base64(b): 119 | """Decode base64 string as bytes.""" 120 | return base64.b64decode(b) 121 | 122 | 123 | def float32_to_pcm16(wav): 124 | """Quantize floating-point waveform to 16-bit signed PCM.""" 125 | if wav.dtype != np.float32: 126 | raise ValueError() 127 | wav = np.copy(wav) 128 | wav *= np.iinfo(np.int16).max 129 | wav = np.clip(wav, np.iinfo(np.int16).min, np.iinfo(np.int16).max) 130 | return wav.astype(np.int16) 131 | 132 | 133 | def pcm16_to_float32(wav): 134 | """Convert 16-bit signed PCM to floating-point waveform.""" 135 | if wav.dtype != np.int16: 136 | raise ValueError() 137 | return wav.astype(np.float32) / np.iinfo(np.int16).max 138 | -------------------------------------------------------------------------------- /jukemir/utils_test.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import pathlib 3 | import subprocess 4 | import tempfile 5 | import unittest 6 | 7 | import numpy as np 8 | 9 | from .utils import compute_checksum, decode_base64, encode_base64, run_cmd_sync 10 | 11 | 12 | class TestUtils(unittest.TestCase): 13 | def test_run_cmd_sync(self): 14 | status, stdout, stderr = run_cmd_sync( 15 | "ls", cwd=pathlib.Path(__file__).resolve().parent 16 | ) 17 | self.assertEqual(status, 0) 18 | self.assertTrue(pathlib.Path(__file__).parts[-1] in stdout) 19 | self.assertEqual(stderr, "") 20 | with self.assertRaises(ValueError): 21 | run_cmd_sync("") 22 | with self.assertRaises(NotADirectoryError): 23 | run_cmd_sync("ls", cwd=pathlib.Path(__file__).resolve()) 24 | with self.assertRaises(subprocess.TimeoutExpired): 25 | run_cmd_sync("sleep 1", timeout=1e-2) 26 | # TODO: Test interactive somehow?
contextlib.redirect_stdout didn't work 27 | 28 | def test_compute_checksum(self): 29 | with tempfile.NamedTemporaryFile() as f: 30 | path = pathlib.Path(f.name) 31 | with open(path, "w") as f: 32 | f.write("foo") 33 | self.assertEqual( 34 | compute_checksum(path), 35 | "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", 36 | ) 37 | self.assertEqual( 38 | compute_checksum("foo".encode("utf-8")), 39 | "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", 40 | ) 41 | self.assertEqual( 42 | compute_checksum(path, algorithm="md5"), 43 | "acbd18db4cc2f85cedef654fccc4a4d8", 44 | ) 45 | for algorithm in hashlib.algorithms_guaranteed: 46 | if algorithm.startswith("shake"): 47 | continue 48 | checksum = compute_checksum(path, algorithm=algorithm) 49 | self.assertTrue(isinstance(checksum, str)) 50 | self.assertTrue(checksum.strip(), checksum) 51 | self.assertGreater(len(checksum), 0) 52 | 53 | with tempfile.TemporaryDirectory() as d: 54 | d = pathlib.Path(d) 55 | with self.assertRaises(FileNotFoundError): 56 | compute_checksum(pathlib.Path(d, "nonexistent")) 57 | with self.assertRaises(IsADirectoryError): 58 | compute_checksum(d) 59 | 60 | with self.assertRaises(ValueError): 61 | compute_checksum(None, algorithm="shake_128") 62 | with self.assertRaises(ValueError): 63 | compute_checksum(None, algorithm="foo256") 64 | 65 | def test_base64(self): 66 | payload = "foo".encode("utf-8") 67 | payload_b64 = encode_base64(payload) 68 | self.assertEqual(payload_b64, "Zm9v") 69 | payload_hat = decode_base64(payload_b64) 70 | self.assertEqual(payload, payload_hat) 71 | 72 | payload = np.random.rand(1024).astype(np.float32) 73 | payload_b64 = encode_base64(payload) 74 | self.assertEqual(len(payload_b64), 5464) 75 | payload_hat = np.frombuffer(decode_base64(payload_b64), dtype=np.float32) 76 | self.assertTrue(np.array_equal(payload, payload_hat)) 77 | -------------------------------------------------------------------------------- /metadata/emomusic.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-lambda/jukemir/241555645682d18fb909775c54014edfc3a03163/metadata/emomusic.json.gz -------------------------------------------------------------------------------- /metadata/giantsteps_clips.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-lambda/jukemir/241555645682d18fb909775c54014edfc3a03163/metadata/giantsteps_clips.json.gz -------------------------------------------------------------------------------- /metadata/gtzan_ff.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-lambda/jukemir/241555645682d18fb909775c54014edfc3a03163/metadata/gtzan_ff.json.gz -------------------------------------------------------------------------------- /metadata/magnatagatune.json.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/p-lambda/jukemir/241555645682d18fb909775c54014edfc3a03163/metadata/magnatagatune.json.gz -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.black] 2 | line-length = 88 3 | target-version = ['py36'] 4 | 5 | [tool.isort] 6 | multi_line_output = 3 7 | include_trailing_comma = true 8 | force_grid_wrap = 0 9 | use_parentheses = true 10 | 
ensure_newline_before_comments = true 11 | line_length = 88 12 | -------------------------------------------------------------------------------- /representations/build.sh: -------------------------------------------------------------------------------- 1 | source env.sh 2 | 3 | docker build -t $DOCKER_NAMESPACE/$DOCKER_TAG -f $REPRESENTATION_TAG.dockerfile . 4 | -------------------------------------------------------------------------------- /representations/choi.dockerfile: -------------------------------------------------------------------------------- 1 | # Import Ubuntu/CUDA/cuDNN 2 | FROM nvidia/cuda:8.0-cudnn5-devel-ubuntu14.04 3 | 4 | # Configure basic environment 5 | ENV DEBIAN_FRONTEND noninteractive 6 | RUN apt-get update -y || true 7 | RUN apt-get install -y \ 8 | build-essential \ 9 | git \ 10 | software-properties-common \ 11 | libssl-dev \ 12 | unzip 13 | SHELL ["/bin/bash", "-c"] 14 | 15 | # Install Python 16 | ENV LANG C.UTF-8 17 | RUN apt-get install -y python python-dev python-tk python-pip 18 | #RUN add-apt-repository ppa:deadsnakes/ppa 19 | #RUN apt-get update -y || true 20 | #RUN apt-get install python3.6 -y \ 21 | # python3.6-dev 22 | #RUN ln -s $(which python3.6) /usr/local/bin/python 23 | 24 | # Install pip 25 | RUN python -m pip install --upgrade pip==9.0.1 26 | RUN python -m pip install --upgrade setuptools==34.3.3 27 | #RUN apt-get install -y curl; curl https://bootstrap.pypa.io/get-pip.py | python 28 | #RUN python -m pip install --upgrade "pip<20.3" setuptools 29 | 30 | # Install requirements for (Choi et al. 2017) 31 | RUN python -m pip install --no-cache-dir theano==0.9 32 | RUN python -m pip install --no-cache-dir keras==1.2.2 33 | RUN mkdir -p /root/.keras 34 | RUN echo "{\"image_dim_ordering\": \"th\", \"epsilon\": 1e-07, \"floatx\": \"float32\", \"backend\": \"theano\"}" > /root/.keras/keras.json 35 | RUN python -m pip install --no-cache-dir cython==0.25.2 36 | RUN python -m pip install --no-cache-dir joblib==0.11 --force-reinstall 37 | RUN python -m pip install --no-cache-dir librosa==0.4 38 | RUN python -m pip install --no-cache-dir pandas==0.19.2 39 | RUN python -m pip install --no-cache-dir h5py==2.6.0 40 | 41 | # Install code for (Choi et al. 
2017) 42 | RUN mkdir /input 43 | RUN mkdir /output 44 | RUN mkdir /code 45 | WORKDIR /code 46 | RUN git clone https://github.com/keunwoochoi/kapre.git 47 | RUN cd kapre; git reset --hard a3bde3e; python setup.py install 48 | RUN git clone https://github.com/keunwoochoi/transfer_learning_music.git 49 | WORKDIR /code/transfer_learning_music 50 | RUN git reset --hard e1f83c3 51 | RUN mkdir input 52 | RUN mkdir output 53 | COPY choi/main.py main.py 54 | 55 | # Test 56 | ENV PATH /usr/local/cuda/bin:$PATH 57 | ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:$LD_LIBRARY_PATH 58 | ENV CUDA_HOME /usr/local/cuda 59 | ENV THEANO_FLAGS floatX=float32,device=cpu,openmp=False 60 | ENTRYPOINT ["python", "main.py"] 61 | -------------------------------------------------------------------------------- /representations/choi/main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Modified from: https://github.com/keunwoochoi/transfer_learning_music/blob/master/utils_featext.py 3 | """ 4 | 5 | import time 6 | from argparse import Namespace 7 | from multiprocessing import Pool 8 | 9 | import librosa 10 | import pandas as pd 11 | from keras import backend as K 12 | from models_transfer import build_convnet_model 13 | 14 | FOLDER_CSV = "data_csv/" 15 | FOLDER_FEATS = "data_feats/" 16 | FOLDER_WEIGHTS = "weights_transfer/" 17 | 18 | SR = 12000 # [Hz] 19 | len_src = 29.0 # [second] 20 | N_JOBS = 9 21 | ref_n_src = 12000 * 29 22 | 23 | 24 | def gen_filepaths(df, dataroot=None): 25 | if dataroot is None: 26 | dataroot = PATH_DATASETS 27 | for filepath in df["filepath"]: 28 | yield os.path.join(dataroot, filepath) 29 | 30 | 31 | def gen_audiofiles(df, batch_size=256, dataroot=None): 32 | """gen single audio file src in a batch_size=1 form for keras model.predict_generator 33 | df: dataframe 34 | total_size: integer. 35 | batch_size: integer. 36 | dataroot: root path for data""" 37 | 38 | """""" 39 | pool = Pool(N_JOBS) 40 | 41 | def _multi_loading(pool, paths): 42 | srcs = pool.map(_load_audio, paths) 43 | srcs = np.array(srcs) 44 | try: 45 | srcs = srcs[:, np.newaxis, :] 46 | except: 47 | pdb.set_trace() 48 | 49 | return srcs 50 | 51 | total_size = len(df) 52 | n_leftover = int(total_size % batch_size) 53 | leftover = n_leftover != 0 54 | n_batch = int(total_size / batch_size) 55 | gen_f = gen_filepaths(df, dataroot=dataroot) 56 | print( 57 | "n_batch: {}, n_leftover: {}, all: {}".format(n_batch, n_leftover, total_size) 58 | ) 59 | 60 | for batch_idx in xrange(n_batch): 61 | paths = [] 62 | for inbatch_idx in range(batch_size): 63 | paths.append(gen_f.next()) 64 | print("..yielding {}/{} batch..".format(batch_idx, n_batch)) 65 | yield _multi_loading(pool, paths) 66 | 67 | if leftover: 68 | paths = [] 69 | for inbatch_idx in range(n_leftover): 70 | paths.append(gen_f.next()) 71 | print("..yielding final batch w {} data sample..".format(len(paths))) 72 | yield _multi_loading(pool, paths) 73 | 74 | 75 | def _load_audio(path, zero_pad=False): 76 | """return (N,) shape mono audio signal 77 | if zero_pad, pad zeros. 
78 | Else, repeat and trim.""" 79 | src, sr = librosa.load(path, sr=SR, duration=len_src * 12000.0 / float(SR)) 80 | if len(src) >= ref_n_src: 81 | return src[:ref_n_src] 82 | else: 83 | if zero_pad: 84 | result = np.zeros(ref_n_src) 85 | result[: len(src)] = src[:ref_n_src] 86 | return result 87 | else: 88 | n_tile = np.ceil(float(ref_n_src) / len(src)).astype("int") 89 | src = np.tile(src, n_tile) 90 | return src[:ref_n_src] 91 | 92 | 93 | def load_model_for_mid(mid_idx): 94 | assert 0 <= mid_idx <= 4 95 | args = Namespace( 96 | test=False, 97 | data_percent=100, 98 | model_name="", 99 | tf_type="melgram", 100 | normalize="no", 101 | decibel=True, 102 | fmin=0.0, 103 | fmax=6000, 104 | n_mels=96, 105 | trainable_fb=False, 106 | trainable_kernel=False, 107 | conv_until=mid_idx, 108 | ) 109 | model = build_convnet_model(args, last_layer=False) 110 | model.load_weights( 111 | os.path.join( 112 | FOLDER_WEIGHTS, "weights_layer{}_{}.hdf5".format(mid_idx, K._backend) 113 | ), 114 | by_name=True, 115 | ) 116 | print("----- model {} weights are loaded. (NO ELM!!!) -----".format(mid_idx)) 117 | 118 | return model 119 | 120 | 121 | def predict(filename, batch_size, model, dataroot=None, npy_suffix=""): 122 | if dataroot is None: 123 | dataroot = PATH_DATASETS 124 | start = time.time() 125 | csv_filename = "{}.csv".format(filename) 126 | npy_filename = "{}{}.npy".format(filename, npy_suffix) 127 | df = pd.DataFrame.from_csv(os.path.join(FOLDER_CSV, csv_filename)) 128 | print("{}: Dataframe with size:{}").format(filename, len(df)) 129 | example_path = os.path.join(dataroot, df["filepath"][0]) 130 | print("An example path - does it exists? {}".format(os.path.exists(example_path))) 131 | print(df.columns) 132 | gen_audio = gen_audiofiles(df, batch_size, dataroot) 133 | feats = model.predict_generator( 134 | generator=gen_audio, val_samples=len(df), max_q_size=1 135 | ) 136 | np.save(os.path.join(FOLDER_FEATS, npy_filename), feats) 137 | print("DONE! You! uuuuu uu! 
in {:6.4f} sec".format(time.time() - start)) 138 | 139 | 140 | if __name__ == "__main__": 141 | import glob 142 | import os 143 | import random 144 | from argparse import ArgumentParser 145 | 146 | import numpy as np 147 | 148 | parser = ArgumentParser() 149 | parser.add_argument("--batch_size", type=int) 150 | parser.add_argument("--batch_idx", type=int) 151 | parser.add_argument("--representations", type=str) 152 | 153 | parser.set_defaults( 154 | batch_size=256, 155 | batch_idx=None, 156 | representations="mean_pool,max_pool", 157 | ) 158 | args = parser.parse_args() 159 | 160 | output_dir = "/output" 161 | if not os.path.isdir(output_dir): 162 | os.makedirs(output_dir) 163 | input_paths = sorted(list(glob.glob("/input/*"))) 164 | # NOTE: Choi is order-dependent due to batchnorm so this shuffle sidesteps file ordering bias issues (e.g., if file paths are ordered by label) 165 | random.seed(0) 166 | random.shuffle(input_paths) 167 | batch_starts = list(range(0, len(input_paths), args.batch_size)) 168 | if args.batch_idx is not None: 169 | if args.batch_idx >= len(batch_starts): 170 | raise ValueError("Invalid batch index") 171 | batch_starts = [batch_starts[args.batch_idx]] 172 | batches = [input_paths[s : s + args.batch_size] for s in batch_starts] 173 | 174 | models = None 175 | for input_batch in batches: 176 | # Check if batch done 177 | batch_done = True 178 | output_paths = [] 179 | for input_path in input_batch: 180 | output_path = os.path.join( 181 | output_dir, 182 | "{}.npy".format(os.path.splitext(os.path.split(input_path)[1])[0]), 183 | ) 184 | output_paths.append(output_path) 185 | try: 186 | np.load(output_path) 187 | except: 188 | batch_done = False 189 | if batch_done: 190 | continue 191 | 192 | # Create dummy CSV 193 | column_names = ["id", "filepath", "label"] 194 | rows = [[i, in_fp, 0] for i, in_fp in enumerate(input_batch)] 195 | df = pd.DataFrame(rows, columns=column_names) 196 | df.to_csv("data_csv/dummy.csv") 197 | 198 | # Predict 199 | if models is None: 200 | models = [load_model_for_mid(i) for i in range(5)] 201 | representations = [] 202 | for i in range(5): 203 | predict("dummy", args.batch_size, models[i], ".", "_layer_{}".format(i)) 204 | representations.append(np.load("data_feats/dummy_layer_{}.npy".format(i))) 205 | 206 | # Save batch 207 | for i in range(len(output_paths)): 208 | representation = np.concatenate([r[i] for r in representations]) 209 | np.save(output_paths[i], representation) 210 | -------------------------------------------------------------------------------- /representations/chroma.dockerfile: -------------------------------------------------------------------------------- 1 | # Import Ubuntu/CUDA/cuDNN 2 | FROM ubuntu:20.04 3 | 4 | # Configure shell 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | SHELL ["/bin/bash", "-c"] 7 | RUN apt-get update --fix-missing 8 | 9 | # Install Python 10 | ENV LANG C.UTF-8 11 | RUN apt-get install -y python3 python3-dev python3-pip 12 | RUN ln -s $(which python3) /usr/local/bin/python 13 | RUN python -m pip install --no-cache-dir --upgrade pip setuptools 14 | 15 | # Install deps 16 | RUN apt-get install -y libsndfile1-dev 17 | RUN python -m pip install --no-cache-dir librosa==0.8.1 18 | RUN python -m pip install --no-cache-dir scipy==1.6.3 19 | RUN python -m pip install --no-cache-dir tqdm==4.61.0 20 | 21 | # Setup entrypoint 22 | RUN mkdir /input 23 | RUN mkdir /output 24 | RUN mkdir /code 25 | WORKDIR /code 26 | COPY handcrafted/main.py main.py 27 | ENTRYPOINT ["python", "main.py", "chroma"] 28 | 
-------------------------------------------------------------------------------- /representations/clmr.dockerfile: -------------------------------------------------------------------------------- 1 | # Import Ubuntu/CUDA/cuDNN 2 | FROM ubuntu:20.04 3 | 4 | # Configure shell 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | SHELL ["/bin/bash", "-c"] 7 | RUN apt-get update --fix-missing 8 | 9 | # Install Python 10 | ENV LANG C.UTF-8 11 | RUN apt-get install -y python3 python3-dev python3-pip 12 | RUN ln -s $(which python3) /usr/local/bin/python 13 | RUN python -m pip install --no-cache-dir --upgrade pip setuptools 14 | 15 | # Install deps 16 | RUN apt-get install -y wget unzip 17 | RUN apt-get install -y libsndfile1-dev 18 | RUN python -m pip install --no-cache-dir librosa==0.8.1 19 | 20 | # Setup entrypoint 21 | RUN mkdir /input 22 | RUN mkdir /output 23 | RUN mkdir /code 24 | WORKDIR /code 25 | ARG COMMIT_ID=8939105bf2aea51f2267db99ef66e94db676dbe7 26 | RUN wget https://github.com/Spijkervet/CLMR/archive/${COMMIT_ID}.zip; unzip ${COMMIT_ID}.zip; rm ${COMMIT_ID}.zip; mv CLMR-${COMMIT_ID} clmr 27 | RUN python -m pip install --no-cache-dir -e clmr 28 | RUN wget https://github.com/Spijkervet/CLMR/releases/download/2.1/clmr_magnatagatune_mlp.zip; unzip clmr_magnatagatune_mlp.zip; rm clmr_magnatagatune_mlp.zip 29 | COPY clmr/main.py main.py 30 | ENTRYPOINT ["python", "main.py"] 31 | -------------------------------------------------------------------------------- /representations/clmr/main.py: -------------------------------------------------------------------------------- 1 | ENCODER_CHECKPOINT_PATH = "clmr_magnatagatune_mlp/clmr_epoch=10000.ckpt" 2 | SAMPLE_RATE = 22050 3 | FRAME_LENGTH = 59409 4 | 5 | if __name__ == "__main__": 6 | import pathlib 7 | from argparse import ArgumentParser 8 | 9 | import librosa 10 | import numpy as np 11 | import torch 12 | from clmr.models import SampleCNN 13 | from clmr.utils import load_encoder_checkpoint, yaml_config_hook 14 | from simclr.modules.identity import Identity 15 | from tqdm import tqdm 16 | 17 | parser = ArgumentParser() 18 | parser.add_argument("--batch_size", type=int) 19 | parser.add_argument("--batch_idx", type=int) 20 | 21 | parser.set_defaults( 22 | batch_size=None, 23 | batch_idx=None, 24 | ) 25 | args = parser.parse_args() 26 | 27 | input_dir = pathlib.Path("/input") 28 | output_dir = pathlib.Path("/output") 29 | output_dir.mkdir(exist_ok=True) 30 | input_paths = sorted(list(input_dir.iterdir())) 31 | if args.batch_size is not None and args.batch_idx is not None: 32 | batch_starts = list(range(0, len(input_paths), args.batch_size)) 33 | if args.batch_idx >= len(batch_starts): 34 | raise ValueError("Invalid batch index") 35 | batch_start = batch_starts[args.batch_idx] 36 | input_paths = input_paths[batch_start : batch_start + args.batch_size] 37 | 38 | encoder = None 39 | for input_path in tqdm(input_paths): 40 | # Check if output already exists 41 | output_path = pathlib.Path(output_dir, f"{input_path.stem}.npy") 42 | try: 43 | np.load(output_path) 44 | continue 45 | except: 46 | pass 47 | 48 | if encoder is None: 49 | # References: 50 | # - https://colab.research.google.com/drive/1Njz8EoN4br587xjpRKcssMuqQY6Cc5nj#scrollTo=igc0TggNyj8U 51 | # - https://github.com/Spijkervet/CLMR/blob/master/linear_evaluation.py 52 | # - https://github.com/Spijkervet/CLMR/blob/0e52a20c7687ecec00c4d223230f00bffe7430a7/clmr/evaluation.py#L8 53 | 54 | encoder = SampleCNN( 55 | strides=[3, 3, 3, 3, 3, 3, 3, 3, 3], 56 | supervised=0, 57 | out_dim=50, 58 | ) 59 | 60 | 
state_dict = load_encoder_checkpoint(ENCODER_CHECKPOINT_PATH) 61 | encoder.load_state_dict(state_dict) 62 | encoder.fc = Identity() 63 | 64 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 65 | encoder.eval() 66 | encoder.to(device) 67 | 68 | with torch.no_grad(): 69 | audio, sr = librosa.core.load(input_path, sr=SAMPLE_RATE) 70 | if audio.ndim == 1: 71 | audio = audio[np.newaxis] 72 | audio = torch.tensor(audio, dtype=torch.float32, device=device) 73 | audio = torch.mean(audio, axis=0, keepdim=True) 74 | frames = torch.split(audio, FRAME_LENGTH, dim=1) 75 | if len(frames) <= 1: 76 | raise Exception("Audio too short") 77 | frames = torch.cat(frames[:-1], dim=0) 78 | frames = frames.unsqueeze(dim=1) 79 | representations = encoder(frames) 80 | representation = representations.mean(dim=0).cpu().numpy() 81 | 82 | np.save(output_path, representation) 83 | -------------------------------------------------------------------------------- /representations/env.sh: -------------------------------------------------------------------------------- 1 | REPRESENTATION_TAG=$1 2 | 3 | DATASET_TAG=$2 4 | INPUT_DIR=~/.jukemir/processed/$DATASET_TAG/wav 5 | OUTPUT_DIR=~/.jukemir/representations/$DATASET_TAG/$REPRESENTATION_TAG 6 | 7 | REPRESENTATION_ARGS=${@:3} 8 | 9 | DOCKER_NAMESPACE=jukemir 10 | DOCKER_TAG=representations_$REPRESENTATION_TAG 11 | DOCKER_CPUS=$(python3 -c "import os; cpus=os.sched_getaffinity(0); print(','.join(map(str,cpus)))") 12 | DOCKER_GPUS=$(nvidia-smi -L | python3 -c "import sys; print(','.join([l.strip().split()[-1][:-1] for l in list(sys.stdin)]))") 13 | DOCKER_CPU_ARG="--cpuset-cpus ${DOCKER_CPUS}" 14 | DOCKER_GPU_ARG="--gpus device=${DOCKER_GPUS}" 15 | -------------------------------------------------------------------------------- /representations/handcrafted/main.py: -------------------------------------------------------------------------------- 1 | SAMPLE_RATE = 22050 2 | HOP_LENGTH = 512 3 | 4 | if __name__ == "__main__": 5 | import pathlib 6 | from argparse import ArgumentParser 7 | 8 | import librosa 9 | import numpy as np 10 | from tqdm import tqdm 11 | 12 | parser = ArgumentParser() 13 | parser.add_argument("feature", type=str, choices=["chroma", "mfcc"]) 14 | parser.add_argument("--batch_size", type=int) 15 | parser.add_argument("--batch_idx", type=int) 16 | parser.set_defaults(batch_size=None, batch_idx=None) 17 | args = parser.parse_args() 18 | print(args) 19 | 20 | input_dir = pathlib.Path("/input") 21 | output_dir = pathlib.Path("/output") 22 | output_dir.mkdir(exist_ok=True) 23 | input_paths = sorted(list(input_dir.iterdir())) 24 | if args.batch_size is not None and args.batch_idx is not None: 25 | batch_starts = list(range(0, len(input_paths), args.batch_size)) 26 | if args.batch_idx >= len(batch_starts): 27 | raise ValueError("Invalid batch index") 28 | batch_start = batch_starts[args.batch_idx] 29 | input_paths = input_paths[batch_start : batch_start + args.batch_size] 30 | 31 | for input_path in tqdm(input_paths): 32 | # Check if output already exists 33 | output_path = pathlib.Path(output_dir, f"{input_path.stem}.npy") 34 | try: 35 | np.load(output_path) 36 | continue 37 | except: 38 | pass 39 | 40 | # Load audio 41 | audio, sr = librosa.core.load(input_path, sr=SAMPLE_RATE, mono=True) 42 | 43 | # Normalize 44 | norm_factor = np.abs(audio).max() 45 | if norm_factor > 0: 46 | audio /= norm_factor 47 | 48 | # Extract features 49 | if args.feature == "chroma": 50 | features = librosa.feature.chroma_cqt(audio, sr=sr, hop_length=HOP_LENGTH) 51 
| assert features.shape[0] == 12 52 | elif args.feature == "mfcc": 53 | features = librosa.feature.mfcc(audio, sr=sr, hop_length=HOP_LENGTH) 54 | assert features.shape[0] == 20 55 | else: 56 | assert False 57 | assert features.ndim == 2 58 | assert features.dtype == np.float32 59 | 60 | # Stack differences/statistics 61 | moments = [] 62 | for i in range(3): 63 | f = np.diff(features, n=i, axis=1) 64 | moments.append(f.mean(axis=1)) 65 | moments.append(f.std(axis=1)) 66 | moments = np.concatenate(moments) 67 | 68 | # Save 69 | np.save(output_path, moments) 70 | -------------------------------------------------------------------------------- /representations/jukebox.dockerfile: -------------------------------------------------------------------------------- 1 | # Import Ubuntu/CUDA/cuDNN 2 | FROM nvidia/cuda:10.1-cudnn7-devel-ubuntu18.04 3 | 4 | # Configure shell 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | SHELL ["/bin/bash", "-c"] 7 | RUN apt-get update --fix-missing 8 | 9 | # Install Python 10 | ENV LANG C.UTF-8 11 | RUN apt-get install -y python3 python3-dev python3-pip 12 | RUN ln -s $(which python3) /usr/local/bin/python 13 | RUN python -m pip install --no-cache-dir --upgrade pip setuptools 14 | 15 | # Install deps 16 | RUN apt-get install -y wget unzip 17 | RUN mkdir -p /root/.cache/jukebox/models/5b 18 | RUN wget https://openaipublic.azureedge.net/jukebox/models/5b/vqvae.pth.tar; mv vqvae.pth.tar /root/.cache/jukebox/models/5b 19 | RUN wget https://openaipublic.azureedge.net/jukebox/models/5b/prior_level_2.pth.tar; mv prior_level_2.pth.tar /root/.cache/jukebox/models/5b 20 | RUN apt-get install -y libsndfile1-dev 21 | RUN apt-get install -y libopenmpi-dev 22 | RUN apt-get install -y openssh-server 23 | RUN python -m pip install --no-cache-dir torch==1.4.0 24 | 25 | # Setup entrypoint 26 | RUN mkdir /input 27 | RUN mkdir /output 28 | RUN mkdir /code 29 | WORKDIR /code 30 | ARG COMMIT_ID=08efbbc1d4ed1a3cef96e08a931944c8b4d63bb3 31 | RUN wget https://github.com/openai/jukebox/archive/${COMMIT_ID}.zip; unzip ${COMMIT_ID}.zip; rm ${COMMIT_ID}.zip; mv jukebox-${COMMIT_ID} jukebox 32 | RUN python -m pip install --no-cache-dir -e jukebox 33 | COPY jukebox/make_models.py.patch make_models.py.patch 34 | RUN patch jukebox/jukebox/make_models.py make_models.py.patch 35 | COPY jukebox/main.py main.py 36 | ENTRYPOINT ["python", "main.py"] 37 | -------------------------------------------------------------------------------- /representations/jukebox/main.py: -------------------------------------------------------------------------------- 1 | import librosa as lr 2 | import numpy as np 3 | import torch 4 | 5 | JUKEBOX_SAMPLE_RATE = 44100 6 | T = 8192 7 | 8 | 9 | def load_audio_from_file(fpath): 10 | audio, _ = lr.load(fpath, sr=JUKEBOX_SAMPLE_RATE) 11 | if audio.ndim == 1: 12 | audio = audio[np.newaxis] 13 | audio = audio.mean(axis=0) 14 | 15 | # normalize audio 16 | norm_factor = np.abs(audio).max() 17 | if norm_factor > 0: 18 | audio /= norm_factor 19 | 20 | return audio.flatten() 21 | 22 | 23 | def get_z(audio, vqvae): 24 | # don't compute unnecessary discrete encodings 25 | audio = audio[: JUKEBOX_SAMPLE_RATE * 25] 26 | 27 | zs = vqvae.encode(torch.cuda.FloatTensor(audio[np.newaxis, :, np.newaxis])) 28 | 29 | z = zs[-1].flatten()[np.newaxis, :] 30 | 31 | if z.shape[-1] < 8192: 32 | raise ValueError("Audio file is not long enough") 33 | 34 | return z 35 | 36 | 37 | def get_cond(hps, top_prior): 38 | sample_length_in_seconds = 62 39 | 40 | hps.sample_length = ( 41 | int(sample_length_in_seconds * 
hps.sr) // top_prior.raw_to_tokens 42 | ) * top_prior.raw_to_tokens 43 | 44 | # NOTE: the 'lyrics' parameter is required, which is why it is included, 45 | # but it doesn't actually change anything about the `x_cond`, `y_cond`, 46 | # nor the `prime` variables 47 | metas = [ 48 | dict( 49 | artist="unknown", 50 | genre="unknown", 51 | total_length=hps.sample_length, 52 | offset=0, 53 | lyrics="""lyrics go here!!!""", 54 | ), 55 | ] * hps.n_samples 56 | 57 | labels = [None, None, top_prior.labeller.get_batch_labels(metas, "cuda")] 58 | 59 | x_cond, y_cond, prime = top_prior.get_cond(None, top_prior.get_y(labels[-1], 0)) 60 | 61 | x_cond = x_cond[0, :T][np.newaxis, ...] 62 | y_cond = y_cond[0][np.newaxis, ...] 63 | 64 | return x_cond, y_cond 65 | 66 | 67 | def get_final_activations(z, x_cond, y_cond, top_prior): 68 | 69 | x = z[:, :T] 70 | 71 | # make sure that we get the activations 72 | top_prior.prior.only_encode = True 73 | 74 | # encoder_kv and fp16 are set to the defaults, but explicitly so 75 | out = top_prior.prior.forward( 76 | x, x_cond=x_cond, y_cond=y_cond, encoder_kv=None, fp16=False 77 | ) 78 | 79 | return out 80 | 81 | 82 | def get_acts_from_file(fpath, hps, vqvae, top_prior, meanpool=True): 83 | audio = load_audio_from_file(fpath) 84 | 85 | # run vq-vae on the audio 86 | z = get_z(audio, vqvae) 87 | 88 | # get conditioning info 89 | x_cond, y_cond = get_cond(hps, top_prior) 90 | 91 | # get the activations from the LM 92 | acts = get_final_activations(z, x_cond, y_cond, top_prior) 93 | 94 | # postprocessing 95 | acts = acts.squeeze().type(torch.float32) 96 | 97 | if meanpool: 98 | acts = acts.mean(dim=0) 99 | 100 | acts = np.array(acts.cpu()) 101 | 102 | return acts 103 | 104 | 105 | if __name__ == "__main__": 106 | import pathlib 107 | from argparse import ArgumentParser 108 | 109 | # imports and set up Jukebox's multi-GPU parallelization 110 | import jukebox 111 | from jukebox.hparams import Hyperparams, setup_hparams 112 | from jukebox.make_models import MODELS, make_prior, make_vqvae 113 | from jukebox.utils.dist_utils import setup_dist_from_mpi 114 | from tqdm import tqdm 115 | 116 | parser = ArgumentParser() 117 | parser.add_argument("--batch_size", type=int) 118 | parser.add_argument("--batch_idx", type=int) 119 | parser.add_argument("--representations", type=str) 120 | 121 | parser.set_defaults( 122 | batch_size=None, 123 | batch_idx=None, 124 | representations="mean_pool,max_pool", 125 | ) 126 | args = parser.parse_args() 127 | 128 | input_dir = pathlib.Path("/input") 129 | output_dir = pathlib.Path("/output") 130 | output_dir.mkdir(exist_ok=True) 131 | input_paths = sorted(list(input_dir.iterdir())) 132 | if args.batch_size is not None and args.batch_idx is not None: 133 | batch_starts = list(range(0, len(input_paths), args.batch_size)) 134 | if args.batch_idx >= len(batch_starts): 135 | raise ValueError("Invalid batch index") 136 | batch_start = batch_starts[args.batch_idx] 137 | input_paths = input_paths[batch_start : batch_start + args.batch_size] 138 | 139 | loaded = False 140 | for input_path in tqdm(input_paths): 141 | # Check if output already exists 142 | output_path = pathlib.Path(output_dir, f"{input_path.stem}.npy") 143 | try: 144 | np.load(output_path) 145 | continue 146 | except: 147 | pass 148 | 149 | if not loaded: 150 | # Set up MPI 151 | rank, local_rank, device = setup_dist_from_mpi() 152 | 153 | # Set up VQVAE 154 | model = "5b" # or "1b_lyrics" 155 | hps = Hyperparams() 156 | hps.sr = 44100 157 | hps.n_samples = 3 if model == "5b_lyrics" else 8 
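            # NOTE: `model` is fixed to "5b" above, so the "5b_lyrics" branches
            # in this setup block are inert and the else values apply
            # (n_samples=8, chunk_size=32, max_batch_size=16); they appear to be
            # carried over from OpenAI's Jukebox sampling example.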
158 | hps.name = "samples" 159 | chunk_size = 16 if model == "5b_lyrics" else 32 160 | max_batch_size = 3 if model == "5b_lyrics" else 16 161 | hps.levels = 3 162 | hps.hop_fraction = [0.5, 0.5, 0.125] 163 | vqvae, *priors = MODELS[model] 164 | vqvae = make_vqvae( 165 | setup_hparams(vqvae, dict(sample_length=1048576)), device 166 | ) 167 | 168 | # Set up language model 169 | hparams = setup_hparams(priors[-1], dict()) 170 | hparams["prior_depth"] = 36 171 | top_prior = make_prior(hparams, vqvae, device) 172 | 173 | loaded = True 174 | 175 | # Decode, resample, convert to mono, and normalize audio 176 | with torch.no_grad(): 177 | representation = get_acts_from_file( 178 | input_path, hps, vqvae, top_prior, meanpool=True 179 | ) 180 | 181 | np.save(output_path, representation) 182 | -------------------------------------------------------------------------------- /representations/jukebox/make_models.py.patch: -------------------------------------------------------------------------------- 1 | --- make_models.py 2021-06-10 16:43:45.195677646 -0700 2 | +++ make_models.py.patched 2021-06-10 16:44:03.111348075 -0700 3 | @@ -58,7 +58,7 @@ 4 | # if checkpoint_hps.get(k, None) != hps.get(k, None): 5 | # print(k, "Checkpoint:", checkpoint_hps.get(k, None), "Ours:", hps.get(k, None)) 6 | checkpoint['model'] = {k[7:] if k[:7] == 'module.' else k: v for k, v in checkpoint['model'].items()} 7 | - model.load_state_dict(checkpoint['model']) 8 | + model.load_state_dict(checkpoint['model'], strict=False) 9 | if 'step' in checkpoint: model.step = checkpoint['step'] 10 | 11 | def restore_opt(opt, shd, checkpoint_path): 12 | -------------------------------------------------------------------------------- /representations/mfcc.dockerfile: -------------------------------------------------------------------------------- 1 | # Import Ubuntu/CUDA/cuDNN 2 | FROM ubuntu:20.04 3 | 4 | # Configure shell 5 | ENV DEBIAN_FRONTEND=noninteractive 6 | SHELL ["/bin/bash", "-c"] 7 | RUN apt-get update --fix-missing 8 | 9 | # Install Python 10 | ENV LANG C.UTF-8 11 | RUN apt-get install -y python3 python3-dev python3-pip 12 | RUN ln -s $(which python3) /usr/local/bin/python 13 | RUN python -m pip install --no-cache-dir --upgrade pip setuptools 14 | 15 | # Install deps 16 | RUN apt-get install -y libsndfile1-dev 17 | RUN python -m pip install --no-cache-dir librosa==0.8.1 18 | RUN python -m pip install --no-cache-dir scipy==1.6.3 19 | RUN python -m pip install --no-cache-dir tqdm==4.61.0 20 | 21 | # Setup entrypoint 22 | RUN mkdir /input 23 | RUN mkdir /output 24 | RUN mkdir /code 25 | WORKDIR /code 26 | COPY handcrafted/main.py main.py 27 | ENTRYPOINT ["python", "main.py", "mfcc"] 28 | -------------------------------------------------------------------------------- /representations/musicnn.dockerfile: -------------------------------------------------------------------------------- 1 | # Import Ubuntu/CUDA/cuDNN 2 | FROM tensorflow/tensorflow:1.14.0-gpu-py3 3 | 4 | RUN apt-get install -y \ 5 | libsndfile1-dev \ 6 | wget 7 | RUN python -m pip install --no-cache-dir librosa==0.7.0 8 | RUN python -m pip install --no-cache-dir --force-reinstall numba==0.48 9 | RUN python -m pip install --no-cache-dir tqdm 10 | 11 | RUN mkdir /input 12 | RUN mkdir /output 13 | RUN mkdir /code 14 | WORKDIR /code 15 | RUN wget https://github.com/jordipons/musicnn/archive/516acb2a0ff5ef73f64547898e018e793152c506.zip 16 | RUN unzip 516acb2a0ff5ef73f64547898e018e793152c506.zip 17 | RUN python -m pip install --no-dependencies -e 
musicnn-516acb2a0ff5ef73f64547898e018e793152c506 18 | COPY musicnn/main.py main.py 19 | 20 | ENTRYPOINT ["python", "main.py"] 21 | -------------------------------------------------------------------------------- /representations/musicnn/main.py: -------------------------------------------------------------------------------- 1 | if __name__ == "__main__": 2 | import pathlib 3 | from argparse import ArgumentParser 4 | 5 | import numpy as np 6 | from musicnn.extractor import extractor 7 | from tqdm import tqdm 8 | 9 | parser = ArgumentParser() 10 | parser.add_argument("--batch_size", type=int) 11 | parser.add_argument("--batch_idx", type=int) 12 | parser.add_argument("--representations", type=str) 13 | 14 | parser.set_defaults( 15 | batch_size=None, 16 | batch_idx=None, 17 | representations="mean_pool,max_pool", 18 | ) 19 | args = parser.parse_args() 20 | 21 | input_dir = pathlib.Path("/input") 22 | output_dir = pathlib.Path("/output") 23 | output_dir.mkdir(exist_ok=True) 24 | input_paths = sorted(list(input_dir.iterdir())) 25 | if args.batch_size is not None and args.batch_idx is not None: 26 | batch_starts = list(range(0, len(input_paths), args.batch_size)) 27 | if args.batch_idx >= len(batch_starts): 28 | raise ValueError("Invalid batch index") 29 | batch_start = batch_starts[args.batch_idx] 30 | input_paths = input_paths[batch_start : batch_start + args.batch_size] 31 | 32 | for input_path in tqdm(input_paths): 33 | # Check if output already exists 34 | output_path = pathlib.Path(output_dir, f"{input_path.stem}.npy") 35 | try: 36 | np.load(output_path) 37 | continue 38 | except: 39 | pass 40 | 41 | taggram, tags, features = extractor( 42 | input_path, model="MSD_musicnn_big", extract_features=True 43 | ) 44 | representation = np.concatenate( 45 | [features[k].mean(axis=0) for k in args.representations.split(",")] 46 | ) 47 | 48 | np.save(output_path, representation) 49 | -------------------------------------------------------------------------------- /representations/push.sh: -------------------------------------------------------------------------------- 1 | source env.sh 2 | 3 | docker push $DOCKER_NAMESPACE/$DOCKER_TAG 4 | -------------------------------------------------------------------------------- /representations/run.sh: -------------------------------------------------------------------------------- 1 | source env.sh 2 | 3 | ./build.sh $@ 4 | 5 | docker run \ 6 | -it \ 7 | --rm \ 8 | $DOCKER_CPU_ARG \ 9 | $DOCKER_GPU_ARG \ 10 | -v $INPUT_DIR:/input \ 11 | -v $OUTPUT_DIR:/output \ 12 | $DOCKER_NAMESPACE/$DOCKER_TAG \ 13 | $REPRESENTATION_ARGS 14 | -------------------------------------------------------------------------------- /reproduce/0_docker.sh: -------------------------------------------------------------------------------- 1 | pushd .. 2 | set -e 3 | HOST_CACHE=$(python -c "from jukemir import CACHE_DIR; print(CACHE_DIR)") 4 | echo $HOST_CACHE 5 | popd 6 | 7 | docker run \ 8 | -it \ 9 | --rm \ 10 | -d \ 11 | --name jukemir \ 12 | -u $(id -u):$(id -g) \ 13 | -v $HOST_CACHE:/jukemir/cache \ 14 | -v $(pwd)/../jukemir:/jukemir/jukemir \ 15 | -v $(pwd)/../reproduce:/jukemir/reproduce \ 16 | jukemir/lib \ 17 | bash 18 | -------------------------------------------------------------------------------- /reproduce/123_precomputed.sh: -------------------------------------------------------------------------------- 1 | docker exec -it jukemir python -m jukemir.assets precomputed --delete_wrong --num_parallel 1 2 | 3 | pushd .. 
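# Resolve the host-side cache directory by importing the jukemir package from
# the repository root (same pattern as 0_docker.sh)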
4 | set -e 5 | HOST_CACHE=$(python -c "from jukemir import CACHE_DIR; print(CACHE_DIR)") 6 | echo $HOST_CACHE 7 | popd 8 | 9 | mkdir -p $HOST_CACHE/representations 10 | pushd $HOST_CACHE/representations 11 | for f in *.tar.gz; do tar xfz "$f"; done 12 | popd 13 | -------------------------------------------------------------------------------- /reproduce/1_download.sh: -------------------------------------------------------------------------------- 1 | for ASSET_PREFIX in gtzan giantsteps magnatagatune emomusic 2 | do 3 | docker exec -it jukemir python -m jukemir.assets $ASSET_PREFIX --delete_wrong --num_parallel 8 4 | done 5 | -------------------------------------------------------------------------------- /reproduce/2_process.sh: -------------------------------------------------------------------------------- 1 | for DATASET_TAG in gtzan_ff giantsteps_clips magnatagatune emomusic 2 | do 3 | docker exec -it jukemir python -m jukemir.datasets.cache $DATASET_TAG audio 4 | done 5 | -------------------------------------------------------------------------------- /reproduce/3_extract.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | from jukemir import CACHE_DIR 4 | 5 | REPRESENTATIONS = ["chroma", "mfcc", "choi", "musicnn", "clmr", "jukebox"] 6 | DATASET_SIZES = [ 7 | ("gtzan_ff", 930), 8 | ("giantsteps_clips", 7035), 9 | ("magnatagatune", 25860), 10 | ("emomusic", 744), 11 | ] 12 | BATCH_SIZE = 256 13 | CMD_TEMPLATE = """ 14 | docker run \\ 15 | -it \\ 16 | --rm \\ 17 | -v {cache_dir}/processed/{dataset}/wav:/input \\ 18 | -v {cache_dir}/representations/{dataset}/{representation}:/output \\ 19 | jukemir/representations_{representation} \\ 20 | --batch_size {batch_size} \\ 21 | --batch_idx {batch_idx} 22 | """.strip() 23 | 24 | for dataset, dataset_size in DATASET_SIZES: 25 | for representation in REPRESENTATIONS: 26 | for batch_idx in range(math.ceil(dataset_size / BATCH_SIZE)): 27 | print( 28 | CMD_TEMPLATE.format( 29 | cache_dir=CACHE_DIR, 30 | dataset=dataset, 31 | representation=representation, 32 | batch_size=BATCH_SIZE, 33 | batch_idx=batch_idx, 34 | ) 35 | ) 36 | -------------------------------------------------------------------------------- /reproduce/4_grid_config.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pathlib 3 | 4 | from jukemir import CACHE_PROBES_DIR 5 | from jukemir.probe import paper_grid_cfgs 6 | 7 | DATASETS = ["gtzan_ff", "giantsteps_clips", "magnatagatune", "emomusic"] 8 | REPRESENTATIONS = ["chroma", "mfcc", "choi", "musicnn", "clmr", "jukebox"] 9 | 10 | for dataset in DATASETS: 11 | for representation in REPRESENTATIONS: 12 | grid_dir = pathlib.Path(CACHE_PROBES_DIR, dataset, representation) 13 | grid_dir.mkdir(parents=True, exist_ok=True) 14 | for cfg in paper_grid_cfgs(dataset, representation): 15 | with open(pathlib.Path(grid_dir, f"{cfg.uid()}.json"), "w") as f: 16 | f.write(json.dumps(cfg, indent=2, sort_keys=True)) 17 | -------------------------------------------------------------------------------- /reproduce/4_grid_config.sh: -------------------------------------------------------------------------------- 1 | docker exec -it jukemir python reproduce/4_grid_config.py 2 | -------------------------------------------------------------------------------- /reproduce/5_grid_train_codalab.sh: -------------------------------------------------------------------------------- 1 | cl new my-jukemir 2 | cl work my-jukemir 3 | cl add bundle 
jukemir//code
4 | cl add bundle jukemir//configs
5 | cl add bundle jukemir//metadata
6 | 
7 | for DATASET in gtzan_ff giantsteps_clips magnatagatune emomusic
8 | do
9 | for REPRESENTATION in chroma mfcc choi musicnn clmr jukebox
10 | do
11 | cl add bundle jukemir//$DATASET-$REPRESENTATION
12 | cl run :code :metadata :configs :$DATASET-$REPRESENTATION " mkdir -p /jukemir/cache/processed; mkdir -p /jukemir/cache/representations/$DATASET/$REPRESENTATION; mkdir -p /jukemir/cache/probes; cp -r code/* /jukemir/jukemir/; cp -r metadata/* /jukemir/cache/processed/; cp -r $DATASET-$REPRESENTATION/* /jukemir/cache/representations/$DATASET/$REPRESENTATION/; cp -r configs/* /jukemir/cache/probes/; for CFG_PATH in /jukemir/cache/probes/$DATASET/$REPRESENTATION/*; do echo \$CFG_PATH; python -m jukemir.probe.execute \$CFG_PATH ./; done " -n jukemir_grid-$DATASET-$REPRESENTATION --tags jukemir_grid $DATASET $REPRESENTATION $DATASET-$REPRESENTATION --request-memory 30g --request-docker-image jukemir/lib --request-gpus 1
13 | done
14 | done
15 | 
--------------------------------------------------------------------------------
/reproduce/5_grid_train_serial.py:
--------------------------------------------------------------------------------
1 | import json
2 | import pathlib
3 | import sys
4 | 
5 | from tqdm import tqdm
6 | 
7 | from jukemir import CACHE_PROBES_DIR
8 | from jukemir.probe import ProbeExperimentConfig, execute_probe_experiment
9 | 
10 | dataset, representation = sys.argv[1:]
11 | 
12 | grid_dir = pathlib.Path(CACHE_PROBES_DIR, dataset, representation)
13 | grid_cfgs = sorted(list(grid_dir.glob("*.json")))
14 | print(f"Training {len(grid_cfgs)} probes for {dataset} {representation}")
15 | for cfg_path in tqdm(grid_cfgs):
16 |     with open(cfg_path, "r") as f:
17 |         cfg = ProbeExperimentConfig(json.load(f))
18 |     execute_probe_experiment(cfg)
19 | 
--------------------------------------------------------------------------------
/reproduce/5_grid_train_serial.sh:
--------------------------------------------------------------------------------
1 | for DATASET in gtzan_ff giantsteps_clips magnatagatune emomusic
2 | do
3 | for REPRESENTATION in chroma mfcc choi musicnn clmr jukebox
4 | do
5 | docker exec -it jukemir python reproduce/5_grid_train_serial.py $DATASET $REPRESENTATION
6 | done
7 | done
8 | 
--------------------------------------------------------------------------------
/reproduce/6_evaluate.sh:
--------------------------------------------------------------------------------
1 | for DATASET in gtzan_ff giantsteps_clips magnatagatune emomusic
2 | do
3 | for REPRESENTATION in chroma mfcc choi musicnn clmr jukebox
4 | do
5 | docker exec -it jukemir \
6 |     python -m jukemir.probe.aggregate \
7 |     $DATASET \
8 |     $REPRESENTATION \
9 |     --evaluate test \
10 |     --expected_num_runs 216
11 | done
12 | done
13 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | setup(
4 |     name="jukemir",
5 |     packages=["jukemir"],
6 | )
7 | 
--------------------------------------------------------------------------------
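A quick sanity check (a hedged sketch, not part of the repository): every extractor above writes one fixed-size vector per clip as a `.npy` file, and the layout in representations/env.sh suggests they land under `~/.jukemir/representations/<dataset>/<representation>`; the dataset/representation pair below is only an example.

```python
import pathlib

import numpy as np

feats_dir = pathlib.Path.home() / ".jukemir" / "representations" / "gtzan_ff" / "jukebox"
for npy_path in sorted(feats_dir.glob("*.npy"))[:5]:
    representation = np.load(npy_path)  # one vector per clip, e.g. 4800-d for Jukebox
    print(npy_path.stem, representation.shape, representation.dtype)
```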