├── .gitattributes
├── data
└── dMelodies_dataset.npz
├── dMelodies_dataset_latent_info.csv
├── constants_file_names.py
├── constants_metadata.py
├── constants_latent_factors.py
├── .gitignore
├── environment.yml
├── script_create_dataset.py
├── README.md
├── helpers.py
├── dmelodies_dataset.py
├── dmelodies_torch_dataloader.py
├── dmelodies_loading.ipynb
└── figs
└── dataset_example.svg
/.gitattributes:
--------------------------------------------------------------------------------
1 | dMelodies_dataset_latent_info.csv filter=lfs diff=lfs merge=lfs -text
2 | data/dMelodies_dataset.npz filter=lfs diff=lfs merge=lfs -text
3 |
--------------------------------------------------------------------------------
/data/dMelodies_dataset.npz:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:10187867cc86f738325da152cbdc68134a03543d232602be5a86e4915b0a8378
3 | size 135490709
4 |
--------------------------------------------------------------------------------
/dMelodies_dataset_latent_info.csv:
--------------------------------------------------------------------------------
1 | version https://git-lfs.github.com/spec/v1
2 | oid sha256:ab7fac0c1d69a18a1430b5b511e3d77f7a3ae4c106f5df192c013d2da4863e08
3 | size 52675858
4 |
--------------------------------------------------------------------------------
/constants_file_names.py:
--------------------------------------------------------------------------------
1 | """
2 | File containing folder and file names for different assets
3 | Do NOT change
4 | """
5 |
# Folder names. RAW_DATA_FOLDER is used by script_create_dataset.py as the parent of the
# 'midi' and 'musicxml' output folders; DATASETS_FOLDER is resolved by DMelodiesDataset
# relative to the directory of its own module.
RAW_DATA_FOLDER = 'raw_data'  # will contain the raw data in .musicxml or .mid formats
DATASETS_FOLDER = 'data'  # will contain the index dictionary file and data in .npz format

# File names. LATENT_INFO_CSV is resolved by helpers.get_latent_info relative to the directory
# of helpers.py; NPZ_DATASET is placed inside DATASETS_FOLDER by DMelodiesDataset.
LATENT_INFO_CSV = 'dMelodies_dataset_latent_info.csv'  # csv containing latent values for all data points
NPZ_DATASET = 'dMelodies_dataset.npz'  # dataset stored as numpy nd-arrays
# NOTE(review): INDEX_DICT_FILE is not referenced by any code visible in this review - confirm it is still used
INDEX_DICT_FILE = 'index_dicts.json'  # text file storing dictionary mapping indices to music21 note names
12 |
--------------------------------------------------------------------------------
/constants_metadata.py:
--------------------------------------------------------------------------------
1 | """
2 | File containing some constants for the dataset Metadata
3 | Do NOT change
4 | """
5 |
# Descriptive metadata constants for the dataset.
# NOTE(review): presumably stored in the 'metadata' field of the .npz archive - the code that
# consumes these constants is not visible here, confirm before editing any of the strings.
TITLE = 'dMelodies dataset'
# Free-text description; built from adjacent string literals joined by line continuations
DESCRIPTION = 'dMelodies dataset. Algorithmically generated 2-bar melodies. Each datapoint is a 2-bar melody which' \
              ' plays arpreggios on the standard I-IV-V-I cadence chord pattern. The dataset has 9 independent ' \
              'latent factors of variation: root, octave, scale, rhythm bar 1, rhythm bar 2, arpeggiation direction ' \
              'for each of the 4 chords. All possible values of the latent factors are present.'
VERSION_NUM = 1
# Comma-separated author e-mail addresses
AUTHORS = 'ashis.pati@gatech.edu, siddharth.gururani@gatech.edu, alexander.lerch@gatech.edu'
13 |
--------------------------------------------------------------------------------
/constants_latent_factors.py:
--------------------------------------------------------------------------------
1 | """
2 | File containing constants for different latent factors
3 | Do NOT change
4 | """
5 |
6 | import numpy as np
7 | from itertools import combinations
8 |
# Dictionary for note indices corresponding to different chords within a scale.
# The values are positions in the per-scale lists of SCALE_NOTES_DICT (see below); the key
# order is the order in which helpers.get_midi_pitch_list plays the chords.
CHORD_DICT = {
    'I': [0, 2, 4],
    'IV': [3, 5, 7],
    'V': [4, 6, 8],
    'I-last': [0, 2, 4],
}

# List of allowed options for Tonic factor
TONIC_LIST = [
    'C', 'C#', 'D', 'D#', 'E', 'F', 'F#', 'G', 'G#', 'A', 'A#', 'B'
]
TONIC_DICT = {
    i: t for i, t in enumerate(TONIC_LIST)
}  # dict with indices mapping to the tonic values
TONIC_REVERSE_DICT = {
    TONIC_DICT[k]: k for k in TONIC_DICT.keys()
}  # reverse dict with tonic values mapping to indices

# List of allowed options for Octave factor
OCTAVE_LIST = [4, 5, 6]
OCTAVE_DICT = {
    i: o for i, o in enumerate(OCTAVE_LIST)
}  # dict with indices mapping to the octave values
OCTAVE_REVERSE_DICT = {
    OCTAVE_DICT[k]: k for k in OCTAVE_DICT.keys()
}  # reverse dict with octave values mapping to indices

# List of allowed options for Scale (Mode) factor
SCALE_LIST = ['major', 'minor', 'blues']
SCALE_DICT = {
    i: m for i, m in enumerate(SCALE_LIST)
}  # dict with indices mapping to the scale options
SCALE_REVERSE_DICT = {
    SCALE_DICT[k]: k for k in SCALE_DICT.keys()
}  # reverse dict with scale values mapping to indices
# Each list holds 9 offsets that helpers.get_midi_pitch_list adds to the MIDI pitch of the
# root note, i.e. they are semitone distances from the root; CHORD_DICT indexes into them.
SCALE_NOTES_DICT = {
    'major': [0, 2, 4, 5, 7, 9, 11, 12, 14],
    'minor': [0, 2, 3, 5, 7, 8, 11, 12, 14],
    'blues': [0, 2, 3, 5, 6, 9, 10, 12, 14],
}  # dict with allowed scale degrees for each scale

# Dict containing options for Rhythm factor.
# One entry per way of choosing 6 onset positions out of 8 (28 entries in total); each value
# is a length-8 list holding 1 at the chosen positions and 0 elsewhere. The list elements are
# numpy integers because they are produced by list() on a numpy array.
RHYTHM_DICT = {}
all_rhythms = combinations([0, 1, 2, 3, 4, 5, 6, 7], 6)
for i, pos in enumerate(list(all_rhythms)):
    temp_array = np.array([0] * 8)
    temp_array[np.array(pos)] = 1  # mark the selected onset positions
    RHYTHM_DICT[i] = list(temp_array)

# Dict containing options for Arpeggiation factor
ARP_DICT = {
    0: 'up',
    1: 'down'
}  # dict with indices mapping to arpeggiation direction options
ARP_REVERSE_DICT = {
    ARP_DICT[k]: k for k in ARP_DICT.keys()
}  # reverse dict mapping arpeggiation direction options to indices
67 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Folders and files
2 | raw_data/
3 |
4 |
5 | # Byte-compiled / optimized / DLL files
6 | __pycache__/
7 | *.py[cod]
8 | *$py.class
9 |
10 | # C extensions
11 | *.so
12 |
13 | # Distribution / packaging
14 | .Python
15 | build/
16 | develop-eggs/
17 | dist/
18 | downloads/
19 | eggs/
20 | .eggs/
21 | lib/
22 | lib64/
23 | parts/
24 | sdist/
25 | var/
26 | wheels/
27 | pip-wheel-metadata/
28 | share/python-wheels/
29 | *.egg-info/
30 | .installed.cfg
31 | *.egg
32 | MANIFEST
33 |
34 | # PyInstaller
35 | # Usually these files are written by a python script from a template
36 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
37 | *.manifest
38 | *.spec
39 |
40 | # Installer logs
41 | pip-log.txt
42 | pip-delete-this-directory.txt
43 |
44 | # Unit test / coverage reports
45 | htmlcov/
46 | .tox/
47 | .nox/
48 | .coverage
49 | .coverage.*
50 | .cache
51 | nosetests.xml
52 | coverage.xml
53 | *.cover
54 | *.py,cover
55 | .hypothesis/
56 | .pytest_cache/
57 |
58 | # Translations
59 | *.mo
60 | *.pot
61 |
62 | # Django stuff:
63 | *.log
64 | local_settings.py
65 | db.sqlite3
66 | db.sqlite3-journal
67 |
68 | # Flask stuff:
69 | instance/
70 | .webassets-cache
71 |
72 | # Scrapy stuff:
73 | .scrapy
74 |
75 | # Sphinx documentation
76 | docs/_build/
77 |
78 | # PyBuilder
79 | target/
80 |
81 | # Jupyter Notebook
82 | .ipynb_checkpoints
83 |
84 | # IPython
85 | profile_default/
86 | ipython_config.py
87 |
88 | # pyenv
89 | .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
99 | __pypackages__/
100 |
101 | # Celery stuff
102 | celerybeat-schedule
103 | celerybeat.pid
104 |
105 | # SageMath parsed files
106 | *.sage.py
107 |
108 | # Environments
109 | .env
110 | .venv
111 | env/
112 | venv/
113 | ENV/
114 | env.bak/
115 | venv.bak/
116 |
117 | # Spyder project settings
118 | .spyderproject
119 | .spyproject
120 |
121 | # Rope project settings
122 | .ropeproject
123 |
124 | # mkdocs documentation
125 | /site
126 |
127 | # mypy
128 | .mypy_cache/
129 | .dmypy.json
130 | dmypy.json
131 |
132 | # Pyre type checker
133 | .pyre/
134 |
135 | # PyCharm Related
136 | .idea/
137 |
138 | # MAC
139 | .DS_Store
140 | .vscode/settings.json
141 |
--------------------------------------------------------------------------------
/environment.yml:
--------------------------------------------------------------------------------
1 | name: dmelodies
2 | channels:
3 | - pytorch
4 | - defaults
5 | dependencies:
6 | - _libgcc_mutex=0.1
7 | - blas=1.0
8 | - ca-certificates=2020.1.1
9 | - certifi=2019.11.28
10 | - cffi=1.11.5
11 | - cloudpickle=1.3.0
12 | - cytoolz=0.10.1
13 | - dask-core=2.10.1
14 | - dbus=1.13.12
15 | - decorator=4.4.1
16 | - expat=2.2.6
17 | - fontconfig=2.13.0
18 | - freetype=2.9.1
19 | - glib=2.63.1
20 | - gst-plugins-base=1.14.0
21 | - gstreamer=1.14.0
22 | - icu=58.2
23 | - imageio=2.6.1
24 | - intel-openmp=2019.1
25 | - jpeg=9b
26 | - libedit=3.1.20181209
27 | - libffi=3.2.1
28 | - libgcc-ng=8.2.0
29 | - libgfortran-ng=7.3.0
30 | - libpng=1.6.36
31 | - libstdcxx-ng=8.2.0
32 | - libtiff=4.0.10
33 | - libuuid=1.0.3
34 | - libxcb=1.13
35 | - libxml2=2.9.9
36 | - mkl=2019.1
37 | - mkl_fft=1.0.10
38 | - mkl_random=1.0.2
39 | - ncurses=6.1
40 | - networkx=2.4
41 | - ninja=1.8.2
42 | - numpy=1.15.4
43 | - numpy-base=1.15.4
44 | - olefile=0.46
45 | - openssl=1.1.1d
46 | - pandas=1.0.1
47 | - pcre=8.43
48 | - pillow=5.4.1
49 | - pip=18.1
50 | - pycparser=2.19
51 | - pymongo=3.7.2
52 | - pyqt=5.9.2
53 | - python=3.7.2
54 | - pytorch=1.0.0
55 | - pytz=2019.3
56 | - pywavelets=1.1.1
57 | - qt=5.9.7
58 | - readline=7.0
59 | - scikit-image=0.16.2
60 | - setuptools=41.0.0
61 | - sip=4.19.8
62 | - six=1.12.0
63 | - sqlite=3.26.0
64 | - tk=8.6.8
65 | - toolz=0.10.0
66 | - torchvision=0.2.1
67 | - tornado=6.0.3
68 | - tqdm=4.29.1
69 | - wheel=0.32.3
70 | - xz=5.2.4
71 | - zlib=1.2.11
72 | - zstd=1.3.7
73 | - pip:
74 | - absl-py==0.9.0
75 | - asn1crypto==0.24.0
76 | - astor==0.8.1
77 | - bcrypt==3.1.6
78 | - click==7.0
79 | - cryptography==2.5
80 | - cycler==0.10.0
81 | - dill==0.3.1.1
82 | - disentanglement-lib==1.4
83 | - future==0.18.2
84 | - gast==0.3.3
85 | - gin-config==0.3.0
86 | - google-pasta==0.1.8
87 | - grpcio==1.27.1
88 | - h5py==2.10.0
89 | - joblib==0.14.1
90 | - keras-applications==1.0.8
91 | - keras-preprocessing==1.1.0
92 | - kiwisolver==1.0.1
93 | - markdown==3.2.1
94 | - matplotlib==3.0.2
95 | - mido==1.2.9
96 | - music21==5.5.0
97 | - paramiko==2.4.2
98 | - pretty-midi==0.2.8
99 | - protobuf==3.11.3
100 | - pyasn1==0.4.5
101 | - pynacl==1.3.0
102 | - pyparsing==2.3.1
103 | - pypianoroll==0.5.3
104 | - python-dateutil==2.7.5
105 | - scikit-learn==0.22.1
106 | - scipy==1.2.0
107 | - seaborn==0.10.0
108 | - simplejson==3.17.0
109 | - sklearn==0.0
110 | - sshtunnel==0.1.4
111 | - tensorboard==1.14.0
112 | - tensorboard-logger==0.1.0
113 | - tensorboardx==2.0
114 | - tensorflow-estimator==1.14.0
115 | - tensorflow-gpu==1.14.0
116 | - tensorflow-hub==0.7.0
117 | - tensorflow-probability==0.7.0
118 | - termcolor==1.1.0
119 | - werkzeug==1.0.0
120 | - wrapt==1.11.2
121 |
122 |
--------------------------------------------------------------------------------
/script_create_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | Script to create the dMelodies dataset
3 | Running this script creates the following:
4 | - a .npz file containing the dataset and the indices of the latent values as numpy nd.arrays
5 | also contains dictionaries mapping note names to indices
6 | - a .csv file containing the latent factor information for all data points
7 | Additional Options:
8 | Add the following arguments while running the script to save the generated melodies
9 | - '--save-mid': saves all melodies in .mid format
10 | - '--save-xml': saves all melodies in .musicxml format
11 | - '--debug': creates a smaller version of the dataset for debugging
12 | See constants_file_names for information regarding file names and where they will be saved
13 | """
14 |
15 | import os
16 | import argparse
17 | from joblib import Parallel, delayed
18 | import pandas as pd
19 | import multiprocessing
20 | from tqdm import tqdm
21 |
22 | from constants_file_names import RAW_DATA_FOLDER
23 | from dmelodies_dataset import DMelodiesDataset
24 | from helpers import get_score_for_item, get_file_name_for_item
25 |
26 |
def save_(
        index: int,
        data_row: pd.Series,
        save_mid: bool = False,
        save_xml: bool = False
):
    """
    Saves the score for the index as .mid and / or .musicxml
    Files are written to the 'midi' and 'musicxml' sub-folders of RAW_DATA_FOLDER,
    which are created on demand. Does nothing if both flags are False.
    Args:
        index: int, of the row
        data_row: single row of a pandas data-frame object containing the attribute values
        save_mid: bool, save as .mid if True
        save_xml: bool, save as .musicxml if True

    """
    if not (save_mid or save_xml):
        return
    score = get_score_for_item(data_row)
    file_name = get_file_name_for_item(data_row, index)
    if save_mid:
        midi_save_path = os.path.join(RAW_DATA_FOLDER, 'midi', file_name + '.mid')
        # exist_ok=True: this function runs in parallel workers, so another worker may
        # create the folder between an existence check and the makedirs call
        os.makedirs(os.path.dirname(midi_save_path), exist_ok=True)
        score.write('midi', midi_save_path)
    if save_xml:
        xml_save_path = os.path.join(RAW_DATA_FOLDER, 'musicxml', file_name + '.musicxml')
        os.makedirs(os.path.dirname(xml_save_path), exist_ok=True)
        score.write('musicxml', xml_save_path)
56 |
57 |
if __name__ == '__main__':
    # command line options
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--save-mid', help='save data points in .mid format (default: false)', action='store_true'
    )
    parser.add_argument(
        '--save-xml', help='save data points in .musicxml format (default: false)', action='store_true'
    )
    parser.add_argument(
        '--debug', help='flag to create a smaller subset for debugging', action='store_true'
    )
    args = parser.parse_args()
    s_mid = args.save_mid
    s_xml = args.save_xml
    debug = args.debug

    # create and load dataset (only the first 1000 data points in debug mode)
    num_data_points = None
    if debug:
        num_data_points = 1000
    dataset = DMelodiesDataset(num_data_points=num_data_points)
    dataset.make_or_load_dataset()

    # save raw data-files if needed
    df = dataset.df.head(n=dataset.num_data_points)
    if debug:
        # sequential processing for the small debug subset
        for i, d in tqdm(df.iterrows(), total=len(df)):
            save_(i, d, s_mid, s_xml)
    else:
        # one worker per CPU core for the full dataset
        cpu_count = multiprocessing.cpu_count()
        print(cpu_count)
        Parallel(n_jobs=cpu_count)(
            delayed(save_)(i, d, s_mid, s_xml) for i, d in tqdm(df.iterrows(), total=len(df))
        )
92 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [](http://creativecommons.org/licenses/by-nc-sa/4.0/)
2 |
3 | # dMelodies: A Music Dataset for Disentanglement Learning
4 |
5 | This repository contains the dMelodies dataset, meant to explore disentanglement in the context of symbolic music. Please cite as follows if you are using the code/data in this repository in any manner.
6 |
7 | > Ashis Pati, Siddharth Gururani, Alexander Lerch. "dMelodies: A Music Dataset for Disentanglement Learning", 21st International Society for Music Information Retrieval Conference (ISMIR), Montréal, Canada, 2020.
8 |
9 | ```
10 | @inproceedings{pati2020dmelodies,
11 | title={dMelodies: A Music Dataset for Disentanglement Learning},
12 | author={Pati, Ashis and Gururani, Siddharth and Lerch, Alexander},
13 | booktitle={21st International Society for Music Information Retrieval Conference (ISMIR)},
14 | year={2020},
15 | address={Montréal, Canada}
16 | }
17 | ```
18 |
19 |
20 | ## Motivation
21 | Over the last few years, there has been significant research attention on representation learning focused on disentangling the underlying factors of variation in given data. However, most of the current/previous studies rely on datasets from the image/computer vision domain (such as the [dSprites](https://github.com/deepmind/dsprites-dataset) dataset).
22 | The purpose of this work is to be able to create a standardized dataset for conducting disentanglement studies on symbolic music data. The key motivation is that such a dataset would help researchers working on disentanglement problems demonstrate their algorithm on diverse domains.
23 |
24 | ## Description
dMelodies is a dataset of simple 2-bar melodies generated using 9 independent latent factors of variation where each data point represents a unique melody based on the following constraints:
26 | - Each melody will correspond to a unique scale (major, minor, blues, etc.).
27 | - Each melody plays the arpeggios using the standard I-IV-V-I cadence chord pattern.
28 | - Bar 1 plays the first 2 chords (6 notes), Bar 2 plays the second 2 chords (6 notes).
29 | - Each played note is an 8th note.
30 |
31 | A typical example is shown below.
32 |
33 | 
34 |
35 |
36 |
37 | ### Factors of Variation
38 | The following factors of variation are considered:
39 | 1. **Tonic** (Root): 12 options from C to B
40 | 2. **Octave**: 3 options from C4 through C6
41 | 3. **Mode/Scale**: 3 options (Major, Minor, Blues)
42 | 4. **Rhythm Bar 1**: 28 options based on where the 6 note onsets are located in the first bar.
43 | 5. **Rhythm Bar 2**: 28 options based on where the 6 note onsets are located in the second bar.
6. **Arpeggiation Direction Chord 1**: 2 options (up/down) based on how the arpeggio is played
45 | 7. **Arpeggiation Direction Chord 2**: 2 options (up/down)
46 | 8. **Arpeggiation Direction Chord 3**: 2 options (up/down)
47 | 9. **Arpeggiation Direction Chord 4**: 2 options (up/down)
48 |
Consequently, the total number of data-points is 1,354,752.
50 |
51 | ### Provided Data
52 | The data is provided as a numpy `.npz` archive with six fields:
53 | 1. `score_array`: (1354752 x 16, int32) tokenized score representation
54 | 2. `latent_array`: (1354752 x 9, int32) integer index of the latent factor values
55 | 3. `note2index_dict`: dictionary mapping the musical note names/symbols to token indices
56 | 4. `index2note_dict`: dictionary mapping the token indices to musical note names/symbols
57 | 5. `latent_dicts`: dictionary mapping the different latent factor values to the corresponding integer indices
6. `metadata`: additional information (title, authors, date of creation etc.)
59 |
60 |
61 | ## Usage
62 | Install `anaconda` or `miniconda` by following the instruction [here](https://docs.conda.io/projects/conda/en/latest/user-guide/install/).
63 |
Create a new conda environment using the `environment.yml` file located in the root folder of this repository. The instructions for the same can be found [here](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#creating-an-environment-from-an-environment-yml-file).
65 |
66 | Activate the `dmelodies` environment using the following command:
67 |
68 | ```
69 | conda activate dmelodies
70 | ```
71 | The `DMelodiesDataset` class (see `dmelodies_dataset.py`) is a wrapper around the dataset and provides methods to read and load the dataset. If you are using [PyTorch](https://pytorch.org), you can use the `DMelodiesTorchDataset` class (see `dmelodies_torch_dataloader.py`) which implements a torch DataLoader.
72 | Examples for using both can be found in the `dmelodies_loading.ipynb` file.
73 |
74 | Explore the [benchmarking](https://github.com/ashispati/dmelodies_benchmarking) experiments on dMelodies using different unsupervised learning methods.
75 |
76 | ### Dataset Creation
In case you want to create your own version of the dataset (as a `.npz` file), delete the contents of the `data` folder and then run `script_create_dataset.py` from the root folder of this repository. Additional arguments `--save-mid` and `--save-xml` can be used to save the individual melodies as `.mid` or `.musicxml` files. The files will be saved in a `raw_data` folder.
78 | **Note**: Saving individual melodies will require approx. 16.5GB of space (5.5 GB for `.mid` format and 11GB for `.musicxml` format)
79 |
80 |
--------------------------------------------------------------------------------
/helpers.py:
--------------------------------------------------------------------------------
1 | """
2 | Helper functions to create melodies and music21 score objects
3 | """
4 |
5 | import os
6 | from fractions import Fraction
7 | from itertools import product
8 | import pandas as pd
9 | from tqdm import tqdm
10 | from typing import Union
11 |
12 | import music21
13 | from music21 import note
14 |
15 | from constants_file_names import *
16 | from constants_latent_factors import *
17 |
18 |
19 | SLUR_SYMBOL = '__'
20 | TICK_VALUES = [
21 | 0,
22 | Fraction(1, 2),
23 | ]
24 |
25 |
def create_latent_info_df() -> pd.DataFrame:
    """
    Builds the data-frame listing every combination of the latent factors

    One row is produced per element of the cartesian product of the tonic, octave, scale,
    rhythm (bar 1 and bar 2) and arpeggiation (chords 1 to 4) options. Tonic, octave, scale
    and arpeggiation are stored as their values, the two rhythms as their indices.

    Returns:
        pandas data-frame object
    """
    column_names = (
        'tonic',
        'octave',
        'scale',
        'rhythm_bar1',
        'rhythm_bar2',
        'arp_chord1',
        'arp_chord2',
        'arp_chord3',
        'arp_chord4',
    )
    columns = {name: [] for name in column_names}

    factor_grid = product(
        TONIC_DICT.keys(),
        OCTAVE_DICT.keys(),
        SCALE_DICT.keys(),
        RHYTHM_DICT.keys(),
        RHYTHM_DICT.keys(),
        ARP_DICT.keys(),
        ARP_DICT.keys(),
        ARP_DICT.keys(),
        ARP_DICT.keys()
    )
    for tonic_i, octave_i, scale_i, rhy1_i, rhy2_i, arp1_i, arp2_i, arp3_i, arp4_i in tqdm(factor_grid):
        row_values = (
            TONIC_DICT[tonic_i],
            OCTAVE_DICT[octave_i],
            SCALE_DICT[scale_i],
            rhy1_i,  # rhythms are kept as indices into RHYTHM_DICT
            rhy2_i,
            ARP_DICT[arp1_i],
            ARP_DICT[arp2_i],
            ARP_DICT[arp3_i],
            ARP_DICT[arp4_i],
        )
        for name, value in zip(column_names, row_values):
            columns[name].append(value)
    return pd.DataFrame(data=columns)
77 |
78 |
def get_latent_info() -> pd.DataFrame:
    """
    Loads the latent factors data-frame from the LATENT_INFO_CSV file (see constants_file_names.py)
    located next to this module. When the file is missing, the data-frame is generated and
    written to that location first.

    Returns:
        pandas data-frame object
    """
    module_dir = os.path.dirname(os.path.realpath(__file__))
    csv_path = os.path.join(module_dir, LATENT_INFO_CSV)
    if not os.path.exists(csv_path):
        # first use: build the table and cache it on disk
        generated_df = create_latent_info_df()
        generated_df.to_csv(path_or_buf=csv_path)
        return generated_df
    return pd.read_csv(csv_path, index_col=0)
95 |
96 |
def get_midi_pitch_list(
        tonic: str,
        octave: int,
        mode: str,
        arp_dir1: str,
        arp_dir2: str,
        arp_dir3: str,
        arp_dir4: str
) -> list:
    """
    Builds the MIDI pitch sequence of the melody. See constants_latent_factors.py for the allowed arg values
    Args:
        tonic: str, pitch class of the root note (C, C#, ...., through B)
        octave: int, octave number (4, 5, 6) of the root note
        mode: str, the scale (major, minor, blues etc.)
        arp_dir1: str, 'up' or 'down', arpeggiation direction of Chord 1
        arp_dir2: str, 'up' or 'down', arpeggiation direction of Chord 2
        arp_dir3: str, 'up' or 'down', arpeggiation direction of Chord 3
        arp_dir4: str, 'up' or 'down', arpeggiation direction of Chord 4
    Returns:
        list of MIDI notes, three per chord, in the chord order of CHORD_DICT
    """
    root_midi = music21.pitch.Pitch(tonic + str(octave)).midi
    scale_offsets = SCALE_NOTES_DICT[mode]
    directions = (arp_dir1, arp_dir2, arp_dir3, arp_dir4)
    midi_pitches = []
    for chord_position, chord_degrees in enumerate(CHORD_DICT.values()):
        # a 'down' arpeggio plays the chord notes in reverse order
        if directions[chord_position] == 'down':
            ordered_degrees = chord_degrees[::-1]
        else:
            ordered_degrees = chord_degrees
        midi_pitches.extend(root_midi + scale_offsets[degree] for degree in ordered_degrees)
    return midi_pitches
130 |
131 |
def create_m21_melody(
        tonic: str,
        octave: int,
        mode: str,
        rhythm_bar1: int,
        rhythm_bar2: int,
        arp_dir1: str,
        arp_dir2: str,
        arp_dir3: str,
        arp_dir4: str
) -> music21.stream.Score:
    """
    Creates the 2-bar melody in music21 score format
    Args:
        tonic: str, specifies the pitch class of the root note (C, C#, ...., through B)
        octave: int, specifies of the octave number (4, 5, 6) of the root note
        mode: str, specifies the scale (major, minor, blues etc.)
        rhythm_bar1: int, index into RHYTHM_DICT specifying the rhythm for Bar 1
        rhythm_bar2: int, index into RHYTHM_DICT specifying the rhythm for Bar 2
        arp_dir1: str, 'up' or 'down', specifies the arpeggiation direction of Chord 1
        arp_dir2: str, 'up' or 'down', specifies the arpeggiation direction of Chord 2
        arp_dir3: str, 'up' or 'down', specifies the arpeggiation direction of Chord 3
        arp_dir4: str, 'up' or 'down', specifies the arpeggiation direction of Chord 4

    Returns:
        music21 score object containing the score

    Raises:
        ValueError: if one of the selected rhythms does not contain exactly 6 onsets
    """
    score = music21.stream.Score()
    part = music21.stream.Part()
    dur = 0.0
    rhy1 = RHYTHM_DICT[rhythm_bar1]
    rhy2 = RHYTHM_DICT[rhythm_bar2]
    # each bar must hold exactly 6 onsets (two 3-note chords per bar)
    if sum(rhy1) != 6:
        raise ValueError(f'Invalid rhythm: {rhy1}')
    if sum(rhy2) != 6:
        raise ValueError(f'Invalid rhythm: {rhy2}')
    midi_pitch_seq = get_midi_pitch_list(tonic, octave, mode, arp_dir1, arp_dir2, arp_dir3, arp_dir4)
    curr_note_num = 0
    for rhy in [rhy1, rhy2]:
        for onset in rhy:
            if onset == 1:
                # onset position: place the next pitch of the sequence
                f = music21.note.Note()
                f.pitch.midi = midi_pitch_seq[curr_note_num]
                f.duration = music21.duration.Duration('eighth')
                curr_note_num += 1
            else:
                # no onset: fill the slot with a rest
                f = music21.note.Rest()
                f.duration = music21.duration.Duration('eighth')
            part.insert(dur, f)
            dur += music21.duration.Duration('eighth').quarterLength

    score.insert(part)
    return score
185 |
186 |
def get_score_for_item(df_row: pd.Series) -> music21.stream.Score:
    """
    Creates the music21 score described by a row of the latent info data-frame
    Args:
        df_row: data-frame row containing latent attribute values

    Returns:
        music21.stream.Score object
    """
    # create_m21_melody argument name -> data-frame column holding its value
    column_for_argument = {
        'tonic': 'tonic',
        'octave': 'octave',
        'mode': 'scale',
        'rhythm_bar1': 'rhythm_bar1',
        'rhythm_bar2': 'rhythm_bar2',
        'arp_dir1': 'arp_chord1',
        'arp_dir2': 'arp_chord2',
        'arp_dir3': 'arp_chord3',
        'arp_dir4': 'arp_chord4',
    }
    melody_kwargs = {
        argument: df_row[column] for argument, column in column_for_argument.items()
    }
    return create_m21_melody(**melody_kwargs)
207 |
208 |
def get_file_name_for_item(df_row: pd.Series, index: int) -> str:
    """
    Builds the file name (without extension) for a data point
    Args:
        df_row: data-frame row containing latent attribute values
        index: int, of the item in the dataset

    Returns:
        str, the index followed by the nine latent values, all joined by underscores
    """
    latent_columns = (
        'tonic',
        'octave',
        'scale',
        'rhythm_bar1',
        'rhythm_bar2',
        'arp_chord1',
        'arp_chord2',
        'arp_chord3',
        'arp_chord4',
    )
    name_parts = [index]
    name_parts.extend(df_row[column] for column in latent_columns)
    return '_'.join(f'{part}' for part in name_parts)
230 |
231 |
def compute_tick_durations(tick_values: list):
    """
    Computes the duration of every tick
    Each duration is the gap to the following tick value; the last tick lasts until 1.
    Args:
        tick_values: list of allowed tick values
    Returns:
        list with one duration per tick value
    """
    durations = []
    for position in range(1, len(tick_values)):
        durations.append(tick_values[position] - tick_values[position - 1])
    # the final tick extends to the end of the unit interval
    durations.append(1 - tick_values[-1])
    return durations
242 |
243 |
def get_notes(score: music21.stream.Score) -> list:
    """
    Collects the notes and rests of the first part of a music21 score, leaving out chord symbols
    Args:
        score: music21 score object
    Returns:
        list, of music21 note objects
    """
    collected = []
    for element in score.parts[0].flat.notesAndRests:
        if isinstance(element, music21.harmony.ChordSymbol):
            continue
        collected.append(element)
    return collected
255 |
256 |
def is_score_on_ticks(score: music21.stream.Score, tick_values: list) -> bool:
    """
    Tells whether every note of a score starts on one of the allowed ticks
    Only the fractional part of each offset is compared, with a tolerance of 1e-5.
    Args:
        score: music21 score object
        tick_values: list of allowed tick values
    Returns:
        bool, False as soon as one note is off the ticks, True otherwise
    """
    tolerance = 1e-5
    for element in get_notes(score):
        fractional_offset = divmod(element.offset, 1)[1]
        on_tick = any(
            tick - tolerance < fractional_offset < tick + tolerance
            for tick in tick_values
        )
        if not on_tick:
            return False
    return True
275 |
276 |
def standard_name(note_or_rest: Union[music21.note.Note, music21.note.Rest]) -> str:
    """
    Gives the string representation of a music21 note or rest
    Args:
        note_or_rest: music21 note.Note or note.Rest object

    Returns:
        str, name with octave for a note, name for a rest

    Raises:
        ValueError: if the input is neither a note nor a rest
    """
    if isinstance(note_or_rest, music21.note.Note):
        return note_or_rest.nameWithOctave
    if isinstance(note_or_rest, music21.note.Rest):
        return note_or_rest.name
    raise ValueError("Invalid input. Should be a music21.note.Note or music21.note.Rest object ")
292 |
293 |
def standard_note(note_or_rest_string: str) -> Union[music21.note.Note, music21.note.Rest]:
    """
    Builds the music21 object for a note / rest string
    Args:
        note_or_rest_string: str, 'rest', the slur symbol or a note name

    Returns:
        music21 note.Rest for 'rest' and for the slur symbol, note.Note otherwise
    """
    is_rest = note_or_rest_string == 'rest' or note_or_rest_string == SLUR_SYMBOL
    if is_rest:
        return note.Rest()
    return note.Note(note_or_rest_string)
309 |
310 |
def concatenate_scores(scores_list):
    """
    Joins several scores into a single one-part score
    Each score must be 2 bars long: consecutive scores are placed 8 quarter lengths apart
    :param scores_list: iterable of music21 score objects
    :return: music21 score object containing all the notes in sequence
    """
    combined_part = music21.stream.Part()
    for score_number, single_score in enumerate(scores_list):
        score_start = 8.0 * score_number
        elapsed = 0
        for element in get_notes(single_score):
            combined_part.insert(score_start + elapsed, element)
            elapsed += element.duration.quarterLength
    combined_score = music21.stream.Score()
    combined_score.insert(combined_part)
    return combined_score
--------------------------------------------------------------------------------
/dmelodies_dataset.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing the DMelodiesDataset class
3 | """
4 |
5 | from datetime import date
6 |
7 | from helpers import *
8 | from constants_metadata import *
9 |
10 |
class DMelodiesDataset:
    """
    Class for creating the dMelodies dataset

    The table of latent factors is obtained from get_latent_info(). Every row
    can be rendered as a music21 score and encoded as a sequence of note
    indices (one entry per tick). The encoded scores, the latent values, the
    index dictionaries and the metadata are cached in a single .npz file.
    """
    def __init__(self, num_data_points: int = None):
        """
        Initializes the DMelodiesDataset class object
        Args:
            num_data_points: int or None, number of rows of the latent table
                to use; if None, create the full dataset
        """
        self.df = get_latent_info()
        if num_data_points is None:
            self.num_data_points = self.df.shape[0]
        else:
            self.num_data_points = num_data_points
        self.time_sig_num = 4
        self.time_sig_den = 4
        self.beat_subdivisions = len(TICK_VALUES)
        self.tick_durations = compute_tick_durations(TICK_VALUES)
        cur_dir = os.path.dirname(os.path.realpath(__file__))
        # exist_ok avoids the check-then-create race of exists() + mkdir()
        os.makedirs(os.path.join(cur_dir, DATASETS_FOLDER), exist_ok=True)
        self.dataset_path = os.path.join(cur_dir, DATASETS_FOLDER, NPZ_DATASET)
        # filled in by make_or_load_dataset()
        self.score_array = None
        self.latent_array = None
        self.metadata = None
        self.latent_dicts = {
            'tonic': TONIC_DICT,
            'octave': OCTAVE_DICT,
            'scale': SCALE_DICT,
            'rhythm_bar1': RHYTHM_DICT,
            'rhythm_bar2': RHYTHM_DICT,
            'arp_chord1': ARP_DICT,
            'arp_chord2': ARP_DICT,
            'arp_chord3': ARP_DICT,
            'arp_chord4': ARP_DICT
        }
        self.note2index_dict = dict()
        self.index2note_dict = dict()
        self.initialize_index_dicts()

    def _get_score_for_item(self, index: int) -> "music21.stream.Score":
        """
        Returns the score for the index
        Args:
            index: int, of the item in the dataset

        Returns:
            music21.stream.Score object built by get_score_for_item() from
            the corresponding row of the latent table
        """
        assert 0 <= index < self.num_data_points
        d = self.df.iloc[index]
        return get_score_for_item(d)

    def _get_file_name_for_item(self, index: int) -> str:
        """
        Return the file name for index
        Args:
            index: int, of the item in the dataset

        Returns:
            str, as produced by get_file_name_for_item() for that row
        """
        assert 0 <= index < self.num_data_points
        d = self.df.iloc[index]
        return get_file_name_for_item(d, index)

    def make_or_load_dataset(self):
        """
        Creates the dataset or reads if it already exists

        If the .npz file at self.dataset_path exists, its arrays and
        dictionaries replace the current attributes. Otherwise all
        self.num_data_points items are encoded and the result is saved.
        Returns:
            None
        """
        # read dataset if already exists
        if os.path.exists(self.dataset_path):
            print('Dataset already created. Reading it now')
            # NOTE: allow_pickle=True is required because the dictionaries are
            # stored as pickled objects; only load files from a trusted source.
            # The context manager closes the underlying archive once all
            # entries have been read.
            with np.load(self.dataset_path, allow_pickle=True) as dataset:
                self.score_array = dataset['score_array']
                self.latent_array = dataset['latent_array']
                self.note2index_dict = dataset['note2index_dict'].item()
                self.index2note_dict = dataset['index2note_dict'].item()
                self.latent_dicts = dataset['latent_dicts'].item()
                self.metadata = dataset['metadata'].item()
            return

        # else, create dataset
        print('Making tensor dataset')
        score_seq = [None] * self.num_data_points
        latent_seq = [None] * self.num_data_points

        for idx in tqdm(range(self.num_data_points)):
            m21_score = self._get_score_for_item(idx)
            score_seq[idx] = self.get_tensor(m21_score)
            latent_seq[idx] = self._get_latents_array_for_index(idx)

        self.score_array = np.array(score_seq)
        self.latent_array = np.array(latent_seq)
        print('Number of data points: ', self.score_array.shape[0])
        self.metadata = {
            'title': TITLE,
            'description': DESCRIPTION,
            'version': VERSION_NUM,
            'authors': AUTHORS,
            # key name kept as 'data' for compatibility with saved files
            'data': date.today().strftime("%B %d, %Y"),
            'latents_names': tuple(self.latent_dicts),
        }
        np.savez(
            self.dataset_path,
            score_array=self.score_array,
            latent_array=self.latent_array,
            note2index_dict=self.note2index_dict,
            index2note_dict=self.index2note_dict,
            latent_dicts=self.latent_dicts,
            metadata=self.metadata
        )

    def get_tensor(self, score: "music21.stream.Score") -> Union[np.ndarray, None]:
        """
        Returns the score as a numpy array of note indices

        Note names that are not yet in the index dictionaries are registered
        through update_index_dicts().

        Args:
            score: music21.stream.Score object

        Returns:
            np.ndarray of dtype int32 with one entry per tick: the index of
            the note on the tick where it is articulated and the index of
            SLUR_SYMBOL on the ticks where it is held; None if
            is_score_on_ticks() rejects the score
        """
        eps = 1e-5
        notes = get_notes(score)
        if not is_score_on_ticks(score, TICK_VALUES):
            return None
        for n in notes:
            if n.isNote and n.nameWithOctave not in self.note2index_dict:
                self.update_index_dicts(n.nameWithOctave)

        # construct sequence
        x = 0  # index of the note currently being written
        y = 0  # index of the tick currently being filled
        length = int(score.highestTime * self.beat_subdivisions)
        # column 0: note index, column 1: articulation flag
        t = np.zeros((length, 2))
        is_articulated = True
        num_notes = len(notes)
        current_tick = 0
        while y < length:
            if x < num_notes - 1:
                if notes[x + 1].offset > current_tick + eps:
                    # the next note has not started yet: the current note
                    # occupies this tick
                    t[y, :] = [self.note2index_dict[standard_name(notes[x])],
                               is_articulated]
                    y += 1
                    current_tick += self.tick_durations[
                        (y - 1) % len(TICK_VALUES)]
                    is_articulated = False
                else:
                    # move on to the next note without consuming a tick
                    x += 1
                    is_articulated = True
            else:
                # last note: it fills all the remaining ticks
                t[y, :] = [self.note2index_dict[standard_name(notes[x])],
                           is_articulated]
                y += 1
                is_articulated = False
        # keep the note index on articulated ticks, slur symbol elsewhere
        lead = t[:, 0] * t[:, 1] + (1 - t[:, 1]) * self.note2index_dict[SLUR_SYMBOL]
        lead = lead.astype('int32')
        return lead

    def _get_latents_array_for_index(self, index: int) -> np.ndarray:
        """
        Returns the latent values for the given row of the latent table
        Args:
            index: int, of the item in the dataset
        Returns:
            np.ndarray of dtype int32 with the 9 latent values; tonic, octave,
            scale and arpeggio entries are mapped through the reverse
            dictionaries, the rhythm entries are taken as stored
        """
        assert 0 <= index < self.num_data_points
        d = self.df.iloc[index]
        latent_list = [
            TONIC_REVERSE_DICT[d['tonic']],
            OCTAVE_REVERSE_DICT[d['octave']],
            SCALE_REVERSE_DICT[d['scale']],
            d['rhythm_bar1'],
            d['rhythm_bar2'],
            ARP_REVERSE_DICT[d['arp_chord1']],
            ARP_REVERSE_DICT[d['arp_chord2']],
            ARP_REVERSE_DICT[d['arp_chord3']],
            ARP_REVERSE_DICT[d['arp_chord4']],
        ]
        return np.array(latent_list).astype('int32')

    def initialize_index_dicts(self):
        """
        Seeds the index dictionaries with the slur and rest symbols

        The symbols are added in a fixed order so that their indices do not
        depend on string hash randomization between interpreter runs.
        """
        # dict.fromkeys drops a duplicate while preserving the order
        base_symbols = list(dict.fromkeys((SLUR_SYMBOL, 'rest')))
        for note_index, note_name in enumerate(base_symbols):
            self.index2note_dict.update({note_index: note_name})
            self.note2index_dict.update({note_name: note_index})

    def update_index_dicts(self, new_note_name):
        """
        Updates self.note2index_dict and self.index2note_dict

        The new name receives the next free index (current dictionary size)
        and a warning is printed.
        Args:
            new_note_name: str, note name to be added
        """
        new_index = len(self.note2index_dict)
        self.index2note_dict.update({new_index: new_note_name})
        self.note2index_dict.update({new_note_name: new_index})
        print(
            f'Warning: Entry {str({new_index: new_note_name})} added to dictionaries'
        )

    def get_score_from_datapoint(self, index) -> "music21.stream.Score":
        """
        Converts the score array of the given datapoint to a music21 score
        Args:
            index: int, index of the item in the dataset
        Returns:
            music21 score object
        """
        assert 0 <= index < self.num_data_points
        np_score = self.score_array[index]
        slur_index = self.note2index_dict[SLUR_SYMBOL]
        score = music21.stream.Score()
        part = music21.stream.Part()
        # LEAD
        dur = 0
        f = music21.note.Rest()
        for tick_index, note_index in enumerate(np_score.flatten()):
            tick_dur = self.tick_durations[tick_index % self.beat_subdivisions]
            if note_index != slur_index:
                # a new note starts here: flush the previous one first
                if dur > 0:
                    f.duration = music21.duration.Duration(dur)
                    part.append(f)
                dur = tick_dur
                f = standard_note(self.index2note_dict[note_index])
            else:
                # slur: the current note is held for one more tick
                dur += tick_dur
        # add last note
        f.duration = music21.duration.Duration(dur)
        part.append(f)
        score.insert(part)
        return score

    def get_latent_values_for_index(self, index) -> dict:
        """
        Returns the latent values for the datapoint specified by the index
        Args:
            index: int, index of the item in the dataset
        Returns:
            dict mapping each latent name to its value, looked up in the
            corresponding dictionary from the stored latent indices

        """
        assert 0 <= index < self.num_data_points
        latents = self.latent_array[index, :]
        latent_dict = {
            'tonic': TONIC_DICT[latents[0]],
            'octave': OCTAVE_DICT[latents[1]],
            'scale': SCALE_DICT[latents[2]],
            'rhythm_bar1': RHYTHM_DICT[latents[3]],
            'rhythm_bar2': RHYTHM_DICT[latents[4]],
            'arp_chord1': ARP_DICT[latents[5]],
            'arp_chord2': ARP_DICT[latents[6]],
            'arp_chord3': ARP_DICT[latents[7]],
            'arp_chord4': ARP_DICT[latents[8]],
        }
        return latent_dict
289 |
--------------------------------------------------------------------------------
/dmelodies_torch_dataloader.py:
--------------------------------------------------------------------------------
1 | """
2 | Module containing the dMelodies torch dataloader
3 | """
4 |
5 | import numpy as np
6 | import torch
7 | from torch.utils.data import TensorDataset, DataLoader
8 | from typing import Tuple
9 | from helpers import *
10 | from dmelodies_dataset import DMelodiesDataset
11 | from constants_latent_factors import *
12 |
13 |
class DMelodiesTorchDataset:
    """
    Class defining a torch dataloader for the dMelodies dataset

    Besides loading and splitting the data, it offers helpers that decode a
    score tensor back to music21 and estimate its latent attributes.
    """
    # Note indices at or above this value are replaced by the slur (or rest)
    # symbol before a score tensor is decoded.
    # NOTE(review): presumably the size of the note vocabulary of the released
    # dataset -- confirm.
    _INDEX_CUTOFF = 52

    def __init__(self, seed: int = 0):
        """
        Initializes the DMelodiesTorchDataset class
        Args:
            seed: int, specifies the random seed to be used for shuffling the data
        """
        self.kwargs = {'pin_memory': True} if torch.cuda.is_available() else {}
        self.dataset = None
        self.seed = seed
        self.note2index_dict = None
        self.index2note_dict = None
        self.latent_dicts = None
        self.tick_durations = None
        self.beat_subdivisions = None
        # seeds numpy's global generator, which load_dataset() uses to shuffle
        np.random.seed(seed)

    def load_dataset(self):
        """
        Loads and shuffles the data
        """
        dataset = DMelodiesDataset()
        dataset.make_or_load_dataset()
        score = dataset.score_array
        score = np.expand_dims(score, axis=1)
        latent_values = dataset.latent_array
        # Scores and latents are packed side by side so that one shuffle of
        # the rows keeps every score aligned with its latent values.
        a = np.c_[
            score.reshape(len(score), -1),
            latent_values.reshape(len(latent_values), -1)
        ]
        score_width = score.size // len(score)
        # NOTE: score2 / latent_values2 are taken BEFORE the shuffle on
        # purpose; this relies on them being views of `a`, so the in-place
        # shuffle below is visible through them.
        score2 = a[:, :score_width].reshape(score.shape)
        latent_values2 = a[:, score_width:].reshape(latent_values.shape)
        np.random.shuffle(a)
        self.dataset = TensorDataset(
            torch.from_numpy(score2),
            torch.from_numpy(latent_values2)
        )
        self.note2index_dict = dataset.note2index_dict
        self.index2note_dict = dataset.index2note_dict
        self.latent_dicts = dataset.latent_dicts
        self.beat_subdivisions = dataset.beat_subdivisions
        self.tick_durations = dataset.tick_durations

    def data_loaders(
            self, batch_size: int, split: tuple = (0.70, 0.20)
    ) -> Tuple[DataLoader, DataLoader, DataLoader]:
        """
        Returns three data loaders obtained by splitting the data
        Args:
            batch_size: int, number of data points in each batch
            split: tuple, fractions of the data used for the train and the
                validation sets; the remainder forms the test set
        Returns:
            tuple of 3 DataLoader objects corresponding to the train, validation and test sets
        """
        assert sum(split) < 1

        if self.dataset is None:
            self.load_dataset()

        num_examples = len(self.dataset)
        a, b = split
        train_end = int(a * num_examples)
        val_end = int((a + b) * num_examples)
        train_dataset = TensorDataset(*self.dataset[:train_end])
        val_dataset = TensorDataset(*self.dataset[train_end:val_end])
        eval_dataset = TensorDataset(*self.dataset[val_end:])

        # the same loader options (pin_memory when CUDA is available) are
        # applied to all three loaders
        train_dl = DataLoader(
            train_dataset,
            batch_size=batch_size,
            shuffle=True,
            **self.kwargs
        )

        val_dl = DataLoader(
            val_dataset,
            batch_size=batch_size,
            shuffle=True,
            **self.kwargs
        )

        eval_dl = DataLoader(
            eval_dataset,
            batch_size=batch_size,
            shuffle=False,
            **self.kwargs
        )
        return train_dl, val_dl, eval_dl

    @staticmethod
    def _to_numpy_copy(tensor):
        """
        Returns the tensor content as a numpy array that does not share
        memory with the tensor, so it can be modified freely
        """
        return tensor.detach().cpu().numpy().copy()

    def tensor_to_m21score(self, tensor_score):
        """
        Converts lead given as tensor_lead to a true music21 score
        :param tensor_score: pytorch tensor of note indices; it is flattened
            before decoding and left unmodified
        :return: music21.stream.Score object with a single part
        """
        slur_index = self.note2index_dict[SLUR_SYMBOL]

        score = music21.stream.Score()
        part = music21.stream.Part()
        # LEAD
        dur = 0
        f = music21.note.Rest()
        tensor_lead_np = self._to_numpy_copy(tensor_score).flatten()
        tensor_lead_np[tensor_lead_np >= self._INDEX_CUTOFF] = slur_index
        for tick_index, note_index in enumerate(tensor_lead_np):
            tick_dur = self.tick_durations[tick_index % self.beat_subdivisions]
            if note_index != slur_index:
                # a new note starts here: flush the previous one first
                if dur > 0:
                    f.duration = music21.duration.Duration(dur)
                    part.append(f)
                dur = tick_dur
                f = standard_note(self.index2note_dict[note_index])
            else:
                # slur: the current note is held for one more tick
                dur += tick_dur
        # add last note
        f.duration = music21.duration.Duration(dur)
        part.append(f)
        score.insert(part)
        return score

    @staticmethod
    def fix_note_str(note_str):
        """
        Rewrites a flat note name (second character '-') as the equivalent
        sharp name, e.g. 'B-4' -> 'A#4'; other names are returned unchanged
        :param note_str: str, note name followed by the octave
        :return: str
        :raises KeyError: for flats without an entry in the map ('C-', 'F-')
        """
        note_map = {
            'B': 'A',
            'A': 'G',
            'G': 'F',
            'E': 'D',
            'D': 'C'
        }
        if len(note_str) > 1 and note_str[1] == '-':
            # keep everything after the accidental so that multi-character
            # octaves survive
            return note_map[note_str[0]] + '#' + note_str[2:]
        return note_str

    @staticmethod
    def _root_from_notes(midi_pitch_array, note_list):
        """
        Returns the lowest of the first three notes, or None if there are
        fewer than three notes
        """
        if len(note_list) < 3:
            return None
        return note_list[np.argmin(midi_pitch_array[:3])]

    @staticmethod
    def _tonic_octave_indices(root_note):
        """
        Returns the (tonic, octave) indices of the root note, using -1 for a
        value missing from the reverse dictionaries
        """
        octave = OCTAVE_REVERSE_DICT.get(root_note.octave, -1)
        tonic = TONIC_REVERSE_DICT.get(root_note.name, -1)
        return tonic, octave

    @staticmethod
    def _estimate_mode(midi_pitch_array, root_midi):
        """
        Returns the index of the first scale in SCALE_NOTES_DICT containing
        all pitch classes (relative to the root), or -1 if there is none
        """
        diff_array = (midi_pitch_array - root_midi) % 12
        for mode in SCALE_NOTES_DICT.keys():
            scale_note_set = set(SCALE_NOTES_DICT[mode])
            if set(diff_array).issubset(scale_note_set):
                return SCALE_REVERSE_DICT[mode]
        return -1

    @staticmethod
    def _estimate_arp_directions(midi_pitch_array):
        """
        Returns a list with the arpeggiation direction index of each of the 4
        chords; entries stay -1 when the score does not have exactly 12
        notes or when a chord has no clear direction
        """
        arp_dir = [-1, -1, -1, -1]
        if midi_pitch_array.size == 12:
            chords = np.reshape(midi_pitch_array, (4, 3))
            # sum of the signs of the two intervals inside each chord
            s_array = np.sum(np.sign(np.diff(chords, axis=1)), axis=1)
            for i, s in enumerate(s_array):
                if s > 0:
                    arp_dir[i] = ARP_REVERSE_DICT['up']
                elif s < 0:
                    arp_dir[i] = ARP_REVERSE_DICT['down']
        return arp_dir

    def get_root_note(self, tensor_score):
        """
        Returns the root note for the given input score
        :param tensor_score: pytorch tensor, (16,)
        :return: music21.note.Note object, or None if the score has fewer
            than 3 notes
        """
        midi_pitch_array, note_list = self.compute_midi_sequence(tensor_score)
        return self._root_from_notes(midi_pitch_array, note_list)

    def compute_midi_sequence(self, tensor_score):
        """
        Returns a numpy array of midi pitch numbers given an input score

        Rests, slurs and out-of-range indices are dropped. The input tensor
        is not modified.
        :param tensor_score: tensor_score: pytorch tensor, (16,)
        :return: tuple[np.array, (L,), list[music21.note.Note]]
        """
        # create MIDI pitch sequence
        slur_index = self.note2index_dict[SLUR_SYMBOL]
        rest_index = self.note2index_dict['rest']
        # work on a copy: Tensor.numpy() shares memory with the tensor and
        # the masking below would otherwise overwrite the caller's data
        numpy_score = self._to_numpy_copy(tensor_score)
        numpy_score[numpy_score >= self._INDEX_CUTOFF] = slur_index
        numpy_score = numpy_score[numpy_score != rest_index]
        numpy_score = numpy_score[numpy_score != slur_index]
        midi_pitch_array = np.zeros_like(numpy_score)
        note_list = []
        for i, note_index in enumerate(numpy_score):
            note_str = self.fix_note_str(self.index2note_dict[note_index])
            n = music21.note.Note(note_str)
            note_list.append(n)
            midi_pitch_array[i] = n.pitch.midi

        return midi_pitch_array, note_list

    def compute_tonic_octave(self, tensor_score):
        """
        Computes the indices of the tonic, octave for a given input score
        :param tensor_score: pytorch tensor, (16,)
        :return: tuple[int, int], (-1, -1) if no root note can be estimated
        """
        root_note = self.get_root_note(tensor_score)
        if root_note is None:
            return -1, -1
        return self._tonic_octave_indices(root_note)

    def compute_mode(self, tensor_score):
        """
        Computes the most likely mode for a given input score
        :param tensor_score: pytorch tensor, (16,)
        :return: int, -1 if no root note or no matching scale is found
        """
        midi_pitch_array, note_list = self.compute_midi_sequence(tensor_score)
        root_note = self._root_from_notes(midi_pitch_array, note_list)
        if root_note is None:
            return -1
        return self._estimate_mode(midi_pitch_array, root_note.pitch.midi)

    def compute_rhythm(self, tensor_score, bar_num):
        """
        Computes the index for the rhythm for a given input score and bar number
        :param tensor_score: pytorch tensor, (16,)
        :param bar_num: int, 1 or 2
        :return: int, -1 if the onset pattern is not in RHYTHM_DICT
        :raises ValueError: if bar_num is neither 1 nor 2
        """
        slur_index = self.note2index_dict[SLUR_SYMBOL]
        rest_index = self.note2index_dict['rest']
        if bar_num == 1:
            bar_tensor = self._to_numpy_copy(tensor_score[:8])
        elif bar_num == 2:
            bar_tensor = self._to_numpy_copy(tensor_score[8:])
        else:
            raise ValueError("Invalid bar number")
        # collapse everything that is not a note onset onto -1, then turn
        # the bar into a 1 (onset) / 0 (no onset) pattern
        bar_tensor[bar_tensor >= self._INDEX_CUTOFF] = rest_index
        bar_tensor[bar_tensor == slur_index] = rest_index
        bar_tensor[bar_tensor == rest_index] = -1
        bar_tensor[bar_tensor != -1] = 1
        bar_tensor[bar_tensor == -1] = 0
        for rhy_idx in RHYTHM_DICT.keys():
            if list(bar_tensor) == RHYTHM_DICT[rhy_idx]:
                return rhy_idx
        return -1

    def compute_arp_dir(self, tensor_score):
        """
        Computes the arpeggiation direction for a given input score
        :param tensor_score: pytorch tensor, (16,)
        :return: tuple[int, int, int, int]
        """
        midi_pitch_array, _ = self.compute_midi_sequence(tensor_score)
        return tuple(self._estimate_arp_directions(midi_pitch_array))

    def compute_attributes(self, tensor_score):
        """
        Computes all attributes for a given input score
        :param tensor_score: tensor_score: pytorch tensor, (16,)
        :return: tuple[int, int, int, int, int, int, int, int, int] ordered as
            tonic, octave, mode, rhythm bar 1, rhythm bar 2 and the
            arpeggiation direction of the 4 chords; all -1 if the score has
            fewer than 3 notes
        """
        # get the midi pitch array
        midi_pitch_array, note_list = self.compute_midi_sequence(tensor_score)

        # estimate root note
        root_note = self._root_from_notes(midi_pitch_array, note_list)
        if root_note is None:
            return -1, -1, -1, -1, -1, -1, -1, -1, -1

        # get tonic and octave
        tonic_idx, octave_idx = self._tonic_octave_indices(root_note)

        # estimate mode
        mode_idx = self._estimate_mode(midi_pitch_array, root_note.pitch.midi)

        # estimate rhythm factors
        rhy1_idx = self.compute_rhythm(tensor_score, bar_num=1)
        rhy2_idx = self.compute_rhythm(tensor_score, bar_num=2)

        # estimate arpeggiation factors
        arp1_idx, arp2_idx, arp3_idx, arp4_idx = self._estimate_arp_directions(
            midi_pitch_array
        )

        return tonic_idx, octave_idx, mode_idx, rhy1_idx, rhy2_idx, arp1_idx, arp2_idx, arp3_idx, arp4_idx
327 |
--------------------------------------------------------------------------------
/dmelodies_loading.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "markdown",
5 | "source": [
6 | "## dMelodies: A Music Dataset for Disentanglement Learning\n",
7 | "### Description\n",
8 | "The dMelodies dataset is a dataset of algorithmically generated monophonic melodies. Each datapoint is a 2-bar melody which plays arpeggios on the standard I-IV-V-I cadence chord pattern. The dataset has 9 independent latent factors of variation: root, octave, scale, rhythm bar 1, rhythm bar 2, arpeggiation direction for each of the 4 chords.\n",
9 | "\n",
10 | "This jupyter notebook provides examples on how to use the dMelodies dataset and the provided pyTorch dataloader. \n",
11 | "\n",
12 | "### Loading Data\n",
13 | "**Option 1**: The dataset can be read directly from the `.npz` file."
14 | ],
15 | "metadata": {
16 | "collapsed": false
17 | }
18 | },
19 | {
20 | "cell_type": "code",
21 | "execution_count": 25,
22 | "outputs": [],
23 | "source": [
24 | "import music21\n",
25 | "import numpy as np\n",
26 | "import random\n",
27 | "import warnings\n",
28 | "warnings.simplefilter(action='ignore', category=FutureWarning)"
29 | ],
30 | "metadata": {
31 | "collapsed": false,
32 | "pycharm": {
33 | "name": "#%%\n",
34 | "is_executing": false
35 | }
36 | }
37 | },
38 | {
39 | "cell_type": "code",
40 | "source": [
41 | "# Load dataset\n",
42 | "dataset_npz = np.load('data/dMelodies_dataset.npz', allow_pickle=True)\n",
43 | "\n",
44 | "print('Keys in the dataset:', dataset_npz.files)\n",
45 | "score_array = dataset_npz['score_array'] # numpy array containing the score representation (num_datapoint x 16)\n",
46 | "latent_array = dataset_npz['latent_array'] # numpy array containing the latent values of each datapoint (num_datapoint x 9)\n",
47 | "print(f'Score Array Shape: {score_array.shape}, Latent Array Shape: {latent_array.shape}' )\n",
48 | "\n",
49 | "metadata = dataset_npz['metadata']\n",
50 | "print(f'Metadata: \\n {metadata}')"
51 | ],
52 | "metadata": {
53 | "collapsed": false,
54 | "pycharm": {
55 | "name": "#%% \n",
56 | "is_executing": false
57 | }
58 | },
59 | "execution_count": 26,
60 | "outputs": [
61 | {
62 | "name": "stdout",
63 | "text": [
64 | "Keys in the dataset: ['score_array', 'latent_array', 'note2index_dict', 'index2note_dict', 'latent_dicts', 'metadata']\n",
65 | "Score Array Shape: (1354752, 16), Latent Array Shape: (1354752, 9)\n",
66 | "Metadata: \n",
67 | " {'title': 'dMelodies dataset', 'description': 'dMelodies dataset. Algorithmically generated 2-bar melodies. Each datapoint is a 2-bar melody which plays arpreggios on the standard I-IV-V-I cadence chord pattern. The dataset has 9 independent latent factors of variation: root, octave, scale, rhythm bar 1, rhythm bar 2, arpeggiation direction for each of the 4 chords. All possible values of the latent factors are present.', 'version': 1, 'authors': 'ashis.pati@gatech.edu, siddharth.gururani@gatech.edu, alexander.lerch@gatech.edu', 'data': 'July 26, 2020', 'latents_names': ('tonic', 'octave', 'scale', 'rhythm_bar1', 'rhythm_bar2', 'arp_chord1', 'arp_chord2', 'arp_chord3', 'arp_chord4')}\n"
68 | ],
69 | "output_type": "stream"
70 | }
71 | ]
72 | },
73 | {
74 | "cell_type": "markdown",
75 | "source": [
76 | "**Option 2**: Alternatively, the dataset can also be accessed using the `DMelodiesDataset` class."
77 | ],
78 | "metadata": {
79 | "collapsed": false
80 | }
81 | },
82 | {
83 | "cell_type": "code",
84 | "execution_count": 19,
85 | "outputs": [
86 | {
87 | "name": "stdout",
88 | "text": [
89 | "Dataset already created. Reading it now\n",
90 | "Score Array Shape: (1354752, 16), Latent Array Shape: (1354752, 9)\n",
91 | "Metadata: \n",
92 | " {'title': 'dMelodies dataset', 'description': 'dMelodies dataset. Algorithmically generated 2-bar melodies. Each datapoint is a 2-bar melody which plays arpreggios on the standard I-IV-V-I cadence chord pattern. The dataset has 9 independent latent factors of variation: root, octave, scale, rhythm bar 1, rhythm bar 2, arpeggiation direction for each of the 4 chords. All possible values of the latent factors are present.', 'version': 1, 'authors': 'ashis.pati@gatech.edu, siddharth.gururani@gatech.edu, alexander.lerch@gatech.edu', 'data': 'July 26, 2020', 'latents_names': ('tonic', 'octave', 'scale', 'rhythm_bar1', 'rhythm_bar2', 'arp_chord1', 'arp_chord2', 'arp_chord3', 'arp_chord4')}\n"
93 | ],
94 | "output_type": "stream"
95 | }
96 | ],
97 | "source": [
98 | "from dmelodies_dataset import DMelodiesDataset\n",
99 | "\n",
100 | "# Load dataset\n",
101 | "dataset = DMelodiesDataset()\n",
102 | "dataset.make_or_load_dataset() # loads the dataset\n",
103 | "\n",
104 | "score_array = dataset.score_array # numpy array containing the score representation (num_datapoint x 16)\n",
105 | "latent_array = dataset.latent_array # numpy array containing the latent values of each datapoint (num_datapoint x 9)\n",
106 | "print(f'Score Array Shape: {score_array.shape}, Latent Array Shape: {latent_array.shape}' )\n",
107 | "\n",
108 | "metadata = dataset.metadata\n",
109 | "print(f'Metadata: \\n {metadata}')\n"
110 | ],
111 | "metadata": {
112 | "collapsed": false,
113 | "pycharm": {
114 | "name": "#%%\n",
115 | "is_executing": false
116 | }
117 | }
118 | },
119 | {
120 | "cell_type": "markdown",
121 | "source": [
122 | "### Visualizing Example Melodies"
123 | ],
124 | "metadata": {
125 | "collapsed": false,
126 | "pycharm": {
127 | "name": "#%% md\n"
128 | }
129 | }
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 20,
134 | "outputs": [
135 | {
136 | "name": "stdout",
137 | "text": [
138 | "{'tonic': 'G', 'octave': 6, 'scale': 'minor', 'rhythm_bar1': [1, 1, 0, 0, 1, 1, 1, 1], 'rhythm_bar2': [1, 0, 1, 1, 1, 1, 0, 1], 'arp_chord1': 'up', 'arp_chord2': 'down', 'arp_chord3': 'up', 'arp_chord4': 'down'}\n"
139 | ],
140 | "output_type": "stream"
141 | },
142 | {
143 | "data": {
144 | "text/plain": "",
145 | "image/png": "\n"
146 | },
147 | "metadata": {
148 | "image/png": {
149 | "width": 586,
150 | "height": 382
151 | }
152 | },
153 | "output_type": "display_data"
154 | }
155 | ],
156 | "source": [
157 | "rand_idx = random.randint(0, dataset.num_data_points)\n",
158 | "melody = dataset.get_score_from_datapoint(rand_idx)\n",
159 | "melody.parts[0].measures(1,2).plot() # show as piano roll\n",
160 | "latents = dataset.get_latent_values_for_index(rand_idx)\n",
161 | "print(latents)\n",
162 | "# alternatively, if Musescore is correctly configured to be used with music21, then the score (as sheet music) can also be displayed\n",
163 | "# melody.show()"
164 | ],
165 | "metadata": {
166 | "collapsed": false,
167 | "pycharm": {
168 | "name": "#%%\n",
169 | "is_executing": false
170 | }
171 | }
172 | },
173 | {
174 | "cell_type": "markdown",
175 | "source": [
176 | "### Using the PyTorch Dataloader\n",
177 | "\n",
178 | "A pyTorch dataloader class is provided in this repository in the `dmelodies_torch_dataloader.py` file. "
179 | ],
180 | "metadata": {
181 | "collapsed": false,
182 | "pycharm": {
183 | "name": "#%% md\n"
184 | }
185 | }
186 | },
187 | {
188 | "cell_type": "code",
189 | "execution_count": 14,
190 | "outputs": [
191 | {
192 | "name": "stdout",
193 | "text": [
194 | "Dataset already created. Reading it now\n",
195 | "Total number of datapoints: 1354752\n",
196 | "Number of batches: \n",
197 | " \t train:25402 \n",
198 | " \t validation:12701 \n",
199 | " \t test:4234\n"
200 | ],
201 | "output_type": "stream"
202 | }
203 | ],
204 | "source": [
205 | "from dmelodies_torch_dataloader import DMelodiesTorchDataset\n",
206 | "\n",
207 | "# Instantiate dataloader and load dataset\n",
208 | "torch_dataloader = DMelodiesTorchDataset(seed=0) # seed can be used to initialize different random seeds\n",
209 | "torch_dataloader.load_dataset() \n",
210 | "print(f'Total number of datapoints: {len(torch_dataloader.dataset)}')\n",
211 | "\n",
212 | "# Create dataloaders\n",
213 | "train_dl, val_dl, eval_dl = torch_dataloader.data_loaders(batch_size=32, split=(0.6, 0.3))\n",
214 | "print(f'Number of batches: \\n \\t train:{len(train_dl)} \\n \\t validation:{len(val_dl)} \\n \\t test:{len(eval_dl)}')\n"
215 | ],
216 | "metadata": {
217 | "collapsed": false,
218 | "pycharm": {
219 | "name": "#%%\n",
220 | "is_executing": false
221 | }
222 | }
223 | }
224 | ],
225 | "metadata": {
226 | "kernelspec": {
227 | "display_name": "Python 3",
228 | "language": "python",
229 | "name": "python3"
230 | },
231 | "language_info": {
232 | "codemirror_mode": {
233 | "name": "ipython",
234 | "version": 2
235 | },
236 | "file_extension": ".py",
237 | "mimetype": "text/x-python",
238 | "name": "python",
239 | "nbconvert_exporter": "python",
240 | "pygments_lexer": "ipython2",
241 | "version": "2.7.6"
242 | },
243 | "pycharm": {
244 | "stem_cell": {
245 | "cell_type": "raw",
246 | "source": [],
247 | "metadata": {
248 | "collapsed": false
249 | }
250 | }
251 | }
252 | },
253 | "nbformat": 4,
254 | "nbformat_minor": 0
255 | }
--------------------------------------------------------------------------------
/figs/dataset_example.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------