├── LICENSE
├── README.md
├── config.json
├── data
│   ├── __init__.py
│   ├── _dataset.py
│   ├── _utils.py
│   ├── _utils_midi
│   │   ├── README.md
│   │   ├── __init__.py
│   │   ├── conv_midi2note.py
│   │   └── conv_note2label.py
│   ├── create_dataset.py
│   ├── create_labels.py
│   ├── download.py
│   ├── sv
│   │   ├── extract.py
│   │   ├── sampler.py
│   │   └── style_vectors.json
│   ├── sync.py
│   └── transcribe.py
├── dataset
│   └── src.json
├── docs
│   ├── index.html
│   └── static
│       ├── bg-cover.png
│       ├── favicon.ico
│       └── overview.png
├── eval
│   ├── cover.py
│   ├── distance.py
│   └── f1.py
├── infer
│   └── __main__.py
├── models
│   ├── __init__.py
│   ├── _models.py
│   ├── hFT_Transformer
│   │   ├── README.md
│   │   ├── amt.py
│   │   └── model_spec2midi.py
│   └── params
│       └── .gitkeep
├── requirements.txt
├── train
│   ├── __init__.py
│   ├── __main__.py
│   ├── _loss.py
│   └── _trainer.py
└── utils
    ├── __init__.py
    ├── _config.py
    ├── _info.py
    └── info.json

/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 komiya
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # AMT-APC
2 | 
3 | AMT-APC is a method for training an automatic piano cover generation model by fine-tuning an AMT (Automatic Music Transcription) model.
4 | 
5 | - Project page: [AMT-APC](https://misya11p.github.io/amt-apc/)
6 | - Paper: [[2409.14086] AMT-APC: Automatic Piano Cover by Fine-Tuning an Automatic Music Transcription Model](https://arxiv.org/abs/2409.14086)
7 | 
8 | ## Usage (Piano Cover Generation)
9 | 
10 | Python version: 3.10
11 | 
12 | 1. Install dependencies
13 | 
14 | ```bash
15 | pip install torch torchaudio soundfile pretty-midi tqdm
16 | ```
17 | 
18 | 2. Download the pre-trained model
19 | 
20 | ```bash
21 | wget -P models/params/ https://github.com/misya11p/amt-apc/releases/download/beta/apc.pth
22 | ```
23 | 
24 | 3. Run the inference code
25 | 
26 | ```bash
27 | python infer input.wav
28 | ```
29 | 
30 | You can also input a YouTube URL (requires [`yt-dlp`](https://github.com/yt-dlp/yt-dlp)).
31 | 
32 | ```bash
33 | python infer 'https://www.youtube.com/watch?v=...'
34 | ```
35 | 
36 | You can also specify a style (`level1`, `level2`, `level3`).
37 | 
38 | ```bash
39 | python infer input.wav --style level3
40 | ```
41 | 
42 | ## Usage (Training & Evaluation)
43 | 
44 | Python version: 3.10
45 | 
46 | 1. Install dependencies
47 | 
48 | ```bash
49 | pip install -r requirements.txt
50 | ```
51 | 
52 | 2. Download the pre-trained AMT model
53 | 
54 | ```bash
55 | wget -P models/params/ https://github.com/misya11p/amt-apc/releases/download/beta/amt.pth
56 | ```
57 | 
58 | 3. Download the dataset
59 | 
60 | ```bash
61 | python data/download.py
62 | ```
63 | 
64 | The dataset directory is set to `dataset/` by default. You can change this directory by modifying `path.dataset` in `config.json`.
65 | 
66 | 4. Create the dataset
67 | 
68 | ```bash
69 | python data/sync.py
70 | python data/transcribe.py
71 | python data/sv/extract.py
72 | python data/create_labels.py
73 | python data/create_dataset.py
74 | ```
75 | 
76 | 5. Train the model
77 | 
78 | ```bash
79 | python train --n_gpus 1
80 | ```
81 | 
82 | 6. Evaluate the model
83 | 
84 | Calculate $\mathcal{Q}_{\text{max}}$.
85 | 
86 | ```bash
87 | git clone https://github.com/albincorreya/ChromaCoverId.git eval/ChromaCoverId
88 | python eval/cover.py
89 | python eval/distance.py
90 | ```
91 | 
92 | ### Options
93 | 
94 | Detailed configuration can be done through `config.json` or with command-line options, which are explained with `--help`. The default values are those used in the experiments in the paper.
95 | 
96 | ## Citation
97 | 
98 | ```
99 | @article{komiya2024,
100 |     title={AMT-APC: Automatic Piano Cover by Fine-Tuning an Automatic Music Transcription Model},
101 |     author={Komiya, Kazuma and Fukuhara, Yoshihisa},
102 |     journal={arXiv preprint arXiv:2409.14086},
103 |     year={2024}
104 | }
105 | ```
106 | 
--------------------------------------------------------------------------------
/config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "data": {
3 |         "feature": {
4 |             "sr": 16000,
5 |             "hop_sample": 256,
6 |             "mel_bins": 256,
7 |             "n_bins": 256,
8 |             "fft_bins": 2048,
9 |             "window_length": 2048,
10 |             "log_offset": 1e-08,
11 |             "window": "hann",
12 |             "pad_mode": "constant"
13 |         },
14 |         "input": {
15 |             "margin_b": 32,
16 |             "margin_f": 32,
17 |             "num_frame": 512,
18 |             "min_value": -18.0
19 |         },
20 |         "midi": {
21 |             "note_min": 21,
22 |             "note_max": 108,
23 |             "num_note": 88,
24 |             "num_velocity": 128
25 |         }
26 |     },
27 |     "model": {
28 |         "cnn": {
29 |             "channel": 4,
30 |             "kernel": 5
31 |         },
32 |         "dropout": 0.1,
33 |         "transformer": {
34 |             "decoder": {
35 |                 "n_head": 4,
36 |                 "n_layer": 3
37 |             },
38 |             "encoder": {
39 |                 "n_head": 4,
40 |                 "n_layer": 3
41 |             },
42 |             "hid_dim": 256,
43 |             "pf_dim": 512
44 |         },
45 |         "sv_dim": 24
46 |     },
47 |     "train": {
48 |         "batch_size": 4,
49 |         "n_epochs": 5,
50 |         "lr": 0.0001,
51 |         "beta": 0.75,
52 |         "theta_onset": 0.07,
53 |         "theta_frame": 0.2,
54 |         "theta_velocity": 0.01
55 |     },
56 |     "infer": {
57 |         "threshold": {
58 |             "onset": 0.5,
59 |             "offset": 1.0,
60 |             "frame": 0.5
61 |         },
62 |         "min_duration": 0.08
63 |     },
64 |     "path": {
65 |         "amt": "models/params/amt.pth",
66 |         "apc": "models/params/apc.pth",
67 |         "dataset": "dataset/",
68 |         "info": "utils/info.json",
69 |         "src": "dataset/src.json",
70 |         "style_vectors": "data/sv/style_vectors.json",
71 |         "checkpoints": "models/params/checkpoints/"
72 |     }
73 | }
--------------------------------------------------------------------------------
/data/__init__.py:
--------------------------------------------------------------------------------
1 | from ._utils import wav2feature, preprocess_feature
2 | from ._dataset import PianoCoversDataset
3 | from .sv.sampler import Sampler as SVSampler
4 | from .sv.extract import extract_raw_style
5 | 
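Note on configuration access: `utils/_config.py` is not reproduced in this dump, but every module below reads `config.json` through attribute chains such as `config.path.dataset` and `config.data.input.num_frame`. The following is a minimal sketch of a wrapper supporting that access pattern, assuming only what the call sites show; the helper name `_to_namespace` is hypothetical and the real implementation may differ.

```python
# Hypothetical sketch of an attribute-style config wrapper (the real
# utils/_config.py is not shown above and may differ). It recursively
# converts the JSON dict into SimpleNamespace objects so that nested
# keys become attributes.
import json
from types import SimpleNamespace


def _to_namespace(obj):
    # Dicts become namespaces; lists and scalars are returned unchanged.
    if isinstance(obj, dict):
        return SimpleNamespace(**{k: _to_namespace(v) for k, v in obj.items()})
    return obj


with open("config.json", "r") as f:
    config = _to_namespace(json.load(f))

print(config.data.feature.sr)  # 16000
print(config.path.dataset)     # dataset/
```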
-------------------------------------------------------------------------------- /data/_dataset.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | ROOT = Path(__file__).resolve().parent.parent 5 | sys.path.append(str(ROOT)) 6 | 7 | import numpy as np 8 | import torch 9 | from torch.utils.data import Dataset 10 | 11 | from utils import config, info 12 | from .sv.sampler import Sampler as SVSampler 13 | 14 | 15 | DIR_DATASET = ROOT / config.path.dataset / "dataset/" 16 | DIR_SPEC = DIR_DATASET / "spec/" 17 | DIR_LABEL = DIR_DATASET / "label/" 18 | 19 | 20 | class PianoCoversDataset(Dataset): 21 | def __init__(self, split="train"): 22 | self.data = list(DIR_LABEL.glob("*.npz")) 23 | if split == "train": 24 | self.data = [path for path in self.data if self.is_train(path)] 25 | elif split == "test": 26 | self.data = [path for path in self.data if not self.is_train(path)] 27 | elif split == "all": 28 | pass 29 | else: 30 | raise ValueError(f"Invalid value for 'split': {split}") 31 | self.sv_sampler = SVSampler() 32 | 33 | def __len__(self): 34 | return len(self.data) 35 | 36 | def __getitem__(self, idx): 37 | path = self.data[idx] 38 | label = np.load(path) 39 | spec, sv = self.get_spec_sv(path) 40 | 41 | spec = torch.from_numpy(spec).float() 42 | sv = torch.tensor(sv).float() 43 | onset = torch.from_numpy(label["onset"]) 44 | offset = torch.from_numpy(label["offset"]) 45 | frame = torch.from_numpy(label["frame"]) 46 | velocity = torch.from_numpy(label["velocity"]).long() 47 | 48 | return spec, sv, onset, offset, frame, velocity 49 | 50 | @staticmethod 51 | def get_id_n(path: Path): 52 | split = path.stem.split("_") 53 | n_segment = split[-1] 54 | id_piano = "_".join(split[:-1]) 55 | return id_piano, n_segment 56 | 57 | def is_train(self, path: Path): 58 | return info.is_train(self.get_id_n(path)[0]) 59 | 60 | def get_spec_sv(self, path: Path): 61 | id_piano, n_segment = self.get_id_n(path) 62 | id_orig = info.piano2orig(id_piano) 63 | fname_orig = f"{id_orig}_{n_segment}.npy" 64 | path_orig = DIR_SPEC / fname_orig 65 | spec = np.load(path_orig) 66 | sv = self.sv_sampler[id_piano] 67 | return spec, sv 68 | -------------------------------------------------------------------------------- /data/_utils.py: -------------------------------------------------------------------------------- 1 | import sys; sys.path.append("./") 2 | import torch 3 | import numpy as np 4 | 5 | from models import Pipeline 6 | 7 | 8 | PIPELINE = Pipeline(no_model=True) 9 | CONFIG = PIPELINE.config 10 | 11 | 12 | def wav2feature(path_input: str) -> torch.Tensor: 13 | """ 14 | Convert a wav file to a feature: 15 | mel-spectrogram according to config.json 16 | 17 | Args: 18 | path_input (str): Path to the input wav file. 19 | 20 | Returns: 21 | torch.Tensor: Feature tensor. 
(n_frames, n_mels) 22 | """ 23 | return PIPELINE.wav2feature(path_input) 24 | 25 | 26 | def preprocess_feature(feature: torch.Tensor) -> torch.Tensor: 27 | feature = np.array(feature, dtype=np.float32) 28 | 29 | tmp_b = np.full([CONFIG["input"]["margin_b"], CONFIG["feature"]["n_bins"]], CONFIG["input"]["min_value"], dtype=np.float32) 30 | len_s = int(np.ceil(feature.shape[0] / CONFIG["input"]["num_frame"]) * CONFIG["input"]["num_frame"]) - feature.shape[0] 31 | tmp_f = np.full([len_s+CONFIG["input"]["margin_f"], CONFIG["feature"]["n_bins"]], CONFIG["input"]["min_value"], dtype=np.float32) 32 | 33 | preprocessed_feature = torch.from_numpy(np.concatenate([tmp_b, feature, tmp_f], axis=0)) 34 | 35 | return preprocessed_feature 36 | -------------------------------------------------------------------------------- /data/_utils_midi/README.md: -------------------------------------------------------------------------------- 1 | The files in this directory are based on https://github.com/sony/hFT-Transformer 2 | -------------------------------------------------------------------------------- /data/_utils_midi/__init__.py: -------------------------------------------------------------------------------- 1 | from .conv_midi2note import midi2note 2 | from .conv_note2label import note2label 3 | -------------------------------------------------------------------------------- /data/_utils_midi/conv_midi2note.py: -------------------------------------------------------------------------------- 1 | #! python 2 | 3 | import argparse 4 | import json 5 | import mido 6 | 7 | 8 | with open("models/config.json", "r") as f: 9 | config = json.load(f)["data"] 10 | 11 | 12 | NUM_PITCH=128 13 | def midi2note(path_midi, verbose_flag = False): 14 | # (1) read MIDI file 15 | midi_file = mido.MidiFile(path_midi) 16 | ticks_per_beat = midi_file.ticks_per_beat 17 | num_tracks = len(midi_file.tracks) 18 | 19 | # (2) tempo curve 20 | max_ticks_total = 0 21 | for it in range(len(midi_file.tracks)): 22 | ticks_total = 0 23 | for message in midi_file.tracks[it]: 24 | ticks_total += int(message.time) 25 | if max_ticks_total < ticks_total: 26 | max_ticks_total = ticks_total 27 | a_time_in_sec = [0.0 for i in range(max_ticks_total+1)] 28 | ticks_curr = 0 29 | ticks_prev = 0 30 | tempo_curr = 0 31 | tempo_prev = 0 32 | time_in_sec_prev = 0.0 33 | for im, message in enumerate(midi_file.tracks[0]): 34 | ticks_curr += message.time 35 | if 'set_tempo' in str(message): 36 | tempo_curr = int(message.tempo) 37 | for i in range(ticks_prev, ticks_curr): 38 | a_time_in_sec[i] = time_in_sec_prev + ((i-ticks_prev) / ticks_per_beat * tempo_prev / 1e06) 39 | if ticks_curr > 0: 40 | time_in_sec_prev = time_in_sec_prev + ((ticks_curr-ticks_prev) / ticks_per_beat * tempo_prev / 1e06) 41 | tempo_prev = tempo_curr 42 | ticks_prev = ticks_curr 43 | for i in range(ticks_prev, max_ticks_total+1): 44 | a_time_in_sec[i] = time_in_sec_prev + ((i-ticks_prev) / ticks_per_beat * tempo_curr / 1e06) 45 | 46 | # (3) obtain MIDI message 47 | a_note = [] 48 | a_onset = [] 49 | a_velocity = [] 50 | a_reonset = [] 51 | a_push = [] 52 | a_sustain = [] 53 | for i in range(NUM_PITCH): 54 | a_onset.append(-1) 55 | a_velocity.append(-1) 56 | a_reonset.append(False) 57 | a_push.append(False) 58 | a_sustain.append(False) 59 | 60 | ticks = 0 61 | sustain_flag = False 62 | for message in midi_file.tracks[num_tracks-1]: 63 | ticks += message.time 64 | time_in_sec = a_time_in_sec[ticks] 65 | if verbose_flag is True: 66 | #print('[message]'+str(message)+' [ticks]: 
'+str(ticks/ticks_per_sec))
67 |             print('[message]'+str(message)+' [ticks]: '+str(ticks)+' [time]: '+str(time_in_sec))
68 |         if ('control_change' in str(message)) and ('control=64' in str(message)):
69 |             if message.value < 64:
70 |                 # sustain off
71 |                 if verbose_flag is True:
72 |                     print('** sustain pedal OFF **')
73 |                 for i in range(config['midi']['note_min'], config['midi']['note_max']+1):
74 |                     if (a_push[i] is False) and (a_sustain[i] is True):
75 |                         if verbose_flag is True:
76 |                             print('## output sustain pedal off : '+str(i))
77 |                             print({'onset': a_onset[i],
78 |                                    'offset': time_in_sec,
79 |                                    'pitch': i,
80 |                                    'velocity': a_velocity[i],
81 |                                    'reonset': a_reonset[i]})
82 |                         a_note.append({'onset': a_onset[i],
83 |                                        'offset': time_in_sec,
84 |                                        'pitch': i,
85 |                                        'velocity': a_velocity[i],
86 |                                        'reonset': a_reonset[i]})
87 |                         a_onset[i] = -1
88 |                         a_velocity[i] = -1
89 |                         a_reonset[i] = False
90 |                 sustain_flag = False
91 |                 for i in range(config['midi']['note_min'], config['midi']['note_max']+1):
92 |                     a_sustain[i] = False
93 |             else:
94 |                 # sustain on
95 |                 if verbose_flag is True:
96 |                     print('** sustain pedal ON **')
97 |                 sustain_flag = True
98 |                 for i in range(config['midi']['note_min'], config['midi']['note_max']+1):
99 |                     if a_push[i] is True:
100 |                         a_sustain[i] = True
101 |                         if verbose_flag is True:
102 |                             print('sustain('+str(i)+') ON')
103 |         elif ('note_on' in str(message)) and (int(message.velocity) > 0):
104 |             # note on
105 |             note = message.note
106 |             velocity = message.velocity
107 |             if verbose_flag is True:
108 |                 print('++note ON++: '+str(note))
109 |             if (a_push[note] is True) or (a_sustain[note] is True):
110 |                 if verbose_flag is True:
111 |                     print('## output reonset : '+str(note))
112 |                     print({'onset': a_onset[note],
113 |                            'offset': time_in_sec,
114 |                            'pitch': note,
115 |                            'velocity': a_velocity[note],
116 |                            'reonset': a_reonset[note]})
117 |                 # reonset
118 |                 a_note.append({'onset': a_onset[note],
119 |                                'offset': time_in_sec,
120 |                                'pitch': note,
121 |                                'velocity': a_velocity[note],
122 |                                'reonset': a_reonset[note]})
123 |                 a_reonset[note] = True
124 |             else:
125 |                 a_reonset[note] = False
126 |             a_onset[note] = time_in_sec
127 |             a_velocity[note] = velocity
128 |             a_push[note] = True
129 |             if sustain_flag is True:
130 |                 a_sustain[note] = True
131 |                 if verbose_flag is True:
132 |                     print('sustain('+str(note)+') ON')
133 |         elif (('note_off' in str(message)) or \
134 |             (('note_on' in str(message)) and (int(message.velocity) == 0))):
135 |             # note off
136 |             note = message.note
137 |             velocity = message.velocity
138 |             if verbose_flag is True:
139 |                 print('++note OFF++: '+str(note))
140 |             if (a_push[note] is True) and (a_sustain[note] is False):
141 |                 # offset
142 |                 if verbose_flag is True:
143 |                     print('## output offset : '+str(note))
144 |                     print({'onset': a_onset[note],
145 |                            'offset': time_in_sec,
146 |                            'pitch': note,
147 |                            'velocity': a_velocity[note],
148 |                            'reonset': a_reonset[note]})
154 |                 a_note.append({'onset': a_onset[note],
155 |                                'offset': time_in_sec,
156 |                                'pitch': note,
157 |                                'velocity': a_velocity[note],
158 |                                'reonset': a_reonset[note]})
159 |                 a_onset[note] = -1
160 |                 a_velocity[note] = -1
161 |                 a_reonset[note] = False
162 |             a_push[note] = False
163 | 
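    # Flush notes that are still sounding at the end of the track (key held
    # down or kept alive by the sustain pedal), closing them at the time of
    # the last MIDI event.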
164 |     for i in range(config['midi']['note_min'], config['midi']['note_max']+1):
165 |         if (a_push[i] is True) or (a_sustain[i] is True):
166 |             if verbose_flag is True:
167 |                 print('## output final : '+str(i))
168 |                 print({'onset': a_onset[i],
169 |                        'offset': time_in_sec,
170 |                        'pitch': i,
171 |                        'velocity': a_velocity[i],
172 |                        'reonset': a_reonset[i]})
173 |             a_note.append({'onset': a_onset[i],
174 |                            'offset': time_in_sec,
175 |                            'pitch': i,
176 |                            'velocity': a_velocity[i],
177 |                            'reonset': a_reonset[i]})
178 |     a_note_sort = sorted(sorted(a_note, key=lambda x: x['pitch']), key=lambda x: x['onset'])
179 | 
180 |     return a_note_sort
181 | 
182 | 
183 | if __name__ == '__main__':
184 |     parser = argparse.ArgumentParser()
185 |     parser.add_argument('-d_list', help='corpus list directory')
186 |     parser.add_argument('-d_midi', help='midi file directory (input)')
187 |     parser.add_argument('-d_note', help='note file directory (output)')
188 |     parser.add_argument('-config', help='config file')
189 |     #parser.add_argument('-check', help='double check with pretty_midi', action='store_true')
190 |     args = parser.parse_args()
191 | 
192 |     print('** conv_midi2note: convert midi to note **')
193 |     print(' directory')
194 |     print(' midi (input) : '+str(args.d_midi))
195 |     print(' note (output) : '+str(args.d_note))
196 |     print(' corpus list : '+str(args.d_list))
197 |     print(' config file : '+str(args.config))
198 | 
199 |     # read config file (keep the same shape as the module-level config,
200 |     # which holds the "data" section of config.json)
200 |     with open(args.config, 'r', encoding='utf-8') as f:
201 |         config = json.load(f)['data']
202 | 
203 |     a_attribute = ['train', 'test', 'valid']
204 |     for attribute in a_attribute:
205 |         print('-'+attribute+'-')
206 |         with open(args.d_list.rstrip('/')+'/'+str(attribute)+'.list', 'r', encoding='utf-8') as f:
207 |             a_input = f.readlines()
208 | 
209 |         for i in range(len(a_input)):
210 |             fname = a_input[i].rstrip('\n')
211 |             print(fname)
212 | 
213 |             # convert midi to note
214 |             a_note = midi2note(args.d_midi.rstrip('/')+'/'+fname+'.mid', verbose_flag=False)
215 |             '''
216 |             if args.check is True:
217 |                 a_note_pretty_midi = midi2note_pretty_midi(args.d_midi.rstrip('/')+'/'+fname+'.mid')
218 |                 if len(a_note) != len(a_note_pretty_midi):
219 |                     print('[error] fname: '+str(fname)+' note number mismatch')
220 |                 for j in range(len(a_note)):
221 |                     if (a_note[j]['pitch'] != a_note_pretty_midi[j]['pitch']) or \
222 |                         (a_note[j]['velocity'] != a_note_pretty_midi[j]['velocity']) or \
223 |                         (abs(a_note[j]['onset'] - a_note_pretty_midi[j]['onset']) > 0.01):
224 |                         print('[error] fname: '+str(fname)+' note('+str(j)+') data mismatch')
225 |             '''
226 |             with open(args.d_note.rstrip('/')+'/'+fname+'.json', 'w', encoding='utf-8') as f:
227 |                 json.dump(a_note, f, ensure_ascii=False, indent=4, sort_keys=False)
228 |             with open(args.d_note.rstrip('/')+'/'+fname+'.txt', 'w', encoding='utf-8') as f:
229 |                 f.write('OnsetTime\tOffsetTime\tVelocity\tMidiPitch\n')
230 |                 for note in a_note:
231 |                     f.write(str(note['onset'])+'\t')
232 |                     f.write(str(note['offset'])+'\t')
233 |                     f.write(str(note['velocity'])+'\t')
234 |                     f.write(str(note['pitch'])+'\n')
235 | 
236 |     print('** done **')
237 | 
--------------------------------------------------------------------------------
/data/_utils_midi/conv_note2label.py:
--------------------------------------------------------------------------------
1 | #!
python 2 | 3 | import argparse 4 | import json 5 | import pickle 6 | import numpy as np 7 | 8 | 9 | with open("models/config.json", "r") as f: 10 | config = json.load(f)["data"] 11 | 12 | 13 | def note2label(a_note, offset_duration_tolerance_flag=False): 14 | # (0) settings 15 | # tolerance: 50[ms] 16 | hop_ms = 1000 * config['feature']['hop_sample'] / config['feature']['sr'] 17 | onset_tolerance = int(50.0 / hop_ms + 0.5) 18 | offset_tolerance = int(50.0 / hop_ms + 0.5) 19 | 20 | # with open(f_note, 'r', encoding='utf-8') as f: 21 | # a_note = json.load(f) 22 | 23 | # 62.5 (hop=256, fs=16000) 24 | nframe_in_sec = config['feature']['sr'] / config['feature']['hop_sample'] 25 | 26 | max_offset = 0 27 | for note in a_note: 28 | if max_offset < note['offset']: 29 | max_offset = note['offset'] 30 | 31 | nframe = int(max_offset * nframe_in_sec + 0.5) + 1 32 | a_mpe = np.zeros((nframe, config['midi']['num_note']), dtype=bool) 33 | a_onset = np.zeros((nframe, config['midi']['num_note']), dtype=np.float32) 34 | a_offset = np.zeros((nframe, config['midi']['num_note']), dtype=np.float32) 35 | a_velocity = np.zeros((nframe, config['midi']['num_note']), dtype=np.int8) 36 | 37 | for i in range(len(a_note)): 38 | pitch = a_note[i]['pitch'] - config['midi']['note_min'] 39 | 40 | # a_note[i]['onset'] in sec 41 | onset_frame = int(a_note[i]['onset'] * nframe_in_sec + 0.5) 42 | onset_ms = a_note[i]['onset']*1000.0 43 | onset_sharpness = onset_tolerance 44 | 45 | # a_note[i]['offset'] in sec 46 | offset_frame = int(a_note[i]['offset'] * nframe_in_sec + 0.5) 47 | offset_ms = a_note[i]['offset']*1000.0 48 | offset_sharpness = offset_tolerance 49 | 50 | if offset_duration_tolerance_flag is True: 51 | offset_duration_tolerance = int((offset_ms - onset_ms) * 0.2 / hop_ms + 0.5) 52 | offset_sharpness = max(offset_tolerance, offset_duration_tolerance) 53 | 54 | # velocity 55 | velocity = a_note[i]['velocity'] 56 | 57 | # onset 58 | for j in range(0, onset_sharpness+1): 59 | onset_ms_q = (onset_frame + j) * hop_ms 60 | onset_ms_diff = onset_ms_q - onset_ms 61 | onset_val = max(0.0, 1.0 - (abs(onset_ms_diff) / (onset_sharpness * hop_ms))) 62 | if onset_frame+j < nframe: 63 | a_onset[onset_frame+j][pitch] = max(a_onset[onset_frame+j][pitch], onset_val) 64 | if (a_onset[onset_frame+j][pitch] >= 0.5): 65 | a_velocity[onset_frame+j][pitch] = velocity 66 | 67 | for j in range(1, onset_sharpness+1): 68 | onset_ms_q = (onset_frame - j) * hop_ms 69 | onset_ms_diff = onset_ms_q - onset_ms 70 | onset_val = max(0.0, 1.0 - (abs(onset_ms_diff) / (onset_sharpness * hop_ms))) 71 | if onset_frame-j >= 0: 72 | a_onset[onset_frame-j][pitch] = max(a_onset[onset_frame-j][pitch], onset_val) 73 | if (a_onset[onset_frame-j][pitch] >= 0.5) and (a_velocity[onset_frame-j][pitch] == 0): 74 | a_velocity[onset_frame-j][pitch] = velocity 75 | 76 | # mpe 77 | for j in range(onset_frame, offset_frame+1): 78 | a_mpe[j][pitch] = 1 79 | 80 | # offset 81 | offset_flag = True 82 | for j in range(len(a_note)): 83 | if a_note[i]['pitch'] != a_note[j]['pitch']: 84 | continue 85 | if a_note[i]['offset'] == a_note[j]['onset']: 86 | offset_flag = False 87 | break 88 | 89 | if offset_flag is True: 90 | for j in range(0, offset_sharpness+1): 91 | offset_ms_q = (offset_frame + j) * hop_ms 92 | offset_ms_diff = offset_ms_q - offset_ms 93 | offset_val = max(0.0, 1.0 - (abs(offset_ms_diff) / (offset_sharpness * hop_ms))) 94 | if offset_frame+j < nframe: 95 | a_offset[offset_frame+j][pitch] = max(a_offset[offset_frame+j][pitch], offset_val) 96 | for j in range(1, 
offset_sharpness+1):
97 |                 offset_ms_q = (offset_frame - j) * hop_ms
98 |                 offset_ms_diff = offset_ms_q - offset_ms
99 |                 offset_val = max(0.0, 1.0 - (abs(offset_ms_diff) / (offset_sharpness * hop_ms)))
100 |                 if offset_frame-j >= 0:
101 |                     a_offset[offset_frame-j][pitch] = max(a_offset[offset_frame-j][pitch], offset_val)
102 | 
103 |     # (5-2) output label file
104 |     # mpe      : 0 or 1
105 |     # onset    : 0.0-1.0
106 |     # offset   : 0.0-1.0
107 |     # velocity : 0 - 127
108 |     a_label = {
109 |         'mpe': a_mpe.tolist(),
110 |         'onset': a_onset.tolist(),
111 |         'offset': a_offset.tolist(),
112 |         'velocity': a_velocity.tolist()
113 |     }
114 | 
115 |     return a_label
116 | 
117 | if __name__ == '__main__':
118 |     parser = argparse.ArgumentParser()
119 |     parser.add_argument('-d_list', help='corpus list directory')
120 |     parser.add_argument('-d_note', help='note file directory (input)')
121 |     parser.add_argument('-d_label', help='label file directory (output)')
122 |     parser.add_argument('-config', help='config file')
123 |     parser.add_argument('-offset_duration_tolerance', help='offset_duration_tolerance ON', action='store_true')
124 |     args = parser.parse_args()
125 | 
126 |     print('** conv_note2label: convert note to label **')
127 |     print(' directory')
128 |     print(' note (input) : '+str(args.d_note))
129 |     print(' label (output): '+str(args.d_label))
130 |     print(' corpus list : '+str(args.d_list))
131 |     print(' config file : '+str(args.config))
132 |     print(' offset duration tolerance: '+str(args.offset_duration_tolerance))
133 | 
134 |     # read config file (keep the same shape as the module-level config,
135 |     # which holds the "data" section of config.json)
135 |     with open(args.config, 'r', encoding='utf-8') as f:
136 |         config = json.load(f)['data']
137 | 
138 |     a_attribute = ['train', 'test', 'valid']
139 |     for attribute in a_attribute:
140 |         print('-'+attribute+'-')
141 |         with open(args.d_list.rstrip('/')+'/'+str(attribute)+'.list', 'r', encoding='utf-8') as f:
142 |             a_input = f.readlines()
143 | 
144 |         for i in range(len(a_input)):
145 |             fname = a_input[i].rstrip('\n')
146 |             print(fname)
147 | 
148 |             # convert note to label (note2label expects the note list, not a path)
149 |             with open(args.d_note.rstrip('/')+'/'+fname+'.json', 'r', encoding='utf-8') as f:
150 |                 a_label = note2label(json.load(f), args.offset_duration_tolerance)
151 |             with open(args.d_label.rstrip('/')+'/'+fname+'.pkl', 'wb') as f:
152 |                 pickle.dump(a_label, f, protocol=4)
153 | 
154 |     print('** done **')
155 | 
--------------------------------------------------------------------------------
/data/create_dataset.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import sys
3 | import argparse
4 | import random
5 | 
6 | ROOT = Path(__file__).resolve().parent.parent
7 | sys.path.append(str(ROOT))
8 | 
9 | import numpy as np
10 | 
11 | from data._utils import preprocess_feature
12 | from utils import config, info
13 | 
14 | 
15 | DIR_DATASET = ROOT / config.path.dataset
16 | DIR_ARRAY = DIR_DATASET / "array/"
17 | DIR_FINAL = DIR_DATASET / "dataset/"
18 | DIR_SPEC = DIR_FINAL / "spec/"
19 | DIR_LABEL = DIR_FINAL / "label/"
20 | DIR_NAME_PIANO = "piano/"
21 | N_FRAMES = config.data.input.num_frame
22 | MARGIN = config.data.input.margin_b + config.data.input.margin_f
23 | 
24 | 
25 | def main(args):
26 |     DIR_SPEC.mkdir(exist_ok=True, parents=True)
27 |     DIR_LABEL.mkdir(exist_ok=True)
28 | 
29 |     songs = list(DIR_ARRAY.glob("*/"))
30 |     is_train = {song.name: True for song in songs}
31 |     random.shuffle(songs)
32 |     for song in songs[:int(len(songs) * args.test_size)]:
33 |         is_train[song.name] = False
34 | 
35 |     songs = sorted(songs)
36 |     n_songs = len(songs)
37 |     for ns, song in enumerate(songs, 1):
38 | 
print(f"{ns}/{n_songs}: {song.name}", end=" ", flush=True) 39 | create_dataset(song, is_train[song.name], args.overwrite, args.rm_ends) 40 | 41 | info.export() 42 | 43 | 44 | def create_dataset( 45 | song: Path, 46 | is_train: bool, 47 | overwrite: bool, 48 | rm_ends: int, 49 | ) -> None: 50 | """ 51 | Create the dataset from the song directory. Split the song into 52 | segments. 53 | 54 | Args: 55 | song (Path): Path to the song directory. 56 | is_train (bool): Train or test. 57 | overwrite (bool): Overwrite existing files. 58 | rm_ends (int): Remove n segments from the beginning and the end of the song. 59 | """ 60 | dir_piano = song / DIR_NAME_PIANO 61 | 62 | orig, = list(song.glob("*.npy")) 63 | spec = np.load(orig) 64 | spec = preprocess_feature(spec) 65 | length_song = len(spec) - MARGIN 66 | idxs = range(0, length_song, N_FRAMES) 67 | n_dig = len(str(len(idxs))) 68 | for ns, i in enumerate(idxs): 69 | spec_block = (spec[i:i + N_FRAMES + MARGIN]).T # (n_bins, n_frames) 70 | sid = str(ns).zfill(n_dig) 71 | filename = DIR_SPEC / f"{orig.stem}_{sid}" 72 | if (not overwrite) and Path(filename).with_suffix(".npy").exists(): 73 | continue 74 | np.save(filename, spec_block) 75 | 76 | pianos = list(dir_piano.glob("*.npz")) 77 | pianos = sorted(pianos) 78 | for piano in pianos: 79 | if not info[piano.stem].include_dataset: 80 | continue 81 | 82 | label = np.load(piano) 83 | label = { 84 | "onset": label["onset"], 85 | "offset": label["offset"], 86 | "frame": label["frame"], 87 | "velocity": label["velocity"], 88 | } 89 | label = align_length(label, length_song) 90 | 91 | save_args = [] 92 | for ns, i in enumerate(range(0, length_song, N_FRAMES)): 93 | # (n_frames, n_bins) 94 | onset_block = label["onset"][i:i + N_FRAMES] # float [0, 1] 95 | offset_block = label["offset"][i:i + N_FRAMES] # float [0, 1] 96 | frame_block = label["frame"][i:i + N_FRAMES] # int {0, 1} 97 | velocity_block = label["velocity"][i:i + N_FRAMES] # int [0, 127] 98 | 99 | sid = str(ns).zfill(n_dig) 100 | prefix = DIR_LABEL / f"{piano.stem}_{sid}" 101 | if (not overwrite) and prefix.with_suffix(".npz").exists(): 102 | continue 103 | 104 | save_args.append({ 105 | "prefix": prefix, 106 | "data": { 107 | "onset": onset_block, 108 | "offset": offset_block, 109 | "frame": frame_block, 110 | "velocity": velocity_block, 111 | } 112 | }) 113 | if rm_ends: 114 | save_args = save_args[rm_ends:-rm_ends] 115 | for arg in save_args: 116 | np.savez(arg["prefix"], **arg["data"]) 117 | info.update(piano.stem, { 118 | "n_segments": len(save_args), 119 | "split": "train" if is_train else "test", 120 | }) 121 | 122 | print(".", end="", flush=True) 123 | print(f" Done.", flush=True) 124 | 125 | 126 | def align_length(label, length): 127 | length_label = len(label["onset"]) 128 | if length_label == length: 129 | pass 130 | elif length_label > length: 131 | for key in label.keys(): 132 | label[key] = label[key][:length] 133 | else: 134 | for key in label.keys(): 135 | label[key] = np.pad(label[key], ((0, length - length_label), (0, 0))) 136 | return label 137 | 138 | 139 | if __name__ == "__main__": 140 | parser = argparse.ArgumentParser(description="Create train dataset.") 141 | parser.add_argument("--overwrite", action="store_true", help="Overwrite existing files.") 142 | parser.add_argument("--test_size", type=float, default=0.2, help="Test size. Defaults to 0.2.") 143 | parser.add_argument("--rm_ends", type=int, default=2, help="Remove n segments from the beginning and the end of the song. 
Defaults to 2.")
144 |     args = parser.parse_args()
145 |     main(args)
146 | 
--------------------------------------------------------------------------------
/data/create_labels.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | from pathlib import Path
3 | import sys
4 | from typing import Dict
5 | 
6 | ROOT = Path(__file__).resolve().parent.parent
7 | sys.path.append(str(ROOT))
8 | 
9 | import numpy as np
10 | 
11 | from data._utils import wav2feature
12 | from data._utils_midi import midi2note, note2label
13 | from utils import config
14 | 
15 | 
16 | DIR_DATASET = ROOT / config.path.dataset
17 | DIR_SYNCED = DIR_DATASET / "synced/"
18 | DIR_ARRAY = DIR_DATASET / "array/"
19 | DIR_ARRAY.mkdir(exist_ok=True)
20 | DIR_NAME_PIANO = "piano/"
21 | 
22 | 
23 | def main(args):
24 |     songs = list(DIR_SYNCED.glob("*/"))
25 |     songs = sorted(songs)
26 |     n_songs = len(songs)
27 | 
28 |     for n, song in enumerate(songs, 1):
29 |         print(f"{n}/{n_songs}: {song.name}", end=" ", flush=True)
30 |         create_label(song, args.overwrite)
31 | 
32 | 
33 | def create_label(song: Path, overwrite: bool) -> None:
34 |     """
35 |     Create the label files from the piano midi files.
36 | 
37 |     Args:
38 |         song (Path): Path to the song directory.
39 |         overwrite (bool): Overwrite existing files.
40 |     """
41 |     dir_song = DIR_ARRAY / song.name
42 |     if (not overwrite) and dir_song.exists():
43 |         print("Already exists, skip.")
44 |         return
45 | 
46 |     dir_song.mkdir(exist_ok=True)
47 |     dir_song_piano = dir_song / DIR_NAME_PIANO
48 |     dir_song_piano.mkdir(exist_ok=True)
49 | 
50 |     orig, = list(song.glob("*.wav"))
51 |     pianos = sorted(list((song / DIR_NAME_PIANO).glob("*.mid")))
52 | 
53 |     spec = wav2feature(str(orig))
54 |     np.save(dir_song / orig.stem, spec)
55 |     labels = {}
56 |     for piano in pianos:
57 |         prefix = dir_song_piano / piano.stem
58 |         if (not overwrite) and prefix.with_suffix(".npz").exists():
59 |             continue
60 |         label = get_label(piano)
61 |         labels[prefix] = label
62 | 
63 |     for prefix, label in labels.items():
64 |         np.savez(
65 |             prefix,
66 |             onset=label["onset"],
67 |             offset=label["offset"],
68 |             frame=label["frame"],
69 |             velocity=label["velocity"],
70 |         )
71 |         print(".", end="", flush=True)
72 |     print(f" Done.", flush=True)
73 | 
74 | 
75 | def get_label(path_midi: Path) -> Dict[str, np.ndarray]:
76 |     """
77 |     Get the label from the piano MIDI file.
78 | 
79 |     Args:
80 |         path_midi (Path): Path to the piano midi file.
81 | 
82 |     Returns:
83 |         Dict[str, np.ndarray]: Dictionary of the label.
84 | """ 85 | notes = midi2note(str(path_midi)) 86 | label = note2label(notes) 87 | label = { 88 | "onset": np.array(label["onset"], dtype=np.float32), 89 | "offset": np.array(label["offset"], dtype=np.float32), 90 | "frame": (np.array(label["mpe"]) > 0.5).astype(np.uint8), 91 | "velocity": np.array(label["velocity"], dtype=np.uint8), 92 | } 93 | return label 94 | 95 | 96 | if __name__ == "__main__": 97 | parser = argparse.ArgumentParser("Transform the midi files to tuple of (onset, offset, frame, velocity).") 98 | parser.add_argument("--overwrite", action="store_true", help="Overwrite existing files.") 99 | args = parser.parse_args() 100 | main(args) -------------------------------------------------------------------------------- /data/download.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | import argparse 4 | import json 5 | from typing import List 6 | 7 | ROOT = Path(__file__).resolve().parent.parent 8 | sys.path.append(str(ROOT)) 9 | 10 | from yt_dlp import YoutubeDL 11 | 12 | from utils import config 13 | 14 | 15 | DIR_RAW = ROOT / config.path.dataset / "raw" 16 | FILE_SRC = ROOT / config.path.src 17 | DIR_NAME_PIANO = "piano" 18 | 19 | 20 | def main(args): 21 | path_src = args.path_src or FILE_SRC 22 | with open(path_src, "r") as f: 23 | src = json.load(f) 24 | 25 | for n, (title, movies) in enumerate(src.items(), 1): 26 | print(f"{n}/{len(src)} {title}") 27 | download(DIR_RAW / title, movies["original"], movies["pianos"]) 28 | 29 | 30 | def download(dir_song: Path, original: str, pianos: List[str]) -> None: 31 | """ 32 | Download the audio files from the source file on YouTube. 33 | 34 | Args: 35 | dir_song (Path): Path to the song directory to save the audio files. 36 | original (str): ID of the original audio file on YouTube. 37 | pianos (List[str]): IDs of the piano audio files on YouTube. 38 | """ 39 | dir_song_piano = dir_song / DIR_NAME_PIANO 40 | dir_song_piano.mkdir(exist_ok=True, parents=True) 41 | 42 | ids = [original] + pianos 43 | urls = [f"https://www.youtube.com/watch?v={id}" for id in ids] 44 | ydl_opts = { 45 | "outtmpl": f"{dir_song}/%(id)s.%(ext)s", 46 | "format": "bestaudio/best", 47 | "postprocessors": [ 48 | { 49 | "key": "FFmpegExtractAudio", 50 | "preferredcodec": "wav", 51 | "preferredquality": "192", 52 | } 53 | ], 54 | "ignoreerrors": True, 55 | } 56 | with YoutubeDL(ydl_opts) as ydl: 57 | ydl.download(urls) 58 | print() 59 | 60 | for piano in pianos: 61 | piano_wav = dir_song / f"{piano}.wav" 62 | piano_wav.rename(dir_song_piano / f"{piano}.wav") 63 | 64 | 65 | if __name__ == "__main__": 66 | parser = argparse.ArgumentParser("Download the audio files from the source file on YouTube.") 67 | parser.add_argument("--path_src", type=str, default=None, help="Path to the source file. 
Defaults to CONFIG.PATH.SRC.") 68 | args = parser.parse_args() 69 | main(args) 70 | -------------------------------------------------------------------------------- /data/sv/extract.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | import sys 4 | import json 5 | 6 | ROOT = Path(__file__).resolve().parent.parent.parent 7 | sys.path.append(str(ROOT)) 8 | 9 | import numpy as np 10 | import pretty_midi 11 | from tqdm import tqdm 12 | 13 | from utils import config, info 14 | 15 | 16 | DIR_DATASET = ROOT / config.path.dataset 17 | DIR_SYNCED = DIR_DATASET / "synced/" 18 | DIR_NAME_PIANO = "piano/" 19 | PATH_TMP = ROOT / "data/sv/tmp.json" 20 | PATH_STYLE_VECTORS = ROOT / config.path.style_vectors 21 | 22 | PITCH_MIN = config.data.midi.note_min 23 | PITCH_MAX = config.data.midi.note_max 24 | NUM_PITCH = config.data.midi.num_note 25 | N_VELOCITY = config.data.midi.num_velocity 26 | SR = config.data.feature.sr 27 | HOP_LENGTH = config.data.feature.hop_sample 28 | N_FRAMES = config.data.input.num_frame 29 | BIN_VEL = np.arange(1, N_VELOCITY) 30 | BIN_PITCH = np.arange(PITCH_MIN, PITCH_MAX + 1) 31 | 32 | 33 | def main(args): 34 | pianos = list(DIR_SYNCED.glob("*/piano/*.mid")) 35 | pianos = sorted(pianos) 36 | 37 | if (not args.overwrite) and Path(PATH_TMP).exists(): 38 | with open(PATH_TMP, "r") as f: 39 | tmp = json.load(f) 40 | raw_styles = tmp["raw_styles"] 41 | params = tmp["params"] 42 | else: 43 | raw_styles, ignore_ids = extract_raw_styles(pianos, args.min_notes) 44 | params = estimate_params(raw_styles, ignore_ids) 45 | out = { 46 | "raw_styles": raw_styles, 47 | "params": params 48 | } 49 | with open(PATH_TMP, "w") as f: 50 | json.dump(out, f) 51 | style_vectors, style_features = create_style_vectors(raw_styles, params) 52 | out = { 53 | "style_vectors": style_vectors, 54 | "style_features": style_features, 55 | "params": params 56 | } 57 | with open(PATH_STYLE_VECTORS, "w") as f: 58 | json.dump(out, f) 59 | 60 | 61 | def extract_raw_styles(pianos, min_notes=1000): 62 | raw_styles = {} 63 | ignore_ids = [] 64 | for piano in tqdm(pianos, desc="Extracting raw styles"): 65 | pid = piano.stem 66 | status, raw_style = extract_raw_style(piano, min_notes) 67 | if status == 1: 68 | ignore_ids.append(pid) 69 | info.set(pid, "include_dataset", False) 70 | elif status == 2: 71 | info.set(pid, "include_dataset", False) 72 | continue 73 | else: 74 | info.set(pid, "include_dataset", True) 75 | 76 | raw_styles[pid] = { 77 | "dist_vel": raw_style[0], 78 | "dist_pitch": raw_style[1], 79 | "onset_rates": raw_style[2], 80 | } 81 | return raw_styles, ignore_ids 82 | 83 | def extract_raw_style(path, min_notes=1000): 84 | midi = pretty_midi.PrettyMIDI(str(path)) 85 | piano_roll = midi.get_piano_roll(int(SR / HOP_LENGTH)) 86 | piano_roll = piano_roll[PITCH_MIN:PITCH_MAX + 1] 87 | n_frames_midi = piano_roll.shape[1] 88 | onset = np.diff(piano_roll, axis=1) > 0 89 | status = 0 90 | if not onset.any(): 91 | return 2, None 92 | elif onset.sum() < min_notes: 93 | status = 1 94 | onset = np.pad(onset, ((0, 0), (1, 0))) 95 | velocity = piano_roll[onset] 96 | dist_vel = [int((velocity == v).sum()) for v in BIN_VEL] 97 | dist_pitch = [int((np.diff(onset[p]) > 0).sum()) for p in range(NUM_PITCH)] 98 | 99 | onset_rates = [] 100 | for i in range(0, n_frames_midi, N_FRAMES): 101 | seg_onset = onset[:, i:i + N_FRAMES] 102 | onset_rate = seg_onset.sum() / N_FRAMES 103 | onset_rates.append(onset_rate) 104 | 105 | return status, (dist_vel, 
dist_pitch, onset_rates) 106 | 107 | 108 | def estimate_params(raw_styles, ignore_ids): 109 | sum_dist_vel = np.zeros(N_VELOCITY - 1) 110 | sum_dist_pitch = np.zeros(NUM_PITCH) 111 | all_onset_rate = [] 112 | for pid, style in raw_styles.items(): 113 | if pid in ignore_ids: 114 | continue 115 | sum_dist_vel += style["dist_vel"] 116 | sum_dist_pitch += style["dist_pitch"] 117 | all_onset_rate += style["onset_rates"] 118 | mean_vel = np.average(BIN_VEL, weights=sum_dist_vel) 119 | mean_pitch = np.average(BIN_PITCH, weights=sum_dist_pitch) 120 | mean_onset_rate = np.mean(all_onset_rate) 121 | std_vel = np.sqrt(np.average((BIN_VEL - mean_vel) ** 2, weights=sum_dist_vel)) 122 | std_pitch = np.sqrt(np.average((BIN_PITCH - mean_pitch) ** 2, weights=sum_dist_pitch)) 123 | std_onset_rate = np.std(all_onset_rate) 124 | params = { 125 | "mean_vel": mean_vel, 126 | "mean_pitch": mean_pitch, 127 | "mean_onset_rate": mean_onset_rate, 128 | "std_vel": std_vel, 129 | "std_pitch": std_pitch, 130 | "std_onset_rate": std_onset_rate, 131 | } 132 | return params 133 | 134 | 135 | BIN_DIST = np.array([-2, -4/3, -2/3, 0, 2/3, 4/3, 2]) 136 | 137 | def create_style_vectors(raw_styles, params): 138 | mean_vel = params["mean_vel"] 139 | mean_pitch = params["mean_pitch"] 140 | mean_onset_rate = params["mean_onset_rate"] 141 | std_vel = params["std_vel"] 142 | std_pitch = params["std_pitch"] 143 | std_onset_rate = params["std_onset_rate"] 144 | 145 | style_vectors = {} 146 | style_features = {} 147 | for id_piano, style in tqdm(raw_styles.items(), desc="Normalize style vectors"): 148 | dist_vel = style["dist_vel"] 149 | dist_pitch = style["dist_pitch"] 150 | onset_rates = style["onset_rates"] 151 | 152 | # To list 153 | vels = sum([[v] * n for v, n in zip(BIN_VEL, dist_vel)], []) 154 | pitches = sum([[p] * n for p, n in zip(BIN_PITCH, dist_pitch)], []) 155 | vels = np.array(vels) 156 | pitches = np.array(pitches) 157 | onset_rates = np.array(onset_rates) 158 | 159 | # Normalize 160 | vels_norm = (vels - mean_vel) / std_vel 161 | pitches_norm = (pitches - mean_pitch) / std_pitch 162 | onset_rates_norm = (onset_rates - mean_onset_rate) / std_onset_rate 163 | 164 | # Digitize 165 | dist_vel = get_distribution(vels_norm) 166 | dist_pitch = get_distribution(pitches_norm) 167 | dist_onset_rate = get_distribution(onset_rates_norm) 168 | 169 | # Concatenate 170 | style_vector = np.concatenate([ 171 | dist_vel, dist_pitch, dist_onset_rate 172 | ]).tolist() 173 | style_feature = [ 174 | vels_norm.mean(), pitches_norm.std(), onset_rates_norm.mean() 175 | ] 176 | style_vectors[id_piano] = style_vector 177 | style_features[id_piano] = style_feature 178 | 179 | return style_vectors, style_features 180 | 181 | def get_distribution(data): 182 | digit = np.digitize(data, BIN_DIST) 183 | dist = [(digit == v).sum() for v in range(len(BIN_DIST) + 1)] 184 | dist = np.array(dist) 185 | dist = dist / dist.sum() 186 | return dist 187 | 188 | 189 | if __name__ == "__main__": 190 | parser = argparse.ArgumentParser("Extract style vectors.") 191 | parser.add_argument("--overwrite", action="store_true", help="Overwrite existing files.") 192 | parser.add_argument("--min_notes", type=int, default=1000, help="The minimum number of notes, below which they will not be included in the training dataset. 
Defaults to 1000.") 193 | args = parser.parse_args() 194 | main(args) 195 | -------------------------------------------------------------------------------- /data/sv/sampler.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | import json 4 | 5 | ROOT = Path(__file__).resolve().parent.parent.parent 6 | sys.path.append(str(ROOT)) 7 | 8 | import numpy as np 9 | 10 | from utils import config 11 | 12 | 13 | PATH_STYLE_VECTORS = ROOT / config.path.style_vectors 14 | PRESETS = { 15 | "level1": (0., 0.9, -0.5), 16 | "level2": (0., 1., 0.), 17 | "level3": (0.5, 1.05, 0.5), 18 | } 19 | 20 | 21 | class Sampler: 22 | def __init__(self, variances=(0., 0., 0.), windows=(0.5, 0.1, 0.5)): 23 | self.latest = None 24 | self.variances = variances 25 | self.windows = windows 26 | 27 | with open(PATH_STYLE_VECTORS, "r") as f: 28 | style_vectors = json.load(f) 29 | self.style_vectors = style_vectors["style_vectors"] 30 | self.style_vectors = { 31 | key: np.array(value) for key, value in self.style_vectors.items() 32 | } 33 | self.features = style_vectors["style_features"] 34 | self.params = style_vectors["params"] 35 | 36 | def __len__(self): 37 | return len(self.style_vectors) 38 | 39 | def __getitem__(self, key): 40 | return self.style_vectors[key] 41 | 42 | def random(self): 43 | key = np.random.choice(list(self.style_vectors.keys())) 44 | return self[key] 45 | 46 | def get_sv(self, key_vel, key_pitch, key_onset): 47 | sv_vel = self[key_vel][0:8] 48 | sv_pitch = self[key_pitch][8:16] 49 | sv_onset = self[key_onset][16:24] 50 | sv = np.concatenate([sv_vel, sv_pitch, sv_onset]).astype(np.float32) 51 | return sv 52 | 53 | def get_feature(self, key_vel, key_pitch, key_onset): 54 | f_Vel, _, _ = self.features[key_vel] 55 | _, f_pitch, _ = self.features[key_pitch] 56 | _, _, f_onset = self.features[key_onset] 57 | return f_Vel, f_pitch, f_onset 58 | 59 | def sample(self, params="level2"): 60 | if isinstance(params, str): 61 | if params not in PRESETS: 62 | raise ValueError(f"Invalid value for 'params': {params}") 63 | params = PRESETS[params] 64 | 65 | keys_vel, keys_pitch, keys_onset = self.choices(params) 66 | v_vel, v_pitch, v_onset = self.variances 67 | sv_vel = self.summarize(keys_vel, v_vel)[0:8] 68 | sv_pitch = self.summarize(keys_pitch, v_pitch)[8:16] 69 | sv_onset = self.summarize(keys_onset, v_onset)[16:24] 70 | sv = np.concatenate([sv_vel, sv_pitch, sv_onset]).astype(np.float32) 71 | self.latest = sv 72 | return sv 73 | 74 | def choices(self, params): 75 | mean_vel, mean_pitch, mean_onset = params 76 | w_vel, w_pitch, w_onset = self.windows 77 | r_vel = (mean_vel - (w_vel / 2), mean_vel + (w_vel / 2)) 78 | r_pitch = (mean_pitch - (w_pitch / 2), mean_pitch + (w_pitch / 2)) 79 | r_onset = (mean_onset - (w_onset / 2), mean_onset + (w_onset / 2)) 80 | 81 | keys_vel = [] 82 | keys_pitch = [] 83 | keys_onset = [] 84 | for key, feature in self.features.items(): 85 | f_vel, f_pitch, f_onset = feature 86 | if self._isin(f_vel, r_vel): 87 | keys_vel.append(key) 88 | if self._isin(f_pitch, r_pitch): 89 | keys_pitch.append(key) 90 | if self._isin(f_onset, r_onset): 91 | keys_onset.append(key) 92 | return keys_vel, keys_pitch, keys_onset 93 | 94 | @staticmethod 95 | def _isin(x, range): 96 | return range[0] <= x <= range[1] 97 | 98 | def summarize(self, keys, variance): 99 | weights = np.ones(len(keys)) + np.random.randn(len(keys)) * variance 100 | weights = np.maximum(weights, 0) 101 | weights /= np.sum(weights) 102 | sv = np.zeros(24) 
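        # The sampled style vector is a convex combination of the candidate
        # style vectors: weights start at 1, are perturbed by Gaussian noise
        # scaled by `variance`, clipped at zero, and renormalized to sum to 1.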
103 | for key, weight in zip(keys, weights): 104 | sv += self[key] * weight 105 | return sv 106 | -------------------------------------------------------------------------------- /data/sync.py: -------------------------------------------------------------------------------- 1 | # >>> ------------------------------------------------------------------ 2 | # This code is based on the code from synctoolbox demo notebook: 3 | # https://github.com/meinardmueller/synctoolbox/blob/master/sync_audio_audio_full.ipynb 4 | 5 | 6 | import os 7 | import warnings 8 | from contextlib import redirect_stdout 9 | 10 | import numpy as np 11 | from synctoolbox.dtw.mrmsdtw import sync_via_mrmsdtw 12 | from synctoolbox.dtw.utils import make_path_strictly_monotonic 13 | from synctoolbox.feature.chroma import pitch_to_chroma, quantize_chroma 14 | from synctoolbox.feature.dlnco import pitch_onset_features_to_DLNCO 15 | from synctoolbox.feature.pitch import audio_to_pitch_features 16 | from synctoolbox.feature.pitch_onset import audio_to_pitch_onset_features 17 | from synctoolbox.feature.utils import estimate_tuning 18 | import pytsmod 19 | 20 | 21 | FEATURE_RATE = 50 22 | STEP_WEIGHTS = np.array([1.5, 1.5, 2.0]) 23 | THRESHOLD_REC = 10 ** 6 24 | 25 | 26 | def get_features_from_audio(audio, tuning_offset, sr): 27 | with redirect_stdout(open(os.devnull, "w")): 28 | f_pitch = audio_to_pitch_features( 29 | f_audio=audio, 30 | Fs=sr, 31 | tuning_offset=tuning_offset, 32 | feature_rate=FEATURE_RATE, 33 | verbose=False, 34 | ) 35 | f_chroma = pitch_to_chroma(f_pitch=f_pitch) 36 | f_chroma_quantized = quantize_chroma(f_chroma=f_chroma) 37 | f_pitch_onset = audio_to_pitch_onset_features( 38 | f_audio=audio, 39 | Fs=sr, 40 | tuning_offset=tuning_offset, 41 | verbose=False, 42 | ) 43 | f_DLNCO = pitch_onset_features_to_DLNCO( 44 | f_peaks=f_pitch_onset, 45 | feature_rate=FEATURE_RATE, 46 | feature_sequence_length=f_chroma_quantized.shape[1], 47 | visualize=False, 48 | ) 49 | return f_chroma_quantized, f_DLNCO 50 | 51 | 52 | def sync_audio( 53 | y_source: np.ndarray, 54 | y_target: np.ndarray, 55 | sr: int, 56 | ) -> np.ndarray: 57 | """ 58 | Synchronize the source audio with the target audio. 59 | 60 | Args: 61 | y_source (np.ndarray): Source audio. (n_samples,) 62 | y_target (np.ndarray): Target audio. (n_samples,) 63 | sr (int): Sample rate. 64 | 65 | Returns: 66 | np.ndarray: Synchronized source audio. 
(n_samples,) 67 | """ 68 | tuning_offset_source = estimate_tuning(y_source, sr) 69 | tuning_offset_target = estimate_tuning(y_target, sr) 70 | 71 | f_chroma_quantized_1, f_DLNCO_1 = get_features_from_audio( 72 | y_source, tuning_offset_source, sr 73 | ) 74 | f_chroma_quantized_2, f_DLNCO_2 = get_features_from_audio( 75 | y_target, tuning_offset_target, sr 76 | ) 77 | 78 | wp = sync_via_mrmsdtw( 79 | f_chroma1=f_chroma_quantized_1, 80 | f_onset1=f_DLNCO_1, 81 | f_chroma2=f_chroma_quantized_2, 82 | f_onset2=f_DLNCO_2, 83 | input_feature_rate=FEATURE_RATE, 84 | step_weights=STEP_WEIGHTS, 85 | threshold_rec=THRESHOLD_REC, 86 | verbose=False, 87 | ) 88 | wp = make_path_strictly_monotonic(wp) 89 | 90 | time_map = wp / FEATURE_RATE * sr 91 | time_map[0, time_map[0, :] > len(y_source)] = len(y_source) - 1 92 | time_map[1, time_map[1, :] > len(y_target)] = len(y_target) - 1 93 | 94 | with warnings.catch_warnings(): 95 | warnings.simplefilter("ignore") 96 | y_hptsm = pytsmod.hptsm(y_source, time_map) 97 | y_hptsm = np.ravel(y_hptsm) 98 | return y_hptsm 99 | 100 | 101 | # ------------------------------------------------------------------ <<< 102 | 103 | 104 | from pathlib import Path 105 | import sys 106 | import argparse 107 | import time 108 | import shutil 109 | 110 | ROOT = Path(__file__).resolve().parent.parent 111 | sys.path.append(str(ROOT)) 112 | 113 | import librosa 114 | import soundfile as sf 115 | 116 | from utils import config, info 117 | 118 | 119 | DIR_DATASET = ROOT / config.path.dataset 120 | DIR_RAW = DIR_DATASET / "raw/" 121 | DIR_SYNCED = DIR_DATASET / "synced/" 122 | DIR_SYNCED.mkdir(exist_ok=True) 123 | DIR_NAME_PIANO = "piano/" 124 | SR = config.data.feature.sr 125 | 126 | 127 | def main(args): 128 | songs = DIR_RAW.glob("*/") 129 | songs = sorted(songs) 130 | n_songs = len(songs) 131 | for n, song in enumerate(songs, 1): 132 | print(f"{n}/{n_songs}: {song.name}", end=" ", flush=True) 133 | sync_song(song, DIR_SYNCED, args.overwrite) 134 | 135 | 136 | def sync_song( 137 | dir_song: str, 138 | dir_output: str, 139 | overwrite: bool = False 140 | ) -> None: 141 | """ 142 | Synchronize the piano audio with the original audio in given song 143 | directory. 144 | 145 | Args: 146 | dir_song (str): Path to the song directory. 147 | dir_output (str): Path to the output directory. 148 | overwrite (bool): Overwrite existing files. 
149 | """ 150 | dir_output_song = dir_output / dir_song.name 151 | time_start = time.time() 152 | orig = next(dir_song.glob("*.wav")) 153 | orig_new = dir_output_song / orig.name 154 | flag_load_orig = False 155 | 156 | if overwrite or (not orig_new.exists()): 157 | dir_output_song.mkdir(exist_ok=True) 158 | y_orig, _ = librosa.load(str(orig), sr=SR) 159 | flag_load_orig = True 160 | shutil.copy(orig, str(orig_new)) 161 | 162 | dir_output_song_piano = dir_output_song / DIR_NAME_PIANO 163 | dir_output_song_piano.mkdir(exist_ok=True) 164 | for piano in (dir_song / DIR_NAME_PIANO).glob("*.wav"): 165 | piano_new = dir_output_song_piano / piano.name 166 | if overwrite or (not piano_new.exists()): 167 | if not flag_load_orig: 168 | y_orig, _ = librosa.load(str(orig), sr=SR) 169 | flag_load_orig = True 170 | y_piano, _ = librosa.load(str(piano), sr=SR) 171 | y_piano_synced = sync_audio(y_piano, y_orig, SR) 172 | sf.write(str(piano_new), y_piano_synced, SR) 173 | 174 | info.update(piano.stem, { 175 | "original": orig.stem, 176 | "title": orig.parent.stem, 177 | }) 178 | print(".", end="", flush=True) 179 | print(f" Done ({time.time() - time_start:.2f}s)", flush=True) 180 | 181 | 182 | if __name__ == "__main__": 183 | parser = argparse.ArgumentParser(description="Synchronize piano audio with original audio.") 184 | parser.add_argument("--overwrite", action="store_true", help="Overwrite existing files.") 185 | args = parser.parse_args() 186 | main(args) 187 | -------------------------------------------------------------------------------- /data/transcribe.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | import sys 4 | 5 | ROOT = Path(__file__).resolve().parent.parent 6 | sys.path.append(str(ROOT)) 7 | 8 | import torch 9 | from tqdm import tqdm 10 | 11 | from models import Pipeline 12 | from utils import config 13 | 14 | 15 | DIR_DATASET = ROOT / config.path.dataset 16 | DIR_SYNCED = DIR_DATASET / "synced/" 17 | DIR_NAME_PIANO = "piano/" 18 | DEVICE_DEFAULT = torch.device("cuda" if torch.cuda.is_available() else "cpu") 19 | 20 | 21 | def main(args): 22 | piano_wavs = list(DIR_SYNCED.glob("*/piano/*.wav")) 23 | piano_wavs = sorted(piano_wavs) 24 | device = torch.device(args.device) if args.device else DEVICE_DEFAULT 25 | amt = Pipeline(path_model=args.path_amt, device=device, amt=True) 26 | 27 | for piano_wav in tqdm(piano_wavs, desc="Transcribing piano audio to MIDI"): 28 | piano_midi = piano_wav.with_suffix(".mid") 29 | if (not args.overwrite) and piano_midi.exists(): 30 | continue 31 | amt.wav2midi(str(piano_wav), str(piano_midi)) 32 | 33 | 34 | if __name__ == "__main__": 35 | parser = argparse.ArgumentParser("Transcribe piano audio to MIDI.") 36 | parser.add_argument("--device", type=str, default=None, help="Device to use. Defaults to auto (CUDA if available else CPU).") 37 | parser.add_argument("--path_amt", type=str, default=None, help="Path to the AMT model. 
Defaults to CONFIG.PATH.AMT.") 38 | parser.add_argument("--overwrite", action="store_true", help="Overwrite existing MIDI files.") 39 | args = parser.parse_args() 40 | main(args) 41 | -------------------------------------------------------------------------------- /dataset/src.json: -------------------------------------------------------------------------------- 1 | { 2 | "AKB48_恋するフォーチュンクッキー": { 3 | "original": "dFf4AgBNR1E", 4 | "pianos": [ 5 | "6gWC7ya1zY8" 6 | ] 7 | }, 8 | "Ado_Tot Musica": { 9 | "original": "V9_ZpqfqHFI", 10 | "pianos": [ 11 | "AuDfNo-wn_s", 12 | "BiQaZdWG4KQ", 13 | "LgpnTFdzf2I", 14 | "PKSfECrBkKM", 15 | "XK8rUyFoPh4", 16 | "eUx_Z3rxJf8", 17 | "iRyCKMvMcO8" 18 | ] 19 | }, 20 | "Ado_うっせぇわ": { 21 | "original": "Qp3b-RXtz4w", 22 | "pianos": [ 23 | "13yD5sS6jag", 24 | "DIYhI5GASKA", 25 | "dxb6JxT1sgY", 26 | "fcOj2CuMxmg" 27 | ] 28 | }, 29 | "Ado_ウタカタララバイ": { 30 | "original": "hyV1AJiFNyo", 31 | "pianos": [ 32 | "BXoe4iZUqsA", 33 | "LDhrT34w-48", 34 | "bUoYr33wR-g", 35 | "bWM-zXbhNm0" 36 | ] 37 | }, 38 | "Ado_ギラギラ": { 39 | "original": "sOiMD45QGLs", 40 | "pianos": [ 41 | "0eaRGLz-AbU", 42 | "WrtPvE4LX5E", 43 | "haSTnG5TWD4", 44 | "uLS_0Ii0ehM" 45 | ] 46 | }, 47 | "Ado_唱": { 48 | "original": "pgXpM4l_MwI", 49 | "pianos": [ 50 | "EAq-LLkLsWs", 51 | "erqqUoCaHX0", 52 | "ihuoRh_5NkM", 53 | "mYAOStCzUK0", 54 | "s_ZLqYSist8" 55 | ] 56 | }, 57 | "Ado_新時代": { 58 | "original": "1FliVTcX8bQ", 59 | "pianos": [ 60 | "EDS5_PrW2nE", 61 | "JbYa3hlV5C0", 62 | "ccKVmQ5hgZU", 63 | "i3yNQon2oVE", 64 | "n1M5MyYB04U", 65 | "o24obR-MA9w", 66 | "s8CUV8kigU4" 67 | ] 68 | }, 69 | "Ado_私は最強": { 70 | "original": "sk1Z-Hqwwog", 71 | "pianos": [ 72 | "GD3IA8S0GaE", 73 | "dYaIVMtlNdY", 74 | "nuKz2rOKLqk", 75 | "wVEdtFc3qYs" 76 | ] 77 | }, 78 | "Ado_踊": { 79 | "original": "YnSW8ian29w", 80 | "pianos": [ 81 | "2TfWzi9BImY", 82 | "Q17BWcFMnFU", 83 | "Scm_BsqGYSg", 84 | "h_DMRnSBfN4", 85 | "seSUXjUazpg" 86 | ] 87 | }, 88 | "Ado_逆光": { 89 | "original": "gt-v_YCkaMY", 90 | "pianos": [ 91 | "LJCBPuwSDn8", 92 | "X0FBgneA5MI", 93 | "nY4-spz89c0", 94 | "yycTj54myLY" 95 | ] 96 | }, 97 | "Ado_阿修羅ちゃん": { 98 | "original": "cyq5-StPISU", 99 | "pianos": [ 100 | "AK6TncMfuv0", 101 | "Ai6Vp8kcFZ0", 102 | "C79gQj8ues8", 103 | "PZ92f_hd8MY", 104 | "bTsuNIuxtwc", 105 | "rPSUcgzaloE" 106 | ] 107 | }, 108 | "Aimer_I beg you": { 109 | "original": "pCC6qbAnX00", 110 | "pianos": [ 111 | "3iC-IHq-xkM", 112 | "9nTIIX4xEhk", 113 | "EXslQGBxIBo", 114 | "MtSnZiAxdYs", 115 | "aqscRmTvDd0", 116 | "qTUcFOFGvKQ" 117 | ] 118 | }, 119 | "Aimer_RE:I AM": { 120 | "original": "ImQSdDX_1K0", 121 | "pianos": [ 122 | "DvnLittO0fw", 123 | "OexLHfMmK0E", 124 | "ccJApkXIxA0", 125 | "ehVmQ6nhTGg", 126 | "mhkAdamzvf4", 127 | "sMs_bipSF28" 128 | ] 129 | }, 130 | "Aimer_カタオモイ": { 131 | "original": "kxs9Su_mbpU", 132 | "pianos": [ 133 | "4gBdRFc6mCY", 134 | "ObcmoPddqzM", 135 | "y7ZEgrZwGeE" 136 | ] 137 | }, 138 | "Aimer_残響散歌": { 139 | "original": "tLQLa6lM3Us", 140 | "pianos": [ 141 | "CPAK57oTB1g", 142 | "CPQimCdZLJI", 143 | "OACXQbZvWCw", 144 | "jDp1x2y1Cdg", 145 | "wVXOrb-UQQ0", 146 | "zQ-xZ5egx-I" 147 | ] 148 | }, 149 | "B'z_いつかのメリークリスマス": { 150 | "original": "QZFEnfBmfqg", 151 | "pianos": [ 152 | "2wOgMXosgs8", 153 | "Ef7NtGeQFmg", 154 | "PgmxNSU7fCE", 155 | "XLCI2rTRH_g", 156 | "naRsidrlhwY", 157 | "tGm7W6syMkw" 158 | ] 159 | }, 160 | "BUMP OF CHICKEN_カルマ": { 161 | "original": "mTdcxQZcQAE", 162 | "pianos": [ 163 | "F8qX4MOJ0WM", 164 | "mlqEhaq7kG8" 165 | ] 166 | }, 167 | "BUMP OF CHICKEN_天体観測": { 168 | "original": "j7CDb610Bg0", 169 | 
"pianos": [ 170 | "Fvcexwaz3MQ", 171 | "IrsxuQgWpaM", 172 | "VsnfP1snH4c", 173 | "vC2ZiYtzvBE" 174 | ] 175 | }, 176 | "FUNKY MONKEY BABYS_あとひとつ": { 177 | "original": "ir5cF-EvBig", 178 | "pianos": [ 179 | "89Ky_1vSW_Y", 180 | "9shZyeJJ5rw" 181 | ] 182 | }, 183 | "GENERATIONS_Love You More": { 184 | "original": "k1VYP-hfYrs", 185 | "pianos": [ 186 | "ksvjjGrqqmo" 187 | ] 188 | }, 189 | "GENERATIONS_涙": { 190 | "original": "Qa2LaayKf10", 191 | "pianos": [ 192 | "J0UM0QfNxRU", 193 | "cftLIPkgJKU" 194 | ] 195 | }, 196 | "GLAY_HOWEVER": { 197 | "original": "gPcPseeICjs", 198 | "pianos": [ 199 | "ZNSNOl4fd1w" 200 | ] 201 | }, 202 | "GLAY_グロリアス": { 203 | "original": "6ewGwJ63Nr4", 204 | "pianos": [ 205 | "nGoSdQbYcjQ", 206 | "uT5S0Q1SCVQ" 207 | ] 208 | }, 209 | "GLAY_春を愛する人": { 210 | "original": "SzUN6GNtqos", 211 | "pianos": [ 212 | "18qYifAth3I", 213 | "2y6xwJsDeGY", 214 | "75FcVAZ_TLk" 215 | ] 216 | }, 217 | "GLAY_誘惑": { 218 | "original": "fGlDHwlq5SQ", 219 | "pianos": [ 220 | "1ogwFP5oWZE", 221 | "v2jSRLf0wfI" 222 | ] 223 | }, 224 | "GReeeeN_キセキ": { 225 | "original": "DwTinTO0o9I", 226 | "pianos": [ 227 | "0OSx8Wdfp3g", 228 | "2L4HZ8szO88", 229 | "faEF9WFcjCo", 230 | "yOriJwTUh4E" 231 | ] 232 | }, 233 | "GReeeeN_愛唄": { 234 | "original": "EvwRYGlJWfQ", 235 | "pianos": [ 236 | "3dmd51o-aHU", 237 | "5mMwg8_Cd3A", 238 | "YtAaD1oTeew", 239 | "r8dff5qqRFM" 240 | ] 241 | }, 242 | "HY_366日": { 243 | "original": "glsH4Mgxz-g", 244 | "pianos": [ 245 | "BA6F4_KJJbM", 246 | "HT7ZJ1yPNJY", 247 | "HhcSEP93fYU", 248 | "exX6A_rH9V4", 249 | "n_gmVZJyAdI", 250 | "zysqEnag5Gw" 251 | ] 252 | }, 253 | "HY_あなた": { 254 | "original": "dTpJ06QuvOI", 255 | "pianos": [ 256 | "1pvcCRKHeCY", 257 | "GXAuWeX-wTE", 258 | "ZuSUiRu3vjA" 259 | ] 260 | }, 261 | "Hey! Say! JUMP_Come On A My House": { 262 | "original": "DeysylGcsP4", 263 | "pianos": [ 264 | "HtbB1te1294", 265 | "ifj5AWRwAXc", 266 | "uIoZf-G-HAE" 267 | ] 268 | }, 269 | "Hey! Say! JUMP_White Love": { 270 | "original": "CqPZrzgLMso", 271 | "pianos": [ 272 | "82i4jWyk-tw", 273 | "8nDSED4zEtk", 274 | "NXWaYsVswgc", 275 | "gB1gJyufhgk", 276 | "mwfmjrQvHVY" 277 | ] 278 | }, 279 | "Hey! Say! 
JUMP_ウィークエンダー": { 280 | "original": "R_ipNv527rE", 281 | "pianos": [ 282 | "-3fpw1by9As", 283 | "QT9l1b7Ux24", 284 | "RWYNvFNo_ko", 285 | "bv80qi4jvtc", 286 | "qpH6ZeMMGAo", 287 | "vIhnXGOB9EI" 288 | ] 289 | }, 290 | "JUJU_ありがとう": { 291 | "original": "2KnShDMNF_w", 292 | "pianos": [ 293 | "7S6AqBfYRqY", 294 | "WcUr0hE_sUQ", 295 | "bjbBd4q2Bno", 296 | "qlQmej8FbBw" 297 | ] 298 | }, 299 | "JUJU_ただいま": { 300 | "original": "4Jkk9bfxj20", 301 | "pianos": [ 302 | "gUcQ5cc-HKM", 303 | "stp9UKL_JpA" 304 | ] 305 | }, 306 | "JUJU_また明日": { 307 | "original": "cyXPCufrt6k", 308 | "pianos": [ 309 | "IWL8Gs6gGDo", 310 | "dz7_zoICRAc" 311 | ] 312 | }, 313 | "JUJU_やさしさで溢れるように": { 314 | "original": "jIl8_jMMULs", 315 | "pianos": [ 316 | "5yiN5RakjaA", 317 | "KuURfLSkvyc", 318 | "zOhL4GynIN0" 319 | ] 320 | }, 321 | "Janne Da Arc_振り向けば…": { 322 | "original": "l9-yxwxLAn8", 323 | "pianos": [ 324 | "RQdsxJZ9mDw", 325 | "W7egfML5rrc", 326 | "dnSiiZyGHIg" 327 | ] 328 | }, 329 | "Janne Da Arc_月光花": { 330 | "original": "tHk4thfcL9Q", 331 | "pianos": [ 332 | "3r7l-s02IF0", 333 | "MVyixwv2Eso", 334 | "OgTXSJpG42U", 335 | "PurD8sUnZEg", 336 | "UhpUOy9RX2Y", 337 | "oOneXew-018" 338 | ] 339 | }, 340 | "King Gnu_SPECIALZ": { 341 | "original": "fhzKLBZJC3w", 342 | "pianos": [ 343 | "D0aPU3ykK78", 344 | "GYLtpKFrPfY", 345 | "TyGYz7l8ZUE", 346 | "Z4wg_PFWE7o", 347 | "iZw9WGUTxOs", 348 | "pKvUaUkilMM", 349 | "ptnAeX39DFI" 350 | ] 351 | }, 352 | "King Gnu_Teenager Forever": { 353 | "original": "XPUN-w543bc", 354 | "pianos": [ 355 | "7Rt3gbME8jM", 356 | "9uFFfR3dZIQ", 357 | "o0JKpOoJ1QM" 358 | ] 359 | }, 360 | "King Gnu_Vinyl": { 361 | "original": "RLAw8Ct9k48", 362 | "pianos": [ 363 | "0P3c_49PgvU", 364 | "17khhS6_gwg", 365 | "I3iMOrd2XJ4", 366 | "zW61UKUBVC4" 367 | ] 368 | }, 369 | "King Gnu_どろん": { 370 | "original": "gc0_Acq8dV4", 371 | "pianos": [ 372 | "9vb9KIMrRDM", 373 | "KkkT-1xo7Mo", 374 | "_zqosRGKRNk" 375 | ] 376 | }, 377 | "King Gnu_カメレオン": { 378 | "original": "BOrdMrh4uKg", 379 | "pianos": [ 380 | "FoThoCU2orE", 381 | "HmMdgVHtcq0", 382 | "b6ldC2tj1HM", 383 | "cDTwJbvI0yI", 384 | "qFjO1sszZ64", 385 | "vb7Mu0Z69Dc", 386 | "wQcI7xJHvFA", 387 | "yTHPb9C92Xs" 388 | ] 389 | }, 390 | "King Gnu_一途": { 391 | "original": "hm1na9R2uYA", 392 | "pianos": [ 393 | "13FAPeqaBjg", 394 | "5FUk9FCMuj0", 395 | "AL6oO8n-14M", 396 | "Cs_KWmRhPtY", 397 | "JJSiMKkUgLM", 398 | "Nl1qN3wAs4g", 399 | "cKDd44yzIQY" 400 | ] 401 | }, 402 | "King Gnu_三文小説": { 403 | "original": "Y8HeOA95UzQ", 404 | "pianos": [ 405 | "DcWAehV3w3I", 406 | "v4UNQKg38VE", 407 | "yKzZwf90Wd0" 408 | ] 409 | }, 410 | "King Gnu_白日": { 411 | "original": "ony539T074w", 412 | "pianos": [ 413 | "GpeMS_JoMd0", 414 | "Tnr2H8YNgkQ", 415 | "YI_CeEsS7Bs", 416 | "kHHj9LZbnnM" 417 | ] 418 | }, 419 | "King Gnu_逆夢": { 420 | "original": "M6gcoDN9jBc", 421 | "pianos": [ 422 | "Cz9JMLe2P9o", 423 | "H6QBuc2YfcU", 424 | "PGe7USWT8r0", 425 | "QS2xh6Rz4CI", 426 | "Yb_BXEKc_m4", 427 | "ZL4n2KThtd8", 428 | "q6ZGS4msOzw", 429 | "yJ4F0HNGliU" 430 | ] 431 | }, 432 | "King Gnu_飛行艇": { 433 | "original": "MSv7NbfbtU8", 434 | "pianos": [ 435 | "A2i62dG2zIU", 436 | "QkFsSon7KhU", 437 | "Vgy0bvLUawQ", 438 | "bJJhTH3pw2k", 439 | "l3a1MxDaXA8", 440 | "sX30or4Q9JQ" 441 | ] 442 | }, 443 | "Kis-My-Ft2_Everybody Go": { 444 | "original": "8xnQRSKT9eA", 445 | "pianos": [ 446 | "4ikFYdfGDBc", 447 | "xkfW9ugGqiE" 448 | ] 449 | }, 450 | "Kis-My-Ft2_Luv Bias": { 451 | "original": "xzc5ZwSCtxg", 452 | "pianos": [ 453 | "5ZVdmE-18PM", 454 | "JSrXHq50nVo", 455 | "TVnbaWa_E4Q", 456 | "qgsiziPVxWU", 457 | 
"rhYzk7pDmGY", 458 | "syE5rULjbzc" 459 | ] 460 | }, 461 | "Kis-My-Ft2_SHE! HER! HER!": { 462 | "original": "xqG83DMJ6NY", 463 | "pianos": [ 464 | "-16P3KCZLbk", 465 | "0IVKWoX7UHQ", 466 | "3zk_18MomVk", 467 | "l-UwMfPuxAc" 468 | ] 469 | }, 470 | "L'Arc~en~Ciel_DAYBREAK'S BELL": { 471 | "original": "VUOgmg0RXrE", 472 | "pianos": [ 473 | "0eaGjF2thsQ", 474 | "O1Wi-tzwj9I", 475 | "cjHw54b3Drk" 476 | ] 477 | }, 478 | "L'Arc~en~Ciel_Driver's High": { 479 | "original": "bzuuxp7dsxQ", 480 | "pianos": [ 481 | "6p8Znwe0Dw4", 482 | "QTDO9HB8TSo", 483 | "ll8ly2Xvv6g" 484 | ] 485 | }, 486 | "L'Arc~en~Ciel_flower": { 487 | "original": "Tj4aN-JxA7M", 488 | "pianos": [ 489 | "H_C1OTnasUk", 490 | "qYH7dsQF3xA" 491 | ] 492 | }, 493 | "L'Arc~en~Ciel_瞳の住人": { 494 | "original": "Ssd111jVg2c", 495 | "pianos": [ 496 | "3YZ4B56urM4", 497 | "bzEt3ZbC0Ek", 498 | "ip2Qn3mFPOg", 499 | "ml8wSgEBqho", 500 | "rFC9Nilia78" 501 | ] 502 | }, 503 | "LiSA_Catch the Moment": { 504 | "original": "LJkn2qqtijk", 505 | "pianos": [ 506 | "SWgA3xdm1xY", 507 | "TACoHSclkl0", 508 | "lYIKfUgEnF8", 509 | "m28C1Mwi9eM", 510 | "u-MAM2ltWfs" 511 | ] 512 | }, 513 | "LiSA_Rising Hope": { 514 | "original": "cVKb_oerO7w", 515 | "pianos": [ 516 | "BZUPesAHvBQ", 517 | "PPzvKopK-ao", 518 | "W3kpTZ298eA", 519 | "YqVCN9GbwL4", 520 | "eX6xM60950Y", 521 | "yMAYTJxJ-_k" 522 | ] 523 | }, 524 | "LiSA_crossing field": { 525 | "original": "1aPOj0ERTEc", 526 | "pianos": [ 527 | "717qEKaca5I", 528 | "IT8Wo8GGDGg", 529 | "Kr9it8Qwv5E", 530 | "OtI2EMKyrt4", 531 | "p-fFgLE3vB0", 532 | "siLxOrsUe2s" 533 | ] 534 | }, 535 | "LiSA_シルシ": { 536 | "original": "qqs3USR3_PE", 537 | "pianos": [ 538 | "44k3FDlhg9o", 539 | "GXRim6C-czs", 540 | "IhbQlDcahU4", 541 | "NtsHZTFUXtY", 542 | "Nyfl-HP8XTI", 543 | "mn819YlDPJQ", 544 | "uiYPmcdqYp4" 545 | ] 546 | }, 547 | "LiSA_明け星": { 548 | "original": "yGcm81aaTHg", 549 | "pianos": [ 550 | "-x8SngQfa9s", 551 | "5IhJqIZYrMk", 552 | "Dai18_tEIrE" 553 | ] 554 | }, 555 | "LiSA_炎": { 556 | "original": "4DxL6IKmXx4", 557 | "pianos": [ 558 | "0QzFvaWbTwk", 559 | "XJVpSTTUpKg", 560 | "ZFUDvVgIMO0", 561 | "joxgTFxy-GQ", 562 | "zt01iZ67I0c" 563 | ] 564 | }, 565 | "LiSA_紅蓮華": { 566 | "original": "x1FV6IrjZCY", 567 | "pianos": [ 568 | "7Ncj1fR7dZg", 569 | "GfjrRR-FzKE", 570 | "NQMmZ4Psb7w", 571 | "eA16v1NStA0", 572 | "taj88J5RjbY" 573 | ] 574 | }, 575 | "MISIA_Everything": { 576 | "original": "aHIR33pOUv0", 577 | "pianos": [ 578 | "A_aXPfFWCRQ", 579 | "Q5r6Rc2ma7s", 580 | "oGBdDQQ8AaE", 581 | "rER5Q4ndqKI" 582 | ] 583 | }, 584 | "MISIA_忘れない日々": { 585 | "original": "7-xChqpitwQ", 586 | "pianos": [ 587 | "AB1jhVd_b1A", 588 | "hMOG26m8Tjg", 589 | "z9MDj2oin3A" 590 | ] 591 | }, 592 | "MISIA_逢いたくていま": { 593 | "original": "MYo5alIaUOk", 594 | "pianos": [ 595 | "CW83N6za2DQ", 596 | "NyzsOfBO0Hc", 597 | "nijb4SYvxTw", 598 | "unTZsAVSdLo" 599 | ] 600 | }, 601 | "MrChildren_365日": { 602 | "original": "43D_nBGfuGY", 603 | "pianos": [ 604 | "8CHhy_PauQw", 605 | "JKpmthfePDc", 606 | "g_SWeBfdOVY" 607 | ] 608 | }, 609 | "MrChildren_HANABI": { 610 | "original": "EXxaBXKjl6Q", 611 | "pianos": [ 612 | "Sne7dJVr4MQ", 613 | "bNXuL7l_s2k", 614 | "fWUbILyVJ5s", 615 | "pkIUOXRszXg" 616 | ] 617 | }, 618 | "MrChildren_HERO": { 619 | "original": "ofYTAoS4LzE", 620 | "pianos": [ 621 | "-I5Y-8x7jbU", 622 | "5FtxZPHLuVc", 623 | "JVdAIMylTyE" 624 | ] 625 | }, 626 | "MrChildren_Sign": { 627 | "original": "4wuWJ7GAc2s", 628 | "pianos": [ 629 | "Flddh4JoyOc", 630 | "Hvf40zg5Fm0", 631 | "Pt4GfbIJSKM", 632 | "WEGq07yr5dY", 633 | "yS_jZd3vjhY", 634 | "z5abSAwyhNM" 635 | ] 
636 | }, 637 | "MrChildren_Tomorrow never knows": { 638 | "original": "Nxwt_s1lM04", 639 | "pianos": [ 640 | "0dbsoE19RyA", 641 | "cHMSe-EtPCM", 642 | "vClqPJ0pmJE", 643 | "wOoJHrQTHic" 644 | ] 645 | }, 646 | "MrChildren_しるし": { 647 | "original": "xXA5StMti8c", 648 | "pianos": [ 649 | "D_izAYhavuo", 650 | "Dh4raypnn-A", 651 | "M8VUDjHPzng", 652 | "SzXH2U1SCT4", 653 | "ZAywww3gTtw", 654 | "guW76dREz5Y" 655 | ] 656 | }, 657 | "MrChildren_エソラ": { 658 | "original": "sQtlTOLyb_w", 659 | "pianos": [ 660 | "31xlUzWmZc4", 661 | "HIhBKmHO54I" 662 | ] 663 | }, 664 | "MrChildren_名もなき詩": { 665 | "original": "gj5Nu6feFTQ", 666 | "pianos": [ 667 | "20hmR8y_p88", 668 | "Hq_fCbz7pRE", 669 | "ZWybp1OhCYY", 670 | "rr5wx82M-LU", 671 | "tHaOQwyu8Ng" 672 | ] 673 | }, 674 | "MrChildren_終わりなき旅": { 675 | "original": "QCiERL2m3Ss", 676 | "pianos": [ 677 | "aNsqWeCt9S4", 678 | "eOxR14Ma2GI", 679 | "lGQkDg3khAU" 680 | ] 681 | }, 682 | "Mrs GREEN APPLE_Soranji": { 683 | "original": "44cICMd3jW4", 684 | "pianos": [ 685 | "1O0Zz7XYcVc", 686 | "CnbBfc2Tl8I", 687 | "DHAeiFIz6ao", 688 | "DNZt2Tos0Ww", 689 | "Zo5QjcdIBhQ", 690 | "edHgNwNZkMQ" 691 | ] 692 | }, 693 | "Mrs GREEN APPLE_Speaking": { 694 | "original": "4KUA-1DvQZk", 695 | "pianos": [ 696 | "ky9dCJPIrhQ", 697 | "rGQpESVW1ss" 698 | ] 699 | }, 700 | "Mrs GREEN APPLE_StaRt": { 701 | "original": "OTUtF7ZxRN8", 702 | "pianos": [ 703 | "2jzVTn1_jX0", 704 | "40DEfEjh9OM", 705 | "X_MgIHhphVI", 706 | "dB4-uvoQ85s", 707 | "oHAhtntbW00", 708 | "tMq57v-J8Eo" 709 | ] 710 | }, 711 | "Mrs GREEN APPLE_WanteD! WanteD!": { 712 | "original": "PbISczErpKY", 713 | "pianos": [ 714 | "EeB6KrEIb08", 715 | "oUgjY7g7SSE" 716 | ] 717 | }, 718 | "Mrs GREEN APPLE_インフェルノ": { 719 | "original": "wfCcs0vLysk", 720 | "pianos": [ 721 | "3tcbbtPqJJQ", 722 | "AnU-IxidSgY", 723 | "KmA1Qoxd1oE", 724 | "LhWQHxaRjl0", 725 | "PCyynFl4E9c", 726 | "aGs3DTs0AGY", 727 | "vRdbHvR6xWg", 728 | "xvAjxhqZGmA" 729 | ] 730 | }, 731 | "Mrs GREEN APPLE_ケセラセラ": { 732 | "original": "Jy-QS27q7lA", 733 | "pianos": [ 734 | "3jf8RCDoinE", 735 | "AocJUtWH6qw", 736 | "L4LC1AIwQAI", 737 | "SaTs13UrsHw", 738 | "bbyNocEEEA4", 739 | "bwyH_okERLw", 740 | "nnyl8W2ABtM" 741 | ] 742 | }, 743 | "Mrs GREEN APPLE_ダンスホール": { 744 | "original": "x2rvSf0STBM", 745 | "pianos": [ 746 | "72UixYyR79s", 747 | "Od1cP-DhcIA", 748 | "Vnuqzjg2v30", 749 | "XPmv-5gl8iU", 750 | "_k8eoC3DlY8", 751 | "gX3HYr_PZR4", 752 | "hgsElXPMR6Y", 753 | "qbYA5lx17EY", 754 | "u1l18y4e8CU" 755 | ] 756 | }, 757 | "Mrs GREEN APPLE_僕のこと": { 758 | "original": "xefpHEg5UIA", 759 | "pianos": [ 760 | "CAWjqNWe_5U", 761 | "CZ9u0lxi8VA", 762 | "Tn9FjLCdYss", 763 | "fnGwOm2GewA", 764 | "lZEWaFzUHoI" 765 | ] 766 | }, 767 | "Mrs GREEN APPLE_点描の唄(feat井上苑子)": { 768 | "original": "sL-yJIyuEaM", 769 | "pianos": [ 770 | "C15TdPoUucw", 771 | "EKqS3JUGUyg", 772 | "T0b9AD5HLo4", 773 | "aqBFtF38ehY", 774 | "oc-t0J3usvk", 775 | "u0oPPQj-fNQ" 776 | ] 777 | }, 778 | "Mrs GREEN APPLE_青と夏": { 779 | "original": "m34DPnRUfMU", 780 | "pianos": [ 781 | "ASV_6rRd0WA", 782 | "ID9mpoXLGfk", 783 | "JfJIM69p2EE", 784 | "QVokGi7xCZc", 785 | "R6D3ehOoWOI", 786 | "fDxA73spy2U", 787 | "tjAX1alDs6o" 788 | ] 789 | }, 790 | "ONE OK ROCK_Re:make": { 791 | "original": "bV4vcr8E4HU", 792 | "pianos": [ 793 | "S0TRharvci0", 794 | "SKirO0i8pbM", 795 | "aHrkozroBpc" 796 | ] 797 | }, 798 | "ONE OK ROCK_The Beginning": { 799 | "original": "Hh9yZWeTmVM", 800 | "pianos": [ 801 | "K4cuWmtVF18", 802 | "agOm6o02Smc", 803 | "hhsXnf2R7A8" 804 | ] 805 | }, 806 | "ONE OK ROCK_Wasted Nights": { 807 | "original": "b4YLo74OWfY", 
808 | "pianos": [ 809 | "CyN5oxj-5a0", 810 | "ETzfhnImoHs", 811 | "sxGYEpvbwyQ" 812 | ] 813 | }, 814 | "ONE OK ROCK_じぶんROCK": { 815 | "original": "kTkXe60AP8M", 816 | "pianos": [ 817 | "BQ-z2JRo5nw", 818 | "x6b_cV55KSU" 819 | ] 820 | }, 821 | "ONE OK ROCK_完全感覚Dreamer": { 822 | "original": "xGbxsiBZGPI", 823 | "pianos": [ 824 | "T4AUFnx0dQw", 825 | "h8gIIRDYrxM", 826 | "onI4d6Z4WTw" 827 | ] 828 | }, 829 | "ORANGE RANGE_イケナイ太陽": { 830 | "original": "UzeKDH29TW0", 831 | "pianos": [ 832 | "5LHHqsEAwBY", 833 | "6uHenJOKT9o" 834 | ] 835 | }, 836 | "ORANGE RANGE_花": { 837 | "original": "iYkg90hDInc", 838 | "pianos": [ 839 | "MIbY5ad8z3s", 840 | "PmJiSsG55ug", 841 | "QEnSvMt2HsI", 842 | "SyeA3MQYapk" 843 | ] 844 | }, 845 | "Official髭男dism_115万キロのフィルム": { 846 | "original": "ReCnlwVZj1M", 847 | "pianos": [ 848 | "TsN3HM7dz00", 849 | "q7QfutySdBA", 850 | "uIRplIh0XP8" 851 | ] 852 | }, 853 | "Official髭男dism_Cry Baby": { 854 | "original": "O1bhZgkC4Gw", 855 | "pianos": [ 856 | "KSbjc2E68yo", 857 | "P8uWvzMWF4A", 858 | "TlahSNyYOgg", 859 | "qwXuXFGTg6A", 860 | "wvgkChFFmpU" 861 | ] 862 | }, 863 | "Official髭男dism_I LOVE": { 864 | "original": "bt8wNQJaKAk", 865 | "pianos": [ 866 | "LbkxaLX5SfQ", 867 | "oWqE6BBx1QI" 868 | ] 869 | }, 870 | "Official髭男dism_Pretender": { 871 | "original": "TQ8WlA2GXbk", 872 | "pianos": [ 873 | "3Ah0xXJMHJw", 874 | "Gsq6z7HReYc", 875 | "Ww3Qwk2hbzU" 876 | ] 877 | }, 878 | "Official髭男dism_Subtitle": { 879 | "original": "hN5MBlGv2Ac", 880 | "pianos": [ 881 | "5vVMgXdThC0", 882 | "D8gKsfFWEPw", 883 | "hduPzGTJoyI", 884 | "i8FgDFy9QMM" 885 | ] 886 | }, 887 | "Official髭男dism_イエスタデイ": { 888 | "original": "DuMqFknYHBs", 889 | "pianos": [ 890 | "MXCP-Q9bmJU", 891 | "WARfLQLXigw", 892 | "WC2AVe2_FV0", 893 | "Z6tX-B4rAdM", 894 | "ilFIERmw9Xc" 895 | ] 896 | }, 897 | "Official髭男dism_ノーダウト": { 898 | "original": "EHw005ZqCXk", 899 | "pianos": [ 900 | "0M1SFJE2kX4", 901 | "lgHztnl8bdM" 902 | ] 903 | }, 904 | "Official髭男dism_ミックスナッツ": { 905 | "original": "CbH2F0kXgTY", 906 | "pianos": [ 907 | "81Ut3d4il-o", 908 | "Q-ORmbDQWFg", 909 | "fZaKtTX8xxk", 910 | "y84tjSkVY2w" 911 | ] 912 | }, 913 | "Official髭男dism_宿命": { 914 | "original": "-kgOFJG881I", 915 | "pianos": [ 916 | "6_jY5IRbYt8", 917 | "Bk0qpLBn6Kg", 918 | "qNBJIsI2CxQ" 919 | ] 920 | }, 921 | "Perfume_FLASH": { 922 | "original": "q6T0wOMsNrI", 923 | "pianos": [ 924 | "0AUgRinkysY", 925 | "2-V6PLis6ic", 926 | "SuCuWr1SQ6s", 927 | "uIKHv3o4i4g" 928 | ] 929 | }, 930 | "Perfume_TOKYO GIRL": { 931 | "original": "vxl4gsvgEQY", 932 | "pianos": [ 933 | "6gWJKMWIqTY", 934 | "kILTxJhV-r0", 935 | "knj-LtVk3SU", 936 | "oEpZEsH2S40", 937 | "zSagpafI1ks" 938 | ] 939 | }, 940 | "Perfume_ポリリズム": { 941 | "original": "KbiSxunJatM", 942 | "pianos": [ 943 | "395x6kRb0A8" 944 | ] 945 | }, 946 | "RADWIMPS_おしゃかしゃま": { 947 | "original": "7MaF-bWeLGw", 948 | "pianos": [ 949 | "Qk8vBnM52v4", 950 | "VW2pph9pvGg", 951 | "bRMg6IUwRCQ" 952 | ] 953 | }, 954 | "RADWIMPS_前前前世 (movie ver)": { 955 | "original": "PDSkFeMVNFs", 956 | "pianos": [ 957 | "5jnd6Er9YYg", 958 | "_w0Hx5I5AdA", 959 | "eIeGUwj2C6U", 960 | "irCzStp5IHY" 961 | ] 962 | }, 963 | "RADWIMPS_夢灯籠": { 964 | "original": "S6kjwLlKXnk", 965 | "pianos": [ 966 | "8_gMR7Tkesg", 967 | "HzGU3xZLYHE", 968 | "Yoa45OdIpmU", 969 | "u8XkoXzOD4E" 970 | ] 971 | }, 972 | "RADWIMPS_有心論": { 973 | "original": "c2y8Ba3WwPY", 974 | "pianos": [ 975 | "-lOsx0lblvo", 976 | "QljF_v4gWFY", 977 | "RUuoqX5FZmQ" 978 | ] 979 | }, 980 | "SEKAI NO OWARI_Dragon Night": { 981 | "original": "gsVGf1T2Hfs", 982 | "pianos": [ 983 | "FsFSeit95Mw", 
984 | "nB5GqBymLNE" 985 | ] 986 | }, 987 | "SEKAI NO OWARI_Habit": { 988 | "original": "8OZDgBmehbA", 989 | "pianos": [ 990 | "PtOsy3JyGog", 991 | "i9VXkUSS6Ak", 992 | "nsrX6QDkM-o" 993 | ] 994 | }, 995 | "SEKAI NO OWARI_RPG": { 996 | "original": "Mi9uNu35Gmk", 997 | "pianos": [ 998 | "-8PykZU0G1c", 999 | "6jbdf9XJhTA", 1000 | "QcjPdH7siVE", 1001 | "_0Z9J5VsdU0" 1002 | ] 1003 | }, 1004 | "SEKAI NO OWARI_不死鳥": { 1005 | "original": "YwYRzpMhwws", 1006 | "pianos": [ 1007 | "QXKpSk8k-Yo", 1008 | "negZ5Wq3IOg", 1009 | "yTQVmJp7dZQ" 1010 | ] 1011 | }, 1012 | "SEKAI NO OWARI_天使と悪魔": { 1013 | "original": "hVFY4Yvv3ho", 1014 | "pianos": [ 1015 | "QUw7F7YxacQ", 1016 | "VPLF-H_e7p0", 1017 | "Xda4uMbYSd8", 1018 | "oq6bI2zAeqA" 1019 | ] 1020 | }, 1021 | "SEKAI NO OWARI_炎と森のカーニバル": { 1022 | "original": "w9V3x61D994", 1023 | "pianos": [ 1024 | "M_zcbdI3Wag", 1025 | "RZsKGnGF5lI", 1026 | "msH8gcSXZdM" 1027 | ] 1028 | }, 1029 | "SEKAI NO OWARI_眠り姫": { 1030 | "original": "45Ft2gmvgPg", 1031 | "pianos": [ 1032 | "M3sXQsHCxt8", 1033 | "QnErsILfF0I", 1034 | "fraHEydt-uo", 1035 | "wcFzDhXS59g" 1036 | ] 1037 | }, 1038 | "SEKAI NO OWARI_花鳥風月": { 1039 | "original": "ZyLCgAcoRsY", 1040 | "pianos": [ 1041 | "2tFIvHtBnUM", 1042 | "GUe8CL196cg", 1043 | "QjTM3-BIZuY", 1044 | "YBxlEG8Neag" 1045 | ] 1046 | }, 1047 | "Superfly_Beautiful": { 1048 | "original": "tfeSwQ-iU0U", 1049 | "pianos": [ 1050 | "27yLQGBot64", 1051 | "LLZV839yeLU", 1052 | "RF3N4bQMt20", 1053 | "XyMCNvY6F8Y", 1054 | "cxEcUzTEn5k", 1055 | "eiriIzC2Zyc" 1056 | ] 1057 | }, 1058 | "Superfly_Bi-Li-Li Emotion": { 1059 | "original": "HIEdkeWZwQM", 1060 | "pianos": [ 1061 | "AisyKKg4JCw", 1062 | "s4hYU7fuMgQ" 1063 | ] 1064 | }, 1065 | "Superfly_フレア": { 1066 | "original": "0vJrBbcsfac", 1067 | "pianos": [ 1068 | "DkGbgK6ObAQ", 1069 | "EQgynPEYvwQ", 1070 | "TVAnqElFPxI", 1071 | "axw2Ds1QaGk", 1072 | "dfqOZNqGpog", 1073 | "oNmMTz0fTDg", 1074 | "yzzfiSDWD8Q" 1075 | ] 1076 | }, 1077 | "Superfly_輝く月のように": { 1078 | "original": "gG7evVU0OdA", 1079 | "pianos": [ 1080 | "RvtY-gaEC1I", 1081 | "p3jzGio9whY" 1082 | ] 1083 | }, 1084 | "TUBE_あー夏休み": { 1085 | "original": "hXDkGFPJ9Ns", 1086 | "pianos": [ 1087 | "SlUHhOIEMpo", 1088 | "ahSD6qXpni4" 1089 | ] 1090 | }, 1091 | "TUBE_シーズン・イン・ザ・サン": { 1092 | "original": "jdBWcHTGo9U", 1093 | "pianos": [ 1094 | "1YuO2qqnE0Y", 1095 | "RYyAtLPo-s8", 1096 | "WxLQcDG4pGA" 1097 | ] 1098 | }, 1099 | "TUBE_君となら": { 1100 | "original": "rB3qzyC27Y8", 1101 | "pianos": [ 1102 | "WfTooZyhSdM", 1103 | "boI0JjQBWX4", 1104 | "h9xseBDlPNk", 1105 | "lBUquwqGoQg" 1106 | ] 1107 | }, 1108 | "TUBE_夏を待ちきれなくて": { 1109 | "original": "6epn2zs7uSg", 1110 | "pianos": [ 1111 | "VDQGqO5M1V0" 1112 | ] 1113 | }, 1114 | "TUBE_夏を抱きしめて": { 1115 | "original": "_t45ZxZRGYQ", 1116 | "pianos": [ 1117 | "F09FfJC0bRc", 1118 | "VpIZFiZArgU", 1119 | "_eqpZfR-0kg", 1120 | "jCtjkt_EzkI" 1121 | ] 1122 | }, 1123 | "UNISON SQUARE GARDEN_シュガーソングとビターステップ": { 1124 | "original": "ERLEeGVWYxg", 1125 | "pianos": [ 1126 | "2yo5FHH2KnM", 1127 | "CrKR78rBxPg", 1128 | "TYhAV5jz8Y4", 1129 | "TZ_8CBahXAA", 1130 | "jARQlgnTaKY" 1131 | ] 1132 | }, 1133 | "UVERworld_7th Trigger": { 1134 | "original": "FAU7NyiqwQY", 1135 | "pianos": [ 1136 | "YAn0BboBWe0" 1137 | ] 1138 | }, 1139 | "UVERworld_儚くも永久のカナシ": { 1140 | "original": "ZIEQDjrAdwE", 1141 | "pianos": [ 1142 | "KzA8rGttbro", 1143 | "MuXdJPy8bSs" 1144 | ] 1145 | }, 1146 | "V6_Feel your breeze": { 1147 | "original": "zbB9sg6pErg", 1148 | "pianos": [ 1149 | "02NiKQOI6I4", 1150 | "PghNnfK81mA", 1151 | "i9OMFdMMSNc" 1152 | ] 1153 | }, 1154 | 
"YOASOBI_あの夢をなぞって": { 1155 | "original": "sAuEeM_6zpk", 1156 | "pianos": [ 1157 | "29CGsHBpb5I", 1158 | "5bpDTXXORng", 1159 | "DY2xZIg4wjY", 1160 | "FubdoQHArXM", 1161 | "Ksj0ooMFYT8", 1162 | "X2dE7s3ehXw", 1163 | "_C2tZ5oZELA", 1164 | "cf0uxV5n3II", 1165 | "ka6HfI0NX6Q", 1166 | "l_u8YyYq7mo", 1167 | "oiUZxbEXhVE" 1168 | ] 1169 | }, 1170 | "YOASOBI_アイドル": { 1171 | "original": "ZRtdQ81jPUQ", 1172 | "pianos": [ 1173 | "4HggbTkjeQw", 1174 | "C4r883BWuE0", 1175 | "IDQj_PDYSd4", 1176 | "J7PzKoF-b94", 1177 | "S7k8eoUrXwE", 1178 | "V5BeBRjuLkU", 1179 | "lzSfnMp_DTs", 1180 | "x97GFCiIlAg" 1181 | ] 1182 | }, 1183 | "YOASOBI_ハルカ": { 1184 | "original": "vd3IlOjSUGQ", 1185 | "pianos": [ 1186 | "5OTfm4lYfpE", 1187 | "9mwIOxbThRg", 1188 | "CKZ4qVJATj4", 1189 | "TkzApUJDAyg", 1190 | "ZORMjL_UWeY", 1191 | "jXNODjCEtco" 1192 | ] 1193 | }, 1194 | "YOASOBI_ハルジオン": { 1195 | "original": "kzdJkT4kp-A", 1196 | "pianos": [ 1197 | "AUI9g67TkVk", 1198 | "K7uNkzR6-pM", 1199 | "R_AvnlNEzto", 1200 | "RgI_0gBVvv8", 1201 | "XFEgigNIfKg", 1202 | "_TuC-eXgkX4", 1203 | "s-IGy_tS95k", 1204 | "vrIzWEI5v_Y" 1205 | ] 1206 | }, 1207 | "YOASOBI_三原色": { 1208 | "original": "nhOhFOoURnE", 1209 | "pianos": [ 1210 | "5cCg8tJjJXc", 1211 | "FhynF6cEpS0", 1212 | "H7MDLFXge6s", 1213 | "L51rhYzXLUk", 1214 | "MZaQ8mxcgO4", 1215 | "PlNc_ob687g", 1216 | "XYgGKhwKe_g", 1217 | "fRaHIUnPCgA" 1218 | ] 1219 | }, 1220 | "YOASOBI_勇者": { 1221 | "original": "OIBODIPC_8Y", 1222 | "pianos": [ 1223 | "0GAMxN0caNw", 1224 | "5wfUkAJtB0A", 1225 | "CZMfzl27ZWA", 1226 | "E-ujnX-rnBo", 1227 | "i1XDAxS8qeQ", 1228 | "l3ufvSwc3SY", 1229 | "rFNhmkKbh3U", 1230 | "sru72Wk20Y0", 1231 | "yGaybQmxOvs" 1232 | ] 1233 | }, 1234 | "YOASOBI_夜に駆ける": { 1235 | "original": "x8VYWazR5mE", 1236 | "pianos": [ 1237 | "8-_c0HDZI6E", 1238 | "Bjk6jb-Nnm8", 1239 | "LEsUnmMDFgk", 1240 | "_K2Ux_8fWiw", 1241 | "dx3ghXUvVis", 1242 | "qSD0hZaeTbQ", 1243 | "vmbw2ps3roc" 1244 | ] 1245 | }, 1246 | "YOASOBI_怪物": { 1247 | "original": "dy90tA3TT1c", 1248 | "pianos": [ 1249 | "-noJ0yrujfs", 1250 | "D6NVzaydXCI", 1251 | "F1aHep240eg", 1252 | "FBcM7_-XJk8", 1253 | "wD3MlqaLDZM", 1254 | "wy9qitqQVts", 1255 | "xrug1HwhGN0" 1256 | ] 1257 | }, 1258 | "YOASOBI_祝福": { 1259 | "original": "3eytpBOkOFA", 1260 | "pianos": [ 1261 | "32hgHAQNSSQ", 1262 | "EdV-EDk69y0", 1263 | "FFplrpwOf5s", 1264 | "GWTM-ChkEy0", 1265 | "KoD-eO3O9k0", 1266 | "i34iOMyKZnQ", 1267 | "l4iipa6DqUU", 1268 | "nBcScW-L8u0" 1269 | ] 1270 | }, 1271 | "YOASOBI_群青": { 1272 | "original": "Y4nEEZwckuU", 1273 | "pianos": [ 1274 | "35wgyC0rmeQ", 1275 | "CBt5jUphTyE", 1276 | "HDWeB_2fYa4", 1277 | "hRsrxYlFBa8", 1278 | "mIGy10keGwI", 1279 | "npFyWejtmEM" 1280 | ] 1281 | }, 1282 | "YUI_GLORIA": { 1283 | "original": "7zaL1bPmbhE", 1284 | "pianos": [ 1285 | "BigMn6dudNE", 1286 | "OPbaRn7pqqo", 1287 | "pRIffEK3thg" 1288 | ] 1289 | }, 1290 | "YUI_Namidairo": { 1291 | "original": "sZu5c_QYW50", 1292 | "pianos": [ 1293 | "PIO0NIvg2IM", 1294 | "tmSYqzbLxaY" 1295 | ] 1296 | }, 1297 | "YUI_again": { 1298 | "original": "w5OUAY1j3gQ", 1299 | "pianos": [ 1300 | "CFawH7V8aN4", 1301 | "Kt_JePg86b8", 1302 | "MK1_5jULtvk", 1303 | "O9cTZ7koWU0", 1304 | "n5youKptrxY" 1305 | ] 1306 | }, 1307 | "[Alexandros]_ワタリドリ": { 1308 | "original": "O_DLtVuiqhI", 1309 | "pianos": [ 1310 | "C0jkZrq_Yms", 1311 | "elqG6RjiEtc", 1312 | "ndae4Yi_MvA" 1313 | ] 1314 | }, 1315 | "[Alexandros]_閃光": { 1316 | "original": "xfG6L9I7N8I", 1317 | "pianos": [ 1318 | "D5aT3iiPgJk", 1319 | "MJFO-iAMpDk", 1320 | "gpxxpF10yD4", 1321 | "lXLWfKpqjL4", 1322 | "t-1EkjaVDQQ" 1323 | ] 1324 | }, 1325 | 
"aiko_KissHug": { 1326 | "original": "r8yTsDQCfR0", 1327 | "pianos": [ 1328 | "emYXWGe4uzc", 1329 | "jVwgkJTpE2Q" 1330 | ] 1331 | }, 1332 | "aiko_カブトムシ": { 1333 | "original": "wp2U40KI63A", 1334 | "pianos": [ 1335 | "fckbKSQMo9Y", 1336 | "kiGCWVfFJM4", 1337 | "lNMlL19gWdQ", 1338 | "pYANxeG6-W4" 1339 | ] 1340 | }, 1341 | "aiko_キラキラ": { 1342 | "original": "S0bXDRY1DGM", 1343 | "pianos": [ 1344 | "ToSGjCeEsWU", 1345 | "flskQTwnnWE", 1346 | "gkYPD3oW2ok" 1347 | ] 1348 | }, 1349 | "aiko_花火": { 1350 | "original": "iqGHxcTPRfI", 1351 | "pianos": [ 1352 | "2oBs2j2sQZY", 1353 | "h2skntwL0io", 1354 | "n7CKujgdTcg", 1355 | "pueU2rwh_bg" 1356 | ] 1357 | }, 1358 | "back number_HAPPY BIRTHDAY": { 1359 | "original": "IsFs06cw-gY", 1360 | "pianos": [ 1361 | "EK1WeX9qn6E", 1362 | "EiTzrUgQaqg", 1363 | "dH1CQmvrjpo", 1364 | "ghbh_g0rMjk", 1365 | "h8AmNH5uE_s", 1366 | "rbRN1hJ2TNg" 1367 | ] 1368 | }, 1369 | "back number_SISTER": { 1370 | "original": "WOHXFNzfehs", 1371 | "pianos": [ 1372 | "F-QOosx4PuQ", 1373 | "hGV9BF_0FRA", 1374 | "xIpqizLWPjA" 1375 | ] 1376 | }, 1377 | "back number_わたがし": { 1378 | "original": "uy_BaRBJIzQ", 1379 | "pianos": [ 1380 | "JZHu5S-13WM", 1381 | "clTj6MZUD7Q", 1382 | "ny3V3IoOFRc" 1383 | ] 1384 | }, 1385 | "back number_クリスマスソング": { 1386 | "original": "7zBeQezaz4U", 1387 | "pianos": [ 1388 | "AEwt927ucvQ", 1389 | "Kl0-LmsonP0", 1390 | "S_vFsC0Kolw", 1391 | "lnaYqSzP4dc", 1392 | "o2AH4vitzPo", 1393 | "sf5GnArFGIU", 1394 | "vPkQozE9fxI" 1395 | ] 1396 | }, 1397 | "back number_ハッピーエンド": { 1398 | "original": "T8y_RsF4TSw", 1399 | "pianos": [ 1400 | "ALkimpc63sY", 1401 | "UeMW9ky-zOg", 1402 | "cQdoFZ-OsFs", 1403 | "xbA0nkGWGjg" 1404 | ] 1405 | }, 1406 | "back number_ヒロイン": { 1407 | "original": "VPZK72W4Xxw", 1408 | "pianos": [ 1409 | "MC2fRzZzbE0", 1410 | "UrlJ4E1ng_U", 1411 | "tOh1mftSIpY" 1412 | ] 1413 | }, 1414 | "back number_瞬き": { 1415 | "original": "h-KuoHHjGRs", 1416 | "pianos": [ 1417 | "9JntRfrPBOU", 1418 | "J0VNAGbu8g4", 1419 | "LXxj5ewyTY0", 1420 | "p22pAVuC_14" 1421 | ] 1422 | }, 1423 | "back number_花束": { 1424 | "original": "meZPD28Y7xE", 1425 | "pianos": [ 1426 | "GzGhIn7RBt4", 1427 | "teMazAHeeq4", 1428 | "yVEKhV8d8eo" 1429 | ] 1430 | }, 1431 | "back number_高嶺の花子さん": { 1432 | "original": "SII-S-zCg-c", 1433 | "pianos": [ 1434 | "IywKVDhA0f8", 1435 | "bkecytOl9Sg", 1436 | "lXU8BoUdQoc", 1437 | "nucf6-sWxHE" 1438 | ] 1439 | }, 1440 | "flumpool_Over the rain~ひかりの橋~": { 1441 | "original": "5jII3WMOZCE", 1442 | "pianos": [ 1443 | "LNRyquuZb2I", 1444 | "c-9PJxnSnSs" 1445 | ] 1446 | }, 1447 | "miwa_ミラクル": { 1448 | "original": "HlDo8qEPbNg", 1449 | "pianos": [ 1450 | "3mTuVY6RMz4", 1451 | "BCxMt97owUY" 1452 | ] 1453 | }, 1454 | "あいみょん_ふたりの世界": { 1455 | "original": "WsEOnichYgI", 1456 | "pianos": [ 1457 | "4UGbU9Ri5qA", 1458 | "gBa9RRkYeiU" 1459 | ] 1460 | }, 1461 | "あいみょん_ハルノヒ": { 1462 | "original": "pfGI91CFtRg", 1463 | "pianos": [ 1464 | "DBGaAUrK0rQ", 1465 | "R2UM-Qo_8Nw", 1466 | "exsAr4-CVbw" 1467 | ] 1468 | }, 1469 | "あいみょん_マリーゴールド": { 1470 | "original": "0xSiBpUdW4E", 1471 | "pianos": [ 1472 | "Fb4y6C36AJ4", 1473 | "OG5FECVlsrA", 1474 | "W7eoLSGvq8s", 1475 | "gIluCoYnN8Q" 1476 | ] 1477 | }, 1478 | "あいみょん_今夜このまま": { 1479 | "original": "mH6LoI63buY", 1480 | "pianos": [ 1481 | "29GLkfvfhGg", 1482 | "lMEB9bK42Vs" 1483 | ] 1484 | }, 1485 | "あいみょん_君はロックを聴かない": { 1486 | "original": "ARwVe1MYAUA", 1487 | "pianos": [ 1488 | "-gi-qVZNmxE", 1489 | "JG9pfa2rDgM", 1490 | "Tz_qIlUf7zc", 1491 | "bIYXTcjyFbE" 1492 | ] 1493 | }, 1494 | "あいみょん_愛を伝えたいだとか": { 1495 | "original": "9qRCARM_LfE", 
1496 | "pianos": [ 1497 | "_WCENT8CGgw" 1498 | ] 1499 | }, 1500 | "あいみょん_生きていたんだよな": { 1501 | "original": "EEMwA8KZAqg", 1502 | "pianos": [ 1503 | "-m5Hu0cSp_U", 1504 | "_0nlLDFHZRE", 1505 | "jTZAvKIqERI", 1506 | "rZLiE1nz__U", 1507 | "zplR4I6OOf4" 1508 | ] 1509 | }, 1510 | "あいみょん_空の青さを知る人よ": { 1511 | "original": "ztdpBUDf00o", 1512 | "pianos": [ 1513 | "hQ2LZelx3I4", 1514 | "sqPsL9We1WI" 1515 | ] 1516 | }, 1517 | "あいみょん_裸の心": { 1518 | "original": "yOAwvRmVIyo", 1519 | "pianos": [ 1520 | "FQSqiSyuH4k", 1521 | "QOffvF8SDu0", 1522 | "X4ZwfE4KwAI", 1523 | "dJIZCq0Em6A", 1524 | "mGAzW5C726w", 1525 | "u_Lw4mDAR6k" 1526 | ] 1527 | }, 1528 | "あいみょん_貴方解剖純愛歌 ~死ね~": { 1529 | "original": "IL35V9wYr-U", 1530 | "pianos": [ 1531 | "QsCaif0aeZ0", 1532 | "mKXGiSFj32o", 1533 | "npIpBHQR6JA" 1534 | ] 1535 | }, 1536 | "いきものがかり_SAKURA": { 1537 | "original": "61z-cqg28R8", 1538 | "pianos": [ 1539 | "_ToMRNWqEqY", 1540 | "hPoxDkvsRwQ", 1541 | "nmPy1Lagcpw", 1542 | "oytBS3GtWAU", 1543 | "zZQfL0LO-DU" 1544 | ] 1545 | }, 1546 | "いきものがかり_YELL": { 1547 | "original": "lz8frtP6_kk", 1548 | "pianos": [ 1549 | "6_pI-N2uJeI", 1550 | "UIBBeKywFwM", 1551 | "ki-d8E96FGE", 1552 | "uYbsJKGuM2k" 1553 | ] 1554 | }, 1555 | "いきものがかり_ありがとう": { 1556 | "original": "VZBU8LvZ91Q", 1557 | "pianos": [ 1558 | "2N91TglfJRY", 1559 | "WP57dpe62lE", 1560 | "uTubjWEhkiY", 1561 | "y6vkloEw5G4" 1562 | ] 1563 | }, 1564 | "いきものがかり_ブルーバード": { 1565 | "original": "KpsJWFuVTdI", 1566 | "pianos": [ 1567 | "FTrJ-J_lsr0", 1568 | "RqwC7R2tbG8", 1569 | "geEW2r1GW1c", 1570 | "yrkk92VY-N4" 1571 | ] 1572 | }, 1573 | "いきものがかり_帰りたくなったよ": { 1574 | "original": "NzIcRQBkMY8", 1575 | "pianos": [ 1576 | "3IQGcVoflfw", 1577 | "IT62IA62Xgk", 1578 | "eHbeJCo1yPA", 1579 | "jvNhYQ8q92E" 1580 | ] 1581 | }, 1582 | "いきものがかり_歩いていこう": { 1583 | "original": "XWE8XsRiMqQ", 1584 | "pianos": [ 1585 | "8GZvYnG4D0U", 1586 | "IGuWdu_ZdgU", 1587 | "jte51jYk4lk", 1588 | "pbCN2g1lV-E" 1589 | ] 1590 | }, 1591 | "いきものがかり_気まぐれロマンティック": { 1592 | "original": "5XCSt_0lwOE", 1593 | "pianos": [ 1594 | "sfu4fxnG55w", 1595 | "vPvYtQz0j6k" 1596 | ] 1597 | }, 1598 | "いきものがかり_笑顔": { 1599 | "original": "Z22loO5hY4A", 1600 | "pianos": [ 1601 | "P4ZqyfNW7c8", 1602 | "SfxJ5oXn-eI" 1603 | ] 1604 | }, 1605 | "さだまさし_いのちの理由": { 1606 | "original": "u6nqSPHPi1s", 1607 | "pianos": [ 1608 | "LSplmQESHuk", 1609 | "ahHNnJHrAe8" 1610 | ] 1611 | }, 1612 | "さだまさし_案山子": { 1613 | "original": "M8oBMVADanc", 1614 | "pianos": [ 1615 | "ONilPEUGlcg" 1616 | ] 1617 | }, 1618 | "さだまさし_道化師のソネット": { 1619 | "original": "nxkVWhQ6HeA", 1620 | "pianos": [ 1621 | "4_D1CBTxLzU", 1622 | "hnnpzJ08Rzs", 1623 | "qm9DBoLJurA" 1624 | ] 1625 | }, 1626 | "ゆず_友 ~旅立ちの時~": { 1627 | "original": "WgAhMfhdZ4c", 1628 | "pianos": [ 1629 | "RXTpn4wW-Z8", 1630 | "l8alka2F9jQ" 1631 | ] 1632 | }, 1633 | "ゆず_夏色": { 1634 | "original": "hhDzDL9Y2Lo", 1635 | "pianos": [ 1636 | "5-lheeORWnI", 1637 | "Eb2XjB7rnrA", 1638 | "I9oiZzEVnkA" 1639 | ] 1640 | }, 1641 | "ゆず_栄光の架橋": { 1642 | "original": "PRJoAPH0ZGo", 1643 | "pianos": [ 1644 | "KT9JQvXZJxs", 1645 | "QgZJy0CUhO8" 1646 | ] 1647 | }, 1648 | "ゆず_桜会": { 1649 | "original": "Tsl3covFGyQ", 1650 | "pianos": [ 1651 | "ZM3Kj5Njp9E", 1652 | "dRURlgT8njc" 1653 | ] 1654 | }, 1655 | "ケツメイシ_さらば涙": { 1656 | "original": "yJjazvJiGwM", 1657 | "pianos": [ 1658 | "UZNs1R5hU6o", 1659 | "lYXmwhhhTII" 1660 | ] 1661 | }, 1662 | "コブクロ_ここにしか咲かない花": { 1663 | "original": "GCx-zSwQNb4", 1664 | "pianos": [ 1665 | "93pJuQ9KBAg", 1666 | "Jxp7xT7fjLg" 1667 | ] 1668 | }, 1669 | "コブクロ_桜": { 1670 | "original": "Dsdxxrnz1AQ", 1671 | 
"pianos": [ 1672 | "626Nr-NHIdU", 1673 | "MVWcLulh7p8", 1674 | "UVJlXqZulmg" 1675 | ] 1676 | }, 1677 | "コブクロ_永遠にともに": { 1678 | "original": "4FK3W_E8YGo", 1679 | "pianos": [ 1680 | "cKMl0apjJyA", 1681 | "ttZ_W_09FDY" 1682 | ] 1683 | }, 1684 | "コブクロ_流星": { 1685 | "original": "NshFw-eUj4c", 1686 | "pianos": [ 1687 | "1zQ676pZODs", 1688 | "TL5SeJgq1rM" 1689 | ] 1690 | }, 1691 | "コブクロ_蕾": { 1692 | "original": "WPH1BLHKOJE", 1693 | "pianos": [ 1694 | "VxE-0f60BxU", 1695 | "X0G9msVMnnY" 1696 | ] 1697 | }, 1698 | "サザンオールスターズ_涙のキッス": { 1699 | "original": "fsgBd5ln7-U", 1700 | "pianos": [ 1701 | "-RVkrTb1bGg", 1702 | "F0p5zNVo484", 1703 | "nPw1XbwG5G4" 1704 | ] 1705 | }, 1706 | "シド_レイン": { 1707 | "original": "GB3DN7B4mx4", 1708 | "pianos": [ 1709 | "IQqQBDQtdHo", 1710 | "NZ5phNAwIBo", 1711 | "QeWJOVU6dGc", 1712 | "yxaJXVHlUF8" 1713 | ] 1714 | }, 1715 | "スキマスイッチ_Ah Yeah!!": { 1716 | "original": "HxTQF1bCyag", 1717 | "pianos": [ 1718 | "XmGDNFAH6C4", 1719 | "f6QAzE4Cgfw" 1720 | ] 1721 | }, 1722 | "スキマスイッチ_ボクノート": { 1723 | "original": "AeMRXJtg500", 1724 | "pianos": [ 1725 | "9-PaIvBMGa8", 1726 | "LSFgWsHmeLk", 1727 | "jldlH0ZmMyg" 1728 | ] 1729 | }, 1730 | "スキマスイッチ_ユリーカ": { 1731 | "original": "2B7jXfcqs18", 1732 | "pianos": [ 1733 | "7M_yczOTEew", 1734 | "K0AAf9qskJk" 1735 | ] 1736 | }, 1737 | "スキマスイッチ_全力少年": { 1738 | "original": "IvDTkTKi5pA", 1739 | "pianos": [ 1740 | "0bQAqYBRRm4", 1741 | "HUFHKWnLqWU", 1742 | "pLGH0sfLoMY" 1743 | ] 1744 | }, 1745 | "スキマスイッチ_奏(かなで)": { 1746 | "original": "J5Z7tIq7bco", 1747 | "pianos": [ 1748 | "NZPADQzxoZs", 1749 | "QKJe2aK1yEw" 1750 | ] 1751 | }, 1752 | "スピッツ_スカーレット": { 1753 | "original": "l1-D43s6D_A", 1754 | "pianos": [ 1755 | "OFZjXoFVAxQ", 1756 | "TBzr-et5U1k", 1757 | "YA9xmDB_kbU", 1758 | "uU8Z4eSPxjo" 1759 | ] 1760 | }, 1761 | "スピッツ_チェリー": { 1762 | "original": "Eze6-eHmtJg", 1763 | "pianos": [ 1764 | "GyONvdXW8u0", 1765 | "MY2-vrhFn9I", 1766 | "UnS-nTqKmSc", 1767 | "fKsy7aD5E78", 1768 | "qKf4nue4rV4", 1769 | "v3QbTekEjxA" 1770 | ] 1771 | }, 1772 | "スピッツ_ロビンソン": { 1773 | "original": "51CH3dPaWXc", 1774 | "pianos": [ 1775 | "L7YOrRvq8pY", 1776 | "SNO04Pn0sSc", 1777 | "g48uGMUDB5c", 1778 | "waS16_A6Eqs" 1779 | ] 1780 | }, 1781 | "スピッツ_優しいあの子": { 1782 | "original": "RkIOd78C82I", 1783 | "pianos": [ 1784 | "4wCFECx-GNQ", 1785 | "E7EeZec5eyw", 1786 | "WJFQo5EqjWU", 1787 | "ZdIg_w9TJe0", 1788 | "d7ooieSjbyk", 1789 | "sRIFuSWb2G8" 1790 | ] 1791 | }, 1792 | "スピッツ_君が思い出になる前に": { 1793 | "original": "LrwC2Xu2POs", 1794 | "pianos": [ 1795 | "PQiQQXZRkeE", 1796 | "TglacgZUyqw", 1797 | "Za3beA72v6s" 1798 | ] 1799 | }, 1800 | "スピッツ_春の歌": { 1801 | "original": "94uxNQqmknk", 1802 | "pianos": [ 1803 | "7l5WMkEo8Mw", 1804 | "fvCazQkgbSk", 1805 | "hsR0tsBnzlU", 1806 | "ubzV9uTF_Vs" 1807 | ] 1808 | }, 1809 | "スピッツ_楓": { 1810 | "original": "YapsFDcGe_s", 1811 | "pianos": [ 1812 | "DK8ORpYsZNE", 1813 | "KFVfItPFiuQ", 1814 | "UDwz2ZnNpWA", 1815 | "_EszMwuqqlo", 1816 | "_IAlBuY7HIc" 1817 | ] 1818 | }, 1819 | "スピッツ_空も飛べるはず": { 1820 | "original": "h-kQw4JqCHE", 1821 | "pianos": [ 1822 | "8_CicsG5oeM", 1823 | "Ay1ZFX-yOuY", 1824 | "InLnAg9Bx4A", 1825 | "N7eK7WlzohI" 1826 | ] 1827 | }, 1828 | "スピッツ_魔法のコトバ": { 1829 | "original": "gPTFyx2R46w", 1830 | "pianos": [ 1831 | "0uUqliCurac", 1832 | "720bKySILJk", 1833 | "AyRC37piFO4", 1834 | "G_foiy4d7r8", 1835 | "JAG0z5g9H0U", 1836 | "UWjqWKmiFUE", 1837 | "xdmdbOK5hbE" 1838 | ] 1839 | }, 1840 | "ナオト・インティライミ_いつかきっと": { 1841 | "original": "zSe-bcW7kwE", 1842 | "pianos": [ 1843 | "2IKNab3HJ-o", 1844 | "QxL957DWa2o", 1845 | "UBylkfsYBvU" 1846 | ] 
1847 | }, 1848 | "ポルノグラフィティ_アポロ": { 1849 | "original": "q_8ulbgIF5w", 1850 | "pianos": [ 1851 | "5wUjmE0JYQE", 1852 | "iuFqqhvPqYA" 1853 | ] 1854 | }, 1855 | "ポルノグラフィティ_オー!リバル": { 1856 | "original": "0_Hns1hqBoA", 1857 | "pianos": [ 1858 | "cbIv2s4Mf68", 1859 | "gljztcf2v98", 1860 | "lit8nOGpNQQ", 1861 | "qMf_cUzU1YA" 1862 | ] 1863 | }, 1864 | "ポルノグラフィティ_メリッサ": { 1865 | "original": "DXo_xX_Hqro", 1866 | "pianos": [ 1867 | "5tmLuqjPo_I", 1868 | "9yeV5vYDcUg" 1869 | ] 1870 | }, 1871 | "ポルノグラフィティ_愛が呼ぶほうへ": { 1872 | "original": "0U7PvH4bZiQ", 1873 | "pianos": [ 1874 | "itciaNyxIOk", 1875 | "rCHTHFvjWNc" 1876 | ] 1877 | }, 1878 | "ヨルシカ_あの夏に咲け": { 1879 | "original": "yJRiv2gTZ0I", 1880 | "pianos": [ 1881 | "Np3bQYPzSX4", 1882 | "mljTHs7HoPg", 1883 | "mvc2lQHs3dk", 1884 | "vI9Rw_DRlDw" 1885 | ] 1886 | }, 1887 | "ヨルシカ_ただ君に晴れ": { 1888 | "original": "-VKIqrvVOpo", 1889 | "pianos": [ 1890 | "3J9sN0u6cos", 1891 | "5HkBF_9d8Vw", 1892 | "AXQiULYW0uY", 1893 | "LhQUvZfn9nA", 1894 | "Sr9hMg1DxJM", 1895 | "nMYK-fWyILM", 1896 | "rxD-5QACw5E" 1897 | ] 1898 | }, 1899 | "ヨルシカ_だから僕は音楽を辞めた": { 1900 | "original": "KTZ-y85Erus", 1901 | "pianos": [ 1902 | "AimJ-EWDK1I", 1903 | "J73Fhk5cTX0", 1904 | "TPvXv5TmIjE", 1905 | "ZKP3UU-jqrI", 1906 | "sM4cYnksY6Y" 1907 | ] 1908 | }, 1909 | "ヨルシカ_ヒッチコック": { 1910 | "original": "t7MBzMP4OzY", 1911 | "pianos": [ 1912 | "0sY5e1uC31k", 1913 | "Ox3IglSS9zQ", 1914 | "PurX3cMaj0Y", 1915 | "YZmyCevTRhE", 1916 | "saVuvcnw-aE" 1917 | ] 1918 | }, 1919 | "ヨルシカ_春泥棒": { 1920 | "original": "Sw1Flgub9s8", 1921 | "pianos": [ 1922 | "9XLSltIx7OM", 1923 | "LHGx4eXgD5s", 1924 | "LHyJSrIFQ3M", 1925 | "XWB3qHv7N6o", 1926 | "YJnLkuj2DI8", 1927 | "ZxJLRIXPiKk", 1928 | "pKBj0QwP5xk", 1929 | "s6XNluN2dkk" 1930 | ] 1931 | }, 1932 | "ヨルシカ_花に亡霊": { 1933 | "original": "9lVPAWLWtWc", 1934 | "pianos": [ 1935 | "65EMWkVJOKU", 1936 | "8RrMeTJAc9w", 1937 | "D-vjdktcgKY", 1938 | "kx0dkOJoqAg" 1939 | ] 1940 | }, 1941 | "ヨルシカ_藍二乗": { 1942 | "original": "4MoRLTAJY_0", 1943 | "pianos": [ 1944 | "6iqsNqd8mLs", 1945 | "9sfxO9kwJsU", 1946 | "ImeIp3RjStk", 1947 | "O0mQF5aCq54", 1948 | "cMGJCm1ImNI", 1949 | "nHxoX-LxlxE", 1950 | "x7zKvDlAfKY" 1951 | ] 1952 | }, 1953 | "ヨルシカ_言って。": { 1954 | "original": "F64yFFnZfkI", 1955 | "pianos": [ 1956 | "CZlSNLNPc4M", 1957 | "EkisvG986Bs" 1958 | ] 1959 | }, 1960 | "ヨルシカ_負け犬にアンコールはいらない": { 1961 | "original": "gCu7C4aIfmU", 1962 | "pianos": [ 1963 | "JgieETatXQs", 1964 | "kFMP9L04mXY" 1965 | ] 1966 | }, 1967 | "ヨルシカ_雨とカプチーノ": { 1968 | "original": "PWbRleMGagU", 1969 | "pianos": [ 1970 | "3QApoycu_R0", 1971 | "BfjjmyACmfI", 1972 | "WqgvyRekXn4", 1973 | "dwNZhHGGIgA", 1974 | "gAytVBsZxRo" 1975 | ] 1976 | }, 1977 | "三代目 J SOUL BROTHERS from EXILE TRIBE_COSMOS ~秋桜~": { 1978 | "original": "v1ynPdEqjO0", 1979 | "pianos": [ 1980 | "qGkpRzHR0nM" 1981 | ] 1982 | }, 1983 | "三代目 J SOUL BROTHERS from EXILE TRIBE_RYUSEI": { 1984 | "original": "4-Gw0TAM6-Q", 1985 | "pianos": [ 1986 | "82wvEvqmYZY", 1987 | "OMdo3wrIDq8", 1988 | "XJ7s3EU0D3c", 1989 | "pIG9PglJVsY" 1990 | ] 1991 | }, 1992 | "三代目 J SOUL BROTHERS from EXILE TRIBE_Unfair World": { 1993 | "original": "tWsPC2-nkAY", 1994 | "pianos": [ 1995 | "00QoJIz0GSs", 1996 | "PQ-Oa4CNj8M", 1997 | "WN018aKCV9k" 1998 | ] 1999 | }, 2000 | "三代目 J SOUL BROTHERS from EXILE TRIBE_starting over": { 2001 | "original": "TQZ1VPqvKyo", 2002 | "pianos": [ 2003 | "DrpOfla5l30", 2004 | "NEoTwyuSQgM" 2005 | ] 2006 | }, 2007 | "中島みゆき_わかれうた": { 2008 | "original": "NXJ_v4t9Mk0", 2009 | "pianos": [ 2010 | "-MkXcUm49M8", 2011 | "aERLpLykOqM", 2012 | "gJYfFrK3ZX4", 2013 | 
"lZjTZF11Zo4" 2014 | ] 2015 | }, 2016 | "中島みゆき_ヘッドライト・テールライト": { 2017 | "original": "mBYmBjLcn_4", 2018 | "pianos": [ 2019 | "Z_Fk_u0x8ZE", 2020 | "qK1gA6zDVgU" 2021 | ] 2022 | }, 2023 | "中島みゆき_地上の星": { 2024 | "original": "v2SlpjCz7uE", 2025 | "pianos": [ 2026 | "gjkxHEO6eJ8", 2027 | "i_pIN91_Gow" 2028 | ] 2029 | }, 2030 | "中島みゆき_時代": { 2031 | "original": "aOOpDfmy7mw", 2032 | "pianos": [ 2033 | "YgjDqTGk5aE", 2034 | "eTM5St7n9dc" 2035 | ] 2036 | }, 2037 | "中島みゆき_空と君のあいだに": { 2038 | "original": "U2jEWTrExsg", 2039 | "pianos": [ 2040 | "ExTwd7rQX1I", 2041 | "NxIahwUPDx0", 2042 | "dV-sayNAqVs" 2043 | ] 2044 | }, 2045 | "中島みゆき_糸": { 2046 | "original": "78UwqbJnI18", 2047 | "pianos": [ 2048 | "GvMGZP-y67o", 2049 | "LK3xgzFNPjo", 2050 | "MJ4vkjdtAgM", 2051 | "gL5rqaufHbE" 2052 | ] 2053 | }, 2054 | "中島みゆき_銀の龍の背に乗って": { 2055 | "original": "t3bSqTsnEgU", 2056 | "pianos": [ 2057 | "7TsE-1nMwN0", 2058 | "RxOv2dfyUWs", 2059 | "ZoO2_Y8y7CQ", 2060 | "xoMDRkHM6jc" 2061 | ] 2062 | }, 2063 | "中島美嘉_Dear": { 2064 | "original": "eO3j3usyT4k", 2065 | "pianos": [ 2066 | "s3F4QGtWun0", 2067 | "t-VjitBdnuQ" 2068 | ] 2069 | }, 2070 | "中島美嘉_LIFE": { 2071 | "original": "fPECG_5TckE", 2072 | "pianos": [ 2073 | "-symiKlmSwA" 2074 | ] 2075 | }, 2076 | "中島美嘉_ORION": { 2077 | "original": "q7GFtKuEQik", 2078 | "pianos": [ 2079 | "1lUGJu5dYWE", 2080 | "Xc7gfkh62Sk", 2081 | "j_ZOxp4WkjQ" 2082 | ] 2083 | }, 2084 | "中島美嘉_一番綺麗な私を": { 2085 | "original": "utq_CdyhsIk", 2086 | "pianos": [ 2087 | "GGshjOa-IMo", 2088 | "m1YLlK2ozXM", 2089 | "nSlL9jvCnwA", 2090 | "yhiucDXy7To" 2091 | ] 2092 | }, 2093 | "中島美嘉_僕が死のうと思ったのは": { 2094 | "original": "AabAbM_Lmuo", 2095 | "pianos": [ 2096 | "8eiJP_TK-bs", 2097 | "CTqAU4mV3a8", 2098 | "F6ZB6cqyPsQ", 2099 | "Uf8TWFn_8bw" 2100 | ] 2101 | }, 2102 | "中島美嘉_桜色舞うころ": { 2103 | "original": "_LvGDnGP6pk", 2104 | "pianos": [ 2105 | "AHVPJKdmI8I", 2106 | "DKjoE54mmEE", 2107 | "VnmttFaONP4", 2108 | "psqzJ4YJ-sA", 2109 | "q0FBiDf54AY" 2110 | ] 2111 | }, 2112 | "中島美嘉_雪の華": { 2113 | "original": "mF5Qq2YheTg", 2114 | "pianos": [ 2115 | "65ks0Rw4zUM", 2116 | "Eqltv82F3eI", 2117 | "HlL1VmQhASU", 2118 | "JF3Xnpp7p2Q", 2119 | "NJjf5OfPFBs", 2120 | "Q-H6aT_3uv8", 2121 | "_hr0z1uYSdo", 2122 | "aCzsooW7Db8" 2123 | ] 2124 | }, 2125 | "中森明菜_セカンド・ラブ": { 2126 | "original": "ZAVoEfNk0Ho", 2127 | "pianos": [ 2128 | "IRz9-25lhes", 2129 | "Xq08e31rjps", 2130 | "iUG6rPgVAaw", 2131 | "zt6WEe8RtW0" 2132 | ] 2133 | }, 2134 | "中森明菜_少女A": { 2135 | "original": "iqXEdDh6gro", 2136 | "pianos": [ 2137 | "3WQLAw55QA4", 2138 | "hsmASISqLSg" 2139 | ] 2140 | }, 2141 | "中森明菜_難破船": { 2142 | "original": "qUlVH6mzNG4", 2143 | "pianos": [ 2144 | "Fpk__6HbkyM", 2145 | "Mar9G69i2qE", 2146 | "UYxVzXlLIyA", 2147 | "moPB-q-sim8" 2148 | ] 2149 | }, 2150 | "乃木坂46_シンクロニシティ": { 2151 | "original": "f0wbnQw89J0", 2152 | "pianos": [ 2153 | "3uWx3CBx_BA", 2154 | "UC0II9KLv50", 2155 | "_0dATR_BoN4" 2156 | ] 2157 | }, 2158 | "乃木坂46_君の名は希望": { 2159 | "original": "Q-8uL-s_t3U", 2160 | "pianos": [ 2161 | "DuWzqAqbl7s", 2162 | "anokANRibvY" 2163 | ] 2164 | }, 2165 | "加藤ミリヤ_Love is": { 2166 | "original": "HlXVnUl7Z3Q", 2167 | "pianos": [ 2168 | "69tN1YNKV9E", 2169 | "I4i9sSqYqaM" 2170 | ] 2171 | }, 2172 | "宇多田ヒカル_Automatic": { 2173 | "original": "-9DxpPiE458", 2174 | "pianos": [ 2175 | "e5TDr0r8fYM", 2176 | "grWOOxSOp1U" 2177 | ] 2178 | }, 2179 | "宇多田ヒカル_Can You Keep A Secret?": { 2180 | "original": "SRQbQ_nd4fc", 2181 | "pianos": [ 2182 | "HHWgNUfWNiM", 2183 | "tIGS4ICWhc8" 2184 | ] 2185 | }, 2186 | "宇多田ヒカル_First Love": { 2187 | "original": "o1sUaVJUeB0", 
2188 | "pianos": [ 2189 | "2LMjfyxNOr4", 2190 | "BxXyNf-CmHI", 2191 | "LpxVwc-FomI", 2192 | "S1ej17N2RlY", 2193 | "Uf2TRlssAeI", 2194 | "Zk0pc7RYfEY", 2195 | "kfACeVnL0ko" 2196 | ] 2197 | }, 2198 | "宇多田ヒカル_Flavor Of Life": { 2199 | "original": "tpk0PxK-c5E", 2200 | "pianos": [ 2201 | "18hhx9S-PlI", 2202 | "7obd0zsRCEI", 2203 | "QEN1AE1q1vw", 2204 | "sGzkN32r3T8" 2205 | ] 2206 | }, 2207 | "宇多田ヒカル_One Last Kiss": { 2208 | "original": "0Uhh62MUEic", 2209 | "pianos": [ 2210 | "9keF7Ed_SuE", 2211 | "9pZYp_wbU2I", 2212 | "EmPSbGlt8to", 2213 | "U6Nj2Wd-OfI", 2214 | "m4DF7mAgMoI", 2215 | "oBwVQsvnz94", 2216 | "pItqMVQ-Dvg" 2217 | ] 2218 | }, 2219 | "宇多田ヒカル_Prisoner Of Love": { 2220 | "original": "6bDyqwwH4Xs", 2221 | "pianos": [ 2222 | "INK4Kajld1s" 2223 | ] 2224 | }, 2225 | "宇多田ヒカル_あなた": { 2226 | "original": "A_5wTaQKK6c", 2227 | "pianos": [ 2228 | "EslgwU_gSb8", 2229 | "Rsnn4j3gnP0", 2230 | "_bob3zlw0Sw", 2231 | "q5RsK2jxby0", 2232 | "seM2iZ7YFqE", 2233 | "zmd7gu_j0KI" 2234 | ] 2235 | }, 2236 | "宇多田ヒカル_初恋": { 2237 | "original": "FtHpWnr99OY", 2238 | "pianos": [ 2239 | "3IPuZHV6hnM", 2240 | "FVxqJWbMDNY", 2241 | "Ky24v5vS2Nc", 2242 | "_IXC0-if38I", 2243 | "c8GJ5c51bNo" 2244 | ] 2245 | }, 2246 | "宇多田ヒカル_真夏の通り雨": { 2247 | "original": "f_M3V4C8nWY", 2248 | "pianos": [ 2249 | "TM4FdT6SPfk", 2250 | "keIXfz22Izg", 2251 | "zkn9axtNL84" 2252 | ] 2253 | }, 2254 | "宇多田ヒカル_花束を君に": { 2255 | "original": "yCZFof7Y0tQ", 2256 | "pianos": [ 2257 | "2_RtnkbuC04", 2258 | "4sX21ph1G34", 2259 | "Ijj6F5lH08Y", 2260 | "sNVhnGKNbCw" 2261 | ] 2262 | }, 2263 | "安室奈美恵_CAN YOU CELEBRATE?": { 2264 | "original": "W_M2QP5fL_I", 2265 | "pianos": [ 2266 | "14-7zSGNADE", 2267 | "Cz4fU36yKVQ", 2268 | "EUCevf3PfFw", 2269 | "Qc8pjl2960k", 2270 | "Ygnq7DNQoPE", 2271 | "hTkdn8O51k8" 2272 | ] 2273 | }, 2274 | "安室奈美恵_Fight Together": { 2275 | "original": "i9tIzJ1S2zY", 2276 | "pianos": [ 2277 | "LCBszNwBzVk", 2278 | "eqBuoYRiocE" 2279 | ] 2280 | }, 2281 | "安室奈美恵_Hope": { 2282 | "original": "AL-NN1fL8t8", 2283 | "pianos": [ 2284 | "9j0qCNnvLh8", 2285 | "ihkoYv9doLw", 2286 | "o5Dnk8gceQY" 2287 | ] 2288 | }, 2289 | "安室奈美恵_Love Story": { 2290 | "original": "-c1q0iJJMcw", 2291 | "pianos": [ 2292 | "pZDI2RBWnMs" 2293 | ] 2294 | }, 2295 | "嵐_5×10": { 2296 | "original": "lVbniR1AYVY", 2297 | "pianos": [ 2298 | "FFuuWFn6LWk", 2299 | "_pyiUl0tgPk", 2300 | "kfQxCYywEMw", 2301 | "zAl_vJGIrUE" 2302 | ] 2303 | }, 2304 | "嵐_Happiness": { 2305 | "original": "HeXVJvEYynw", 2306 | "pianos": [ 2307 | "HBWWlX22AQ8", 2308 | "WL563VdzgS0", 2309 | "XEgJ8H8OOGw" 2310 | ] 2311 | }, 2312 | "嵐_Love so sweet": { 2313 | "original": "EAgACSowE5k", 2314 | "pianos": [ 2315 | "IaSTyRK9uYc", 2316 | "h_OqFE20X2A" 2317 | ] 2318 | }, 2319 | "嵐_One Love": { 2320 | "original": "EDu47zD-DgI", 2321 | "pianos": [ 2322 | "bl_Eed7d2HQ", 2323 | "hIVPRZOS84s", 2324 | "mxRsvsgrOeo", 2325 | "uBzZ2vyYT3E" 2326 | ] 2327 | }, 2328 | "嵐_truth": { 2329 | "original": "8etBhO7i7bM", 2330 | "pianos": [ 2331 | "56WJLxOl4mw", 2332 | "HJO4zoZ7SDo" 2333 | ] 2334 | }, 2335 | "嵐_ふるさと": { 2336 | "original": "SfQ2J6h--9M", 2337 | "pianos": [ 2338 | "1al_VSpcuCw", 2339 | "MuEW5Gq2xO4", 2340 | "vPnw2KWYo3Q" 2341 | ] 2342 | }, 2343 | "嵐_サクラ咲ケ": { 2344 | "original": "p8RnIn7iqJ0", 2345 | "pianos": [ 2346 | "7dsxKatCs98", 2347 | "JMNV7RLVBs4" 2348 | ] 2349 | }, 2350 | "平井堅_僕は君に恋をする": { 2351 | "original": "pdrKth95Lh8", 2352 | "pianos": [ 2353 | "Q4e7Btmd3KQ", 2354 | "YJdsUuMH0gY", 2355 | "bo04BNDz23Y" 2356 | ] 2357 | }, 2358 | "平井堅_告白": { 2359 | "original": "_o10f0CMTvU", 2360 | "pianos": [ 2361 | 
"M4r7e2UFA1A", 2362 | "MBBUQYZsHq0", 2363 | "nrXsgZrMX3Y" 2364 | ] 2365 | }, 2366 | "平井堅_瞳をとじて": { 2367 | "original": "EqVoCfSwfUY", 2368 | "pianos": [ 2369 | "3-OIqJVvQ0g", 2370 | "FhCMvzpn0Wc", 2371 | "INoplUJfg-w" 2372 | ] 2373 | }, 2374 | "星野源_Family Song": { 2375 | "original": "Ucbp-Ej5RpI", 2376 | "pianos": [ 2377 | "BQdR3_Kknys", 2378 | "Dfs3JxiY_f0", 2379 | "Q6kEYVPFmMU", 2380 | "icqGrdNOYis" 2381 | ] 2382 | }, 2383 | "星野源_SUN": { 2384 | "original": "7gcCRAl58u4", 2385 | "pianos": [ 2386 | "4ybPVNV6BW4", 2387 | "V2VJbrE-jxk", 2388 | "W-J3cZ5coxE", 2389 | "lAYwaxgfsRM", 2390 | "ox3xu5btieI", 2391 | "yxGKEC6QSqE" 2392 | ] 2393 | }, 2394 | "星野源_Week End": { 2395 | "original": "F4kUnm4nOpI", 2396 | "pianos": [ 2397 | "Pg78872egxo", 2398 | "v_8p8VU8nFg" 2399 | ] 2400 | }, 2401 | "星野源_ドラえもん": { 2402 | "original": "ypRTzt1KrF8", 2403 | "pianos": [ 2404 | "EgFF1sQaH4o", 2405 | "LkXC0E7kZvE", 2406 | "SYsk68T5VIA" 2407 | ] 2408 | }, 2409 | "東京事変_キラーチューン": { 2410 | "original": "lC8la4l4RhQ", 2411 | "pianos": [ 2412 | "36sD0sVna1Y", 2413 | "65F84LI4o64", 2414 | "EgdB4Z0jvSE" 2415 | ] 2416 | }, 2417 | "東京事変_女の子は誰でも": { 2418 | "original": "8tTkCZzRx5Q", 2419 | "pianos": [ 2420 | "7kP9OyfXHMk", 2421 | "MARYWIyvAZw", 2422 | "YkX8PgHS1v0", 2423 | "ZrGLYIoAS0w" 2424 | ] 2425 | }, 2426 | "東京事変_群青日和": { 2427 | "original": "gD2mhJ3ByGQ", 2428 | "pianos": [ 2429 | "OFD1XNY7eyY", 2430 | "SAzb-kdz9k4", 2431 | "tEI8wQOXcOo" 2432 | ] 2433 | }, 2434 | "東京事変_能動的三分間": { 2435 | "original": "NTKwzRAdY7w", 2436 | "pianos": [ 2437 | "LIUZ_EFD2CA", 2438 | "NrJnjUQqPCY", 2439 | "oSZj_iw6mLg", 2440 | "rdulgucnMJA" 2441 | ] 2442 | }, 2443 | "東京事変_閃光少女": { 2444 | "original": "5jsdarfpsLk", 2445 | "pianos": [ 2446 | "HmgdZKtq8I0", 2447 | "_cz_NEkkCNw", 2448 | "aPqXOGcePrg" 2449 | ] 2450 | }, 2451 | "東方神起_Stand by U": { 2452 | "original": "LLzujr-rgkI", 2453 | "pianos": [ 2454 | "9H7Ehg-x-J0" 2455 | ] 2456 | }, 2457 | "松任谷由実_あの日にかえりたい": { 2458 | "original": "2qJnqZenIFY", 2459 | "pianos": [ 2460 | "SCAeT0-60nw", 2461 | "xDU6d_MJYlI" 2462 | ] 2463 | }, 2464 | "松任谷由実_ひこうき雲": { 2465 | "original": "SlXL1A7rrxo", 2466 | "pianos": [ 2467 | "OA0SoQyDr68", 2468 | "Y9MM-4G0Es8", 2469 | "fOdHqHQ7MVA", 2470 | "pDf0kTkxYdE", 2471 | "rRx9X74r3ZE", 2472 | "yXu8Fs-hczU" 2473 | ] 2474 | }, 2475 | "松任谷由実_ルージュの伝言": { 2476 | "original": "ZS6eNJPVvoE", 2477 | "pianos": [ 2478 | "Fho2uxk7fag", 2479 | "KjvCCUPBpek" 2480 | ] 2481 | }, 2482 | "松任谷由実_卒業写真": { 2483 | "original": "drP4GpMEtd8", 2484 | "pianos": [ 2485 | "2LhcC2QVHBw", 2486 | "Ff5wi-DjlMo", 2487 | "Q_YGmDZ5BoY", 2488 | "eNIfH4bueuY" 2489 | ] 2490 | }, 2491 | "松任谷由実_春よ、来い": { 2492 | "original": "qX7pFYH9O04", 2493 | "pianos": [ 2494 | "1L4mjaVEvJY", 2495 | "KzoekVp4pCc", 2496 | "XlfurSs8KqM", 2497 | "h0Qij0th7J8" 2498 | ] 2499 | }, 2500 | "松任谷由実_真夏の夜の夢": { 2501 | "original": "fxTD_ZIAnH4", 2502 | "pianos": [ 2503 | "0HsefMJLkt8" 2504 | ] 2505 | }, 2506 | "松田聖子_天使のウィンク": { 2507 | "original": "-nZdJ9r1mCA", 2508 | "pianos": [ 2509 | "JfRmFXH2Egs" 2510 | ] 2511 | }, 2512 | "松田聖子_渚のバルコニー": { 2513 | "original": "0kifwztjZUA", 2514 | "pianos": [ 2515 | "Jt3s-9scVbE", 2516 | "XgoS0GnxJtw", 2517 | "ciKy8Bq2P6s" 2518 | ] 2519 | }, 2520 | "松田聖子_赤いスイートピー": { 2521 | "original": "GKDwtvOTsDE", 2522 | "pianos": [ 2523 | "4FRx05yVK7E", 2524 | "8XkVW1hBpOk", 2525 | "BpceA8InvMI", 2526 | "agOxi8-UJX8", 2527 | "crgAQX6fEgw", 2528 | "cxIpyt7_mZE", 2529 | "d-IerfQb0Jk", 2530 | "vBxv1HppWIQ" 2531 | ] 2532 | }, 2533 | "椎名林檎_NIPPON": { 2534 | "original": "p-RLC9ZgjhY", 2535 | "pianos": [ 2536 | 
"G0VsH1b0K-E", 2537 | "L4PgP0WdPMs", 2538 | "f1pyq6cn25Q" 2539 | ] 2540 | }, 2541 | "椎名林檎_ここでキスして。": { 2542 | "original": "nV1HLjeOEL4", 2543 | "pianos": [ 2544 | "Nb29unA3wpI", 2545 | "e4pF3scXW6Y", 2546 | "ejrBg1Lo_ws", 2547 | "v_fr8QgsLrY" 2548 | ] 2549 | }, 2550 | "椎名林檎_ギブス": { 2551 | "original": "zldBTSx9JpE", 2552 | "pianos": [ 2553 | "FnsQAxGHKs4", 2554 | "Z-amuX1W5eg", 2555 | "bMQVp9j24bg" 2556 | ] 2557 | }, 2558 | "椎名林檎_丸の内サディスティック": { 2559 | "original": "4tlUwgtgdZA", 2560 | "pianos": [ 2561 | "3DougGOybQI", 2562 | "F9BVaPX4OL4", 2563 | "TPPpvSUKIb4", 2564 | "_3M9oYDtwCs" 2565 | ] 2566 | }, 2567 | "椎名林檎_幸福論": { 2568 | "original": "M55uRP4DWW0", 2569 | "pianos": [ 2570 | "IjjQX2dSkLk", 2571 | "pzGsCvPEjmQ" 2572 | ] 2573 | }, 2574 | "椎名林檎_歌舞伎町の女王": { 2575 | "original": "krCk3EcsaxE", 2576 | "pianos": [ 2577 | "GF887ZUKuZ4", 2578 | "NNWfZHhsIao", 2579 | "zK7E-2A8hhk" 2580 | ] 2581 | }, 2582 | "椎名林檎_罪と罰": { 2583 | "original": "dSXvVmwJxh4", 2584 | "pianos": [ 2585 | "JPHsP0edFpc", 2586 | "OpSQNjRxKvw" 2587 | ] 2588 | }, 2589 | "椎名林檎_長く短い祭": { 2590 | "original": "3LVAmMxICoA", 2591 | "pianos": [ 2592 | "MJX1rAUjSOw", 2593 | "lcXYb5aOuLk", 2594 | "qa9gFKpTc6k" 2595 | ] 2596 | }, 2597 | "槇原敬之_どんなときも。": { 2598 | "original": "b88pxLpMZKk", 2599 | "pianos": [ 2600 | "71sO1MkC0HY", 2601 | "97egbO0cLN4", 2602 | "EkhQhhy1ymw", 2603 | "uPvvZyM0fTI" 2604 | ] 2605 | }, 2606 | "槇原敬之_もう恋なんてしない": { 2607 | "original": "naz0-szzYXk", 2608 | "pianos": [ 2609 | "DzamJyf4CPU", 2610 | "QgDLrcnYnj0", 2611 | "p1dL3LN1uSg" 2612 | ] 2613 | }, 2614 | "槇原敬之_冬がはじまるよ": { 2615 | "original": "yZt2XEjsduc", 2616 | "pianos": [ 2617 | "0t99bCQMXXI", 2618 | "Z1mxgWeEe3A", 2619 | "eK7oXfsV2qI" 2620 | ] 2621 | }, 2622 | "槇原敬之_遠く遠く": { 2623 | "original": "YHj8O_SKpBk", 2624 | "pianos": [ 2625 | "FT1Hg-UjbvE", 2626 | "pxPwSnKP0Ss" 2627 | ] 2628 | }, 2629 | "浜崎あゆみ_Dearest": { 2630 | "original": "VFSeESRAPKU", 2631 | "pianos": [ 2632 | "Fc7fuW3v7YM", 2633 | "lnEljz1QBD0" 2634 | ] 2635 | }, 2636 | "浜田省吾_家路": { 2637 | "original": "B1DGRJeUHwU", 2638 | "pianos": [ 2639 | "dYb5V7SlbvY", 2640 | "g2Wy3Q3YU9g" 2641 | ] 2642 | }, 2643 | "浜田省吾_悲しみは雪のように": { 2644 | "original": "Oy7fhdAWvpk", 2645 | "pianos": [ 2646 | "5lvl4WQJvL4", 2647 | "Skm_VIRS4wU", 2648 | "uxAxuJ6ApjE" 2649 | ] 2650 | }, 2651 | "清水翔太_花束のかわりにメロディーを": { 2652 | "original": "L1D_hLw4AA4", 2653 | "pianos": [ 2654 | "5cmiCSYK2-I", 2655 | "pcPOd535MvU", 2656 | "u0VpMV30ivc", 2657 | "uYx4fYtDD-8" 2658 | ] 2659 | }, 2660 | "福山雅治_Squall": { 2661 | "original": "rZSQbQtrLds", 2662 | "pianos": [ 2663 | "1EPWuDs6x8c", 2664 | "LfCXNAndumo" 2665 | ] 2666 | }, 2667 | "福山雅治_家族になろうよ": { 2668 | "original": "vrkbf9CVkn4", 2669 | "pianos": [ 2670 | "19PZWUM-WP8", 2671 | "I-KeNILEyng", 2672 | "dCR1DLcJNCc", 2673 | "i4bRz0TZFio", 2674 | "p2w43jQvcfE" 2675 | ] 2676 | }, 2677 | "福山雅治_虹": { 2678 | "original": "SXyqhjhaQQA", 2679 | "pianos": [ 2680 | "J461AdY1jMk", 2681 | "xRyHaxpyUI0" 2682 | ] 2683 | }, 2684 | "竹内まりや_いのちの歌": { 2685 | "original": "avPCGPgDOBY", 2686 | "pianos": [ 2687 | "4o8E-Xc-fz0", 2688 | "7htd6-_46RY", 2689 | "LyWxQ8Xj6Sg", 2690 | "em6vVea7nuk", 2691 | "iNc34gYMzYw" 2692 | ] 2693 | }, 2694 | "竹内まりや_すてきなホリデイ": { 2695 | "original": "KiBEY5v4GfI", 2696 | "pianos": [ 2697 | "O8cJfus_SC4", 2698 | "XVj8pNj9ezY", 2699 | "bMuKLPU8xtk" 2700 | ] 2701 | }, 2702 | "竹内まりや_カムフラージュ": { 2703 | "original": "MOCeWhqR15s", 2704 | "pianos": [ 2705 | "AIEpkFkZgOk", 2706 | "dZ0SWI5l-l8", 2707 | "i7zXMOXgt90" 2708 | ] 2709 | }, 2710 | "竹内まりや_シングル・アゲイン": { 2711 | "original": "vl0QZ_2B1W4", 
2712 | "pianos": [ 2713 | "17ypqe4HeRg", 2714 | "1KaAZYGR8jk", 2715 | "mxrEGEPQ7kM", 2716 | "yWEjZSN50Es" 2717 | ] 2718 | }, 2719 | "竹内まりや_告白": { 2720 | "original": "tOkyxchJArk", 2721 | "pianos": [ 2722 | "8FFlGQQdwZI", 2723 | "T8jqPaHeUVE", 2724 | "a1M63ZO_YS8" 2725 | ] 2726 | }, 2727 | "竹内まりや_純愛ラプソディ": { 2728 | "original": "d83ojou5e-c", 2729 | "pianos": [ 2730 | "1xXOj9s3NkE", 2731 | "Jbnd82BWvTE", 2732 | "R4qDNKCKW0A", 2733 | "g1M6O_nnnN4", 2734 | "iYSFlUf50e4" 2735 | ] 2736 | }, 2737 | "竹内まりや_駅": { 2738 | "original": "hqQUlq3V72E", 2739 | "pianos": [ 2740 | "5a87AEh3HAg", 2741 | "FDK1BEXaZpU" 2742 | ] 2743 | }, 2744 | "米津玄師_KICK BACK": { 2745 | "original": "M2cckDmNLMI", 2746 | "pianos": [ 2747 | "2c_iSmm5SFw", 2748 | "AJ21yAKOMlM", 2749 | "EBGZChrQAd0", 2750 | "GsSYCC0qQFE", 2751 | "glmR8l5QE5w", 2752 | "jhC3ES30yi4", 2753 | "ub5dNHEagDA" 2754 | ] 2755 | }, 2756 | "米津玄師_LOSER": { 2757 | "original": "Dx_fKPBPYUI", 2758 | "pianos": [ 2759 | "DZvP0WwQRjY", 2760 | "HLDCI8700VM", 2761 | "Yb3_c_J7WTw", 2762 | "gzJxZ9ETnJU", 2763 | "isnhMznBr_k" 2764 | ] 2765 | }, 2766 | "米津玄師_Lemon": { 2767 | "original": "SX_ViT4Ra7k", 2768 | "pianos": [ 2769 | "M8byC9UGbZs", 2770 | "NwoOmkuiy0Q", 2771 | "R2cmSi6alq8", 2772 | "XKJ0v0-zTPc", 2773 | "Y1XoyH5MDP8", 2774 | "lIBfzpaF5dg" 2775 | ] 2776 | }, 2777 | "米津玄師_orion": { 2778 | "original": "lzAyrgSqeeE", 2779 | "pianos": [ 2780 | "SvmbDRCGUy4", 2781 | "etG_ch29lGg" 2782 | ] 2783 | }, 2784 | "米津玄師_アイネクライネ": { 2785 | "original": "-EKxzId_Sj4", 2786 | "pianos": [ 2787 | "TikHTF00FgU", 2788 | "arpCqgsZzgQ", 2789 | "nuPf5qzwIJ0" 2790 | ] 2791 | }, 2792 | "米津玄師_ゴーゴー幽霊船": { 2793 | "original": "2PqxOytUjz0", 2794 | "pianos": [ 2795 | "nz9jHtnFbyU" 2796 | ] 2797 | }, 2798 | "米津玄師_ピースサイン": { 2799 | "original": "9aJVr5tTTWk", 2800 | "pianos": [ 2801 | "X6QU7_WYDTI", 2802 | "kNHC9JoIHhE", 2803 | "vI0t6BBrdbc" 2804 | ] 2805 | }, 2806 | "米津玄師_感電": { 2807 | "original": "UFQEttrn6CQ", 2808 | "pianos": [ 2809 | "1wOQLyyIhpw", 2810 | "G3rcFoMhoSc", 2811 | "InQRv1PKnmM", 2812 | "Y8Qh2NE6UUw" 2813 | ] 2814 | }, 2815 | "米津玄師_灰色と青 ( + 菅田将暉)": { 2816 | "original": "gJX2iy6nhHc", 2817 | "pianos": [ 2818 | "-bcaHQU7S_g", 2819 | "6aD9iJbR6s8", 2820 | "Bzznwy5AOg8", 2821 | "PI3tG6GMlTI", 2822 | "bKkB6yFsKOs" 2823 | ] 2824 | }, 2825 | "米津玄師_馬と鹿": { 2826 | "original": "ptnYBctoexk", 2827 | "pianos": [ 2828 | "5bgSJuQ_zvQ", 2829 | "6K9fSFGNmWE", 2830 | "C9u4y19obD4", 2831 | "QrdcVMvRO68", 2832 | "fCJXdrSEC-8", 2833 | "zreC1QaLI48" 2834 | ] 2835 | }, 2836 | "絢香_I believe": { 2837 | "original": "sJItuaykRRk", 2838 | "pianos": [ 2839 | "A7Mrx3cYwyg", 2840 | "K5c6idSN9q8", 2841 | "PrcmaQTssec", 2842 | "VWlQ8wa6OfI", 2843 | "mTkJil0h70w" 2844 | ] 2845 | }, 2846 | "絢香_beautiful": { 2847 | "original": "N3lgcufjB3Y", 2848 | "pianos": [ 2849 | "140GqTa6exc", 2850 | "Trh6ZxG5q48", 2851 | "kNEuKuS6ot4", 2852 | "pfq0M8ygXAQ" 2853 | ] 2854 | }, 2855 | "絢香_にじいろ": { 2856 | "original": "ia0lAgfhbAo", 2857 | "pianos": [ 2858 | "CUVrNp8zZwg", 2859 | "YfN_K-RHEbQ", 2860 | "bYaS6uyoN4U", 2861 | "pHOF374SCpc", 2862 | "utHHZtiq3yI", 2863 | "vmMceXwAV4k" 2864 | ] 2865 | }, 2866 | "絢香_三日月": { 2867 | "original": "wHw6W4BznTM", 2868 | "pianos": [ 2869 | "jGZskJwsyXk" 2870 | ] 2871 | } 2872 | } -------------------------------------------------------------------------------- /docs/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | AMT-APC 8 | 9 | 10 | 11 |
AMT-APC

We are developing an automatic piano cover generation model. The demo videos are posted below.

Demo

Lilac / Mrs. GREEN APPLE
(embedded demo video)
Original: Mrs. GREEN APPLE「ライラック」Official Music Video - YouTube

Himawari / Ado
(embedded demo video)
Original: 【Ado】向日葵 - YouTube

Technology

AMT-APC is a method for training an automatic piano cover generation model by fine-tuning an AMT (Automatic Music Transcription) model. We employ the hFT-Transformer as the base AMT model. The model is trained on a piano cover dataset collected from YouTube.

(overview diagram: docs/static/overview.png)
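The page above summarizes the system; the same pipeline can also be driven from Python. The following is a minimal sketch based on the `Pipeline` and `SVSampler` calls that appear in `eval/cover.py` below. The checkpoint path (`models/params/apc.pth`, per the README), the input/output file names, and the omitted keyword arguments (e.g. `no_load`) are assumptions, not guaranteed defaults.

```python
# Minimal sketch of single-file cover generation, mirroring eval/cover.py.
# Run from the repository root so that `models` and `data` are importable.
import torch

from models import Pipeline  # inference wrapper around the fine-tuned model
from data import SVSampler   # samples style vectors (level1/level2/level3)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
pipeline = Pipeline(
    path_model="models/params/apc.pth",  # assumed checkpoint location
    device=device,
    with_sv=True,  # condition the cover on a style vector
)

sv = SVSampler().sample("level2")  # or SVSampler().random(), as in eval/cover.py
pipeline.wav2midi(
    path_input="input.wav",   # hypothetical input audio
    path_output="cover.mid",  # hypothetical output MIDI
    sv=sv,
)
```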
-------------------------------------------------------------------------------- /docs/static/bg-cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/misya11p/amt-apc/f78ae99a11579e1bedd2a65c089cfcabe296b66f/docs/static/bg-cover.png -------------------------------------------------------------------------------- /docs/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/misya11p/amt-apc/f78ae99a11579e1bedd2a65c089cfcabe296b66f/docs/static/favicon.ico -------------------------------------------------------------------------------- /docs/static/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/misya11p/amt-apc/f78ae99a11579e1bedd2a65c089cfcabe296b66f/docs/static/overview.png -------------------------------------------------------------------------------- /eval/cover.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | import argparse 4 | 5 | ROOT = Path(__file__).parent.parent 6 | sys.path.append(str(ROOT)) 7 | 8 | import torch 9 | from midi2audio import FluidSynth 10 | from tqdm import tqdm 11 | 12 | from models import Pipeline 13 | from data import SVSampler 14 | from utils import info 15 | 16 | 17 | DEFAULT_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 18 | sv_sampler = SVSampler() 19 | 20 | 21 | def main(args): 22 | dir_output = ROOT / args.dir_output 23 | dir_output.mkdir(exist_ok=True) 24 | device = torch.device(args.device) if args.device else DEFAULT_DEVICE 25 | 26 | # Create MIDI files from WAV files 27 | midis = cover( 28 | dir_output, 29 | args.path_model, 30 | device, 31 | not args.no_sv, 32 | args.no_load, 33 | args.overwrite 34 | ) 35 | 36 | # Convert MIDI files to audio files 37 | midi2audio(midis, args.sound_font) 38 | 39 | 40 | def cover(dir_output, path_model, device, with_sv, no_load, overwrite): 41 | pipeline = Pipeline( 42 | path_model=path_model, 43 | device=device, 44 | with_sv=with_sv, 45 | no_load=no_load, 46 | ) 47 | 48 | songs = info.get_ids("test", orig=True) 49 | songs = sorted(songs) 50 | midis = [] 51 | for song in tqdm(songs): 52 | path_input = info.id2path(song).raw 53 | if not path_input.exists(): 54 | print(f"File not found: {path_input}") 55 | continue 56 | 57 | path_output = dir_output / f"{song}.mid" 58 | if path_output.exists() and not overwrite: 59 | midis.append(path_output) 60 | continue 61 | 62 | sv = sv_sampler.random() if with_sv else None 63 | # sv = sv_sampler.sample("level2") if with_sv else None # stable version 64 | pipeline.wav2midi( 65 | path_input=str(path_input), 66 | path_output=str(path_output), 67 | sv=sv, 68 | ) 69 | midis.append(path_output) 70 | return midis 71 | 72 | 73 | def midi2audio(midis, sound_font): 74 | if sound_font: 75 | fs = FluidSynth(sound_font=sound_font) 76 | else: 77 | fs = FluidSynth() 78 | 79 | for midi in midis: 80 | path_save = midi.with_suffix(".wav") 81 | fs.midi_to_audio(str(midi), str(path_save)) 82 | 83 | 84 | if __name__ == "__main__": 85 | parser = argparse.ArgumentParser("Generate piano covers (MIDI) using the trained model, and convert them to audio.") 86 | parser.add_argument("--dir_output", "-o", type=str, default="eval/data/") 87 | parser.add_argument("--overwrite", action="store_true") 88 | parser.add_argument("--path_model", type=str, default=None) 89 | 
parser.add_argument("--device", type=str, default=None) 90 | parser.add_argument("--no_sv", action="store_true") 91 | parser.add_argument("--no_load", action="store_true") 92 | parser.add_argument("--sound_font", type=str, default=None) 93 | args = parser.parse_args() 94 | main(args) -------------------------------------------------------------------------------- /eval/distance.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | from pathlib import Path 3 | import sys 4 | 5 | HERE = Path(__file__).parent 6 | ROOT = HERE.parent 7 | sys.path.append(str(HERE)) 8 | sys.path.append(str(ROOT)) 9 | 10 | from ChromaCoverId.chroma_features import ChromaFeatures 11 | from ChromaCoverId.cover_similarity_measures import ( 12 | cross_recurrent_plot, 13 | qmax_measure, 14 | ) 15 | 16 | from utils import info 17 | 18 | 19 | def main(args): 20 | dir_input = Path(args.dir_input) 21 | covers = list(dir_input.glob("*.wav")) 22 | covers = sorted(covers) 23 | 24 | if not covers: 25 | print("No covers found.") 26 | return 27 | 28 | no_origs = [] 29 | dists = {} 30 | for cover in covers: 31 | orig = info.id2path(cover.stem).raw 32 | if not orig.exists(): 33 | no_origs.append(cover) 34 | print(f"No original found for {cover.stem}.") 35 | continue 36 | dist = get_distance(orig, cover) 37 | dists[cover.stem] = dist 38 | 39 | if dists: 40 | write_result(args.path_result, dists, no_origs) 41 | 42 | 43 | def get_distance(path1, path2): 44 | chroma1 = ChromaFeatures(str(path1)) 45 | chroma2 = ChromaFeatures(str(path2)) 46 | hpcp1 = chroma1.chroma_hpcp() 47 | hpcp2 = chroma2.chroma_hpcp() 48 | crp = cross_recurrent_plot(hpcp1, hpcp2) 49 | qmax, _ = qmax_measure(crp) 50 | return qmax 51 | 52 | 53 | def write_result(path, dists, no_origs): 54 | sim_avg = sum(dists.values()) / len(dists) 55 | print(f"Average distance: {sim_avg}") 56 | with open(path, "w") as f: 57 | f.write(f"Average distance: {sim_avg}\n\n") 58 | f.write("Distance per cover:\n") 59 | for cover, dist in dists.items(): 60 | f.write(f" {cover}: {dist}\n") 61 | f.write("\n") 62 | if no_origs: 63 | f.write("No original found for covers:\n") 64 | for cover in no_origs: 65 | f.write(f" {cover}\n") 66 | 67 | 68 | if __name__ == "__main__": 69 | parser = argparse.ArgumentParser("Evaluate cover similarity using qmax measure.") 70 | parser.add_argument("--dir_input", type=str, default="eval/data/", help="Directory containing cover WAV files. Defaults to 'ROOT/eval/data/'.") 71 | parser.add_argument("--path_result", type=str, default="eval/qmax.txt", help="Path to save the result. 
Defaults to 'ROOT/eval/qmax.txt'.") 72 | args = parser.parse_args() 73 | main(args) 74 | -------------------------------------------------------------------------------- /eval/f1.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | import argparse 4 | 5 | ROOT = Path(__file__).resolve().parent.parent 6 | sys.path.append(str(ROOT)) 7 | 8 | import torch 9 | from torch.utils.data import DataLoader 10 | from tqdm import tqdm 11 | 12 | from data import PianoCoversDataset 13 | from models import load_model 14 | from train import loss_fn 15 | from utils import config 16 | 17 | 18 | DEFAULT_DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu") 19 | 20 | 21 | def main(args): 22 | path_model = args.path_model or config.path.apc 23 | device = torch.device(args.device) if args.device else DEFAULT_DEVICE 24 | model = load_model( 25 | device=device, 26 | path_model=path_model, 27 | with_sv=not args.no_sv, 28 | no_load=args.no_load, 29 | ) 30 | 31 | dataset = PianoCoversDataset(split=args.split) 32 | dataloader = DataLoader(dataset, batch_size=args.batch_size) 33 | loss, f1 = get_f1(model, dataloader, device) 34 | print(f"loss: {loss}") 35 | print(f"f1: {f1}") 36 | 37 | 38 | @torch.no_grad() 39 | def get_f1(model, dataloader, device): 40 | all_loss = 0 41 | all_f1 = 0 42 | 43 | model.eval() 44 | for batch in tqdm(dataloader): 45 | spec, sv, onset, offset, frame, velocity = batch 46 | spec = spec.to(device) 47 | sv = sv.to(device) 48 | onset = onset.to(device) 49 | offset = offset.to(device) 50 | frame = frame.to(device) 51 | velocity = velocity.to(device) 52 | 53 | pred = model(spec, sv) 54 | label = onset, offset, frame, velocity 55 | loss, f1 = loss_fn(pred, label) 56 | 57 | all_loss += loss.item() 58 | all_f1 += sum(f1) / 3 59 | 60 | loss = all_loss / len(dataloader) 61 | f1 = all_f1 / len(dataloader) 62 | return loss, f1 63 | 64 | 65 | if __name__ == "__main__": 66 | parser = argparse.ArgumentParser("Compute F1 score") 67 | parser.add_argument("--path_model", type=str, default=None, help="Path to the model. Defaults to CONFIG.PATH.APC.") 68 | parser.add_argument("--batch_size", type=int, default=8, help="Batch size") 69 | parser.add_argument("--device", type=str, default=None, help="Device to use. Defaults to auto (CUDA if available else CPU).") 70 | parser.add_argument("--split", type=str, default="test", help="Dataset split to use ('train' or 'test' or 'all'). 
Defaults to 'test'.") 71 | parser.add_argument("--no_sv", action="store_true", help="Do not use the style vector.") 72 | parser.add_argument("--no_load", action="store_true", help="Do not load the model.") 73 | args = parser.parse_args() 74 | main(args) 75 | -------------------------------------------------------------------------------- /infer/__main__.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | import argparse 4 | 5 | ROOT = Path(__file__).resolve().parent.parent 6 | sys.path.append(str(ROOT)) 7 | 8 | import torch 9 | 10 | from models import Pipeline 11 | from data import SVSampler 12 | from utils import config 13 | 14 | 15 | DEVICE_DEFAULT = torch.device("cuda" if torch.cuda.is_available() else "cpu") 16 | SV_SAMPLER = SVSampler() 17 | 18 | 19 | def main(args): 20 | path_model = args.path_model or config.path.apc 21 | device = torch.device(args.device) if args.device else DEVICE_DEFAULT 22 | pipeline = Pipeline(path_model, device) 23 | 24 | src = args.input 25 | if src.startswith("https://"): 26 | src = download(src) 27 | 28 | sv = SV_SAMPLER.sample(params=args.style) 29 | pipeline.wav2midi(src, args.output, sv, silent=False) 30 | 31 | 32 | def download(url): 33 | from yt_dlp import YoutubeDL 34 | ydl_opts = { 35 | "outtmpl": "_audio.%(ext)s", 36 | "format": "bestaudio/best", 37 | "postprocessors": [ 38 | { 39 | "key": "FFmpegExtractAudio", 40 | "preferredcodec": "wav", 41 | "preferredquality": "192", 42 | } 43 | ], 44 | "ignoreerrors": True, 45 | } 46 | with YoutubeDL(ydl_opts) as ydl: 47 | ydl.download([url]) 48 | return "_audio.wav" 49 | 50 | 51 | if __name__ == "__main__": 52 | parser = argparse.ArgumentParser() 53 | parser.add_argument("input", type=str, help="Path to the input wav file or URL of YouTube video") 54 | parser.add_argument("-o", "--output", type=str, default="output.mid", help="Path to the output midi file. Defaults to 'output.mid'") 55 | parser.add_argument("-s", "--style", type=str, default="level2", help="Cover style. Valid values are 'level1', 'level2', and 'level3'. 
Defaults to 'level2'") 56 | parser.add_argument("--path_model", type=str, default=None, help="Path to the model. Defaults to CONFIG.PATH.APC.") 57 | parser.add_argument("--device", type=str, default=None, help="Device to use. Defaults to auto (CUDA if available else CPU).") 58 | args = parser.parse_args() 59 | main(args) 60 | -------------------------------------------------------------------------------- /models/__init__.py: -------------------------------------------------------------------------------- 1 | from ._models import Pipeline, Spec2MIDI, load_model, save_model 2 | -------------------------------------------------------------------------------- /models/_models.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | from collections import OrderedDict 4 | from typing import List, Tuple 5 | 6 | ROOT = Path(__file__).resolve().parent.parent 7 | sys.path.append(str(ROOT)) 8 | 9 | import numpy as np 10 | import torch 11 | import torch.nn as nn 12 | 13 | from .hFT_Transformer.amt import AMT 14 | from .hFT_Transformer.model_spec2midi import ( 15 | Encoder_SPEC2MIDI as Encoder, 16 | Decoder_SPEC2MIDI as Decoder, 17 | Model_SPEC2MIDI as BaseSpec2MIDI, 18 | ) 19 | from utils import config 20 | 21 | 22 | DEVICE_DEFAULT = torch.device("cuda" if torch.cuda.is_available() else "cpu") 23 | Array = List | Tuple | np.ndarray | torch.Tensor 24 | 25 | 26 | class Pipeline(AMT): 27 | def __init__( 28 | self, 29 | path_model: str | None = None, 30 | device: torch.device = DEVICE_DEFAULT, 31 | amt: bool = False, 32 | with_sv: bool = True, 33 | no_load: bool = False, 34 | no_model: bool = False, 35 | ): 36 | """ 37 | Pipeline for converting audio to MIDI. Holds the model, its 38 | configuration, and the methods used for the conversion. 39 | 40 | Args: 41 | path_model (str, optional): 42 | Path to the model. If None, use the default model 43 | (CONFIG.PATH.AMT or CONFIG.PATH.APC). Defaults to None. 44 | device (torch.device, optional): 45 | Device to use for the model. Defaults to auto (CUDA if 46 | available else CPU). 47 | amt (bool, optional): 48 | Whether to use the AMT model. Defaults to False (use the 49 | APC model). 50 | with_sv (bool, optional): 51 | Whether to use the style vector. Defaults to True. 52 | no_load (bool, optional): 53 | Do not load the model weights. Defaults to False. 54 | no_model (bool, optional): 55 | Do not instantiate the model (self.model is set to None). Defaults to False. 56 | """ 57 | self.device = device 58 | self.with_sv = with_sv 59 | if no_model: 60 | self.model = None 61 | else: 62 | self.model = load_model( 63 | device=self.device, 64 | amt=amt, 65 | path_model=path_model, 66 | with_sv=with_sv, 67 | no_load=no_load, 68 | ) 69 | self.config = config.data 70 | 71 | def wav2midi( 72 | self, 73 | path_input: str, 74 | path_output: str, 75 | sv: None | Array = None, 76 | silent: bool = True, 77 | ): 78 | """ 79 | Convert audio to MIDI. 80 | 81 | Args: 82 | path_input (str): Path to the input audio file. 83 | path_output (str): Path to the output MIDI file. 84 | sv (None | Array, optional): Style vector. Defaults to None.
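            silent (bool, optional): If True, suppress progress messages
                during transcription. Defaults to True.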
85 | """ 86 | if sv is not None: 87 | sv = torch.tensor(sv) 88 | if sv.dim() == 1: 89 | sv = sv.unsqueeze(0) 90 | if sv.dim() == 2: 91 | pass 92 | else: 93 | raise ValueError(f"Invalid shape of style vector: {sv.shape}") 94 | sv = sv.to(self.device).to(torch.float32) 95 | 96 | feature = self.wav2feature(path_input) 97 | _, _, _, _, onset, offset, frame, velocity = self.transcript(feature, sv, silent) 98 | if not silent: 99 | print("Converting to MIDI ...", end=" ", flush=True) 100 | note = self.mpe2note( 101 | onset, 102 | offset, 103 | frame, 104 | velocity, 105 | thred_onset=config.infer.threshold.onset, 106 | thred_offset=config.infer.threshold.offset, 107 | thred_mpe=config.infer.threshold.frame, 108 | ) 109 | self.note2midi(note, path_output, config.infer.min_duration) 110 | if not silent: 111 | print("Done.") 112 | 113 | 114 | class Spec2MIDI(BaseSpec2MIDI): 115 | def __init__(self, encoder, decoder, sv_dim: int = 0): 116 | super().__init__(encoder, decoder) 117 | self.encoder = encoder 118 | self.decoder = decoder 119 | delattr(self, "encoder_spec2midi") 120 | delattr(self, "decoder_spec2midi") 121 | self.sv_dim = sv_dim 122 | if sv_dim: 123 | hidden_size = encoder.hid_dim 124 | self.fc_sv = nn.Linear(sv_dim, hidden_size) 125 | self.gate_sv = nn.Sequential( 126 | nn.Linear(hidden_size, hidden_size), 127 | nn.ReLU(), 128 | nn.Linear(hidden_size, hidden_size), 129 | nn.Sigmoid(), 130 | ) 131 | 132 | def forward(self, x, sv=None): 133 | h = self.encode(x, sv) 134 | y = self.decode(h) 135 | return y 136 | 137 | def encode(self, x, sv=None): 138 | h = self.encoder(x) 139 | if self.sv_dim and (sv is not None): 140 | sv = self.fc_sv(sv) 141 | _, n_frames, n_bin, _ = h.shape 142 | sv = sv.unsqueeze(1).unsqueeze(2) 143 | sv = sv.repeat(1, n_frames, n_bin, 1) 144 | z = self.gate_sv(h) 145 | h = z * h + (1 - z) * sv 146 | return h 147 | 148 | def decode(self, h): 149 | onset_f, offset_f, mpe_f, velocity_f, attention, \ 150 | onset_t, offset_t, mpe_t, velocity_t = self.decoder(h) 151 | return ( 152 | onset_f, offset_f, mpe_f, velocity_f, attention, 153 | onset_t, offset_t, mpe_t, velocity_t 154 | ) 155 | 156 | 157 | def load_model( 158 | path_model: str | None = None, 159 | device: torch.device = DEVICE_DEFAULT, 160 | amt: bool = False, 161 | with_sv: bool = True, 162 | no_load: bool = False, 163 | ) -> Spec2MIDI: 164 | """ 165 | Load the model. 166 | 167 | Args: 168 | path_model (str, optional): 169 | Path to the model. If None, use the default model 170 | (CONFIG.PATH.AMT or CONFIG.PATH.APC). Defaults to None. 171 | device (torch.device, optional): 172 | Device to use for the model. Defaults to auto (CUDA if 173 | available else CPU). 174 | amt (bool, optional): 175 | Whether to use the AMT model. Defaults to False (use the 176 | APC model). 177 | with_sv (bool, optional): 178 | Whether to use the style vector. Defaults to True. 179 | no_load (bool, optional): 180 | Do not load the model. Defaults to False. 181 | Returns: 182 | Spec2MIDI: The model. 
183 | """ 184 | if amt: 185 | path_model = path_model or str(ROOT / config.path.amt) 186 | else: 187 | path_model = path_model or str(ROOT / config.path.apc) 188 | 189 | encoder = Encoder( 190 | n_margin=config.data.input.margin_b, 191 | n_frame=config.data.input.num_frame, 192 | n_bin=config.data.feature.n_bins, 193 | cnn_channel=config.model.cnn.channel, 194 | cnn_kernel=config.model.cnn.kernel, 195 | hid_dim=config.model.transformer.hid_dim, 196 | n_layers=config.model.transformer.encoder.n_layer, 197 | n_heads=config.model.transformer.encoder.n_head, 198 | pf_dim=config.model.transformer.pf_dim, 199 | dropout=config.model.dropout, 200 | device=device, 201 | ) 202 | decoder = Decoder( 203 | n_frame=config.data.input.num_frame, 204 | n_bin=config.data.feature.n_bins, 205 | n_note=config.data.midi.num_note, 206 | n_velocity=config.data.midi.num_velocity, 207 | hid_dim=config.model.transformer.hid_dim, 208 | n_layers=config.model.transformer.decoder.n_layer, 209 | n_heads=config.model.transformer.decoder.n_head, 210 | pf_dim=config.model.transformer.pf_dim, 211 | dropout=config.model.dropout, 212 | device=device, 213 | ) 214 | sv_dim = config.model.sv_dim if with_sv else 0 215 | model = Spec2MIDI(encoder, decoder, sv_dim=sv_dim) 216 | if not no_load: 217 | state_dict = torch.load( 218 | path_model, 219 | weights_only=True, 220 | map_location=device 221 | ) 222 | model.load_state_dict(state_dict, strict=False) 223 | model.to(device) 224 | return model 225 | 226 | 227 | def save_model(model: nn.Module, path: str) -> None: 228 | """ 229 | Save the model. 230 | 231 | Args: 232 | model (nn.Module): Model to save. 233 | path (str): Path to save the model. 234 | """ 235 | state_dict = model.state_dict() 236 | correct_state_dict = OrderedDict() 237 | for key, value in state_dict.items(): 238 | key = key.replace("_orig_mod.", "") 239 | # If the model has been compiled with `torch.compile()`, 240 | # "_orig_mod." is appended to the key 241 | key = key.replace("module.", "") 242 | # If the model is saved with `torch.nn.DataParallel()`, 243 | # "module." is appended to the key 244 | correct_state_dict[key] = value 245 | torch.save(correct_state_dict, path) 246 | -------------------------------------------------------------------------------- /models/hFT_Transformer/README.md: -------------------------------------------------------------------------------- 1 | Files in this directory are from: https://github.com/sony/hFT-Transformer 2 | -------------------------------------------------------------------------------- /models/hFT_Transformer/amt.py: -------------------------------------------------------------------------------- 1 | #! 
python 2 | 3 | import pickle 4 | import torch 5 | import numpy as np 6 | import torchaudio 7 | import pretty_midi 8 | from tqdm import tqdm 9 | 10 | class AMT(): 11 | def __init__(self, config, model_path, batch_size=1, verbose_flag=False): 12 | if verbose_flag is True: 13 | print('torch version: '+torch.__version__) 14 | print('torch cuda : '+str(torch.cuda.is_available())) 15 | if torch.cuda.is_available(): 16 | self.device = 'cuda' 17 | else: 18 | self.device = 'cpu' 19 | 20 | self.config = config 21 | 22 | if model_path is None: 23 | self.model = None 24 | else: 25 | with open(model_path, 'rb') as f: 26 | self.model = pickle.load(f) 27 | self.model = self.model.to(self.device) 28 | self.model.eval() 29 | if verbose_flag is True: 30 | print(self.model) 31 | 32 | self.batch_size = batch_size 33 | 34 | 35 | def wav2feature(self, f_wav): 36 | ### torchaudio 37 | # torchaudio.transforms.MelSpectrogram() 38 | # default 39 | # sample_rate(16000) 40 | # win_length(n_fft) 41 | # hop_length(win_length//2) 42 | # n_fft(400) 43 | # f_min(0) 44 | # f_max(None) 45 | # pad(0) 46 | # n_mels(128) 47 | # window_fn(hann_window) 48 | # center(True) 49 | # power(2.0) 50 | # pad_mode(reflect) 51 | # onesided(True) 52 | # norm(None) 53 | ## melfilter: htk 54 | ## normalize: none -> slaney 55 | 56 | wave, sr = torchaudio.load(f_wav) 57 | wave_mono = torch.mean(wave, dim=0) 58 | tr_fsconv = torchaudio.transforms.Resample(sr, self.config['feature']['sr']) 59 | wave_mono_16k = tr_fsconv(wave_mono) 60 | tr_mel = torchaudio.transforms.MelSpectrogram(sample_rate=self.config['feature']['sr'], n_fft=self.config['feature']['fft_bins'], win_length=self.config['feature']['window_length'], hop_length=self.config['feature']['hop_sample'], pad_mode=self.config['feature']['pad_mode'], n_mels=self.config['feature']['mel_bins'], norm='slaney') 61 | mel_spec = tr_mel(wave_mono_16k) 62 | a_feature = (torch.log(mel_spec + self.config['feature']['log_offset'])).T 63 | 64 | return a_feature 65 | 66 | 67 | def transcript(self, a_feature, sv=None, silent=True, mode='combination', ablation_flag=False): # Modified from the original (this line) 68 | # a_feature: [num_frame, n_mels] 69 | a_feature = np.array(a_feature, dtype=np.float32) 70 | 71 | a_tmp_b = np.full([self.config['input']['margin_b'], self.config['feature']['n_bins']], self.config['input']['min_value'], dtype=np.float32) 72 | len_s = int(np.ceil(a_feature.shape[0] / self.config['input']['num_frame']) * self.config['input']['num_frame']) - a_feature.shape[0] 73 | a_tmp_f = np.full([len_s+self.config['input']['margin_f'], self.config['feature']['n_bins']], self.config['input']['min_value'], dtype=np.float32) 74 | a_input = torch.from_numpy(np.concatenate([a_tmp_b, a_feature, a_tmp_f], axis=0)) 75 | # a_input: [margin_b+a_feature.shape[0]+len_s+margin_f, n_bins] 76 | 77 | a_output_onset_A = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 78 | a_output_offset_A = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 79 | a_output_mpe_A = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 80 | a_output_velocity_A = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.int8) 81 | 82 | if mode == 'combination': 83 | a_output_onset_B = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 84 | a_output_offset_B = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 85 | 
a_output_mpe_B = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 86 | a_output_velocity_B = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.int8) 87 | 88 | self.model.eval() 89 | for i in tqdm(range(0, a_feature.shape[0], self.config['input']['num_frame']), desc="Processing each segment", disable=silent): # Modified from the original (this line) 90 | input_spec = (a_input[i:i+self.config['input']['margin_b']+self.config['input']['num_frame']+self.config['input']['margin_f']]).T.unsqueeze(0).to(self.device) 91 | 92 | with torch.no_grad(): 93 | if mode == 'combination': 94 | if ablation_flag is True: 95 | output_onset_A, output_offset_A, output_mpe_A, output_velocity_A, output_onset_B, output_offset_B, output_mpe_B, output_velocity_B = self.model(input_spec, sv) # Modified from the original (this line) 96 | else: 97 | output_onset_A, output_offset_A, output_mpe_A, output_velocity_A, attention, output_onset_B, output_offset_B, output_mpe_B, output_velocity_B = self.model(input_spec, sv) # Modified from the original (this line) 98 | # output_onset: [batch_size, n_frame, n_note] 99 | # output_offset: [batch_size, n_frame, n_note] 100 | # output_mpe: [batch_size, n_frame, n_note] 101 | # output_velocity: [batch_size, n_frame, n_note, n_velocity] 102 | else: 103 | output_onset_A, output_offset_A, output_mpe_A, output_velocity_A = self.model(input_spec) 104 | 105 | a_output_onset_A[i:i+self.config['input']['num_frame']] = (output_onset_A.squeeze(0)).to('cpu').detach().numpy() 106 | a_output_offset_A[i:i+self.config['input']['num_frame']] = (output_offset_A.squeeze(0)).to('cpu').detach().numpy() 107 | a_output_mpe_A[i:i+self.config['input']['num_frame']] = (output_mpe_A.squeeze(0)).to('cpu').detach().numpy() 108 | a_output_velocity_A[i:i+self.config['input']['num_frame']] = (output_velocity_A.squeeze(0).argmax(2)).to('cpu').detach().numpy() 109 | 110 | if mode == 'combination': 111 | a_output_onset_B[i:i+self.config['input']['num_frame']] = (output_onset_B.squeeze(0)).to('cpu').detach().numpy() 112 | a_output_offset_B[i:i+self.config['input']['num_frame']] = (output_offset_B.squeeze(0)).to('cpu').detach().numpy() 113 | a_output_mpe_B[i:i+self.config['input']['num_frame']] = (output_mpe_B.squeeze(0)).to('cpu').detach().numpy() 114 | a_output_velocity_B[i:i+self.config['input']['num_frame']] = (output_velocity_B.squeeze(0).argmax(2)).to('cpu').detach().numpy() 115 | 116 | if mode == 'combination': 117 | return a_output_onset_A, a_output_offset_A, a_output_mpe_A, a_output_velocity_A, a_output_onset_B, a_output_offset_B, a_output_mpe_B, a_output_velocity_B 118 | else: 119 | return a_output_onset_A, a_output_offset_A, a_output_mpe_A, a_output_velocity_A 120 | 121 | 122 | def transcript_stride(self, a_feature, n_offset, mode='combination', ablation_flag=False): 123 | # a_feature: [num_frame, n_mels] 124 | a_feature = np.array(a_feature, dtype=np.float32) 125 | 126 | half_frame = int(self.config['input']['num_frame']/2) 127 | a_tmp_b = np.full([self.config['input']['margin_b'] + n_offset, self.config['feature']['n_bins']], self.config['input']['min_value'], dtype=np.float32) 128 | tmp_len = a_feature.shape[0] + self.config['input']['margin_b'] + self.config['input']['margin_f'] + half_frame 129 | len_s = int(np.ceil(tmp_len / half_frame) * half_frame) - tmp_len 130 | a_tmp_f = np.full([len_s+self.config['input']['margin_f']+(half_frame-n_offset), self.config['feature']['n_bins']], self.config['input']['min_value'], dtype=np.float32) 
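        # The n_offset rows prepended via a_tmp_b shift the input so that the
        # output slice [n_offset : n_offset+half_frame] of each window lines up
        # with feature frames [i : i+half_frame); the tail padding guarantees
        # that every such slice is fully covered.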
131 | 132 | a_input = torch.from_numpy(np.concatenate([a_tmp_b, a_feature, a_tmp_f], axis=0)) 133 | # a_input: [n_offset+margin_b+a_feature.shape[0]+len_s+(half_frame-n_offset)+margin_f, n_bins] 134 | 135 | a_output_onset_A = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 136 | a_output_offset_A = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 137 | a_output_mpe_A = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 138 | a_output_velocity_A = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.int8) 139 | 140 | if mode == 'combination': 141 | a_output_onset_B = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 142 | a_output_offset_B = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 143 | a_output_mpe_B = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.float32) 144 | a_output_velocity_B = np.zeros((a_feature.shape[0]+len_s, self.config['midi']['num_note']), dtype=np.int8) 145 | 146 | self.model.eval() 147 | for i in range(0, a_feature.shape[0], half_frame): 148 | input_spec = (a_input[i:i+self.config['input']['margin_b']+self.config['input']['num_frame']+self.config['input']['margin_f']]).T.unsqueeze(0).to(self.device) 149 | 150 | with torch.no_grad(): 151 | if mode == 'combination': 152 | if ablation_flag is True: 153 | output_onset_A, output_offset_A, output_mpe_A, output_velocity_A, output_onset_B, output_offset_B, output_mpe_B, output_velocity_B = self.model(input_spec) 154 | else: 155 | output_onset_A, output_offset_A, output_mpe_A, output_velocity_A, attention, output_onset_B, output_offset_B, output_mpe_B, output_velocity_B = self.model(input_spec) 156 | # output_onset: [batch_size, n_frame, n_note] 157 | # output_offset: [batch_size, n_frame, n_note] 158 | # output_mpe: [batch_size, n_frame, n_note] 159 | # output_velocity: [batch_size, n_frame, n_note, n_velocity] 160 | else: 161 | output_onset_A, output_offset_A, output_mpe_A, output_velocity_A = self.model(input_spec) 162 | 163 | a_output_onset_A[i:i+half_frame] = (output_onset_A.squeeze(0)[n_offset:n_offset+half_frame]).to('cpu').detach().numpy() 164 | a_output_offset_A[i:i+half_frame] = (output_offset_A.squeeze(0)[n_offset:n_offset+half_frame]).to('cpu').detach().numpy() 165 | a_output_mpe_A[i:i+half_frame] = (output_mpe_A.squeeze(0)[n_offset:n_offset+half_frame]).to('cpu').detach().numpy() 166 | a_output_velocity_A[i:i+half_frame] = (output_velocity_A.squeeze(0)[n_offset:n_offset+half_frame].argmax(2)).to('cpu').detach().numpy() 167 | 168 | if mode == 'combination': 169 | a_output_onset_B[i:i+half_frame] = (output_onset_B.squeeze(0)[n_offset:n_offset+half_frame]).to('cpu').detach().numpy() 170 | a_output_offset_B[i:i+half_frame] = (output_offset_B.squeeze(0)[n_offset:n_offset+half_frame]).to('cpu').detach().numpy() 171 | a_output_mpe_B[i:i+half_frame] = (output_mpe_B.squeeze(0)[n_offset:n_offset+half_frame]).to('cpu').detach().numpy() 172 | a_output_velocity_B[i:i+half_frame] = (output_velocity_B.squeeze(0)[n_offset:n_offset+half_frame].argmax(2)).to('cpu').detach().numpy() 173 | 174 | if mode == 'combination': 175 | return a_output_onset_A, a_output_offset_A, a_output_mpe_A, a_output_velocity_A, a_output_onset_B, a_output_offset_B, a_output_mpe_B, a_output_velocity_B 176 | else: 177 | return a_output_onset_A, a_output_offset_A, a_output_mpe_A, a_output_velocity_A 178 | 179 
| 180 | def mpe2note(self, a_onset=None, a_offset=None, a_mpe=None, a_velocity=None, thred_onset=0.5, thred_offset=0.5, thred_mpe=0.5, mode_velocity='ignore_zero', mode_offset='shorter'): 181 | ## mode_velocity 182 | ## org: 0-127 183 | ## ignore_zero: 0-127 (output note does not include 0) (default) 184 | 185 | ## mode_offset 186 | ## shorter: use shorter one of mpe and offset (default) 187 | ## longer : use longer one of mpe and offset 188 | ## offset : use offset (ignore mpe) 189 | 190 | a_note = [] 191 | hop_sec = float(self.config['feature']['hop_sample'] / self.config['feature']['sr']) 192 | 193 | for j in range(self.config['midi']['num_note']): 194 | # find local maximum 195 | a_onset_detect = [] 196 | for i in range(len(a_onset)): 197 | if a_onset[i][j] >= thred_onset: 198 | left_flag = True 199 | for ii in range(i-1, -1, -1): 200 | if a_onset[i][j] > a_onset[ii][j]: 201 | left_flag = True 202 | break 203 | elif a_onset[i][j] < a_onset[ii][j]: 204 | left_flag = False 205 | break 206 | right_flag = True 207 | for ii in range(i+1, len(a_onset)): 208 | if a_onset[i][j] > a_onset[ii][j]: 209 | right_flag = True 210 | break 211 | elif a_onset[i][j] < a_onset[ii][j]: 212 | right_flag = False 213 | break 214 | if (left_flag is True) and (right_flag is True): 215 | if (i == 0) or (i == len(a_onset) - 1): 216 | onset_time = i * hop_sec 217 | else: 218 | if a_onset[i-1][j] == a_onset[i+1][j]: 219 | onset_time = i * hop_sec 220 | elif a_onset[i-1][j] > a_onset[i+1][j]: 221 | onset_time = (i * hop_sec - (hop_sec * 0.5 * (a_onset[i-1][j] - a_onset[i+1][j]) / (a_onset[i][j] - a_onset[i+1][j]))) 222 | else: 223 | onset_time = (i * hop_sec + (hop_sec * 0.5 * (a_onset[i+1][j] - a_onset[i-1][j]) / (a_onset[i][j] - a_onset[i-1][j]))) 224 | a_onset_detect.append({'loc': i, 'onset_time': onset_time}) 225 | a_offset_detect = [] 226 | for i in range(len(a_offset)): 227 | if a_offset[i][j] >= thred_offset: 228 | left_flag = True 229 | for ii in range(i-1, -1, -1): 230 | if a_offset[i][j] > a_offset[ii][j]: 231 | left_flag = True 232 | break 233 | elif a_offset[i][j] < a_offset[ii][j]: 234 | left_flag = False 235 | break 236 | right_flag = True 237 | for ii in range(i+1, len(a_offset)): 238 | if a_offset[i][j] > a_offset[ii][j]: 239 | right_flag = True 240 | break 241 | elif a_offset[i][j] < a_offset[ii][j]: 242 | right_flag = False 243 | break 244 | if (left_flag is True) and (right_flag is True): 245 | if (i == 0) or (i == len(a_offset) - 1): 246 | offset_time = i * hop_sec 247 | else: 248 | if a_offset[i-1][j] == a_offset[i+1][j]: 249 | offset_time = i * hop_sec 250 | elif a_offset[i-1][j] > a_offset[i+1][j]: 251 | offset_time = (i * hop_sec - (hop_sec * 0.5 * (a_offset[i-1][j] - a_offset[i+1][j]) / (a_offset[i][j] - a_offset[i+1][j]))) 252 | else: 253 | offset_time = (i * hop_sec + (hop_sec * 0.5 * (a_offset[i+1][j] - a_offset[i-1][j]) / (a_offset[i][j] - a_offset[i-1][j]))) 254 | a_offset_detect.append({'loc': i, 'offset_time': offset_time}) 255 | 256 | time_next = 0.0 257 | time_offset = 0.0 258 | time_mpe = 0.0 259 | for idx_on in range(len(a_onset_detect)): 260 | # onset 261 | loc_onset = a_onset_detect[idx_on]['loc'] 262 | time_onset = a_onset_detect[idx_on]['onset_time'] 263 | 264 | if idx_on + 1 < len(a_onset_detect): 265 | loc_next = a_onset_detect[idx_on+1]['loc'] 266 | #time_next = loc_next * hop_sec 267 | time_next = a_onset_detect[idx_on+1]['onset_time'] 268 | else: 269 | loc_next = len(a_mpe) 270 | time_next = (loc_next-1) * hop_sec 271 | 272 | # offset 273 | loc_offset = loc_onset+1 274 | 
flag_offset = False 275 | #time_offset = 0### 276 | for idx_off in range(len(a_offset_detect)): 277 | if loc_onset < a_offset_detect[idx_off]['loc']: 278 | loc_offset = a_offset_detect[idx_off]['loc'] 279 | time_offset = a_offset_detect[idx_off]['offset_time'] 280 | flag_offset = True 281 | break 282 | if loc_offset > loc_next: 283 | loc_offset = loc_next 284 | time_offset = time_next 285 | 286 | # offset by MPE 287 | # (1 frame longer) 288 | loc_mpe = loc_onset+1 289 | flag_mpe = False 290 | #time_mpe = 0### 291 | for ii_mpe in range(loc_onset+1, loc_next): 292 | if a_mpe[ii_mpe][j] < thred_mpe: 293 | loc_mpe = ii_mpe 294 | flag_mpe = True 295 | time_mpe = loc_mpe * hop_sec 296 | break 297 | ''' 298 | # (correct algorithm) 299 | loc_mpe = loc_onset 300 | flag_mpe = False 301 | for ii_mpe in range(loc_onset+1, loc_next+1): 302 | if a_mpe[ii_mpe][j] < thred_mpe: 303 | loc_mpe = ii_mpe-1 304 | flag_mpe = True 305 | time_mpe = loc_mpe * hop_sec 306 | break 307 | ''' 308 | pitch_value = int(j+self.config['midi']['note_min']) 309 | velocity_value = int(a_velocity[loc_onset][j]) 310 | 311 | if (flag_offset is False) and (flag_mpe is False): 312 | offset_value = float(time_next) 313 | elif (flag_offset is True) and (flag_mpe is False): 314 | offset_value = float(time_offset) 315 | elif (flag_offset is False) and (flag_mpe is True): 316 | offset_value = float(time_mpe) 317 | else: 318 | if mode_offset == 'offset': 319 | ## (a) offset 320 | offset_value = float(time_offset) 321 | elif mode_offset == 'longer': 322 | ## (b) longer 323 | if loc_offset >= loc_mpe: 324 | offset_value = float(time_offset) 325 | else: 326 | offset_value = float(time_mpe) 327 | else: 328 | ## (c) shorter 329 | if loc_offset <= loc_mpe: 330 | offset_value = float(time_offset) 331 | else: 332 | offset_value = float(time_mpe) 333 | if mode_velocity != 'ignore_zero': 334 | a_note.append({'pitch': pitch_value, 'onset': float(time_onset), 'offset': offset_value, 'velocity': velocity_value}) 335 | else: 336 | if velocity_value > 0: 337 | a_note.append({'pitch': pitch_value, 'onset': float(time_onset), 'offset': offset_value, 'velocity': velocity_value}) 338 | 339 | if (len(a_note) > 1) and \ 340 | (a_note[len(a_note)-1]['pitch'] == a_note[len(a_note)-2]['pitch']) and \ 341 | (a_note[len(a_note)-1]['onset'] < a_note[len(a_note)-2]['offset']): 342 | a_note[len(a_note)-2]['offset'] = a_note[len(a_note)-1]['onset'] 343 | 344 | a_note = sorted(sorted(a_note, key=lambda x: x['pitch']), key=lambda x: x['onset']) 345 | return a_note 346 | 347 | 348 | def note2midi(self, a_note, f_midi, min_length=0.): 349 | midi = pretty_midi.PrettyMIDI() 350 | instrument = pretty_midi.Instrument(program=0) 351 | for note in a_note: 352 | if note['offset'] - note['onset'] < min_length: 353 | continue 354 | instrument.notes.append(pretty_midi.Note(velocity=note['velocity'], pitch=note['pitch'], start=note['onset'], end=note['offset'])) 355 | midi.instruments.append(instrument) 356 | midi.write(f_midi) 357 | 358 | return 359 | -------------------------------------------------------------------------------- /models/hFT_Transformer/model_spec2midi.py: -------------------------------------------------------------------------------- 1 | #!
python 2 | 3 | import torch 4 | import torch.nn as nn 5 | 6 | ## 7 | ## Model 8 | ## 9 | class Model_SPEC2MIDI(nn.Module): 10 | def __init__(self, encoder, decoder): 11 | super().__init__() 12 | self.encoder_spec2midi = encoder 13 | self.decoder_spec2midi = decoder 14 | 15 | def forward(self, input_spec): 16 | #input_spec = [batch_size, n_bin, margin+n_frame+margin] (8, 256, 192) 17 | #print('Model_SPEC2MIDI(0) input_spec: '+str(input_spec.shape)) 18 | 19 | enc_vector = self.encoder_spec2midi(input_spec) 20 | #enc_freq = [batch_size, n_frame, n_bin, hid_dim] (8, 128, 256, 256) 21 | #print('Model_SPEC2MIDI(1) enc_vector: '+str(enc_vector.shape)) 22 | 23 | output_onset_A, output_offset_A, output_mpe_A, output_velocity_A, attention, output_onset_B, output_offset_B, output_mpe_B, output_velocity_B = self.decoder_spec2midi(enc_vector) 24 | #output_onset_A = [batch_size, n_frame, n_note] (8, 128, 88) 25 | #output_onset_B = [batch_size, n_frame, n_note] (8, 128, 88) 26 | #output_velocity_A = [batch_size, n_frame, n_note, n_velocity] (8, 128, 88, 128) 27 | #output_velocity_B = [batch_size, n_frame, n_note, n_velocity] (8, 128, 88, 128) 28 | #attention = [batch_size, n_frame, n_heads, n_note, n_bin] (8, 128, 4, 88, 256) 29 | #print('Model_SPEC2MIDI(2) output_onset_A: '+str(output_onset_A.shape)) 30 | #print('Model_SPEC2MIDI(2) output_onset_B: '+str(output_onset_B.shape)) 31 | #print('Model_SPEC2MIDI(2) output_velocity_A: '+str(output_velocity_A.shape)) 32 | #print('Model_SPEC2MIDI(2) output_velocity_B: '+str(output_velocity_B.shape)) 33 | #print('Model_SPEC2MIDI(2) attention: '+str(attention.shape)) 34 | 35 | return output_onset_A, output_offset_A, output_mpe_A, output_velocity_A, attention, output_onset_B, output_offset_B, output_mpe_B, output_velocity_B 36 | 37 | 38 | ## 39 | ## Encoder 40 | ## 41 | class Encoder_SPEC2MIDI(nn.Module): 42 | def __init__(self, n_margin, n_frame, n_bin, cnn_channel, cnn_kernel, hid_dim, n_layers, n_heads, pf_dim, dropout, device): 43 | super().__init__() 44 | 45 | self.device = device 46 | self.n_frame = n_frame 47 | self.n_bin = n_bin 48 | self.cnn_channel = cnn_channel 49 | self.cnn_kernel = cnn_kernel 50 | self.hid_dim = hid_dim 51 | self.conv = nn.Conv2d(1, self.cnn_channel, kernel_size=(1, self.cnn_kernel)) 52 | self.n_proc = n_margin * 2 + 1 53 | self.cnn_dim = self.cnn_channel * (self.n_proc - (self.cnn_kernel - 1)) 54 | self.tok_embedding_freq = nn.Linear(self.cnn_dim, hid_dim) 55 | self.pos_embedding_freq = nn.Embedding(n_bin, hid_dim) 56 | self.layers_freq = nn.ModuleList([EncoderLayer(hid_dim, n_heads, pf_dim, dropout, device) for _ in range(n_layers)]) 57 | self.dropout = nn.Dropout(dropout) 58 | self.scale_freq = torch.sqrt(torch.FloatTensor([hid_dim])).to(device) 59 | 60 | def forward(self, spec_in): 61 | #spec_in = [batch_size, n_bin, n_margin+n_frame+n_margin] (8, 256, 192) (batch_size=8, n_bins=256, margin=32/n_frame=128) 62 | #print('Encoder_SPEC2MIDI(0) spec_in: '+str(spec_in.shape)) 63 | batch_size = spec_in.shape[0] 64 | 65 | spec = spec_in.unfold(2, self.n_proc, 1).permute(0, 2, 1, 3).contiguous() 66 | #spec = [batch_size, n_frame, n_bin, n_proc] (8, 128, 256, 65) (batch_size=8, n_frame=128, n_bins=256, n_proc=65) 67 | #print('Encoder_SPEC2MIDI(1) spec: '+str(spec.shape)) 68 | 69 | # CNN 1D 70 | spec_cnn = spec.reshape(batch_size*self.n_frame, self.n_bin, self.n_proc).unsqueeze(1) 71 | #spec = [batch_size*n_frame, 1, n_bin, n_proc] (8*128, 1, 256, 65) (batch_size=128, 1, n_frame, n_bins=256, n_proc=65) 72 | #print('Encoder_SPEC2MIDI(2) 
spec_cnn: '+str(spec_cnn.shape)) 73 | spec_cnn = self.conv(spec_cnn).permute(0, 2, 1, 3).contiguous() 74 | # spec_cnn: [batch_size*n_frame, n_bin, cnn_channel, n_proc-(cnn_kernel-1)] (8*128, 256, 4, 61) 75 | #print('Encoder_SPEC2MIDI(2) spec_cnn: '+str(spec_cnn.shape)) 76 | 77 | ## 78 | ## frequency 79 | ## 80 | spec_cnn_freq = spec_cnn.reshape(batch_size*self.n_frame, self.n_bin, self.cnn_dim) 81 | # spec_cnn_freq: [batch_size*n_frame, n_bin, cnn_channel, (n_proc)-(cnn_kernel-1)] (8*128, 256, 244) 82 | #print('Encoder_SPEC2MIDI(3) spec_cnn_freq: '+str(spec_cnn_freq.shape)) 83 | 84 | # embedding 85 | spec_emb_freq = self.tok_embedding_freq(spec_cnn_freq) 86 | # spec_emb_freq: [batch_size*n_frame, n_bin, hid_dim] (8*128, 256, 256) 87 | #print('Encoder_SPEC2MIDI(4) spec_emb_freq: '+str(spec_emb_freq.shape)) 88 | 89 | # position coding 90 | pos_freq = torch.arange(0, self.n_bin).unsqueeze(0).repeat(batch_size*self.n_frame, 1).to(self.device) 91 | #pos_freq = [batch_size, n_frame, n_bin] (8*128, 256) 92 | #print('Encoder_SPEC2MIDI(5) pos_freq: '+str(pos_freq.shape)) 93 | 94 | # embedding 95 | spec_freq = self.dropout((spec_emb_freq * self.scale_freq) + self.pos_embedding_freq(pos_freq)) 96 | #spec_freq = [batch_size*n_frame, n_bin, hid_dim] (8*128, 256, 256) 97 | #print('Encoder_SPEC2MIDI(6) spec_freq: '+str(spec_freq.shape)) 98 | 99 | # transformer encoder 100 | for layer_freq in self.layers_freq: 101 | spec_freq = layer_freq(spec_freq) 102 | spec_freq = spec_freq.reshape(batch_size, self.n_frame, self.n_bin, self.hid_dim) 103 | #spec_freq = [batch_size, n_frame, n_bin, hid_dim] (8, 128, 256, 256) 104 | #print('Encoder_SPEC2MIDI(7) spec_freq: '+str(spec_freq.shape)) 105 | 106 | return spec_freq 107 | 108 | 109 | ## 110 | ## Decoder 111 | ## 112 | class Decoder_SPEC2MIDI(nn.Module): 113 | def __init__(self, n_frame, n_bin, n_note, n_velocity, hid_dim, n_layers, n_heads, pf_dim, dropout, device): 114 | super().__init__() 115 | self.device = device 116 | self.n_note = n_note 117 | self.n_frame = n_frame 118 | self.n_velocity = n_velocity 119 | self.n_bin = n_bin 120 | self.hid_dim = hid_dim 121 | self.sigmoid = nn.Sigmoid() 122 | self.dropout = nn.Dropout(dropout) 123 | 124 | # CAfreq 125 | self.pos_embedding_freq = nn.Embedding(n_note, hid_dim) 126 | self.layer_zero_freq = DecoderLayer_Zero(hid_dim, n_heads, pf_dim, dropout, device) 127 | self.layers_freq = nn.ModuleList([DecoderLayer(hid_dim, n_heads, pf_dim, dropout, device) for _ in range(n_layers-1)]) 128 | 129 | self.fc_onset_freq = nn.Linear(hid_dim, 1) 130 | self.fc_offset_freq = nn.Linear(hid_dim, 1) 131 | self.fc_mpe_freq = nn.Linear(hid_dim, 1) 132 | self.fc_velocity_freq = nn.Linear(hid_dim, self.n_velocity) 133 | 134 | # SAtime 135 | self.scale_time = torch.sqrt(torch.FloatTensor([hid_dim])).to(device) 136 | self.pos_embedding_time = nn.Embedding(n_frame, hid_dim) 137 | #self.layers_time = nn.ModuleList([DecoderLayer(hid_dim, n_heads, pf_dim, dropout, device) for _ in range(n_layers)]) 138 | self.layers_time = nn.ModuleList([EncoderLayer(hid_dim, n_heads, pf_dim, dropout, device) for _ in range(n_layers)]) 139 | 140 | self.fc_onset_time = nn.Linear(hid_dim, 1) 141 | self.fc_offset_time = nn.Linear(hid_dim, 1) 142 | self.fc_mpe_time = nn.Linear(hid_dim, 1) 143 | self.fc_velocity_time = nn.Linear(hid_dim, self.n_velocity) 144 | 145 | def forward(self, enc_spec): 146 | batch_size = enc_spec.shape[0] 147 | enc_spec = enc_spec.reshape([batch_size*self.n_frame, self.n_bin, self.hid_dim]) 148 | #enc_spec = [batch_size*n_frame, n_bin, 
hid_dim] (8*128, 256, 256) 149 | #print('Decoder_SPEC2MIDI(0) enc_spec: '+str(enc_spec.shape)) 150 | 151 | ## 152 | ## CAfreq freq(256)/note(88) 153 | ## 154 | pos_freq = torch.arange(0, self.n_note).unsqueeze(0).repeat(batch_size*self.n_frame, 1).to(self.device) 155 | midi_freq = self.pos_embedding_freq(pos_freq) 156 | #pos_freq = [batch_size*n_frame, n_note] (8*128, 88) 157 | #midi_freq = [batch_size, n_note, hid_dim] (8*128, 88, 256) 158 | #print('Decoder_SPEC2MIDI(1) pos_freq: '+str(pos_freq.shape)) 159 | #print('Decoder_SPEC2MIDI(1) midi_freq: '+str(midi_freq.shape)) 160 | 161 | midi_freq, attention_freq = self.layer_zero_freq(enc_spec, midi_freq) 162 | for layer_freq in self.layers_freq: 163 | midi_freq, attention_freq = layer_freq(enc_spec, midi_freq) 164 | dim = attention_freq.shape 165 | attention_freq = attention_freq.reshape([batch_size, self.n_frame, dim[1], dim[2], dim[3]]) 166 | #midi_freq = [batch_size*n_frame, n_note, hid_dim] (8*128, 88, 256) 167 | #attention_freq = [batch_size, n_frame, n_heads, n_note, n_bin] (8, 128, 4, 88, 256) 168 | #print('Decoder_SPEC2MIDI(2) midi_freq: '+str(midi_freq.shape)) 169 | #print('Decoder_SPEC2MIDI(2) attention_freq: '+str(attention_freq.shape)) 170 | 171 | ## output(freq) 172 | output_onset_freq = self.sigmoid(self.fc_onset_freq(midi_freq).reshape([batch_size, self.n_frame, self.n_note])) 173 | output_offset_freq = self.sigmoid(self.fc_offset_freq(midi_freq).reshape([batch_size, self.n_frame, self.n_note])) 174 | output_mpe_freq = self.sigmoid(self.fc_mpe_freq(midi_freq).reshape([batch_size, self.n_frame, self.n_note])) 175 | output_velocity_freq = self.fc_velocity_freq(midi_freq).reshape([batch_size, self.n_frame, self.n_note, self.n_velocity]) 176 | #output_onset_freq = [batch_size, n_frame, n_note] (8, 128, 88) 177 | #output_offset_freq = [batch_size, n_frame, n_note] (8, 128, 88) 178 | #output_mpe_freq = [batch_size, n_frame, n_note] (8, 128, 88) 179 | #output_velocity_freq = [batch_size, n_frame, n_note, n_velocity] (8, 128, 88, 128) 180 | #print('Decoder_SPEC2MIDI(3) output_onset_freq: '+str(output_onset_freq.shape)) 181 | #print('Decoder_SPEC2MIDI(3) output_offset_freq: '+str(output_offset_freq.shape)) 182 | #print('Decoder_SPEC2MIDI(3) output_mpe_freq: '+str(output_mpe_freq.shape)) 183 | #print('Decoder_SPEC2MIDI(3) output_velocity_freq: '+str(output_velocity_freq.shape)) 184 | 185 | ## 186 | ## SAtime time(64) 187 | ## 188 | #midi_time: [batch_size*n_frame, n_note, hid_dim] -> [batch_size*n_note, n_frame, hid_dim] 189 | midi_time = midi_freq.reshape([batch_size, self.n_frame, self.n_note, self.hid_dim]).permute(0, 2, 1, 3).contiguous().reshape([batch_size*self.n_note, self.n_frame, self.hid_dim]) 190 | pos_time = torch.arange(0, self.n_frame).unsqueeze(0).repeat(batch_size*self.n_note, 1).to(self.device) 191 | midi_time = self.dropout((midi_time * self.scale_time) + self.pos_embedding_time(pos_time)) 192 | #pos_time = [batch_size*n_note, n_frame] (8*88, 128) 193 | #midi_time = [batch_size*n_note, n_frame, hid_dim] (8*88, 128, 256) 194 | #print('Decoder_SPEC2MIDI(4) pos_time: '+str(pos_time.shape)) 195 | #print('Decoder_SPEC2MIDI(4) midi_time: '+str(midi_time.shape)) 196 | 197 | for layer_time in self.layers_time: 198 | midi_time = layer_time(midi_time) 199 | #midi_time = [batch_size*n_note, n_frame, hid_dim] (8*88, 128, 256) 200 | #print('Decoder_SPEC2MIDI(5) midi_time: '+str(midi_time.shape)) 201 | 202 | ## output(time) 203 | output_onset_time = self.sigmoid(self.fc_onset_time(midi_time).reshape([batch_size, self.n_note, 
self.n_frame]).permute(0, 2, 1).contiguous()) 204 | output_offset_time = self.sigmoid(self.fc_offset_time(midi_time).reshape([batch_size, self.n_note, self.n_frame]).permute(0, 2, 1).contiguous()) 205 | output_mpe_time = self.sigmoid(self.fc_mpe_time(midi_time).reshape([batch_size, self.n_note, self.n_frame]).permute(0, 2, 1).contiguous()) 206 | output_velocity_time = self.fc_velocity_time(midi_time).reshape([batch_size, self.n_note, self.n_frame, self.n_velocity]).permute(0, 2, 1, 3).contiguous() 207 | #output_onset_time = [batch_size, n_frame, n_note] (8, 128, 88) 208 | #output_offset_time = [batch_size, n_frame, n_note] (8, 128, 88) 209 | #output_mpe_time = [batch_size, n_frame, n_note] (8, 128, 88) 210 | #output_velocity_time = [batch_size, n_frame, n_note, n_velocity] (8, 128, 88, 128) 211 | #print('Decoder_SPEC2MIDI(6) output_onset_time: '+str(output_onset_time.shape)) 212 | #print('Decoder_SPEC2MIDI(6) output_offset_time: '+str(output_offset_time.shape)) 213 | #print('Decoder_SPEC2MIDI(6) output_mpe_time: '+str(output_mpe_time.shape)) 214 | #print('Decoder_SPEC2MIDI(6) output_velocity_time: '+str(output_velocity_time.shape)) 215 | 216 | return output_onset_freq, output_offset_freq, output_mpe_freq, output_velocity_freq, attention_freq, output_onset_time, output_offset_time, output_mpe_time, output_velocity_time 217 | 218 | 219 | ## 220 | ## sub functions 221 | ## 222 | class EncoderLayer(nn.Module): 223 | def __init__(self, hid_dim, n_heads, pf_dim, dropout, device): 224 | super().__init__() 225 | self.layer_norm = nn.LayerNorm(hid_dim) 226 | self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device) 227 | self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout) 228 | self.dropout = nn.Dropout(dropout) 229 | 230 | def forward(self, src): 231 | #src = [batch_size, src_len, hid_dim] 232 | 233 | #self attention 234 | _src, _ = self.self_attention(src, src, src) 235 | #dropout, residual connection and layer norm 236 | src = self.layer_norm(src + self.dropout(_src)) 237 | #src = [batch_size, src_len, hid_dim] 238 | 239 | #positionwise feedforward 240 | _src = self.positionwise_feedforward(src) 241 | #dropout, residual and layer norm 242 | src = self.layer_norm(src + self.dropout(_src)) 243 | #src = [batch_size, src_len, hid_dim] 244 | 245 | return src 246 | 247 | class DecoderLayer_Zero(nn.Module): 248 | def __init__(self, hid_dim, n_heads, pf_dim, dropout, device): 249 | super().__init__() 250 | self.layer_norm = nn.LayerNorm(hid_dim) 251 | self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device) 252 | self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout) 253 | self.dropout = nn.Dropout(dropout) 254 | 255 | def forward(self, enc_src, trg): 256 | #trg = [batch_size, trg_len, hid_dim] 257 | #enc_src = [batch_size, src_len, hid_dim] 258 | 259 | #encoder attention 260 | _trg, attention = self.encoder_attention(trg, enc_src, enc_src) 261 | #dropout, residual connection and layer norm 262 | trg = self.layer_norm(trg + self.dropout(_trg)) 263 | #trg = [batch_size, trg_len, hid_dim] 264 | 265 | #positionwise feedforward 266 | _trg = self.positionwise_feedforward(trg) 267 | #dropout, residual and layer norm 268 | trg = self.layer_norm(trg + self.dropout(_trg)) 269 | #trg = [batch_size, trg_len, hid_dim] 270 | #attention = [batch_size, n_heads, trg_len, src_len] 271 | 272 | return trg, attention 273 | 274 | class DecoderLayer(nn.Module): 275 | def __init__(self, hid_dim, n_heads, 
pf_dim, dropout, device): 276 | super().__init__() 277 | self.layer_norm = nn.LayerNorm(hid_dim) 278 | self.self_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device) 279 | self.encoder_attention = MultiHeadAttentionLayer(hid_dim, n_heads, dropout, device) 280 | self.positionwise_feedforward = PositionwiseFeedforwardLayer(hid_dim, pf_dim, dropout) 281 | self.dropout = nn.Dropout(dropout) 282 | 283 | def forward(self, enc_src, trg): 284 | #trg = [batch_size, trg_len, hid_dim] 285 | #enc_src = [batch_size, src_len, hid_dim] 286 | 287 | #self attention 288 | _trg, _ = self.self_attention(trg, trg, trg) 289 | #dropout, residual connection and layer norm 290 | trg = self.layer_norm(trg + self.dropout(_trg)) 291 | #trg = [batch_size, trg_len, hid_dim] 292 | 293 | #encoder attention 294 | _trg, attention = self.encoder_attention(trg, enc_src, enc_src) 295 | #dropout, residual connection and layer norm 296 | trg = self.layer_norm(trg + self.dropout(_trg)) 297 | #trg = [batch_size, trg_len, hid_dim] 298 | 299 | #positionwise feedforward 300 | _trg = self.positionwise_feedforward(trg) 301 | #dropout, residual and layer norm 302 | trg = self.layer_norm(trg + self.dropout(_trg)) 303 | #trg = [batch_size, trg_len, hid_dim] 304 | #attention = [batch_size, n_heads, trg_len, src_len] 305 | 306 | return trg, attention 307 | 308 | class MultiHeadAttentionLayer(nn.Module): 309 | def __init__(self, hid_dim, n_heads, dropout, device): 310 | super().__init__() 311 | assert hid_dim % n_heads == 0 312 | self.hid_dim = hid_dim 313 | self.n_heads = n_heads 314 | self.head_dim = hid_dim // n_heads 315 | self.fc_q = nn.Linear(hid_dim, hid_dim) 316 | self.fc_k = nn.Linear(hid_dim, hid_dim) 317 | self.fc_v = nn.Linear(hid_dim, hid_dim) 318 | self.fc_o = nn.Linear(hid_dim, hid_dim) 319 | self.dropout = nn.Dropout(dropout) 320 | self.scale = torch.sqrt(torch.FloatTensor([self.head_dim])).to(device) 321 | 322 | def forward(self, query, key, value): 323 | batch_size = query.shape[0] 324 | #query = [batch_size, query_len, hid_dim] 325 | #key = [batch_size, key_len, hid_dim] 326 | #value = [batch_size, value_len, hid_dim] 327 | 328 | Q = self.fc_q(query) 329 | K = self.fc_k(key) 330 | V = self.fc_v(value) 331 | #Q = [batch_size, query_len, hid_dim] 332 | #K = [batch_size, key_len, hid_dim] 333 | #V = [batch_size, value_len, hid_dim] 334 | 335 | Q = Q.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3) 336 | K = K.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3) 337 | V = V.view(batch_size, -1, self.n_heads, self.head_dim).permute(0, 2, 1, 3) 338 | #Q = [batch_size, n_heads, query_len, head_dim] 339 | #K = [batch_size, n_heads, key_len, head_dim] 340 | #V = [batch_size, n_heads, value_len, head_dim] 341 | 342 | energy = torch.matmul(Q, K.permute(0, 1, 3, 2)) / self.scale 343 | #energy = [batch_size, n_heads, seq len, seq len] 344 | 345 | attention = torch.softmax(energy, dim = -1) 346 | #attention = [batch_size, n_heads, query_len, key_len] 347 | 348 | x = torch.matmul(self.dropout(attention), V) 349 | #x = [batch_size, n_heads, seq len, head_dim] 350 | 351 | x = x.permute(0, 2, 1, 3).contiguous() 352 | #x = [batch_size, seq_len, n_heads, head_dim] 353 | 354 | x = x.view(batch_size, -1, self.hid_dim) 355 | #x = [batch_size, seq_len, hid_dim] 356 | 357 | x = self.fc_o(x) 358 | #x = [batch_size, seq_len, hid_dim] 359 | 360 | return x, attention 361 | 362 | class PositionwiseFeedforwardLayer(nn.Module): 363 | def __init__(self, hid_dim, pf_dim, dropout): 364 | 
super().__init__() 365 | self.fc_1 = nn.Linear(hid_dim, pf_dim) 366 | self.fc_2 = nn.Linear(pf_dim, hid_dim) 367 | self.dropout = nn.Dropout(dropout) 368 | 369 | def forward(self, x): 370 | #x = [batch_size, seq_len, hid_dim] 371 | 372 | x = self.dropout(torch.relu(self.fc_1(x))) 373 | #x = [batch_size, seq_len, pf dim] 374 | 375 | x = self.fc_2(x) 376 | #x = [batch_size, seq_len, hid_dim] 377 | 378 | return x 379 | -------------------------------------------------------------------------------- /models/params/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/misya11p/amt-apc/f78ae99a11579e1bedd2a65c089cfcabe296b66f/models/params/.gitkeep -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | dlprog==1.2.8 2 | essentia==2.1b6.dev1110 3 | librosa==0.10.2.post1 4 | midi2audio==0.1.1 5 | pytsmod==0.3.8 6 | soundfile==0.13.1 7 | synctoolbox==1.3.2 8 | torch==2.3.1 9 | torchaudio==2.3.1 10 | tqdm==4.66.5 11 | yt-dlp==2024.8.6 12 | -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | from ._loss import loss_fn 2 | -------------------------------------------------------------------------------- /train/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | 4 | import torch 5 | import torch.multiprocessing as mp 6 | 7 | from _trainer import Trainer 8 | from data import PianoCoversDataset 9 | 10 | 11 | DEVICE_CUDA = torch.device("cuda") 12 | 13 | 14 | def main(args): 15 | print("Start training.") 16 | 17 | os.environ["MASTER_ADDR"] = "localhost" 18 | os.environ["MASTER_PORT"] = "12355" 19 | 20 | dataset = PianoCoversDataset(split=args.split) 21 | print(f"Dataset split: {args.split}.", flush=True) 22 | print(f"Number of samples: {len(dataset)}.", flush=True) 23 | 24 | trainer = Trainer( 25 | path_model=args.path_model, 26 | dataset=dataset, 27 | n_gpus=args.n_gpus, 28 | with_sv=not args.no_sv, 29 | no_load=args.no_load, 30 | freq_save=args.freq_save, 31 | ) 32 | if args.n_gpus >= 2: 33 | print(f"Number of GPUs: {args.n_gpus}, using DDP.", flush=True) 34 | mp.spawn( 35 | trainer, 36 | nprocs=args.n_gpus, 37 | join=True, 38 | ) 39 | else: 40 | print("Number of GPUs: 1", flush=True) 41 | trainer(DEVICE_CUDA) 42 | 43 | 44 | if __name__ == "__main__": 45 | parser = argparse.ArgumentParser("Train the model.") 46 | parser.add_argument("--path_model", type=str, default=None, help="Path to the base model. Defaults to CONFIG.PATH.AMT.") 47 | parser.add_argument("--n_gpus", type=int, default=2, help="Number of GPUs to use. Defaults to 2.") 48 | parser.add_argument("--split", type=str, default="train", help="Dataset split to use: 'train' or 'test' or 'all'. Defaults to 'train'.") 49 | parser.add_argument("--no_sv", action="store_true", help="Do not use the style vector.") 50 | parser.add_argument("--no_load", action="store_true", help="Do not load the base model.") 51 | parser.add_argument("--freq_save", type=int, default=100, help="Frequency to save the model and logs. 
Defaults to 100.") 52 | args = parser.parse_args() 53 | main(args) 54 | -------------------------------------------------------------------------------- /train/_loss.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | 4 | ROOT = Path(__file__).resolve().parent.parent 5 | sys.path.append(str(ROOT)) 6 | 7 | import torch 8 | import torch.nn as nn 9 | from sklearn.metrics import f1_score 10 | 11 | from utils import config 12 | 13 | 14 | THRESHOLD = 0.5 15 | BCE_LOSS = nn.BCELoss() 16 | CE_LOSS = nn.CrossEntropyLoss() 17 | BETA = config.train.beta 18 | THETA_ONSET = config.train.theta_onset 19 | THETA_FRAME = config.train.theta_frame 20 | THETA_VELOCITY = config.train.theta_velocity 21 | 22 | 23 | def f1_fn( 24 | onset_pred, 25 | frame_pred, 26 | velocity_pred, 27 | onset_label, 28 | frame_label, 29 | velocity_label 30 | ): 31 | onset_pred = onset_pred.cpu().numpy().flatten() 32 | frame_pred = frame_pred.cpu().numpy().flatten() 33 | velocity_pred = velocity_pred.cpu().numpy().flatten() 34 | onset_label = onset_label.cpu().numpy().flatten() 35 | frame_label = frame_label.cpu().numpy().flatten() 36 | velocity_label = velocity_label.cpu().numpy().flatten() 37 | f1_onset = f1_score(onset_label, onset_pred, zero_division=1) 38 | f1_frame = f1_score(frame_label, frame_pred, zero_division=1) 39 | f1_velocity = f1_score(velocity_label, velocity_pred, zero_division=1) 40 | return f1_onset, f1_frame, f1_velocity 41 | 42 | 43 | def extract(label, prob=0.): 44 | idx_pos = (label > 0) 45 | shifted_p = torch.roll(idx_pos, shifts=1, dims=-1) 46 | shifted_n = torch.roll(idx_pos, shifts=-1, dims=-1) 47 | idx_rand = torch.rand(idx_pos.shape).to(idx_pos.device) < prob 48 | idx = idx_pos | shifted_p | shifted_n | idx_rand 49 | return idx 50 | 51 | 52 | def loss_fn(pred, label): 53 | # Unpack 54 | onset_pred_f, offset_pred_f, frame_pred_f, velocity_pred_f, _, \ 55 | onset_pred_t, offset_pred_t, frame_pred_t, velocity_pred_t = pred 56 | 57 | onset_label, offset_label, frame_label, velocity_label = label 58 | frame_label = frame_label.float() 59 | 60 | # Calculate F1 61 | with torch.no_grad(): 62 | f1 = f1_fn( 63 | (onset_pred_t > THRESHOLD), 64 | (frame_pred_t > THRESHOLD), 65 | velocity_pred_t.argmax(dim=-1).bool(), 66 | onset_label.bool(), 67 | frame_label.bool(), 68 | velocity_label.bool() 69 | ) 70 | 71 | # Extract the position where the sound is occurring and its surroundings. 
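    # (Kept are the positive frames, their immediate neighbors selected via
    # torch.roll, and a random subset of the remaining frames drawn with the
    # theta_* probabilities, so the losses are not dominated by the
    # overwhelmingly negative background.)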
72 | onset_idx = extract(onset_label, prob=THETA_ONSET) 73 | onset_pred_f = onset_pred_f[onset_idx] 74 | onset_pred_t = onset_pred_t[onset_idx] 75 | onset_label = onset_label[onset_idx] 76 | 77 | frame_idx = extract(frame_label, prob=THETA_FRAME) 78 | frame_pred_f = frame_pred_f[frame_idx] 79 | frame_pred_t = frame_pred_t[frame_idx] 80 | frame_label = frame_label[frame_idx] 81 | 82 | velocity_idx = extract(velocity_label, prob=THETA_VELOCITY) 83 | velocity_pred_f = velocity_pred_f[velocity_idx] 84 | velocity_pred_t = velocity_pred_t[velocity_idx] 85 | velocity_label = velocity_label[velocity_idx] 86 | 87 | # Calculate loss 88 | loss_onset_f = BCE_LOSS(onset_pred_f, onset_label) 89 | loss_onset_t = BCE_LOSS(onset_pred_t, onset_label) 90 | 91 | loss_frame_f = BCE_LOSS(frame_pred_f, frame_label) 92 | loss_frame_t = BCE_LOSS(frame_pred_t, frame_label) 93 | 94 | loss_velocity_f = CE_LOSS(velocity_pred_f, velocity_label) 95 | loss_velocity_t = CE_LOSS(velocity_pred_t, velocity_label) 96 | 97 | loss_f = (loss_onset_f + loss_frame_f + loss_velocity_f) / 3 98 | loss_t = (loss_onset_t + loss_frame_t + loss_velocity_t) / 3 99 | loss = BETA * loss_f + (1 - BETA) * loss_t 100 | 101 | return loss, f1 -------------------------------------------------------------------------------- /train/_trainer.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | import sys 3 | from datetime import datetime 4 | 5 | ROOT = Path(__file__).resolve().parent.parent 6 | sys.path.append(str(ROOT)) 7 | 8 | import torch 9 | import torch.optim as optim 10 | from torch.utils.data import DataLoader 11 | import torch.distributed as dist 12 | from torch.nn.parallel import DistributedDataParallel as DDP 13 | from torch.utils.data.distributed import DistributedSampler 14 | import dlprog 15 | 16 | from models import load_model, save_model 17 | from train._loss import loss_fn 18 | from utils import config 19 | 20 | 21 | DIR_CHECKPOINTS = ROOT / config.path.checkpoints 22 | NAME_FILE_LOG = "log.txt" 23 | PROG_LABELS = ["loss", "F1 avg", "F1 onset", "F1 frame", "F1 velocity"] 24 | 25 | 26 | def train( 27 | model: torch.nn.Module, 28 | optimizer: torch.optim.Optimizer, 29 | dataloader: DataLoader, 30 | device: int | torch.device, 31 | freq_save: int = 0, 32 | prog: dlprog.Progress = None, 33 | file_log: Path = None, 34 | ) -> None: 35 | """ 36 | Training loop in one epoch. 37 | 38 | Args: 39 | model (torch.nn.Module): Model to train. 40 | optimizer (torch.optim.Optimizer): Optimizer to use. 41 | dataloader (DataLoader): DataLoader to use. 42 | device (int | torch.device): Device to use. 43 | freq_save (int, optional): Frequency to save the model. 44 | prog (dlprog.Progress, optional): Progress bar. 45 | file_log (Path, optional): Path to the log file. 
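        Note:
            When `freq_save` > 0, the latest weights are written to
            DIR_CHECKPOINTS / "latest.pth" and the running loss/F1 values
            are appended to `file_log` every `freq_save` iterations.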
46 | """ 47 | model.train() 48 | 49 | for i, batch in enumerate(dataloader, 1): 50 | optimizer.zero_grad() 51 | spec, sv, onset, offset, frame, velocity = batch 52 | spec = spec.to(device) 53 | sv = sv.to(device) 54 | onset = onset.to(device) 55 | offset = offset.to(device) 56 | frame = frame.to(device) 57 | velocity = velocity.to(device) 58 | 59 | pred = model(spec, sv) 60 | label = onset, offset, frame, velocity 61 | loss, f1 = loss_fn(pred, label) 62 | loss.backward() 63 | optimizer.step() 64 | 65 | if prog is not None: 66 | prog.update([loss.item(), sum(f1) / 3, *f1]) 67 | 68 | if freq_save and (i % freq_save == 0): 69 | save_model(model, DIR_CHECKPOINTS / "latest.pth") 70 | loss, f1, f1_onset, f1_frame, f1_velocity = prog.now_values() 71 | with open(file_log, "a") as f: 72 | f.write( 73 | f"{i}iter, loss: {loss:.3f}, F1 avg: {f1:.3f}, " 74 | f"F1 onset: {f1_onset:.3f}, " 75 | f"F1 frame: {f1_frame:.3f}, " 76 | f"F1 velocity: {f1_velocity:.3f}\n" 77 | ) 78 | 79 | 80 | class Trainer: 81 | def __init__( 82 | self, 83 | path_model: str, 84 | dataset: torch.utils.data.Dataset, 85 | n_gpus: int, 86 | with_sv: bool, 87 | no_load: bool, 88 | freq_save: int, 89 | ): 90 | """ 91 | Trainer for calling in DDP. 92 | 93 | Args: 94 | path_model (str): Path to the base model. 95 | dataset (torch.utils.data.Dataset): Dataset to use. 96 | n_gpus (int): Number of GPUs to use. 97 | with_sv (bool): Whether to use the style vector. 98 | no_load (bool): Do not use the base model. 99 | freq_save (int): Frequency to save the model. 100 | """ 101 | self.path_model = path_model 102 | self.dataset = dataset 103 | self.n_gpus = n_gpus 104 | self.batch_size = config.train.batch_size 105 | self.n_epochs = config.train.n_epochs 106 | self.ddp = (n_gpus >= 2) 107 | self.with_sv = with_sv 108 | self.no_load = no_load 109 | self.freq_save = freq_save 110 | 111 | def setup(self, device: int | torch.device) -> None: 112 | """Setup the model, optimizer, and dataloader.""" 113 | model = load_model( 114 | path_model=self.path_model, 115 | device=device, 116 | amt=True, 117 | with_sv=self.with_sv, 118 | no_load=self.no_load, 119 | ) 120 | if self.ddp: 121 | dist.init_process_group("nccl", rank=device, world_size=self.n_gpus) 122 | model = DDP(model, device_ids=[device]) 123 | self.model = torch.compile(model) 124 | self.optimizer = optim.Adam(model.parameters(), lr=config.train.lr) 125 | torch.set_float32_matmul_precision("high") 126 | if self.ddp: 127 | self.sampler = DistributedSampler( 128 | self.dataset, 129 | num_replicas=self.n_gpus, 130 | rank=device, 131 | shuffle=True, 132 | ) 133 | else: 134 | self.sampler = None 135 | self.dataloader = DataLoader( 136 | self.dataset, 137 | batch_size=config.train.batch_size, 138 | sampler=self.sampler, 139 | shuffle=(self.sampler is None), 140 | ) 141 | if self.is_parent: 142 | date = datetime.now().strftime("%Y-%m%d-%H%M%S") 143 | self.dir_checkpoint = DIR_CHECKPOINTS / date 144 | self.dir_checkpoint.mkdir(parents=True) 145 | self.file_log = self.dir_checkpoint / NAME_FILE_LOG 146 | self.prog = dlprog.train_progress(width=20, label=PROG_LABELS, round=3) 147 | self.prog.start(n_epochs=self.n_epochs, n_iter=len(self.dataloader)) 148 | print(f"Checkpoint directory: {self.dir_checkpoint}") 149 | print("Setup done.") 150 | else: 151 | self.prog = None 152 | self.file_log = None 153 | 154 | def __call__(self, device: int | torch.device) -> None: 155 | """Training loop.""" 156 | self.is_parent = (not self.ddp) or (device == 0) 157 | self.setup(device) 158 | 159 | for n in 
--------------------------------------------------------------------------------
/utils/_info.py:
--------------------------------------------------------------------------------
1 | from pathlib import Path
2 | import json
3 | from typing import Dict, Any
4 | 
5 | 
6 | from ._config import config, CustomDict
7 | 
8 | 
9 | ROOT = Path(__file__).parent.parent
10 | PATH_DATASET = ROOT / config.path.dataset
11 | PATH_MOVIES = ROOT / config.path.src
12 | 
13 | 
14 | class Info:
15 |     def __init__(self, path: Path):
16 |         self.path = path
17 |         if path.exists():
18 |             with open(path, "r") as f:
19 |                 self.data = json.load(f)
20 |         else:
21 |             self.data = {}
22 |             with open(path, "w") as f:
23 |                 json.dump(self.data, f)
24 |         for id in self.data:
25 |             self.data[id] = CustomDict(self.data[id])
26 |         self._set_id2path()
27 | 
28 |     def _set_id2path(self):
29 |         id2path = {}
30 |         for id_piano, info in self.data.items():
31 |             id_orig = info["original"]
32 |             title = info["title"]
33 |             if id_orig not in id2path:
34 |                 id2path[id_orig] = {
35 |                     "raw": PATH_DATASET / "raw" / title / f"{id_orig}.wav",
36 |                     "synced": {
37 |                         "wav": PATH_DATASET / "synced" / title / f"{id_orig}.wav",
38 |                         "midi": PATH_DATASET / "synced" / title / f"{id_orig}.mid",
39 |                     },
40 |                     "array": PATH_DATASET / "array" / title / f"{id_orig}.npy",
41 |                 }
42 |             id2path[id_piano] = {
43 |                 "raw": PATH_DATASET / "raw" / title / "piano" / f"{id_piano}.wav",
44 |                 "synced": {
45 |                     "wav": PATH_DATASET / "synced" / title / "piano" / f"{id_piano}.wav",
46 |                     "midi": PATH_DATASET / "synced" / title 
/ "piano" / f"{id_piano}.mid", 47 | }, 48 | "array": PATH_DATASET / "array" / title / "piano" / f"{id_piano}.npz", 49 | } 50 | self._id2path = CustomDict(id2path) 51 | 52 | def __getitem__(self, id: str): 53 | return self.data[id] 54 | 55 | def set(self, id: str, key: str, value: Any, save: bool = True): 56 | if id not in self.data: 57 | self.data[id] = {} 58 | self.data[id][key] = value 59 | if save: 60 | with open(self.path, "w") as f: 61 | json.dump(self.data, f, indent=2, ensure_ascii=False) 62 | 63 | def update(self, id: str, values: Dict, save: bool = True): 64 | if id not in self.data: 65 | self.data[id] = {} 66 | self.data[id].update(values) 67 | if save: 68 | with open(self.path, "w") as f: 69 | json.dump(self.data, f, indent=2, ensure_ascii=False) 70 | 71 | def export(self): 72 | movies = {} 73 | for id, info in self.data.items(): 74 | if not info["include_dataset"]: 75 | continue 76 | 77 | title = info["title"] 78 | if title not in movies: 79 | movies[title] = { 80 | "original": info["original"], 81 | "pianos": [] 82 | } 83 | movies[title]["pianos"].append(id) 84 | 85 | with open(PATH_MOVIES, "w") as f: 86 | json.dump(movies, f, indent=2, ensure_ascii=False) 87 | 88 | def piano2orig(self, id: str): 89 | return self[id].original 90 | 91 | def is_train(self, id: str): 92 | return (self[id].split == "train") 93 | 94 | def is_test(self, id: str): 95 | return (self[id].split == "test") 96 | 97 | def id2path(self, id: str, orig: bool = False): 98 | if orig: 99 | return self._id2path[self.piano2orig(id)] 100 | else: 101 | return self._id2path[id] 102 | 103 | def get_ids(self, split: str, orig: bool = False): 104 | ids = [id for id, info in self.data.items() if info["split"] == split] 105 | if orig: 106 | ids = list(set([self.piano2orig(id) for id in ids])) 107 | return ids 108 | 109 | 110 | root = Path(__file__).resolve().parent.parent 111 | info = Info(root / config.path.info) 112 | --------------------------------------------------------------------------------