├── src ├── conf │ ├── __init__.py │ ├── urmp.py │ ├── feature.py │ ├── inference.py │ ├── sample.py │ └── models.py ├── dataset │ ├── __init__.py │ └── urmp │ │ ├── urmp_test.py │ │ ├── urmp_generate_dataset.py │ │ ├── urmp_feature.py │ │ └── urmp_sample.py ├── utils │ ├── __init__.py │ ├── multiEpochsDataLoader.py │ ├── target_process.py │ ├── weiMidi.py │ └── utilities.py ├── __init__.py ├── models │ ├── model_factory.py │ └── layers.py ├── analyze │ ├── utilities.py │ └── draw_table.py └── inference │ ├── compute_measure.py │ └── utilities.py ├── scripts ├── draw.sh ├── generate_dataset.sh ├── generate_feature.sh ├── evaluate.sh ├── train-model.sh ├── synthesis.sh ├── evaluate-model.sh └── clean_packed_data.sh ├── songs └── road.mid ├── data └── urmp │ ├── Trombone │ └── train.lst │ ├── Clarinet │ └── train.lst │ ├── Viola │ └── train.lst │ ├── Cello │ └── train.lst │ ├── Saxophone │ └── train.lst │ ├── Flute │ └── train.lst │ ├── Trumpet │ └── train.lst │ ├── Violin │ └── train.lst │ ├── Vn_Fl_Tpt │ └── train-query.lst.bk │ └── testset │ ├── query.lst │ └── test.lst ├── imgs ├── model-fig-3.png └── model-fig-1-ab.png ├── .gitignore ├── run.sh ├── requirements.txt ├── LICENSE ├── synthesis.py ├── evaluate.py ├── evaluation ├── MSI-DIS │ ├── scores-190.json │ ├── scores-191.json │ ├── scores-198.json │ ├── scores-199.json │ ├── scores-192.json │ ├── scores-193.json │ ├── scores-194.json │ ├── scores-195.json │ ├── scores-196.json │ └── scores-197.json └── MSI │ ├── scores-192.json │ ├── scores-197.json │ ├── scores-190.json │ ├── scores-198.json │ ├── scores-193.json │ ├── scores-195.json │ ├── scores-199.json │ ├── scores-191.json │ ├── scores-194.json │ └── scores-196.json ├── README.md └── train.py /src/conf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/dataset/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /scripts/draw.sh: -------------------------------------------------------------------------------- 1 | python src/analyze/draw_table.py --evaluation_folder=evaluation 2 | -------------------------------------------------------------------------------- /scripts/generate_dataset.sh: -------------------------------------------------------------------------------- 1 | python src/dataset/urmp/urmp_generate_dataset.py --feature_dir=dataset/hdf5s/urmp --data_dir=data/urmp-rec 2 | -------------------------------------------------------------------------------- /scripts/generate_feature.sh: -------------------------------------------------------------------------------- 1 | python src/dataset/urmp/urmp_feature.py --dataset_dir=dataset/URMP/Dataset --feature_dir=dataset/hdf5s/urmp --process_num=8 2 | -------------------------------------------------------------------------------- /songs/road.mid: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/HEAD/songs/road.mid -------------------------------------------------------------------------------- /data/urmp/Trombone/train.lst: -------------------------------------------------------------------------------- 1 | dataset/hdf5s/urmp/07_GString_tpt_tbn/AuSep_2_tbn_07_GString.h5 2 | dataset/hdf5s/urmp/15_Surprise_tpt_tpt_tbn/AuSep_3_tbn_15_Surprise.h5 -------------------------------------------------------------------------------- /imgs/model-fig-3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/HEAD/imgs/model-fig-3.png -------------------------------------------------------------------------------- /scripts/evaluate.sh: -------------------------------------------------------------------------------- 1 | cuda_id=0 2 | evaluate_epoch=199 3 | 4 | # evaluate MSI-DIS model 5 | sh scripts/evaluate-model.sh $cuda_id MSI $evaluate_epoch save_model/MSI 8 6 | -------------------------------------------------------------------------------- /imgs/model-fig-1-ab.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/HEAD/imgs/model-fig-1-ab.png -------------------------------------------------------------------------------- /scripts/train-model.sh: -------------------------------------------------------------------------------- 1 | cuda=$1 2 | model_name=$2 3 | model_folder=$3 4 | CUDA_VISIBLE_DEVICES=$cuda python train.py --model_name=$model_name --model_folder=$model_folder --epoch=200 5 | -------------------------------------------------------------------------------- /scripts/synthesis.sh: -------------------------------------------------------------------------------- 1 | CUDA_VISIBLE_DEVICES=0 python synthesis.py --model_name=MSI-DIS --model_path=save_model/wei_MSI-DIS/params_epoch-199.pkl \ 2 | --evaluation_folder=demo 3 | 4 | -------------------------------------------------------------------------------- /src/__init__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | pkg_path = os.path.dirname(os.path.abspath(__file__)) 5 | sys.path.append(pkg_path) 6 | 7 | import conf, utils, dataset, models, inference 8 | 9 | 10 | 
-------------------------------------------------------------------------------- /src/conf/urmp.py: -------------------------------------------------------------------------------- 1 | TRAINING_FILE_LST_PATH = "data/urmp/INSTR_NAME/train.lst" 2 | SEEN_INSTRUMENTS=["Violin", 3 | "Cello", 4 | "Viola", 5 | "Flute", 6 | "Clarinet", 7 | "Saxophone", 8 | "Trumpet", 9 | "Trombone"] 10 | SEEN_INSTRUMENTS_NUM = len(SEEN_INSTRUMENTS) 11 | -------------------------------------------------------------------------------- /data/urmp/Clarinet/train.lst: -------------------------------------------------------------------------------- 1 | dataset/hdf5s/urmp/17_Nocturne_vn_fl_cl/AuSep_3_cl_17_Nocturne.h5 2 | dataset/hdf5s/urmp/14_Waltz_fl_fl_cl/AuSep_3_cl_14_Waltz.h5 3 | dataset/hdf5s/urmp/03_Dance_fl_cl/AuSep_2_cl_03_Dance.h5 4 | dataset/hdf5s/urmp/19_Pavane_cl_vn_vc/AuSep_1_cl_19_Pavane.h5 -------------------------------------------------------------------------------- /scripts/evaluate-model.sh: -------------------------------------------------------------------------------- 1 | cuda=$1 2 | model_name=$2 3 | epoch=$3 4 | model_folder=$4 5 | ps=$5 6 | CUDA_VISIBLE_DEVICES=$cuda python evaluate.py --model_name=$model_name --model_path=$model_folder/params_epoch-$epoch.pkl \ 7 | --evaluation_folder=evaluation --epoch=$epoch --ps=$ps 8 | -------------------------------------------------------------------------------- /src/conf/feature.py: -------------------------------------------------------------------------------- 1 | SAMPLE_RATE = 16000 2 | FRAMES_PER_SEC = 100 3 | 4 | WINDOW_SIZE = 2048 5 | HOP_SIZE = 160 6 | PAD_MODE = "reflect" 7 | WINDOW = "hann" 8 | CHANNELS_NUM = 1 9 | 10 | NOTES_NUM_EXCLUDE_SILENCE = 88 11 | NOTES_NUM = NOTES_NUM_EXCLUDE_SILENCE + 1 12 | BEGIN_NOTE = 21 13 | N_FFT = WINDOW_SIZE 14 | -------------------------------------------------------------------------------- /src/conf/inference.py: 
-------------------------------------------------------------------------------- 1 | INFERENCE_BATCH_SIZE = 12 2 | BATCH_FRAMES_NUM = 410 3 | N_ITER = 200 4 | PAD_FRAME = 5 5 | 6 | TEST_DATA_LST_PATH = "data/urmp/testset/test.lst" 7 | TEST_QUERY_LST_PATH = "data/urmp/testset/query.lst" 8 | 9 | 10 | SYN_DURATION = 10 11 | SYN_SONG_ONSET = 25 12 | 13 | MIX_ONSET = 35 14 | OCTAVE = 12 15 | -------------------------------------------------------------------------------- /data/urmp/Viola/train.lst: -------------------------------------------------------------------------------- 1 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_3_va_44_K515.h5 2 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_4_va_44_K515.h5 3 | dataset/hdf5s/urmp/24_Pirates_vn_vn_va_vc/AuSep_3_va_24_Pirates.h5 4 | dataset/hdf5s/urmp/13_Hark_vn_vn_va/AuSep_3_va_13_Hark.h5 5 | dataset/hdf5s/urmp/27_King_vn_vn_va_sax/AuSep_3_va_27_King.h5 -------------------------------------------------------------------------------- /src/conf/sample.py: -------------------------------------------------------------------------------- 1 | from conf.feature import * 2 | 3 | SOURCES_NUM_OF_MIXTURE = 2 4 | UP_BOUND = 2 5 | SHIFT_SEMITONE = 4 6 | POS_SHIFT_SEMITONE = int( 2 * SHIFT_SEMITONE + 1) 7 | DURATION = 3 8 | SAMPLE_DURATION = int(SAMPLE_RATE * DURATION) 9 | FRAME_DURATION = int(FRAMES_PER_SEC * DURATION) 10 | 11 | LEARNING_RATE = 5e-4 12 | DECAY = 100 13 | TRAINING_BATCH_SIZE = 12 14 | SAMPLES_NUM = 6240 15 | -------------------------------------------------------------------------------- /data/urmp/Cello/train.lst: -------------------------------------------------------------------------------- 1 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_5_vc_44_K515.h5 2 | dataset/hdf5s/urmp/24_Pirates_vn_vn_va_vc/AuSep_4_vc_24_Pirates.h5 3 | dataset/hdf5s/urmp/12_Spring_vn_vn_vc/AuSep_3_vc_12_Spring.h5 4 | dataset/hdf5s/urmp/01_Jupiter_vn_vc/AuSep_2_vc_01_Jupiter.h5 5 | 
dataset/hdf5s/urmp/19_Pavane_cl_vn_vc/AuSep_3_vc_19_Pavane.h5 6 | dataset/hdf5s/urmp/26_King_vn_vn_va_vc/AuSep_4_vc_26_King.h5 -------------------------------------------------------------------------------- /data/urmp/Saxophone/train.lst: -------------------------------------------------------------------------------- 1 | dataset/hdf5s/urmp/25_Pirates_vn_vn_va_sax/AuSep_4_sax_25_Pirates.h5 2 | dataset/hdf5s/urmp/16_Surprise_tpt_tpt_sax/AuSep_3_sax_16_Surprise.h5 3 | dataset/hdf5s/urmp/06_Entertainer_sax_sax/AuSep_1_sax_06_Entertainer.h5 4 | dataset/hdf5s/urmp/06_Entertainer_sax_sax/AuSep_2_sax_06_Entertainer.h5 5 | dataset/hdf5s/urmp/27_King_vn_vn_va_sax/AuSep_4_sax_27_King.h5 6 | dataset/hdf5s/urmp/10_March_tpt_sax/AuSep_2_sax_10_March.h5 -------------------------------------------------------------------------------- /data/urmp/Flute/train.lst: -------------------------------------------------------------------------------- 1 | dataset/hdf5s/urmp/17_Nocturne_vn_fl_cl/AuSep_2_fl_17_Nocturne.h5 2 | dataset/hdf5s/urmp/14_Waltz_fl_fl_cl/AuSep_1_fl_14_Waltz.h5 3 | dataset/hdf5s/urmp/14_Waltz_fl_fl_cl/AuSep_2_fl_14_Waltz.h5 4 | dataset/hdf5s/urmp/08_Spring_fl_vn/AuSep_1_fl_08_Spring.h5 5 | dataset/hdf5s/urmp/03_Dance_fl_cl/AuSep_1_fl_03_Dance.h5 6 | dataset/hdf5s/urmp/04_Allegro_fl_fl/AuSep_2_fl_04_Allegro.h5 7 | dataset/hdf5s/urmp/04_Allegro_fl_fl/AuSep_1_fl_04_Allegro.h5 -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | data/musicnet*/ 3 | data/musdb18/ 4 | data/maestro/ 5 | data/urmp-test/ 6 | data/urmp-rec/ 7 | output/ 8 | output/ 9 | reproduce_dataset/ 10 | results/ 11 | *.pyc 12 | src/analyze/analyze_experimental_results.py 13 | src/analyze/draw_3D_hQuery.py 14 | src/analyze/draw_dataset_distribution.py 15 | src/analyze/draw_epoch_curve.py 16 | src/analyze/draw_hQuery.py 17 | src/analyze/draw_instr_results.py 18 | 
src/analyze/draw_spec.py 19 | src/analyze/gradient_colors.py 20 | -------------------------------------------------------------------------------- /scripts/clean_packed_data.sh: -------------------------------------------------------------------------------- 1 | git rev-list --objects --all | grep demo/MSI > .packed_data.lst 2 | awk '{print $2}' .packed_data.lst > .packed_data.name 3 | rm .packed_data.lst 4 | for name in `cat .packed_data.name` 5 | do 6 | echo $name 7 | git filter-branch -f --index-filter "git rm -r --cached --ignore-unmatch $name" -- --all 8 | break 9 | done 10 | rm .packed_data.name 11 | #rm -Rf .git/refs/original 12 | #git reflog expire --expire=now --all 13 | #git gc --aggressive --prune=now 14 | # 15 | -------------------------------------------------------------------------------- /src/conf/models.py: -------------------------------------------------------------------------------- 1 | from conf.feature import * 2 | 3 | QUERY_DIM = 6 4 | 5 | MODEL_CONFIG = {"UNet": 6 | {"with_bn0" : True, 7 | "input_channels_num" : CHANNELS_NUM, 8 | "input_size" : WINDOW_SIZE // 2, 9 | "blocks_num" : 5, 10 | "condition_dim" : QUERY_DIM, 11 | "output_dim" : CHANNELS_NUM, 12 | }, 13 | "QueryNet": 14 | {"blocks_num" : 2, 15 | "input_size" : WINDOW_SIZE // 2, 16 | "input_channels_num" : CHANNELS_NUM, 17 | "pnum" : QUERY_DIM, 18 | }, 19 | "Transcriptor": 20 | {"blocks_num" : 2, 21 | "output_dim" : NOTES_NUM 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /data/urmp/Trumpet/train.lst: -------------------------------------------------------------------------------- 1 | dataset/hdf5s/urmp/18_Nocturne_vn_fl_tpt/AuSep_3_tpt_18_Nocturne.h5 2 | dataset/hdf5s/urmp/07_GString_tpt_tbn/AuSep_1_tpt_07_GString.h5 3 | dataset/hdf5s/urmp/15_Surprise_tpt_tpt_tbn/AuSep_2_tpt_15_Surprise.h5 4 | dataset/hdf5s/urmp/15_Surprise_tpt_tpt_tbn/AuSep_1_tpt_15_Surprise.h5 5 | 
class _RepeatSampler(object):
    """Endless view over a sampler: restarts it every time it runs dry."""

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        # Never terminates on its own; the consumer decides when to stop.
        while True:
            for item in self.sampler:
                yield item


class MultiEpochsDataLoader(DataLoader):
    """DataLoader that builds its underlying iterator once and reuses it.

    The batch sampler is wrapped in ``_RepeatSampler`` so the single iterator
    created in ``__init__`` can keep producing batches; ``__iter__`` then hands
    out exactly ``len(self)`` batches per pass.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        endless_sampler = _RepeatSampler(self.batch_sampler)
        # Assigned through object.__setattr__, i.e. bypassing any __setattr__
        # defined by the DataLoader class itself.
        object.__setattr__(self, 'batch_sampler', endless_sampler)
        self.iterator = super().__iter__()

    def __len__(self):
        # Length of the wrapped (original) batch sampler, not of the endless one.
        wrapped_sampler = self.batch_sampler.sampler
        return len(wrapped_sampler)

    def __iter__(self):
        batches_left = len(self)
        while batches_left > 0:
            yield next(self.iterator)
            batches_left -= 1
scripts/train-model.sh $cuda_id MSI-DIS save_model/MSI-DIS 20 | 21 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | appdirs==1.4.4 2 | audioread==2.1.9 3 | cached-property==1.5.2 4 | certifi==2021.5.30 5 | cffi==1.14.6 6 | charset-normalizer==2.0.4 7 | cycler==0.10.0 8 | dataclasses==0.8 9 | decorator==5.0.9 10 | future==0.18.2 11 | h5py==3.1.0 12 | idna==3.2 13 | joblib==1.0.1 14 | kiwisolver==1.3.1 15 | librosa==0.8.1 16 | llvmlite==0.36.0 17 | matplotlib==3.3.4 18 | mido==1.2.10 19 | mir-eval==0.6 20 | numba==0.53.1 21 | numpy==1.19.5 22 | packaging==21.0 23 | Pillow==8.3.1 24 | pooch==1.4.0 25 | prefetch-generator==1.0.1 26 | pycparser==2.20 27 | pyparsing==2.4.7 28 | python-dateutil==2.8.2 29 | requests==2.26.0 30 | resampy==0.2.2 31 | scikit-learn==0.24.2 32 | scipy==1.5.4 33 | six==1.16.0 34 | SoundFile==0.10.3.post1 35 | threadpoolctl==2.2.0 36 | torch==1.7.1+cu101 37 | torchaudio==0.7.2 38 | torchlibrosa==0.0.4 39 | torchvision==0.8.2+cu101 40 | typing-extensions==3.10.0.0 41 | urllib3==1.26.6 42 | -------------------------------------------------------------------------------- /data/urmp/Violin/train.lst: -------------------------------------------------------------------------------- 1 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_1_vn_44_K515.h5 2 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_2_vn_44_K515.h5 3 | dataset/hdf5s/urmp/24_Pirates_vn_vn_va_vc/AuSep_2_vn_24_Pirates.h5 4 | dataset/hdf5s/urmp/24_Pirates_vn_vn_va_vc/AuSep_1_vn_24_Pirates.h5 5 | dataset/hdf5s/urmp/17_Nocturne_vn_fl_cl/AuSep_1_vn_17_Nocturne.h5 6 | dataset/hdf5s/urmp/02_Sonata_vn_vn/AuSep_1_vn_02_Sonata.h5 7 | dataset/hdf5s/urmp/02_Sonata_vn_vn/AuSep_2_vn_02_Sonata.h5 8 | dataset/hdf5s/urmp/12_Spring_vn_vn_vc/AuSep_1_vn_12_Spring.h5 9 | dataset/hdf5s/urmp/08_Spring_fl_vn/AuSep_2_vn_08_Spring.h5 10 | 
dataset/hdf5s/urmp/13_Hark_vn_vn_va/AuSep_1_vn_13_Hark.h5 11 | dataset/hdf5s/urmp/13_Hark_vn_vn_va/AuSep_2_vn_13_Hark.h5 12 | dataset/hdf5s/urmp/01_Jupiter_vn_vc/AuSep_1_vn_01_Jupiter.h5 13 | dataset/hdf5s/urmp/19_Pavane_cl_vn_vc/AuSep_2_vn_19_Pavane.h5 14 | dataset/hdf5s/urmp/27_King_vn_vn_va_sax/AuSep_1_vn_27_King.h5 15 | dataset/hdf5s/urmp/27_King_vn_vn_va_sax/AuSep_2_vn_27_King.h5 16 | dataset/hdf5s/urmp/09_Jesus_tpt_vn/AuSep_2_vn_09_Jesus.h5 -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 Liwei Lin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /synthesis.py: -------------------------------------------------------------------------------- 1 | import time 2 | import torch 3 | import torch.nn.functional as F 4 | import os 5 | import sys 6 | import numpy as np 7 | import torch.nn as nn 8 | import argparse 9 | 10 | import src 11 | from utils.utilities import (save_json, compute_time, print_dict, mkdir) 12 | from inference.inference import Inference 13 | from inference.compute_measure import (evaluate_transcription, evaluate_separation) 14 | 15 | 16 | if __name__=='__main__': 17 | 18 | parser = argparse.ArgumentParser(description='') 19 | parser.add_argument('--model_name', type=str, required=True, help='Model name in \ 20 | [`AMT` for trainscription-only baseline, \ 21 | `MSS` for separation-only baseline, \ 22 | `MSS-AMT` for multi-task baseline, \ 23 | `MSI` for the proposed multi-task score-informed model, \ 24 | `MSI-DIS` for the proposed multi-task score-informed with further disentanglement model].') 25 | parser.add_argument('--model_path', type=str, required=True, help='Model weights path.') 26 | parser.add_argument('--evaluation_folder', type=str, required=True, help='Directory to store evaluation results.') 27 | parser.add_argument('--epoch', type=str, required=True, help='Epoch.') 28 | 29 | 30 | args = parser.parse_args() 31 | 32 | model_name = args.model_name 33 | model_path = args.model_path 34 | output_dir = args.evaluation_folder 35 | epoch = args.epoch 36 | 37 | evaluation_dir = f"{output_dir}/{model_name}" 38 | 39 | path = "songs/road.mid" 40 | 41 | inference = Inference(model_name, model_path, evaluation_dir, epoch) 42 | inference.synthesis(path, 0) 43 | 44 | 45 | -------------------------------------------------------------------------------- /data/urmp/Vn_Fl_Tpt/train-query.lst.bk: -------------------------------------------------------------------------------- 1 | Oboe,Bassoon 
dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_3_ob_41_Miserere.h5,dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_5_bn_41_Miserere.h5 2 | Oboe,Bassoon dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_2_ob_28_Fugue.h5,dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_4_bn_28_Fugue.h5 3 | Tuba,Horn dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_5_tba_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_3_hn_42_Arioso.h5 4 | Tuba,Horn dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_3_hn_43_Chorale.h5 5 | Violin,Clarinet dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_2_vn_32_Fugue.h5,dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_3_cl_28_Fugue.h5 6 | Saxophone,Flute dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_2_sax_23_Rejouissance.h5,dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5 7 | Viola,Cello dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_3_va_32_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_4_vc_32_Fugue.h5 8 | Trumpet,Trombone dataset/hdf5s/urmp/18_Nocturne_vn_fl_tpt/AuSep_3_tpt_18_Nocturne.h5,dataset/hdf5s/urmp/21_Rejouissance_cl_tbn_tba/AuSep_2_tbn_21_Rejouissance.h5 9 | Violin,Double_bass dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_2_vn_32_Fugue.h5,dataset/hdf5s/urmp/38_Jerusalem_vn_vn_va_vc_db/AuSep_5_db_38_Jerusalem.h5 10 | Trombone,Tuba dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_4_tbn_34_Fugue.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5 11 | Trumpet,Horn dataset/hdf5s/urmp/18_Nocturne_vn_fl_tpt/AuSep_3_tpt_18_Nocturne.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_3_hn_43_Chorale.h5 12 | Flute,Oboe dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_3_ob_41_Miserere.h5 13 | Clarinet,Bassoon 
dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_4_cl_37_Rondeau.h5,dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_5_bn_41_Miserere.h5 14 | -------------------------------------------------------------------------------- /src/models/model_factory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import sys 6 | import numpy as np 7 | import time 8 | import h5py 9 | from torchlibrosa.stft import STFT, ISTFT, magphase 10 | 11 | from utils.utilities import (read_lst, read_config) 12 | from models.models import (AMTBaseline, MSSBaseline, MultiTaskBaseline, DisentanglementModel) 13 | 14 | from conf.feature import * 15 | 16 | et = 1e-8 17 | 18 | class ModelFactory(nn.Module): 19 | 20 | def __init__(self, model_name): 21 | super(ModelFactory, self).__init__() 22 | 23 | self.stft = STFT(n_fft=WINDOW_SIZE, hop_length=HOP_SIZE, 24 | win_length=WINDOW_SIZE, window=WINDOW, center=True, 25 | pad_mode=PAD_MODE, freeze_parameters=True) 26 | 27 | self.istft = ISTFT(n_fft=WINDOW_SIZE, hop_length=HOP_SIZE, 28 | win_length=WINDOW_SIZE, window=WINDOW, center=True, 29 | pad_mode=PAD_MODE, freeze_parameters=True) 30 | 31 | 32 | if model_name in ['AMT', 'AMTBaseline']: 33 | network = AMTBaseline() 34 | elif model_name in ['MSS', 'MSSBaseline']: 35 | network = MSSBaseline() 36 | elif model_name in ['MSS-AMT', 'MultiTaskBaseline']: 37 | network = MultiTaskBaseline() 38 | elif model_name in ['MSI', 'MSI-DIS', 'DisentanglementModel']: 39 | network = DisentanglementModel() 40 | 41 | self.network = network 42 | 43 | def wav2spec(self, input): 44 | channels_num = input.shape[-2] 45 | 46 | def spectrogram(input): 47 | (real, imag) = self.stft(input) 48 | spec = (real ** 2 + imag ** 2) ** 0.5 49 | return spec 50 | 51 | spec_list = [] 52 | 53 | for channel in range(channels_num): 54 | spec = spectrogram(input[:, channel, :]) 55 | spec_list.append(spec) 56 | 57 | 
class TargetProcessor(object):
    '''Convert timed note events into frame-level targets for one segment.'''

    def __init__(self, segment_seconds, frames_per_second, begin_note, classes_num):
        '''Store the segment geometry.

        Args:
            segment_seconds: length of the segment that `process` renders.
            frames_per_second: number of target frames per second.
            begin_note: stored on the instance; not used by `process`.
            classes_num: number of note classes; also the fill value that
                marks frames in which no note was written.
        '''
        self.segment_seconds = segment_seconds
        self.frames_per_second = frames_per_second
        self.begin_note = begin_note
        self.classes_num = classes_num
        self.max_piano_note = self.classes_num - 1

    def process(self, start_time, midi_events_time):
        '''Render the events overlapping [start_time, start_time + segment_seconds].

        Args:
            start_time: segment start, in the same time unit as the events.
            midi_events_time: sequence of rows indexed as
                ``row[0]`` = note start, ``row[1]`` = note end, ``row[2]`` = note.
                Rows are expected in increasing start order (the loop stops
                at the first row starting after the segment). May be empty.

        Returns:
            dict with two 1-D arrays of ``int(segment_seconds * frames_per_second) + 1``
            frames:
              * ``frame_roll``: note per frame (clipped to ``[0, classes_num - 1]``),
                ``classes_num`` where no note was written;
              * ``onset_offset``: 1 on a note's first frame, 2 on its last frame,
                0 elsewhere. Marks are omitted when the note is cut by the
                segment boundary.

        Raises:
            AssertionError: if a visited row ends before it starts.
        '''
        frames_per_second = self.frames_per_second
        classes_num = self.classes_num
        end_time = start_time + self.segment_seconds
        frames_num = int(self.segment_seconds * frames_per_second) + 1

        frame_roll = np.ones([frames_num]) * classes_num
        onset_offset = np.zeros([frames_num])

        # Index of the first event that reaches the segment. Defaults to
        # "past the end" so that an empty list, or a list whose events all
        # finish before start_time, renders an empty segment instead of
        # failing or leaking the last event into frame 0.
        events_num = len(midi_events_time)
        first_event = next(
            (idx for idx in range(events_num)
             if midi_events_time[idx][1] >= start_time),
            events_num)

        for i in range(first_event, events_num):
            st = midi_events_time[i][0]
            ed = midi_events_time[i][1]
            with_onset = True
            with_offset = True

            assert ed >= st

            if st > end_time:
                break
            if ed < start_time:
                # Finished before the segment began (possible when notes
                # overlap); writing it would clobber frame 0.
                continue
            if st < start_time:
                # Note started earlier: clip it and drop the onset mark.
                st = start_time
                with_onset = False
            if ed > end_time:
                # Note continues past the segment: clip it and drop the offset mark.
                ed = end_time
                with_offset = False

            note = int(midi_events_time[i][2])

            st = int((st - start_time) * frames_per_second)
            ed = int((ed - start_time) * frames_per_second)
            if ed <= st:
                # Every visited note occupies at least one frame.
                ed = st + 1
            frame_roll[st:ed] = np.clip(note, 0, classes_num - 1)

            if with_onset:
                onset_offset[st] = 1
            if with_offset:
                onset_offset[ed - 1] = 2

        target_dict = {}
        target_dict['frame_roll'] = frame_roll
        target_dict['onset_offset'] = onset_offset
        return target_dict
BOUND = 1

# Instrument names grouped by whether the instrument is in the "seen" or the
# "unseen" list.
INSTRUMENTS = {
    "seen": "Violin,Cello,Viola,Flute,Clarinet,Saxophone,Trumpet,Trombone".split(','),
    "unseen": "Horn,Tuba,Double_Bass,Bassoon,Oboe".split(','),
}

# Instruments whose presence stops the processing of a row in compute_results.
skip_instrs = []


def seen(instr):
    """Return the INSTRUMENTS group ("seen"/"unseen") of *instr*, or None.

    Matching ignores case, so spellings such as ``Double_bass`` and
    ``Double_Bass`` resolve to the same group.
    """
    wanted = instr.lower()
    for group, names in INSTRUMENTS.items():
        if any(wanted == name.lower() for name in names):
            return group
    return None


def ave_val(x):
    """Return the mean of *x*; NaN (without a numpy warning) when *x* is empty."""
    if np.size(x) == 0:
        return np.nan
    return np.mean(x)


def compute_results(json_data):
    """Average per-instrument scores by instrument, group and pair type.

    Args:
        json_data: nested mapping ``json_data[mode][sheet_name]`` -> list of
            rows, each row mapping an instrument name to a score.

    Returns:
        ``scores[mode][sheet_name]`` -> dict of averages keyed by
        "seen", "unseen", "all", "seen-seen", "seen-unseen", "unseen-unseen"
        and by every instrument name met in the rows. Categories that
        received no value average to NaN.

    Raises:
        KeyError: if a row names an instrument that is in neither group.
    """
    scores = {}
    for mode, data_per_mode in json_data.items():
        scores[mode] = {}
        for sheet_name, sheet_data in data_per_mode.items():
            results = {"seen-seen": [], "seen-unseen": [], "unseen-unseen": [],
                       "seen": [], "unseen": [], 'all': []}
            for row in sheet_data:
                pairs = []
                tags = []
                for instr in row:
                    if instr in skip_instrs:
                        break
                    group = seen(instr)
                    if group is None:
                        # Previously surfaced as an opaque ``KeyError: None``.
                        raise KeyError(f"unknown instrument: {instr}")
                    value = float(row[instr])
                    results.setdefault(instr, []).append(value)
                    pairs.append(value)
                    tags.append(group)
                    results[group].append(value)
                    results["all"].append(value)

                if len(tags) < 2:
                    continue
                pair_tag = '-'.join(tags)
                # The pair type does not depend on instrument order.
                if pair_tag == "unseen-seen":
                    pair_tag = "seen-unseen"
                results[pair_tag] += pairs

            for key in results:
                results[key] = ave_val(results[key])

            scores[mode][sheet_name] = results

    return scores


def get_json_data(score_path):
    """Load the score file at *score_path* and return its compute_results summary."""
    json_data = load_json(score_path)
    return compute_results(json_data)


def get_results(scores):
    """Return the [seen, unseen, all] entries of one compute_results sheet."""
    return [scores["seen"], scores["unseen"], scores["all"]]


def example(model_name, sheet_name, epoch):
    """Print the seen/unseen/all averages stored for one model, sheet and epoch."""
    score_path = f"evaluation/{model_name}/scores-{epoch}.json"
    json_data = load_json(score_path)
    scores = compute_results(json_data)
    sheet = scores[model_name][sheet_name]
    print(sheet["seen"], sheet["unseen"], sheet["all"])


if __name__ == "__main__":
    model_name = "MSS"
    sheet_name = "separation"

    # Alternative: model_name = "AMT", sheet_name = "transcription"

    for epoch in range(80):
        example(model_name, sheet_name, epoch)
def draw_table(data, epochs_num=10):
    """Print one LaTeX table cell (mean and 95% interval) per model.

    Args:
        data: nested dict ``data[sheet_name][tag][mode]`` holding a list of
            ``[epoch, score]`` pairs, as built by ``get_data``.
        epochs_num: number of epochs expected per model. Defaults to 10,
            the value that used to be hard-coded.

    Models listed in the module-level ``skip`` list are not printed.
    """
    for sheet_name in data:
        sheet_data = data[sheet_name]
        for tag in sheet_data:
            tag_data = sheet_data[tag]
            for mode in tag_data:
                if mode in skip:
                    continue
                results = np.asarray([c[1] for c in tag_data[mode]])
                assert len(results) == epochs_num
                mu = np.mean(results)
                deviation = results - mu
                # Square root of the summed squared deviations; dividing by n
                # below yields (population std) / sqrt(n), i.e. the standard
                # error of the mean.
                pstd = np.sqrt((deviation * deviation).sum())
                interv = pstd * 1.96 / epochs_num
                mu = round(mu, 2)
                interv = round(interv, 2)
                # "\\pm" spells the same text as the former "\pm" without
                # relying on an invalid escape sequence.
                print(mode, f"&${mu}\\pm{interv}$")
class UrmpTest(object):
    """Test-set reader driven by the test and query list files.

    Each row of the list read from TEST_DATA_LST_PATH is paired with the row
    at the same index of the list read from TEST_QUERY_LST_PATH. A row is
    split on a tab into a comma-separated instrument field and a
    comma-separated file field.
    """

    def __init__(self):
        file_lst = read_lst(TEST_DATA_LST_PATH)
        query_lst = read_lst(TEST_QUERY_LST_PATH)
        data_path = []
        for i, f in enumerate(file_lst):
            fs = f.split('\t')
            qs = query_lst[i].split('\t')
            instruments = fs[0].split(',')
            files = fs[1].split(',')
            query = qs[1].split(',')
            # Second-to-last "_"-separated token of the first reference path,
            # with any dots removed.
            sample_name = files[0].split('_')[-2].replace('.', '')
            sample = {}
            for j, instr in enumerate(instruments):
                sample[instr] = {'ref': files[j], 'query': query[j]}

            data_path.append({'sample_name': sample_name, 'instrs': sample})

        self.data_path = data_path

    def vad(self, x, frame_roll, frames_per_second=FRAMES_PER_SEC, sample_rate=SAMPLE_RATE, notes_num=NOTES_NUM_EXCLUDE_SILENCE):
        """Keep only the intervals found by ``librosa.effects.split``.

        The intervals returned by ``librosa.effects.split(y, top_db=18)`` are
        packed back to back at the start of the output waveform, and the
        matching slices of ``frame_roll`` are copied to the corresponding
        positions of a new frame roll pre-filled with ``notes_num``.

        Args:
            x: waveform; when 2-D only ``x[0]`` is processed and the result
                gets a leading axis of size 1 again.
            frame_roll: per-frame labels aligned with ``x``.

        Returns:
            ``(output, new_frame_roll)``, both truncated to the kept length.
        """
        frames_per_sample = frames_per_second * 1. / sample_rate

        if len(x.shape) == 2:
            y = x[0]
        else:
            y = x
        output = np.zeros_like(y)
        frame_roll_len = int(y.shape[-1] / sample_rate * frames_per_second + 1)
        frame_roll = frame_roll[: frame_roll_len]
        new_frame_roll = np.zeros_like(frame_roll) + notes_num
        split_index = librosa.effects.split(y, top_db=18)
        st = 0
        ed = 0
        for index in split_index:
            ed = st + index[1] - index[0]
            output[st: ed] = y[index[0]: index[1]]
            offset = (index[1] - index[0]) * frames_per_sample
            # ori_* index the packed output, obj_* index the original signal.
            ori_st = int(st * frames_per_sample)
            ori_ed = int(st * frames_per_sample + offset)
            obj_st = int(index[0] * frames_per_sample)
            obj_ed = int(index[0] * frames_per_sample + offset)
            offset = (ori_ed - ori_st) if ori_ed - ori_st < ed - st else obj_ed - obj_st
            new_frame_roll[ori_st: ori_st + offset] = frame_roll[obj_st: obj_st + offset]
            st = ed
        output = output[:ed]
        new_frame_roll = new_frame_roll[: int(ed * frames_per_sample)]

        if len(x.shape) == 2:
            output = output[None, :]
        return output, new_frame_roll

    def test_samples(self):
        """Yield one dict per test mixture.

        Each yielded dict has the keys ``'mix'`` (reference waveforms stacked
        along axis 0 after trimming to the shortest one), ``'sample_name'``
        and ``'instrs'``, a list of
        ``[instr, wav_ref, tr_ref, frame_roll, query, tr_query]`` entries.
        """
        for data in self.data_path:
            sample_name = data['sample_name']
            sample = data['instrs']
            samples = []
            for instr in sample:
                ref = sample[instr]['ref']
                queries = sample[instr]['query'].split(' ')
                query = []
                tr_query = []
                for q in queries:
                    with h5py.File(q, 'r') as hf:
                        waveform = int16_to_float32(hf['waveform'][:])[None, :]
                        # `np.int` was only an alias of the builtin `int` and
                        # no longer exists in NumPy >= 1.24.
                        frame_roll = hf['frame_roll'][:].astype(int)

                    waveform, frame_roll = self.vad(waveform, frame_roll)
                    query.append(waveform)
                    tr_query.append(parse_frameroll2annotation(frame_roll))

                with h5py.File(ref, 'r') as hf:
                    wav_ref = int16_to_float32(hf['waveform'][:])[None, :]
                    tr_ref = hf['note_annotations_txt'][0].decode()
                    frame_roll = hf['frame_roll'][:].astype(int)

                samples.append([instr, wav_ref, tr_ref, frame_roll, query, tr_query])

            # Trim every reference waveform to the shortest one so that they
            # can be stacked.
            ref_len = min(s[1].shape[-1] for s in samples)
            samples = [[s[0], s[1][:, :ref_len]] + s[2:] for s in samples]
            mix = np.stack([s[1] for s in samples], 0)
            test_sample = {'mix': mix, 'sample_name': sample_name, 'instrs': samples}
            yield test_sample
dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_3_va_39_Jerusalem.h5,dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_1_cl_23_Rejouissance.h5 7 | Flute,Violin dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_1_fl_30_Fugue.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_1_vn_39_Jerusalem.h5 8 | Saxophone,Clarinet dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_4_sax_41_Miserere.h5,dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_4_cl_40_Miserere.h5 9 | Saxophone,Tuba dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_4_sax_39_Jerusalem.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5 10 | Trombone,Saxophone dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_43_Chorale.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_4_sax_39_Jerusalem.h5 11 | Oboe,Saxophone dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_1_ob_11_Maria.h5,dataset/hdf5s/urmp/22_Rejouissance_sax_tbn_tba/AuSep_1_sax_22_Rejouissance.h5 12 | Oboe,Viola dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_1_ob_11_Maria.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_3_va_35_Rondeau.h5 13 | Flute,Cello dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/36_Rondeau_vn_vn_va_vc/AuSep_4_vc_36_Rondeau.h5 14 | Flute,Trombone dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_1_fl_40_Miserere.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_43_Chorale.h5 15 | Clarinet,Horn dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_4_cl_37_Rondeau.h5,dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_3_hn_31_Slavonic.h5 16 | Clarinet,Bassoon dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_4_cl_40_Miserere.h5,dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_5_bn_40_Miserere.h5 17 | Trumpet,Violin dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5 18 | Trumpet,Violin 
dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5 19 | Flute,Clarinet dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/29_Fugue_fl_fl_ob_cl/AuSep_4_cl_29_Fugue.h5 20 | Flute,Saxophone dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_4_sax_30_Fugue.h5 21 | Bassoon,Oboe dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_4_bn_28_Fugue.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_3_ob_30_Fugue.h5 22 | Trumpet,Trombone dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_4_tbn_33_Elise.h5 23 | Trumpet,Horn dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_2_tpt_31_Slavonic.h5,dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_3_hn_34_Fugue.h5 24 | Tuba,Trumpet dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_5_tba_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5 25 | Trumpet,Trombone dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5,dataset/hdf5s/urmp/21_Rejouissance_cl_tbn_tba/AuSep_2_tbn_21_Rejouissance.h5 26 | Trumpet,Horn dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_2_tpt_31_Slavonic.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_3_hn_42_Arioso.h5 27 | Cello,Oboe dataset/hdf5s/urmp/36_Rondeau_vn_vn_va_vc/AuSep_4_vc_36_Rondeau.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_3_ob_30_Fugue.h5 28 | Trumpet,Tuba dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_2_tpt_33_Elise.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5 29 | Trumpet,Trombone dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_1_tpt_34_Fugue.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_4_tbn_33_Elise.h5 30 | Horn,Trumpet 
dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_3_hn_33_Elise.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5 31 | Trumpet,Horn dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_3_hn_33_Elise.h5 32 | Trumpet,Trombone dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_2_tpt_33_Elise.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_42_Arioso.h5 -------------------------------------------------------------------------------- /data/urmp/testset/test.lst: -------------------------------------------------------------------------------- 1 | Violin,Saxophone dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_1_vn_39_Jerusalem.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_4_sax_39_Jerusalem.h5 2 | Violin,Double_Bass dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_2_vn_39_Jerusalem.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_5_db_39_Jerusalem.h5 3 | Cello,Viola dataset/hdf5s/urmp/38_Jerusalem_vn_vn_va_vc_db/AuSep_4_vc_38_Jerusalem.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_3_va_39_Jerusalem.h5 4 | Violin,Double_Bass dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_4_db_35_Rondeau.h5 5 | Violin,Cello dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_1_vn_35_Rondeau.h5,dataset/hdf5s/urmp/36_Rondeau_vn_vn_va_vc/AuSep_4_vc_36_Rondeau.h5 6 | Viola,Clarinet dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_3_va_35_Rondeau.h5,dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_4_cl_37_Rondeau.h5 7 | Flute,Violin dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5 8 | Saxophone,Clarinet dataset/hdf5s/urmp/22_Rejouissance_sax_tbn_tba/AuSep_1_sax_22_Rejouissance.h5,dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_1_cl_23_Rejouissance.h5 9 | Saxophone,Tuba 
dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_2_sax_23_Rejouissance.h5,dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_3_tba_23_Rejouissance.h5 10 | Trombone,Saxophone dataset/hdf5s/urmp/21_Rejouissance_cl_tbn_tba/AuSep_2_tbn_21_Rejouissance.h5,dataset/hdf5s/urmp/22_Rejouissance_sax_tbn_tba/AuSep_1_sax_22_Rejouissance.h5 11 | Oboe,Saxophone dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_3_ob_30_Fugue.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_4_sax_30_Fugue.h5 12 | Oboe,Viola dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_2_ob_28_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_3_va_32_Fugue.h5 13 | Flute,Cello dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_1_fl_30_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_4_vc_32_Fugue.h5 14 | Flute,Trombone dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_2_fl_30_Fugue.h5,dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_4_tbn_34_Fugue.h5 15 | Clarinet,Horn dataset/hdf5s/urmp/29_Fugue_fl_fl_ob_cl/AuSep_4_cl_29_Fugue.h5,dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_3_hn_34_Fugue.h5 16 | Clarinet,Bassoon dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_3_cl_28_Fugue.h5,dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_4_bn_28_Fugue.h5 17 | Trumpet,Violin dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_1_tpt_34_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_1_vn_32_Fugue.h5 18 | Trumpet,Violin dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_2_tpt_34_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_2_vn_32_Fugue.h5 19 | Flute,Clarinet dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_1_fl_40_Miserere.h5,dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_4_cl_40_Miserere.h5 20 | Flute,Saxophone dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_2_fl_40_Miserere.h5,dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_4_sax_41_Miserere.h5 21 | Bassoon,Oboe 
dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_5_bn_40_Miserere.h5,dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_3_ob_40_Miserere.h5 22 | Trumpet,Trombone dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_43_Chorale.h5 23 | Trumpet,Horn dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_43_Chorale.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_3_hn_43_Chorale.h5 24 | Tuba,Trumpet dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5 25 | Trumpet,Trombone dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_2_tpt_33_Elise.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_4_tbn_33_Elise.h5 26 | Trumpet,Horn dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_1_tpt_33_Elise.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_3_hn_33_Elise.h5 27 | Cello,Oboe dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_2_vc_11_Maria.h5,dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_1_ob_11_Maria.h5 28 | Trumpet,Tuba dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_5_tba_42_Arioso.h5 29 | Trumpet,Trombone dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_42_Arioso.h5 30 | Horn,Trumpet dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_3_hn_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5 31 | Trumpet,Horn dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_2_tpt_31_Slavonic.h5,dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_3_hn_31_Slavonic.h5 32 | Trumpet,Trombone dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_1_tpt_31_Slavonic.h5,dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_4_tbn_31_Slavonic.h5 -------------------------------------------------------------------------------- 
def calculate_second(tempo, ticks_per_beat, onset_ticks, ticks):
    """Return the duration in seconds of `ticks` ticks starting at `onset_ticks`.

    Args:
        tempo: sequence indexed by absolute tick that gives the tempo, in
            microseconds per beat, at that tick.
        ticks_per_beat: MIDI resolution of the file.
        onset_ticks: absolute tick at which the interval starts.
        ticks: length of the interval in ticks.

    Returns:
        The accumulated duration; 0 when `ticks` is 0.
    """
    second = 0
    for i in range(ticks):
        # Use the tempo of the tick being integrated. The previous code
        # indexed `onset_ticks + ticks` on every iteration, which ignored
        # tempo changes inside the interval and read one tick past its end.
        microseconds_per_beat = tempo[onset_ticks + i]
        beats_per_second = 1e6 / microseconds_per_beat
        ticks_per_second = ticks_per_beat * beats_per_second
        second += 1. / ticks_per_second
    return second
def convert2frameRoll(tracks, seconds):
    """Convert the messages of one MIDI track into a frame-level piano roll.

    Args:
        tracks: message strings of the track; the first space-separated
            token is the message type, the third and fourth tokens are read
            as ``note=<n>`` and ``velocity=<v>``.
        seconds: absolute time in seconds of each message, same length as
            `tracks`.

    Returns:
        ``(frameRoll, frameRoll_pairs)``. ``frameRoll`` has shape
        ``[NOTES_NUM + 1, frames]``; row ``NOTES_NUM`` marks frames in which
        no note is active. ``frameRoll_pairs`` lists
        ``[pitch_index, onset_frame, offset_frame]`` for every closed note.
    """
    last_second = seconds[-1] if len(seconds) > 0 else 0
    frameRoll = np.zeros([NOTES_NUM + 1, frame(last_second) + 1])
    frameRoll_pairs = []
    # note -> onset frame of the currently sounding note, or -1 once closed.
    buffer_notes = {}
    for i, tr in enumerate(tracks):
        detailed_tr = devide(tr)
        tag = detailed_tr[0]
        if tag not in ["note_on", "note_off"]:
            continue
        velocity = c2velocity(detailed_tr[3])
        current_frame = frame(seconds[i])
        note = c2note(detailed_tr[2])
        # Range-check the row index, not the raw MIDI number: otherwise
        # notes below BEGIN_NOTE wrap around to unrelated rows and valid
        # notes at or above NOTES_NUM are discarded.
        pitch_index = note - BEGIN_NOTE
        if pitch_index >= NOTES_NUM or pitch_index < 0:
            continue

        if tag == "note_on" and velocity > 0:
            buffer_notes[note] = current_frame

        # ">= 0" so that a note starting at frame 0 is not mistaken for the
        # -1 "closed" marker.
        elif note in buffer_notes and buffer_notes[note] >= 0:
            onset = buffer_notes[note]
            frameRoll[pitch_index, onset: current_frame] = 1
            frameRoll_pairs.append([pitch_index, onset, current_frame])
            buffer_notes[note] = -1

    # Flag every frame without an active note in the extra last row.
    silent_frames = frameRoll.sum(axis=0) < 1
    frameRoll[NOTES_NUM, silent_frames] = 1

    return frameRoll, frameRoll_pairs
class WeiMidi(object):
    """A MIDI file parsed into one track object per track via `read_midi`."""

    def __init__(self, path):
        self.midi_path = path
        self.midi_tracks = read_midi(path)
        self.maxSec = self.get_maxSec()

    def get_maxSec(self):
        """Return the largest final value among each track's last pair.

        Only the last entry of every track's pair list is inspected; tracks
        without pairs are ignored and 0 is returned when none qualifies.
        """
        latest = 0
        for track_id in range(self.tracks_num()):
            pairs = self.frameRoll_pair(track_id)
            if not pairs:
                continue
            final_pair = pairs[-1]
            if not final_pair:
                continue
            candidate = final_pair[-1]
            if not latest > candidate:
                latest = candidate
        return latest

    def is_mono(self, n):
        """Return the `isMono` flag of track `n`."""
        track = self.midi_tracks[n]
        return track.isMono

    def tracks_num(self):
        """Return the number of parsed tracks."""
        return len(self.midi_tracks)

    def frameRoll_pair(self, n):
        """Return the `frameRollPair` list of track `n`."""
        track = self.midi_tracks[n]
        return track.frameRollPair

    def __getitem__(self, n):
        """Return the result of `monoFrameRoll()` for track `n` (int index only)."""
        assert isinstance(n, int)
        track = self.midi_tracks[n]
        return track.monoFrameRoll()
def load_audio_pair(est_path, ref_path, sample_rate=SAMPLE_RATE):
    """Load estimated and reference audio into two zero-padded arrays.

    Args:
        est_path: iterable of paths of the estimated sources.
        ref_path: iterable of paths of the reference sources.
        sample_rate: rate passed to ``librosa.load``; files are loaded mono.

    Returns:
        ``(est, ref)``, two arrays of shape ``[len(ref_path), max_len]``
        where ``max_len`` is the longest loaded signal. Shorter signals are
        right-padded with zeros.
    """
    ests = [librosa.load(path, sr=sample_rate, mono=True)[0] for path in est_path]
    refs = [librosa.load(path, sr=sample_rate, mono=True)[0] for path in ref_path]

    # Size the buffers from BOTH lists. Measuring only the estimates made
    # the copy below fail whenever a reference was longer than every
    # estimate.
    max_len = -1
    for signal in ests + refs:
        if signal.shape[-1] > max_len:
            max_len = signal.shape[-1]

    ref = np.zeros([len(refs), max_len])
    for i in range(len(refs)):
        ref[i, : refs[i].shape[-1]] = refs[i]

    est = np.zeros([len(refs), max_len])
    for i in range(len(refs)):
        est[i, : ests[i].shape[-1]] = ests[i]
    return est, ref
def measure_for_separation(est_path, ref_path, sample_rate=SAMPLE_RATE):
    """Compute SDR, SIR and SAR with ``mir_eval.separation.bss_eval_sources``.

    Args:
        est_path: estimated sources, given as already loaded audio, as one
            file path, or as a list/tuple of file paths.
        ref_path: reference sources in the same form as `est_path`.
        sample_rate: rate used when the inputs are paths.

    Returns:
        ``(sdr, sir, sar)`` as returned by ``bss_eval_sources`` with
        ``compute_permutation=True``; the permutation is discarded.
    """
    # A single path is treated as a one-element list; iterating the bare
    # string would hand individual characters to the loader.
    if isinstance(est_path, str):
        est_path = [est_path]
        if isinstance(ref_path, str):
            ref_path = [ref_path]

    paths_given = (
        isinstance(est_path, (list, tuple))
        and len(est_path) > 0
        and all(isinstance(path, str) for path in est_path)
    )
    if paths_given:
        est, ref = load_audio_pair(est_path, ref_path, sample_rate)
    else:
        # Anything else is passed through unchanged as audio data.
        est = est_path
        ref = ref_path
    (sdr, sir, sar, perm) = mir_eval.separation.bss_eval_sources(ref, est, compute_permutation=True)

    return sdr, sir, sar
if __name__=='__main__':
    # Manual smoke test on two estimate/reference pairs.
    est_path = ['evaluation/separation/test/AuSep_1_vn_27_King_est_1_.wav', 'evaluation/separation/test/AuSep_2_fl_30_Fugue_est_1_.wav']
    ref_path = ['evaluation/separation/test/AuSep_1_vn_27_King_ref_1_.wav', 'evaluation/separation/test/AuSep_2_fl_30_Fugue_ref_1_.wav']
    # `separation_evaluation` is not defined in this module. Load the audio
    # explicitly and score it with `measure_for_separation`.
    est, ref = load_audio_pair(est_path, ref_path)
    sdr, sir, sar = measure_for_separation(est, ref)
    print(sdr)
def spec2wav(x, cos, sin, wav_len, syn_phase=0, device="cuda"):
    """Turn a magnitude spectrogram back into a waveform.

    Args:
        x: magnitude spectrogram laid out as channels * frames * n_fft (per
            the original comment). One zero bin is appended on the last
            axis before inversion.
        cos, sin: phase components multiplied with `x` to form the real and
            imaginary parts.
        wav_len: length passed to the inverse STFT. Ignored when
            ``syn_phase == 1``, where it is recomputed from the frame count.
        syn_phase: 0 inverts with the given phase; 1 uses Griffin-Lim and
            ignores `cos` and `sin`; 2 runs 100 rounds of inverse STFT
            followed by phase re-estimation with ``wav2spec``.
        device: ``"cuda"`` moves the analysis window to the GPU.

    Returns:
        The synthesized waveform tensor.

    Raises:
        ValueError: if `syn_phase` is not 0, 1 or 2.
    """
    x = F.pad(x, (0, 1), "constant", 0)
    fft_window = FFT_WINDOW.cuda() if device == "cuda" else FFT_WINDOW

    def _inverse_stft(cos_part, sin_part):
        # Stack real and imaginary parts on a trailing axis and invert.
        spec = torch.stack([x * cos_part, x * sin_part], -1).transpose(1, 2)
        return torch.istft(spec,
                           n_fft=N_FFT,
                           hop_length=HOP_SIZE,
                           win_length=WINDOW_SIZE,
                           window=fft_window,
                           center=True,
                           normalized=False,
                           onesided=None,
                           length=wav_len,
                           return_complex=False)

    if syn_phase == 0:
        return _inverse_stft(cos, sin)

    if syn_phase == 1:
        wav_len = int((x.shape[-2] - 1) / FRAMES_PER_SEC * SAMPLE_RATE)
        return AF.griffinlim(x.transpose(1, 2),
                             window=fft_window,
                             n_fft=N_FFT,
                             hop_length=HOP_SIZE,
                             win_length=WINDOW_SIZE,
                             power=1,
                             normalized=False,
                             length=wav_len,
                             n_iter=N_ITER,
                             momentum=0,
                             rand_init=False)

    if syn_phase == 2:
        iters_num = 100
        for i in range(iters_num):
            wav = _inverse_stft(cos, sin)
            if i < iters_num - 1:
                # Re-estimate the phase on the same device as the window;
                # the default would force CUDA even for device="cpu".
                _, cos, sin = wav2spec(wav, device=device)
        return wav

    raise ValueError(f"unsupported syn_phase: {syn_phase!r}")
def devide_into_batches(x, pad_value=0, overlap_edge=PAD_FRAME, duration_axis=-1):
    """Cut `x` into overlapping fixed-length segments and group them into batches.

    Args:
        x: tensor to split along `duration_axis`.
        pad_value: value used for all padding. The special value -1 selects
            the branch that does not extend the signal to a whole number of
            segments.
        overlap_edge: number of frames padded on each side and shared between
            neighbouring segments.
        duration_axis: axis of `x` that is split.

    Returns:
        A list of tensors; each stacks up to INFERENCE_BATCH_SIZE segments of
        BATCH_FRAMES_NUM frames along a new leading axis.
    """

    # Add a leading and a trailing singleton axis, then swap the duration
    # axis (index shifted to account for the new axes) into the last position.
    x = x.unsqueeze(0).unsqueeze(-1)
    duration_axis = duration_axis - 1 if duration_axis < 0 else duration_axis + 1
    x = x.transpose(duration_axis, -1)

    frames_num = x.shape[-1]

    # Consecutive segments advance by the segment length minus both overlaps.
    batch_frames_num_non_padding = BATCH_FRAMES_NUM - overlap_edge * 2
    segments_num = frames_num // batch_frames_num_non_padding

    if pad_value == -1:
        # NOTE(review): this slices axis 0, which is the singleton added by
        # unsqueeze(0) above, not the duration axis (now last). It looks like
        # truncation along the duration was intended - confirm before changing.
        x = x[ :segments_num * batch_frames_num_non_padding]
    elif segments_num * batch_frames_num_non_padding < frames_num:
        # Right-pad so that the remainder forms one more full segment.
        x = F.pad(x, (0, int((segments_num + 1) * batch_frames_num_non_padding) - frames_num), 'constant', value=pad_value)
        segments_num += 1

    # Pad both ends so that the first and last segments also have overlap.
    x = F.pad(x, (overlap_edge, overlap_edge), 'constant', value=pad_value)

    # Bring the duration to axis 0 so that segments are plain slices.
    x = x.transpose(-1, 0)
    samples = []
    for i in range(segments_num):
        st = i * batch_frames_num_non_padding
        ed = st + BATCH_FRAMES_NUM
        # Move the segment back to the duration position and drop the two
        # helper axes added at the top.
        sample = x[st : ed].transpose(0, duration_axis).squeeze(0).squeeze(-1)
        samples.append(sample)

    batches = []
    samples_num = len(samples)
    # Ceiling division: the last batch may hold fewer segments.
    batches_num = (samples_num + INFERENCE_BATCH_SIZE - 1) // INFERENCE_BATCH_SIZE
    for i in range(batches_num):
        st = i * INFERENCE_BATCH_SIZE
        ed = st + INFERENCE_BATCH_SIZE
        ed = samples_num if ed > samples_num else ed
        batches.append(torch.stack(samples[st : ed], 0))
    return batches
= x.unsqueeze(0).transpose(0, duration_axis) 165 | if duration_axis >= 0: 166 | duration_axis -= 1 167 | x = x[overlap_edge : -overlap_edge].transpose(0, 1).flatten(0, 1).transpose(0, duration_axis).squeeze(0) 168 | return x 169 | 170 | 171 | def merge_from_list(x, index=0): 172 | results = [] 173 | for unit in x: 174 | if type(unit) in [tuple, list]: 175 | results.append(unit[index]) 176 | else: 177 | results.append(unit) 178 | return torch.cat(results, 0) 179 | 180 | -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-190.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9155356872822401, "Saxophone": 0.4922168581534541}, {"Violin": 0.906904571797, "Double_Bass": 0.7330175196626085}, {"Cello": 0.7716511678000424, "Viola": 0.6434034747048931}, {"Violin": 0.9200418795170666, "Double_Bass": 0.8179615908098763}, {"Violin": 0.855165271815949, "Cello": 0.8337352880755539}, {"Viola": 0.6149773318368282, "Clarinet": 0.7710395391594762}, {"Flute": 0.7083241588874832, "Violin": 0.887958054553282}, {"Saxophone": 0.6605534252500374, "Clarinet": 0.6594286094936119}, {"Saxophone": 0.5860077041512487, "Tuba": 0.5673013015675976}, {"Trombone": 0.7877233204170254, "Saxophone": 0.03781229740252132}, {"Oboe": 0.18800959366764133, "Saxophone": 0.44948093132197087}, {"Oboe": 0.4796317221126738, "Viola": 0.8448553667172828}, {"Flute": 0.8280862424486122, "Cello": 0.8930023244670272}, {"Flute": 0.8267930141461336, "Trombone": 0.8788783567444686}, {"Clarinet": 0.7461117325660884, "Horn": 0.7032581753974934}, {"Clarinet": 0.6563554459912304, "Bassoon": 0.2353563387884311}, {"Trumpet": 0.8619435148322085, "Violin": 0.724248884975393}, {"Trumpet": 0.46115154108952194, "Violin": 0.7666968607472413}, {"Flute": 0.8527652724900775, "Clarinet": 0.7393480261360227}, {"Flute": 0.6586441061688577, "Saxophone": 0.2491150773942218}, {"Bassoon": 
0.46117680233655867, "Oboe": 0.3668244893427309}, {"Trumpet": 0.6322599603098203, "Trombone": 0.8572844451707077}, {"Trumpet": 0.4909331593806311, "Horn": 0.8389384407916984}, {"Tuba": 0.5497697590729947, "Trumpet": 0.855529304200216}, {"Trumpet": 0.47877785625509706, "Trombone": 0.6518660545606864}, {"Trumpet": 0.8618582497239909, "Horn": 0.8798279812510436}, {"Cello": 0.5947627192421981, "Oboe": 0.409469028065668}, {"Trumpet": 0.9389893409351974, "Tuba": 0.8680561027336061}, {"Trumpet": 0.798509790637697, "Trombone": 0.798237047904076}, {"Horn": 0.7659901411178002, "Trumpet": 0.48924055691281143}, {"Trumpet": 0.7763481761537584, "Horn": 0.7618845583301896}, {"Trumpet": 0.6581694910442168, "Trombone": 0.7394180175342637}], "separation": [{"Violin": 6.541116437371974, "Saxophone": 1.8881928954020841}, {"Violin": 9.085971834725244, "Double_Bass": 6.939945149867201}, {"Cello": 7.948946571611801, "Viola": 2.9854057021232103}, {"Violin": 8.027842855421742, "Double_Bass": 7.623472124412347}, {"Violin": 9.628268860563933, "Cello": 10.901597768940217}, {"Viola": 1.7124445277319533, "Clarinet": 4.8651812471553075}, {"Flute": 8.468505210426615, "Violin": 5.89170621585735}, {"Saxophone": -1.7425961502063514, "Clarinet": 1.789691326687349}, {"Saxophone": 5.058587893150619, "Tuba": 4.732612885768726}, {"Trombone": 6.439471796882162, "Saxophone": -12.734088587651616}, {"Oboe": -7.105028712955862, "Saxophone": -1.4624145596087077}, {"Oboe": 0.6622300530031615, "Viola": 6.4944053285021}, {"Flute": 8.588378412276096, "Cello": 11.381660139740637}, {"Flute": 11.285043361386428, "Trombone": 6.808830188676809}, {"Clarinet": 6.456273989937547, "Horn": 4.743802232015218}, {"Clarinet": 5.244730530716417, "Bassoon": -10.493317188874995}, {"Trumpet": 4.340486727770855, "Violin": 5.290570254455115}, {"Trumpet": 0.6531070386405622, "Violin": 4.709271765210785}, {"Flute": 12.234576327549737, "Clarinet": 8.912410369226688}, {"Flute": 8.201778642251398, "Saxophone": -5.0961533288595575}, 
{"Bassoon": 5.596215024044069, "Oboe": -0.9849432318223452}, {"Trumpet": 4.45269382716973, "Trombone": 6.427959235523154}, {"Trumpet": 0.20277078878847551, "Horn": 9.49491145739134}, {"Tuba": 8.066828307649828, "Trumpet": 9.994846913719279}, {"Trumpet": 0.7814876576097167, "Trombone": 2.794755491462097}, {"Trumpet": 8.926938201028934, "Horn": 8.991136171921566}, {"Cello": 7.020040020491056, "Oboe": 6.0812640429824985}, {"Trumpet": 8.88255234062147, "Tuba": 8.051688364863747}, {"Trumpet": 4.2050602035974585, "Trombone": 4.82251196830306}, {"Horn": 5.624537966554493, "Trumpet": -2.830977900011998}, {"Trumpet": 5.897005864337276, "Horn": 8.267664444168025}, {"Trumpet": 6.30121119732959, "Trombone": 3.751358755393699}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.613001284988619, "Saxophone": 6.908647788563865}, {"Violin": 9.889224519215695, "Double_Bass": 5.908313175088509}, {"Cello": 7.950142830194029, "Viola": 5.165176589502689}, {"Violin": 8.197718446853044, "Double_Bass": 7.135280756021612}, {"Violin": 9.412095105067737, "Cello": 10.62875802088553}, {"Viola": 3.415010862232417, "Clarinet": 7.018410524186724}, {"Flute": 8.771259771677965, "Violin": 8.679388561407553}, {"Saxophone": -0.8469234611064538, "Clarinet": 3.156610247604527}, {"Saxophone": 10.107492749043523, "Tuba": 5.391805023134176}, {"Trombone": 6.344937936708229, "Saxophone": 7.449428411196601}, {"Oboe": 4.354182781300693, "Saxophone": 4.535728677183428}, {"Oboe": 3.3279251141151307, "Viola": 7.941159170648245}, {"Flute": 7.517535391227003, "Cello": 10.472847415042636}, {"Flute": 10.774473481643323, "Trombone": 7.022919250744013}, {"Clarinet": 8.423195318092167, "Horn": 10.013757977040878}, {"Clarinet": 10.330679457956993, "Bassoon": 4.844246401602945}, {"Trumpet": 5.439569589818225, "Violin": 5.5061771445010965}, {"Trumpet": 3.1963841212511213, "Violin": 5.055929487124545}, {"Flute": 14.526501153758142, "Clarinet": 12.725290612709902}, {"Flute": 10.088511882368177, "Saxophone": 6.786088697778048}, 
{"Bassoon": 10.047690078508866, "Oboe": -2.669156077895666}, {"Trumpet": 4.376574626491411, "Trombone": 5.963778598996368}, {"Trumpet": 10.521624108965868, "Horn": 11.668561666943846}, {"Tuba": 6.828793099384825, "Trumpet": 9.923279545351896}, {"Trumpet": 3.636903746109033, "Trombone": 6.012195640826754}, {"Trumpet": 9.820882133987544, "Horn": 10.475290472025538}, {"Cello": 6.770775799798311, "Oboe": 7.108617519921708}, {"Trumpet": 8.935373367206758, "Tuba": 8.200000625712908}, {"Trumpet": 5.243726242520109, "Trombone": 6.64422064218101}, {"Horn": 8.085503508412174, "Trumpet": 3.5976478306216753}, {"Trumpet": 8.737509266782887, "Horn": 10.902989949246876}, {"Trumpet": 5.653195910534603, "Trombone": 3.8592234846691973}]}} -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-191.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9152028513501567, "Saxophone": 0.5281816750237217}, {"Violin": 0.8894809825456819, "Double_Bass": 0.7564888710487585}, {"Cello": 0.7955522662816865, "Viola": 0.589311791845628}, {"Violin": 0.9240299544268021, "Double_Bass": 0.8178991333418093}, {"Violin": 0.8556467102425984, "Cello": 0.8420676657261306}, {"Viola": 0.6177582103382321, "Clarinet": 0.7477714226716111}, {"Flute": 0.7147039244734346, "Violin": 0.8772163576984613}, {"Saxophone": 0.6055464920242927, "Clarinet": 0.6087610358509389}, {"Saxophone": 0.6233418086930349, "Tuba": 0.508229096911424}, {"Trombone": 0.7511127309593514, "Saxophone": 0.05001789349254738}, {"Oboe": 0.1642970139611122, "Saxophone": 0.42103724990527547}, {"Oboe": 0.46962586287749836, "Viola": 0.829122827556487}, {"Flute": 0.8133807880925873, "Cello": 0.9023525384577459}, {"Flute": 0.8009353684165248, "Trombone": 0.8675524570849545}, {"Clarinet": 0.7944425367678136, "Horn": 0.6762319968246755}, {"Clarinet": 0.5851073148392318, "Bassoon": 0.29180740150863377}, {"Trumpet": 
0.8792550555115606, "Violin": 0.7914435302799414}, {"Trumpet": 0.709345011107144, "Violin": 0.8295938404747756}, {"Flute": 0.7801502072146993, "Clarinet": 0.7556575436758589}, {"Flute": 0.6214883931019535, "Saxophone": 0.30073061133925605}, {"Bassoon": 0.5168496586940959, "Oboe": 0.35347643106873966}, {"Trumpet": 0.676842780237606, "Trombone": 0.7377590802196126}, {"Trumpet": 0.6201216700778358, "Horn": 0.860993125500221}, {"Tuba": 0.4554720562926493, "Trumpet": 0.916079589572939}, {"Trumpet": 0.5820072090019888, "Trombone": 0.5139270401889783}, {"Trumpet": 0.9129983018405798, "Horn": 0.8790249797158725}, {"Cello": 0.568129456612791, "Oboe": 0.3900170624251111}, {"Trumpet": 0.9402001676026692, "Tuba": 0.8388586679418029}, {"Trumpet": 0.8125182774087631, "Trombone": 0.3654825263959058}, {"Horn": 0.7567374238167376, "Trumpet": 0.7574156510224312}, {"Trumpet": 0.7504505429275314, "Horn": 0.7516463142873844}, {"Trumpet": 0.6559709774728097, "Trombone": 0.6196404533078165}], "separation": [{"Violin": 6.01402885365448, "Saxophone": 2.0227570869102007}, {"Violin": 8.617024848167853, "Double_Bass": 7.662765396033107}, {"Cello": 7.881505509268791, "Viola": 3.0242663148502076}, {"Violin": 7.6322299886753155, "Double_Bass": 7.215900709436623}, {"Violin": 9.669083919848568, "Cello": 10.73390653778075}, {"Viola": 2.0173184813998586, "Clarinet": 4.237385898067096}, {"Flute": 9.562086939985486, "Violin": 4.822729696220785}, {"Saxophone": -2.111665522425011, "Clarinet": 1.3633824119525142}, {"Saxophone": 6.589518980597719, "Tuba": 4.802625019518544}, {"Trombone": 6.375799689229939, "Saxophone": -11.920952938245865}, {"Oboe": -7.929629543872667, "Saxophone": -2.857517891185486}, {"Oboe": 0.4822105063588595, "Viola": 5.998177780394043}, {"Flute": 8.647281656129325, "Cello": 10.861208186542019}, {"Flute": 11.439602537192698, "Trombone": 5.878526415180094}, {"Clarinet": 7.992234264260642, "Horn": 3.7660556040093573}, {"Clarinet": 3.340546292724547, "Bassoon": -9.712649075597511}, 
{"Trumpet": 6.27280232568041, "Violin": 5.365404330326867}, {"Trumpet": 2.7530440536987717, "Violin": 3.9860653272992335}, {"Flute": 12.375022855565536, "Clarinet": 8.162547020325995}, {"Flute": 7.939344426676529, "Saxophone": -6.436437203948287}, {"Bassoon": 2.004995986552016, "Oboe": -1.003342496734781}, {"Trumpet": 4.02280288973232, "Trombone": 3.3231483427048083}, {"Trumpet": 2.6523800307458574, "Horn": 9.812528818015107}, {"Tuba": 8.449304094268616, "Trumpet": 10.206241245150133}, {"Trumpet": 2.300514857507982, "Trombone": 1.056134029359734}, {"Trumpet": 8.70879450205567, "Horn": 8.222110059730491}, {"Cello": 6.991849426886853, "Oboe": 5.712956343758789}, {"Trumpet": 8.792307112559548, "Tuba": 8.36211006075664}, {"Trumpet": 4.3560459404333205, "Trombone": -1.01285309965147}, {"Horn": 5.227601021073907, "Trumpet": 1.612272678084615}, {"Trumpet": 5.762252492894948, "Horn": 8.392794003657357}, {"Trumpet": 6.279908802431981, "Trombone": 2.8378479937503833}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.734891416669992, "Saxophone": 5.670513830788097}, {"Violin": 9.689815241115252, "Double_Bass": 6.680106986621591}, {"Cello": 8.066595574563912, "Viola": 5.8320503802104895}, {"Violin": 8.049740214186382, "Double_Bass": 6.784126561462066}, {"Violin": 9.352149454605094, "Cello": 10.418975006104944}, {"Viola": 3.8625325400910637, "Clarinet": 6.853707832580569}, {"Flute": 9.208372037524379, "Violin": 7.843164226586619}, {"Saxophone": -0.31587457486004195, "Clarinet": 2.2060298707047563}, {"Saxophone": 9.911227702907592, "Tuba": 6.001949128490924}, {"Trombone": 6.591199171716987, "Saxophone": 7.278233690017634}, {"Oboe": 4.459946738503614, "Saxophone": 3.961157059636802}, {"Oboe": 3.4000403955803113, "Viola": 8.082460870375295}, {"Flute": 7.884384577290541, "Cello": 10.244802425176335}, {"Flute": 11.183964535092024, "Trombone": 6.314344874665632}, {"Clarinet": 8.746251734141381, "Horn": 9.68137852315886}, {"Clarinet": 9.591317061469242, "Bassoon": 4.646300891024933}, 
{"Trumpet": 6.352678361583814, "Violin": 4.713868852928564}, {"Trumpet": 4.0853297145362095, "Violin": 4.380285905180017}, {"Flute": 14.763996039360674, "Clarinet": 12.32383756209942}, {"Flute": 10.174442153912256, "Saxophone": 5.586477875288363}, {"Bassoon": 7.717122603372129, "Oboe": -1.3305717999047202}, {"Trumpet": 5.041142276629325, "Trombone": 5.275001433382017}, {"Trumpet": 10.811707261233972, "Horn": 11.201716771321353}, {"Tuba": 6.50945209650572, "Trumpet": 10.168448968314344}, {"Trumpet": 4.616370173021068, "Trombone": 5.687129705134582}, {"Trumpet": 10.02049907450755, "Horn": 9.491519642671815}, {"Cello": 6.856453262232307, "Oboe": 7.312102669633125}, {"Trumpet": 8.787208208398678, "Tuba": 8.772666590958496}, {"Trumpet": 5.216224265442982, "Trombone": 6.321188438923736}, {"Horn": 7.356662165923753, "Trumpet": 4.597759879805192}, {"Trumpet": 8.958492212961064, "Horn": 10.581214456138856}, {"Trumpet": 6.023777070845441, "Trombone": 3.5091096593213926}]}} -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-198.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.8924510093062347, "Saxophone": 0.5605541846379607}, {"Violin": 0.8850512431237135, "Double_Bass": 0.7522365954911729}, {"Cello": 0.7734280844723757, "Viola": 0.451816253948843}, {"Violin": 0.9257626072598296, "Double_Bass": 0.8315987864389841}, {"Violin": 0.8607913249122879, "Cello": 0.8442287283217075}, {"Viola": 0.5543061770300618, "Clarinet": 0.7288928697844018}, {"Flute": 0.7437707957158572, "Violin": 0.8778654375699722}, {"Saxophone": 0.5888791374571082, "Clarinet": 0.4564262813082402}, {"Saxophone": 0.6410632199132241, "Tuba": 0.5624406255062508}, {"Trombone": 0.7729754407638761, "Saxophone": 0.0761173130695134}, {"Oboe": 0.1872876190783464, "Saxophone": 0.4214673736438897}, {"Oboe": 0.4664665413852489, "Viola": 0.8188682931019281}, {"Flute": 
0.7803131938372087, "Cello": 0.8964779841298565}, {"Flute": 0.7447517470737175, "Trombone": 0.8858520416386712}, {"Clarinet": 0.7122604085663651, "Horn": 0.67653382760051}, {"Clarinet": 0.5603713199125716, "Bassoon": 0.2656289047239678}, {"Trumpet": 0.8561317932338018, "Violin": 0.6851677257363626}, {"Trumpet": 0.7053621494425206, "Violin": 0.7541451225738156}, {"Flute": 0.7517207044365125, "Clarinet": 0.6016313959159212}, {"Flute": 0.4805246769756156, "Saxophone": 0.1518930165071761}, {"Bassoon": 0.3648943057668641, "Oboe": 0.28722670374947173}, {"Trumpet": 0.6094735557773957, "Trombone": 0.835499646140991}, {"Trumpet": 0.7051194098353029, "Horn": 0.8579551626059381}, {"Tuba": 0.5844820450013005, "Trumpet": 0.9155701837559244}, {"Trumpet": 0.5854111225487281, "Trombone": 0.5805188928077251}, {"Trumpet": 0.9062628792526456, "Horn": 0.8622206153088223}, {"Cello": 0.6003260416446784, "Oboe": 0.38737267933970415}, {"Trumpet": 0.9333191392924272, "Tuba": 0.8715648964628934}, {"Trumpet": 0.7778775156850734, "Trombone": 0.6787387908951699}, {"Horn": 0.797470396293232, "Trumpet": 0.8624599974274298}, {"Trumpet": 0.7920946193499684, "Horn": 0.7146566422091702}, {"Trumpet": 0.6143898500308628, "Trombone": 0.6885496873720812}], "separation": [{"Violin": 6.944569133196032, "Saxophone": 3.212414017789522}, {"Violin": 8.68144485703329, "Double_Bass": 7.5420044000931075}, {"Cello": 7.810631265066242, "Viola": -0.7748288894759613}, {"Violin": 7.979837140262056, "Double_Bass": 7.976955060425512}, {"Violin": 9.857800696259126, "Cello": 10.898517174171676}, {"Viola": 1.7118293976794432, "Clarinet": 4.801448905140044}, {"Flute": 9.374501043107726, "Violin": 6.180707292029123}, {"Saxophone": -2.158727703671019, "Clarinet": -0.008027077618480184}, {"Saxophone": 5.17720150144658, "Tuba": 4.884210782349435}, {"Trombone": 5.998907619253795, "Saxophone": -11.003881679030854}, {"Oboe": -8.941868539539414, "Saxophone": -2.2499306242481225}, {"Oboe": 0.9307880630210634, "Viola": 
5.7648286583908}, {"Flute": 7.776900360402213, "Cello": 11.586534425478067}, {"Flute": 8.146892744799107, "Trombone": 6.367731270984958}, {"Clarinet": 5.88537474496703, "Horn": 3.674138717696093}, {"Clarinet": 1.6326022123266604, "Bassoon": -5.797728479769438}, {"Trumpet": 4.932250939835388, "Violin": 5.745227076351145}, {"Trumpet": 2.587476924052913, "Violin": 4.5876155279512}, {"Flute": 10.227363735816732, "Clarinet": 5.213935025083013}, {"Flute": 4.6693836103727655, "Saxophone": -10.593131512977871}, {"Bassoon": -0.09114585988140317, "Oboe": 1.9145263779490649}, {"Trumpet": 3.515782783879651, "Trombone": 4.9255632714616375}, {"Trumpet": 4.809343589806482, "Horn": 10.343994423271457}, {"Tuba": 7.907610985479172, "Trumpet": 10.09533238543785}, {"Trumpet": 2.0748903076645946, "Trombone": 0.9625363013018168}, {"Trumpet": 9.402082738311961, "Horn": 8.572585205392352}, {"Cello": 7.169132084207552, "Oboe": 4.359673617595262}, {"Trumpet": 8.930889433583442, "Tuba": 8.137672181630624}, {"Trumpet": 4.037076044044631, "Trombone": 2.5817942467319344}, {"Horn": 5.27608065609058, "Trumpet": 3.2728467791119478}, {"Trumpet": 7.052004439766591, "Horn": 8.023781701110822}, {"Trumpet": 6.0247897597884, "Trombone": 2.3878849972236873}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.583509794277567, "Saxophone": 6.665538413662101}, {"Violin": 9.642250230178998, "Double_Bass": 6.559850857741068}, {"Cello": 8.155361451650615, "Viola": 4.2531779963725755}, {"Violin": 8.413615565512035, "Double_Bass": 7.572491436792042}, {"Violin": 9.488675632655397, "Cello": 10.49753279903658}, {"Viola": 3.173289324489717, "Clarinet": 7.580256334608057}, {"Flute": 9.483408830570866, "Violin": 8.560799851418013}, {"Saxophone": -0.04860033274023924, "Clarinet": 2.289948475563871}, {"Saxophone": 9.624489979726976, "Tuba": 6.110792422442842}, {"Trombone": 6.427255073951244, "Saxophone": 7.8970008068428}, {"Oboe": 4.734512788739627, "Saxophone": 5.012269750486958}, {"Oboe": 4.977258577773617, "Viola": 
8.620709552432054}, {"Flute": 7.544364079359656, "Cello": 10.477396637746665}, {"Flute": 11.320697931673143, "Trombone": 6.490925109946981}, {"Clarinet": 8.07613314334626, "Horn": 9.94008951275371}, {"Clarinet": 10.29548612420437, "Bassoon": 5.825322556327775}, {"Trumpet": 6.477481673087492, "Violin": 5.321437711960064}, {"Trumpet": 5.0591126334423775, "Violin": 4.587272516143913}, {"Flute": 14.678323260047335, "Clarinet": 10.948360858364968}, {"Flute": 10.066267873246302, "Saxophone": 4.48913650026043}, {"Bassoon": 8.954227970932472, "Oboe": -0.9020744321399814}, {"Trumpet": 5.385638101590099, "Trombone": 5.408172840384058}, {"Trumpet": 10.916193204823124, "Horn": 11.018996224003246}, {"Tuba": 6.486775793070562, "Trumpet": 10.465153580771709}, {"Trumpet": 4.288759921240995, "Trombone": 6.05939510309074}, {"Trumpet": 10.443474548026828, "Horn": 10.064098940451952}, {"Cello": 6.681953756702276, "Oboe": 7.5091635947946145}, {"Trumpet": 9.063019258890959, "Tuba": 8.553556406897604}, {"Trumpet": 5.057798904162273, "Trombone": 6.386318834830238}, {"Horn": 7.1387031563499015, "Trumpet": 4.338761476575872}, {"Trumpet": 8.930121948392234, "Horn": 10.350260460764929}, {"Trumpet": 5.969376382478478, "Trombone": 3.8099477069030794}]}} -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-199.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9121755181353801, "Saxophone": 0.5166693919064482}, {"Violin": 0.8998901802880341, "Double_Bass": 0.7189771156586594}, {"Cello": 0.7723822648048051, "Viola": 0.6428622733533773}, {"Violin": 0.914252855116599, "Double_Bass": 0.8487529615131667}, {"Violin": 0.8611046087214415, "Cello": 0.8424270502033921}, {"Viola": 0.5883172544049333, "Clarinet": 0.7162842630690908}, {"Flute": 0.7199137406102079, "Violin": 0.8451581795871065}, {"Saxophone": 0.37163793158755143, "Clarinet": 0.38146271619237915}, 
{"Saxophone": 0.5569922409258469, "Tuba": 0.5433201111282178}, {"Trombone": 0.7230299321506162, "Saxophone": 0.05051627606429156}, {"Oboe": 0.16418783011865346, "Saxophone": 0.5447622573468253}, {"Oboe": 0.47750112367745445, "Viola": 0.8638481617482999}, {"Flute": 0.8307871742613372, "Cello": 0.8846340365715661}, {"Flute": 0.8233583648883612, "Trombone": 0.873866188123911}, {"Clarinet": 0.7078128790427227, "Horn": 0.7169384140161733}, {"Clarinet": 0.547966277730213, "Bassoon": 0.1965090653988483}, {"Trumpet": 0.8770281861607411, "Violin": 0.7666193382474715}, {"Trumpet": 0.8327018484319814, "Violin": 0.8306234217673182}, {"Flute": 0.7494504939712152, "Clarinet": 0.8251463120997821}, {"Flute": 0.5172981625041865, "Saxophone": 0.44706492215850074}, {"Bassoon": 0.3426747007113617, "Oboe": 0.27678840213593664}, {"Trumpet": 0.5046771801974216, "Trombone": 0.8449867572901932}, {"Trumpet": 0.7154269314315669, "Horn": 0.8427686392640829}, {"Tuba": 0.29623351768418504, "Trumpet": 0.9155402251131829}, {"Trumpet": 0.6378145193646023, "Trombone": 0.6441273055893}, {"Trumpet": 0.922979020065121, "Horn": 0.9279941267997682}, {"Cello": 0.5978284357852842, "Oboe": 0.3560053167227133}, {"Trumpet": 0.9409055407254417, "Tuba": 0.8439290213547604}, {"Trumpet": 0.7487690192409491, "Trombone": 0.6842485271721009}, {"Horn": 0.7697000534170869, "Trumpet": 0.915280865926377}, {"Trumpet": 0.7474109967962473, "Horn": 0.7604385678403959}, {"Trumpet": 0.6536306931097742, "Trombone": 0.7230219553640371}], "separation": [{"Violin": 6.363528277443229, "Saxophone": 3.7416979268394885}, {"Violin": 9.979636057335682, "Double_Bass": 6.885701510609595}, {"Cello": 7.524106263777637, "Viola": 4.027854017453346}, {"Violin": 7.600784595278894, "Double_Bass": 8.00847272654533}, {"Violin": 9.631470843726104, "Cello": 10.759000855162702}, {"Viola": 2.138274057316985, "Clarinet": 4.422662594857392}, {"Flute": 9.262293524106388, "Violin": 5.637892234358364}, {"Saxophone": -4.856699277760855, "Clarinet": 
2.743637782780664}, {"Saxophone": 5.074893142598987, "Tuba": 4.70544663836166}, {"Trombone": 6.102004540226165, "Saxophone": -12.495076818045977}, {"Oboe": -7.259088793278789, "Saxophone": -0.8239150032834182}, {"Oboe": 1.322183816681479, "Viola": 6.647371275207421}, {"Flute": 8.955902745338305, "Cello": 10.88466582588892}, {"Flute": 11.34268023739078, "Trombone": 6.081433063382872}, {"Clarinet": 6.639587037347184, "Horn": 4.226225166651642}, {"Clarinet": 3.1317266161317487, "Bassoon": -10.27086145836003}, {"Trumpet": 5.215311544251104, "Violin": 5.975324227430431}, {"Trumpet": 3.8002514387779973, "Violin": 3.890958315442235}, {"Flute": 10.120629600531164, "Clarinet": 9.141701467047822}, {"Flute": 6.2394706506428, "Saxophone": 1.2791968801292861}, {"Bassoon": -0.38263297880299535, "Oboe": -1.574529350638163}, {"Trumpet": 2.5551316278004044, "Trombone": 5.987110663307936}, {"Trumpet": 4.572957036199296, "Horn": 11.207756637788403}, {"Tuba": 7.053583790986961, "Trumpet": 10.179681389752009}, {"Trumpet": 3.2076371157142503, "Trombone": 3.0051832185149827}, {"Trumpet": 9.043371697353736, "Horn": 9.716240984222217}, {"Cello": 6.359476466604099, "Oboe": 4.796304527156384}, {"Trumpet": 9.231048774217435, "Tuba": 8.38556241363989}, {"Trumpet": 3.5384233035418204, "Trombone": 3.44092002772258}, {"Horn": 6.0538183707148505, "Trumpet": 3.775561000023913}, {"Trumpet": 6.178061029418467, "Horn": 9.79286613291342}, {"Trumpet": 5.163960208725554, "Trombone": 3.8436211977201507}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.416882618532192, "Saxophone": 7.474259290702362}, {"Violin": 9.924318733624963, "Double_Bass": 6.182707743737116}, {"Cello": 7.882401078486675, "Viola": 5.864430615227453}, {"Violin": 8.489774058222615, "Double_Bass": 7.682567838239711}, {"Violin": 9.022972241623492, "Cello": 10.565909935787982}, {"Viola": 4.287354630411389, "Clarinet": 7.424186668267491}, {"Flute": 9.415552307364116, "Violin": 8.953834734700026}, {"Saxophone": -0.653448544623779, "Clarinet": 
2.7986387306087925}, {"Saxophone": 10.272234791936679, "Tuba": 5.648046437435048}, {"Trombone": 6.599250581602627, "Saxophone": 7.643932774635623}, {"Oboe": 4.563991099456322, "Saxophone": 4.832802330728196}, {"Oboe": 4.130068453204881, "Viola": 8.46302185427415}, {"Flute": 8.024427741031744, "Cello": 10.406938447958975}, {"Flute": 11.144062449855463, "Trombone": 6.4143408765250856}, {"Clarinet": 8.326046353224521, "Horn": 10.075265695609708}, {"Clarinet": 10.887309333397841, "Bassoon": 5.51689812527586}, {"Trumpet": 6.536064974424214, "Violin": 5.7455272676035545}, {"Trumpet": 4.462293548665275, "Violin": 5.059602303540871}, {"Flute": 14.638059262942889, "Clarinet": 11.971569056772964}, {"Flute": 10.200641217533748, "Saxophone": 6.5557638697757215}, {"Bassoon": 8.545300734426757, "Oboe": -1.9224531182926174}, {"Trumpet": 4.879707799772849, "Trombone": 5.645456041249086}, {"Trumpet": 10.832033931174184, "Horn": 12.071548847417725}, {"Tuba": 6.36661354574702, "Trumpet": 10.192860758265319}, {"Trumpet": 4.279147833648288, "Trombone": 5.907035791249751}, {"Trumpet": 10.273901325893993, "Horn": 10.39831116916815}, {"Cello": 6.3849612614483044, "Oboe": 6.825264796261585}, {"Trumpet": 9.284632015987238, "Tuba": 8.918315325333362}, {"Trumpet": 5.047816027313996, "Trombone": 6.19169113582144}, {"Horn": 7.157389926817755, "Trumpet": 4.170868963913187}, {"Trumpet": 8.925785716934183, "Horn": 11.013365058995712}, {"Trumpet": 5.788717441254181, "Trombone": 3.6658189558584557}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-192.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 0.9361394554231726, "Saxophone": 0.507355947986083}, {"Violin": 0.9035493979955787, "Double_Bass": 0.7842063860975458}, {"Cello": 0.7644316713827919, "Viola": 0.5263633849957178}, {"Violin": 0.9365505736686, "Double_Bass": 0.8284833143854402}, {"Violin": 
0.8423349355224742, "Cello": 0.839228454271447}, {"Viola": 0.6240776393424528, "Clarinet": 0.74941251966239}, {"Flute": 0.7196487429544639, "Violin": 0.876464916743926}, {"Saxophone": 0.48561929733248405, "Clarinet": 0.6548060321025061}, {"Saxophone": 0.47585414552639316, "Tuba": 0.5839950434367663}, {"Trombone": 0.8334086526345647, "Saxophone": 0.044808006443237555}, {"Oboe": 0.20454899014807562, "Saxophone": 0.448685365948155}, {"Oboe": 0.5216479439508223, "Viola": 0.8678643969922711}, {"Flute": 0.8149097734493286, "Cello": 0.9080176204659974}, {"Flute": 0.8395199842059287, "Trombone": 0.8786820932784377}, {"Clarinet": 0.7697982800904478, "Horn": 0.7803428692554318}, {"Clarinet": 0.7497705807279238, "Bassoon": 0.18500549826016005}, {"Trumpet": 0.8354577933863586, "Violin": 0.8174184373619785}, {"Trumpet": 0.6273069453440938, "Violin": 0.8432019835410031}, {"Flute": 0.9085927197470614, "Clarinet": 0.8008748407368664}, {"Flute": 0.5874170095914809, "Saxophone": 0.2930463072983357}, {"Bassoon": 0.23204534786943615, "Oboe": 0.6445144062134838}, {"Trumpet": 0.667560421157832, "Trombone": 0.8187067883923883}, {"Trumpet": 0.8245578482997433, "Horn": 0.8034427179511888}, {"Tuba": 0.6109277436986952, "Trumpet": 0.9226272321909085}, {"Trumpet": 0.7149172528171747, "Trombone": 0.569793646868569}, {"Trumpet": 0.9236047189736051, "Horn": 0.8685638333691418}, {"Cello": 0.5961044640558822, "Oboe": 0.5383706357167409}, {"Trumpet": 0.9403395517614258, "Tuba": 0.7423280694429848}, {"Trumpet": 0.8248085985139659, "Trombone": 0.6619350988089323}, {"Horn": 0.7989076652692978, "Trumpet": 0.8906921127595409}, {"Trumpet": 0.7175214499370501, "Horn": 0.773602903504052}, {"Trumpet": 0.5906262562166622, "Trombone": 0.7351650157285545}], "separation": [{"Violin": 8.035166513774772, "Saxophone": 4.464094975769749}, {"Violin": 12.34001377868122, "Double_Bass": 7.254324752402312}, {"Cello": 7.09389787668715, "Viola": 3.6987374133551336}, {"Violin": 10.491858734453505, "Double_Bass": 
7.816976469377801}, {"Violin": 12.20450171408885, "Cello": 14.298573392873546}, {"Viola": 3.051135000048191, "Clarinet": 5.202811749433797}, {"Flute": 10.914150638871476, "Violin": 7.533063832311553}, {"Saxophone": -3.063516407543825, "Clarinet": 1.3203185832075157}, {"Saxophone": 3.628852574319628, "Tuba": 6.652992299657383}, {"Trombone": 9.943070448119602, "Saxophone": -12.847788067725165}, {"Oboe": -7.291816804988782, "Saxophone": 0.32741228324087}, {"Oboe": 3.3727129734708865, "Viola": 9.344766927288688}, {"Flute": 11.323491812969444, "Cello": 14.81360054769368}, {"Flute": 12.85221676961761, "Trombone": 10.26307336578499}, {"Clarinet": 7.933362437702749, "Horn": 6.449547157755119}, {"Clarinet": 7.175871776302207, "Bassoon": -8.94347029159849}, {"Trumpet": 8.23513821002718, "Violin": 5.651490897968548}, {"Trumpet": 3.6488377792363296, "Violin": 2.6313096410622414}, {"Flute": 14.114316567424336, "Clarinet": 5.568780402591859}, {"Flute": 5.1562864997793465, "Saxophone": -3.5956745104946903}, {"Bassoon": -1.6585578086183066, "Oboe": 0.13914306895167333}, {"Trumpet": 3.822139539765784, "Trombone": 7.215936188055406}, {"Trumpet": 9.532306448850788, "Horn": 12.704855193397817}, {"Tuba": 14.595275924126522, "Trumpet": 13.439816373521081}, {"Trumpet": 3.1223404996272217, "Trombone": 2.5465515917671477}, {"Trumpet": 13.840447411805556, "Horn": 8.392067949699253}, {"Cello": 9.120797202926642, "Oboe": 8.023969651713166}, {"Trumpet": 13.755644538796581, "Tuba": 9.532603547906378}, {"Trumpet": 5.966257951763573, "Trombone": 4.33193410282663}, {"Horn": 6.62316898180327, "Trumpet": 5.069422218604237}, {"Trumpet": 4.963934699598849, "Horn": 12.745759425903058}, {"Trumpet": 6.952457926174736, "Trombone": 6.263006744221888}]}, "MSI-S": {"separation": [{"Violin": 7.714739876401469, "Saxophone": 11.295923006277961}, {"Violin": 12.19058587219231, "Double_Bass": 6.87065965425178}, {"Cello": 8.401271651932268, "Viola": 8.401595588422664}, {"Violin": 12.025667180301607, "Double_Bass": 
9.251738118810561}, {"Violin": 11.888601235356164, "Cello": 13.813763574707671}, {"Viola": 4.267899252407349, "Clarinet": 5.847582738537627}, {"Flute": 11.723662290229209, "Violin": 11.170828814127564}, {"Saxophone": -1.110819998355692, "Clarinet": 2.2594576933794115}, {"Saxophone": 10.736616807180123, "Tuba": 7.0524233525005044}, {"Trombone": 9.637816834235718, "Saxophone": 9.628402218639785}, {"Oboe": 7.200044011024138, "Saxophone": 8.248577932306917}, {"Oboe": 7.698095954525224, "Viola": 11.154373508506865}, {"Flute": 10.035328716800239, "Cello": 13.49098604942732}, {"Flute": 12.862390389993658, "Trombone": 10.200401908340382}, {"Clarinet": 7.797095736287459, "Horn": 11.71393518353264}, {"Clarinet": 11.00351516944156, "Bassoon": 7.90497809246499}, {"Trumpet": 7.205973408959282, "Violin": 2.8382673093364903}, {"Trumpet": 4.5744667192383055, "Violin": 4.029191732618011}, {"Flute": 16.41310799502572, "Clarinet": 9.678759478157609}, {"Flute": 10.403763818472093, "Saxophone": 5.348416194610808}, {"Bassoon": 12.314611461701785, "Oboe": -1.5191586272627204}, {"Trumpet": 6.305427916734421, "Trombone": 8.76475124129314}, {"Trumpet": 14.707879915628794, "Horn": 15.206919758083162}, {"Tuba": 10.458005991273867, "Trumpet": 13.614645736567649}, {"Trumpet": 5.806854866004546, "Trombone": 8.757502628879205}, {"Trumpet": 14.078232675350657, "Horn": 13.447319260378814}, {"Cello": 9.53112713863298, "Oboe": 11.393525479271087}, {"Trumpet": 13.887213187022054, "Tuba": 13.171748448322509}, {"Trumpet": 7.229478006517997, "Trombone": 7.95958219586563}, {"Horn": 7.770212674878571, "Trumpet": 5.352733237610803}, {"Trumpet": 13.551005242540459, "Horn": 14.048520471348029}, {"Trumpet": 6.844299763701207, "Trombone": 6.559767714661048}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-197.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 
0.9153824707282537, "Saxophone": 0.41214169958008584}, {"Violin": 0.9045804935702648, "Double_Bass": 0.7911181641453596}, {"Cello": 0.7928932546860387, "Viola": 0.7130892959007321}, {"Violin": 0.9150882141224306, "Double_Bass": 0.8330529664285353}, {"Violin": 0.8502225375004542, "Cello": 0.8448101189148244}, {"Viola": 0.6174415919700625, "Clarinet": 0.7436143252164596}, {"Flute": 0.7426473773313286, "Violin": 0.8351087831427891}, {"Saxophone": 0.5324677797485345, "Clarinet": 0.6450426916837877}, {"Saxophone": 0.46701875082509303, "Tuba": 0.5321708800891947}, {"Trombone": 0.8260272374717064, "Saxophone": 0.06238401252123674}, {"Oboe": 0.2083258930842431, "Saxophone": 0.4905063749391071}, {"Oboe": 0.5195363519679062, "Viola": 0.8874781704630496}, {"Flute": 0.8151307805056968, "Cello": 0.9144130414231073}, {"Flute": 0.8181321924616023, "Trombone": 0.8703783093512343}, {"Clarinet": 0.8358574862907144, "Horn": 0.763159651646269}, {"Clarinet": 0.7504875916727449, "Bassoon": 0.17535066467142718}, {"Trumpet": 0.7770635415452568, "Violin": 0.7404910794898754}, {"Trumpet": 0.6925855087813726, "Violin": 0.809406494831253}, {"Flute": 0.7577412483587282, "Clarinet": 0.7234402226250822}, {"Flute": 0.5846565242844187, "Saxophone": 0.34834398530824295}, {"Bassoon": 0.09001586626504829, "Oboe": 0.537029034510867}, {"Trumpet": 0.515690658479332, "Trombone": 0.8090722589949623}, {"Trumpet": 0.7172186086173673, "Horn": 0.7874070319119018}, {"Tuba": 0.6794970850143396, "Trumpet": 0.8957403637649856}, {"Trumpet": 0.6910479805002857, "Trombone": 0.5908810794484339}, {"Trumpet": 0.921405628333067, "Horn": 0.8998108216774549}, {"Cello": 0.5737315911821874, "Oboe": 0.5185059797624623}, {"Trumpet": 0.9288160845004607, "Tuba": 0.8684865634257202}, {"Trumpet": 0.8170581920069685, "Trombone": 0.5874078297813579}, {"Horn": 0.8035340164823668, "Trumpet": 0.8922853242570884}, {"Trumpet": 0.7455946337571833, "Horn": 0.7362979091072929}, {"Trumpet": 0.5912517334103375, "Trombone": 0.7221973759574}], 
"separation": [{"Violin": 7.755068867923328, "Saxophone": 4.422501072577965}, {"Violin": 12.080766986936542, "Double_Bass": 7.435378841525225}, {"Cello": 6.860982002635446, "Viola": 6.009800388649049}, {"Violin": 9.833935180897534, "Double_Bass": 9.846639373025631}, {"Violin": 12.086120611092676, "Cello": 14.381731419180579}, {"Viola": 3.113145795382743, "Clarinet": 4.8391882269034}, {"Flute": 11.484319375517016, "Violin": 6.580420106625952}, {"Saxophone": -3.2440470309831544, "Clarinet": 1.78726912163885}, {"Saxophone": 2.8993420335009565, "Tuba": 6.763843189971172}, {"Trombone": 9.64093423731461, "Saxophone": -11.213988330085293}, {"Oboe": -7.687328965970424, "Saxophone": 0.35918048576109485}, {"Oboe": 1.8964445822040523, "Viola": 10.542032678087882}, {"Flute": 10.791933163964751, "Cello": 15.066843482581053}, {"Flute": 12.486141115659413, "Trombone": 10.45957407608897}, {"Clarinet": 9.529626072146536, "Horn": 6.164771839902689}, {"Clarinet": 6.563705741581037, "Bassoon": -8.963748181041746}, {"Trumpet": 5.867211030244906, "Violin": 5.703806191178326}, {"Trumpet": 3.8326188235150536, "Violin": 3.6388439073024283}, {"Flute": 5.146714454087881, "Clarinet": 7.2113743370218275}, {"Flute": 5.755697196006264, "Saxophone": 0.10635920845241215}, {"Bassoon": -3.8856130064807237, "Oboe": 0.10825542762723303}, {"Trumpet": -0.19270147051477943, "Trombone": 7.580539667933356}, {"Trumpet": 5.589441820597825, "Horn": 11.133836592623043}, {"Tuba": 14.869850156412873, "Trumpet": 13.010566847722455}, {"Trumpet": 2.0696810745276144, "Trombone": 2.893310585545757}, {"Trumpet": 10.035910702967056, "Horn": 12.354128806998741}, {"Cello": 9.169286771222803, "Oboe": 5.975852893791725}, {"Trumpet": 12.858820157397826, "Tuba": 12.76781745062759}, {"Trumpet": 5.719049872936772, "Trombone": 3.2686446216980825}, {"Horn": 6.768962520832441, "Trumpet": 4.444869132478698}, {"Trumpet": 6.26931565556651, "Horn": 13.661250175350046}, {"Trumpet": 6.215661723487587, "Trombone": 5.745846146157682}]}, 
"MSI-S": {"separation": [{"Violin": 7.66366279066125, "Saxophone": 11.244583651332654}, {"Violin": 12.228989460598408, "Double_Bass": 6.768548405981781}, {"Cello": 7.998889107818876, "Viola": 8.579519788333124}, {"Violin": 11.849298535238166, "Double_Bass": 9.299678083853578}, {"Violin": 11.573991972437323, "Cello": 13.823600928897001}, {"Viola": 4.71362579292008, "Clarinet": 5.381836900499285}, {"Flute": 11.777855718623641, "Violin": 10.5582819039724}, {"Saxophone": -1.1954460126187554, "Clarinet": 2.4508310005017537}, {"Saxophone": 10.568746780983513, "Tuba": 7.340229763791733}, {"Trombone": 9.49534335658358, "Saxophone": 9.565232463807876}, {"Oboe": 7.04762396445329, "Saxophone": 8.382335663264996}, {"Oboe": 6.667060559374289, "Viola": 11.354445053253963}, {"Flute": 9.56967681963538, "Cello": 13.897774518140626}, {"Flute": 12.972836300233281, "Trombone": 10.393963429448359}, {"Clarinet": 7.841777585323474, "Horn": 11.820228633292762}, {"Clarinet": 10.942645142081233, "Bassoon": 7.747615142167936}, {"Trumpet": 7.097462788236941, "Violin": 3.3775879822091865}, {"Trumpet": 4.407141455325539, "Violin": 4.22408249238051}, {"Flute": 16.43206291308859, "Clarinet": 9.47248175011848}, {"Flute": 10.490765677634396, "Saxophone": 5.87301120869226}, {"Bassoon": 12.56792453334784, "Oboe": -1.5035733674689964}, {"Trumpet": 6.346680262737556, "Trombone": 8.889645110172808}, {"Trumpet": 14.64327020923933, "Horn": 15.565914924061346}, {"Tuba": 10.514784214633877, "Trumpet": 13.663930378832866}, {"Trumpet": 5.694484004615681, "Trombone": 8.435074247798168}, {"Trumpet": 14.08316056638894, "Horn": 13.707306463831292}, {"Cello": 9.284111951374648, "Oboe": 10.201066976880808}, {"Trumpet": 13.99187267987431, "Tuba": 13.626781149872414}, {"Trumpet": 7.1742928723302, "Trombone": 7.950234684456116}, {"Horn": 8.012831912372725, "Trumpet": 5.4367097116978265}, {"Trumpet": 13.416754975741762, "Horn": 14.37795240682023}, {"Trumpet": 6.802765532136819, "Trombone": 6.53587072892728}]}} 
-------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-192.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9384123359352381, "Saxophone": 0.5346570619299896}, {"Violin": 0.8962927289131267, "Double_Bass": 0.6925760894070635}, {"Cello": 0.7865696716445996, "Viola": 0.6328194305933684}, {"Violin": 0.9229585584035308, "Double_Bass": 0.8234973511338932}, {"Violin": 0.8675022604793753, "Cello": 0.8524542862208624}, {"Viola": 0.6721445941556236, "Clarinet": 0.7030537891053213}, {"Flute": 0.7191649012486236, "Violin": 0.8854475675966219}, {"Saxophone": 0.5735980055398083, "Clarinet": 0.6766489808782865}, {"Saxophone": 0.6335845542528676, "Tuba": 0.4738865609514924}, {"Trombone": 0.8016375878324989, "Saxophone": 0.03340697404214741}, {"Oboe": 0.16436982503858766, "Saxophone": 0.49189164588593626}, {"Oboe": 0.5052150558649842, "Viola": 0.872151840379869}, {"Flute": 0.8346843936448153, "Cello": 0.8960843101571067}, {"Flute": 0.839611668886874, "Trombone": 0.8675815445133644}, {"Clarinet": 0.7837561543668288, "Horn": 0.7387845171199218}, {"Clarinet": 0.6718173992775168, "Bassoon": 0.33206436768229114}, {"Trumpet": 0.8338942492214145, "Violin": 0.8108613346884792}, {"Trumpet": 0.6704463718346559, "Violin": 0.8694303579253583}, {"Flute": 0.7931344037303044, "Clarinet": 0.719040802778413}, {"Flute": 0.5695275052929194, "Saxophone": 0.3026071769602664}, {"Bassoon": 0.3594576296462653, "Oboe": 0.27715871804662257}, {"Trumpet": 0.5734960367787332, "Trombone": 0.8184933684774732}, {"Trumpet": 0.5645511494524913, "Horn": 0.881125193281301}, {"Tuba": 0.36213145389489865, "Trumpet": 0.8986994703967741}, {"Trumpet": 0.6801017199121147, "Trombone": 0.612806917918625}, {"Trumpet": 0.9364567222873682, "Horn": 0.8671034065677014}, {"Cello": 0.6115863242780858, "Oboe": 0.3858034810156793}, {"Trumpet": 0.9402636964809244, "Tuba": 0.854377219843421}, 
{"Trumpet": 0.8189789431625908, "Trombone": 0.6402501351182317}, {"Horn": 0.7831049616954892, "Trumpet": 0.8278607750927547}, {"Trumpet": 0.7369004718873324, "Horn": 0.7659167945030255}, {"Trumpet": 0.6553875189037773, "Trombone": 0.7141208861563172}], "separation": [{"Violin": 6.676672559822361, "Saxophone": 2.3348465242002376}, {"Violin": 9.404163814410834, "Double_Bass": 7.420192402823448}, {"Cello": 7.467213998833598, "Viola": 3.7064337620231678}, {"Violin": 7.54802382022195, "Double_Bass": 7.916627083214696}, {"Violin": 9.482879897662128, "Cello": 10.787081371782484}, {"Viola": 2.7177377498870134, "Clarinet": 4.862530245481819}, {"Flute": 9.389141850603275, "Violin": 5.632359440528358}, {"Saxophone": -2.0959229954479937, "Clarinet": 0.9372701898654899}, {"Saxophone": 7.149761932689749, "Tuba": 4.810568493617931}, {"Trombone": 6.643752445184459, "Saxophone": -14.873575377223471}, {"Oboe": -8.692588407161985, "Saxophone": -1.661061117648404}, {"Oboe": 0.08612609923532208, "Viola": 6.800032423727425}, {"Flute": 8.795309854093162, "Cello": 11.155895617464928}, {"Flute": 11.317366939741545, "Trombone": 6.483312399154793}, {"Clarinet": 7.217236860934996, "Horn": 5.294140288413738}, {"Clarinet": 4.320818162122788, "Bassoon": -7.096628571169973}, {"Trumpet": 6.011886477347743, "Violin": 6.1212527259447445}, {"Trumpet": 1.9426567655278613, "Violin": 4.434570125133407}, {"Flute": 11.965756516675658, "Clarinet": 6.460077520222929}, {"Flute": 6.834174082154183, "Saxophone": -6.684840750984334}, {"Bassoon": -0.83765750859254, "Oboe": -1.282289127572327}, {"Trumpet": 4.439209297944088, "Trombone": 5.45674405508683}, {"Trumpet": 2.434565904107025, "Horn": 10.004313559164444}, {"Tuba": 8.450388809469612, "Trumpet": 10.242706751554667}, {"Trumpet": 2.687476515460918, "Trombone": 1.649756481931083}, {"Trumpet": 9.537940501529018, "Horn": 8.644672076952018}, {"Cello": 7.0772707457215, "Oboe": 4.171325883268516}, {"Trumpet": 8.62281891670103, "Tuba": 8.400674851801151}, 
{"Trumpet": 4.392363040595803, "Trombone": 3.2647108195159085}, {"Horn": 5.645558169926554, "Trumpet": 1.9590712858935833}, {"Trumpet": 4.615250386092959, "Horn": 8.085999211659177}, {"Trumpet": 6.240655851582709, "Trombone": 3.6316358012160297}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.475938121354281, "Saxophone": 7.039929637452515}, {"Violin": 9.48979487380436, "Double_Bass": 6.688641069075696}, {"Cello": 7.734235476544503, "Viola": 5.837932441192093}, {"Violin": 8.141032800785874, "Double_Bass": 7.342035419223208}, {"Violin": 8.972648641447664, "Cello": 10.487373977713737}, {"Viola": 3.5828883186346374, "Clarinet": 7.291456794422764}, {"Flute": 9.26763701734334, "Violin": 8.367825884161153}, {"Saxophone": -0.26808767963785013, "Clarinet": 2.411447794388967}, {"Saxophone": 10.158230973179332, "Tuba": 5.831841040042562}, {"Trombone": 6.450371386451965, "Saxophone": 7.572108090463495}, {"Oboe": 4.31346828275459, "Saxophone": 4.624909827568285}, {"Oboe": 3.4850337248624803, "Viola": 8.256076871168561}, {"Flute": 7.814479014778533, "Cello": 10.287352700697724}, {"Flute": 11.250317433242623, "Trombone": 6.645725487001409}, {"Clarinet": 8.441457230615303, "Horn": 9.901041998288576}, {"Clarinet": 9.99903442974184, "Bassoon": 5.497798618238241}, {"Trumpet": 6.380382802058308, "Violin": 5.074638578028992}, {"Trumpet": 3.9325723387538423, "Violin": 4.999301508113722}, {"Flute": 14.591513129281687, "Clarinet": 11.936891155742098}, {"Flute": 10.186406901845391, "Saxophone": 5.5828841904890725}, {"Bassoon": 7.898332672412542, "Oboe": -1.9405367318789388}, {"Trumpet": 4.7368874319295475, "Trombone": 5.580470552333834}, {"Trumpet": 10.724081829994422, "Horn": 11.223131969468497}, {"Tuba": 6.744742354871911, "Trumpet": 10.160980610538417}, {"Trumpet": 4.158253938030842, "Trombone": 5.73621962593748}, {"Trumpet": 9.845215623354214, "Horn": 9.766528233677214}, {"Cello": 6.8711354116474475, "Oboe": 7.582145920387572}, {"Trumpet": 8.998808793158993, "Tuba": 9.222062646563016}, 
{"Trumpet": 5.276183062957486, "Trombone": 6.5064418893823905}, {"Horn": 7.532194673491736, "Trumpet": 3.744302912712678}, {"Trumpet": 8.767605793458053, "Horn": 10.734268569509894}, {"Trumpet": 5.689950371820757, "Trombone": 3.4903668078261862}]}} -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-193.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9221536748615152, "Saxophone": 0.5159039490739373}, {"Violin": 0.9010616149401837, "Double_Bass": 0.6992582099983028}, {"Cello": 0.7794827597511965, "Viola": 0.6514306690951148}, {"Violin": 0.9188626163753273, "Double_Bass": 0.832384185248992}, {"Violin": 0.861941555610172, "Cello": 0.8562145385216754}, {"Viola": 0.6536868735526751, "Clarinet": 0.7855589335603378}, {"Flute": 0.705507503593955, "Violin": 0.8715179540765242}, {"Saxophone": 0.6151628328619586, "Clarinet": 0.6541424701272047}, {"Saxophone": 0.6231490093869938, "Tuba": 0.5713686766768591}, {"Trombone": 0.7552381345614598, "Saxophone": 0.08278921110688948}, {"Oboe": 0.1691090082384871, "Saxophone": 0.4170089932484352}, {"Oboe": 0.4581272819730028, "Viola": 0.8305759646210241}, {"Flute": 0.8022176514676511, "Cello": 0.9066148411365446}, {"Flute": 0.8692350444393018, "Trombone": 0.8606650669722671}, {"Clarinet": 0.7705367153379585, "Horn": 0.6312154253278269}, {"Clarinet": 0.6605347926486815, "Bassoon": 0.19589351743961042}, {"Trumpet": 0.8761042878558835, "Violin": 0.7841301028681331}, {"Trumpet": 0.8142610626256009, "Violin": 0.8126155222293977}, {"Flute": 0.7678230451592728, "Clarinet": 0.7538866991273583}, {"Flute": 0.6240901879393502, "Saxophone": 0.20514400479363035}, {"Bassoon": 0.36299310021672926, "Oboe": 0.33765698711886855}, {"Trumpet": 0.6131605053943183, "Trombone": 0.8384212126095958}, {"Trumpet": 0.7060713013299754, "Horn": 0.8573255256086534}, {"Tuba": 0.5366662399291285, "Trumpet": 0.928101569000177}, 
{"Trumpet": 0.6410040517037849, "Trombone": 0.6225602124395043}, {"Trumpet": 0.9160642452915085, "Horn": 0.837632835538449}, {"Cello": 0.6412579651673358, "Oboe": 0.3463361773243479}, {"Trumpet": 0.9396425697730952, "Tuba": 0.8976548442863882}, {"Trumpet": 0.7775781818028951, "Trombone": 0.5895020114865376}, {"Horn": 0.8028814145398346, "Trumpet": 0.8710161448114203}, {"Trumpet": 0.7884024667753711, "Horn": 0.7290748463357478}, {"Trumpet": 0.6148168900530278, "Trombone": 0.7076562730436958}], "separation": [{"Violin": 7.447542824370913, "Saxophone": 3.607049027669911}, {"Violin": 8.908582752233938, "Double_Bass": 7.532810643671405}, {"Cello": 7.020854023113169, "Viola": 2.9643882138428923}, {"Violin": 7.636430287101893, "Double_Bass": 8.362693622471237}, {"Violin": 9.257392967727089, "Cello": 11.261348224020999}, {"Viola": 1.5926315987380837, "Clarinet": 5.077777573311809}, {"Flute": 9.566443195952099, "Violin": 5.591125960981456}, {"Saxophone": -1.4693347571010213, "Clarinet": 1.0740330314851707}, {"Saxophone": 7.04927058679057, "Tuba": 5.045051142406015}, {"Trombone": 6.399783373400477, "Saxophone": -9.098995707150296}, {"Oboe": -8.508813865314037, "Saxophone": -2.8216598855219566}, {"Oboe": 0.696002973854982, "Viola": 6.203017831029543}, {"Flute": 9.096964948314998, "Cello": 11.11544773758067}, {"Flute": 11.477196145468621, "Trombone": 6.0160097113426145}, {"Clarinet": 6.9452046032267525, "Horn": 3.528476832178025}, {"Clarinet": 4.6871999247111775, "Bassoon": -10.81966877064463}, {"Trumpet": 6.625234102329152, "Violin": 5.083696067497239}, {"Trumpet": 3.9392100344945016, "Violin": 5.090414482285286}, {"Flute": 10.921668720335164, "Clarinet": 6.874751472031566}, {"Flute": 6.926190714163707, "Saxophone": -6.373300901445483}, {"Bassoon": -0.1192442180161776, "Oboe": -0.8555344180531811}, {"Trumpet": 3.865316609734188, "Trombone": 5.677977093372287}, {"Trumpet": 3.8622922338023766, "Horn": 9.940066373943312}, {"Tuba": 8.525103768538482, "Trumpet": 
10.184205551819662}, {"Trumpet": 3.3907497288820796, "Trombone": 2.0870595403592644}, {"Trumpet": 9.686416668700474, "Horn": 7.73029442836971}, {"Cello": 6.760472587429841, "Oboe": 4.0498399769575375}, {"Trumpet": 9.219971737370685, "Tuba": 9.537632387384344}, {"Trumpet": 4.188663126763118, "Trombone": 3.117197934082434}, {"Horn": 5.934709538557721, "Trumpet": 2.8242000035904398}, {"Trumpet": 7.490075627103366, "Horn": 7.221520694997765}, {"Trumpet": 6.282851723700981, "Trombone": 3.774421874615943}]}, "MSI-DIS-S": {"separation": [{"Violin": 7.239493454803423, "Saxophone": 7.506072445165949}, {"Violin": 9.869830067764806, "Double_Bass": 7.100157710784649}, {"Cello": 7.347228243021272, "Viola": 4.987958553140595}, {"Violin": 8.128449083671693, "Double_Bass": 7.904265275002693}, {"Violin": 9.13949461002675, "Cello": 10.878504261647953}, {"Viola": 3.632442417459951, "Clarinet": 7.449472292120269}, {"Flute": 9.516783167133195, "Violin": 8.143956147613366}, {"Saxophone": -0.17762689596302678, "Clarinet": 2.1793291383255355}, {"Saxophone": 10.119266554654757, "Tuba": 6.134626277672989}, {"Trombone": 6.420280463941335, "Saxophone": 7.7925617085346435}, {"Oboe": 4.276621865477028, "Saxophone": 4.813759018305921}, {"Oboe": 4.401057648014203, "Viola": 8.708000929787529}, {"Flute": 7.999859354544244, "Cello": 10.181199794543085}, {"Flute": 11.267008889009219, "Trombone": 6.388730143189148}, {"Clarinet": 8.023229548466201, "Horn": 10.19842257726027}, {"Clarinet": 10.395582765878588, "Bassoon": 4.865036784169604}, {"Trumpet": 6.844486092443402, "Violin": 4.228082978262868}, {"Trumpet": 5.063036530869571, "Violin": 4.924570606723959}, {"Flute": 14.821293086089488, "Clarinet": 10.60856592695377}, {"Flute": 10.408724357814076, "Saxophone": 5.620699750759234}, {"Bassoon": 7.89251681392704, "Oboe": -1.3927658826686393}, {"Trumpet": 5.22293532872267, "Trombone": 5.498201262919498}, {"Trumpet": 10.97898850582823, "Horn": 11.471732330352921}, {"Tuba": 6.869678805736095, "Trumpet": 
10.315352532353245}, {"Trumpet": 4.474589850812589, "Trombone": 5.495595549559411}, {"Trumpet": 10.47462282968281, "Horn": 10.718394022007185}, {"Cello": 6.686888935581265, "Oboe": 7.1249781961920355}, {"Trumpet": 9.247771847934615, "Tuba": 9.977057602859068}, {"Trumpet": 5.379798964004117, "Trombone": 6.311303663055247}, {"Horn": 7.576471000456349, "Trumpet": 4.405704831464769}, {"Trumpet": 8.952465399131778, "Horn": 10.87587174150849}, {"Trumpet": 5.916607646441453, "Trombone": 3.7453138760251568}]}} -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-194.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9332389130315771, "Saxophone": 0.5405101158127558}, {"Violin": 0.8402200154372012, "Double_Bass": 0.7512393976986393}, {"Cello": 0.7979413727594686, "Viola": 0.7202389521849519}, {"Violin": 0.9195613281926149, "Double_Bass": 0.848038520474201}, {"Violin": 0.8487739058044316, "Cello": 0.8432960960775304}, {"Viola": 0.6641803912148794, "Clarinet": 0.6752264983190095}, {"Flute": 0.7271990937410853, "Violin": 0.8648348076552865}, {"Saxophone": 0.5552518150524953, "Clarinet": 0.5594854615778567}, {"Saxophone": 0.4231039982381646, "Tuba": 0.6383050701480621}, {"Trombone": 0.7562222069501227, "Saxophone": 0.034512512546808746}, {"Oboe": 0.21458616507734307, "Saxophone": 0.46293629939405023}, {"Oboe": 0.4792162750822663, "Viola": 0.871780967239092}, {"Flute": 0.8020802000263941, "Cello": 0.9017678435962615}, {"Flute": 0.865704113117796, "Trombone": 0.8500849831102429}, {"Clarinet": 0.7045613022886172, "Horn": 0.7931135118962632}, {"Clarinet": 0.5541512809904605, "Bassoon": 0.2753682483924805}, {"Trumpet": 0.7969852464410275, "Violin": 0.7898942737868044}, {"Trumpet": 0.6165833742188005, "Violin": 0.831619428716475}, {"Flute": 0.9636425135887419, "Clarinet": 0.5512659985982284}, {"Flute": 0.7590098407752213, "Saxophone": 
0.16977345123489201}, {"Bassoon": 0.4166742226101159, "Oboe": 0.5135983699578529}, {"Trumpet": 0.6692731580022638, "Trombone": 0.830099563538242}, {"Trumpet": 0.6736810617012595, "Horn": 0.8626397706179734}, {"Tuba": 0.41188870250608783, "Trumpet": 0.8897904946374271}, {"Trumpet": 0.6425723694942701, "Trombone": 0.5440666238889852}, {"Trumpet": 0.8985887482918221, "Horn": 0.8944965943344576}, {"Cello": 0.5716330938678559, "Oboe": 0.4043811154654226}, {"Trumpet": 0.9392592446683938, "Tuba": 0.8722474026895343}, {"Trumpet": 0.8048621282608199, "Trombone": 0.4966428953042927}, {"Horn": 0.8179877319130804, "Trumpet": 0.8741191560077508}, {"Trumpet": 0.6826889542936153, "Horn": 0.7489649650835999}, {"Trumpet": 0.6253170904113182, "Trombone": 0.708692600202517}], "separation": [{"Violin": 7.1053311685807365, "Saxophone": 1.9698055162821557}, {"Violin": 8.370993831818467, "Double_Bass": 7.380666787305563}, {"Cello": 6.75903322035598, "Viola": 3.911443156476823}, {"Violin": 7.871385249049576, "Double_Bass": 7.198124354381231}, {"Violin": 9.39533283680237, "Cello": 11.200916956609074}, {"Viola": 2.3077121929175894, "Clarinet": 3.9839693881321647}, {"Flute": 8.86841865139781, "Violin": 4.743356708698105}, {"Saxophone": -1.113223598404096, "Clarinet": 0.3729680168595979}, {"Saxophone": 2.0255907668663506, "Tuba": 4.585345134983456}, {"Trombone": 6.438993742431647, "Saxophone": -15.383692726073615}, {"Oboe": -5.582535882411319, "Saxophone": -2.808477362275144}, {"Oboe": 1.059349684023927, "Viola": 7.689032851701025}, {"Flute": 8.64086732672847, "Cello": 11.206444484367132}, {"Flute": 11.581225568846207, "Trombone": 5.253926913407558}, {"Clarinet": 5.814819602940879, "Horn": 7.067580730392679}, {"Clarinet": 2.2345968719234537, "Bassoon": -7.939268154754919}, {"Trumpet": 5.70903608912386, "Violin": 6.077382255109814}, {"Trumpet": 1.9589692721017364, "Violin": 4.651689416589583}, {"Flute": 13.892627811980788, "Clarinet": 5.243847124873211}, {"Flute": 9.319818682702124, 
"Saxophone": -9.416088083701698}, {"Bassoon": -1.807542648346924, "Oboe": 0.7635097464157224}, {"Trumpet": 3.661909549845289, "Trombone": 5.362562599470123}, {"Trumpet": 4.051410290405063, "Horn": 10.25498465553827}, {"Tuba": 7.278648460206522, "Trumpet": 10.295022755852488}, {"Trumpet": 4.067942445696543, "Trombone": 0.04968077762432831}, {"Trumpet": 8.860049352520882, "Horn": 9.70992258426048}, {"Cello": 7.023451175074813, "Oboe": 7.649457938099364}, {"Trumpet": 8.825893531240506, "Tuba": 8.138872255484547}, {"Trumpet": 3.5542694674054562, "Trombone": 1.2114207823942988}, {"Horn": 5.395239557538057, "Trumpet": 3.0083327445765233}, {"Trumpet": 4.774994852593438, "Horn": 10.478567451166734}, {"Trumpet": 5.662184903676573, "Trombone": 3.365869115976767}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.799490998246122, "Saxophone": 7.425779725416931}, {"Violin": 9.956535132580179, "Double_Bass": 6.704841691489635}, {"Cello": 7.86811649670787, "Viola": 5.392519622783749}, {"Violin": 8.079542574273791, "Double_Bass": 6.887743208562954}, {"Violin": 9.082905092128827, "Cello": 10.643084831891148}, {"Viola": 3.8994376028471986, "Clarinet": 7.291146991893298}, {"Flute": 8.919201020319544, "Violin": 8.600458689421629}, {"Saxophone": -0.4808766981547216, "Clarinet": 2.7486685315504915}, {"Saxophone": 9.857761288081214, "Tuba": 5.430798836678132}, {"Trombone": 6.343344777106807, "Saxophone": 7.644559416741884}, {"Oboe": 4.318504041689563, "Saxophone": 4.788817047133102}, {"Oboe": 3.8882812628062196, "Viola": 8.217738725589324}, {"Flute": 7.783745612484866, "Cello": 10.274815251507402}, {"Flute": 11.392419268758466, "Trombone": 6.3899052793287305}, {"Clarinet": 8.588810669317162, "Horn": 9.916349899736838}, {"Clarinet": 10.679200633069195, "Bassoon": 4.799325351838396}, {"Trumpet": 6.231051922455892, "Violin": 5.310333759644454}, {"Trumpet": 4.045488695329501, "Violin": 4.889235998566622}, {"Flute": 14.356487020908428, "Clarinet": 11.11459785825151}, {"Flute": 10.498633411346487, 
"Saxophone": 5.906452133723401}, {"Bassoon": 7.720671588503598, "Oboe": -1.4666033883677212}, {"Trumpet": 5.066955658790732, "Trombone": 5.7367770408728695}, {"Trumpet": 10.766624378014008, "Horn": 11.499517449337883}, {"Tuba": 6.150860624600508, "Trumpet": 10.352278837321748}, {"Trumpet": 4.163570986292237, "Trombone": 6.065049265397986}, {"Trumpet": 10.258374100752008, "Horn": 10.963462023668848}, {"Cello": 6.526691435934246, "Oboe": 7.533552686684506}, {"Trumpet": 9.164268285175213, "Tuba": 8.514495401959216}, {"Trumpet": 5.312502454533419, "Trombone": 6.470768235086438}, {"Horn": 7.055817296012165, "Trumpet": 4.220578214702017}, {"Trumpet": 9.002451772622685, "Horn": 10.796805763790122}, {"Trumpet": 5.834271324744451, "Trombone": 3.408497572336328}]}} -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-195.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9338328209972552, "Saxophone": 0.5236792603925782}, {"Violin": 0.9139512907476833, "Double_Bass": 0.6956826196563523}, {"Cello": 0.7880293484210517, "Viola": 0.6186272341486709}, {"Violin": 0.9445934860283608, "Double_Bass": 0.8208049144500267}, {"Violin": 0.8521911321824317, "Cello": 0.8398067088760933}, {"Viola": 0.6284188829778667, "Clarinet": 0.7565251390874353}, {"Flute": 0.7084593295112571, "Violin": 0.8992738888159868}, {"Saxophone": 0.5173594980631657, "Clarinet": 0.5253347395158582}, {"Saxophone": 0.49855422592151255, "Tuba": 0.5651050087940954}, {"Trombone": 0.7790337195548938, "Saxophone": 0.03190459112822542}, {"Oboe": 0.2063498385830621, "Saxophone": 0.44622413420568846}, {"Oboe": 0.5765082210786721, "Viola": 0.8281592294407049}, {"Flute": 0.7903235696478881, "Cello": 0.9034958263907762}, {"Flute": 0.814673146255352, "Trombone": 0.8763335107708878}, {"Clarinet": 0.7390884513112589, "Horn": 0.7285923227086202}, {"Clarinet": 0.6024513121605906, "Bassoon": 
0.24753767387647693}, {"Trumpet": 0.8302173962665341, "Violin": 0.7756160345709969}, {"Trumpet": 0.7164086131221123, "Violin": 0.866874157645728}, {"Flute": 0.7491982612022688, "Clarinet": 0.790861300467682}, {"Flute": 0.6467448762918807, "Saxophone": 0.3830757732808301}, {"Bassoon": 0.5280544365530617, "Oboe": 0.28094219293045986}, {"Trumpet": 0.6342148521489019, "Trombone": 0.8273458240629123}, {"Trumpet": 0.6361829913219859, "Horn": 0.8590791006154075}, {"Tuba": 0.4360330905334375, "Trumpet": 0.9191040449795649}, {"Trumpet": 0.6465191544886886, "Trombone": 0.586333187551523}, {"Trumpet": 0.9224271401978115, "Horn": 0.9069070340595503}, {"Cello": 0.5937481139995986, "Oboe": 0.3312896656244718}, {"Trumpet": 0.934402280806624, "Tuba": 0.8399591593525064}, {"Trumpet": 0.8101486400089163, "Trombone": 0.5148221059525258}, {"Horn": 0.7848972919717536, "Trumpet": 0.9034203391880866}, {"Trumpet": 0.7672127574964576, "Horn": 0.7628041173812228}, {"Trumpet": 0.639452654378895, "Trombone": 0.6824340919319164}], "separation": [{"Violin": 7.157068508725844, "Saxophone": 2.7753264613512103}, {"Violin": 9.41461180450968, "Double_Bass": 8.013605645580059}, {"Cello": 8.234945178105281, "Viola": 2.502490023024613}, {"Violin": 7.757418445524236, "Double_Bass": 8.083175338050463}, {"Violin": 9.493479633247155, "Cello": 11.514902227435211}, {"Viola": 2.3452577451951617, "Clarinet": 4.371484920411191}, {"Flute": 9.176407855928895, "Violin": 5.394931291218249}, {"Saxophone": -3.435599788553202, "Clarinet": 2.3134847979012907}, {"Saxophone": 3.0759504763426886, "Tuba": 5.541911924644928}, {"Trombone": 6.576680270656013, "Saxophone": -15.781537678785813}, {"Oboe": -6.681036316491591, "Saxophone": -2.7319831689256024}, {"Oboe": 0.690821797199349, "Viola": 5.90326967846503}, {"Flute": 8.675869313338074, "Cello": 11.536426563993922}, {"Flute": 10.929509940422244, "Trombone": 6.290413672961987}, {"Clarinet": 6.961157735490675, "Horn": 4.694225158653683}, {"Clarinet": 4.320404985262499, 
"Bassoon": -8.508427023227656}, {"Trumpet": 6.024360565505678, "Violin": 7.358121448827502}, {"Trumpet": 3.168822772668201, "Violin": 4.301075619611758}, {"Flute": 11.640068000845424, "Clarinet": 7.074285214394811}, {"Flute": 7.951613795876789, "Saxophone": -2.9712397873086798}, {"Bassoon": 2.4055814518915444, "Oboe": -3.3903595760996312}, {"Trumpet": 3.6244899778676185, "Trombone": 5.3658703031808}, {"Trumpet": 4.591235558008526, "Horn": 10.25672050772932}, {"Tuba": 7.591665689206636, "Trumpet": 10.129239899323997}, {"Trumpet": 3.9323628955983354, "Trombone": 1.1779565444544435}, {"Trumpet": 10.173864924310735, "Horn": 10.720615515794165}, {"Cello": 7.397002470102136, "Oboe": 3.89245142411557}, {"Trumpet": 8.7742734131817, "Tuba": 8.893527506931688}, {"Trumpet": 4.179571756813656, "Trombone": 0.8318161749817116}, {"Horn": 5.9437689921502335, "Trumpet": 3.85413913280958}, {"Trumpet": 6.89989518933137, "Horn": 9.833876989640656}, {"Trumpet": 5.683645789939662, "Trombone": 3.2526869465945243}]}, "MSI-DIS-S": {"separation": [{"Violin": 7.080065769522134, "Saxophone": 7.131776126824328}, {"Violin": 9.532570643979172, "Double_Bass": 7.175293923686416}, {"Cello": 8.274862404005663, "Viola": 5.332899365134224}, {"Violin": 7.8258153135384, "Double_Bass": 7.60321497656927}, {"Violin": 9.126633539391145, "Cello": 11.082905545553988}, {"Viola": 3.578198883428407, "Clarinet": 7.473071515888863}, {"Flute": 9.243527446027135, "Violin": 7.822670154520757}, {"Saxophone": -0.47885591171505204, "Clarinet": 2.621365118601928}, {"Saxophone": 9.791159550615005, "Tuba": 5.846612092015748}, {"Trombone": 6.607971371924172, "Saxophone": 7.7105629683717645}, {"Oboe": 4.180971674204535, "Saxophone": 4.141570894905506}, {"Oboe": 3.5983246589189033, "Viola": 8.114385887500946}, {"Flute": 8.189495886110805, "Cello": 10.584835587495316}, {"Flute": 11.071086279571622, "Trombone": 6.553307548863437}, {"Clarinet": 8.044957057253944, "Horn": 9.870753922218912}, {"Clarinet": 10.81315446134979, 
"Bassoon": 5.723103229196096}, {"Trumpet": 6.5418271653021005, "Violin": 5.248009288530595}, {"Trumpet": 5.3000606666703565, "Violin": 4.797505770849435}, {"Flute": 14.909777035150512, "Clarinet": 11.929730137292578}, {"Flute": 10.275247742548748, "Saxophone": 5.945074203737635}, {"Bassoon": 9.109491971448737, "Oboe": -1.3348741612415391}, {"Trumpet": 4.943938506056652, "Trombone": 5.801938668648426}, {"Trumpet": 10.580778082771772, "Horn": 11.137316445290306}, {"Tuba": 6.674226026525268, "Trumpet": 10.134546085209251}, {"Trumpet": 4.520601242941067, "Trombone": 5.6317937230840425}, {"Trumpet": 10.602966788304311, "Horn": 11.166564788679608}, {"Cello": 6.877205822165349, "Oboe": 7.885847730876038}, {"Trumpet": 9.072903989525415, "Tuba": 9.356419172248055}, {"Trumpet": 5.134511207209691, "Trombone": 6.547931629006058}, {"Horn": 7.334727056797508, "Trumpet": 4.5488663843082495}, {"Trumpet": 8.966076103121173, "Horn": 10.917172920885005}, {"Trumpet": 5.702162783788873, "Trombone": 3.800955604370176}]}} -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-196.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.894937207013595, "Saxophone": 0.518675104650533}, {"Violin": 0.8870851103566548, "Double_Bass": 0.7378645836164271}, {"Cello": 0.8003679660095206, "Viola": 0.6530517187866063}, {"Violin": 0.9101161320187563, "Double_Bass": 0.8268849387239864}, {"Violin": 0.8533863363817036, "Cello": 0.8549830181031985}, {"Viola": 0.5585483497993066, "Clarinet": 0.733686148776802}, {"Flute": 0.6961929755926543, "Violin": 0.810258533167835}, {"Saxophone": 0.6832192457915292, "Clarinet": 0.5833648602027455}, {"Saxophone": 0.5646124978128715, "Tuba": 0.6030085187393598}, {"Trombone": 0.715680279472744, "Saxophone": 0.042801933664294474}, {"Oboe": 0.23053195618221406, "Saxophone": 0.537502052627512}, {"Oboe": 0.48500407759354863, "Viola": 
0.8851581291588069}, {"Flute": 0.819125830954443, "Cello": 0.895246818224642}, {"Flute": 0.8477725149752019, "Trombone": 0.8627369636905478}, {"Clarinet": 0.7160940693047467, "Horn": 0.7524403830997698}, {"Clarinet": 0.6587308672995665, "Bassoon": 0.18154096484025933}, {"Trumpet": 0.8469826726422774, "Violin": 0.6992474645731492}, {"Trumpet": 0.7609694456065114, "Violin": 0.7560559232612903}, {"Flute": 0.7919140956609152, "Clarinet": 0.6838683888138188}, {"Flute": 0.747940878319353, "Saxophone": 0.42404493946857424}, {"Bassoon": 0.22475972926691867, "Oboe": 0.6711534990561925}, {"Trumpet": 0.6365530264034165, "Trombone": 0.8446084576718169}, {"Trumpet": 0.7026422668480723, "Horn": 0.8633001951809095}, {"Tuba": 0.4768186141915502, "Trumpet": 0.9084069538971468}, {"Trumpet": 0.5837317096586493, "Trombone": 0.6550463060953308}, {"Trumpet": 0.8836093302310891, "Horn": 0.8707775475248584}, {"Cello": 0.5943607181988817, "Oboe": 0.39016070171841366}, {"Trumpet": 0.9381199022748048, "Tuba": 0.884740978362636}, {"Trumpet": 0.7748045969491845, "Trombone": 0.46627667651479604}, {"Horn": 0.7943639679132832, "Trumpet": 0.8991853370955128}, {"Trumpet": 0.6838905216009431, "Horn": 0.7456134896200367}, {"Trumpet": 0.6397454902184914, "Trombone": 0.7095685710323106}], "separation": [{"Violin": 6.713324722258274, "Saxophone": 2.032071242586922}, {"Violin": 9.251599327777965, "Double_Bass": 6.937197727792457}, {"Cello": 8.628928091362035, "Viola": 3.2468407282842593}, {"Violin": 6.687626042796239, "Double_Bass": 7.759033576916314}, {"Violin": 9.59800744252268, "Cello": 10.765627654532308}, {"Viola": 3.1284857301995848, "Clarinet": 4.458752524841133}, {"Flute": 9.209401797601979, "Violin": 4.27658050779931}, {"Saxophone": -1.1055847320756205, "Clarinet": 0.7516890645605535}, {"Saxophone": 5.495354868383991, "Tuba": 5.175008497625607}, {"Trombone": 6.183629428675136, "Saxophone": -14.242783028090294}, {"Oboe": -4.568098235823826, "Saxophone": -0.6291874236785278}, {"Oboe": 
0.6496692207323763, "Viola": 7.8122854033614235}, {"Flute": 8.888885253160183, "Cello": 11.605821650899307}, {"Flute": 11.668471601506518, "Trombone": 6.445123925885379}, {"Clarinet": 6.132476433140976, "Horn": 6.011742145323886}, {"Clarinet": 4.024732860989589, "Bassoon": -8.848392339972712}, {"Trumpet": 5.380607607718472, "Violin": 5.35781316403625}, {"Trumpet": 3.3230355254107886, "Violin": 3.540972002470796}, {"Flute": 12.813264615426508, "Clarinet": 6.687919088010013}, {"Flute": 8.928373335229953, "Saxophone": -3.738057418796908}, {"Bassoon": -0.26455267620314116, "Oboe": 0.26962657057462297}, {"Trumpet": 2.6765444600721473, "Trombone": 6.326103113633838}, {"Trumpet": 5.092767125444753, "Horn": 9.762286842742716}, {"Tuba": 8.03169168758505, "Trumpet": 10.293282805474027}, {"Trumpet": 3.9786579389654486, "Trombone": 2.9436576014149836}, {"Trumpet": 9.758792056606664, "Horn": 9.609807717131972}, {"Cello": 7.648393116233021, "Oboe": 6.4802956293437495}, {"Trumpet": 9.064933781091675, "Tuba": 8.074673470459048}, {"Trumpet": 3.315083322463889, "Trombone": 1.4328727244722483}, {"Horn": 5.804549614725037, "Trumpet": 3.1447001950102482}, {"Trumpet": 4.540860911560189, "Horn": 11.171941969028754}, {"Trumpet": 5.804867289816604, "Trombone": 3.604629740835625}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.521403217863613, "Saxophone": 6.983464605460762}, {"Violin": 9.447227732258277, "Double_Bass": 6.23281543688209}, {"Cello": 8.735113048088696, "Viola": 5.104697563949083}, {"Violin": 7.861428832701172, "Double_Bass": 7.253193927071148}, {"Violin": 9.18431614877667, "Cello": 10.456018781195798}, {"Viola": 3.7665083104342836, "Clarinet": 7.356204007968387}, {"Flute": 9.055996324423335, "Violin": 7.898478426695225}, {"Saxophone": -0.16857528393248, "Clarinet": 2.2011505871713304}, {"Saxophone": 9.951695220585805, "Tuba": 5.770330458967661}, {"Trombone": 6.681984979783816, "Saxophone": 7.897250085429166}, {"Oboe": 4.44681385227674, "Saxophone": 4.219120350132117}, {"Oboe": 
3.2153177232100405, "Viola": 8.359344568860251}, {"Flute": 7.89731479962012, "Cello": 10.495487559457889}, {"Flute": 11.33343503736429, "Trombone": 6.871591993078655}, {"Clarinet": 8.375808423775954, "Horn": 10.224980528335845}, {"Clarinet": 10.424667179088852, "Bassoon": 5.704257288977324}, {"Trumpet": 6.047817943599504, "Violin": 5.358908244917848}, {"Trumpet": 3.9655650089034933, "Violin": 4.470570424534397}, {"Flute": 14.641050139296949, "Clarinet": 11.050210989002238}, {"Flute": 10.313151303873077, "Saxophone": 6.689024907957661}, {"Bassoon": 8.017703483889969, "Oboe": -1.4867946423239162}, {"Trumpet": 5.081067823207137, "Trombone": 5.978648901400581}, {"Trumpet": 10.467828721090166, "Horn": 11.294702255419132}, {"Tuba": 6.605358501051459, "Trumpet": 10.257273345259208}, {"Trumpet": 4.067576582159983, "Trombone": 6.01159502450985}, {"Trumpet": 10.13111734630126, "Horn": 10.850390901573608}, {"Cello": 7.100341117381982, "Oboe": 6.812476364474771}, {"Trumpet": 9.102717940932884, "Tuba": 8.226223437321663}, {"Trumpet": 5.360578249946477, "Trombone": 6.7661293549564485}, {"Horn": 7.854573806969909, "Trumpet": 3.8945439081044206}, {"Trumpet": 8.856871265896016, "Horn": 10.962718739442856}, {"Trumpet": 5.63266500415506, "Trombone": 3.718497698507282}]}} -------------------------------------------------------------------------------- /evaluation/MSI-DIS/scores-197.json: -------------------------------------------------------------------------------- 1 | {"MSI-DIS": {"transcription": [{"Violin": 0.89237537947467, "Saxophone": 0.44510238029764515}, {"Violin": 0.8556221472703088, "Double_Bass": 0.7367069285736837}, {"Cello": 0.7700215390356321, "Viola": 0.6020352795541265}, {"Violin": 0.9023963971087151, "Double_Bass": 0.8325830325150321}, {"Violin": 0.85347249740386, "Cello": 0.8441735204462577}, {"Viola": 0.655828150395638, "Clarinet": 0.7312129069706628}, {"Flute": 0.7274294783180636, "Violin": 0.8617809823305205}, {"Saxophone": 0.6161629862761638, "Clarinet": 
0.6286135051219482}, {"Saxophone": 0.6002161265463757, "Tuba": 0.5063368051058555}, {"Trombone": 0.7843167854698471, "Saxophone": 0.05874185056691073}, {"Oboe": 0.16904790956065618, "Saxophone": 0.5045632153130077}, {"Oboe": 0.49862727867612283, "Viola": 0.8420476979359018}, {"Flute": 0.749772458939221, "Cello": 0.9029408469382657}, {"Flute": 0.6965261134906171, "Trombone": 0.8743410396684531}, {"Clarinet": 0.8023153731963755, "Horn": 0.6379802064010492}, {"Clarinet": 0.6590131508457827, "Bassoon": 0.19266169823998555}, {"Trumpet": 0.8163087406871357, "Violin": 0.6805988386292915}, {"Trumpet": 0.779189274699307, "Violin": 0.7953052662993431}, {"Flute": 0.6858713295593329, "Clarinet": 0.6394640119662909}, {"Flute": 0.361464153013896, "Saxophone": 0.27013696842278306}, {"Bassoon": 0.1295922447106464, "Oboe": 0.41483241432495527}, {"Trumpet": 0.5394760704019307, "Trombone": 0.8404326662615018}, {"Trumpet": 0.5340195969708901, "Horn": 0.8339660108351642}, {"Tuba": 0.6373837525451183, "Trumpet": 0.9262914678420405}, {"Trumpet": 0.5844239518780254, "Trombone": 0.6841637552899233}, {"Trumpet": 0.894347838435463, "Horn": 0.8982667093394014}, {"Cello": 0.5867802468145809, "Oboe": 0.4249470496070424}, {"Trumpet": 0.9308400711008051, "Tuba": 0.8772392750723959}, {"Trumpet": 0.822151541716393, "Trombone": 0.7455067630608492}, {"Horn": 0.7603527053280081, "Trumpet": 0.8749233977537402}, {"Trumpet": 0.7489058833464853, "Horn": 0.7107329007029287}, {"Trumpet": 0.6522310085481386, "Trombone": 0.7129749739935904}], "separation": [{"Violin": 6.325352212579337, "Saxophone": 2.355110866817485}, {"Violin": 8.34897201894815, "Double_Bass": 6.90577158222558}, {"Cello": 6.364271577020151, "Viola": 2.761319556998712}, {"Violin": 7.312206968333803, "Double_Bass": 7.135712603036613}, {"Violin": 9.239476776592227, "Cello": 10.573650766482412}, {"Viola": 3.0785353601470407, "Clarinet": 4.842173726849181}, {"Flute": 9.363223935687254, "Violin": 5.729148706950928}, {"Saxophone": 
-2.208011522001557, "Clarinet": 1.626256445716062}, {"Saxophone": 4.525261948318366, "Tuba": 3.8711151103564303}, {"Trombone": 6.492664804311835, "Saxophone": -11.460727198643177}, {"Oboe": -10.027083660030033, "Saxophone": -1.139859364905568}, {"Oboe": -0.7321952499639489, "Viola": 6.352269803908089}, {"Flute": 7.340293174681898, "Cello": 10.864129251096077}, {"Flute": 7.405251542695906, "Trombone": 6.453718828315837}, {"Clarinet": 7.7253063339332995, "Horn": 3.0519130218373407}, {"Clarinet": 3.703266105445524, "Bassoon": -9.716815854898881}, {"Trumpet": 5.317681612805037, "Violin": 5.199798037747085}, {"Trumpet": 2.998355765919683, "Violin": 4.131989840961902}, {"Flute": 7.762777102782823, "Clarinet": 4.703996405741363}, {"Flute": 0.5801766272758793, "Saxophone": -3.860017004957613}, {"Bassoon": -5.633327376626225, "Oboe": 2.258592482203077}, {"Trumpet": 2.9435727729082997, "Trombone": 6.033829754320279}, {"Trumpet": 1.143057616618786, "Horn": 10.391724584898698}, {"Tuba": 7.7080250328485995, "Trumpet": 10.114150126210586}, {"Trumpet": 0.7674672266700421, "Trombone": 1.687416843994157}, {"Trumpet": 7.345057692744197, "Horn": 9.057155022369821}, {"Cello": 6.588845805782206, "Oboe": 6.032289607470027}, {"Trumpet": 8.901532348797101, "Tuba": 8.219206081447766}, {"Trumpet": 5.2682827954752645, "Trombone": 4.651545632827356}, {"Horn": 5.209315846348989, "Trumpet": 2.7856418280090485}, {"Trumpet": 4.545532814942542, "Horn": 8.912036508120453}, {"Trumpet": 5.926574193978961, "Trombone": 3.741061197900076}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.251115470150459, "Saxophone": 7.884513713299848}, {"Violin": 9.545937699520385, "Double_Bass": 6.274510475531583}, {"Cello": 7.520606995582185, "Viola": 5.091017630289622}, {"Violin": 8.446044827338657, "Double_Bass": 6.704541008069734}, {"Violin": 8.815925149707262, "Cello": 10.285060072173414}, {"Viola": 3.93537553677161, "Clarinet": 7.18688438951165}, {"Flute": 9.176126413870065, "Violin": 8.631117934197498}, 
{"Saxophone": -0.416760570908894, "Clarinet": 2.5220909351338228}, {"Saxophone": 9.950375762115959, "Tuba": 5.730435566821114}, {"Trombone": 6.454590448160587, "Saxophone": 7.449444668538713}, {"Oboe": 4.316112154541536, "Saxophone": 4.449941165719888}, {"Oboe": 3.2000019129281148, "Viola": 8.441884769690777}, {"Flute": 7.805992272605051, "Cello": 10.262162684896087}, {"Flute": 11.03384090313118, "Trombone": 6.646034983367257}, {"Clarinet": 8.433194809497781, "Horn": 10.049260309925085}, {"Clarinet": 10.777067694414002, "Bassoon": 5.146004407228368}, {"Trumpet": 6.7223716824656785, "Violin": 4.880086896497433}, {"Trumpet": 4.310304117108964, "Violin": 4.668883798978594}, {"Flute": 14.575404019619626, "Clarinet": 11.447146603983708}, {"Flute": 10.130888516483523, "Saxophone": 5.835447981381627}, {"Bassoon": 8.873670119317286, "Oboe": -1.0286265947346116}, {"Trumpet": 5.036149730312115, "Trombone": 5.701037197167351}, {"Trumpet": 10.591534119844182, "Horn": 11.966472953913696}, {"Tuba": 6.209705267184028, "Trumpet": 10.136869665970057}, {"Trumpet": 4.7186076680299, "Trombone": 5.900529522286721}, {"Trumpet": 10.492795400271014, "Horn": 10.092508133035746}, {"Cello": 6.810037610486766, "Oboe": 6.5655674162955915}, {"Trumpet": 9.362600020193808, "Tuba": 8.580276277609844}, {"Trumpet": 5.741049414964444, "Trombone": 6.672906658283532}, {"Horn": 7.547199325887254, "Trumpet": 3.9334232529851043}, {"Trumpet": 8.808437799868987, "Horn": 11.225484812838339}, {"Trumpet": 6.127368961149795, "Trombone": 3.8140906216543238}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-190.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 0.9251442848839847, "Saxophone": 0.5181546733929234}, {"Violin": 0.9102259102261561, "Double_Bass": 0.7752092276121464}, {"Cello": 0.7342586937107307, "Viola": 0.660170327861245}, {"Violin": 0.9251839827225579, 
"Double_Bass": 0.8344817984677688}, {"Violin": 0.842339533810597, "Cello": 0.8264666805679032}, {"Viola": 0.5810072333732746, "Clarinet": 0.826391147551746}, {"Flute": 0.6762144965062589, "Violin": 0.8821178788976707}, {"Saxophone": 0.36505175708200155, "Clarinet": 0.4453237702960427}, {"Saxophone": 0.4161524545734008, "Tuba": 0.6226111577170401}, {"Trombone": 0.8470594673956615, "Saxophone": 0.04481943770003019}, {"Oboe": 0.21653351657261455, "Saxophone": 0.41426634626983655}, {"Oboe": 0.4947882381063612, "Viola": 0.8485804231841394}, {"Flute": 0.8339634517873997, "Cello": 0.8887994690569462}, {"Flute": 0.8502812959857168, "Trombone": 0.8971240200923347}, {"Clarinet": 0.6925028500035234, "Horn": 0.7483736580321206}, {"Clarinet": 0.6781474899875646, "Bassoon": 0.27571901696779744}, {"Trumpet": 0.8272035769572158, "Violin": 0.7757573880818966}, {"Trumpet": 0.5326242283264728, "Violin": 0.8003995420530146}, {"Flute": 0.9279064164461931, "Clarinet": 0.7840266192166879}, {"Flute": 0.6886910481868935, "Saxophone": 0.3169213187862774}, {"Bassoon": 0.4821477123530618, "Oboe": 0.6760352634599481}, {"Trumpet": 0.7052701286449962, "Trombone": 0.8653153802713082}, {"Trumpet": 0.6114493326396717, "Horn": 0.8155378876173611}, {"Tuba": 0.5685467696113435, "Trumpet": 0.8873762467049575}, {"Trumpet": 0.555759829105926, "Trombone": 0.7134012839272565}, {"Trumpet": 0.8722578460034435, "Horn": 0.8315576718149527}, {"Cello": 0.5379875469801705, "Oboe": 0.46708727812363726}, {"Trumpet": 0.9388084157973589, "Tuba": 0.7997813713165582}, {"Trumpet": 0.8094475973296866, "Trombone": 0.817587893111617}, {"Horn": 0.7366386092690937, "Trumpet": 0.7193563199798007}, {"Trumpet": 0.7286939706138229, "Horn": 0.7777772835095649}, {"Trumpet": 0.5909634654700726, "Trombone": 0.7460937395291403}], "separation": [{"Violin": 8.269233007379697, "Saxophone": 4.338338830854572}, {"Violin": 12.314969374903939, "Double_Bass": 7.6731520945521625}, {"Cello": 7.1786076072832525, "Viola": 5.455080331849846}, 
{"Violin": 11.564712403474966, "Double_Bass": 9.535666801636365}, {"Violin": 11.897168006384668, "Cello": 14.222602067451138}, {"Viola": 2.858339755857327, "Clarinet": 4.749034659478985}, {"Flute": 9.798047106498677, "Violin": 7.742842039750225}, {"Saxophone": -4.10994966895947, "Clarinet": -0.510236744234285}, {"Saxophone": 2.4867257664885334, "Tuba": 7.670025500290858}, {"Trombone": 9.953810177748572, "Saxophone": -14.633298241968387}, {"Oboe": -6.0121962473655755, "Saxophone": 0.21062296802478192}, {"Oboe": 2.751310231411171, "Viola": 9.04517801509609}, {"Flute": 10.686735511524347, "Cello": 14.489174954666776}, {"Flute": 12.960043824038092, "Trombone": 10.423056178069965}, {"Clarinet": 5.753313819560761, "Horn": 6.125051513272348}, {"Clarinet": 6.279371860208991, "Bassoon": -6.863060952103935}, {"Trumpet": 5.120465674999285, "Violin": 5.457680133469218}, {"Trumpet": 2.5654578322302335, "Violin": 4.727559531763638}, {"Flute": 15.390505258701117, "Clarinet": 5.970558634988674}, {"Flute": 7.703407225619773, "Saxophone": -3.3552999126603145}, {"Bassoon": 1.8454882133004364, "Oboe": -3.17993499403644}, {"Trumpet": 3.753078501509269, "Trombone": 8.42561357225662}, {"Trumpet": 3.8896826431125686, "Horn": 12.226250401841225}, {"Tuba": 14.757338725547184, "Trumpet": 13.31441398221505}, {"Trumpet": 3.9283419011189036, "Trombone": 5.991385298752856}, {"Trumpet": 12.717369148135552, "Horn": 6.889294885051784}, {"Cello": 9.24997801631479, "Oboe": 4.860141959890007}, {"Trumpet": 13.253915449643292, "Tuba": 12.540433594208348}, {"Trumpet": 5.412086610263316, "Trombone": 5.894642477378911}, {"Horn": 5.898436892267443, "Trumpet": 0.5702562952756055}, {"Trumpet": 4.704932092525263, "Horn": 13.872607594278936}, {"Trumpet": 6.991438713476075, "Trombone": 6.252403355896494}]}, "MSI-S": {"separation": [{"Violin": 8.047104792156668, "Saxophone": 11.082545926622885}, {"Violin": 12.254939614677266, "Double_Bass": 7.084260135390188}, {"Cello": 8.090960511081427, "Viola": 
8.489943533014928}, {"Violin": 11.90780876514177, "Double_Bass": 9.215632665423524}, {"Violin": 11.5783171325271, "Cello": 13.791491534640787}, {"Viola": 4.779626823128284, "Clarinet": 5.408722034029381}, {"Flute": 11.581006935994145, "Violin": 10.985776365051885}, {"Saxophone": -1.129362234105362, "Clarinet": 2.333325936004596}, {"Saxophone": 10.674102958801466, "Tuba": 7.358436178472214}, {"Trombone": 9.61610795592729, "Saxophone": 9.55901277509283}, {"Oboe": 6.80628971034443, "Saxophone": 8.908976079166214}, {"Oboe": 6.700097512853127, "Viola": 11.208968318780395}, {"Flute": 9.570129792185067, "Cello": 13.362766990158176}, {"Flute": 12.691962102046027, "Trombone": 10.189738615391864}, {"Clarinet": 7.834684026598236, "Horn": 11.563370802266476}, {"Clarinet": 10.880525574443634, "Bassoon": 7.882743254788611}, {"Trumpet": 7.284965517549401, "Violin": 2.7439416545145368}, {"Trumpet": 4.03834539591089, "Violin": 4.212580273261593}, {"Flute": 16.37407676787338, "Clarinet": 9.436727226021798}, {"Flute": 10.560042926605568, "Saxophone": 6.080197374269638}, {"Bassoon": 12.409777167959962, "Oboe": -1.6961181766260438}, {"Trumpet": 6.202559620169078, "Trombone": 8.812139820850447}, {"Trumpet": 14.492299618580871, "Horn": 15.380975663013894}, {"Tuba": 10.677507967472371, "Trumpet": 13.558397891064136}, {"Trumpet": 5.682573641723998, "Trombone": 8.711709478940147}, {"Trumpet": 14.038187417915813, "Horn": 13.438976991818933}, {"Cello": 9.458398085562012, "Oboe": 11.75103396136266}, {"Trumpet": 13.496684207835393, "Tuba": 13.11574071724432}, {"Trumpet": 7.143966893547329, "Trombone": 7.917465641322568}, {"Horn": 7.811413761486648, "Trumpet": 4.654760900442918}, {"Trumpet": 13.47688997743125, "Horn": 14.262065694914334}, {"Trumpet": 6.791381271377515, "Trombone": 6.652098560078631}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-198.json: -------------------------------------------------------------------------------- 1 
| {"MSI": {"transcription": [{"Violin": 0.9240562021852543, "Saxophone": 0.568345650753969}, {"Violin": 0.9153148660051246, "Double_Bass": 0.7994580054631747}, {"Cello": 0.7797932546933432, "Viola": 0.3411302390173449}, {"Violin": 0.935079046530209, "Double_Bass": 0.8508446265902565}, {"Violin": 0.8564015890267973, "Cello": 0.845216630717393}, {"Viola": 0.5266569254542182, "Clarinet": 0.7522504919731647}, {"Flute": 0.7519803820564964, "Violin": 0.8934413945894634}, {"Saxophone": 0.4267941253868336, "Clarinet": 0.49727687494180345}, {"Saxophone": 0.5452988319584595, "Tuba": 0.614111579122122}, {"Trombone": 0.8369091908958469, "Saxophone": 0.08985526373178969}, {"Oboe": 0.201920692248492, "Saxophone": 0.491948627463316}, {"Oboe": 0.5103982991392773, "Viola": 0.8597633753248017}, {"Flute": 0.8316819636282551, "Cello": 0.8987670747298576}, {"Flute": 0.8362251384042749, "Trombone": 0.8909972197453055}, {"Clarinet": 0.7737352378049958, "Horn": 0.7270063773787437}, {"Clarinet": 0.6204042637633179, "Bassoon": 0.16005388783463503}, {"Trumpet": 0.7933395590797395, "Violin": 0.7277376829143688}, {"Trumpet": 0.6323500316919551, "Violin": 0.8004611683406033}, {"Flute": 0.789976686889872, "Clarinet": 0.6629963256196488}, {"Flute": 0.5984597138031175, "Saxophone": 0.29622156656139526}, {"Bassoon": 0.13883301411653517, "Oboe": 0.4217819109596345}, {"Trumpet": 0.6233525069297288, "Trombone": 0.6099843207527753}, {"Trumpet": 0.6981588091099461, "Horn": 0.8077292657217913}, {"Tuba": 0.6201647473836734, "Trumpet": 0.8703178800773663}, {"Trumpet": 0.5622130959324757, "Trombone": 0.5981636749990653}, {"Trumpet": 0.9095472503123851, "Horn": 0.8284052753642809}, {"Cello": 0.5836042853064146, "Oboe": 0.4960642966330491}, {"Trumpet": 0.9355778596502329, "Tuba": 0.8967261479799133}, {"Trumpet": 0.7815979967611193, "Trombone": 0.4576627148514712}, {"Horn": 0.8180549040812352, "Trumpet": 0.8863887265077279}, {"Trumpet": 0.7166515478524493, "Horn": 0.713591448879571}, {"Trumpet": 
0.6001084958620146, "Trombone": 0.6376698553394347}], "separation": [{"Violin": 8.386771834035775, "Saxophone": 5.522756190424323}, {"Violin": 12.169255833990105, "Double_Bass": 7.981836996822372}, {"Cello": 6.579897082846177, "Viola": -0.7035558467680549}, {"Violin": 11.69476599859357, "Double_Bass": 9.465504867785675}, {"Violin": 12.165202497123168, "Cello": 14.06702005907595}, {"Viola": 2.7398959596282735, "Clarinet": 4.611783879054412}, {"Flute": 11.537470311708898, "Violin": 7.443221096318304}, {"Saxophone": -3.6859830881988036, "Clarinet": -0.4217837325888902}, {"Saxophone": 2.789639424165515, "Tuba": 7.556183949703094}, {"Trombone": 9.718120830450998, "Saxophone": -10.193149098639969}, {"Oboe": -7.9815411605021875, "Saxophone": -0.07345546320580675}, {"Oboe": 2.559875461905407, "Viola": 9.175258636419654}, {"Flute": 11.031547415366758, "Cello": 15.232476023303335}, {"Flute": 12.707020263848621, "Trombone": 10.267912556859395}, {"Clarinet": 6.58710717636599, "Horn": 5.710329571082195}, {"Clarinet": 4.250195007720068, "Bassoon": -8.406621940455443}, {"Trumpet": 5.710614675776288, "Violin": 5.980082028893074}, {"Trumpet": 3.2978324858088275, "Violin": 4.680371555855011}, {"Flute": 5.93174309392687, "Clarinet": 3.9264205310572136}, {"Flute": 5.892199241324855, "Saxophone": -6.902002560602556}, {"Bassoon": -3.075369340566991, "Oboe": 0.6489817711444132}, {"Trumpet": 3.550870669026798, "Trombone": 2.6607934436111815}, {"Trumpet": 6.124741851435262, "Horn": 12.27683786942867}, {"Tuba": 14.227907044410157, "Trumpet": 12.73813507097433}, {"Trumpet": 1.3575086969477175, "Trombone": 3.850657818457792}, {"Trumpet": 12.292363731414081, "Horn": 4.59812739907047}, {"Cello": 8.85121571233573, "Oboe": 5.666712922357422}, {"Trumpet": 13.20722696238372, "Tuba": 13.01454748900403}, {"Trumpet": 6.081743024186396, "Trombone": -0.7244814781435521}, {"Horn": 6.688887567368357, "Trumpet": 5.0692576215816025}, {"Trumpet": 5.605975954069844, "Horn": 12.086761492636295}, {"Trumpet": 
6.006112617990129, "Trombone": 3.127727828430526}]}, "MSI-S": {"separation": [{"Violin": 7.846971984480206, "Saxophone": 11.840984234692108}, {"Violin": 12.199577182314416, "Double_Bass": 7.339234580593179}, {"Cello": 8.094610470778449, "Viola": 8.380023057843912}, {"Violin": 11.994605075459859, "Double_Bass": 9.018348468222051}, {"Violin": 11.671633501743969, "Cello": 13.617765940315225}, {"Viola": 4.605064371175164, "Clarinet": 5.632290542476499}, {"Flute": 11.740663216922094, "Violin": 10.919509529255294}, {"Saxophone": -1.0504145664442202, "Clarinet": 2.289676507926674}, {"Saxophone": 10.511236086665365, "Tuba": 7.682885964617046}, {"Trombone": 9.555856755663225, "Saxophone": 9.711785831766424}, {"Oboe": 7.185899546280057, "Saxophone": 8.259465318401327}, {"Oboe": 8.495196948182219, "Viola": 11.34844936076465}, {"Flute": 9.677758361082482, "Cello": 13.584666741162664}, {"Flute": 12.875241996866798, "Trombone": 10.186197595570377}, {"Clarinet": 7.98697556188623, "Horn": 11.744457717441225}, {"Clarinet": 11.32172000112689, "Bassoon": 7.933447421333608}, {"Trumpet": 7.508277773835639, "Violin": 2.6515007007916096}, {"Trumpet": 4.565072849585745, "Violin": 4.481200581800574}, {"Flute": 16.357871360031247, "Clarinet": 9.663360794663701}, {"Flute": 10.451907650916578, "Saxophone": 3.817569645764915}, {"Bassoon": 12.510757948088587, "Oboe": -1.3101312956894295}, {"Trumpet": 6.265598031844025, "Trombone": 8.696425238852424}, {"Trumpet": 14.790516035289606, "Horn": 15.39216870224207}, {"Tuba": 10.21403522992911, "Trumpet": 13.51899736927772}, {"Trumpet": 5.8523196223401435, "Trombone": 8.47342382321218}, {"Trumpet": 14.096387957092968, "Horn": 13.34878418763118}, {"Cello": 9.026280239119291, "Oboe": 9.400195392066701}, {"Trumpet": 13.72595554393383, "Tuba": 13.655450513713985}, {"Trumpet": 7.2540770117647195, "Trombone": 8.057803830899136}, {"Horn": 7.933959756481133, "Trumpet": 5.72312652720202}, {"Trumpet": 13.288756957325207, "Horn": 14.153428805027133}, {"Trumpet": 
6.89548356389272, "Trombone": 6.643526295765945}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-193.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 0.9244617632443347, "Saxophone": 0.5593304002240704}, {"Violin": 0.8802883026897393, "Double_Bass": 0.7754679668362998}, {"Cello": 0.7603695891087061, "Viola": 0.6643071161158848}, {"Violin": 0.9270978840386803, "Double_Bass": 0.8444560517550049}, {"Violin": 0.8568251950540356, "Cello": 0.848481725353147}, {"Viola": 0.5922023443112255, "Clarinet": 0.7647419615661581}, {"Flute": 0.7333100287275491, "Violin": 0.8748480863891732}, {"Saxophone": 0.45481391160900225, "Clarinet": 0.5175529484421656}, {"Saxophone": 0.4658603046829378, "Tuba": 0.6052916034343235}, {"Trombone": 0.7892719662869737, "Saxophone": 0.04501789003289155}, {"Oboe": 0.20939084354369777, "Saxophone": 0.4863235005508187}, {"Oboe": 0.4875299695286682, "Viola": 0.8718464435830005}, {"Flute": 0.8296203153651434, "Cello": 0.9124242246447343}, {"Flute": 0.8749812721954476, "Trombone": 0.871126976381628}, {"Clarinet": 0.7206862456499559, "Horn": 0.7523929360391318}, {"Clarinet": 0.7277053969679255, "Bassoon": 0.13487023368704545}, {"Trumpet": 0.8500349390322046, "Violin": 0.8359251990066111}, {"Trumpet": 0.7473010548250449, "Violin": 0.7935175929935436}, {"Flute": 0.812206314822883, "Clarinet": 0.782824278146499}, {"Flute": 0.6487360104355274, "Saxophone": 0.3398590624326701}, {"Bassoon": 0.38967696016873343, "Oboe": 0.5164408268642677}, {"Trumpet": 0.6067254018449528, "Trombone": 0.7907018079465361}, {"Trumpet": 0.7249312268694436, "Horn": 0.8274219464003405}, {"Tuba": 0.5199329896842052, "Trumpet": 0.9150869812041426}, {"Trumpet": 0.6871816044867367, "Trombone": 0.6425679201880953}, {"Trumpet": 0.9287733634858255, "Horn": 0.8820497498241525}, {"Cello": 0.5931549322048726, "Oboe": 0.49798267374355226}, {"Trumpet": 
0.9308235898652567, "Tuba": 0.8668279307276997}, {"Trumpet": 0.8151225363573554, "Trombone": 0.5223681727010532}, {"Horn": 0.8078062559496695, "Trumpet": 0.9074014506798808}, {"Trumpet": 0.725017737699224, "Horn": 0.7360288831085608}, {"Trumpet": 0.59419067005877, "Trombone": 0.728958473153738}], "separation": [{"Violin": 8.062433475902145, "Saxophone": 6.632652275985622}, {"Violin": 11.210475449369895, "Double_Bass": 7.899986969253366}, {"Cello": 7.081242809786832, "Viola": 5.643440360134631}, {"Violin": 11.285258758423566, "Double_Bass": 9.931102654562913}, {"Violin": 11.924270081959383, "Cello": 14.214379689529677}, {"Viola": 2.4846663968826843, "Clarinet": 5.228194480827861}, {"Flute": 11.31880368787244, "Violin": 7.414807163113229}, {"Saxophone": -2.715756852983728, "Clarinet": -0.3678961304236015}, {"Saxophone": 3.6946460282165066, "Tuba": 7.412322167796546}, {"Trombone": 9.698466073365768, "Saxophone": -15.497330773004538}, {"Oboe": -6.106251657090574, "Saxophone": 0.20709082742416168}, {"Oboe": 2.574190015518303, "Viola": 9.418361662602635}, {"Flute": 10.965249108259128, "Cello": 15.189084957341974}, {"Flute": 13.080513313721667, "Trombone": 10.081790892487469}, {"Clarinet": 5.059876944240438, "Horn": 7.282784159550497}, {"Clarinet": 6.355874033221949, "Bassoon": -10.729736541052727}, {"Trumpet": 8.213621192276381, "Violin": 4.017862800830307}, {"Trumpet": 4.488067663762125, "Violin": 3.8621095567805153}, {"Flute": 14.491527384457115, "Clarinet": 5.077140365957579}, {"Flute": 8.030498062212892, "Saxophone": -3.1071559146250367}, {"Bassoon": -2.056275497621944, "Oboe": 0.25591815859334616}, {"Trumpet": 2.8967283638727155, "Trombone": 5.783828536079467}, {"Trumpet": 7.061563461694228, "Horn": 12.031026423011031}, {"Tuba": 14.89447473239295, "Trumpet": 13.32577858314799}, {"Trumpet": 5.191119070077824, "Trombone": 4.884364936677561}, {"Trumpet": 13.150740351497873, "Horn": 10.5868342263642}, {"Cello": 9.348906368774601, "Oboe": 7.60741088677431}, {"Trumpet": 
13.116784240845602, "Tuba": 13.121297919364919}, {"Trumpet": 6.378649869407855, "Trombone": 2.5902547290008164}, {"Horn": 6.582751990647305, "Trumpet": 5.384880754109012}, {"Trumpet": 6.81170316210022, "Horn": 12.827565777743557}, {"Trumpet": 6.92676404635733, "Trombone": 6.140498429386195}]}, "MSI-S": {"separation": [{"Violin": 7.768353967035834, "Saxophone": 11.881263271197495}, {"Violin": 12.136068949431422, "Double_Bass": 7.339712349846161}, {"Cello": 8.070068071395633, "Viola": 8.399629904740626}, {"Violin": 11.95181974429685, "Double_Bass": 9.442938575690508}, {"Violin": 11.695659977318611, "Cello": 13.674640718172611}, {"Viola": 4.52066326476576, "Clarinet": 5.793204005078499}, {"Flute": 11.793642357410135, "Violin": 10.941707438597607}, {"Saxophone": -1.0017104708275384, "Clarinet": 2.2641173749920043}, {"Saxophone": 10.729177820650445, "Tuba": 7.668074321368641}, {"Trombone": 9.679304025745719, "Saxophone": 9.695588345119255}, {"Oboe": 6.894391782359195, "Saxophone": 8.071664543377919}, {"Oboe": 7.046523757367044, "Viola": 11.090837781055429}, {"Flute": 9.698968801296576, "Cello": 13.491428084831869}, {"Flute": 12.828485732118729, "Trombone": 10.213177215573923}, {"Clarinet": 7.632633550468762, "Horn": 11.666913543764586}, {"Clarinet": 10.837185913195206, "Bassoon": 8.313142741361405}, {"Trumpet": 7.630824149744368, "Violin": 2.7820441398583378}, {"Trumpet": 4.659280468670007, "Violin": 4.260745985891942}, {"Flute": 16.43037047221107, "Clarinet": 9.331572546951783}, {"Flute": 10.329852582723538, "Saxophone": 5.5579704230338045}, {"Bassoon": 12.508181490198577, "Oboe": -1.4740326161664474}, {"Trumpet": 6.276552008350533, "Trombone": 8.768047995998938}, {"Trumpet": 14.495060879044797, "Horn": 15.256659352788454}, {"Tuba": 10.744841386681848, "Trumpet": 13.605201555088215}, {"Trumpet": 5.968837051263861, "Trombone": 8.631734033990318}, {"Trumpet": 13.754927032917056, "Horn": 13.292014290275873}, {"Cello": 9.459960745462983, "Oboe": 11.811345313619263}, 
{"Trumpet": 13.445614797822909, "Tuba": 14.033333436065528}, {"Trumpet": 7.039480478980648, "Trombone": 8.096547329984286}, {"Horn": 7.978715516483349, "Trumpet": 5.57277668152468}, {"Trumpet": 13.45905368817337, "Horn": 14.064533272839627}, {"Trumpet": 6.994979719818818, "Trombone": 6.504297724623181}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-195.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 0.9313184236630168, "Saxophone": 0.46996658809063735}, {"Violin": 0.9210162175461973, "Double_Bass": 0.7302736521381672}, {"Cello": 0.7968504264474542, "Viola": 0.5227482038994987}, {"Violin": 0.9442940875535218, "Double_Bass": 0.8605585050315773}, {"Violin": 0.8336126844464975, "Cello": 0.8316162303397849}, {"Viola": 0.549609129450183, "Clarinet": 0.7380018176215098}, {"Flute": 0.7445581030774061, "Violin": 0.9001301632979073}, {"Saxophone": 0.46519719707323026, "Clarinet": 0.4653305045541827}, {"Saxophone": 0.35933518310562296, "Tuba": 0.5620447771406816}, {"Trombone": 0.8041519719225962, "Saxophone": 0.03558864333118414}, {"Oboe": 0.2611975077798584, "Saxophone": 0.5097463896606519}, {"Oboe": 0.5653900639672017, "Viola": 0.8556946188367348}, {"Flute": 0.7841410033756995, "Cello": 0.9076025430127005}, {"Flute": 0.7862050982394203, "Trombone": 0.885939215595156}, {"Clarinet": 0.7776934336968994, "Horn": 0.7850422941413437}, {"Clarinet": 0.6805191005088744, "Bassoon": 0.13921125832793405}, {"Trumpet": 0.8181137587095445, "Violin": 0.7676506793811783}, {"Trumpet": 0.6724228099860647, "Violin": 0.8139921321681473}, {"Flute": 0.7812536803812524, "Clarinet": 0.6866810986415959}, {"Flute": 0.7269052292430188, "Saxophone": 0.2573889180215115}, {"Bassoon": 0.29884020401449585, "Oboe": 0.6135526627444909}, {"Trumpet": 0.6264949979835742, "Trombone": 0.7283157397128702}, {"Trumpet": 0.7301707162623018, "Horn": 0.8001272565392903}, {"Tuba": 
0.5738075141943123, "Trumpet": 0.903847567563141}, {"Trumpet": 0.6775496324188846, "Trombone": 0.6564596040919183}, {"Trumpet": 0.9034094718018223, "Horn": 0.9177432917240086}, {"Cello": 0.5938054648265988, "Oboe": 0.4560081730079119}, {"Trumpet": 0.9333509087536362, "Tuba": 0.8091743055281851}, {"Trumpet": 0.8000314329221302, "Trombone": 0.44705697055317656}, {"Horn": 0.770416396438021, "Trumpet": 0.9031447042215406}, {"Trumpet": 0.6849772732105041, "Horn": 0.7989662489170208}, {"Trumpet": 0.5848076093103303, "Trombone": 0.7163187887803806}], "separation": [{"Violin": 8.599516733404187, "Saxophone": 4.565529863975607}, {"Violin": 12.078862894795781, "Double_Bass": 6.631751591756752}, {"Cello": 7.782602030948783, "Viola": 1.8597794081838914}, {"Violin": 11.858880004988261, "Double_Bass": 10.018772089189799}, {"Violin": 11.802061725603341, "Cello": 14.457851945595229}, {"Viola": 2.9262384731602404, "Clarinet": 4.950946681737335}, {"Flute": 11.441580055973368, "Violin": 6.767176530445998}, {"Saxophone": -3.713483088688661, "Clarinet": 0.5021289812469275}, {"Saxophone": -0.695447384900048, "Tuba": 6.679994334583949}, {"Trombone": 9.81800616640154, "Saxophone": -15.198697230965319}, {"Oboe": -5.612506908162345, "Saxophone": -0.23623160013655217}, {"Oboe": 3.0964091685538753, "Viola": 8.902287182475058}, {"Flute": 10.983898139809247, "Cello": 15.066685839379424}, {"Flute": 12.207856158070836, "Trombone": 10.558356066014845}, {"Clarinet": 8.248258609274728, "Horn": 7.394979612559222}, {"Clarinet": 6.351219893180584, "Bassoon": -9.71699068786476}, {"Trumpet": 6.3635717564040934, "Violin": 6.545304163632222}, {"Trumpet": 3.786498854570637, "Violin": 3.5969724079842047}, {"Flute": 8.91437069192924, "Clarinet": 3.6121257849461452}, {"Flute": 8.197440148416499, "Saxophone": -3.8047734656299452}, {"Bassoon": -0.334587651487473, "Oboe": -1.2422828604582257}, {"Trumpet": 3.1048114084214875, "Trombone": 4.1588943180602005}, {"Trumpet": 6.9870005003795495, "Horn": 
10.995937846209397}, {"Tuba": 15.137204305784671, "Trumpet": 13.197341712985281}, {"Trumpet": 4.472134389445005, "Trombone": 4.492206591153951}, {"Trumpet": 12.55558930062177, "Horn": 12.850836093893756}, {"Cello": 9.284261564111539, "Oboe": 5.965147991423256}, {"Trumpet": 13.272054478348993, "Tuba": 11.817007087518686}, {"Trumpet": 5.572915553810688, "Trombone": -0.38531703673293477}, {"Horn": 6.5583486715593375, "Trumpet": 5.579780297702247}, {"Trumpet": 5.383004769727327, "Horn": 12.443750219098838}, {"Trumpet": 6.020048986236221, "Trombone": 4.885177377004129}]}, "MSI-S": {"separation": [{"Violin": 7.921994675490519, "Saxophone": 11.366995507112584}, {"Violin": 12.038662534248733, "Double_Bass": 6.87432627096078}, {"Cello": 8.148104027180775, "Viola": 8.198127609300254}, {"Violin": 11.98011099201215, "Double_Bass": 9.57358878274719}, {"Violin": 11.440761908393497, "Cello": 14.038834583543329}, {"Viola": 4.512211469691159, "Clarinet": 5.761657332214858}, {"Flute": 11.82297688743914, "Violin": 10.699554244420602}, {"Saxophone": -0.900564981418269, "Clarinet": 2.1366051044109433}, {"Saxophone": 10.817807572920216, "Tuba": 7.384066774990411}, {"Trombone": 9.639162857416927, "Saxophone": 9.847265694382466}, {"Oboe": 7.24825301014474, "Saxophone": 8.196116213661215}, {"Oboe": 7.33642414337921, "Viola": 11.28020507942852}, {"Flute": 9.836888357692988, "Cello": 13.547636857648124}, {"Flute": 12.851097443619395, "Trombone": 10.46500369330717}, {"Clarinet": 7.602902274230314, "Horn": 11.470063225714943}, {"Clarinet": 10.62103690474582, "Bassoon": 7.760127638920407}, {"Trumpet": 7.7634571031588955, "Violin": 2.9190470289012875}, {"Trumpet": 4.60425092234097, "Violin": 4.20663034563419}, {"Flute": 16.511524378044133, "Clarinet": 9.250314957907685}, {"Flute": 10.43917612868238, "Saxophone": 4.033993692631519}, {"Bassoon": 11.92044101412928, "Oboe": -1.3083684861412412}, {"Trumpet": 6.329311750758518, "Trombone": 8.392445113853464}, {"Trumpet": 14.6180165071682, "Horn": 
15.383653025218747}, {"Tuba": 10.919045547788544, "Trumpet": 13.595617518172238}, {"Trumpet": 5.842772995624461, "Trombone": 8.631171729225189}, {"Trumpet": 13.997703905521472, "Horn": 13.39407154565187}, {"Cello": 9.458064356688796, "Oboe": 10.297229748451484}, {"Trumpet": 13.699367001890622, "Tuba": 12.900278382966349}, {"Trumpet": 7.09601690995039, "Trombone": 7.9695257687255605}, {"Horn": 7.991149056594455, "Trumpet": 5.643293305203569}, {"Trumpet": 13.623362925687237, "Horn": 13.872989573238371}, {"Trumpet": 6.899729635581686, "Trombone": 6.636503030035552}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-199.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 0.9175314552414292, "Saxophone": 0.48769407210316695}, {"Violin": 0.9125718826083175, "Double_Bass": 0.754509249710048}, {"Cello": 0.7713371339231954, "Viola": 0.6277673929358589}, {"Violin": 0.9340282738648057, "Double_Bass": 0.8466029079140432}, {"Violin": 0.8494433532640323, "Cello": 0.8234397744682567}, {"Viola": 0.5366152114501777, "Clarinet": 0.7683789788698187}, {"Flute": 0.7083169398695166, "Violin": 0.8639839564463551}, {"Saxophone": 0.3742301357547141, "Clarinet": 0.38706905722611123}, {"Saxophone": 0.46958433150836665, "Tuba": 0.5933034451715863}, {"Trombone": 0.8087472148848392, "Saxophone": 0.07448094325683406}, {"Oboe": 0.17444751034314088, "Saxophone": 0.4909253731724391}, {"Oboe": 0.4527031595659372, "Viola": 0.8467251228972997}, {"Flute": 0.8121309121549076, "Cello": 0.8678964295542637}, {"Flute": 0.8047517314950919, "Trombone": 0.8825534858507422}, {"Clarinet": 0.7924821801722192, "Horn": 0.7196349405764514}, {"Clarinet": 0.7184619019101741, "Bassoon": 0.11655248309864526}, {"Trumpet": 0.8377585225687513, "Violin": 0.7346748743292829}, {"Trumpet": 0.7569191947743098, "Violin": 0.763493180421253}, {"Flute": 0.7590852320900267, "Clarinet": 
0.8305817016784037}, {"Flute": 0.4969204676729673, "Saxophone": 0.47542269192461206}, {"Bassoon": 0.2340276142622117, "Oboe": 0.3578672053054495}, {"Trumpet": 0.4901797343765197, "Trombone": 0.8338687860332582}, {"Trumpet": 0.6757474185491802, "Horn": 0.8491059958764835}, {"Tuba": 0.35710567506371227, "Trumpet": 0.9129449504149315}, {"Trumpet": 0.6746590921706681, "Trombone": 0.7290862938843112}, {"Trumpet": 0.9292135663189819, "Horn": 0.9253320796367778}, {"Cello": 0.5220304794979947, "Oboe": 0.44216339198203447}, {"Trumpet": 0.9334312937132726, "Tuba": 0.8776233750716557}, {"Trumpet": 0.7911654731180092, "Trombone": 0.7138301954536044}, {"Horn": 0.7311518064512271, "Trumpet": 0.8873737601414587}, {"Trumpet": 0.7436783101899987, "Horn": 0.7830704970476989}, {"Trumpet": 0.6309310886292882, "Trombone": 0.7596549826631929}], "separation": [{"Violin": 8.08709214609101, "Saxophone": 6.555664851425415}, {"Violin": 11.90966558226069, "Double_Bass": 7.747678694483349}, {"Cello": 7.77647331155909, "Viola": 4.852155013844731}, {"Violin": 11.179336714889512, "Double_Bass": 10.067168316108175}, {"Violin": 11.783430884995793, "Cello": 14.254165502121692}, {"Viola": 2.628063367590781, "Clarinet": 5.16276811240536}, {"Flute": 10.869537156063183, "Violin": 7.012412764325196}, {"Saxophone": -2.407752088189431, "Clarinet": -1.3683124339386485}, {"Saxophone": 2.441953291791611, "Tuba": 6.891359665004966}, {"Trombone": 9.798673998715264, "Saxophone": -11.355150084282501}, {"Oboe": -7.885302648665503, "Saxophone": 0.3099868598701906}, {"Oboe": 1.3327031147881367, "Viola": 8.95329764450916}, {"Flute": 10.881682586912627, "Cello": 13.802538881030733}, {"Flute": 12.336582339883348, "Trombone": 10.02528058427761}, {"Clarinet": 8.303029706412266, "Horn": 5.517934814663721}, {"Clarinet": 6.527387551018312, "Bassoon": -11.963376001210104}, {"Trumpet": 5.471768743166365, "Violin": 5.7851901570987785}, {"Trumpet": 3.9167689944269792, "Violin": 3.9082345174224353}, {"Flute": 5.8651942195801965, 
"Clarinet": 8.775183633566566}, {"Flute": 4.739660064268817, "Saxophone": 0.5893582766712064}, {"Bassoon": -3.000419743638128, "Oboe": -0.28510062155767174}, {"Trumpet": 0.39431452410672374, "Trombone": 8.168607144699726}, {"Trumpet": 5.604162812705422, "Horn": 13.781713052164195}, {"Tuba": 13.080317110352137, "Trumpet": 13.309915685058245}, {"Trumpet": 2.719614848081245, "Trombone": 5.318071927118502}, {"Trumpet": 12.608098456874316, "Horn": 12.631853241319515}, {"Cello": 8.634561592386294, "Oboe": 5.546655711792797}, {"Trumpet": 13.155388595998534, "Tuba": 12.916895993884925}, {"Trumpet": 5.293807862302776, "Trombone": 3.675216010197393}, {"Horn": 6.2579938655145035, "Trumpet": 4.900601445005213}, {"Trumpet": 6.6474611897278155, "Horn": 13.518361343720166}, {"Trumpet": 6.7032216324148655, "Trombone": 6.485716015377154}]}, "MSI-S": {"separation": [{"Violin": 7.937513142912341, "Saxophone": 11.55675068110638}, {"Violin": 12.044297056235887, "Double_Bass": 7.18065780993134}, {"Cello": 8.027604024765587, "Viola": 8.461314060352336}, {"Violin": 12.052432728610816, "Double_Bass": 9.642349442558482}, {"Violin": 11.395965786083572, "Cello": 13.810815089053055}, {"Viola": 4.811770591930236, "Clarinet": 5.54433901070463}, {"Flute": 11.755518729804098, "Violin": 10.613372691403066}, {"Saxophone": -0.9763683650580944, "Clarinet": 2.1833665856149436}, {"Saxophone": 10.559369300612094, "Tuba": 7.765526213160637}, {"Trombone": 9.628565024978922, "Saxophone": 9.6995372840224}, {"Oboe": 7.230960833837253, "Saxophone": 8.484284564944574}, {"Oboe": 7.49956264549693, "Viola": 11.332564007565434}, {"Flute": 9.698356254840114, "Cello": 13.579258190713686}, {"Flute": 12.842999876763852, "Trombone": 9.998067046663229}, {"Clarinet": 7.8312030055070805, "Horn": 11.972107833328895}, {"Clarinet": 11.18344974759945, "Bassoon": 8.473246428304677}, {"Trumpet": 7.865342805391078, "Violin": 2.667928647181053}, {"Trumpet": 4.747579747689764, "Violin": 4.3046334481368875}, {"Flute": 
16.40655145007629, "Clarinet": 9.262744009739345}, {"Flute": 10.377361410342804, "Saxophone": 5.306469115191316}, {"Bassoon": 12.188541999425693, "Oboe": -1.076195020871555}, {"Trumpet": 6.285112677130419, "Trombone": 8.997985714863413}, {"Trumpet": 14.68934227936489, "Horn": 15.478047758302576}, {"Tuba": 10.494971364324421, "Trumpet": 13.546534858848688}, {"Trumpet": 5.810183418243813, "Trombone": 8.709189037784375}, {"Trumpet": 13.22125363272086, "Horn": 13.891869171330526}, {"Cello": 9.003097183764366, "Oboe": 9.381588572045331}, {"Trumpet": 13.733627860348003, "Tuba": 14.301435544182882}, {"Trumpet": 6.752398712768088, "Trombone": 8.213741901518414}, {"Horn": 7.926709353273152, "Trumpet": 5.562194430363451}, {"Trumpet": 13.49145953025394, "Horn": 14.38378907539935}, {"Trumpet": 6.976230801366472, "Trombone": 6.633688997831351}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-191.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 0.9225995974022633, "Saxophone": 0.4413402624758425}, {"Violin": 0.8965189516830728, "Double_Bass": 0.778812303669732}, {"Cello": 0.776771166865604, "Viola": 0.6504239865273502}, {"Violin": 0.9379424538854333, "Double_Bass": 0.7950399690956629}, {"Violin": 0.8528224865033466, "Cello": 0.8132685818579029}, {"Viola": 0.6010818320528796, "Clarinet": 0.7713203383428671}, {"Flute": 0.7411899494477089, "Violin": 0.869715179058957}, {"Saxophone": 0.46249917307384036, "Clarinet": 0.564021325083742}, {"Saxophone": 0.40124368490171236, "Tuba": 0.5377399580292576}, {"Trombone": 0.7807928833803116, "Saxophone": 0.06475725422229726}, {"Oboe": 0.20345567253976457, "Saxophone": 0.4024644539655617}, {"Oboe": 0.4696658964626575, "Viola": 0.8330448386842768}, {"Flute": 0.8301076591073561, "Cello": 0.8903297150292006}, {"Flute": 0.8270575501094422, "Trombone": 0.8659796240875405}, {"Clarinet": 0.7863956683240846, "Horn": 
0.7104868580270978}, {"Clarinet": 0.6623751519060289, "Bassoon": 0.17852589856112544}, {"Trumpet": 0.8470153178176278, "Violin": 0.7644443388036903}, {"Trumpet": 0.62422625057308, "Violin": 0.7981823788746835}, {"Flute": 0.8123535129249135, "Clarinet": 0.7954479379532579}, {"Flute": 0.6363008401149348, "Saxophone": 0.17696277556740142}, {"Bassoon": 0.15745949880183926, "Oboe": 0.44964809064298616}, {"Trumpet": 0.6463454976689511, "Trombone": 0.7045269069491815}, {"Trumpet": 0.6715687371107681, "Horn": 0.8110018974198121}, {"Tuba": 0.42894527507104047, "Trumpet": 0.8948483231210234}, {"Trumpet": 0.6810341217160938, "Trombone": 0.5869211662416288}, {"Trumpet": 0.8863997237204001, "Horn": 0.8094233386349049}, {"Cello": 0.5416675360452202, "Oboe": 0.45073995114267856}, {"Trumpet": 0.9323715444558518, "Tuba": 0.6958199654759223}, {"Trumpet": 0.7918967215753786, "Trombone": 0.41000858558037356}, {"Horn": 0.7691291259427434, "Trumpet": 0.8246173600125221}, {"Trumpet": 0.7036394693330495, "Horn": 0.7799222501354737}, {"Trumpet": 0.5708066420161441, "Trombone": 0.6646036706855561}], "separation": [{"Violin": 8.010686133864272, "Saxophone": 5.7059231746903745}, {"Violin": 11.412973564496355, "Double_Bass": 7.44488281960354}, {"Cello": 7.8087021714671305, "Viola": 5.693851191953469}, {"Violin": 11.279039421823535, "Double_Bass": 9.543076033687353}, {"Violin": 12.09763572117872, "Cello": 14.236084570739997}, {"Viola": 2.887391280202843, "Clarinet": 4.626664402895886}, {"Flute": 11.373303674937578, "Violin": 7.192455790593489}, {"Saxophone": -3.6793791198524852, "Clarinet": 0.3761958254556904}, {"Saxophone": 1.6529985803142446, "Tuba": 6.448297972243405}, {"Trombone": 9.178616316342712, "Saxophone": -12.448816458301774}, {"Oboe": -7.579621857742906, "Saxophone": -0.1819368294493734}, {"Oboe": 1.975123984247302, "Viola": 7.569245606506126}, {"Flute": 10.730196481340442, "Cello": 14.739950965163695}, {"Flute": 12.1179403676805, "Trombone": 10.151080636526661}, {"Clarinet": 
7.881543039515142, "Horn": 6.258165410831528}, {"Clarinet": 5.0461383457412055, "Bassoon": -8.312944091244551}, {"Trumpet": 7.947997093246549, "Violin": 5.884329046057044}, {"Trumpet": 3.4906013329792343, "Violin": 3.6077726731608752}, {"Flute": 8.245184507095422, "Clarinet": 6.455376574643435}, {"Flute": 6.967520803117491, "Saxophone": -4.210696035008051}, {"Bassoon": -2.8476055884598015, "Oboe": -0.006435557364543816}, {"Trumpet": 4.183759313263778, "Trombone": 4.268229363219645}, {"Trumpet": 4.777202797899097, "Horn": 12.053542828272674}, {"Tuba": 14.302752278968413, "Trumpet": 13.121161124966605}, {"Trumpet": 3.696199432779127, "Trombone": 3.57996545991643}, {"Trumpet": 13.456693278874974, "Horn": 7.541233738326532}, {"Cello": 9.037666502751014, "Oboe": 5.69123497221756}, {"Trumpet": 13.230985878737357, "Tuba": 11.82208802969761}, {"Trumpet": 5.692344163639756, "Trombone": -0.038399038262363794}, {"Horn": 5.903787143558194, "Trumpet": 3.08322878637523}, {"Trumpet": 5.248489193325934, "Horn": 12.878399840314591}, {"Trumpet": 6.528325678804467, "Trombone": 4.8500217174309626}]}, "MSI-S": {"separation": [{"Violin": 7.7712022629507, "Saxophone": 11.879543348997084}, {"Violin": 11.862272299049312, "Double_Bass": 6.710834439112427}, {"Cello": 8.06272626751225, "Viola": 8.228429781836986}, {"Violin": 11.946702083927734, "Double_Bass": 9.172743970655542}, {"Violin": 11.678365786497842, "Cello": 13.685135246519824}, {"Viola": 4.452087563331184, "Clarinet": 5.825598120242668}, {"Flute": 11.50759588568928, "Violin": 11.107727201137013}, {"Saxophone": -1.0948985708102497, "Clarinet": 2.220853498447357}, {"Saxophone": 10.316269363423228, "Tuba": 7.088573053292623}, {"Trombone": 9.52883622977521, "Saxophone": 9.580226833317868}, {"Oboe": 7.010895756972238, "Saxophone": 8.371373101228416}, {"Oboe": 6.514136774331385, "Viola": 10.99006723094876}, {"Flute": 9.545664593582433, "Cello": 13.359713009237717}, {"Flute": 12.716952828053634, "Trombone": 10.145914433770287}, 
{"Clarinet": 7.873963892807194, "Horn": 11.72812423264064}, {"Clarinet": 11.014678205075759, "Bassoon": 7.609387583640848}, {"Trumpet": 7.542684484617423, "Violin": 2.6379567803326336}, {"Trumpet": 4.310216170431006, "Violin": 4.236945778903439}, {"Flute": 16.062547537119237, "Clarinet": 9.928857509873666}, {"Flute": 10.312884567636221, "Saxophone": 5.453272679650074}, {"Bassoon": 12.195897364642267, "Oboe": -1.521294084942422}, {"Trumpet": 6.336855903200195, "Trombone": 7.2988605846237755}, {"Trumpet": 14.50169328499394, "Horn": 15.152331516978727}, {"Tuba": 10.707504268762825, "Trumpet": 13.421095436502908}, {"Trumpet": 5.718330399682571, "Trombone": 8.631778047776123}, {"Trumpet": 13.888643078213514, "Horn": 13.208683036515936}, {"Cello": 9.019806693647674, "Oboe": 12.267152607035483}, {"Trumpet": 13.503274953540728, "Tuba": 13.067487388605866}, {"Trumpet": 7.147299158708008, "Trombone": 7.954129082434135}, {"Horn": 7.898629493604814, "Trumpet": 5.030873918407536}, {"Trumpet": 13.354506527643613, "Horn": 14.274400281460217}, {"Trumpet": 6.883123810746039, "Trombone": 6.634738555340189}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-194.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 0.9375940630372809, "Saxophone": 0.49135964909513513}, {"Violin": 0.9204274451764851, "Double_Bass": 0.8026947011939985}, {"Cello": 0.7766548060889136, "Viola": 0.7101577024552148}, {"Violin": 0.9310114771516497, "Double_Bass": 0.8574413489575807}, {"Violin": 0.8547593864553509, "Cello": 0.8264246600908538}, {"Viola": 0.611081827156737, "Clarinet": 0.7242727882404661}, {"Flute": 0.7402821856780738, "Violin": 0.8915553576920556}, {"Saxophone": 0.4392051419128846, "Clarinet": 0.5144519006508103}, {"Saxophone": 0.3886397824657571, "Tuba": 0.6588977093308064}, {"Trombone": 0.8193023914520491, "Saxophone": 0.04064798674595933}, {"Oboe": 
0.19981415172669795, "Saxophone": 0.4824252273851331}, {"Oboe": 0.47061331612327073, "Viola": 0.8774851068267819}, {"Flute": 0.8049109652344533, "Cello": 0.9034680571259537}, {"Flute": 0.859714808272614, "Trombone": 0.8836876074583174}, {"Clarinet": 0.7541413441381483, "Horn": 0.7832292504648372}, {"Clarinet": 0.6473470238551157, "Bassoon": 0.20083958739221222}, {"Trumpet": 0.8221573423071687, "Violin": 0.7894467345091507}, {"Trumpet": 0.602981962425549, "Violin": 0.8610861947652021}, {"Flute": 0.9236481962290084, "Clarinet": 0.5889199894549356}, {"Flute": 0.7365865838269245, "Saxophone": 0.1982776521823885}, {"Bassoon": 0.16251598602743972, "Oboe": 0.46202436922869206}, {"Trumpet": 0.7069121672913086, "Trombone": 0.8058424324183276}, {"Trumpet": 0.6983959422070949, "Horn": 0.8700894396791217}, {"Tuba": 0.4385437239619477, "Trumpet": 0.9103355662260838}, {"Trumpet": 0.6487868772174991, "Trombone": 0.6987178169209547}, {"Trumpet": 0.8905323160647035, "Horn": 0.8957125769322402}, {"Cello": 0.575197279400998, "Oboe": 0.5512478100402811}, {"Trumpet": 0.9352919029048294, "Tuba": 0.8563289638682463}, {"Trumpet": 0.7931538565783495, "Trombone": 0.6715140804353453}, {"Horn": 0.8246675832913997, "Trumpet": 0.8437704074209696}, {"Trumpet": 0.7186214550903973, "Horn": 0.7379764895783653}, {"Trumpet": 0.6402499063873751, "Trombone": 0.7293742983599567}], "separation": [{"Violin": 8.019243857830281, "Saxophone": 6.084410039134168}, {"Violin": 12.291504397421864, "Double_Bass": 6.893395157264827}, {"Cello": 5.872747188527417, "Viola": 6.027574715941437}, {"Violin": 11.930149343542308, "Double_Bass": 9.511122449682867}, {"Violin": 12.117178569455557, "Cello": 14.182675714548276}, {"Viola": 2.1710059147171887, "Clarinet": 4.804987573257236}, {"Flute": 10.638133869277176, "Violin": 7.365847455558377}, {"Saxophone": -2.6914681330661128, "Clarinet": -0.6824840728513982}, {"Saxophone": 1.0920076863703598, "Tuba": 6.939882568142684}, {"Trombone": 9.814917700770534, "Saxophone": 
-14.53845484904912}, {"Oboe": -6.150614494725222, "Saxophone": -0.15621172737245215}, {"Oboe": 2.3086248170437313, "Viola": 10.090207473293766}, {"Flute": 10.446685047090718, "Cello": 14.722213316667894}, {"Flute": 12.60931641823548, "Trombone": 10.261812098740396}, {"Clarinet": 6.250616514622534, "Horn": 7.811019421396226}, {"Clarinet": 5.130268501184643, "Bassoon": -8.235064680431892}, {"Trumpet": 7.639549927162783, "Violin": 6.217349408060809}, {"Trumpet": 2.609762934418039, "Violin": 4.465008460357421}, {"Flute": 13.857288201688522, "Clarinet": 3.3110710035993534}, {"Flute": 8.298480167998596, "Saxophone": -7.4192074491963265}, {"Bassoon": -3.315419660080834, "Oboe": 0.49375370492935}, {"Trumpet": 4.090566280530151, "Trombone": 6.335770401372324}, {"Trumpet": 5.287027637008959, "Horn": 13.115500476223561}, {"Tuba": 13.94276752458131, "Trumpet": 13.292996962746768}, {"Trumpet": 2.6969141854194385, "Trombone": 4.7219729407511215}, {"Trumpet": 10.262674025505012, "Horn": 10.824899787631628}, {"Cello": 8.956043817065645, "Oboe": 7.4399326830211}, {"Trumpet": 12.549469598163183, "Tuba": 12.778470682550491}, {"Trumpet": 5.112171218608262, "Trombone": 3.4491767714463437}, {"Horn": 6.688794211878191, "Trumpet": 2.468812309829052}, {"Trumpet": 5.075766261968998, "Horn": 12.77943987540247}, {"Trumpet": 7.040214323689675, "Trombone": 5.142482248811982}]}, "MSI-S": {"separation": [{"Violin": 7.8950576964321275, "Saxophone": 11.521535774124656}, {"Violin": 12.240288133360849, "Double_Bass": 6.497806769422036}, {"Cello": 7.7822597994491405, "Viola": 8.575431014391944}, {"Violin": 11.958714811793573, "Double_Bass": 9.145814303483121}, {"Violin": 11.730606133674407, "Cello": 13.643400190278655}, {"Viola": 4.44436604691258, "Clarinet": 5.536169706585381}, {"Flute": 11.371363531264699, "Violin": 10.638486840955492}, {"Saxophone": -1.1151071015972513, "Clarinet": 2.296042573808309}, {"Saxophone": 10.721859396330341, "Tuba": 7.341200198812241}, {"Trombone": 9.500192793170658, 
"Saxophone": 9.622594153992289}, {"Oboe": 7.108914956425903, "Saxophone": 8.468950116778764}, {"Oboe": 6.652418859594963, "Viola": 11.215939930036203}, {"Flute": 9.432282884747181, "Cello": 13.311502657758838}, {"Flute": 12.682504008147259, "Trombone": 10.205703975267792}, {"Clarinet": 7.717821517358312, "Horn": 11.745515356943827}, {"Clarinet": 10.740431145625532, "Bassoon": 7.478575059208223}, {"Trumpet": 7.0566084751709734, "Violin": 3.0432672004822585}, {"Trumpet": 4.332294454333908, "Violin": 4.017711090710906}, {"Flute": 16.057979031556883, "Clarinet": 9.645061313578129}, {"Flute": 10.361405092447598, "Saxophone": 5.518675946101787}, {"Bassoon": 12.125062838233081, "Oboe": -1.4505518577008023}, {"Trumpet": 6.362135940471588, "Trombone": 8.453877856717932}, {"Trumpet": 14.604379512195251, "Horn": 15.325512796932465}, {"Tuba": 10.553781314042398, "Trumpet": 13.472689768587038}, {"Trumpet": 5.755090232812073, "Trombone": 8.85094200618022}, {"Trumpet": 13.875039064305895, "Horn": 13.328900533649673}, {"Cello": 9.147060798045763, "Oboe": 12.25954311873494}, {"Trumpet": 13.56540942855366, "Tuba": 13.619410746788734}, {"Trumpet": 7.2019274271069165, "Trombone": 7.8215518405000095}, {"Horn": 7.84924285160136, "Trumpet": 5.1065603274290545}, {"Trumpet": 13.56727642360838, "Horn": 13.897197495367887}, {"Trumpet": 6.851062263683259, "Trombone": 6.519800705711802}]}} -------------------------------------------------------------------------------- /evaluation/MSI/scores-196.json: -------------------------------------------------------------------------------- 1 | {"MSI": {"transcription": [{"Violin": 0.9274906863337553, "Saxophone": 0.5043368474157827}, {"Violin": 0.914386969149301, "Double_Bass": 0.7730874989655719}, {"Cello": 0.7739149409285708, "Viola": 0.6573918072270389}, {"Violin": 0.9341836426263121, "Double_Bass": 0.8482583407528802}, {"Violin": 0.8420241341423795, "Cello": 0.8469197040403452}, {"Viola": 0.5588747697109471, "Clarinet": 0.7887064087570265}, 
{"Flute": 0.7256794977633976, "Violin": 0.8846825615282289}, {"Saxophone": 0.41758428008822285, "Clarinet": 0.4745905919360384}, {"Saxophone": 0.5008825092141026, "Tuba": 0.6009273497542329}, {"Trombone": 0.8237454859698395, "Saxophone": 0.037795896748970326}, {"Oboe": 0.17629782871849398, "Saxophone": 0.5489591573760875}, {"Oboe": 0.4470377803679896, "Viola": 0.8775972488727447}, {"Flute": 0.8245298887051747, "Cello": 0.9098069836136474}, {"Flute": 0.8085748162858232, "Trombone": 0.887140237057048}, {"Clarinet": 0.8218144367199967, "Horn": 0.7584396533479583}, {"Clarinet": 0.7580983123470278, "Bassoon": 0.1449074010723677}, {"Trumpet": 0.8052751226931595, "Violin": 0.6587967356664012}, {"Trumpet": 0.6744847172777865, "Violin": 0.7994543216362688}, {"Flute": 0.7706943945052336, "Clarinet": 0.7933323991162853}, {"Flute": 0.497779125049499, "Saxophone": 0.5439810323511594}, {"Bassoon": 0.09778074874342473, "Oboe": 0.6243635353635927}, {"Trumpet": 0.5270488542066626, "Trombone": 0.623102735409506}, {"Trumpet": 0.5914551924008531, "Horn": 0.8337775629995053}, {"Tuba": 0.683974960034785, "Trumpet": 0.9026053388651559}, {"Trumpet": 0.6599779681018643, "Trombone": 0.6193870310948638}, {"Trumpet": 0.8942374923024897, "Horn": 0.9352429590162552}, {"Cello": 0.5853176435234323, "Oboe": 0.5584756747453624}, {"Trumpet": 0.9364077281688825, "Tuba": 0.8992721462995069}, {"Trumpet": 0.7871728550809822, "Trombone": 0.3085604821426051}, {"Horn": 0.7992328076443653, "Trumpet": 0.8735621504109806}, {"Trumpet": 0.7397214853489611, "Horn": 0.7391133259916535}, {"Trumpet": 0.5625548825248867, "Trombone": 0.6839988061598271}], "separation": [{"Violin": 8.002384350102595, "Saxophone": 5.451628907209875}, {"Violin": 12.104245950152052, "Double_Bass": 7.448966351944486}, {"Cello": 6.6859480871167465, "Viola": 5.200347895991829}, {"Violin": 11.102478884432239, "Double_Bass": 10.032329776858885}, {"Violin": 11.889955175893839, "Cello": 14.180611149691524}, {"Viola": 3.7982159793782424, 
"Clarinet": 4.7174038599921895}, {"Flute": 11.069693917056645, "Violin": 7.364196869323203}, {"Saxophone": -3.164941206815505, "Clarinet": -2.0062585695287103}, {"Saxophone": 2.6221343692888506, "Tuba": 6.83415768262533}, {"Trombone": 9.558411751715411, "Saxophone": -14.789737378334515}, {"Oboe": -8.675431616942674, "Saxophone": 1.668428640979334}, {"Oboe": 1.0431281882152108, "Viola": 9.705655649674934}, {"Flute": 10.654960335024237, "Cello": 15.046428454678441}, {"Flute": 12.521223809661198, "Trombone": 10.224332377154543}, {"Clarinet": 7.474110284591646, "Horn": 6.721126014335286}, {"Clarinet": 7.219275456942902, "Bassoon": -10.224139685412252}, {"Trumpet": 5.939763951566752, "Violin": 5.957894962709236}, {"Trumpet": 3.0868434340069455, "Violin": 4.642075483548619}, {"Flute": 6.259745551692517, "Clarinet": 6.818697549020795}, {"Flute": 4.941118922955765, "Saxophone": 0.1954592801971419}, {"Bassoon": -3.448178985167047, "Oboe": -0.27161265937604895}, {"Trumpet": 1.2723559004955045, "Trombone": 2.3506275151405003}, {"Trumpet": 2.740302664280981, "Horn": 11.871646489756532}, {"Tuba": 14.886345187021178, "Trumpet": 13.26696557903243}, {"Trumpet": 1.5628933516633805, "Trombone": 3.276492702484322}, {"Trumpet": 9.442530332209113, "Horn": 12.705992978512022}, {"Cello": 9.137813694083905, "Oboe": 4.823742745856226}, {"Trumpet": 12.682455929463304, "Tuba": 12.919274738450994}, {"Trumpet": 5.582079973697156, "Trombone": -1.856029538771877}, {"Horn": 6.1622367098751285, "Trumpet": 2.945647568426861}, {"Trumpet": 6.2673377917483775, "Horn": 13.575113452101357}, {"Trumpet": 6.152750484815411, "Trombone": 5.147797855183857}]}, "MSI-S": {"separation": [{"Violin": 7.843305068898716, "Saxophone": 11.929191243917652}, {"Violin": 11.966525037474744, "Double_Bass": 6.953270915241092}, {"Cello": 7.99183965297512, "Viola": 8.784201514471224}, {"Violin": 11.863988477785286, "Double_Bass": 9.647516816581131}, {"Violin": 11.61399832787309, "Cello": 13.681708381172886}, {"Viola": 
4.695670230946728, "Clarinet": 5.425892433455645}, {"Flute": 11.457398633516359, "Violin": 10.825109962905238}, {"Saxophone": -1.0970764928261632, "Clarinet": 2.2864066634822864}, {"Saxophone": 10.715997355229979, "Tuba": 7.28990878475631}, {"Trombone": 9.454668995454501, "Saxophone": 9.67125970610323}, {"Oboe": 6.911185742493357, "Saxophone": 8.05445637932222}, {"Oboe": 6.445197425179235, "Viola": 11.101000867905714}, {"Flute": 9.448056963711478, "Cello": 13.591085361388993}, {"Flute": 12.648575125646753, "Trombone": 10.071433827411067}, {"Clarinet": 7.820966719407945, "Horn": 11.666964306059898}, {"Clarinet": 10.737167531779043, "Bassoon": 7.641218293749594}, {"Trumpet": 7.425035449903609, "Violin": 3.0107595965766714}, {"Trumpet": 4.385935545620114, "Violin": 3.9094550435989115}, {"Flute": 15.978379782076061, "Clarinet": 9.550151237463139}, {"Flute": 10.587900116515016, "Saxophone": 5.126442933388766}, {"Bassoon": 12.42846602342926, "Oboe": -1.4857967442847158}, {"Trumpet": 6.281325663604525, "Trombone": 7.519311618295204}, {"Trumpet": 14.580846573807111, "Horn": 15.242704647130925}, {"Tuba": 10.676579735080168, "Trumpet": 13.508922484162513}, {"Trumpet": 5.573236423848283, "Trombone": 8.413966273693276}, {"Trumpet": 13.99350522687913, "Horn": 13.242641617389108}, {"Cello": 9.144709678550118, "Oboe": 11.358703796489953}, {"Trumpet": 13.442816644984163, "Tuba": 13.565610990043393}, {"Trumpet": 7.333466654352797, "Trombone": 7.801098855812278}, {"Horn": 7.869435886175751, "Trumpet": 5.078572594050752}, {"Trumpet": 13.35644524716921, "Horn": 14.13488797378907}, {"Trumpet": 6.8030524763534785, "Trombone": 6.300163096039944}]}} -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # A Unified Model for Zero-shot Music Source Separation, Transcription and Synthesis 2 | This is the code repository for the paper: A Unified Model for Zero-shot Music 
Source Separation, Transcription and Synthesis. The paper is available [here](https://arxiv.org/abs/2108.03456). 3 | 4 | ## Introduction 5 | We propose a unified model for three inter-related tasks: 6 | - to *separate* individual sound sources from a mixed music audio; 7 | - to *transcribe* each sound source to MIDI notes; 8 | - to *synthesize* new pieces based on the timbre of separated sources. 9 | 10 | The model is inspired by the fact that when humans listen to music, our minds can not only separate the sounds of different instruments, but also at the same time perceive high-level representations such as score and timbre. 11 | 12 | ## Model architecture 13 | ### - Components of the proposed model 14 | The proposed model comprises four components: 15 | - a query-by-example (QBE) network 16 | - a pitch-timbre disentanglement module 17 | - a transcriptor 18 | - an audio encoder-decoder network 19 | 20 | ![](https://github.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/blob/main/imgs/model-fig-1-ab.png) 21 | >The baseline models and the proposed model. In the left figure, the large orange and gray boxes indicate a QBE 22 | transcription-only and QBE separation-only model respectively. The whole figure indicates a QBE multi-task model. 23 | 24 | 25 | ### - Training losses 26 | The model is trained with separation loss, transcription loss and contrastive loss. See details in [our paper](https://arxiv.org/abs/2108.03456). 27 | 28 | ### - Pitch-translation Invariance Loss 29 | To further improve the timbre disentanglement performance, we propose a *pitch-translation invariance loss*. We term the model without pitch-translation invariance loss `multi-task informed (MSI) model`. And we term the MSI model with further disentanglement via pitch-translation invariance loss `MSI-DIS model`. 
30 | 31 | ### - Detailed hyper-parameters of the proposed model 32 | ![](https://github.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/blob/main/imgs/model-fig-3.png) 33 | 34 | ## Experimental results 35 | 36 | | Model|MSS-only| Multi-task| MSI (ours)| MSI-DIS (ours)| 37 | | ----| ----| ----| ----| ----| 38 | | Seen| 4.69 ± 0.31| 3.32 ± 0.1| **6.33 ± 0.17**| 5.04 ± 0.16| 39 | | Unseen| **6.20 ± 0.26**| 4.63 ± 0.34| 5.53 ± 0.11| 3.99 ± 0.22| 40 | | **Overall**| 5.07 ± 0.22| 3.65 ± 0.22| **6.13 ± 0.15**| 4.77 ± 0.14| 41 | 42 | 43 | ## Demos 44 | The initial version of the demo page is available [here](https://kikyo-16.github.io/demo-page-of-a-unified-model-for-separation-transcriptiion-synthesis/). A new demo page with more demos will be added soon. 45 | 46 | ## Quick start 47 | 48 | ### Requirements 49 | You will need at least Python 3.6 and PyTorch. See requirements.txt for requirements. Install dependencies with pip: 50 | ``` 51 | pip install -r requirements.txt 52 | ``` 53 | 54 | ### Data preparation 55 | 1. Download URMP Dataset from [URMP homepage](http://www2.ece.rochester.edu/projects/air/projects/URMP.html). 56 | 2. Run the following command to generate your features and annotations. 57 | ``` 58 | python src/dataset/urmp/urmp_feature.py --dataset_dir=ur_unzipped_dataset_folder --feature_dir=dataset/hdf5s/urmp --process_num=1 59 | ``` 60 | **NOTE** that `ur_unzipped_dataset_folder` is your unzipped data folder and it should contain directories of songs: 61 | > . 62 | ├── `ur_unzipped_dataset_folder` 63 |         ├── `0_song0` 64 |         ├── `1_song1` 65 |         ├── ... 66 |         ... 
67 | 68 | ### Training 69 | Run the following command to train the proposed MSI-DIS Model: 70 | ``` 71 | python train.py --model_name=MSI_DIS --model_folder=folder_to_store_model_weights --epoch=200 72 | ``` 73 | 74 | ### Evaluation 75 | Download the model weights [here](https://drive.google.com/drive/folders/1fT3Fva5JywhpYnOhsORbDkLQ9Vnhv_Lj?usp=sharing). 76 | Run the following command to evaluate the proposed MSI-DIS Model on the test set: 77 | ``` 78 | python evaluate.py --model_name=MSI_DIS --model_path=path_of_model_weights --evaluation_folder=folder_to_store_evaluation_results --epoch=199 79 | ``` 80 | **NOTE:** Since we do not set aside a validation set to choose the best-performing model among all the training epochs, we report average results with a 95% confidence interval (CI) of models at the last 10 epochs. 81 | Therefore, if you want to reproduce the results of our paper, please 82 | 1. Evaluate all last-10-epoch models. 83 | 2. Run the following command to print experimental result tables: 84 | ``` 85 | python src/analyze/draw_table.py --evaluation_folder=folder_to_store_evaluation_results 86 | ``` 87 | 88 | ### Synthesis 89 | Run the following command to synthesize audios using the given midi, the test set, and the proposed MSI-DIS Model: 90 | ``` 91 | python synthesis.py --model_name=MSI-DIS --model_path=path_of_model_weights --evaluation_folder=folder_to_store_synthesis_results 92 | ``` 93 | 94 | ## Citation 95 | Please cite our work as: 96 | 97 | >@inproceedings{lin2021unified, 98 | >title={A Unified Model for Zero-shot Music Source Separation, Transcription and Synthesis}, 99 | >author={Liwei Lin and Qiuqiang Kong and Junyan Jiang and Gus Xia}, 100 | >booktitle = {Proceedings of the 22nd International Society for Music Information Retrieval Conference, {ISMIR}}, 101 | >year = {2021} 102 | >} 103 | 104 | ## License 105 | This code is released under the MIT license as found in the LICENSE file. 
# ---- /src/utils/utilities.py ----
"""Shared helpers: JSON / list-file I/O, mu-law coding, note <-> frequency
conversion, filesystem utilities, logging setup and frame-roll parsing."""
import os
import sys
import time
import logging
import numpy as np
import configparser
import json

# Small constant added to denominators to avoid division by zero.
et = 1e-8


def load_json(path):
    """Load and return the JSON document stored at `path`."""
    with open(path, 'r') as load_f:
        return json.load(load_f)


def save_json(path, data):
    """Serialize `data` as JSON into the file at `path` (overwriting it)."""
    with open(path, 'w') as f:
        json.dump(data, f)


def print_dict(x):
    """Print every key of mapping `x` followed by its value, one per line."""
    for key in x:
        print(key, x[key])


def factorized_fft(fft, onset_offset):
    """Split `fft` into a per-segment mean and a normalized residual curve.

    `onset_offset[i]` is compared against 0, 1 and 2 only: a value of 1 opens
    a segment (when none is open), any other non-zero value updates the
    currently open segment ``[st, i]``, and a value of 2 closes it.
    NOTE(review): presumably 1 = onset and 2 = offset -- confirm with caller.

    Args:
        fft: 1-D numpy array (indexed along its last axis length).
        onset_offset: sequence indexable by frame with the markers above.

    Returns:
        (curve_fft, mean_fft): arrays shaped like `fft`. Inside a segment
        `mean_fft` holds the segment mean and `curve_fft` holds
        ``(fft - mean) / (std + et)``; everywhere else both are 0.
    """
    curve_fft = np.zeros_like(fft)
    mean_fft = np.zeros_like(fft)
    st = -1
    for i in range(fft.shape[-1]):
        if onset_offset[i] == 1 and st == -1:
            st = i
        elif not onset_offset[i] == 0:
            if st == -1:
                # Marker without an open segment: treat the frame as a
                # one-frame segment (mean = value, residual = 0).  The
                # original wrote to an undefined `out_fft` and replaced the
                # whole `mean_fft` array with a scalar here.
                curve_fft[i] = 0
                mean_fft[i] = fft[i]
            else:
                segment = fft[st : i + 1]
                ave = np.mean(segment)
                std = np.std(segment)
                mean_fft[st : i + 1] = ave
                curve_fft[st : i + 1] = (segment - ave) / (std + et)

        if onset_offset[i] == 2:
            st = -1

    return curve_fft, mean_fft


def compute_time(event, pre_time):
    """Print the seconds elapsed since `pre_time` and return the current time."""
    cur_time = time.time()
    print(f'{event} use', cur_time - pre_time)
    return cur_time


def encode_mu_law(x, mu=256):
    """Mu-law compress `x` and quantize it to integers in ``[0, mu - 1]``.

    NOTE(review): assumes `x` lies in [-1, 1]; values outside are not clipped.
    """
    mu = mu - 1
    fx = np.sign(x) * np.log(1 + mu * np.abs(x)) / np.log(1 + mu)
    return np.floor((fx + 1) / 2 * mu + 0.5).astype(np.int64)


def decode_mu_law(y, mu=256):
    """Expand mu-law codes `y` (as produced by `encode_mu_law`) back to floats."""
    mu = mu - 1
    fx = (y - 0.5) / mu * 2 - 1
    x = np.sign(fx) / mu * ((1 + mu) ** np.abs(fx) - 1)
    return x


def read_config(config_path, name):
    """Read the INI file at `config_path` and return its section `name`.

    Raises:
        KeyError: if the section does not exist (or the file is unreadable,
            since `ConfigParser.read` silently skips missing files).
    """
    config = configparser.ConfigParser()
    config.read(config_path)
    return config[name]


def dict2str(dic, pre):
    """Render a (possibly nested) dict with string keys as tab-indented text.

    `pre` is emitted once, before the first key; nested dicts are rendered
    recursively with one more tab of prefix.
    """
    res = ''
    for i, d in enumerate(dic):
        if i == 0:
            res += pre
        res += d + ' :'
        val = dic[d]
        if isinstance(val, dict):
            res += '\n' + dict2str(val, pre + '\t') + '\n'
        else:
            res += f'\t{val}\t'

    return res


def save_score(path, score):
    """Write the text rendering of `score` to `path` and return that text."""
    mkdir(path, is_file=True)
    res = dict2str(score, '')
    write_lst(path, [res])
    return res


def get_process_groups(audio_num, process_num):
    """Partition ``range(audio_num)`` into contiguous ``[start, end)`` groups.

    At most `process_num` groups are produced (never more than `audio_num`);
    the leading groups get ``ceil(audio_num / process_num)`` items and the
    remaining ones get one item fewer.

    Returns:
        list of ``[start, end]`` pairs covering ``0 .. audio_num``.
    """
    assert audio_num > 0 and process_num > 0
    if process_num > audio_num:
        process_num = audio_num
    audio_num_per_process = (audio_num + process_num - 1) // process_num

    # Index of the first group that receives one item fewer.
    reduce_id = process_num - (audio_num_per_process * process_num - audio_num)

    groups = []
    cur = 0
    for i in range(process_num):
        if i == reduce_id:
            audio_num_per_process -= 1
        groups.append([cur, cur + audio_num_per_process])
        cur += audio_num_per_process
    return groups


def mkdir(fd, is_file=False):
    """Create directory `fd` (or the parent directory of file path `fd`).

    Paths are split on '/'.  Nothing happens when the target is empty or
    already exists.  `exist_ok=True` makes concurrent calls from several
    worker processes safe (the previous check-then-create loop could raise
    FileExistsError under a race).
    """
    target = fd.rpartition('/')[0] if is_file else fd
    if target and not os.path.exists(target):
        os.makedirs(target, exist_ok=True)


def get_filename(path):
    """Return the base name of `path` (symlinks resolved) without extension."""
    na_ext = os.path.realpath(path).split('/')[-1]
    return os.path.splitext(na_ext)[0]


def traverse_folder(folder):
    """Recursively list the files under `folder`.

    Returns:
        (names, paths): parallel lists of file names and their full paths.
    """
    paths = []
    names = []

    for root, dirs, files in os.walk(folder):
        for name in files:
            names.append(name)
            paths.append(os.path.join(root, name))

    return names, paths


def note_to_freq(piano_note):
    """Convert a piano note index to Hz, with index 39 mapped to 440 Hz.

    NOTE(review): `note2freq` in this module uses 48 as the 440 Hz
    reference; the two conventions differ -- confirm which one callers expect.
    """
    return 2 ** ((piano_note - 39) / 12) * 440


def create_logging(log_dir, filemode):
    """Configure root logging to a new numbered file in `log_dir` plus console.

    The log file is named ``NNNN.log`` using the first unused 4-digit index.
    DEBUG and above go to the file, INFO and above to the console.

    Returns:
        the `logging` module, so callers can use ``logging.info(...)``.
    """
    mkdir(log_dir)
    i1 = 0

    while os.path.isfile(os.path.join(log_dir, '{:04d}.log'.format(i1))):
        i1 += 1

    log_path = os.path.join(log_dir, '{:04d}.log'.format(i1))
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
        datefmt='%a, %d %b %Y %H:%M:%S',
        filename=log_path,
        filemode=filemode)

    # Print to console
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    return logging


def float32_to_int16(x):
    """Clip `x` to [-1, 1] and scale it to int16 sample values."""
    x = np.clip(x, -1, 1)
    assert np.max(np.abs(x)) <= 1.
    return (x * 32767.).astype(np.int16)


def int16_to_float32(x):
    """Scale int16 sample values back to float32 in roughly [-1, 1]."""
    return (x / 32767.).astype(np.float32)


def pad_truncate_sequence(x, max_len):
    """Zero-pad (at the end) or truncate `x` to exactly `max_len` items."""
    if len(x) < max_len:
        return np.concatenate((x, np.zeros(max_len - len(x))))
    return x[0 : max_len]


def read_lst(lst_path):
    """Read a text file and return its lines with trailing whitespace removed."""
    with open(lst_path) as f:
        return [d.rstrip() for d in f]


def write_lst(lst_path, lst):
    """Write the items of `lst` (converted with `str`) one per line.

    No trailing newline is written after the last item.
    """
    with open(lst_path, 'w') as f:
        f.write('\n'.join(str(item) for item in lst))


def freq2note(freq):
    """Convert a frequency in Hz to the nearest note index (440 Hz -> 48)."""
    freq = float(freq)
    note = round(12 * np.log2(freq / 440)) + 48
    return note


def note2freq(note):
    """Convert a note index to Hz (index 48 -> 440 Hz)."""
    note = float(note)
    freq = (2**((note - 48) / 12)) * 440
    return freq


def parse_frameroll2annotation(frame_roll, frames_per_second=100, notes_num=88):
    """Convert a frame-wise note roll into ``onset\\toffset\\tnote`` strings.

    Args:
        frame_roll: 1-D array with one note index per frame; the value
            `notes_num` marks a frame without a note.
        frames_per_second: frame rate used to convert indices to seconds.
        notes_num: the "no note" value.

    Returns:
        list of ``'%f\\t%f\\t%d'`` strings (onset seconds, offset seconds,
        note), one per run of identical note values.
    """
    pre = notes_num
    st = -1
    est = []
    # Append one "no note" frame so that a note lasting until the very end of
    # the roll is still closed and emitted.  The original built this padded
    # array but looped over the unpadded roll, silently dropping that note.
    preds = np.pad(frame_roll, (0, 1), 'constant', constant_values=(0, notes_num))
    for i in range(preds.shape[0]):
        if not preds[i] == pre:
            if st > -1 and not pre == notes_num:
                est.append(
                    '%f\t%f\t%d' % (st * 1.0 / frames_per_second, i * 1.0 / frames_per_second, pre))
            st = i
            pre = preds[i]
    return est
-------------------------------------------------------------------------------- /src/dataset/urmp/urmp_generate_dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import random 4 | import argparse 5 | 6 | sys.path.insert(1, os.path.join(sys.path[0], '../..')) 7 | from utils.utilities import (mkdir, write_lst) 8 | 9 | random.seed(1234) 10 | 11 | instr_tags = "vn,vc,va,fl,cl,sax,tpt,tbn,bn,hn,tba,db,ob" 12 | instrs = "Violin,Cello,Viola,Flute,Clarinet,Saxophone,Trumpet,Trombone,Bassoon,Horn,Tuba,Double_Bass,Oboe" 13 | tag2instr = {} 14 | 15 | seen = "Violin,Cello,Viola,Flute,Clarinet,Saxophone,Trumpet,Trombone" 16 | unseen = "Horn,Tuba,Double_Bass,Bassoon,Oboe" 17 | 18 | skips = "" 19 | 20 | instr_tags = instr_tags.split(',') 21 | instrs = instrs.split(',') 22 | seen = seen.split(',') 23 | unseen = unseen.split(',') 24 | skips = skips.split(',') 25 | 26 | for i, tag in enumerate(instr_tags): 27 | tag2instr[tag] = instrs[i] 28 | 29 | def get_all_audios(folder): 30 | audios = {} 31 | tracks_num = 0 32 | sample_folders = os.listdir(folder) 33 | for sample in sample_folders: 34 | sample_path = os.path.join(folder, sample) 35 | tracks = os.listdir(sample_path) 36 | if len(sample.split('_')) < 2: 37 | continue 38 | sampleName = sample.split('_')[1] 39 | sample_instrs = sample.split('_')[2:] 40 | if sampleName not in audios: 41 | audios[sampleName] = {} 42 | for track in tracks: 43 | if not str.endswith(track, "ref.txt"): 44 | continue 45 | track = str.replace(track, "_ref.txt", ".h5") 46 | #track = str.replace(track, "_TRAIN.h5", "_TEST.h5") 47 | track_path = os.path.join(sample_path, track) 48 | track_name = track.split("_")[1] 49 | instr = tag2instr[track.split("_")[2]] 50 | if instr not in audios[sampleName]: 51 | audios[sampleName][instr] = {} 52 | if track_name not in audios[sampleName][instr]: 53 | tracks_num += 1 54 | audios[sampleName][instr][track_name] = track_path 55 | seen_audios = [] 
56 | unseen_audios = [] 57 | for songName in audios: 58 | for instr in audios[songName]: 59 | if instr in seen: 60 | seen_audios.append(songName) 61 | else: 62 | unseen_audios.append(songName) 63 | 64 | 65 | train_lst = {} 66 | test_lst = {} 67 | 68 | for songName in audios: 69 | if songName in unseen_audios: 70 | instrs = {} 71 | instrs_num = 0 72 | for instr in audios[songName]: 73 | if instr not in instrs: 74 | instrs[instr] = [] 75 | for track in audios[songName][instr]: 76 | instrs[instr].append(audios[songName][instr][track]) 77 | instrs_num += len(instrs[instr]) 78 | instrs = sorted(instrs.items(), key=lambda d: -len(d[1])) 79 | show = [{instr[0]:len(instr[1])} for instr in instrs] 80 | print(show) 81 | data_lst = [] 82 | for instr in instrs: 83 | if len(instr[1]) > instrs_num // 2: 84 | print("aaaaaaaaaaaaaaaaaaaaaaaah") 85 | for track in instr[1]: 86 | data_lst.append([instr[0], track]) 87 | 88 | total = len(data_lst) 89 | pairs = [] 90 | for i, track in enumerate(data_lst): 91 | j = total - 1- i 92 | if j == i: 93 | j = 0 94 | pairs.append([track[0], data_lst[j][0], track[1],data_lst[j][1]]) 95 | if i + 1 >= (total + 1)// 2: 96 | break 97 | test_lst[songName] = {"test" : pairs, "query" : []} 98 | 99 | else: 100 | for instr in audios[songName]: 101 | if instr not in train_lst: 102 | train_lst[instr] = [] 103 | for track in audios[songName][instr]: 104 | train_lst[instr].append(str.replace(audios[songName][instr][track], "_TEST.h5", "h5")) 105 | 106 | 107 | 108 | print("\nseen:\n") 109 | compute_instr_samples(audios, songNames=None, skipNames=unseen_audios) 110 | print("\nunseen:\n") 111 | compute_instr_samples(audios, songNames=unseen_audios) 112 | 113 | print("\nall:\n") 114 | compute_instr_samples(audios) 115 | 116 | 117 | query_lst = [] 118 | 119 | songs_lst = [] 120 | songs_num = len(test_lst) 121 | for test in test_lst: 122 | songs_lst.append(test) 123 | 124 | for i, test in enumerate(test_lst): 125 | for pair in test_lst[test]["test"]: 126 | query = 
[] 127 | query += pair[:2] 128 | for j in range(2): 129 | path = None 130 | while path is None: 131 | song_id = random.randint(0, songs_num - 1) 132 | if song_id == i: 133 | continue 134 | query_pairs = test_lst[songs_lst[song_id]]["test"] 135 | for query_pair in query_pairs: 136 | for k in range(2): 137 | if query_pair[k] == pair[j] and not query_pair[k + 2] == pair[j + 2]: 138 | path = query_pair[k + 2] 139 | query.append(path) 140 | break 141 | if path is not None: 142 | break 143 | test_lst[test]["query"] += [query] 144 | 145 | return audios, train_lst, test_lst 146 | 147 | def compute_instr_samples(audios, songNames=None, skipNames=None): 148 | samples = {} 149 | num = 0 150 | for songName in audios: 151 | if songNames is not None and songName not in songNames: 152 | continue 153 | if skipNames is not None and songName in skipNames: 154 | continue 155 | for instr in audios[songName]: 156 | if instr not in samples: 157 | samples[instr] = 0 158 | num += len(audios[songName][instr]) 159 | samples[instr] += len(audios[songName][instr]) 160 | 161 | total_num = 0 162 | for instr in samples: 163 | total_num += samples[instr] 164 | print(instr, samples[instr]) 165 | print(total_num, num) 166 | return samples 167 | 168 | def save_train_lst(data, output_folder): 169 | for instr in data: 170 | instr_folder = os.path.join(output_folder, instr) 171 | mkdir(instr_folder) 172 | path = os.path.join(instr_folder, "train.lst") 173 | write_lst(path, data[instr]) 174 | 175 | def save_test_lst(data, output_folder): 176 | testset_folder = os.path.join(output_folder, "testset") 177 | mkdir(testset_folder) 178 | test_lst = [] 179 | query_lst = [] 180 | for songName in data: 181 | test_lst += data[songName]["test"] 182 | query_lst += data[songName]["query"] 183 | test_lst = [f"{t[0]},{t[1]}\t{t[2]},{t[3]}" for t in test_lst] 184 | query_lst = [f"{t[0]},{t[1]}\t{t[2]},{t[3]}" for t in query_lst] 185 | print("test set", len(test_lst)) 186 | test_lst_path = os.path.join(testset_folder, 
"test.lst") 187 | query_lst_path = os.path.join(testset_folder, "query.lst") 188 | write_lst(test_lst_path, test_lst) 189 | write_lst(query_lst_path, query_lst) 190 | 191 | 192 | if __name__=="__main__": 193 | parser = argparse.ArgumentParser(description='') 194 | parser.add_argument('--feature_dir', type=str, required=True, help='Directory of generated dataset.') 195 | parser.add_argument('--data_dir', type=str, required=True, help='Directory to store generated files.') 196 | 197 | args = parser.parse_args() 198 | 199 | folder = args.feature_dir 200 | output_folder = args.data_dir 201 | audios, train_lst, test_lst = get_all_audios(folder) 202 | save_train_lst(train_lst, output_folder) 203 | save_test_lst(test_lst, output_folder) 204 | instr_samples = compute_instr_samples(audios) 205 | 206 | 207 | -------------------------------------------------------------------------------- /src/models/layers.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import sys 6 | import numpy as np 7 | import time 8 | import h5py 9 | import math 10 | 11 | EPS = 1e-8 12 | 13 | 14 | def init_layer(layer): 15 | """Initialize a Linear or Convolutional layer. """ 16 | nn.init.xavier_uniform_(layer.weight) 17 | 18 | if hasattr(layer, 'bias'): 19 | if layer.bias is not None: 20 | layer.bias.data.fill_(0.) 21 | 22 | 23 | def init_bn(bn): 24 | """Initialize a Batchnorm layer. """ 25 | bn.bias.data.fill_(0.) 26 | bn.weight.data.fill_(1.) 27 | 28 | 29 | 30 | class ConvBlock(nn.Module): 31 | """A Convolutional Layer Followed by a Batchnorm Layer and a ReLU Activation Layer. 
32 | 33 | Input : [B x in_channels x T x F] 34 | Output : [B x out_chanels x T x F] 35 | 36 | Parameters 37 | ----------- 38 | in_channels : int 39 | out_channels : int 40 | momentum : float 41 | 42 | """ 43 | def __init__(self, in_channels, out_channels, momentum=0.01): 44 | super(ConvBlock, self).__init__() 45 | 46 | self.conv = nn.Conv2d(in_channels=in_channels, 47 | out_channels=out_channels, 48 | kernel_size=(3, 3), stride=(1, 1), 49 | padding=(1, 1), bias=False) 50 | 51 | self.bn = nn.BatchNorm2d(out_channels, momentum=momentum) 52 | 53 | self.init_weights() 54 | 55 | def init_weights(self): 56 | init_layer(self.conv) 57 | init_bn(self.bn) 58 | 59 | 60 | def forward(self, input): 61 | """ 62 | Parameters 63 | ---------- 64 | input : [B x in_channels x T x F] 65 | 66 | Returns 67 | ------- 68 | x : [B x out_chanels x T x F] 69 | 70 | """ 71 | x = input 72 | x = F.relu_(self.bn(self.conv(x))) 73 | return x 74 | 75 | 76 | class DeepConvBlock(nn.Module): 77 | """2 Convolutional Layers, each of which is followed by a Batchnorm Layer and a ReLU Activation Layer. 78 | 79 | Input : [B x in_channels x T x F] 80 | Output : [B x out_chanels x T x F] 81 | 82 | Parameters 83 | ----------- 84 | in_channels : int 85 | out_channels : int 86 | momentum : float 87 | 88 | """ 89 | 90 | def __init__(self, in_channels, out_channels, momentum=0.01): 91 | super(DeepConvBlock, self).__init__() 92 | 93 | self.conv1 = ConvBlock(in_channels, out_channels, momentum) 94 | self.conv2 = ConvBlock(out_channels, out_channels, momentum) 95 | 96 | 97 | def forward(self, input): 98 | """ 99 | 100 | Parameters 101 | ---------- 102 | input : [B x in_channels x T x F] 103 | 104 | Returns 105 | ------- 106 | : [B x out_chanels x T x F] 107 | """ 108 | x = input 109 | return self.conv2(self.conv1(x)) 110 | 111 | 112 | class LinearBlock2D(nn.Module): 113 | """1 2D 1x1 Convolutional Layer with bias. 
114 | 115 | Input : [B x in_channels x T x F] 116 | Output : [B x out_chanels x T x F] 117 | 118 | Parameters 119 | ----------- 120 | in_channels : int 121 | out_channels : int 122 | 123 | """ 124 | 125 | def __init__(self, in_channels, out_channels): 126 | super(LinearBlock2D, self).__init__() 127 | self.conv = nn.Conv2d(in_channels=in_channels, 128 | out_channels=out_channels, 129 | kernel_size=(1, 1), stride=(1, 1), bias=True) 130 | 131 | self.init_weights() 132 | 133 | 134 | def init_weights(self): 135 | init_layer(self.conv) 136 | 137 | def forward(self, input): 138 | """ 139 | Parameters 140 | ---------- 141 | input : [B x in_channels x T x F] 142 | 143 | Returns 144 | ------- 145 | x : [B x out_chanels x T x F] 146 | """ 147 | x = input 148 | x = self.conv(x) 149 | return x 150 | 151 | class LinearBlock1D(nn.Module): 152 | """1 1D 1x1 Convolutional Layer. 153 | 154 | Input : [B x in_channels x T] 155 | Output : [B x out_chanels x T] 156 | 157 | Parameters 158 | ----------- 159 | in_channels : int 160 | out_channels : int 161 | bias : boolean 162 | default : true 163 | has bias if true 164 | 165 | """ 166 | 167 | def __init__(self, in_channels, out_channels, bias=True): 168 | super(LinearBlock1D, self).__init__() 169 | self.conv = nn.Conv1d(in_channels=in_channels, 170 | out_channels=out_channels, 171 | kernel_size=1, stride=1, bias=bias) 172 | 173 | self.init_weights() 174 | 175 | 176 | def init_weights(self): 177 | init_layer(self.conv) 178 | 179 | 180 | def forward(self, input): 181 | """ 182 | Parameters 183 | ----------- 184 | input : [B x in_channels x T] 185 | 186 | Returns 187 | ----------- 188 | x : [B x out_chanels x T] 189 | """ 190 | 191 | x = input 192 | x = self.conv(x) 193 | return x 194 | 195 | class EncoderBlock(nn.Module): 196 | """A Convolutional Layer Followed by a Batchnorm Layer and a ReLU Activation Layer. 197 | 198 | Look details of the description at `ConvBlock`. 
199 | 200 | """ 201 | 202 | def __init__(self, in_channels, out_channels, momentum = 0.01): 203 | super(EncoderBlock, self).__init__() 204 | 205 | self.conv_block = ConvBlock(in_channels, out_channels, momentum) 206 | 207 | def forward(self, input): 208 | x = input 209 | x = self.conv_block(x) 210 | #x_pool = F.avg_pool2d(x, kernel_size=self.downsample) 211 | return x 212 | 213 | class DecoderBlock(nn.Module): 214 | """A Deconv Block (a 2D 3x3 Deconvolutional Layer Followed by a Batchnorm Layer and a ReLU Activation Layer) followed by a `DeepConvBlock` or `ConvBlock`. 215 | 216 | Input: [B x in_channels x T x F], [B x (out_channels // 2) x (T* strides[0]) x (F * strides[1])] 217 | Output:[B x out_channels x (T* strides[0]) x (F * strides[1])] 218 | (ummmm... stride other than (2, 2) might require extra consideration of padding operation) 219 | 220 | Parameters 221 | ---------- 222 | in_channels : int 223 | out_channels : int 224 | strides : tuple 225 | momentum : float 226 | deep : boolean 227 | default: False 228 | the Deconv Block is followed by a `DeepConvBlock` if true else `ConvBlock` 229 | 230 | """ 231 | 232 | def __init__(self, in_channels, out_channels, strides, momentum=0.01, deep=False): 233 | super(DecoderBlock, self).__init__() 234 | 235 | self.conv = torch.nn.ConvTranspose2d(in_channels=in_channels, 236 | out_channels=out_channels, kernel_size=(3, 3), stride=strides, 237 | padding=(0, 0), output_padding=(0, 0), bias=False) 238 | 239 | self.bn = nn.BatchNorm2d(out_channels, momentum=momentum) 240 | self.conv_block = DeepConvBlock(out_channels * 2, out_channels, momentum) if deep else ConvBlock(out_channels * 2, out_channels, momentum) 241 | 242 | self.init_weights() 243 | 244 | self.prune_temporal = (not strides[-2] == 1) 245 | 246 | def init_weights(self): 247 | init_layer(self.conv) 248 | init_bn(self.bn) 249 | 250 | def prune(self, x): 251 | """Prune the shape of x after transpose convolution. 
252 | """ 253 | if self.prune_temporal: 254 | x = x[:, :, : - 1, : - 1] 255 | else: 256 | x = x[:, :, 1 : -1, : -1] 257 | return x 258 | 259 | 260 | def forward(self, input_tensor, concat_tensor): 261 | """ 262 | 263 | Parameters 264 | ---------- 265 | input_tensor : tensor 266 | [B x in_channels x T x F] 267 | concat_tensor : tensor 268 | [B x (out_channels // 2) x (T* strides[0]) x (F * strides[1])] 269 | 270 | Returns 271 | --------- 272 | x : tensor 273 | [B x out_channels x (T* strides[0]) x (F * strides[1])] 274 | 275 | """ 276 | 277 | x = input_tensor 278 | x = F.relu_(self.bn(self.conv(x))) 279 | x = self.prune(x) 280 | x = torch.cat((x, concat_tensor), dim=1) 281 | x = self.conv_block(x) 282 | return x 283 | 284 | 285 | 286 | -------------------------------------------------------------------------------- /src/dataset/urmp/urmp_feature.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import argparse 3 | import csv 4 | import os 5 | import time 6 | import h5py 7 | import librosa 8 | import multiprocessing 9 | import sys 10 | 11 | sys.path.insert(1, os.path.join(sys.path[0], '../..')) 12 | 13 | from utils.utilities import (mkdir, float32_to_int16, freq2note, get_filename, get_process_groups, read_lst, write_lst) 14 | from utils.target_process import TargetProcessor 15 | 16 | from conf.feature import * 17 | 18 | et = 1e-8 19 | 20 | 21 | def remove_empty_segment(wav, frame_roll, sample_rate): 22 | segments = [] 23 | samples_per_frame = sample_rate * 1. 
/ FRAMES_PER_SEC 24 | for i in range(frame_roll.shape[-1]): 25 | if not frame_roll[i] == NOTES_NUM_EXCLUDE_SILENCE: 26 | st = int(i * samples_per_frame) 27 | ed = int((i + 1)* samples_per_frame) 28 | if ed > wav.shape[-1]: 29 | ed = wav.shape[-1] 30 | segments.append(wav[st : ed]) 31 | if ed == wav.shape[-1]: 32 | break 33 | return np.concatenate(segments, -1) 34 | 35 | 36 | def pack_urmp_dataset_to_hdf5(args): 37 | 38 | dataset_dir = args.dataset_dir 39 | feature_dir = args.feature_dir 40 | process_num = args.process_num 41 | 42 | mkdir(feature_dir) 43 | 44 | meta_dict = {} 45 | meta_dict['audio_filename'] = [] 46 | audios_num = 0 47 | 48 | for folder in os.listdir(dataset_dir): 49 | if str.startswith(folder, "._"): 50 | continue 51 | meta_data = folder.split('_') 52 | if len(meta_data) < 4: 53 | continue 54 | audios_num += 1 55 | id = meta_data[0] 56 | name = meta_data[1] 57 | sources = meta_data[2:] 58 | audio = {} 59 | audio['mix'] = os.path.join(folder, f'AuMix_{folder}.wav') 60 | audio['separated_sources'] = [] 61 | audio['note_annotations'] = [] 62 | for j, s in enumerate(sources): 63 | audio['separated_sources'] += [os.path.join(folder, f'AuSep_{j + 1}_{s}_{id}_{name}.wav')] 64 | audio['note_annotations'] += [os.path.join(folder, f'Notes_{j + 1}_{s}_{id}_{name}.txt')] 65 | 66 | meta_dict['audio_filename'] += [audio] 67 | 68 | feature_time = time.time() 69 | print(f"The total number of the mixture audio is {audios_num}") 70 | def process_unit(n): 71 | 72 | name = meta_dict['audio_filename'][n]['mix'] 73 | print(name) 74 | audio_path = os.path.join(dataset_dir, name) 75 | (audio, _) = librosa.core.load(audio_path, sr=SAMPLE_RATE, mono=True) 76 | packed_hdf5_path = os.path.join(feature_dir, '{}.h5'.format(os.path.splitext(name)[0])) 77 | mkdir(os.path.dirname(packed_hdf5_path)) 78 | with h5py.File(packed_hdf5_path, 'w') as hf: 79 | #hf.attrs.create('midi_filename', data=meta_dict['midi_filename'][n].encode(), dtype='S100') 80 | 
hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16) 81 | 82 | for i, name in enumerate(meta_dict['audio_filename'][n]['separated_sources']): 83 | audio_path = os.path.join(dataset_dir, name) 84 | 85 | (audio, _) = librosa.core.load(audio_path, sr=SAMPLE_RATE, mono=True) 86 | (hq_audio, _) = librosa.core.load(audio_path, sr=SAMPLE_RATE * 2, mono=True) 87 | 88 | note_annotations_path = os.path.join(dataset_dir, meta_dict['audio_filename'][n]['note_annotations'][i]) 89 | note_annotations = read_lst(note_annotations_path) 90 | note_annotations = [notes.split('\t\t') for notes in note_annotations] 91 | note_annotations = [[notes[0], float(notes[2]) + float(notes[0]), float(freq2note(notes[1]))] for notes in note_annotations] 92 | note_annotations = np.array(note_annotations, dtype = np.float32) 93 | note_annotations_lst = ['%s\t%s\t%s' % (notes[0], str(notes[1]), str(notes[2])) for notes in note_annotations] 94 | ref_path = os.path.join(feature_dir, '{}_ref.txt'.format(os.path.splitext(name)[0])) 95 | mkdir(os.path.dirname(packed_hdf5_path)) 96 | write_lst(ref_path, note_annotations_lst) 97 | 98 | duration = (audio.shape[-1] + SAMPLE_RATE - 1) // SAMPLE_RATE 99 | target_processor = TargetProcessor(duration, FRAMES_PER_SEC, BEGIN_NOTE, NOTES_NUM_EXCLUDE_SILENCE) 100 | target_dict = target_processor.process(0, note_annotations) 101 | frame_roll = np.array(target_dict['frame_roll'], dtype=np.int16) 102 | 103 | 104 | train_packed_hdf5_path = os.path.join(feature_dir, '{}._TRAIN.h5'.format(os.path.splitext(name)[0])) 105 | test_packed_hdf5_path = os.path.join(feature_dir, '{}._TEST.h5'.format(os.path.splitext(name)[0])) 106 | 107 | scale = 9 108 | dense_audio = remove_empty_segment(audio, frame_roll, SAMPLE_RATE) 109 | dense_hq_audio = remove_empty_segment(hq_audio, frame_roll, SAMPLE_RATE * 2) 110 | 111 | for i in range(scale): 112 | shift_pitch = i - (scale // 2) 113 | packed_hdf5_path = os.path.join(feature_dir, 
'{}._TRAIN_shift_pitch_{}.h5'.format(os.path.splitext(name)[0], shift_pitch)) 114 | if os.path.exists(packed_hdf5_path): 115 | continue 116 | 117 | if shift_pitch == 0: 118 | shift_audio = audio 119 | shift_dense_audio = dense_audio 120 | else: 121 | shift_audio = librosa.effects.pitch_shift(hq_audio, SAMPLE_RATE * 2, n_steps=shift_pitch) 122 | shift_audio = librosa.core.resample(shift_audio, SAMPLE_RATE * 2, SAMPLE_RATE) 123 | shift_dense_audio = librosa.effects.pitch_shift(dense_hq_audio, SAMPLE_RATE * 2, n_steps=shift_pitch) 124 | shift_dense_audio = librosa.core.resample(shift_dense_audio, SAMPLE_RATE * 2, SAMPLE_RATE) 125 | 126 | shift_frame_roll = frame_roll.copy() + shift_pitch 127 | shift_frame_roll[shift_frame_roll == NOTES_NUM_EXCLUDE_SILENCE + shift_pitch] = NOTES_NUM_EXCLUDE_SILENCE 128 | shift_frame_roll = np.clip(shift_frame_roll, 0, NOTES_NUM_EXCLUDE_SILENCE) 129 | 130 | with h5py.File(packed_hdf5_path, 'w') as hf: 131 | hf.create_dataset(name='shift_waveform', data=float32_to_int16(shift_audio), dtype=np.int16) 132 | hf.create_dataset(name='shift_dense_waveform', data=float32_to_int16(shift_dense_audio), dtype=np.int16) 133 | hf.create_dataset(name='frame_roll', data=shift_frame_roll, dtype=np.int16) 134 | 135 | with h5py.File(train_packed_hdf5_path, 'w') as hf: 136 | hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16) 137 | hf.create_dataset(name='frame_roll', data=frame_roll, dtype=np.int16) 138 | 139 | with h5py.File(test_packed_hdf5_path, 'w') as hf: 140 | hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16) 141 | hf.create_dataset(name='waveform_path', data=[audio_path.encode()], dtype='S200') 142 | hf.create_dataset(name='note_annotations_txt', data=[ref_path.encode()], dtype='S200') 143 | hf.create_dataset(name='frame_roll', data=frame_roll, dtype=np.int16) 144 | 145 | def process_group(st, ed, total_num, pid): 146 | print(f"process {pid + 1} starts") 147 | for n in range(st, ed): 148 | 
process_unit(n) 149 | print(f"process {pid + 1} : {n + 1}/{total_num} done.") 150 | print(f"process {pid + 1} ends") 151 | 152 | 153 | audio_groups = get_process_groups(audios_num, process_num) 154 | for pid, (st, ed) in enumerate(audio_groups): 155 | p = multiprocessing.Process(target = process_group, args = (st, ed, audios_num, pid)) 156 | p.start() 157 | 158 | if __name__ == '__main__': 159 | 160 | parser = argparse.ArgumentParser(description='') 161 | parser.add_argument('--dataset_dir', type=str, required=True, help='Directory of dataset.') 162 | parser.add_argument('--feature_dir', type=str, required=True, help='Directory to store generated files.') 163 | parser.add_argument('--process_num', type=int, required=True, help='Number of processes.') 164 | 165 | args = parser.parse_args() 166 | pack_urmp_dataset_to_hdf5(args) 167 | 168 | -------------------------------------------------------------------------------- /src/dataset/urmp/urmp_sample.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import numpy as np 4 | import torch 5 | from torch.utils.data import Dataset 6 | from torch.utils.data.dataloader import default_collate 7 | import random 8 | import h5py 9 | from prefetch_generator import BackgroundGenerator 10 | 11 | from utils.utilities import (read_lst, read_config, int16_to_float32, encode_mu_law) 12 | from utils.audio_utilities import write_audio 13 | 14 | from conf.feature import * 15 | from conf.sample import * 16 | from conf.urmp import * 17 | 18 | 19 | shuffle_rng = np.random.RandomState(1234) 20 | sample_rng = np.random.RandomState(1234) 21 | 22 | 23 | class UrmpDataset(): 24 | def __init__(self, instr_name): 25 | self._file_lst = read_lst(str.replace(TRAINING_FILE_LST_PATH, "INSTR_NAME", instr_name)) 26 | audios_num = len(self._file_lst) 27 | self._data = [None] * audios_num 28 | self._tracks_id = np.arange(audios_num) 29 | self._audios_num = audios_num 30 | self._current_id = 0 
31 | self.tag = -1 32 | 33 | 34 | 35 | def __get_next_track_id__(self, pos): 36 | audios_num = self._audios_num 37 | current_id = pos % audios_num 38 | shuffle_rng.shuffle(self._tracks_id) 39 | nid = self._tracks_id[current_id] 40 | return nid 41 | 42 | def next_sample(self, pos=None, is_query=False): 43 | 44 | def is_silence(x): 45 | return x.shape[-1] * 88 == x.sum() 46 | 47 | def frame_roll_mask(x, y): 48 | mask = np.ones_like(x) 49 | mask[x == 88] = 0 50 | mask[y == 88] = 1 51 | return mask 52 | 53 | def load_file(pos, track_id, shift_pitch): 54 | if self._data[track_id] is None: 55 | hdf5_path = self._file_lst[track_id] 56 | datas = [] 57 | for i in range(POS_SHIFT_SEMITONE): 58 | data = {} 59 | train_hdf5_path = str.replace(hdf5_path, '.h5', f'._TRAIN_shift_pitch_{i - SHIFT_SEMITONE}.h5') 60 | hf = h5py.File(train_hdf5_path, 'r') 61 | data = {'shift_waveform': int16_to_float32(hf['shift_waveform'][:])[None, :], 62 | 'shift_dense_waveform' : int16_to_float32(hf['shift_dense_waveform'][:])[None, :], 63 | 'frame_roll': hf['frame_roll'][:].astype(np.int)} 64 | datas.append(data) 65 | self._data[track_id] = datas 66 | return self._data[track_id][shift_pitch + SHIFT_SEMITONE] 67 | 68 | def load_cache_data(pos, track_id, other_nid, another_nid, is_query): 69 | 70 | if is_query: 71 | shift_pitch = sample_rng.randint(0, POS_SHIFT_SEMITONE) - SHIFT_SEMITONE 72 | hf = load_file(pos, other_nid, shift_pitch) 73 | shift_dense_waveform = hf['shift_dense_waveform'] 74 | st = sample_rng.randint(0, shift_dense_waveform.shape[1] - SAMPLE_DURATION) 75 | query_waveform = shift_dense_waveform[:, st : st + SAMPLE_DURATION].copy() 76 | 77 | shift_pitch = sample_rng.randint(0, POS_SHIFT_SEMITONE) - SHIFT_SEMITONE 78 | hf = load_file(pos, another_nid, shift_pitch) 79 | shift_dense_waveform = hf['shift_dense_waveform'] 80 | st = sample_rng.randint(0, shift_dense_waveform.shape[1] - SAMPLE_DURATION) 81 | another_query_waveform = shift_dense_waveform[:, st : st + SAMPLE_DURATION].copy() 
82 | 83 | return query_waveform, another_query_waveform 84 | 85 | else: 86 | 87 | shift_pitch = sample_rng.randint(0, POS_SHIFT_SEMITONE) - SHIFT_SEMITONE 88 | hf = load_file(pos, track_id, shift_pitch) 89 | waveform = hf['shift_waveform'] 90 | frame_roll = hf['frame_roll'] 91 | 92 | shift_pitch = sample_rng.randint(0, POS_SHIFT_SEMITONE) - SHIFT_SEMITONE 93 | hf = load_file(pos, track_id, shift_pitch) 94 | strong_waveform = hf['shift_waveform'] 95 | another_frame_roll = hf['frame_roll'] 96 | 97 | start_time = sample_rng.randint(0, int((waveform.shape[-1] - SAMPLE_DURATION) / SAMPLE_RATE)) 98 | st = start_time * SAMPLE_RATE 99 | frame_roll_st = int(start_time * FRAMES_PER_SEC) 100 | ed = frame_roll_st + FRAME_DURATION + 1 101 | obj_frame_roll = frame_roll[frame_roll_st : ed].copy() 102 | 103 | another_start_time = sample_rng.randint(0, int((waveform.shape[-1] - SAMPLE_DURATION) / SAMPLE_RATE)) if is_silence(obj_frame_roll) else start_time 104 | another_st = another_start_time * SAMPLE_RATE 105 | another_frame_roll_st = int(another_start_time * FRAMES_PER_SEC) 106 | another_ed = another_frame_roll_st + FRAME_DURATION + 1 107 | another_frame_roll = another_frame_roll[another_frame_roll_st : another_ed].copy() 108 | 109 | ori_waveform = waveform[:, st : st + SAMPLE_DURATION].copy() 110 | strong_waveform = strong_waveform[:, another_st : another_st + SAMPLE_DURATION].copy() 111 | 112 | return (ori_waveform, strong_waveform, obj_frame_roll, another_frame_roll) 113 | 114 | def get_next_track(pos=None, is_query=False): 115 | nid = self.__get_next_track_id__(pos) 116 | other_nid = self.__get_next_track_id__(pos + 1) 117 | another_nid = self.__get_next_track_id__(pos + 2) 118 | return load_cache_data(pos, nid, other_nid, another_nid, is_query) 119 | 120 | tracks = get_next_track(pos, is_query) 121 | return tracks 122 | 123 | def get_samples_num(self): 124 | return len(self._file_lst) 125 | 126 | 127 | 128 | class UrmpSample(Dataset): 129 | def __init__(self): 130 | 
super(UrmpSample, self).__init__() 131 | 132 | datasets = {} 133 | for instr in SEEN_INSTRUMENTS: 134 | datasets[instr] = UrmpDataset(instr) 135 | 136 | self._datasets = datasets 137 | datasets_index = [] 138 | datasets_samples_num = [0] 139 | for d in datasets: 140 | datasets_index.append(d) 141 | n = datasets[d].get_samples_num() 142 | datasets_samples_num.append(n + datasets_samples_num[-1]) 143 | 144 | self._datasets_index = datasets_index 145 | self.datasets_samples_num = datasets_samples_num 146 | 147 | def __iter__(self): 148 | return BackgroundGenerator(super().__iter__()) 149 | 150 | def __get_train_sample__(self, index, instr_indexs, is_query): 151 | input_samples = [] 152 | datasets = self._datasets 153 | datasets_index = self._datasets_index 154 | 155 | for instr in instr_indexs: 156 | dataset = datasets[datasets_index[instr]] 157 | inputs = dataset.next_sample(index, is_query) 158 | for i, input in enumerate(inputs): 159 | if len(input_samples) == i: 160 | input_samples.append([]) 161 | input = np.expand_dims(input, 0) 162 | input_samples[i].append(input) 163 | 164 | for i, input in enumerate(input_samples): 165 | input_samples[i] = np.concatenate(input_samples[i], 0) 166 | 167 | return input_samples 168 | 169 | 170 | def __sample_seen_instruments__(self): 171 | instruments_ratio = self.datasets_samples_num 172 | index = sample_rng.randint(instruments_ratio[-1]) 173 | for i in range(len(instruments_ratio) - 1): 174 | if index < instruments_ratio[i + 1]: 175 | return i 176 | 177 | assert False 178 | 179 | def __getitem__(self, index = 0): 180 | up_bound = SEEN_INSTRUMENTS_NUM if SEEN_INSTRUMENTS_NUM < UP_BOUND else UP_BOUND 181 | selected_ids = [] 182 | while len(selected_ids) < up_bound: 183 | id = self.__sample_seen_instruments__() 184 | if not id in selected_ids: 185 | selected_ids.append(id) 186 | 187 | (separated, strong_separated, target, another_target) = self.__get_train_sample__(index, selected_ids[ :SOURCES_NUM_OF_MIXTURE], is_query=False) 188 
| (query_separated, another_query_separated) = self.__get_train_sample__(index, selected_ids, is_query=True) 189 | mix = torch.from_numpy(separated).float().sum(0) 190 | strong_mix = torch.from_numpy(strong_separated).float().sum(0) 191 | separated = torch.from_numpy(separated).float() 192 | query_separated = torch.from_numpy(query_separated).float() 193 | another_query_separated = torch.from_numpy(another_query_separated).float() 194 | target = torch.from_numpy(target).long() 195 | another_target = torch.from_numpy(another_target).long() 196 | batch = (separated, query_separated, another_query_separated, target, another_target) 197 | return mix, strong_mix, batch 198 | 199 | def __len__(self): 200 | return SAMPLES_NUM 201 | 202 | def get_len(self): 203 | return self.__len__() 204 | 205 | def get_collate_fn(self): 206 | return default_collate 207 | 208 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import time 4 | import numpy as np 5 | import random 6 | import argparse 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | import src 12 | from dataset.urmp.urmp_sample import UrmpSample 13 | from models.model_factory import ModelFactory 14 | from utils.utilities import (compute_time, save_score, mkdir) 15 | from utils.multiEpochsDataLoader import MultiEpochsDataLoader as DataLoader 16 | from conf.sample import * 17 | from conf.feature import * 18 | 19 | def seed_torch(seed=1234): 20 | os.environ['PYTHONHASHSEED'] = str(seed) 21 | torch.manual_seed(seed) 22 | torch.cuda.manual_seed_all(seed) 23 | np.random.seed(seed) 24 | random.seed(seed) 25 | torch.cuda.manual_seed_all(seed) 26 | torch.backends.cudnn.deterministic = True 27 | torch.backends.cudnn.benchmark = False 28 | 29 | def mae(input, target): 30 | return torch.mean(torch.abs(input - target)) 31 | 32 | def align(a, b, 
dim): 33 | return a.transpose(0, dim)[:b.shape[dim]].transpose(0, dim) 34 | 35 | def onehot(x, dim, classes_num): 36 | x = x.unsqueeze(dim) 37 | shape = list(x.shape) 38 | shape[dim] = classes_num 39 | y = torch.zeros(shape).to(x.device).scatter_(dim, x, 1) 40 | return y 41 | 42 | def move_data2cuda(urmp_batch): 43 | mix, another_mix, batch = urmp_batch 44 | separated, query, another_query, pitch_target, another_pitch_target = batch 45 | batch = [separated, query, another_query, pitch_target, another_pitch_target] 46 | for i, b in enumerate(batch): 47 | batch[i] = b.cuda() 48 | mix = mix.cuda() 49 | another_mix = another_mix.cuda() 50 | return mix, another_mix, batch 51 | 52 | def train_step(network, urmp_batch, mode, adv_id=0): 53 | mix, another_mix, batch = urmp_batch 54 | separated, query, another_query, pitch_target, another_pitch_target = batch 55 | 56 | a = 1./ 8. 57 | if mode == 'query': 58 | 59 | #contrastive loss 60 | 61 | latent_vectors = [] 62 | hQuery = [] 63 | for i in range(query.shape[1]): 64 | query_spec = network(query[:, i], 'wav2spec') 65 | another_query_spec = network(another_query[:, i], 'wav2spec') 66 | h = network(query_spec, 'query') 67 | hc = network(another_query_spec, 'query') 68 | latent_vectors.append([h, hc]) 69 | sim = 0. 
70 | sep_num = query.shape[1] 71 | batch_size = query.shape[0] 72 | for i in range(sep_num): 73 | next_i = (i + 1) % sep_num 74 | sim += torch.mean((latent_vectors[i][0] - latent_vectors[i][1])**2, dim=-1) + \ 75 | torch.relu(a - torch.mean((latent_vectors[i][0] - latent_vectors[next_i][1])**2, dim=-1)) 76 | sim_loss = sim.mean() / sep_num 77 | return sim_loss, f'{sim_loss.item()}' 78 | 79 | 80 | elif mode == 'AMT': 81 | 82 | # transcription loss for AMT-only baseline 83 | 84 | pitch_transcription = [] 85 | mix_spec = network(mix, 'wav2spec') 86 | for i in range(separated.shape[1]): 87 | query_spec = network(query[:, i], 'wav2spec') 88 | hQuery = network(query_spec, "query") 89 | args = (mix_spec, hQuery) 90 | prob = network(args, 'transcribe') 91 | pitch_transcription.append(prob) 92 | 93 | transcription = torch.stack(pitch_transcription, 2) 94 | pitch_loss = nn.CrossEntropyLoss()(transcription, align(pitch_target, transcription, -1)) 95 | return pitch_loss, f'{pitch_loss.item()}' 96 | 97 | elif mode == 'MSS': 98 | 99 | # separation loss for MSS-only baseline 100 | 101 | spec_losses = [] 102 | mix_spec = network(mix, 'wav2spec') 103 | for i in range(separated.shape[1]): 104 | query_spec = network(query[:, i], 'wav2spec') 105 | hQuery = network(query_spec, "query") 106 | source_spec = network(separated[:, i], 'wav2spec') 107 | args = (mix_spec, hQuery) 108 | est_spec = network(args, 'separate') 109 | spec_loss = torch.abs(est_spec - align(source_spec, est_spec, -2)) 110 | spec_losses.append(spec_loss) 111 | 112 | spec_loss = torch.stack(spec_losses, 1) 113 | spec_loss = spec_loss.mean() 114 | return spec_loss, f'{spec_loss.item()}' 115 | 116 | 117 | elif mode == 'MSS-AMT': 118 | 119 | # separation and transcription loss for muli-task baseline and multi-task score-informed (MSI) model 120 | 121 | spec_losses = [] 122 | pitch_transcription = [] 123 | mix_spec = network(mix, 'wav2spec') 124 | for i in range(separated.shape[1]): 125 | source_spec = network(separated[:, 
i], 'wav2spec') 126 | query_spec = network(query[:, i], 'wav2spec') 127 | hQuery = network(query_spec, "query") 128 | args = (mix_spec, hQuery) 129 | est_spec, prob = network(args, 'multiTask') 130 | pitch_transcription.append(prob) 131 | spec_loss = torch.abs(est_spec - align(source_spec, est_spec, -2)) 132 | spec_losses.append(spec_loss) 133 | 134 | transcription = torch.stack(pitch_transcription, 2) 135 | pitch_loss = nn.CrossEntropyLoss()(transcription, align(pitch_target, transcription, -1)) 136 | 137 | spec_loss = torch.stack(spec_losses, 1) 138 | spec_loss = spec_loss.mean() 139 | return spec_loss + pitch_loss, f'{spec_loss.item()} {pitch_loss.item()}' 140 | 141 | 142 | 143 | elif mode == 'MSI-DIS': 144 | 145 | # transcription loss and pitch-translation invariance loss for MSI-DIS model 146 | 147 | spec_losses = [] 148 | another_mix_spec = network(another_mix, 'wav2spec') 149 | mix_spec = network(mix, 'wav2spec') 150 | target = onehot(pitch_target, 1, NOTES_NUM) 151 | another_target = onehot(another_pitch_target, 1, NOTES_NUM) 152 | 153 | pitch_transcription = [] 154 | another_pitch_transcription = [] 155 | 156 | for i in range(separated.shape[1]): 157 | source_spec = network(separated[:, i], 'wav2spec') 158 | 159 | query_spec = network(query[:, i], 'wav2spec') 160 | hQuery = network(query_spec, "query") 161 | 162 | args = (mix_spec, another_mix_spec, hQuery) 163 | est_spec, target_prob = network(args, 'transfer') 164 | 165 | pitch_transcription.append(target_prob) 166 | spec_loss = torch.abs(est_spec - align(source_spec, est_spec, -2)) 167 | spec_losses.append(spec_loss) 168 | 169 | spec_loss = torch.stack(spec_losses, 1) 170 | spec_loss = spec_loss.mean() 171 | 172 | transcription = torch.stack(pitch_transcription, 2) 173 | pitch_loss = nn.CrossEntropyLoss()(transcription, align(pitch_target, transcription, -1)) 174 | 175 | return spec_loss + pitch_loss, f'{spec_loss.item()} {pitch_loss.item()}' 176 | 177 | 178 | 179 | 180 | def train(model_name, 
load_epoch, epoch, model_folder): 181 | 182 | nnet = ModelFactory(model_name) 183 | nnet = nnet.cuda() 184 | 185 | learning_rate=LEARNING_RATE 186 | 187 | mkdir(model_folder) 188 | 189 | if load_epoch >=0: 190 | model_path = f'{model_folder}/params_epoch-{load_epoch}.pkl' 191 | nnet.load_state_dict(torch.load(model_path), strict=True) 192 | 193 | resume_epoch = load_epoch + 1 194 | 195 | urmp_data = UrmpSample() 196 | 197 | urmp_loader = DataLoader(urmp_data, 198 | batch_size=TRAINING_BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=True, persistent_workers=False, 199 | collate_fn=urmp_data.get_collate_fn()) 200 | 201 | def get_parameters(nnet, model_name): 202 | parameters = {} 203 | parameters['query'] = list(nnet.network.parameters()) 204 | 205 | if model_name in ['MSI']: 206 | parameters['MSS-AMT'] = list(nnet.network.parameters()) 207 | if model_name in ['UNET']: 208 | parameters['MSS'] = list(nnet.network.parameters()) 209 | if model_name in ['MSI-DIS', 'AMT', 'MSS', 'MSS-AMT']: 210 | parameters[model_name] = list(nnet.network.parameters()) 211 | 212 | return parameters 213 | 214 | 215 | def get_optimizer(r_epoch, parameters): 216 | optimizers = [] 217 | for param in parameters: 218 | optimizer = torch.optim.Adam(parameters[param], lr=learning_rate / (2**(r_epoch // DECAY)), \ 219 | betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True) 220 | optimizers.append({'mode' : param, 'opt': optimizer, 'name' : param}) 221 | return optimizers 222 | 223 | parameters = get_parameters(nnet, model_name) 224 | optimizer = get_optimizer(resume_epoch, parameters) 225 | step_per_epoch = urmp_data.get_len() // TRAINING_BATCH_SIZE 226 | 227 | pre_time = time.time() 228 | pre_time = compute_time(f'begin train...', pre_time) 229 | nnet.train() 230 | pre_time = compute_time(f'train done', pre_time) 231 | for i in range(resume_epoch, epoch): 232 | if i % DECAY == 0: 233 | pre_time = compute_time(f'begin update op...', pre_time) 234 | optimizer = 
get_optimizer(resume_epoch, parameters) 235 | print('learning rate', learning_rate / (2**(i // DECAY))) 236 | 237 | 238 | for i_batch, urmp_batch in enumerate(urmp_loader): 239 | urmp_batch = move_data2cuda(urmp_batch) 240 | for j in range(len(optimizer)): 241 | op = optimizer[j]['opt'] 242 | name = optimizer[j]['name'] 243 | op.zero_grad() 244 | loss, loss_text = train_step(nnet, urmp_batch, optimizer[j]['mode']) 245 | loss.backward() 246 | op.step() 247 | print(f"update {optimizer[j]['mode']} network epoch {i} loss: {i_batch}/{step_per_epoch}", loss_text) 248 | del loss 249 | torch.save(nnet.state_dict(), f"{model_folder}/params_epoch-{i}.pkl") 250 | if __name__ == "__main__": 251 | 252 | seed_torch(1234) 253 | 254 | parser = argparse.ArgumentParser(description='') 255 | parser.add_argument('--model_name', type=str, required=True, help='Model name in [`AMT` for trainscription-only baseline, \ 256 | `MSS` for separation-only baseline, \ 257 | `MSS-AMT` for multi-task baseline, \ 258 | `MSI` for the proposed multi-task score-informed model, \ 259 | `MSI-DIS` for the proposed multi-task score-informed with further disentanglement model].') 260 | parser.add_argument('--resume_epoch', type=int, default=-1, help='Epoch to resume training.') 261 | parser.add_argument('--model_folder', type=str, required=True, help='Directory to store model weights.') 262 | parser.add_argument('--epoch', type=int, default=200, help='Number of total training epochs.') 263 | 264 | args = parser.parse_args() 265 | 266 | assert args.model_name in ["AMT", "MSS", "MSS-AMT", "MSI", "MSI-DIS"] 267 | 268 | train(model_name=args.model_name, 269 | load_epoch=args.resume_epoch, 270 | epoch=args.epoch, 271 | model_folder=args.model_folder) 272 | --------------------------------------------------------------------------------