├── src
    ├── conf
    │   ├── __init__.py
    │   ├── urmp.py
    │   ├── feature.py
    │   ├── inference.py
    │   ├── sample.py
    │   └── models.py
    ├── dataset
    │   ├── __init__.py
    │   └── urmp
    │   │   ├── urmp_test.py
    │   │   ├── urmp_generate_dataset.py
    │   │   ├── urmp_feature.py
    │   │   └── urmp_sample.py
    ├── utils
    │   ├── __init__.py
    │   ├── multiEpochsDataLoader.py
    │   ├── target_process.py
    │   ├── weiMidi.py
    │   └── utilities.py
    ├── __init__.py
    ├── models
    │   ├── model_factory.py
    │   └── layers.py
    ├── analyze
    │   ├── utilities.py
    │   └── draw_table.py
    └── inference
    │   ├── compute_measure.py
    │   └── utilities.py
├── scripts
    ├── draw.sh
    ├── generate_dataset.sh
    ├── generate_feature.sh
    ├── evaluate.sh
    ├── train-model.sh
    ├── synthesis.sh
    ├── evaluate-model.sh
    └── clean_packed_data.sh
├── songs
    └── road.mid
├── data
    └── urmp
    │   ├── Trombone
    │       └── train.lst
    │   ├── Clarinet
    │       └── train.lst
    │   ├── Viola
    │       └── train.lst
    │   ├── Cello
    │       └── train.lst
    │   ├── Saxophone
    │       └── train.lst
    │   ├── Flute
    │       └── train.lst
    │   ├── Trumpet
    │       └── train.lst
    │   ├── Violin
    │       └── train.lst
    │   ├── Vn_Fl_Tpt
    │       └── train-query.lst.bk
    │   └── testset
    │       ├── query.lst
    │       └── test.lst
├── imgs
    ├── model-fig-3.png
    └── model-fig-1-ab.png
├── .gitignore
├── run.sh
├── requirements.txt
├── LICENSE
├── synthesis.py
├── evaluate.py
├── evaluation
    ├── MSI-DIS
    │   ├── scores-190.json
    │   ├── scores-191.json
    │   ├── scores-198.json
    │   ├── scores-199.json
    │   ├── scores-192.json
    │   ├── scores-193.json
    │   ├── scores-194.json
    │   ├── scores-195.json
    │   ├── scores-196.json
    │   └── scores-197.json
    └── MSI
    │   ├── scores-192.json
    │   ├── scores-197.json
    │   ├── scores-190.json
    │   ├── scores-198.json
    │   ├── scores-193.json
    │   ├── scores-195.json
    │   ├── scores-199.json
    │   ├── scores-191.json
    │   ├── scores-194.json
    │   └── scores-196.json
├── README.md
└── train.py


/src/conf/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/dataset/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/src/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/scripts/draw.sh:
--------------------------------------------------------------------------------
1 | python src/analyze/draw_table.py --evaluation_folder=evaluation
2 | 


--------------------------------------------------------------------------------
/scripts/generate_dataset.sh:
--------------------------------------------------------------------------------
1 | python src/dataset/urmp/urmp_generate_dataset.py --feature_dir=dataset/hdf5s/urmp --data_dir=data/urmp-rec
2 | 


--------------------------------------------------------------------------------
/scripts/generate_feature.sh:
--------------------------------------------------------------------------------
1 | python src/dataset/urmp/urmp_feature.py --dataset_dir=dataset/URMP/Dataset --feature_dir=dataset/hdf5s/urmp --process_num=8
2 | 


--------------------------------------------------------------------------------
/songs/road.mid:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/HEAD/songs/road.mid


--------------------------------------------------------------------------------
/data/urmp/Trombone/train.lst:
--------------------------------------------------------------------------------
1 | dataset/hdf5s/urmp/07_GString_tpt_tbn/AuSep_2_tbn_07_GString.h5
2 | dataset/hdf5s/urmp/15_Surprise_tpt_tpt_tbn/AuSep_3_tbn_15_Surprise.h5


--------------------------------------------------------------------------------
/imgs/model-fig-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/HEAD/imgs/model-fig-3.png


--------------------------------------------------------------------------------
/scripts/evaluate.sh:
--------------------------------------------------------------------------------
1 | cuda_id=0
2 | evaluate_epoch=199
3 | 
4 | # evaluate the MSI model (the command below loads save_model/MSI)
5 | sh scripts/evaluate-model.sh $cuda_id MSI $evaluate_epoch save_model/MSI 8
6 | 


--------------------------------------------------------------------------------
/imgs/model-fig-1-ab.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/HEAD/imgs/model-fig-1-ab.png


--------------------------------------------------------------------------------
/scripts/train-model.sh:
--------------------------------------------------------------------------------
1 | cuda=$1
2 | model_name=$2
3 | model_folder=$3
4 | CUDA_VISIBLE_DEVICES=$cuda python train.py --model_name=$model_name --model_folder=$model_folder --epoch=200
5 | 


--------------------------------------------------------------------------------
/scripts/synthesis.sh:
--------------------------------------------------------------------------------
# Run synthesis.py with the MSI-DIS checkpoint on GPU 0; results go under the `demo` folder.
# --epoch is passed explicitly and matches the checkpoint file name (params_epoch-199.pkl).
CUDA_VISIBLE_DEVICES=0 python synthesis.py --model_name=MSI-DIS --model_path=save_model/wei_MSI-DIS/params_epoch-199.pkl \
	--evaluation_folder=demo --epoch=199
3 | 
4 | 


--------------------------------------------------------------------------------
/src/__init__.py:
--------------------------------------------------------------------------------
 1 | import sys
 2 | import os
 3 | 
 4 | pkg_path = os.path.dirname(os.path.abspath(__file__))
 5 | sys.path.append(pkg_path)
 6 | 
 7 | import conf, utils, dataset, models, inference 
 8 | 
 9 | 
10 | 


--------------------------------------------------------------------------------
/src/conf/urmp.py:
--------------------------------------------------------------------------------
 1 | TRAINING_FILE_LST_PATH = "data/urmp/INSTR_NAME/train.lst"
 2 | SEEN_INSTRUMENTS=["Violin",
 3 | "Cello",
 4 | "Viola",
 5 | "Flute",
 6 | "Clarinet",
 7 | "Saxophone",
 8 | "Trumpet",
 9 | "Trombone"]
10 | SEEN_INSTRUMENTS_NUM = len(SEEN_INSTRUMENTS)
11 | 


--------------------------------------------------------------------------------
/data/urmp/Clarinet/train.lst:
--------------------------------------------------------------------------------
1 | dataset/hdf5s/urmp/17_Nocturne_vn_fl_cl/AuSep_3_cl_17_Nocturne.h5
2 | dataset/hdf5s/urmp/14_Waltz_fl_fl_cl/AuSep_3_cl_14_Waltz.h5
3 | dataset/hdf5s/urmp/03_Dance_fl_cl/AuSep_2_cl_03_Dance.h5
4 | dataset/hdf5s/urmp/19_Pavane_cl_vn_vc/AuSep_1_cl_19_Pavane.h5


--------------------------------------------------------------------------------
/scripts/evaluate-model.sh:
--------------------------------------------------------------------------------
1 | cuda=$1
2 | model_name=$2
3 | epoch=$3
4 | model_folder=$4
5 | ps=$5
6 | CUDA_VISIBLE_DEVICES=$cuda python evaluate.py --model_name=$model_name --model_path=$model_folder/params_epoch-$epoch.pkl \
7 | 																							--evaluation_folder=evaluation --epoch=$epoch --ps=$ps
8 | 


--------------------------------------------------------------------------------
/src/conf/feature.py:
--------------------------------------------------------------------------------
 1 | SAMPLE_RATE = 16000
 2 | FRAMES_PER_SEC = 100
 3 | 
 4 | WINDOW_SIZE = 2048
 5 | HOP_SIZE = 160
 6 | PAD_MODE = "reflect"
 7 | WINDOW = "hann"
 8 | CHANNELS_NUM = 1
 9 | 
10 | NOTES_NUM_EXCLUDE_SILENCE = 88
11 | NOTES_NUM = NOTES_NUM_EXCLUDE_SILENCE + 1
12 | BEGIN_NOTE = 21
13 | N_FFT = WINDOW_SIZE
14 | 


--------------------------------------------------------------------------------
/src/conf/inference.py:
--------------------------------------------------------------------------------
 1 | INFERENCE_BATCH_SIZE = 12
 2 | BATCH_FRAMES_NUM = 410
 3 | N_ITER = 200
 4 | PAD_FRAME = 5
 5 | 
 6 | TEST_DATA_LST_PATH = "data/urmp/testset/test.lst" 
 7 | TEST_QUERY_LST_PATH = "data/urmp/testset/query.lst"
 8 | 
 9 | 
10 | SYN_DURATION = 10
11 | SYN_SONG_ONSET = 25
12 | 
13 | MIX_ONSET = 35
14 | OCTAVE = 12
15 | 


--------------------------------------------------------------------------------
/data/urmp/Viola/train.lst:
--------------------------------------------------------------------------------
1 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_3_va_44_K515.h5
2 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_4_va_44_K515.h5
3 | dataset/hdf5s/urmp/24_Pirates_vn_vn_va_vc/AuSep_3_va_24_Pirates.h5
4 | dataset/hdf5s/urmp/13_Hark_vn_vn_va/AuSep_3_va_13_Hark.h5
5 | dataset/hdf5s/urmp/27_King_vn_vn_va_sax/AuSep_3_va_27_King.h5


--------------------------------------------------------------------------------
/src/conf/sample.py:
--------------------------------------------------------------------------------
 1 | from conf.feature import *
 2 | 
 3 | SOURCES_NUM_OF_MIXTURE = 2
 4 | UP_BOUND = 2
 5 | SHIFT_SEMITONE = 4
 6 | POS_SHIFT_SEMITONE = int( 2 * SHIFT_SEMITONE + 1)
 7 | DURATION = 3
 8 | SAMPLE_DURATION = int(SAMPLE_RATE * DURATION)
 9 | FRAME_DURATION = int(FRAMES_PER_SEC * DURATION)
10 | 
11 | LEARNING_RATE = 5e-4
12 | DECAY = 100
13 | TRAINING_BATCH_SIZE = 12
14 | SAMPLES_NUM = 6240
15 | 


--------------------------------------------------------------------------------
/data/urmp/Cello/train.lst:
--------------------------------------------------------------------------------
1 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_5_vc_44_K515.h5
2 | dataset/hdf5s/urmp/24_Pirates_vn_vn_va_vc/AuSep_4_vc_24_Pirates.h5
3 | dataset/hdf5s/urmp/12_Spring_vn_vn_vc/AuSep_3_vc_12_Spring.h5
4 | dataset/hdf5s/urmp/01_Jupiter_vn_vc/AuSep_2_vc_01_Jupiter.h5
5 | dataset/hdf5s/urmp/19_Pavane_cl_vn_vc/AuSep_3_vc_19_Pavane.h5
6 | dataset/hdf5s/urmp/26_King_vn_vn_va_vc/AuSep_4_vc_26_King.h5


--------------------------------------------------------------------------------
/data/urmp/Saxophone/train.lst:
--------------------------------------------------------------------------------
1 | dataset/hdf5s/urmp/25_Pirates_vn_vn_va_sax/AuSep_4_sax_25_Pirates.h5
2 | dataset/hdf5s/urmp/16_Surprise_tpt_tpt_sax/AuSep_3_sax_16_Surprise.h5
3 | dataset/hdf5s/urmp/06_Entertainer_sax_sax/AuSep_1_sax_06_Entertainer.h5
4 | dataset/hdf5s/urmp/06_Entertainer_sax_sax/AuSep_2_sax_06_Entertainer.h5
5 | dataset/hdf5s/urmp/27_King_vn_vn_va_sax/AuSep_4_sax_27_King.h5
6 | dataset/hdf5s/urmp/10_March_tpt_sax/AuSep_2_sax_10_March.h5


--------------------------------------------------------------------------------
/data/urmp/Flute/train.lst:
--------------------------------------------------------------------------------
1 | dataset/hdf5s/urmp/17_Nocturne_vn_fl_cl/AuSep_2_fl_17_Nocturne.h5
2 | dataset/hdf5s/urmp/14_Waltz_fl_fl_cl/AuSep_1_fl_14_Waltz.h5
3 | dataset/hdf5s/urmp/14_Waltz_fl_fl_cl/AuSep_2_fl_14_Waltz.h5
4 | dataset/hdf5s/urmp/08_Spring_fl_vn/AuSep_1_fl_08_Spring.h5
5 | dataset/hdf5s/urmp/03_Dance_fl_cl/AuSep_1_fl_03_Dance.h5
6 | dataset/hdf5s/urmp/04_Allegro_fl_fl/AuSep_2_fl_04_Allegro.h5
7 | dataset/hdf5s/urmp/04_Allegro_fl_fl/AuSep_1_fl_04_Allegro.h5


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | __pycache__/
 2 | data/musicnet*/
 3 | data/musdb18/
 4 | data/maestro/
 5 | data/urmp-test/
 6 | data/urmp-rec/
 7 | output/
 8 | output/
 9 | reproduce_dataset/
10 | results/
11 | *.pyc
12 | src/analyze/analyze_experimental_results.py
13 | src/analyze/draw_3D_hQuery.py
14 | src/analyze/draw_dataset_distribution.py
15 | src/analyze/draw_epoch_curve.py
16 | src/analyze/draw_hQuery.py
17 | src/analyze/draw_instr_results.py
18 | src/analyze/draw_spec.py
19 | src/analyze/gradient_colors.py
20 | 


--------------------------------------------------------------------------------
/scripts/clean_packed_data.sh:
--------------------------------------------------------------------------------
 1 | git rev-list --objects --all | grep demo/MSI > .packed_data.lst
 2 | awk '{print $2}' .packed_data.lst > .packed_data.name
 3 | rm .packed_data.lst
 4 | for name in `cat .packed_data.name`
 5 | do
 6 | 	echo $name
 7 | 	git filter-branch -f --index-filter "git rm -r --cached --ignore-unmatch $name" -- --all
 8 | 	break
 9 | done
10 | rm .packed_data.name
11 | #rm -Rf .git/refs/original
12 | #git reflog expire --expire=now --all
13 | #git gc --aggressive --prune=now
14 | #
15 | 


--------------------------------------------------------------------------------
/src/conf/models.py:
--------------------------------------------------------------------------------
 1 | from conf.feature import *
 2 | 
 3 | QUERY_DIM = 6
 4 | 
 5 | MODEL_CONFIG = {"UNet":
 6 | 		{"with_bn0" : True,
 7 | 			"input_channels_num" : CHANNELS_NUM,
 8 | 			"input_size" : WINDOW_SIZE // 2,
 9 | 			"blocks_num" : 5,
10 | 			"condition_dim" : QUERY_DIM,
11 | 			"output_dim" : CHANNELS_NUM,
12 | 		},
13 | 	"QueryNet":
14 | 		{"blocks_num" : 2,
15 | 			"input_size" : WINDOW_SIZE // 2,
16 | 			"input_channels_num" : CHANNELS_NUM,
17 | 			"pnum" : QUERY_DIM,
18 | 		},
19 | 	"Transcriptor":
20 | 		{"blocks_num" : 2,
21 | 			"output_dim" : NOTES_NUM
22 | 		}
23 | }
24 | 
25 | 


--------------------------------------------------------------------------------
/data/urmp/Trumpet/train.lst:
--------------------------------------------------------------------------------
1 | dataset/hdf5s/urmp/18_Nocturne_vn_fl_tpt/AuSep_3_tpt_18_Nocturne.h5
2 | dataset/hdf5s/urmp/07_GString_tpt_tbn/AuSep_1_tpt_07_GString.h5
3 | dataset/hdf5s/urmp/15_Surprise_tpt_tpt_tbn/AuSep_2_tpt_15_Surprise.h5
4 | dataset/hdf5s/urmp/15_Surprise_tpt_tpt_tbn/AuSep_1_tpt_15_Surprise.h5
5 | dataset/hdf5s/urmp/05_Entertainer_tpt_tpt/AuSep_2_tpt_05_Entertainer.h5
6 | dataset/hdf5s/urmp/05_Entertainer_tpt_tpt/AuSep_1_tpt_05_Entertainer.h5
7 | dataset/hdf5s/urmp/20_Pavane_tpt_vn_vc/AuSep_1_tpt_20_Pavane.h5
8 | dataset/hdf5s/urmp/10_March_tpt_sax/AuSep_1_tpt_10_March.h5
9 | dataset/hdf5s/urmp/09_Jesus_tpt_vn/AuSep_1_tpt_09_Jesus.h5


--------------------------------------------------------------------------------
/src/utils/multiEpochsDataLoader.py:
--------------------------------------------------------------------------------
 1 | from torch.utils.data import DataLoader
 2 | 
 3 | class _RepeatSampler(object):
 4 | 	def __init__(self, sampler):
 5 | 		self.sampler = sampler
 6 | 
 7 | 	def __iter__(self):
 8 | 		while True:
 9 | 			yield from iter(self.sampler)
10 | 
class MultiEpochsDataLoader(DataLoader):
	"""DataLoader that builds its underlying iterator once and reuses it for every epoch.

	The batch sampler is wrapped in `_RepeatSampler`, so the single iterator created in
	`__init__` never runs dry; each call to `__iter__` draws `len(self)` batches from it.
	"""

	def __init__(self, *args, **kwargs):
		super().__init__(*args, **kwargs)
		endless_batch_sampler = _RepeatSampler(self.batch_sampler)
		# Set through `object.__setattr__` rather than plain assignment
		# (presumably DataLoader guards this attribute after construction — TODO confirm).
		object.__setattr__(self, 'batch_sampler', endless_batch_sampler)
		self.iterator = super().__iter__()

	def __len__(self):
		# `batch_sampler` is the `_RepeatSampler`; its `.sampler` is the original batch sampler.
		wrapped_batch_sampler = self.batch_sampler.sampler
		return len(wrapped_batch_sampler)

	def __iter__(self):
		remaining = len(self)
		while remaining > 0:
			yield next(self.iterator)
			remaining -= 1
23 | 


--------------------------------------------------------------------------------
/run.sh:
--------------------------------------------------------------------------------
 1 | # pack urmp dataset to hdf5
 2 | #sh scripts/generate_feature.sh
 3 | 
 4 | cuda_id=0
 5 | 
 6 | # train transcription-only baseline
 7 | #sh scripts/train-model.sh $cuda_id AMT save_model/AMT
 8 | 
 9 | # train separation-only baseline
10 | #sh scripts/train-model.sh $cuda_id MSS save_model/MSS
11 | 
12 | # train multi-task baseline
13 | #sh scripts/train-model.sh $cuda_id MSS-AMT save_model/MSS-AMT
14 | 
15 | # train the proposed multi-task score-informed (MSI) model
16 | #sh scripts/train-model.sh $cuda_id MSI save_model/MSI
17 | 
18 | # train the proposed multi-task score-informed with further disentanglement (MSI-DIS) model
19 | sh scripts/train-model.sh $cuda_id MSI-DIS save_model/MSI-DIS
20 | 
21 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | appdirs==1.4.4
 2 | audioread==2.1.9
 3 | cached-property==1.5.2
 4 | certifi==2021.5.30
 5 | cffi==1.14.6
 6 | charset-normalizer==2.0.4
 7 | cycler==0.10.0
 8 | dataclasses==0.8
 9 | decorator==5.0.9
10 | future==0.18.2
11 | h5py==3.1.0
12 | idna==3.2
13 | joblib==1.0.1
14 | kiwisolver==1.3.1
15 | librosa==0.8.1
16 | llvmlite==0.36.0
17 | matplotlib==3.3.4
18 | mido==1.2.10
19 | mir-eval==0.6
20 | numba==0.53.1
21 | numpy==1.19.5
22 | packaging==21.0
23 | Pillow==8.3.1
24 | pooch==1.4.0
25 | prefetch-generator==1.0.1
26 | pycparser==2.20
27 | pyparsing==2.4.7
28 | python-dateutil==2.8.2
29 | requests==2.26.0
30 | resampy==0.2.2
31 | scikit-learn==0.24.2
32 | scipy==1.5.4
33 | six==1.16.0
34 | SoundFile==0.10.3.post1
35 | threadpoolctl==2.2.0
36 | torch==1.7.1+cu101
37 | torchaudio==0.7.2
38 | torchlibrosa==0.0.4
39 | torchvision==0.8.2+cu101
40 | typing-extensions==3.10.0.0
41 | urllib3==1.26.6
42 | 


--------------------------------------------------------------------------------
/data/urmp/Violin/train.lst:
--------------------------------------------------------------------------------
 1 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_1_vn_44_K515.h5
 2 | dataset/hdf5s/urmp/44_K515_vn_vn_va_va_vc/AuSep_2_vn_44_K515.h5
 3 | dataset/hdf5s/urmp/24_Pirates_vn_vn_va_vc/AuSep_2_vn_24_Pirates.h5
 4 | dataset/hdf5s/urmp/24_Pirates_vn_vn_va_vc/AuSep_1_vn_24_Pirates.h5
 5 | dataset/hdf5s/urmp/17_Nocturne_vn_fl_cl/AuSep_1_vn_17_Nocturne.h5
 6 | dataset/hdf5s/urmp/02_Sonata_vn_vn/AuSep_1_vn_02_Sonata.h5
 7 | dataset/hdf5s/urmp/02_Sonata_vn_vn/AuSep_2_vn_02_Sonata.h5
 8 | dataset/hdf5s/urmp/12_Spring_vn_vn_vc/AuSep_1_vn_12_Spring.h5
 9 | dataset/hdf5s/urmp/08_Spring_fl_vn/AuSep_2_vn_08_Spring.h5
10 | dataset/hdf5s/urmp/13_Hark_vn_vn_va/AuSep_1_vn_13_Hark.h5
11 | dataset/hdf5s/urmp/13_Hark_vn_vn_va/AuSep_2_vn_13_Hark.h5
12 | dataset/hdf5s/urmp/01_Jupiter_vn_vc/AuSep_1_vn_01_Jupiter.h5
13 | dataset/hdf5s/urmp/19_Pavane_cl_vn_vc/AuSep_2_vn_19_Pavane.h5
14 | dataset/hdf5s/urmp/27_King_vn_vn_va_sax/AuSep_1_vn_27_King.h5
15 | dataset/hdf5s/urmp/27_King_vn_vn_va_sax/AuSep_2_vn_27_King.h5
16 | dataset/hdf5s/urmp/09_Jesus_tpt_vn/AuSep_2_vn_09_Jesus.h5


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Liwei Lin
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/synthesis.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import torch
 3 | import torch.nn.functional as F
 4 | import os
 5 | import sys
 6 | import numpy as np
 7 | import torch.nn as nn
 8 | import argparse
 9 | 
10 | import src
11 | from utils.utilities import (save_json, compute_time, print_dict, mkdir)
12 | from inference.inference import Inference
13 | from inference.compute_measure import (evaluate_transcription, evaluate_separation)
14 | 
15 | 
def parse_args(argv=None):
	"""Parse the command-line arguments of the synthesis script.

	Args:
		argv: list of argument strings; `None` makes argparse read `sys.argv[1:]`.

	Returns:
		The parsed namespace with `model_name`, `model_path`, `evaluation_folder`
		and `epoch` (always a string).

	`--epoch` may be omitted when `--model_path` contains `epoch-<N>` in its file name
	(e.g. `params_epoch-199.pkl`); otherwise the parser exits with an error, as it did
	when the argument was unconditionally required.
	"""
	import re  # local import: only needed to infer the epoch from the checkpoint name

	parser = argparse.ArgumentParser(description='')
	parser.add_argument('--model_name', type=str, required=True, help=(
		'Model name in '
		'[`AMT` for transcription-only baseline, '
		'`MSS` for separation-only baseline, '
		'`MSS-AMT` for multi-task baseline, '
		'`MSI` for the proposed multi-task score-informed model, '
		'`MSI-DIS` for the proposed multi-task score-informed with further disentanglement model].'))
	parser.add_argument('--model_path', type=str, required=True, help='Model weights path.')
	parser.add_argument('--evaluation_folder', type=str, required=True, help='Directory to store evaluation results.')
	parser.add_argument('--epoch', type=str, default=None,
		help='Epoch. Inferred from `epoch-<N>` in the model file name when omitted.')

	args = parser.parse_args(argv)

	if args.epoch is None:
		match = re.search(r'epoch-(\d+)', os.path.basename(args.model_path))
		if match is None:
			parser.error('--epoch is required when it cannot be inferred from --model_path')
		args.epoch = match.group(1)

	return args


def main(argv=None):
	"""Load the requested model and run `Inference.synthesis` on `songs/road.mid`."""
	args = parse_args(argv)

	# Results are grouped per model below the chosen output folder.
	evaluation_dir = f"{args.evaluation_folder}/{args.model_name}"

	path = "songs/road.mid"

	inference = Inference(args.model_name, args.model_path, evaluation_dir, args.epoch)
	inference.synthesis(path, 0)


if __name__=='__main__':
	main()
43 | 
44 | 
45 | 


--------------------------------------------------------------------------------
/data/urmp/Vn_Fl_Tpt/train-query.lst.bk:
--------------------------------------------------------------------------------
 1 | Oboe,Bassoon	dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_3_ob_41_Miserere.h5,dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_5_bn_41_Miserere.h5
 2 | Oboe,Bassoon	dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_2_ob_28_Fugue.h5,dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_4_bn_28_Fugue.h5
 3 | Tuba,Horn	dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_5_tba_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_3_hn_42_Arioso.h5
 4 | Tuba,Horn	dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_3_hn_43_Chorale.h5
 5 | Violin,Clarinet	dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_2_vn_32_Fugue.h5,dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_3_cl_28_Fugue.h5
 6 | Saxophone,Flute	dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_2_sax_23_Rejouissance.h5,dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5 
 7 | Viola,Cello	dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_3_va_32_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_4_vc_32_Fugue.h5
 8 | Trumpet,Trombone	dataset/hdf5s/urmp/18_Nocturne_vn_fl_tpt/AuSep_3_tpt_18_Nocturne.h5,dataset/hdf5s/urmp/21_Rejouissance_cl_tbn_tba/AuSep_2_tbn_21_Rejouissance.h5
 9 | Violin,Double_bass	dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_2_vn_32_Fugue.h5,dataset/hdf5s/urmp/38_Jerusalem_vn_vn_va_vc_db/AuSep_5_db_38_Jerusalem.h5
10 | Trombone,Tuba	dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_4_tbn_34_Fugue.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5
11 | Trumpet,Horn	dataset/hdf5s/urmp/18_Nocturne_vn_fl_tpt/AuSep_3_tpt_18_Nocturne.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_3_hn_43_Chorale.h5
12 | Flute,Oboe	dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_3_ob_41_Miserere.h5
13 | Clarinet,Bassoon	dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_4_cl_37_Rondeau.h5,dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_5_bn_41_Miserere.h5
14 | 


--------------------------------------------------------------------------------
/src/models/model_factory.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import torch
 3 | import torch.nn as nn
 4 | import torch.nn.functional as F
 5 | import sys
 6 | import numpy as np
 7 | import time
 8 | import h5py
 9 | from torchlibrosa.stft import STFT, ISTFT, magphase
10 | 
11 | from utils.utilities import (read_lst, read_config)
12 | from models.models import (AMTBaseline, MSSBaseline, MultiTaskBaseline, DisentanglementModel)
13 | 
14 | from conf.feature import *
15 | 
16 | et = 1e-8
17 | 
class ModelFactory(nn.Module):
	"""Bundle STFT/ISTFT transforms with the network selected by `model_name`.

	Args:
		model_name: one of `AMT`/`AMTBaseline`, `MSS`/`MSSBaseline`,
			`MSS-AMT`/`MultiTaskBaseline`, or `MSI`/`MSI-DIS`/`DisentanglementModel`.

	Raises:
		ValueError: if `model_name` is none of the names above.
	"""

	def __init__(self, model_name):
		super(ModelFactory, self).__init__()

		self.stft = STFT(n_fft=WINDOW_SIZE, hop_length=HOP_SIZE,
			win_length=WINDOW_SIZE, window=WINDOW, center=True,
			pad_mode=PAD_MODE, freeze_parameters=True)

		self.istft = ISTFT(n_fft=WINDOW_SIZE, hop_length=HOP_SIZE,
			win_length=WINDOW_SIZE, window=WINDOW, center=True,
			pad_mode=PAD_MODE, freeze_parameters=True)

		if model_name in ['AMT', 'AMTBaseline']:
			network = AMTBaseline()
		elif model_name in ['MSS', 'MSSBaseline']:
			network = MSSBaseline()
		elif model_name in ['MSS-AMT', 'MultiTaskBaseline']:
			network = MultiTaskBaseline()
		elif model_name in ['MSI', 'MSI-DIS', 'DisentanglementModel']:
			network = DisentanglementModel()
		else:
			# Fail with a clear message instead of an unbound-variable error further down.
			raise ValueError(
				f"Unknown model_name {model_name!r}; expected one of "
				"'AMT', 'AMTBaseline', 'MSS', 'MSSBaseline', 'MSS-AMT', 'MultiTaskBaseline', "
				"'MSI', 'MSI-DIS', 'DisentanglementModel'.")

		self.network = network

	def wav2spec(self, input):
		"""Return the magnitude spectrogram of every channel of `input`.

		`input` is indexed as `input[:, channel, :]`, i.e. the channel axis is the
		second-to-last one. The per-channel spectrograms are concatenated along
		dimension 1 and the last element of the final axis is dropped
		(presumably the highest frequency bin, leaving WINDOW_SIZE // 2 bins — TODO confirm).
		"""
		channels_num = input.shape[-2]

		def spectrogram(input):
			(real, imag) = self.stft(input)
			spec = (real ** 2 + imag ** 2) ** 0.5
			return spec

		spec_list = [spectrogram(input[:, channel, :]) for channel in range(channels_num)]

		spec = torch.cat(spec_list, 1)[:, :, :, :-1]
		return spec

	def forward(self, input, mode=None):
		"""Dispatch to `wav2spec` when `mode == "wav2spec"`, otherwise to the wrapped network.

		With `mode=None` the network is called as `network(input)`; any other mode is
		forwarded as `network(input, mode)`.
		"""
		if mode == "wav2spec":
			return self.wav2spec(input)
		if mode is None:
			return self.network(input)
		return self.network(input, mode)
65 | 		
66 | 
67 | if __name__ == '__main__':
68 | 	model_name = 'MSI-DIS'
69 | 	model = ModelFactory(model_name)
70 | 
71 | 	
72 | 


--------------------------------------------------------------------------------
/src/utils/target_process.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import h5py
 3 | import numpy as np
 4 | import random
 5 | 
 6 | 
 7 | class TargetProcessor(object):
 8 | 	'''target process'''
 9 | 
10 | 	def __init__(self, segment_seconds, frames_per_second, begin_note, classes_num):
11 | 		self.segment_seconds = segment_seconds
12 | 		self.frames_per_second = frames_per_second
13 | 		self.begin_note = begin_note
14 | 		self.classes_num = classes_num
15 | 		self.max_piano_note = self.classes_num - 1
16 | 
17 | 	def process(self, start_time, midi_events_time):
18 | 
19 | 		for midi_events_time_st, events in enumerate(midi_events_time):
20 | 			if events[1] >= start_time:
21 | 				break
22 | 
23 | 		frames_per_second = self.frames_per_second
24 | 		segment_seconds = self.segment_seconds
25 | 		begin_note = self.begin_note
26 | 		segment_frames = segment_seconds * frames_per_second
27 | 		classes_num = self.classes_num
28 | 		end_time = start_time + segment_seconds
29 | 		#mask_segments = []
30 | 		frame_roll = np.ones([int(segment_frames) + 1]) * classes_num
31 | 		onset_offset = np.zeros([int(segment_frames) + 1])
32 | 		#frame_roll_mask = np.ones([int(segment_frames) + 1]) * classes_num
33 | 		for i in range(midi_events_time_st, midi_events_time.shape[0]):
34 | 			st = midi_events_time[i][0]
35 | 			ed = midi_events_time[i][1]
36 | 			with_onset = True
37 | 			with_offset = True
38 | 
39 | 			assert ed >= st
40 | 
41 | 			if st > end_time:
42 | 				break
43 | 			if st < start_time:
44 | 				st = start_time
45 | 				with_onset = False
46 | 
47 | 			if ed > end_time:
48 | 				ed = end_time
49 | 				with_offset = False
50 | 
51 | 			
52 | 			note = int(midi_events_time[i][2])
53 | 
54 | 			st = int((st - start_time)* frames_per_second)
55 | 			ed = int((ed - start_time)* frames_per_second)
56 | 			if ed <= st:
57 | 				ed = st + 1
58 | 			frame_roll[st : ed] = np.clip(note, 0, classes_num - 1)
59 | 			duration = ed - st
60 | 			
61 | 			if with_onset:
62 | 				onset_offset[st] = 1
63 | 			if with_offset:
64 | 				onset_offset[ed - 1] = 2
65 | 
66 | 		target_dict = {}
67 | 		target_dict['frame_roll'] = frame_roll
68 | 		target_dict['onset_offset'] = onset_offset
69 | 		return target_dict
70 | 


--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import torch
 3 | import torch.nn.functional as F
 4 | import os
 5 | import sys
 6 | import argparse
 7 | import numpy as np
 8 | import torch.nn as nn
 9 | 
10 | import src
11 | from utils.utilities import (save_json, compute_time, print_dict, mkdir)
12 | from inference.inference import Inference
13 | from inference.compute_measure import (evaluate_transcription, evaluate_separation)
14 | 
def parse_args(argv=None):
	"""Parse the command-line arguments of the evaluation script.

	Args:
		argv: list of argument strings; `None` makes argparse read `sys.argv[1:]`.

	Returns:
		The parsed namespace with `model_name`, `model_path`, `evaluation_folder`,
		`epoch` (string) and `ps` (int).
	"""
	parser = argparse.ArgumentParser(description='')
	parser.add_argument('--model_name', type=str, required=True, help=(
		'Model name in '
		'[`AMT` for transcription-only baseline, '
		'`MSS` for separation-only baseline, '
		'`MSS-AMT` for multi-task baseline, '
		'`MSI` for the proposed multi-task score-informed model, '
		'`MSI-DIS` for the proposed multi-task score-informed with further disentanglement model].'))
	parser.add_argument('--model_path', type=str, required=True, help='Model weights path.')
	parser.add_argument('--evaluation_folder', type=str, required=True, help='Directory to store evaluation results.')
	parser.add_argument('--epoch', type=str, required=True, help='Epoch.')
	parser.add_argument('--ps', type=int, required=True, help='Processes number.')
	return parser.parse_args(argv)


def main(argv=None):
	"""Run inference, score every returned mode and save the scores as JSON.

	A mode gets a "transcription" score and/or a "separation" score depending on
	which of the two mode lists below it appears in.
	"""
	args = parse_args(argv)
	processes_num = args.ps

	# Results are grouped per model below the chosen output folder.
	evaluation_dir = f"{args.evaluation_folder}/{args.model_name}"

	inference = Inference(args.model_name, args.model_path, evaluation_dir, args.epoch)
	preds = inference.inference()

	transcription_modes = ["AMT", "MSS-AMT", "MSI", "MSI-DIS"]
	separation_modes = ["MSS", "MSS-AMT", "MSI", "MSI-S", "MSI-MSI", "MSI-DIS", "MSI-DIS-S"]

	scores = {}
	for mode in preds:
		scores[mode] = {}
		pred = preds[mode]
		if mode in transcription_modes:
			scores[mode]["transcription"] = evaluate_transcription(pred, processes_num=processes_num)
		if mode in separation_modes:
			scores[mode]["separation"] = evaluate_separation(pred, processes_num=processes_num)
	save_json(inference.score_path, scores)


if __name__=='__main__':
	main()
51 | 
52 | 


--------------------------------------------------------------------------------
/src/analyze/utilities.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import time
 4 | 
 5 | sys.path.insert(1, os.path.join(sys.path[0], '../..'))
 6 | 
 7 | import src
 8 | from utils.utilities import (load_json)
 9 | import numpy as np
10 | 
11 | 
BOUND = 1

# Instrument names grouped under the tags "seen" and "unseen"; each tag maps
# to a list of instrument names.
INSTRUMENTS = {
	tag: names.split(',')
	for tag, names in {
		"seen": "Violin,Cello,Viola,Flute,Clarinet,Saxophone,Trumpet,Trombone",
		"unseen": "Horn,Tuba,Double_Bass,Bassoon,Oboe",
	}.items()
}

# Instrument names at which compute_results stops reading a row.
skip_instrs = []
21 | 
def seen(instr):
	"""Return the INSTRUMENTS tag whose list contains `instr`, or None if no tag does."""
	return next((tag for tag, names in INSTRUMENTS.items() if instr in names), None)
26 | 
def ave_val(x):
	"""Return the arithmetic mean of `x` as computed by `np.mean`."""
	mean_value = np.mean(x)
	return mean_value
29 | 
def compute_results(json_data):
	"""Average the per-instrument scores of every mode and sheet.

	`json_data` maps mode -> sheet name -> list of rows, where each row maps
	an instrument name to a score. For each sheet the scores are pooled per
	instrument, per seen/unseen tag, per tag pair ("seen-seen", "seen-unseen",
	"unseen-unseen") and under "all", and every pool is replaced by its mean.

	Returns a dict mode -> sheet name -> {pool name: mean score}.
	"""
	scores = {}
	for mode, data_per_mode in json_data.items():
		scores[mode] = {}
		for sheet_name, sheet_data in data_per_mode.items():
			results = {"seen-seen": [], "seen-unseen" : [], "unseen-unseen" : [], "seen" : [], "unseen" : [], 'all' : []}
			for row in sheet_data:
				values = []
				tags = []
				for instr in row:
					# A skipped instrument ends the processing of the whole row.
					if instr in skip_instrs:
						break
					value = float(row[instr])
					results.setdefault(instr, []).append(value)
					group = seen(instr)
					values.append(value)
					tags.append(group)
					results[group].append(value)
					results["all"].append(value)

				# Pair-level pools only make sense when the row holds at least two instruments.
				if len(tags) >= 2:
					pair_tag = '-'.join(tags)
					if pair_tag == "unseen-seen":
						pair_tag = "seen-unseen"
					results[pair_tag] += values

			scores[mode][sheet_name] = {key: ave_val(pool) for key, pool in results.items()}

	return scores
66 | 
def get_json_data(score_path):
	"""Load the score JSON at `score_path` and return its averaged results (see compute_results)."""
	return compute_results(load_json(score_path))
71 | 
def get_results(scores):
	"""Return the "seen", "unseen" and "all" entries of `scores`, in that order, as a list."""
	return [scores[tag] for tag in ("seen", "unseen", "all")]
74 | 
75 | 
def example(model_name, sheet_name, epoch):
	"""Print the seen / unseen / all averages of one model, sheet and epoch."""
	scores = compute_results(load_json(f"evaluation/{model_name}/scores-{epoch}.json"))
	sheet_scores = scores[model_name][sheet_name]
	print(sheet_scores["seen"], sheet_scores["unseen"], sheet_scores["all"])
81 | 
if __name__=="__main__":
	# Print the averaged separation scores of the MSS model for epochs 0..79.
	model_name = "MSS"
	sheet_name = "separation"

	#model_name = "AMT"
	#sheet_name = "transcription"

	for epoch in range(80):
		example(model_name, sheet_name, epoch)
92 | 
93 | 
94 | 


--------------------------------------------------------------------------------
/src/analyze/draw_table.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import os
 3 | import sys
 4 | import argparse
 5 | 
 6 | sys.path.insert(1, os.path.join(sys.path[0], '../..'))
 7 | 
 8 | import src
 9 | from analyze.utilities import (get_json_data, get_results) 
10 | from utils.utilities import (mkdir)
11 | 
# Modes that draw_table leaves out of the printed table.
skip = ["MSI-S", "MSI-DIS-S", "DMSI-S"]
13 | 
14 | 
def get_data_with_last_10_epochs(folder, model_name):
	"""Collect the averaged scores of one model from its score files.

	Reads every file whose name starts with "score" in `folder/model_name`
	(the epoch is parsed from the `scores-<epoch>.json` name and must lie in
	[190, 200)).

	Returns a dict sheet name -> tag ("seen"/"unseen"/"all") -> mode ->
	list of [epoch, score] sorted by epoch, or {} when the folder is missing.
	"""
	model_dir = os.path.join(folder, f"{model_name}")
	if not os.path.exists(model_dir):
		return {}

	file_names = os.listdir(model_dir)

	data = {
		sheet_name: {"seen" : {}, "unseen" : {}, "all" : {}}
		for sheet_name in ("transcription", "separation")
	}

	for file_name in file_names:
		if not file_name.startswith("score"):
			continue

		epoch = int(file_name.split(".")[0].split("-")[1])

		assert 190 <= epoch < 200

		json_data = get_json_data(os.path.join(model_dir, file_name))

		for mode, sheets in json_data.items():
			for sheet_name, sheet_data in sheets.items():
				for tag, per_mode in data[sheet_name].items():
					per_mode.setdefault(mode, []).append([epoch, sheet_data[tag]])

	# Order every score series by epoch.
	for sheet in data.values():
		for per_mode in sheet.values():
			for series in per_mode.values():
				series.sort(key=lambda item: item[0])

	return data
54 | 
def draw_table(data):
	"""Print one LaTeX table cell (`mode &$mean\\pm interval$`) per sheet, tag and mode.

	`data` maps sheet name -> tag -> mode -> list of [epoch, score] and must
	hold exactly 10 entries per mode. Modes listed in `skip` are not printed.
	"""
	for sheet_name in data:
		sheet_data = data[sheet_name]
		for tag in sheet_data:
			tag_data = sheet_data[tag]
			for mode in tag_data:
				if mode in skip:
					continue
				# Explicit array so the element-wise arithmetic below does not
				# rely on a numpy scalar broadcasting over a Python list.
				results = np.asarray([c[1] for c in tag_data[mode]])
				assert len(results) == 10
				mu = np.mean(results)
				# sqrt(sum of squared deviations) / 10 equals the population std
				# divided by sqrt(10), i.e. the standard error for 10 samples.
				pstd = np.sqrt(((results - mu) * (results - mu)).sum())
				interv = pstd * 1.96 / 10
				mu = round(mu, 2)
				interv = round(interv, 2)
				# "\\pm" emits the same text as before without the invalid "\p" escape.
				print(mode, f"&${mu}\\pm{interv}$")
72 | 
def get_data(evaluation_folder):
	"""Merge the per-model score series of all known models into one dict.

	Returns sheet name -> seen tag -> mode -> series, as produced by
	get_data_with_last_10_epochs for each model under `evaluation_folder`.
	"""
	results = {}
	for model_name in ("MSS", "AMT", "MSS-AMT", "MSI", "MSI-DIS"):
		per_model = get_data_with_last_10_epochs(evaluation_folder, model_name)
		for sheet_name, sheet in per_model.items():
			merged_sheet = results.setdefault(sheet_name, {})
			for seen_tag, modes in sheet.items():
				# A mode reported by a later model overwrites an earlier one.
				merged_sheet.setdefault(seen_tag, {}).update(modes)
	return results
87 | 				
88 | 
if __name__=="__main__":
	# Command-line entry: print the table for the given evaluation folder.
	parser = argparse.ArgumentParser(description='')
	parser.add_argument('--evaluation_folder', type=str, required=True, help='Directory to store evaluation results.')

	evaluation_folder = parser.parse_args().evaluation_folder
	draw_table(get_data(evaluation_folder))
98 | 


--------------------------------------------------------------------------------
/src/dataset/urmp/urmp_test.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import librosa
  3 | import sys
  4 | import os
  5 | import h5py
  6 | 
  7 | from utils.utilities import (parse_frameroll2annotation, read_lst, read_config, write_lst, mkdir, int16_to_float32)
  8 | from conf.feature import *
  9 | from conf.inference import *
 10 | 
 11 | class UrmpTest(object):
 12 | 	def __init__(self):
 13 | 		file_lst = read_lst(TEST_DATA_LST_PATH)
 14 | 		query_lst = read_lst(TEST_QUERY_LST_PATH)
 15 | 		data_path = []
 16 | 		for i, f in enumerate(file_lst):
 17 | 			fs = f.split('\t')
 18 | 			qs = query_lst[i].split('\t')
 19 | 			instruments = fs[0].split(',')
 20 | 			files = fs[1].split(',')
 21 | 			query = qs[1].split(',')
 22 | 			sample_name = str.replace(files[0].split('_')[-2], '.' ,'')
 23 | 			sample = {}
 24 | 			for j, instr in enumerate(instruments):
 25 | 				sample[instr] = {}
 26 | 				sample[instr]['ref'] = files[j]
 27 | 				sample[instr]['query'] = query[j]
 28 | 			
 29 | 			data_path.append({'sample_name' : sample_name, 'instrs' : sample})
 30 | 
 31 | 		self.data_path = data_path
 32 | 
 33 | 
 34 | 	def vad(self, x, frame_roll, frames_per_second=FRAMES_PER_SEC, sample_rate=SAMPLE_RATE, notes_num=NOTES_NUM_EXCLUDE_SILENCE):
 35 | 
 36 | 		frames_per_sample = frames_per_second * 1. / sample_rate
 37 | 
 38 | 		if len(x.shape) == 2:
 39 | 			y = x[0]
 40 | 		else:
 41 | 			y = x
 42 | 		output = np.zeros_like(y)
 43 | 		frame_roll_len = int(y.shape[-1] / sample_rate * frames_per_second + 1)
 44 | 		frame_roll = frame_roll[ : frame_roll_len]
 45 | 		new_frame_roll = np.zeros_like(frame_roll) + notes_num
 46 | 		split_index = librosa.effects.split(y, top_db=18)
 47 | 		st = 0
 48 | 		ed = 0
 49 | 		for index in split_index:
 50 | 			ed = st + index[1] - index[0]
 51 | 			output[st : ed] = y[index[0] : index[1]]
 52 | 			offset = (index[1] - index[0]) * frames_per_sample
 53 | 			ori_st = int(st * frames_per_sample)
 54 | 			ori_ed = int(st * frames_per_sample + offset)
 55 | 			obj_st = int(index[0] * frames_per_sample)
 56 | 			obj_ed = int(index[0] * frames_per_sample + offset)
 57 | 			offset = (ori_ed - ori_st) if ori_ed - ori_st < ed - st else obj_ed - obj_st
 58 | 			new_frame_roll[ori_st : ori_st + offset] = frame_roll[obj_st : obj_st + offset]
 59 | 			st = ed
 60 | 		output = output[:ed]
 61 | 		new_frame_roll = new_frame_roll[: int(ed * frames_per_sample)]
 62 | 
 63 | 		if len(x.shape) == 2:
 64 | 			output = output[None, :]
 65 | 		return output, new_frame_roll
 66 | 
 67 | 
 68 | 	def test_samples(self):
 69 | 		for data in self.data_path:
 70 | 			sample_name = data['sample_name']
 71 | 			sample = data['instrs']
 72 | 			samples = []
 73 | 			mix = []
 74 | 			for instr in sample:
 75 | 				ref = sample[instr]['ref']
 76 | 				queries = sample[instr]['query'].split(' ')
 77 | 				query = []
 78 | 				tr_query = []
 79 | 				for q in queries:
 80 | 					with h5py.File(q, 'r') as hf:
 81 | 						waveform = int16_to_float32(hf['waveform'][:])[None, :]
 82 | 						frame_roll = hf['frame_roll'][:].astype(np.int)
 83 | 
 84 | 					waveform, frame_roll = self.vad(waveform, frame_roll)
 85 | 					query.append(waveform)
 86 | 					tr_query.append(parse_frameroll2annotation(frame_roll))
 87 | 
 88 | 				with h5py.File(ref, 'r') as hf:
 89 | 					wav_ref = int16_to_float32(hf['waveform'][:])[None, :]
 90 | 					tr_ref = hf['note_annotations_txt'][0].decode()
 91 | 					frame_roll = hf['frame_roll'][:].astype(np.int)
 92 | 
 93 | 				samples.append([instr, wav_ref, tr_ref, frame_roll, query, tr_query])
 94 | 
 95 | 			ref_len = samples[0][1].shape[-1]
 96 | 			for i, ref in enumerate(samples):
 97 | 				ref_len = ref[1].shape[-1] if ref_len > ref[1].shape[-1] else ref_len
 98 | 				
 99 | 			samples = [[s[0], s[1][:, :ref_len]] + s[2:] for s in samples]
100 | 			mix = [s[1] for s in samples]
101 | 			mix = np.stack(mix, 0)
102 | 			test_sample = {'mix' : mix, 'sample_name': sample_name, 'instrs' : samples}
103 | 			yield test_sample
104 | 		
105 | 


--------------------------------------------------------------------------------
/data/urmp/testset/query.lst:
--------------------------------------------------------------------------------
 1 | Violin,Saxophone	dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_4_sax_30_Fugue.h5
 2 | Violin,Double_Bass	dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_4_db_35_Rondeau.h5
 3 | Cello,Viola	dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_2_vc_11_Maria.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_3_va_32_Fugue.h5
 4 | Violin,Double_Bass	dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_1_vn_39_Jerusalem.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_5_db_39_Jerusalem.h5
 5 | Violin,Cello	dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_1_vn_39_Jerusalem.h5,dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_2_vc_11_Maria.h5
 6 | Viola,Clarinet	dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_3_va_39_Jerusalem.h5,dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_1_cl_23_Rejouissance.h5
 7 | Flute,Violin	dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_1_fl_30_Fugue.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_1_vn_39_Jerusalem.h5
 8 | Saxophone,Clarinet	dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_4_sax_41_Miserere.h5,dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_4_cl_40_Miserere.h5
 9 | Saxophone,Tuba	dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_4_sax_39_Jerusalem.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5
10 | Trombone,Saxophone	dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_43_Chorale.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_4_sax_39_Jerusalem.h5
11 | Oboe,Saxophone	dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_1_ob_11_Maria.h5,dataset/hdf5s/urmp/22_Rejouissance_sax_tbn_tba/AuSep_1_sax_22_Rejouissance.h5
12 | Oboe,Viola	dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_1_ob_11_Maria.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_3_va_35_Rondeau.h5
13 | Flute,Cello	dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/36_Rondeau_vn_vn_va_vc/AuSep_4_vc_36_Rondeau.h5
14 | Flute,Trombone	dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_1_fl_40_Miserere.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_43_Chorale.h5
15 | Clarinet,Horn	dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_4_cl_37_Rondeau.h5,dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_3_hn_31_Slavonic.h5
16 | Clarinet,Bassoon	dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_4_cl_40_Miserere.h5,dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_5_bn_40_Miserere.h5
17 | Trumpet,Violin	dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5
18 | Trumpet,Violin	dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5
19 | Flute,Clarinet	dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/29_Fugue_fl_fl_ob_cl/AuSep_4_cl_29_Fugue.h5
20 | Flute,Saxophone	dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_4_sax_30_Fugue.h5
21 | Bassoon,Oboe	dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_4_bn_28_Fugue.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_3_ob_30_Fugue.h5
22 | Trumpet,Trombone	dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_4_tbn_33_Elise.h5
23 | Trumpet,Horn	dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_2_tpt_31_Slavonic.h5,dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_3_hn_34_Fugue.h5
24 | Tuba,Trumpet	dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_5_tba_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5
25 | Trumpet,Trombone	dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5,dataset/hdf5s/urmp/21_Rejouissance_cl_tbn_tba/AuSep_2_tbn_21_Rejouissance.h5
26 | Trumpet,Horn	dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_2_tpt_31_Slavonic.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_3_hn_42_Arioso.h5
27 | Cello,Oboe	dataset/hdf5s/urmp/36_Rondeau_vn_vn_va_vc/AuSep_4_vc_36_Rondeau.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_3_ob_30_Fugue.h5
28 | Trumpet,Tuba	dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_2_tpt_33_Elise.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5
29 | Trumpet,Trombone	dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_1_tpt_34_Fugue.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_4_tbn_33_Elise.h5
30 | Horn,Trumpet	dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_3_hn_33_Elise.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5
31 | Trumpet,Horn	dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_3_hn_33_Elise.h5
32 | Trumpet,Trombone	dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_2_tpt_33_Elise.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_42_Arioso.h5


--------------------------------------------------------------------------------
/data/urmp/testset/test.lst:
--------------------------------------------------------------------------------
 1 | Violin,Saxophone	dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_1_vn_39_Jerusalem.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_4_sax_39_Jerusalem.h5
 2 | Violin,Double_Bass	dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_2_vn_39_Jerusalem.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_5_db_39_Jerusalem.h5
 3 | Cello,Viola	dataset/hdf5s/urmp/38_Jerusalem_vn_vn_va_vc_db/AuSep_4_vc_38_Jerusalem.h5,dataset/hdf5s/urmp/39_Jerusalem_vn_vn_va_sax_db/AuSep_3_va_39_Jerusalem.h5
 4 | Violin,Double_Bass	dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_4_db_35_Rondeau.h5
 5 | Violin,Cello	dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_1_vn_35_Rondeau.h5,dataset/hdf5s/urmp/36_Rondeau_vn_vn_va_vc/AuSep_4_vc_36_Rondeau.h5
 6 | Viola,Clarinet	dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_3_va_35_Rondeau.h5,dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_4_cl_37_Rondeau.h5
 7 | Flute,Violin	dataset/hdf5s/urmp/37_Rondeau_fl_vn_va_cl/AuSep_1_fl_37_Rondeau.h5,dataset/hdf5s/urmp/35_Rondeau_vn_vn_va_db/AuSep_2_vn_35_Rondeau.h5
 8 | Saxophone,Clarinet	dataset/hdf5s/urmp/22_Rejouissance_sax_tbn_tba/AuSep_1_sax_22_Rejouissance.h5,dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_1_cl_23_Rejouissance.h5
 9 | Saxophone,Tuba	dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_2_sax_23_Rejouissance.h5,dataset/hdf5s/urmp/23_Rejouissance_cl_sax_tba/AuSep_3_tba_23_Rejouissance.h5
10 | Trombone,Saxophone	dataset/hdf5s/urmp/21_Rejouissance_cl_tbn_tba/AuSep_2_tbn_21_Rejouissance.h5,dataset/hdf5s/urmp/22_Rejouissance_sax_tbn_tba/AuSep_1_sax_22_Rejouissance.h5
11 | Oboe,Saxophone	dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_3_ob_30_Fugue.h5,dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_4_sax_30_Fugue.h5
12 | Oboe,Viola	dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_2_ob_28_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_3_va_32_Fugue.h5
13 | Flute,Cello	dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_1_fl_30_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_4_vc_32_Fugue.h5
14 | Flute,Trombone	dataset/hdf5s/urmp/30_Fugue_fl_fl_ob_sax/AuSep_2_fl_30_Fugue.h5,dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_4_tbn_34_Fugue.h5
15 | Clarinet,Horn	dataset/hdf5s/urmp/29_Fugue_fl_fl_ob_cl/AuSep_4_cl_29_Fugue.h5,dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_3_hn_34_Fugue.h5
16 | Clarinet,Bassoon	dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_3_cl_28_Fugue.h5,dataset/hdf5s/urmp/28_Fugue_fl_ob_cl_bn/AuSep_4_bn_28_Fugue.h5
17 | Trumpet,Violin	dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_1_tpt_34_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_1_vn_32_Fugue.h5
18 | Trumpet,Violin	dataset/hdf5s/urmp/34_Fugue_tpt_tpt_hn_tbn/AuSep_2_tpt_34_Fugue.h5,dataset/hdf5s/urmp/32_Fugue_vn_vn_va_vc/AuSep_2_vn_32_Fugue.h5
19 | Flute,Clarinet	dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_1_fl_40_Miserere.h5,dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_4_cl_40_Miserere.h5
20 | Flute,Saxophone	dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_2_fl_40_Miserere.h5,dataset/hdf5s/urmp/41_Miserere_fl_fl_ob_sax_bn/AuSep_4_sax_41_Miserere.h5
21 | Bassoon,Oboe	dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_5_bn_40_Miserere.h5,dataset/hdf5s/urmp/40_Miserere_fl_fl_ob_cl_bn/AuSep_3_ob_40_Miserere.h5
22 | Trumpet,Trombone	dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_43_Chorale.h5
23 | Trumpet,Horn	dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_43_Chorale.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_3_hn_43_Chorale.h5
24 | Tuba,Trumpet	dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_5_tba_43_Chorale.h5,dataset/hdf5s/urmp/43_Chorale_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_43_Chorale.h5
25 | Trumpet,Trombone	dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_2_tpt_33_Elise.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_4_tbn_33_Elise.h5
26 | Trumpet,Horn	dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_1_tpt_33_Elise.h5,dataset/hdf5s/urmp/33_Elise_tpt_tpt_hn_tbn/AuSep_3_hn_33_Elise.h5
27 | Cello,Oboe	dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_2_vc_11_Maria.h5,dataset/hdf5s/urmp/11_Maria_ob_vc/AuSep_1_ob_11_Maria.h5
28 | Trumpet,Tuba	dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_5_tba_42_Arioso.h5
29 | Trumpet,Trombone	dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_1_tpt_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_4_tbn_42_Arioso.h5
30 | Horn,Trumpet	dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_3_hn_42_Arioso.h5,dataset/hdf5s/urmp/42_Arioso_tpt_tpt_hn_tbn_tba/AuSep_2_tpt_42_Arioso.h5
31 | Trumpet,Horn	dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_2_tpt_31_Slavonic.h5,dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_3_hn_31_Slavonic.h5
32 | Trumpet,Trombone	dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_1_tpt_31_Slavonic.h5,dataset/hdf5s/urmp/31_Slavonic_tpt_tpt_hn_tbn/AuSep_4_tbn_31_Slavonic.h5


--------------------------------------------------------------------------------
/src/utils/weiMidi.py:
--------------------------------------------------------------------------------
  1 | from mido import MidiFile
  2 | import numpy as np
  3 | 
# Length of the per-tick tempo table allocated by read_midi.
MAX_TICKS = 1000007
# Frame rate used by frame() to turn seconds into frame indices.
FRAMES_PER_SECOND = 100
# Number of pitch rows in a frame roll; convert2frameRoll adds one extra row for silence.
NOTES_NUM = 88
# Value subtracted from a MIDI note number to obtain its frame-roll row.
BEGIN_NOTE = 21
# checkMono reports a track as non-monophonic once more than GRAIN_FRAME
# consecutive frames hold several active notes.
GRAIN_SEC = 0.03
GRAIN_FRAME = FRAMES_PER_SECOND * GRAIN_SEC
 10 | 
 11 |  
 12 | def devide(msg):
 13 | 	return str(msg).split(' ')
 14 | 
 15 | def calculate_second(tempo, ticks_per_beat, onset_ticks, ticks):
 16 | 	second = 0
 17 | 	for i in range(ticks):
 18 | 		microseconds_per_beat = tempo[onset_ticks + ticks]
 19 | 		beats_per_second = 1e6 / microseconds_per_beat
 20 | 		ticks_per_second = ticks_per_beat * beats_per_second
 21 | 		second += 1. / ticks_per_second
 22 | 	return second
 23 | 
 24 | def read_midi(midi_path):
 25 | 
 26 | 	midi_file = MidiFile(midi_path)
 27 | 	ticks_per_beat = midi_file.ticks_per_beat
 28 | 
 29 | 	#meta = {"key_signature": None, "tempo": [0, 0] }
 30 | 
 31 | 	check = {}
 32 | 	cur = 0
 33 | 	pre_tempo = -1
 34 | 	tempo_record = np.zeros([MAX_TICKS])
 35 | 
 36 | 	for msg in midi_file.tracks[0]:
 37 | 		detailed_msg = devide(msg)
 38 | 		#if msg.type == "key_signature":
 39 | 		#	meta["key_signature"] = msg.key
 40 | 		if msg.type == "set_tempo":
 41 | 			tempo_record[cur : cur + msg.time] = pre_tempo
 42 | 			pre_tempo = msg.tempo
 43 | 			cur += msg.time
 44 | 			
 45 | 	tempo_record[cur:] = pre_tempo
 46 | 
 47 | 	tracks = []
 48 | 
 49 | 	ticks = 0
 50 | 	time_in_seconds = []
 51 | 
 52 | 	for i, tr in enumerate(midi_file.tracks[1:]):
 53 | 		track = []
 54 | 		seconds = []
 55 | 		second = 0.
 56 | 		ticks = 0
 57 | 		for msg in tr:
 58 | 			track.append(str(msg))
 59 | 			second += calculate_second(tempo_record, ticks_per_beat, ticks, msg.time)
 60 | 			ticks += msg.time
 61 | 			seconds.append(second)
 62 | 
 63 | 		tracks.append(track)
 64 | 		time_in_seconds.append(seconds)
 65 | 
 66 | 	midiTracks = []
 67 | 	for i, tr in enumerate(tracks):
 68 | 		midiTrack = WeiMidiTrack(tr, time_in_seconds[i])
 69 | 		midiTracks.append(midiTrack)
 70 | 
 71 | 	return midiTracks
 72 | 
 73 | def frame(second):
 74 | 	return int(second * FRAMES_PER_SECOND)
 75 | 
 76 | def c2note(msg):
 77 | 	return int(msg.split("=")[-1])
 78 | 
 79 | def c2velocity(msg):
 80 | 	return int(msg.split("=")[-1])
 81 | 
 82 | def convert2frameRoll(tracks, seconds):
 83 | 	onset_note = -1
 84 | 	onset = 0
 85 | 	frameRoll = np.zeros([NOTES_NUM + 1, frame(seconds[-1]) + 1])
 86 | 	frameRoll_pairs = []
 87 | 	buffer_notes = {}
 88 | 	for i, tr in enumerate(tracks):
 89 | 		detailed_tr = devide(tr)
 90 | 		tag = detailed_tr[0]
 91 | 		if tag not in ["note_on", "note_off"]:
 92 | 			continue
 93 | 		velocity = c2velocity(detailed_tr[3])
 94 | 		current_frame = frame(seconds[i])
 95 | 		note = c2note(detailed_tr[2])
 96 | 		if note >= NOTES_NUM or note < 0:
 97 | 			continue
 98 | 
 99 | 		if tag == "note_on" and velocity > 0:
100 | 			buffer_notes[note] = current_frame
101 | 
102 | 		elif note in buffer_notes and buffer_notes[note] > 0:
103 | 			onset = buffer_notes[note]
104 | 			frameRoll[note - BEGIN_NOTE, onset : current_frame] = 1
105 | 			frameRoll_pairs.append([note - BEGIN_NOTE, onset, current_frame])
106 | 			buffer_notes[note] = -1
107 | 			#onset = current_frame
108 | 			#onset_note = c2note(detailed_tr[2]) - BEGIN_NOTE if tag == "note_on" else -1
109 | 
110 | 	#if onset_note > 0:
111 | 	#	frameRoll[onset_note, onset:] = 1
112 | 
113 | 	for i in range(frameRoll.shape[-1]):
114 | 		if frameRoll[:, i].sum() < 1:
115 | 			frameRoll[NOTES_NUM, i] =1
116 | 
117 | 	return frameRoll, frameRoll_pairs
118 | 
def checkMono(frameRoll):
	"""Return False when more than GRAIN_FRAME consecutive frames sum above 1, True otherwise."""
	overlap_run = 0
	for column in frameRoll.T:
		# Count consecutive frames whose column sum exceeds 1; any other frame resets the run.
		overlap_run = overlap_run + 1 if column.sum() > 1 else 0
		if overlap_run > GRAIN_FRAME:
			return False
	return True
129 | 
130 | 
class WeiMidiTrack(object):
	"""Frame-roll view of a single MIDI track."""

	def __init__(self, midi_events, seconds):
		"""Build the frame roll, its note list and the monophony flag of the track."""
		roll, pairs = convert2frameRoll(midi_events, seconds)
		self.frameRoll = roll
		self.frameRollPair = pairs
		self.isMono = checkMono(roll)

	def monoFrameRoll(self):
		"""Return, for each frame, the index of the first maximal row (monophonic tracks only)."""
		assert self.isMono
		return np.argmax(self.frameRoll, 0)
141 | 
class WeiMidi(object):
	"""A MIDI file exposed as a list of WeiMidiTrack objects."""

	def __init__(self, path):
		"""Read the file at `path` and cache its tracks and last note offset."""
		self.midi_path = path
		self.midi_tracks = read_midi(path)
		self.maxSec = self.get_maxSec()

	def get_maxSec(self):
		"""Return the largest final entry among the last note triple of every track (0 if none)."""
		latest = 0
		for n in range(self.tracks_num()):
			pairs = self.frameRoll_pair(n)
			if len(pairs) > 0 and len(pairs[-1]) > 0:
				end = pairs[-1][-1]
				if not latest > end:
					latest = end
		return latest

	def is_mono(self, n):
		"""Return the monophony flag of track `n`."""
		return self.midi_tracks[n].isMono

	def tracks_num(self):
		"""Return the number of tracks."""
		return len(self.midi_tracks)

	def frameRoll_pair(self, n):
		"""Return the note list of track `n`."""
		return self.midi_tracks[n].frameRollPair

	def __getitem__(self, n):
		"""Return the monophonic frame roll of track `n` (`n` must be an int)."""
		assert isinstance(n, int)
		track = self.midi_tracks[n]
		return track.monoFrameRoll()
169 | 
def test():
	"""Smoke test: load one hard-coded MIDI file."""
	midi_path = 'data/midi/20210409033250183-9212.mid'
	song = WeiMidi(midi_path)
	#for i in range(song.tracks_num()):
		#a = song[i]
		#if a.shape[0] * 88 == a.sum():


if __name__ == '__main__':
	test()
179 | 


--------------------------------------------------------------------------------
/src/inference/compute_measure.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import shutil
  4 | import librosa
  5 | import mir_eval
  6 | import numpy as np
  7 | from sklearn import metrics
  8 | import multiprocessing
  9 | from multiprocessing import Manager
 10 | 
 11 | from utils.utilities import (get_process_groups, read_lst, read_config, write_lst, mkdir, int16_to_float32)
 12 | from conf.feature import *
 13 | 
 14 | 
 15 | def load_audio_pair(est_path, ref_path, sample_rate=SAMPLE_RATE):
 16 | 	max_len = -1
 17 | 	ests = []
 18 | 	for path in est_path:
 19 | 		est, _ = librosa.load(path, sr=sample_rate, mono=True)
 20 | 		ests.append(est)
 21 | 		if est.shape[-1] > max_len:
 22 | 			max_len = est.shape[-1]
 23 | 
 24 | 	refs = []
 25 | 	for path in ref_path:
 26 | 		ref, _ = librosa.load(path, sr=sample_rate, mono=True)
 27 | 		refs.append(ref)
 28 | 
 29 | 	ref = np.zeros([len(refs),	max_len])
 30 | 	for i in range(len(refs)):
 31 | 		ref[i, : refs[i].shape[-1]] = refs[i]
 32 | 
 33 | 	est = np.zeros([len(refs),	max_len])
 34 | 	for i in range(len(refs)):
 35 | 		est[i, : ests[i].shape[-1]] = ests[i]
 36 | 	return est, ref
 37 | 
 38 | def frame_roll_from_path(path, max_frame=-1, frames_per_second=100, notes_num=88):
 39 | 	segments = read_lst(path)
 40 | 	segments = [seg.rstrip().split('\t') for seg in segments]	
 41 | 	if max_frame == -1:
 42 | 		max_frame = int(float(segments[-1][1]) * frames_per_second + 1)
 43 | 	frame_roll = np.zeros([max_frame, notes_num + 1])
 44 | 	frame_roll[:, notes_num] = 1
 45 | 	for seg in segments:
 46 | 		st = int(float(seg[0]) * frames_per_second)
 47 | 		ed = int(float(seg[1]) * frames_per_second + 1)
 48 | 		if st >= max_frame:
 49 | 			break
 50 | 		if ed > max_frame:
 51 | 			ed = max_frame
 52 | 		frame_roll[st : ed, int(float(seg[2]))] = 1
 53 | 		frame_roll[st : ed, notes_num] = 0
 54 | 		if ed == max_frame:
 55 | 			break
 56 | 	return frame_roll, max_frame
 57 | 	
 58 | 
 59 | def measure_for_transcription(est_path, ref_path, mode='frame'):
 60 | 	if mode == "onset":
 61 | 		est_intervals, est_pitches = mir_eval.io.load_valued_intervals(est_path)
 62 | 		ref_intervals, ref_pitches = mir_eval.io.load_valued_intervals(ref_path)
 63 | 		precision, recall, f_measure, _ = mir_eval.transcription.precision_recall_f1_overlap(
 64 | 			ref_intervals, ref_pitches, est_intervals, est_pitches)
 65 | 	else:
 66 | 		ref_frame_roll, max_frame = frame_roll_from_path(ref_path)
 67 | 		est_frame_roll, _ = frame_roll_from_path(est_path, max_frame)
 68 | 		pre = metrics.average_precision_score(ref_frame_roll, est_frame_roll, average='micro')
 69 | 		precision = recall = f_measure = pre
 70 | 
 71 | 	return precision, recall, f_measure
 72 | 
 73 | 
 74 | def measure_for_separation(est_path, ref_path, sample_rate=SAMPLE_RATE):
 75 | 
 76 | 	if type(est_path) is str:
 77 | 		est, ref = load_audio_pair(est_path, ref_path, sample_rate)	
 78 | 	else:
 79 | 		est = est_path
 80 | 		ref = ref_path
 81 | 	(sdr, sir, sar, perm) = mir_eval.separation.bss_eval_sources(ref, est, compute_permutation=True)
 82 | 
 83 | 	return sdr, sir, sar
 84 | 
 85 | 
 86 | def evaluate_transcription(samples, processes_num=1):
 87 | 	return multi_process_evaluation(samples, processes_num, "transcription")
 88 | 
 89 | def evaluate_separation(samples, processes_num=1):
 90 | 	return multi_process_evaluation(samples, processes_num, "separation")
 91 | 
 92 | def multi_process_evaluation(samples, processes_num=1, mode="separation"):
 93 | 	
 94 | 	def process_unit(n):
 95 | 		sample = samples[n]
 96 | 		sample_score = {}
 97 | 		for instr in sample:
 98 | 			pairs = sample[instr][mode]
 99 | 			for pair in pairs:
100 | 				if mode == "separation":
101 | 					est, _ = librosa.load(pair[0], sr=SAMPLE_RATE, mono=True)
102 | 					ref, _ = librosa.load(pair[1], sr=SAMPLE_RATE, mono=True)
103 | 					sdr, sir, sar = measure_for_separation(est, ref)
104 | 					sample_score[instr] = sdr[0]
105 | 				else:
106 | 					f1, pre, recall = measure_for_transcription(pair[0], pair[1])
107 | 					sample_score[instr] = f1
108 | 		return sample_score
109 | 
110 | 	def process_group(sample_scores, st, ed, total_num, pid):
111 | 		print(f"process {pid + 1} starts")
112 | 		for n in range(st, ed):
113 | 			sample_score = process_unit(n)
114 | 			print(f"process {pid + 1} : {n + 1}/{total_num} done.")
115 | 			sample_scores[n] = sample_score
116 | 		print(f"process {pid + 1} ends")
117 | 		return sample_scores
118 | 
119 | 	samples_num = len(samples)
120 | 	sample_scores = range(len(samples))
121 | 	if processes_num < 2:
122 | 		sample_scores = list(sample_scores)
123 | 		sample_scores = process_group(sample_scores, 0, samples_num, samples_num, 0)
124 | 
125 | 	else:
126 | 		with Manager() as manager:
127 | 			return_list = manager.list(sample_scores)
128 | 			groups = get_process_groups(samples_num, processes_num)
129 | 			ps = []
130 | 			for pid, (st, ed) in enumerate(groups):
131 | 				p = multiprocessing.Process(target = process_group, args = (return_list, st, ed, samples_num, pid))
132 | 				p.start()
133 | 				ps.append(p)
134 | 			for p in ps:
135 | 				p.join()
136 | 			sample_scores = list(return_list)
137 | 	return sample_scores
138 | 
139 | 
140 | 
if __name__=='__main__':
	# Manual check: score two estimated sources against their references.
	est_path = ['evaluation/separation/test/AuSep_1_vn_27_King_est_1_.wav', 'evaluation/separation/test/AuSep_2_fl_30_Fugue_est_1_.wav']
	ref_path = ['evaluation/separation/test/AuSep_1_vn_27_King_ref_1_.wav', 'evaluation/separation/test/AuSep_2_fl_30_Fugue_ref_1_.wav']
	# Load the audio first and hand the arrays to measure_for_separation.
	est, ref = load_audio_pair(est_path, ref_path)
	sdr, sir, sar = measure_for_separation(est, ref)
	print(sdr)
146 | 
147 | 


--------------------------------------------------------------------------------
/src/inference/utilities.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | import torchaudio
  6 | import torchaudio.functional as AF
  7 | 
  8 | import librosa
  9 | import math
 10 | 
 11 | import sys
 12 | import numpy as np
 13 | import time
 14 | 
 15 | 
 16 | from conf.feature import *
 17 | from conf.inference import *
 18 | 
 19 | def align(a, b, dim):
 20 | 	return a.transpose(0, dim)[:b.shape[dim]].transpose(0, dim)
 21 | 
 22 | def get_fft_window():
 23 | 	fft_window = librosa.filters.get_window(WINDOW, WINDOW_SIZE, fftbins=True)
 24 | 	fft_window = librosa.util.pad_center(fft_window, N_FFT)
 25 | 	return torch.from_numpy(fft_window)
 26 | 
 27 | FFT_WINDOW = get_fft_window()
 28 | 
 29 | 
 30 | def onehot_tensor(x, dim=0, classes_num=NOTES_NUM):
 31 | 	x = x.unsqueeze(dim)
 32 | 	shape = list(x.shape)
 33 | 	shape[dim] = classes_num
 34 | 	y = torch.zeros(shape).to(x.device).scatter_(dim, x, 1)
 35 | 	return y
 36 | 
 37 | 
 38 | def spec2wav(x, cos, sin, wav_len, syn_phase=0, device="cuda"):
 39 | 
 40 | #'''
 41 | #	args : channels * frames * n_fft
 42 | #'''
 43 | 
 44 | 	x = F.pad(x, (0, 1), "constant", 0)
 45 | 	fft_window = FFT_WINDOW.cuda() if device == "cuda" else FFT_WINDOW
 46 | 
 47 | 	if syn_phase == 1:
 48 | 		print("here")
 49 | 		wav_len = int((x.shape[-2] - 1)/ FRAMES_PER_SEC * SAMPLE_RATE)
 50 | 		wav = AF.griffinlim(x.transpose(1, 2), 
 51 | 												window=fft_window, 
 52 | 												n_fft=N_FFT, 
 53 | 												hop_length=HOP_SIZE, 
 54 | 												win_length=WINDOW_SIZE, 
 55 | 												power=1,
 56 | 												normalized=False, 
 57 | 												length=wav_len, 
 58 | 												n_iter=N_ITER, 
 59 | 												momentum=0, 
 60 | 												rand_init=False)
 61 | 	elif syn_phase == 2:
 62 | 		itersNum = 100
 63 | 		for i in range(itersNum):
 64 | 			spec = torch.stack([x * cos, x * sin], -1).transpose(1, 2)	
 65 | 			wav = torch.istft(spec,
 66 | 											n_fft=N_FFT,
 67 | 											hop_length=HOP_SIZE,
 68 | 											win_length=WINDOW_SIZE,
 69 | 											window=fft_window,
 70 | 											center=True,
 71 | 											normalized=False,
 72 | 											onesided=None,
 73 | 											length=wav_len,
 74 | 											return_complex=False)
 75 | 			if i < itersNum - 1:
 76 | 				_, cos, sin = wav2spec(wav)
 77 | 
 78 | 
 79 | 	
 80 | 	elif syn_phase == 0:
 81 | 		spec = torch.stack([x * cos, x * sin], -1).transpose(1, 2)
 82 | 		wav = torch.istft(spec, 
 83 | 											n_fft=N_FFT, 
 84 | 											hop_length=HOP_SIZE, 
 85 | 											win_length=WINDOW_SIZE,
 86 | 											window=fft_window, 
 87 | 											center=True, 
 88 | 											normalized=False, 
 89 | 											onesided=None, 
 90 | 											length=wav_len, 
 91 | 											return_complex=False)
 92 | 	return wav
 93 | 
 94 | def wav2spec(x, device="cuda"):
 95 | 	'''
 96 | 			return channel * frames * n_fft
 97 | 	'''
 98 | 
 99 | 	fft_window = FFT_WINDOW.cuda() if device == "cuda" else FFT_WINDOW
100 | 
101 | 	spec = torch.stft(x, 
102 | 										N_FFT,
103 | 										hop_length=HOP_SIZE,
104 | 										win_length=WINDOW_SIZE,
105 | 										window=fft_window,
106 | 										center=True, 
107 | 										pad_mode='reflect', 
108 | 										normalized=False,
109 | 										onesided=None,
110 | 										return_complex=False)
111 | 	spec = spec.transpose(1, 2)
112 | 	real = spec[:, :, :, 0]
113 | 	imag = spec[:, :, :, 1]
114 | 	mag = (real ** 2 + imag ** 2) ** 0.5
115 | 	cos = real / torch.clamp(mag, 1e-10, np.inf)
116 | 	sin = imag / torch.clamp(mag, 1e-10, np.inf)
117 | 	return mag[:, :, :-1], cos, sin
118 | 
def save_audio(wav, path):
	"""Write `wav` to `path` at SAMPLE_RATE after casting it to float32 on the CPU."""
	cpu_wav = wav.float().cpu()
	torchaudio.save(path, cpu_wav, SAMPLE_RATE)
121 | 	
122 | 
def devide_into_batches(x, pad_value=0, overlap_edge=PAD_FRAME, duration_axis=-1):
	"""Cut `x` into overlapping fixed-length windows and group them into batches.

	Windows are BATCH_FRAMES_NUM long along `duration_axis` and start every
	`BATCH_FRAMES_NUM - 2 * overlap_edge` frames, so neighbouring windows
	share `2 * overlap_edge` frames. `overlap_edge` frames of `pad_value` are
	added at both ends first. Unless `pad_value == -1`, the tail is padded
	with `pad_value` up to a whole number of windows.

	Returns a list of tensors, each stacking at most INFERENCE_BATCH_SIZE
	windows along a new leading axis.
	"""
	# A singleton axis is added at both ends below, so shift the axis index
	# to keep pointing at the same data axis.
	if duration_axis < 0:
		duration_axis -= 1
	else:
		duration_axis += 1
	# Move the duration axis to the end, where F.pad operates.
	x = x.unsqueeze(0).unsqueeze(-1).transpose(duration_axis, -1)

	frames_num = x.shape[-1]
	hop = BATCH_FRAMES_NUM - overlap_edge * 2
	segments_num = frames_num // hop
	covered = segments_num * hop

	if pad_value == -1:
		# NOTE(review): this slices the leading singleton axis, not the
		# duration axis, so it only has an effect when `covered` is 0.
		# Possibly meant to drop the incomplete tail - confirm intent.
		x = x[:covered]
	elif covered < frames_num:
		x = F.pad(x, (0, int((segments_num + 1) * hop) - frames_num), 'constant', value=pad_value)
		segments_num += 1

	x = F.pad(x, (overlap_edge, overlap_edge), 'constant', value=pad_value)

	# Bring duration to the front for slicing, then restore the input layout
	# for every window and drop the two helper axes.
	x = x.transpose(-1, 0)
	samples = [
		x[i * hop : i * hop + BATCH_FRAMES_NUM].transpose(0, duration_axis).squeeze(0).squeeze(-1)
		for i in range(segments_num)
	]

	samples_num = len(samples)
	batches_num = (samples_num + INFERENCE_BATCH_SIZE - 1) // INFERENCE_BATCH_SIZE
	return [
		torch.stack(
			samples[i * INFERENCE_BATCH_SIZE : min((i + 1) * INFERENCE_BATCH_SIZE, samples_num)],
			0,
		)
		for i in range(batches_num)
	]
159 | 
160 | 
def merge_batches(x, overlap_edge=PAD_FRAME, duration_axis=-1):
	"""Join batched windows back into one continuous tensor.

	Args:
		x: tensor whose axis 0 enumerates the windows.
		overlap_edge: number of frames removed from both ends of every
			window along the duration axis before joining. 0 keeps the
			windows whole.
		duration_axis: duration axis of `x`, counted on the batched tensor.

	Returns:
		The windows concatenated along the duration axis, without the
		leading window axis.
	"""
	# A non-negative index has to skip the helper axis inserted in front.
	lifted_axis = duration_axis + 1 if duration_axis >= 0 else duration_axis
	x = x.unsqueeze(0).transpose(0, lifted_axis)
	# Explicit end index: `x[k:-k]` would be empty for k == 0 because -0 == 0.
	x = x[overlap_edge : x.shape[0] - overlap_edge]
	# (duration, window, ...) -> (window, duration, ...) -> windows laid end to end.
	x = x.transpose(0, 1).flatten(0, 1)
	return x.transpose(0, duration_axis).squeeze(0)
169 | 
170 | 
def merge_from_list(x, index=0):
	"""Concatenate per-batch outputs along axis 0.

	Args:
		x: iterable of outputs. An element that is a tuple or list (including
			subclasses such as namedtuples) contributes its `index`-th item;
			any other element is used as is.
		index: which item to pick from tuple/list elements.

	Returns:
		`torch.cat` of the selected tensors along axis 0.
	"""
	results = [
		unit[index] if isinstance(unit, (tuple, list)) else unit
		for unit in x
	]
	return torch.cat(results, 0)
179 | 
180 | 


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-190.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9155356872822401, "Saxophone": 0.4922168581534541}, {"Violin": 0.906904571797, "Double_Bass": 0.7330175196626085}, {"Cello": 0.7716511678000424, "Viola": 0.6434034747048931}, {"Violin": 0.9200418795170666, "Double_Bass": 0.8179615908098763}, {"Violin": 0.855165271815949, "Cello": 0.8337352880755539}, {"Viola": 0.6149773318368282, "Clarinet": 0.7710395391594762}, {"Flute": 0.7083241588874832, "Violin": 0.887958054553282}, {"Saxophone": 0.6605534252500374, "Clarinet": 0.6594286094936119}, {"Saxophone": 0.5860077041512487, "Tuba": 0.5673013015675976}, {"Trombone": 0.7877233204170254, "Saxophone": 0.03781229740252132}, {"Oboe": 0.18800959366764133, "Saxophone": 0.44948093132197087}, {"Oboe": 0.4796317221126738, "Viola": 0.8448553667172828}, {"Flute": 0.8280862424486122, "Cello": 0.8930023244670272}, {"Flute": 0.8267930141461336, "Trombone": 0.8788783567444686}, {"Clarinet": 0.7461117325660884, "Horn": 0.7032581753974934}, {"Clarinet": 0.6563554459912304, "Bassoon": 0.2353563387884311}, {"Trumpet": 0.8619435148322085, "Violin": 0.724248884975393}, {"Trumpet": 0.46115154108952194, "Violin": 0.7666968607472413}, {"Flute": 0.8527652724900775, "Clarinet": 0.7393480261360227}, {"Flute": 0.6586441061688577, "Saxophone": 0.2491150773942218}, {"Bassoon": 0.46117680233655867, "Oboe": 0.3668244893427309}, {"Trumpet": 0.6322599603098203, "Trombone": 0.8572844451707077}, {"Trumpet": 0.4909331593806311, "Horn": 0.8389384407916984}, {"Tuba": 0.5497697590729947, "Trumpet": 0.855529304200216}, {"Trumpet": 0.47877785625509706, "Trombone": 0.6518660545606864}, {"Trumpet": 0.8618582497239909, "Horn": 0.8798279812510436}, {"Cello": 0.5947627192421981, "Oboe": 0.409469028065668}, {"Trumpet": 0.9389893409351974, "Tuba": 0.8680561027336061}, {"Trumpet": 0.798509790637697, "Trombone": 0.798237047904076}, {"Horn": 0.7659901411178002, "Trumpet": 0.48924055691281143}, {"Trumpet": 0.7763481761537584, "Horn": 0.7618845583301896}, {"Trumpet": 
0.6581694910442168, "Trombone": 0.7394180175342637}], "separation": [{"Violin": 6.541116437371974, "Saxophone": 1.8881928954020841}, {"Violin": 9.085971834725244, "Double_Bass": 6.939945149867201}, {"Cello": 7.948946571611801, "Viola": 2.9854057021232103}, {"Violin": 8.027842855421742, "Double_Bass": 7.623472124412347}, {"Violin": 9.628268860563933, "Cello": 10.901597768940217}, {"Viola": 1.7124445277319533, "Clarinet": 4.8651812471553075}, {"Flute": 8.468505210426615, "Violin": 5.89170621585735}, {"Saxophone": -1.7425961502063514, "Clarinet": 1.789691326687349}, {"Saxophone": 5.058587893150619, "Tuba": 4.732612885768726}, {"Trombone": 6.439471796882162, "Saxophone": -12.734088587651616}, {"Oboe": -7.105028712955862, "Saxophone": -1.4624145596087077}, {"Oboe": 0.6622300530031615, "Viola": 6.4944053285021}, {"Flute": 8.588378412276096, "Cello": 11.381660139740637}, {"Flute": 11.285043361386428, "Trombone": 6.808830188676809}, {"Clarinet": 6.456273989937547, "Horn": 4.743802232015218}, {"Clarinet": 5.244730530716417, "Bassoon": -10.493317188874995}, {"Trumpet": 4.340486727770855, "Violin": 5.290570254455115}, {"Trumpet": 0.6531070386405622, "Violin": 4.709271765210785}, {"Flute": 12.234576327549737, "Clarinet": 8.912410369226688}, {"Flute": 8.201778642251398, "Saxophone": -5.0961533288595575}, {"Bassoon": 5.596215024044069, "Oboe": -0.9849432318223452}, {"Trumpet": 4.45269382716973, "Trombone": 6.427959235523154}, {"Trumpet": 0.20277078878847551, "Horn": 9.49491145739134}, {"Tuba": 8.066828307649828, "Trumpet": 9.994846913719279}, {"Trumpet": 0.7814876576097167, "Trombone": 2.794755491462097}, {"Trumpet": 8.926938201028934, "Horn": 8.991136171921566}, {"Cello": 7.020040020491056, "Oboe": 6.0812640429824985}, {"Trumpet": 8.88255234062147, "Tuba": 8.051688364863747}, {"Trumpet": 4.2050602035974585, "Trombone": 4.82251196830306}, {"Horn": 5.624537966554493, "Trumpet": -2.830977900011998}, {"Trumpet": 5.897005864337276, "Horn": 8.267664444168025}, {"Trumpet": 
6.30121119732959, "Trombone": 3.751358755393699}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.613001284988619, "Saxophone": 6.908647788563865}, {"Violin": 9.889224519215695, "Double_Bass": 5.908313175088509}, {"Cello": 7.950142830194029, "Viola": 5.165176589502689}, {"Violin": 8.197718446853044, "Double_Bass": 7.135280756021612}, {"Violin": 9.412095105067737, "Cello": 10.62875802088553}, {"Viola": 3.415010862232417, "Clarinet": 7.018410524186724}, {"Flute": 8.771259771677965, "Violin": 8.679388561407553}, {"Saxophone": -0.8469234611064538, "Clarinet": 3.156610247604527}, {"Saxophone": 10.107492749043523, "Tuba": 5.391805023134176}, {"Trombone": 6.344937936708229, "Saxophone": 7.449428411196601}, {"Oboe": 4.354182781300693, "Saxophone": 4.535728677183428}, {"Oboe": 3.3279251141151307, "Viola": 7.941159170648245}, {"Flute": 7.517535391227003, "Cello": 10.472847415042636}, {"Flute": 10.774473481643323, "Trombone": 7.022919250744013}, {"Clarinet": 8.423195318092167, "Horn": 10.013757977040878}, {"Clarinet": 10.330679457956993, "Bassoon": 4.844246401602945}, {"Trumpet": 5.439569589818225, "Violin": 5.5061771445010965}, {"Trumpet": 3.1963841212511213, "Violin": 5.055929487124545}, {"Flute": 14.526501153758142, "Clarinet": 12.725290612709902}, {"Flute": 10.088511882368177, "Saxophone": 6.786088697778048}, {"Bassoon": 10.047690078508866, "Oboe": -2.669156077895666}, {"Trumpet": 4.376574626491411, "Trombone": 5.963778598996368}, {"Trumpet": 10.521624108965868, "Horn": 11.668561666943846}, {"Tuba": 6.828793099384825, "Trumpet": 9.923279545351896}, {"Trumpet": 3.636903746109033, "Trombone": 6.012195640826754}, {"Trumpet": 9.820882133987544, "Horn": 10.475290472025538}, {"Cello": 6.770775799798311, "Oboe": 7.108617519921708}, {"Trumpet": 8.935373367206758, "Tuba": 8.200000625712908}, {"Trumpet": 5.243726242520109, "Trombone": 6.64422064218101}, {"Horn": 8.085503508412174, "Trumpet": 3.5976478306216753}, {"Trumpet": 8.737509266782887, "Horn": 10.902989949246876}, {"Trumpet": 
5.653195910534603, "Trombone": 3.8592234846691973}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-191.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9152028513501567, "Saxophone": 0.5281816750237217}, {"Violin": 0.8894809825456819, "Double_Bass": 0.7564888710487585}, {"Cello": 0.7955522662816865, "Viola": 0.589311791845628}, {"Violin": 0.9240299544268021, "Double_Bass": 0.8178991333418093}, {"Violin": 0.8556467102425984, "Cello": 0.8420676657261306}, {"Viola": 0.6177582103382321, "Clarinet": 0.7477714226716111}, {"Flute": 0.7147039244734346, "Violin": 0.8772163576984613}, {"Saxophone": 0.6055464920242927, "Clarinet": 0.6087610358509389}, {"Saxophone": 0.6233418086930349, "Tuba": 0.508229096911424}, {"Trombone": 0.7511127309593514, "Saxophone": 0.05001789349254738}, {"Oboe": 0.1642970139611122, "Saxophone": 0.42103724990527547}, {"Oboe": 0.46962586287749836, "Viola": 0.829122827556487}, {"Flute": 0.8133807880925873, "Cello": 0.9023525384577459}, {"Flute": 0.8009353684165248, "Trombone": 0.8675524570849545}, {"Clarinet": 0.7944425367678136, "Horn": 0.6762319968246755}, {"Clarinet": 0.5851073148392318, "Bassoon": 0.29180740150863377}, {"Trumpet": 0.8792550555115606, "Violin": 0.7914435302799414}, {"Trumpet": 0.709345011107144, "Violin": 0.8295938404747756}, {"Flute": 0.7801502072146993, "Clarinet": 0.7556575436758589}, {"Flute": 0.6214883931019535, "Saxophone": 0.30073061133925605}, {"Bassoon": 0.5168496586940959, "Oboe": 0.35347643106873966}, {"Trumpet": 0.676842780237606, "Trombone": 0.7377590802196126}, {"Trumpet": 0.6201216700778358, "Horn": 0.860993125500221}, {"Tuba": 0.4554720562926493, "Trumpet": 0.916079589572939}, {"Trumpet": 0.5820072090019888, "Trombone": 0.5139270401889783}, {"Trumpet": 0.9129983018405798, "Horn": 0.8790249797158725}, {"Cello": 0.568129456612791, "Oboe": 0.3900170624251111}, {"Trumpet": 0.9402001676026692, "Tuba": 0.8388586679418029}, {"Trumpet": 0.8125182774087631, "Trombone": 0.3654825263959058}, {"Horn": 0.7567374238167376, "Trumpet": 0.7574156510224312}, {"Trumpet": 0.7504505429275314, "Horn": 0.7516463142873844}, {"Trumpet": 
0.6559709774728097, "Trombone": 0.6196404533078165}], "separation": [{"Violin": 6.01402885365448, "Saxophone": 2.0227570869102007}, {"Violin": 8.617024848167853, "Double_Bass": 7.662765396033107}, {"Cello": 7.881505509268791, "Viola": 3.0242663148502076}, {"Violin": 7.6322299886753155, "Double_Bass": 7.215900709436623}, {"Violin": 9.669083919848568, "Cello": 10.73390653778075}, {"Viola": 2.0173184813998586, "Clarinet": 4.237385898067096}, {"Flute": 9.562086939985486, "Violin": 4.822729696220785}, {"Saxophone": -2.111665522425011, "Clarinet": 1.3633824119525142}, {"Saxophone": 6.589518980597719, "Tuba": 4.802625019518544}, {"Trombone": 6.375799689229939, "Saxophone": -11.920952938245865}, {"Oboe": -7.929629543872667, "Saxophone": -2.857517891185486}, {"Oboe": 0.4822105063588595, "Viola": 5.998177780394043}, {"Flute": 8.647281656129325, "Cello": 10.861208186542019}, {"Flute": 11.439602537192698, "Trombone": 5.878526415180094}, {"Clarinet": 7.992234264260642, "Horn": 3.7660556040093573}, {"Clarinet": 3.340546292724547, "Bassoon": -9.712649075597511}, {"Trumpet": 6.27280232568041, "Violin": 5.365404330326867}, {"Trumpet": 2.7530440536987717, "Violin": 3.9860653272992335}, {"Flute": 12.375022855565536, "Clarinet": 8.162547020325995}, {"Flute": 7.939344426676529, "Saxophone": -6.436437203948287}, {"Bassoon": 2.004995986552016, "Oboe": -1.003342496734781}, {"Trumpet": 4.02280288973232, "Trombone": 3.3231483427048083}, {"Trumpet": 2.6523800307458574, "Horn": 9.812528818015107}, {"Tuba": 8.449304094268616, "Trumpet": 10.206241245150133}, {"Trumpet": 2.300514857507982, "Trombone": 1.056134029359734}, {"Trumpet": 8.70879450205567, "Horn": 8.222110059730491}, {"Cello": 6.991849426886853, "Oboe": 5.712956343758789}, {"Trumpet": 8.792307112559548, "Tuba": 8.36211006075664}, {"Trumpet": 4.3560459404333205, "Trombone": -1.01285309965147}, {"Horn": 5.227601021073907, "Trumpet": 1.612272678084615}, {"Trumpet": 5.762252492894948, "Horn": 8.392794003657357}, {"Trumpet": 
6.279908802431981, "Trombone": 2.8378479937503833}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.734891416669992, "Saxophone": 5.670513830788097}, {"Violin": 9.689815241115252, "Double_Bass": 6.680106986621591}, {"Cello": 8.066595574563912, "Viola": 5.8320503802104895}, {"Violin": 8.049740214186382, "Double_Bass": 6.784126561462066}, {"Violin": 9.352149454605094, "Cello": 10.418975006104944}, {"Viola": 3.8625325400910637, "Clarinet": 6.853707832580569}, {"Flute": 9.208372037524379, "Violin": 7.843164226586619}, {"Saxophone": -0.31587457486004195, "Clarinet": 2.2060298707047563}, {"Saxophone": 9.911227702907592, "Tuba": 6.001949128490924}, {"Trombone": 6.591199171716987, "Saxophone": 7.278233690017634}, {"Oboe": 4.459946738503614, "Saxophone": 3.961157059636802}, {"Oboe": 3.4000403955803113, "Viola": 8.082460870375295}, {"Flute": 7.884384577290541, "Cello": 10.244802425176335}, {"Flute": 11.183964535092024, "Trombone": 6.314344874665632}, {"Clarinet": 8.746251734141381, "Horn": 9.68137852315886}, {"Clarinet": 9.591317061469242, "Bassoon": 4.646300891024933}, {"Trumpet": 6.352678361583814, "Violin": 4.713868852928564}, {"Trumpet": 4.0853297145362095, "Violin": 4.380285905180017}, {"Flute": 14.763996039360674, "Clarinet": 12.32383756209942}, {"Flute": 10.174442153912256, "Saxophone": 5.586477875288363}, {"Bassoon": 7.717122603372129, "Oboe": -1.3305717999047202}, {"Trumpet": 5.041142276629325, "Trombone": 5.275001433382017}, {"Trumpet": 10.811707261233972, "Horn": 11.201716771321353}, {"Tuba": 6.50945209650572, "Trumpet": 10.168448968314344}, {"Trumpet": 4.616370173021068, "Trombone": 5.687129705134582}, {"Trumpet": 10.02049907450755, "Horn": 9.491519642671815}, {"Cello": 6.856453262232307, "Oboe": 7.312102669633125}, {"Trumpet": 8.787208208398678, "Tuba": 8.772666590958496}, {"Trumpet": 5.216224265442982, "Trombone": 6.321188438923736}, {"Horn": 7.356662165923753, "Trumpet": 4.597759879805192}, {"Trumpet": 8.958492212961064, "Horn": 10.581214456138856}, {"Trumpet": 
6.023777070845441, "Trombone": 3.5091096593213926}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-198.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.8924510093062347, "Saxophone": 0.5605541846379607}, {"Violin": 0.8850512431237135, "Double_Bass": 0.7522365954911729}, {"Cello": 0.7734280844723757, "Viola": 0.451816253948843}, {"Violin": 0.9257626072598296, "Double_Bass": 0.8315987864389841}, {"Violin": 0.8607913249122879, "Cello": 0.8442287283217075}, {"Viola": 0.5543061770300618, "Clarinet": 0.7288928697844018}, {"Flute": 0.7437707957158572, "Violin": 0.8778654375699722}, {"Saxophone": 0.5888791374571082, "Clarinet": 0.4564262813082402}, {"Saxophone": 0.6410632199132241, "Tuba": 0.5624406255062508}, {"Trombone": 0.7729754407638761, "Saxophone": 0.0761173130695134}, {"Oboe": 0.1872876190783464, "Saxophone": 0.4214673736438897}, {"Oboe": 0.4664665413852489, "Viola": 0.8188682931019281}, {"Flute": 0.7803131938372087, "Cello": 0.8964779841298565}, {"Flute": 0.7447517470737175, "Trombone": 0.8858520416386712}, {"Clarinet": 0.7122604085663651, "Horn": 0.67653382760051}, {"Clarinet": 0.5603713199125716, "Bassoon": 0.2656289047239678}, {"Trumpet": 0.8561317932338018, "Violin": 0.6851677257363626}, {"Trumpet": 0.7053621494425206, "Violin": 0.7541451225738156}, {"Flute": 0.7517207044365125, "Clarinet": 0.6016313959159212}, {"Flute": 0.4805246769756156, "Saxophone": 0.1518930165071761}, {"Bassoon": 0.3648943057668641, "Oboe": 0.28722670374947173}, {"Trumpet": 0.6094735557773957, "Trombone": 0.835499646140991}, {"Trumpet": 0.7051194098353029, "Horn": 0.8579551626059381}, {"Tuba": 0.5844820450013005, "Trumpet": 0.9155701837559244}, {"Trumpet": 0.5854111225487281, "Trombone": 0.5805188928077251}, {"Trumpet": 0.9062628792526456, "Horn": 0.8622206153088223}, {"Cello": 0.6003260416446784, "Oboe": 0.38737267933970415}, {"Trumpet": 0.9333191392924272, "Tuba": 0.8715648964628934}, {"Trumpet": 0.7778775156850734, "Trombone": 0.6787387908951699}, {"Horn": 0.797470396293232, "Trumpet": 0.8624599974274298}, {"Trumpet": 0.7920946193499684, "Horn": 0.7146566422091702}, {"Trumpet": 
0.6143898500308628, "Trombone": 0.6885496873720812}], "separation": [{"Violin": 6.944569133196032, "Saxophone": 3.212414017789522}, {"Violin": 8.68144485703329, "Double_Bass": 7.5420044000931075}, {"Cello": 7.810631265066242, "Viola": -0.7748288894759613}, {"Violin": 7.979837140262056, "Double_Bass": 7.976955060425512}, {"Violin": 9.857800696259126, "Cello": 10.898517174171676}, {"Viola": 1.7118293976794432, "Clarinet": 4.801448905140044}, {"Flute": 9.374501043107726, "Violin": 6.180707292029123}, {"Saxophone": -2.158727703671019, "Clarinet": -0.008027077618480184}, {"Saxophone": 5.17720150144658, "Tuba": 4.884210782349435}, {"Trombone": 5.998907619253795, "Saxophone": -11.003881679030854}, {"Oboe": -8.941868539539414, "Saxophone": -2.2499306242481225}, {"Oboe": 0.9307880630210634, "Viola": 5.7648286583908}, {"Flute": 7.776900360402213, "Cello": 11.586534425478067}, {"Flute": 8.146892744799107, "Trombone": 6.367731270984958}, {"Clarinet": 5.88537474496703, "Horn": 3.674138717696093}, {"Clarinet": 1.6326022123266604, "Bassoon": -5.797728479769438}, {"Trumpet": 4.932250939835388, "Violin": 5.745227076351145}, {"Trumpet": 2.587476924052913, "Violin": 4.5876155279512}, {"Flute": 10.227363735816732, "Clarinet": 5.213935025083013}, {"Flute": 4.6693836103727655, "Saxophone": -10.593131512977871}, {"Bassoon": -0.09114585988140317, "Oboe": 1.9145263779490649}, {"Trumpet": 3.515782783879651, "Trombone": 4.9255632714616375}, {"Trumpet": 4.809343589806482, "Horn": 10.343994423271457}, {"Tuba": 7.907610985479172, "Trumpet": 10.09533238543785}, {"Trumpet": 2.0748903076645946, "Trombone": 0.9625363013018168}, {"Trumpet": 9.402082738311961, "Horn": 8.572585205392352}, {"Cello": 7.169132084207552, "Oboe": 4.359673617595262}, {"Trumpet": 8.930889433583442, "Tuba": 8.137672181630624}, {"Trumpet": 4.037076044044631, "Trombone": 2.5817942467319344}, {"Horn": 5.27608065609058, "Trumpet": 3.2728467791119478}, {"Trumpet": 7.052004439766591, "Horn": 8.023781701110822}, {"Trumpet": 
6.0247897597884, "Trombone": 2.3878849972236873}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.583509794277567, "Saxophone": 6.665538413662101}, {"Violin": 9.642250230178998, "Double_Bass": 6.559850857741068}, {"Cello": 8.155361451650615, "Viola": 4.2531779963725755}, {"Violin": 8.413615565512035, "Double_Bass": 7.572491436792042}, {"Violin": 9.488675632655397, "Cello": 10.49753279903658}, {"Viola": 3.173289324489717, "Clarinet": 7.580256334608057}, {"Flute": 9.483408830570866, "Violin": 8.560799851418013}, {"Saxophone": -0.04860033274023924, "Clarinet": 2.289948475563871}, {"Saxophone": 9.624489979726976, "Tuba": 6.110792422442842}, {"Trombone": 6.427255073951244, "Saxophone": 7.8970008068428}, {"Oboe": 4.734512788739627, "Saxophone": 5.012269750486958}, {"Oboe": 4.977258577773617, "Viola": 8.620709552432054}, {"Flute": 7.544364079359656, "Cello": 10.477396637746665}, {"Flute": 11.320697931673143, "Trombone": 6.490925109946981}, {"Clarinet": 8.07613314334626, "Horn": 9.94008951275371}, {"Clarinet": 10.29548612420437, "Bassoon": 5.825322556327775}, {"Trumpet": 6.477481673087492, "Violin": 5.321437711960064}, {"Trumpet": 5.0591126334423775, "Violin": 4.587272516143913}, {"Flute": 14.678323260047335, "Clarinet": 10.948360858364968}, {"Flute": 10.066267873246302, "Saxophone": 4.48913650026043}, {"Bassoon": 8.954227970932472, "Oboe": -0.9020744321399814}, {"Trumpet": 5.385638101590099, "Trombone": 5.408172840384058}, {"Trumpet": 10.916193204823124, "Horn": 11.018996224003246}, {"Tuba": 6.486775793070562, "Trumpet": 10.465153580771709}, {"Trumpet": 4.288759921240995, "Trombone": 6.05939510309074}, {"Trumpet": 10.443474548026828, "Horn": 10.064098940451952}, {"Cello": 6.681953756702276, "Oboe": 7.5091635947946145}, {"Trumpet": 9.063019258890959, "Tuba": 8.553556406897604}, {"Trumpet": 5.057798904162273, "Trombone": 6.386318834830238}, {"Horn": 7.1387031563499015, "Trumpet": 4.338761476575872}, {"Trumpet": 8.930121948392234, "Horn": 10.350260460764929}, {"Trumpet": 
5.969376382478478, "Trombone": 3.8099477069030794}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-199.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9121755181353801, "Saxophone": 0.5166693919064482}, {"Violin": 0.8998901802880341, "Double_Bass": 0.7189771156586594}, {"Cello": 0.7723822648048051, "Viola": 0.6428622733533773}, {"Violin": 0.914252855116599, "Double_Bass": 0.8487529615131667}, {"Violin": 0.8611046087214415, "Cello": 0.8424270502033921}, {"Viola": 0.5883172544049333, "Clarinet": 0.7162842630690908}, {"Flute": 0.7199137406102079, "Violin": 0.8451581795871065}, {"Saxophone": 0.37163793158755143, "Clarinet": 0.38146271619237915}, {"Saxophone": 0.5569922409258469, "Tuba": 0.5433201111282178}, {"Trombone": 0.7230299321506162, "Saxophone": 0.05051627606429156}, {"Oboe": 0.16418783011865346, "Saxophone": 0.5447622573468253}, {"Oboe": 0.47750112367745445, "Viola": 0.8638481617482999}, {"Flute": 0.8307871742613372, "Cello": 0.8846340365715661}, {"Flute": 0.8233583648883612, "Trombone": 0.873866188123911}, {"Clarinet": 0.7078128790427227, "Horn": 0.7169384140161733}, {"Clarinet": 0.547966277730213, "Bassoon": 0.1965090653988483}, {"Trumpet": 0.8770281861607411, "Violin": 0.7666193382474715}, {"Trumpet": 0.8327018484319814, "Violin": 0.8306234217673182}, {"Flute": 0.7494504939712152, "Clarinet": 0.8251463120997821}, {"Flute": 0.5172981625041865, "Saxophone": 0.44706492215850074}, {"Bassoon": 0.3426747007113617, "Oboe": 0.27678840213593664}, {"Trumpet": 0.5046771801974216, "Trombone": 0.8449867572901932}, {"Trumpet": 0.7154269314315669, "Horn": 0.8427686392640829}, {"Tuba": 0.29623351768418504, "Trumpet": 0.9155402251131829}, {"Trumpet": 0.6378145193646023, "Trombone": 0.6441273055893}, {"Trumpet": 0.922979020065121, "Horn": 0.9279941267997682}, {"Cello": 0.5978284357852842, "Oboe": 0.3560053167227133}, {"Trumpet": 0.9409055407254417, "Tuba": 0.8439290213547604}, {"Trumpet": 0.7487690192409491, "Trombone": 0.6842485271721009}, {"Horn": 0.7697000534170869, "Trumpet": 0.915280865926377}, {"Trumpet": 0.7474109967962473, "Horn": 0.7604385678403959}, {"Trumpet": 
0.6536306931097742, "Trombone": 0.7230219553640371}], "separation": [{"Violin": 6.363528277443229, "Saxophone": 3.7416979268394885}, {"Violin": 9.979636057335682, "Double_Bass": 6.885701510609595}, {"Cello": 7.524106263777637, "Viola": 4.027854017453346}, {"Violin": 7.600784595278894, "Double_Bass": 8.00847272654533}, {"Violin": 9.631470843726104, "Cello": 10.759000855162702}, {"Viola": 2.138274057316985, "Clarinet": 4.422662594857392}, {"Flute": 9.262293524106388, "Violin": 5.637892234358364}, {"Saxophone": -4.856699277760855, "Clarinet": 2.743637782780664}, {"Saxophone": 5.074893142598987, "Tuba": 4.70544663836166}, {"Trombone": 6.102004540226165, "Saxophone": -12.495076818045977}, {"Oboe": -7.259088793278789, "Saxophone": -0.8239150032834182}, {"Oboe": 1.322183816681479, "Viola": 6.647371275207421}, {"Flute": 8.955902745338305, "Cello": 10.88466582588892}, {"Flute": 11.34268023739078, "Trombone": 6.081433063382872}, {"Clarinet": 6.639587037347184, "Horn": 4.226225166651642}, {"Clarinet": 3.1317266161317487, "Bassoon": -10.27086145836003}, {"Trumpet": 5.215311544251104, "Violin": 5.975324227430431}, {"Trumpet": 3.8002514387779973, "Violin": 3.890958315442235}, {"Flute": 10.120629600531164, "Clarinet": 9.141701467047822}, {"Flute": 6.2394706506428, "Saxophone": 1.2791968801292861}, {"Bassoon": -0.38263297880299535, "Oboe": -1.574529350638163}, {"Trumpet": 2.5551316278004044, "Trombone": 5.987110663307936}, {"Trumpet": 4.572957036199296, "Horn": 11.207756637788403}, {"Tuba": 7.053583790986961, "Trumpet": 10.179681389752009}, {"Trumpet": 3.2076371157142503, "Trombone": 3.0051832185149827}, {"Trumpet": 9.043371697353736, "Horn": 9.716240984222217}, {"Cello": 6.359476466604099, "Oboe": 4.796304527156384}, {"Trumpet": 9.231048774217435, "Tuba": 8.38556241363989}, {"Trumpet": 3.5384233035418204, "Trombone": 3.44092002772258}, {"Horn": 6.0538183707148505, "Trumpet": 3.775561000023913}, {"Trumpet": 6.178061029418467, "Horn": 9.79286613291342}, {"Trumpet": 
5.163960208725554, "Trombone": 3.8436211977201507}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.416882618532192, "Saxophone": 7.474259290702362}, {"Violin": 9.924318733624963, "Double_Bass": 6.182707743737116}, {"Cello": 7.882401078486675, "Viola": 5.864430615227453}, {"Violin": 8.489774058222615, "Double_Bass": 7.682567838239711}, {"Violin": 9.022972241623492, "Cello": 10.565909935787982}, {"Viola": 4.287354630411389, "Clarinet": 7.424186668267491}, {"Flute": 9.415552307364116, "Violin": 8.953834734700026}, {"Saxophone": -0.653448544623779, "Clarinet": 2.7986387306087925}, {"Saxophone": 10.272234791936679, "Tuba": 5.648046437435048}, {"Trombone": 6.599250581602627, "Saxophone": 7.643932774635623}, {"Oboe": 4.563991099456322, "Saxophone": 4.832802330728196}, {"Oboe": 4.130068453204881, "Viola": 8.46302185427415}, {"Flute": 8.024427741031744, "Cello": 10.406938447958975}, {"Flute": 11.144062449855463, "Trombone": 6.4143408765250856}, {"Clarinet": 8.326046353224521, "Horn": 10.075265695609708}, {"Clarinet": 10.887309333397841, "Bassoon": 5.51689812527586}, {"Trumpet": 6.536064974424214, "Violin": 5.7455272676035545}, {"Trumpet": 4.462293548665275, "Violin": 5.059602303540871}, {"Flute": 14.638059262942889, "Clarinet": 11.971569056772964}, {"Flute": 10.200641217533748, "Saxophone": 6.5557638697757215}, {"Bassoon": 8.545300734426757, "Oboe": -1.9224531182926174}, {"Trumpet": 4.879707799772849, "Trombone": 5.645456041249086}, {"Trumpet": 10.832033931174184, "Horn": 12.071548847417725}, {"Tuba": 6.36661354574702, "Trumpet": 10.192860758265319}, {"Trumpet": 4.279147833648288, "Trombone": 5.907035791249751}, {"Trumpet": 10.273901325893993, "Horn": 10.39831116916815}, {"Cello": 6.3849612614483044, "Oboe": 6.825264796261585}, {"Trumpet": 9.284632015987238, "Tuba": 8.918315325333362}, {"Trumpet": 5.047816027313996, "Trombone": 6.19169113582144}, {"Horn": 7.157389926817755, "Trumpet": 4.170868963913187}, {"Trumpet": 8.925785716934183, "Horn": 11.013365058995712}, {"Trumpet": 
5.788717441254181, "Trombone": 3.6658189558584557}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-192.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9361394554231726, "Saxophone": 0.507355947986083}, {"Violin": 0.9035493979955787, "Double_Bass": 0.7842063860975458}, {"Cello": 0.7644316713827919, "Viola": 0.5263633849957178}, {"Violin": 0.9365505736686, "Double_Bass": 0.8284833143854402}, {"Violin": 0.8423349355224742, "Cello": 0.839228454271447}, {"Viola": 0.6240776393424528, "Clarinet": 0.74941251966239}, {"Flute": 0.7196487429544639, "Violin": 0.876464916743926}, {"Saxophone": 0.48561929733248405, "Clarinet": 0.6548060321025061}, {"Saxophone": 0.47585414552639316, "Tuba": 0.5839950434367663}, {"Trombone": 0.8334086526345647, "Saxophone": 0.044808006443237555}, {"Oboe": 0.20454899014807562, "Saxophone": 0.448685365948155}, {"Oboe": 0.5216479439508223, "Viola": 0.8678643969922711}, {"Flute": 0.8149097734493286, "Cello": 0.9080176204659974}, {"Flute": 0.8395199842059287, "Trombone": 0.8786820932784377}, {"Clarinet": 0.7697982800904478, "Horn": 0.7803428692554318}, {"Clarinet": 0.7497705807279238, "Bassoon": 0.18500549826016005}, {"Trumpet": 0.8354577933863586, "Violin": 0.8174184373619785}, {"Trumpet": 0.6273069453440938, "Violin": 0.8432019835410031}, {"Flute": 0.9085927197470614, "Clarinet": 0.8008748407368664}, {"Flute": 0.5874170095914809, "Saxophone": 0.2930463072983357}, {"Bassoon": 0.23204534786943615, "Oboe": 0.6445144062134838}, {"Trumpet": 0.667560421157832, "Trombone": 0.8187067883923883}, {"Trumpet": 0.8245578482997433, "Horn": 0.8034427179511888}, {"Tuba": 0.6109277436986952, "Trumpet": 0.9226272321909085}, {"Trumpet": 0.7149172528171747, "Trombone": 0.569793646868569}, {"Trumpet": 0.9236047189736051, "Horn": 0.8685638333691418}, {"Cello": 0.5961044640558822, "Oboe": 0.5383706357167409}, {"Trumpet": 0.9403395517614258, "Tuba": 0.7423280694429848}, {"Trumpet": 0.8248085985139659, "Trombone": 0.6619350988089323}, {"Horn": 0.7989076652692978, "Trumpet": 0.8906921127595409}, {"Trumpet": 0.7175214499370501, "Horn": 0.773602903504052}, {"Trumpet": 
0.5906262562166622, "Trombone": 0.7351650157285545}], "separation": [{"Violin": 8.035166513774772, "Saxophone": 4.464094975769749}, {"Violin": 12.34001377868122, "Double_Bass": 7.254324752402312}, {"Cello": 7.09389787668715, "Viola": 3.6987374133551336}, {"Violin": 10.491858734453505, "Double_Bass": 7.816976469377801}, {"Violin": 12.20450171408885, "Cello": 14.298573392873546}, {"Viola": 3.051135000048191, "Clarinet": 5.202811749433797}, {"Flute": 10.914150638871476, "Violin": 7.533063832311553}, {"Saxophone": -3.063516407543825, "Clarinet": 1.3203185832075157}, {"Saxophone": 3.628852574319628, "Tuba": 6.652992299657383}, {"Trombone": 9.943070448119602, "Saxophone": -12.847788067725165}, {"Oboe": -7.291816804988782, "Saxophone": 0.32741228324087}, {"Oboe": 3.3727129734708865, "Viola": 9.344766927288688}, {"Flute": 11.323491812969444, "Cello": 14.81360054769368}, {"Flute": 12.85221676961761, "Trombone": 10.26307336578499}, {"Clarinet": 7.933362437702749, "Horn": 6.449547157755119}, {"Clarinet": 7.175871776302207, "Bassoon": -8.94347029159849}, {"Trumpet": 8.23513821002718, "Violin": 5.651490897968548}, {"Trumpet": 3.6488377792363296, "Violin": 2.6313096410622414}, {"Flute": 14.114316567424336, "Clarinet": 5.568780402591859}, {"Flute": 5.1562864997793465, "Saxophone": -3.5956745104946903}, {"Bassoon": -1.6585578086183066, "Oboe": 0.13914306895167333}, {"Trumpet": 3.822139539765784, "Trombone": 7.215936188055406}, {"Trumpet": 9.532306448850788, "Horn": 12.704855193397817}, {"Tuba": 14.595275924126522, "Trumpet": 13.439816373521081}, {"Trumpet": 3.1223404996272217, "Trombone": 2.5465515917671477}, {"Trumpet": 13.840447411805556, "Horn": 8.392067949699253}, {"Cello": 9.120797202926642, "Oboe": 8.023969651713166}, {"Trumpet": 13.755644538796581, "Tuba": 9.532603547906378}, {"Trumpet": 5.966257951763573, "Trombone": 4.33193410282663}, {"Horn": 6.62316898180327, "Trumpet": 5.069422218604237}, {"Trumpet": 4.963934699598849, "Horn": 12.745759425903058}, {"Trumpet": 
6.952457926174736, "Trombone": 6.263006744221888}]}, "MSI-S": {"separation": [{"Violin": 7.714739876401469, "Saxophone": 11.295923006277961}, {"Violin": 12.19058587219231, "Double_Bass": 6.87065965425178}, {"Cello": 8.401271651932268, "Viola": 8.401595588422664}, {"Violin": 12.025667180301607, "Double_Bass": 9.251738118810561}, {"Violin": 11.888601235356164, "Cello": 13.813763574707671}, {"Viola": 4.267899252407349, "Clarinet": 5.847582738537627}, {"Flute": 11.723662290229209, "Violin": 11.170828814127564}, {"Saxophone": -1.110819998355692, "Clarinet": 2.2594576933794115}, {"Saxophone": 10.736616807180123, "Tuba": 7.0524233525005044}, {"Trombone": 9.637816834235718, "Saxophone": 9.628402218639785}, {"Oboe": 7.200044011024138, "Saxophone": 8.248577932306917}, {"Oboe": 7.698095954525224, "Viola": 11.154373508506865}, {"Flute": 10.035328716800239, "Cello": 13.49098604942732}, {"Flute": 12.862390389993658, "Trombone": 10.200401908340382}, {"Clarinet": 7.797095736287459, "Horn": 11.71393518353264}, {"Clarinet": 11.00351516944156, "Bassoon": 7.90497809246499}, {"Trumpet": 7.205973408959282, "Violin": 2.8382673093364903}, {"Trumpet": 4.5744667192383055, "Violin": 4.029191732618011}, {"Flute": 16.41310799502572, "Clarinet": 9.678759478157609}, {"Flute": 10.403763818472093, "Saxophone": 5.348416194610808}, {"Bassoon": 12.314611461701785, "Oboe": -1.5191586272627204}, {"Trumpet": 6.305427916734421, "Trombone": 8.76475124129314}, {"Trumpet": 14.707879915628794, "Horn": 15.206919758083162}, {"Tuba": 10.458005991273867, "Trumpet": 13.614645736567649}, {"Trumpet": 5.806854866004546, "Trombone": 8.757502628879205}, {"Trumpet": 14.078232675350657, "Horn": 13.447319260378814}, {"Cello": 9.53112713863298, "Oboe": 11.393525479271087}, {"Trumpet": 13.887213187022054, "Tuba": 13.171748448322509}, {"Trumpet": 7.229478006517997, "Trombone": 7.95958219586563}, {"Horn": 7.770212674878571, "Trumpet": 5.352733237610803}, {"Trumpet": 13.551005242540459, "Horn": 14.048520471348029}, 
{"Trumpet": 6.844299763701207, "Trombone": 6.559767714661048}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-197.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9153824707282537, "Saxophone": 0.41214169958008584}, {"Violin": 0.9045804935702648, "Double_Bass": 0.7911181641453596}, {"Cello": 0.7928932546860387, "Viola": 0.7130892959007321}, {"Violin": 0.9150882141224306, "Double_Bass": 0.8330529664285353}, {"Violin": 0.8502225375004542, "Cello": 0.8448101189148244}, {"Viola": 0.6174415919700625, "Clarinet": 0.7436143252164596}, {"Flute": 0.7426473773313286, "Violin": 0.8351087831427891}, {"Saxophone": 0.5324677797485345, "Clarinet": 0.6450426916837877}, {"Saxophone": 0.46701875082509303, "Tuba": 0.5321708800891947}, {"Trombone": 0.8260272374717064, "Saxophone": 0.06238401252123674}, {"Oboe": 0.2083258930842431, "Saxophone": 0.4905063749391071}, {"Oboe": 0.5195363519679062, "Viola": 0.8874781704630496}, {"Flute": 0.8151307805056968, "Cello": 0.9144130414231073}, {"Flute": 0.8181321924616023, "Trombone": 0.8703783093512343}, {"Clarinet": 0.8358574862907144, "Horn": 0.763159651646269}, {"Clarinet": 0.7504875916727449, "Bassoon": 0.17535066467142718}, {"Trumpet": 0.7770635415452568, "Violin": 0.7404910794898754}, {"Trumpet": 0.6925855087813726, "Violin": 0.809406494831253}, {"Flute": 0.7577412483587282, "Clarinet": 0.7234402226250822}, {"Flute": 0.5846565242844187, "Saxophone": 0.34834398530824295}, {"Bassoon": 0.09001586626504829, "Oboe": 0.537029034510867}, {"Trumpet": 0.515690658479332, "Trombone": 0.8090722589949623}, {"Trumpet": 0.7172186086173673, "Horn": 0.7874070319119018}, {"Tuba": 0.6794970850143396, "Trumpet": 0.8957403637649856}, {"Trumpet": 0.6910479805002857, "Trombone": 0.5908810794484339}, {"Trumpet": 0.921405628333067, "Horn": 0.8998108216774549}, {"Cello": 0.5737315911821874, "Oboe": 0.5185059797624623}, {"Trumpet": 0.9288160845004607, "Tuba": 0.8684865634257202}, {"Trumpet": 0.8170581920069685, "Trombone": 0.5874078297813579}, {"Horn": 0.8035340164823668, "Trumpet": 0.8922853242570884}, {"Trumpet": 0.7455946337571833, "Horn": 0.7362979091072929}, {"Trumpet": 
0.5912517334103375, "Trombone": 0.7221973759574}], "separation": [{"Violin": 7.755068867923328, "Saxophone": 4.422501072577965}, {"Violin": 12.080766986936542, "Double_Bass": 7.435378841525225}, {"Cello": 6.860982002635446, "Viola": 6.009800388649049}, {"Violin": 9.833935180897534, "Double_Bass": 9.846639373025631}, {"Violin": 12.086120611092676, "Cello": 14.381731419180579}, {"Viola": 3.113145795382743, "Clarinet": 4.8391882269034}, {"Flute": 11.484319375517016, "Violin": 6.580420106625952}, {"Saxophone": -3.2440470309831544, "Clarinet": 1.78726912163885}, {"Saxophone": 2.8993420335009565, "Tuba": 6.763843189971172}, {"Trombone": 9.64093423731461, "Saxophone": -11.213988330085293}, {"Oboe": -7.687328965970424, "Saxophone": 0.35918048576109485}, {"Oboe": 1.8964445822040523, "Viola": 10.542032678087882}, {"Flute": 10.791933163964751, "Cello": 15.066843482581053}, {"Flute": 12.486141115659413, "Trombone": 10.45957407608897}, {"Clarinet": 9.529626072146536, "Horn": 6.164771839902689}, {"Clarinet": 6.563705741581037, "Bassoon": -8.963748181041746}, {"Trumpet": 5.867211030244906, "Violin": 5.703806191178326}, {"Trumpet": 3.8326188235150536, "Violin": 3.6388439073024283}, {"Flute": 5.146714454087881, "Clarinet": 7.2113743370218275}, {"Flute": 5.755697196006264, "Saxophone": 0.10635920845241215}, {"Bassoon": -3.8856130064807237, "Oboe": 0.10825542762723303}, {"Trumpet": -0.19270147051477943, "Trombone": 7.580539667933356}, {"Trumpet": 5.589441820597825, "Horn": 11.133836592623043}, {"Tuba": 14.869850156412873, "Trumpet": 13.010566847722455}, {"Trumpet": 2.0696810745276144, "Trombone": 2.893310585545757}, {"Trumpet": 10.035910702967056, "Horn": 12.354128806998741}, {"Cello": 9.169286771222803, "Oboe": 5.975852893791725}, {"Trumpet": 12.858820157397826, "Tuba": 12.76781745062759}, {"Trumpet": 5.719049872936772, "Trombone": 3.2686446216980825}, {"Horn": 6.768962520832441, "Trumpet": 4.444869132478698}, {"Trumpet": 6.26931565556651, "Horn": 13.661250175350046}, {"Trumpet": 
6.215661723487587, "Trombone": 5.745846146157682}]}, "MSI-S": {"separation": [{"Violin": 7.66366279066125, "Saxophone": 11.244583651332654}, {"Violin": 12.228989460598408, "Double_Bass": 6.768548405981781}, {"Cello": 7.998889107818876, "Viola": 8.579519788333124}, {"Violin": 11.849298535238166, "Double_Bass": 9.299678083853578}, {"Violin": 11.573991972437323, "Cello": 13.823600928897001}, {"Viola": 4.71362579292008, "Clarinet": 5.381836900499285}, {"Flute": 11.777855718623641, "Violin": 10.5582819039724}, {"Saxophone": -1.1954460126187554, "Clarinet": 2.4508310005017537}, {"Saxophone": 10.568746780983513, "Tuba": 7.340229763791733}, {"Trombone": 9.49534335658358, "Saxophone": 9.565232463807876}, {"Oboe": 7.04762396445329, "Saxophone": 8.382335663264996}, {"Oboe": 6.667060559374289, "Viola": 11.354445053253963}, {"Flute": 9.56967681963538, "Cello": 13.897774518140626}, {"Flute": 12.972836300233281, "Trombone": 10.393963429448359}, {"Clarinet": 7.841777585323474, "Horn": 11.820228633292762}, {"Clarinet": 10.942645142081233, "Bassoon": 7.747615142167936}, {"Trumpet": 7.097462788236941, "Violin": 3.3775879822091865}, {"Trumpet": 4.407141455325539, "Violin": 4.22408249238051}, {"Flute": 16.43206291308859, "Clarinet": 9.47248175011848}, {"Flute": 10.490765677634396, "Saxophone": 5.87301120869226}, {"Bassoon": 12.56792453334784, "Oboe": -1.5035733674689964}, {"Trumpet": 6.346680262737556, "Trombone": 8.889645110172808}, {"Trumpet": 14.64327020923933, "Horn": 15.565914924061346}, {"Tuba": 10.514784214633877, "Trumpet": 13.663930378832866}, {"Trumpet": 5.694484004615681, "Trombone": 8.435074247798168}, {"Trumpet": 14.08316056638894, "Horn": 13.707306463831292}, {"Cello": 9.284111951374648, "Oboe": 10.201066976880808}, {"Trumpet": 13.99187267987431, "Tuba": 13.626781149872414}, {"Trumpet": 7.1742928723302, "Trombone": 7.950234684456116}, {"Horn": 8.012831912372725, "Trumpet": 5.4367097116978265}, {"Trumpet": 13.416754975741762, "Horn": 14.37795240682023}, {"Trumpet": 
6.802765532136819, "Trombone": 6.53587072892728}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-192.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9384123359352381, "Saxophone": 0.5346570619299896}, {"Violin": 0.8962927289131267, "Double_Bass": 0.6925760894070635}, {"Cello": 0.7865696716445996, "Viola": 0.6328194305933684}, {"Violin": 0.9229585584035308, "Double_Bass": 0.8234973511338932}, {"Violin": 0.8675022604793753, "Cello": 0.8524542862208624}, {"Viola": 0.6721445941556236, "Clarinet": 0.7030537891053213}, {"Flute": 0.7191649012486236, "Violin": 0.8854475675966219}, {"Saxophone": 0.5735980055398083, "Clarinet": 0.6766489808782865}, {"Saxophone": 0.6335845542528676, "Tuba": 0.4738865609514924}, {"Trombone": 0.8016375878324989, "Saxophone": 0.03340697404214741}, {"Oboe": 0.16436982503858766, "Saxophone": 0.49189164588593626}, {"Oboe": 0.5052150558649842, "Viola": 0.872151840379869}, {"Flute": 0.8346843936448153, "Cello": 0.8960843101571067}, {"Flute": 0.839611668886874, "Trombone": 0.8675815445133644}, {"Clarinet": 0.7837561543668288, "Horn": 0.7387845171199218}, {"Clarinet": 0.6718173992775168, "Bassoon": 0.33206436768229114}, {"Trumpet": 0.8338942492214145, "Violin": 0.8108613346884792}, {"Trumpet": 0.6704463718346559, "Violin": 0.8694303579253583}, {"Flute": 0.7931344037303044, "Clarinet": 0.719040802778413}, {"Flute": 0.5695275052929194, "Saxophone": 0.3026071769602664}, {"Bassoon": 0.3594576296462653, "Oboe": 0.27715871804662257}, {"Trumpet": 0.5734960367787332, "Trombone": 0.8184933684774732}, {"Trumpet": 0.5645511494524913, "Horn": 0.881125193281301}, {"Tuba": 0.36213145389489865, "Trumpet": 0.8986994703967741}, {"Trumpet": 0.6801017199121147, "Trombone": 0.612806917918625}, {"Trumpet": 0.9364567222873682, "Horn": 0.8671034065677014}, {"Cello": 0.6115863242780858, "Oboe": 0.3858034810156793}, {"Trumpet": 0.9402636964809244, "Tuba": 0.854377219843421}, {"Trumpet": 0.8189789431625908, "Trombone": 0.6402501351182317}, {"Horn": 0.7831049616954892, "Trumpet": 0.8278607750927547}, {"Trumpet": 0.7369004718873324, "Horn": 0.7659167945030255}, {"Trumpet": 
0.6553875189037773, "Trombone": 0.7141208861563172}], "separation": [{"Violin": 6.676672559822361, "Saxophone": 2.3348465242002376}, {"Violin": 9.404163814410834, "Double_Bass": 7.420192402823448}, {"Cello": 7.467213998833598, "Viola": 3.7064337620231678}, {"Violin": 7.54802382022195, "Double_Bass": 7.916627083214696}, {"Violin": 9.482879897662128, "Cello": 10.787081371782484}, {"Viola": 2.7177377498870134, "Clarinet": 4.862530245481819}, {"Flute": 9.389141850603275, "Violin": 5.632359440528358}, {"Saxophone": -2.0959229954479937, "Clarinet": 0.9372701898654899}, {"Saxophone": 7.149761932689749, "Tuba": 4.810568493617931}, {"Trombone": 6.643752445184459, "Saxophone": -14.873575377223471}, {"Oboe": -8.692588407161985, "Saxophone": -1.661061117648404}, {"Oboe": 0.08612609923532208, "Viola": 6.800032423727425}, {"Flute": 8.795309854093162, "Cello": 11.155895617464928}, {"Flute": 11.317366939741545, "Trombone": 6.483312399154793}, {"Clarinet": 7.217236860934996, "Horn": 5.294140288413738}, {"Clarinet": 4.320818162122788, "Bassoon": -7.096628571169973}, {"Trumpet": 6.011886477347743, "Violin": 6.1212527259447445}, {"Trumpet": 1.9426567655278613, "Violin": 4.434570125133407}, {"Flute": 11.965756516675658, "Clarinet": 6.460077520222929}, {"Flute": 6.834174082154183, "Saxophone": -6.684840750984334}, {"Bassoon": -0.83765750859254, "Oboe": -1.282289127572327}, {"Trumpet": 4.439209297944088, "Trombone": 5.45674405508683}, {"Trumpet": 2.434565904107025, "Horn": 10.004313559164444}, {"Tuba": 8.450388809469612, "Trumpet": 10.242706751554667}, {"Trumpet": 2.687476515460918, "Trombone": 1.649756481931083}, {"Trumpet": 9.537940501529018, "Horn": 8.644672076952018}, {"Cello": 7.0772707457215, "Oboe": 4.171325883268516}, {"Trumpet": 8.62281891670103, "Tuba": 8.400674851801151}, {"Trumpet": 4.392363040595803, "Trombone": 3.2647108195159085}, {"Horn": 5.645558169926554, "Trumpet": 1.9590712858935833}, {"Trumpet": 4.615250386092959, "Horn": 8.085999211659177}, {"Trumpet": 
6.240655851582709, "Trombone": 3.6316358012160297}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.475938121354281, "Saxophone": 7.039929637452515}, {"Violin": 9.48979487380436, "Double_Bass": 6.688641069075696}, {"Cello": 7.734235476544503, "Viola": 5.837932441192093}, {"Violin": 8.141032800785874, "Double_Bass": 7.342035419223208}, {"Violin": 8.972648641447664, "Cello": 10.487373977713737}, {"Viola": 3.5828883186346374, "Clarinet": 7.291456794422764}, {"Flute": 9.26763701734334, "Violin": 8.367825884161153}, {"Saxophone": -0.26808767963785013, "Clarinet": 2.411447794388967}, {"Saxophone": 10.158230973179332, "Tuba": 5.831841040042562}, {"Trombone": 6.450371386451965, "Saxophone": 7.572108090463495}, {"Oboe": 4.31346828275459, "Saxophone": 4.624909827568285}, {"Oboe": 3.4850337248624803, "Viola": 8.256076871168561}, {"Flute": 7.814479014778533, "Cello": 10.287352700697724}, {"Flute": 11.250317433242623, "Trombone": 6.645725487001409}, {"Clarinet": 8.441457230615303, "Horn": 9.901041998288576}, {"Clarinet": 9.99903442974184, "Bassoon": 5.497798618238241}, {"Trumpet": 6.380382802058308, "Violin": 5.074638578028992}, {"Trumpet": 3.9325723387538423, "Violin": 4.999301508113722}, {"Flute": 14.591513129281687, "Clarinet": 11.936891155742098}, {"Flute": 10.186406901845391, "Saxophone": 5.5828841904890725}, {"Bassoon": 7.898332672412542, "Oboe": -1.9405367318789388}, {"Trumpet": 4.7368874319295475, "Trombone": 5.580470552333834}, {"Trumpet": 10.724081829994422, "Horn": 11.223131969468497}, {"Tuba": 6.744742354871911, "Trumpet": 10.160980610538417}, {"Trumpet": 4.158253938030842, "Trombone": 5.73621962593748}, {"Trumpet": 9.845215623354214, "Horn": 9.766528233677214}, {"Cello": 6.8711354116474475, "Oboe": 7.582145920387572}, {"Trumpet": 8.998808793158993, "Tuba": 9.222062646563016}, {"Trumpet": 5.276183062957486, "Trombone": 6.5064418893823905}, {"Horn": 7.532194673491736, "Trumpet": 3.744302912712678}, {"Trumpet": 8.767605793458053, "Horn": 10.734268569509894}, {"Trumpet": 
5.689950371820757, "Trombone": 3.4903668078261862}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-193.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9221536748615152, "Saxophone": 0.5159039490739373}, {"Violin": 0.9010616149401837, "Double_Bass": 0.6992582099983028}, {"Cello": 0.7794827597511965, "Viola": 0.6514306690951148}, {"Violin": 0.9188626163753273, "Double_Bass": 0.832384185248992}, {"Violin": 0.861941555610172, "Cello": 0.8562145385216754}, {"Viola": 0.6536868735526751, "Clarinet": 0.7855589335603378}, {"Flute": 0.705507503593955, "Violin": 0.8715179540765242}, {"Saxophone": 0.6151628328619586, "Clarinet": 0.6541424701272047}, {"Saxophone": 0.6231490093869938, "Tuba": 0.5713686766768591}, {"Trombone": 0.7552381345614598, "Saxophone": 0.08278921110688948}, {"Oboe": 0.1691090082384871, "Saxophone": 0.4170089932484352}, {"Oboe": 0.4581272819730028, "Viola": 0.8305759646210241}, {"Flute": 0.8022176514676511, "Cello": 0.9066148411365446}, {"Flute": 0.8692350444393018, "Trombone": 0.8606650669722671}, {"Clarinet": 0.7705367153379585, "Horn": 0.6312154253278269}, {"Clarinet": 0.6605347926486815, "Bassoon": 0.19589351743961042}, {"Trumpet": 0.8761042878558835, "Violin": 0.7841301028681331}, {"Trumpet": 0.8142610626256009, "Violin": 0.8126155222293977}, {"Flute": 0.7678230451592728, "Clarinet": 0.7538866991273583}, {"Flute": 0.6240901879393502, "Saxophone": 0.20514400479363035}, {"Bassoon": 0.36299310021672926, "Oboe": 0.33765698711886855}, {"Trumpet": 0.6131605053943183, "Trombone": 0.8384212126095958}, {"Trumpet": 0.7060713013299754, "Horn": 0.8573255256086534}, {"Tuba": 0.5366662399291285, "Trumpet": 0.928101569000177}, {"Trumpet": 0.6410040517037849, "Trombone": 0.6225602124395043}, {"Trumpet": 0.9160642452915085, "Horn": 0.837632835538449}, {"Cello": 0.6412579651673358, "Oboe": 0.3463361773243479}, {"Trumpet": 0.9396425697730952, "Tuba": 0.8976548442863882}, {"Trumpet": 0.7775781818028951, "Trombone": 0.5895020114865376}, {"Horn": 0.8028814145398346, "Trumpet": 0.8710161448114203}, {"Trumpet": 0.7884024667753711, "Horn": 0.7290748463357478}, {"Trumpet": 
0.6148168900530278, "Trombone": 0.7076562730436958}], "separation": [{"Violin": 7.447542824370913, "Saxophone": 3.607049027669911}, {"Violin": 8.908582752233938, "Double_Bass": 7.532810643671405}, {"Cello": 7.020854023113169, "Viola": 2.9643882138428923}, {"Violin": 7.636430287101893, "Double_Bass": 8.362693622471237}, {"Violin": 9.257392967727089, "Cello": 11.261348224020999}, {"Viola": 1.5926315987380837, "Clarinet": 5.077777573311809}, {"Flute": 9.566443195952099, "Violin": 5.591125960981456}, {"Saxophone": -1.4693347571010213, "Clarinet": 1.0740330314851707}, {"Saxophone": 7.04927058679057, "Tuba": 5.045051142406015}, {"Trombone": 6.399783373400477, "Saxophone": -9.098995707150296}, {"Oboe": -8.508813865314037, "Saxophone": -2.8216598855219566}, {"Oboe": 0.696002973854982, "Viola": 6.203017831029543}, {"Flute": 9.096964948314998, "Cello": 11.11544773758067}, {"Flute": 11.477196145468621, "Trombone": 6.0160097113426145}, {"Clarinet": 6.9452046032267525, "Horn": 3.528476832178025}, {"Clarinet": 4.6871999247111775, "Bassoon": -10.81966877064463}, {"Trumpet": 6.625234102329152, "Violin": 5.083696067497239}, {"Trumpet": 3.9392100344945016, "Violin": 5.090414482285286}, {"Flute": 10.921668720335164, "Clarinet": 6.874751472031566}, {"Flute": 6.926190714163707, "Saxophone": -6.373300901445483}, {"Bassoon": -0.1192442180161776, "Oboe": -0.8555344180531811}, {"Trumpet": 3.865316609734188, "Trombone": 5.677977093372287}, {"Trumpet": 3.8622922338023766, "Horn": 9.940066373943312}, {"Tuba": 8.525103768538482, "Trumpet": 10.184205551819662}, {"Trumpet": 3.3907497288820796, "Trombone": 2.0870595403592644}, {"Trumpet": 9.686416668700474, "Horn": 7.73029442836971}, {"Cello": 6.760472587429841, "Oboe": 4.0498399769575375}, {"Trumpet": 9.219971737370685, "Tuba": 9.537632387384344}, {"Trumpet": 4.188663126763118, "Trombone": 3.117197934082434}, {"Horn": 5.934709538557721, "Trumpet": 2.8242000035904398}, {"Trumpet": 7.490075627103366, "Horn": 7.221520694997765}, {"Trumpet": 
6.282851723700981, "Trombone": 3.774421874615943}]}, "MSI-DIS-S": {"separation": [{"Violin": 7.239493454803423, "Saxophone": 7.506072445165949}, {"Violin": 9.869830067764806, "Double_Bass": 7.100157710784649}, {"Cello": 7.347228243021272, "Viola": 4.987958553140595}, {"Violin": 8.128449083671693, "Double_Bass": 7.904265275002693}, {"Violin": 9.13949461002675, "Cello": 10.878504261647953}, {"Viola": 3.632442417459951, "Clarinet": 7.449472292120269}, {"Flute": 9.516783167133195, "Violin": 8.143956147613366}, {"Saxophone": -0.17762689596302678, "Clarinet": 2.1793291383255355}, {"Saxophone": 10.119266554654757, "Tuba": 6.134626277672989}, {"Trombone": 6.420280463941335, "Saxophone": 7.7925617085346435}, {"Oboe": 4.276621865477028, "Saxophone": 4.813759018305921}, {"Oboe": 4.401057648014203, "Viola": 8.708000929787529}, {"Flute": 7.999859354544244, "Cello": 10.181199794543085}, {"Flute": 11.267008889009219, "Trombone": 6.388730143189148}, {"Clarinet": 8.023229548466201, "Horn": 10.19842257726027}, {"Clarinet": 10.395582765878588, "Bassoon": 4.865036784169604}, {"Trumpet": 6.844486092443402, "Violin": 4.228082978262868}, {"Trumpet": 5.063036530869571, "Violin": 4.924570606723959}, {"Flute": 14.821293086089488, "Clarinet": 10.60856592695377}, {"Flute": 10.408724357814076, "Saxophone": 5.620699750759234}, {"Bassoon": 7.89251681392704, "Oboe": -1.3927658826686393}, {"Trumpet": 5.22293532872267, "Trombone": 5.498201262919498}, {"Trumpet": 10.97898850582823, "Horn": 11.471732330352921}, {"Tuba": 6.869678805736095, "Trumpet": 10.315352532353245}, {"Trumpet": 4.474589850812589, "Trombone": 5.495595549559411}, {"Trumpet": 10.47462282968281, "Horn": 10.718394022007185}, {"Cello": 6.686888935581265, "Oboe": 7.1249781961920355}, {"Trumpet": 9.247771847934615, "Tuba": 9.977057602859068}, {"Trumpet": 5.379798964004117, "Trombone": 6.311303663055247}, {"Horn": 7.576471000456349, "Trumpet": 4.405704831464769}, {"Trumpet": 8.952465399131778, "Horn": 10.87587174150849}, {"Trumpet": 
5.916607646441453, "Trombone": 3.7453138760251568}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-194.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9332389130315771, "Saxophone": 0.5405101158127558}, {"Violin": 0.8402200154372012, "Double_Bass": 0.7512393976986393}, {"Cello": 0.7979413727594686, "Viola": 0.7202389521849519}, {"Violin": 0.9195613281926149, "Double_Bass": 0.848038520474201}, {"Violin": 0.8487739058044316, "Cello": 0.8432960960775304}, {"Viola": 0.6641803912148794, "Clarinet": 0.6752264983190095}, {"Flute": 0.7271990937410853, "Violin": 0.8648348076552865}, {"Saxophone": 0.5552518150524953, "Clarinet": 0.5594854615778567}, {"Saxophone": 0.4231039982381646, "Tuba": 0.6383050701480621}, {"Trombone": 0.7562222069501227, "Saxophone": 0.034512512546808746}, {"Oboe": 0.21458616507734307, "Saxophone": 0.46293629939405023}, {"Oboe": 0.4792162750822663, "Viola": 0.871780967239092}, {"Flute": 0.8020802000263941, "Cello": 0.9017678435962615}, {"Flute": 0.865704113117796, "Trombone": 0.8500849831102429}, {"Clarinet": 0.7045613022886172, "Horn": 0.7931135118962632}, {"Clarinet": 0.5541512809904605, "Bassoon": 0.2753682483924805}, {"Trumpet": 0.7969852464410275, "Violin": 0.7898942737868044}, {"Trumpet": 0.6165833742188005, "Violin": 0.831619428716475}, {"Flute": 0.9636425135887419, "Clarinet": 0.5512659985982284}, {"Flute": 0.7590098407752213, "Saxophone": 0.16977345123489201}, {"Bassoon": 0.4166742226101159, "Oboe": 0.5135983699578529}, {"Trumpet": 0.6692731580022638, "Trombone": 0.830099563538242}, {"Trumpet": 0.6736810617012595, "Horn": 0.8626397706179734}, {"Tuba": 0.41188870250608783, "Trumpet": 0.8897904946374271}, {"Trumpet": 0.6425723694942701, "Trombone": 0.5440666238889852}, {"Trumpet": 0.8985887482918221, "Horn": 0.8944965943344576}, {"Cello": 0.5716330938678559, "Oboe": 0.4043811154654226}, {"Trumpet": 0.9392592446683938, "Tuba": 0.8722474026895343}, {"Trumpet": 0.8048621282608199, "Trombone": 0.4966428953042927}, {"Horn": 0.8179877319130804, "Trumpet": 0.8741191560077508}, {"Trumpet": 0.6826889542936153, "Horn": 0.7489649650835999}, {"Trumpet": 
0.6253170904113182, "Trombone": 0.708692600202517}], "separation": [{"Violin": 7.1053311685807365, "Saxophone": 1.9698055162821557}, {"Violin": 8.370993831818467, "Double_Bass": 7.380666787305563}, {"Cello": 6.75903322035598, "Viola": 3.911443156476823}, {"Violin": 7.871385249049576, "Double_Bass": 7.198124354381231}, {"Violin": 9.39533283680237, "Cello": 11.200916956609074}, {"Viola": 2.3077121929175894, "Clarinet": 3.9839693881321647}, {"Flute": 8.86841865139781, "Violin": 4.743356708698105}, {"Saxophone": -1.113223598404096, "Clarinet": 0.3729680168595979}, {"Saxophone": 2.0255907668663506, "Tuba": 4.585345134983456}, {"Trombone": 6.438993742431647, "Saxophone": -15.383692726073615}, {"Oboe": -5.582535882411319, "Saxophone": -2.808477362275144}, {"Oboe": 1.059349684023927, "Viola": 7.689032851701025}, {"Flute": 8.64086732672847, "Cello": 11.206444484367132}, {"Flute": 11.581225568846207, "Trombone": 5.253926913407558}, {"Clarinet": 5.814819602940879, "Horn": 7.067580730392679}, {"Clarinet": 2.2345968719234537, "Bassoon": -7.939268154754919}, {"Trumpet": 5.70903608912386, "Violin": 6.077382255109814}, {"Trumpet": 1.9589692721017364, "Violin": 4.651689416589583}, {"Flute": 13.892627811980788, "Clarinet": 5.243847124873211}, {"Flute": 9.319818682702124, "Saxophone": -9.416088083701698}, {"Bassoon": -1.807542648346924, "Oboe": 0.7635097464157224}, {"Trumpet": 3.661909549845289, "Trombone": 5.362562599470123}, {"Trumpet": 4.051410290405063, "Horn": 10.25498465553827}, {"Tuba": 7.278648460206522, "Trumpet": 10.295022755852488}, {"Trumpet": 4.067942445696543, "Trombone": 0.04968077762432831}, {"Trumpet": 8.860049352520882, "Horn": 9.70992258426048}, {"Cello": 7.023451175074813, "Oboe": 7.649457938099364}, {"Trumpet": 8.825893531240506, "Tuba": 8.138872255484547}, {"Trumpet": 3.5542694674054562, "Trombone": 1.2114207823942988}, {"Horn": 5.395239557538057, "Trumpet": 3.0083327445765233}, {"Trumpet": 4.774994852593438, "Horn": 10.478567451166734}, {"Trumpet": 
5.662184903676573, "Trombone": 3.365869115976767}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.799490998246122, "Saxophone": 7.425779725416931}, {"Violin": 9.956535132580179, "Double_Bass": 6.704841691489635}, {"Cello": 7.86811649670787, "Viola": 5.392519622783749}, {"Violin": 8.079542574273791, "Double_Bass": 6.887743208562954}, {"Violin": 9.082905092128827, "Cello": 10.643084831891148}, {"Viola": 3.8994376028471986, "Clarinet": 7.291146991893298}, {"Flute": 8.919201020319544, "Violin": 8.600458689421629}, {"Saxophone": -0.4808766981547216, "Clarinet": 2.7486685315504915}, {"Saxophone": 9.857761288081214, "Tuba": 5.430798836678132}, {"Trombone": 6.343344777106807, "Saxophone": 7.644559416741884}, {"Oboe": 4.318504041689563, "Saxophone": 4.788817047133102}, {"Oboe": 3.8882812628062196, "Viola": 8.217738725589324}, {"Flute": 7.783745612484866, "Cello": 10.274815251507402}, {"Flute": 11.392419268758466, "Trombone": 6.3899052793287305}, {"Clarinet": 8.588810669317162, "Horn": 9.916349899736838}, {"Clarinet": 10.679200633069195, "Bassoon": 4.799325351838396}, {"Trumpet": 6.231051922455892, "Violin": 5.310333759644454}, {"Trumpet": 4.045488695329501, "Violin": 4.889235998566622}, {"Flute": 14.356487020908428, "Clarinet": 11.11459785825151}, {"Flute": 10.498633411346487, "Saxophone": 5.906452133723401}, {"Bassoon": 7.720671588503598, "Oboe": -1.4666033883677212}, {"Trumpet": 5.066955658790732, "Trombone": 5.7367770408728695}, {"Trumpet": 10.766624378014008, "Horn": 11.499517449337883}, {"Tuba": 6.150860624600508, "Trumpet": 10.352278837321748}, {"Trumpet": 4.163570986292237, "Trombone": 6.065049265397986}, {"Trumpet": 10.258374100752008, "Horn": 10.963462023668848}, {"Cello": 6.526691435934246, "Oboe": 7.533552686684506}, {"Trumpet": 9.164268285175213, "Tuba": 8.514495401959216}, {"Trumpet": 5.312502454533419, "Trombone": 6.470768235086438}, {"Horn": 7.055817296012165, "Trumpet": 4.220578214702017}, {"Trumpet": 9.002451772622685, "Horn": 10.796805763790122}, {"Trumpet": 
5.834271324744451, "Trombone": 3.408497572336328}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-195.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.9338328209972552, "Saxophone": 0.5236792603925782}, {"Violin": 0.9139512907476833, "Double_Bass": 0.6956826196563523}, {"Cello": 0.7880293484210517, "Viola": 0.6186272341486709}, {"Violin": 0.9445934860283608, "Double_Bass": 0.8208049144500267}, {"Violin": 0.8521911321824317, "Cello": 0.8398067088760933}, {"Viola": 0.6284188829778667, "Clarinet": 0.7565251390874353}, {"Flute": 0.7084593295112571, "Violin": 0.8992738888159868}, {"Saxophone": 0.5173594980631657, "Clarinet": 0.5253347395158582}, {"Saxophone": 0.49855422592151255, "Tuba": 0.5651050087940954}, {"Trombone": 0.7790337195548938, "Saxophone": 0.03190459112822542}, {"Oboe": 0.2063498385830621, "Saxophone": 0.44622413420568846}, {"Oboe": 0.5765082210786721, "Viola": 0.8281592294407049}, {"Flute": 0.7903235696478881, "Cello": 0.9034958263907762}, {"Flute": 0.814673146255352, "Trombone": 0.8763335107708878}, {"Clarinet": 0.7390884513112589, "Horn": 0.7285923227086202}, {"Clarinet": 0.6024513121605906, "Bassoon": 0.24753767387647693}, {"Trumpet": 0.8302173962665341, "Violin": 0.7756160345709969}, {"Trumpet": 0.7164086131221123, "Violin": 0.866874157645728}, {"Flute": 0.7491982612022688, "Clarinet": 0.790861300467682}, {"Flute": 0.6467448762918807, "Saxophone": 0.3830757732808301}, {"Bassoon": 0.5280544365530617, "Oboe": 0.28094219293045986}, {"Trumpet": 0.6342148521489019, "Trombone": 0.8273458240629123}, {"Trumpet": 0.6361829913219859, "Horn": 0.8590791006154075}, {"Tuba": 0.4360330905334375, "Trumpet": 0.9191040449795649}, {"Trumpet": 0.6465191544886886, "Trombone": 0.586333187551523}, {"Trumpet": 0.9224271401978115, "Horn": 0.9069070340595503}, {"Cello": 0.5937481139995986, "Oboe": 0.3312896656244718}, {"Trumpet": 0.934402280806624, "Tuba": 0.8399591593525064}, {"Trumpet": 0.8101486400089163, "Trombone": 0.5148221059525258}, {"Horn": 0.7848972919717536, "Trumpet": 0.9034203391880866}, {"Trumpet": 0.7672127574964576, "Horn": 0.7628041173812228}, {"Trumpet": 
0.639452654378895, "Trombone": 0.6824340919319164}], "separation": [{"Violin": 7.157068508725844, "Saxophone": 2.7753264613512103}, {"Violin": 9.41461180450968, "Double_Bass": 8.013605645580059}, {"Cello": 8.234945178105281, "Viola": 2.502490023024613}, {"Violin": 7.757418445524236, "Double_Bass": 8.083175338050463}, {"Violin": 9.493479633247155, "Cello": 11.514902227435211}, {"Viola": 2.3452577451951617, "Clarinet": 4.371484920411191}, {"Flute": 9.176407855928895, "Violin": 5.394931291218249}, {"Saxophone": -3.435599788553202, "Clarinet": 2.3134847979012907}, {"Saxophone": 3.0759504763426886, "Tuba": 5.541911924644928}, {"Trombone": 6.576680270656013, "Saxophone": -15.781537678785813}, {"Oboe": -6.681036316491591, "Saxophone": -2.7319831689256024}, {"Oboe": 0.690821797199349, "Viola": 5.90326967846503}, {"Flute": 8.675869313338074, "Cello": 11.536426563993922}, {"Flute": 10.929509940422244, "Trombone": 6.290413672961987}, {"Clarinet": 6.961157735490675, "Horn": 4.694225158653683}, {"Clarinet": 4.320404985262499, "Bassoon": -8.508427023227656}, {"Trumpet": 6.024360565505678, "Violin": 7.358121448827502}, {"Trumpet": 3.168822772668201, "Violin": 4.301075619611758}, {"Flute": 11.640068000845424, "Clarinet": 7.074285214394811}, {"Flute": 7.951613795876789, "Saxophone": -2.9712397873086798}, {"Bassoon": 2.4055814518915444, "Oboe": -3.3903595760996312}, {"Trumpet": 3.6244899778676185, "Trombone": 5.3658703031808}, {"Trumpet": 4.591235558008526, "Horn": 10.25672050772932}, {"Tuba": 7.591665689206636, "Trumpet": 10.129239899323997}, {"Trumpet": 3.9323628955983354, "Trombone": 1.1779565444544435}, {"Trumpet": 10.173864924310735, "Horn": 10.720615515794165}, {"Cello": 7.397002470102136, "Oboe": 3.89245142411557}, {"Trumpet": 8.7742734131817, "Tuba": 8.893527506931688}, {"Trumpet": 4.179571756813656, "Trombone": 0.8318161749817116}, {"Horn": 5.9437689921502335, "Trumpet": 3.85413913280958}, {"Trumpet": 6.89989518933137, "Horn": 9.833876989640656}, {"Trumpet": 
5.683645789939662, "Trombone": 3.2526869465945243}]}, "MSI-DIS-S": {"separation": [{"Violin": 7.080065769522134, "Saxophone": 7.131776126824328}, {"Violin": 9.532570643979172, "Double_Bass": 7.175293923686416}, {"Cello": 8.274862404005663, "Viola": 5.332899365134224}, {"Violin": 7.8258153135384, "Double_Bass": 7.60321497656927}, {"Violin": 9.126633539391145, "Cello": 11.082905545553988}, {"Viola": 3.578198883428407, "Clarinet": 7.473071515888863}, {"Flute": 9.243527446027135, "Violin": 7.822670154520757}, {"Saxophone": -0.47885591171505204, "Clarinet": 2.621365118601928}, {"Saxophone": 9.791159550615005, "Tuba": 5.846612092015748}, {"Trombone": 6.607971371924172, "Saxophone": 7.7105629683717645}, {"Oboe": 4.180971674204535, "Saxophone": 4.141570894905506}, {"Oboe": 3.5983246589189033, "Viola": 8.114385887500946}, {"Flute": 8.189495886110805, "Cello": 10.584835587495316}, {"Flute": 11.071086279571622, "Trombone": 6.553307548863437}, {"Clarinet": 8.044957057253944, "Horn": 9.870753922218912}, {"Clarinet": 10.81315446134979, "Bassoon": 5.723103229196096}, {"Trumpet": 6.5418271653021005, "Violin": 5.248009288530595}, {"Trumpet": 5.3000606666703565, "Violin": 4.797505770849435}, {"Flute": 14.909777035150512, "Clarinet": 11.929730137292578}, {"Flute": 10.275247742548748, "Saxophone": 5.945074203737635}, {"Bassoon": 9.109491971448737, "Oboe": -1.3348741612415391}, {"Trumpet": 4.943938506056652, "Trombone": 5.801938668648426}, {"Trumpet": 10.580778082771772, "Horn": 11.137316445290306}, {"Tuba": 6.674226026525268, "Trumpet": 10.134546085209251}, {"Trumpet": 4.520601242941067, "Trombone": 5.6317937230840425}, {"Trumpet": 10.602966788304311, "Horn": 11.166564788679608}, {"Cello": 6.877205822165349, "Oboe": 7.885847730876038}, {"Trumpet": 9.072903989525415, "Tuba": 9.356419172248055}, {"Trumpet": 5.134511207209691, "Trombone": 6.547931629006058}, {"Horn": 7.334727056797508, "Trumpet": 4.5488663843082495}, {"Trumpet": 8.966076103121173, "Horn": 10.917172920885005}, {"Trumpet": 
5.702162783788873, "Trombone": 3.800955604370176}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-196.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.894937207013595, "Saxophone": 0.518675104650533}, {"Violin": 0.8870851103566548, "Double_Bass": 0.7378645836164271}, {"Cello": 0.8003679660095206, "Viola": 0.6530517187866063}, {"Violin": 0.9101161320187563, "Double_Bass": 0.8268849387239864}, {"Violin": 0.8533863363817036, "Cello": 0.8549830181031985}, {"Viola": 0.5585483497993066, "Clarinet": 0.733686148776802}, {"Flute": 0.6961929755926543, "Violin": 0.810258533167835}, {"Saxophone": 0.6832192457915292, "Clarinet": 0.5833648602027455}, {"Saxophone": 0.5646124978128715, "Tuba": 0.6030085187393598}, {"Trombone": 0.715680279472744, "Saxophone": 0.042801933664294474}, {"Oboe": 0.23053195618221406, "Saxophone": 0.537502052627512}, {"Oboe": 0.48500407759354863, "Viola": 0.8851581291588069}, {"Flute": 0.819125830954443, "Cello": 0.895246818224642}, {"Flute": 0.8477725149752019, "Trombone": 0.8627369636905478}, {"Clarinet": 0.7160940693047467, "Horn": 0.7524403830997698}, {"Clarinet": 0.6587308672995665, "Bassoon": 0.18154096484025933}, {"Trumpet": 0.8469826726422774, "Violin": 0.6992474645731492}, {"Trumpet": 0.7609694456065114, "Violin": 0.7560559232612903}, {"Flute": 0.7919140956609152, "Clarinet": 0.6838683888138188}, {"Flute": 0.747940878319353, "Saxophone": 0.42404493946857424}, {"Bassoon": 0.22475972926691867, "Oboe": 0.6711534990561925}, {"Trumpet": 0.6365530264034165, "Trombone": 0.8446084576718169}, {"Trumpet": 0.7026422668480723, "Horn": 0.8633001951809095}, {"Tuba": 0.4768186141915502, "Trumpet": 0.9084069538971468}, {"Trumpet": 0.5837317096586493, "Trombone": 0.6550463060953308}, {"Trumpet": 0.8836093302310891, "Horn": 0.8707775475248584}, {"Cello": 0.5943607181988817, "Oboe": 0.39016070171841366}, {"Trumpet": 0.9381199022748048, "Tuba": 0.884740978362636}, {"Trumpet": 0.7748045969491845, "Trombone": 0.46627667651479604}, {"Horn": 0.7943639679132832, "Trumpet": 0.8991853370955128}, {"Trumpet": 0.6838905216009431, "Horn": 0.7456134896200367}, {"Trumpet": 
0.6397454902184914, "Trombone": 0.7095685710323106}], "separation": [{"Violin": 6.713324722258274, "Saxophone": 2.032071242586922}, {"Violin": 9.251599327777965, "Double_Bass": 6.937197727792457}, {"Cello": 8.628928091362035, "Viola": 3.2468407282842593}, {"Violin": 6.687626042796239, "Double_Bass": 7.759033576916314}, {"Violin": 9.59800744252268, "Cello": 10.765627654532308}, {"Viola": 3.1284857301995848, "Clarinet": 4.458752524841133}, {"Flute": 9.209401797601979, "Violin": 4.27658050779931}, {"Saxophone": -1.1055847320756205, "Clarinet": 0.7516890645605535}, {"Saxophone": 5.495354868383991, "Tuba": 5.175008497625607}, {"Trombone": 6.183629428675136, "Saxophone": -14.242783028090294}, {"Oboe": -4.568098235823826, "Saxophone": -0.6291874236785278}, {"Oboe": 0.6496692207323763, "Viola": 7.8122854033614235}, {"Flute": 8.888885253160183, "Cello": 11.605821650899307}, {"Flute": 11.668471601506518, "Trombone": 6.445123925885379}, {"Clarinet": 6.132476433140976, "Horn": 6.011742145323886}, {"Clarinet": 4.024732860989589, "Bassoon": -8.848392339972712}, {"Trumpet": 5.380607607718472, "Violin": 5.35781316403625}, {"Trumpet": 3.3230355254107886, "Violin": 3.540972002470796}, {"Flute": 12.813264615426508, "Clarinet": 6.687919088010013}, {"Flute": 8.928373335229953, "Saxophone": -3.738057418796908}, {"Bassoon": -0.26455267620314116, "Oboe": 0.26962657057462297}, {"Trumpet": 2.6765444600721473, "Trombone": 6.326103113633838}, {"Trumpet": 5.092767125444753, "Horn": 9.762286842742716}, {"Tuba": 8.03169168758505, "Trumpet": 10.293282805474027}, {"Trumpet": 3.9786579389654486, "Trombone": 2.9436576014149836}, {"Trumpet": 9.758792056606664, "Horn": 9.609807717131972}, {"Cello": 7.648393116233021, "Oboe": 6.4802956293437495}, {"Trumpet": 9.064933781091675, "Tuba": 8.074673470459048}, {"Trumpet": 3.315083322463889, "Trombone": 1.4328727244722483}, {"Horn": 5.804549614725037, "Trumpet": 3.1447001950102482}, {"Trumpet": 4.540860911560189, "Horn": 11.171941969028754}, {"Trumpet": 
5.804867289816604, "Trombone": 3.604629740835625}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.521403217863613, "Saxophone": 6.983464605460762}, {"Violin": 9.447227732258277, "Double_Bass": 6.23281543688209}, {"Cello": 8.735113048088696, "Viola": 5.104697563949083}, {"Violin": 7.861428832701172, "Double_Bass": 7.253193927071148}, {"Violin": 9.18431614877667, "Cello": 10.456018781195798}, {"Viola": 3.7665083104342836, "Clarinet": 7.356204007968387}, {"Flute": 9.055996324423335, "Violin": 7.898478426695225}, {"Saxophone": -0.16857528393248, "Clarinet": 2.2011505871713304}, {"Saxophone": 9.951695220585805, "Tuba": 5.770330458967661}, {"Trombone": 6.681984979783816, "Saxophone": 7.897250085429166}, {"Oboe": 4.44681385227674, "Saxophone": 4.219120350132117}, {"Oboe": 3.2153177232100405, "Viola": 8.359344568860251}, {"Flute": 7.89731479962012, "Cello": 10.495487559457889}, {"Flute": 11.33343503736429, "Trombone": 6.871591993078655}, {"Clarinet": 8.375808423775954, "Horn": 10.224980528335845}, {"Clarinet": 10.424667179088852, "Bassoon": 5.704257288977324}, {"Trumpet": 6.047817943599504, "Violin": 5.358908244917848}, {"Trumpet": 3.9655650089034933, "Violin": 4.470570424534397}, {"Flute": 14.641050139296949, "Clarinet": 11.050210989002238}, {"Flute": 10.313151303873077, "Saxophone": 6.689024907957661}, {"Bassoon": 8.017703483889969, "Oboe": -1.4867946423239162}, {"Trumpet": 5.081067823207137, "Trombone": 5.978648901400581}, {"Trumpet": 10.467828721090166, "Horn": 11.294702255419132}, {"Tuba": 6.605358501051459, "Trumpet": 10.257273345259208}, {"Trumpet": 4.067576582159983, "Trombone": 6.01159502450985}, {"Trumpet": 10.13111734630126, "Horn": 10.850390901573608}, {"Cello": 7.100341117381982, "Oboe": 6.812476364474771}, {"Trumpet": 9.102717940932884, "Tuba": 8.226223437321663}, {"Trumpet": 5.360578249946477, "Trombone": 6.7661293549564485}, {"Horn": 7.854573806969909, "Trumpet": 3.8945439081044206}, {"Trumpet": 8.856871265896016, "Horn": 10.962718739442856}, {"Trumpet": 
5.63266500415506, "Trombone": 3.718497698507282}]}}


--------------------------------------------------------------------------------
/evaluation/MSI-DIS/scores-197.json:
--------------------------------------------------------------------------------
1 | {"MSI-DIS": {"transcription": [{"Violin": 0.89237537947467, "Saxophone": 0.44510238029764515}, {"Violin": 0.8556221472703088, "Double_Bass": 0.7367069285736837}, {"Cello": 0.7700215390356321, "Viola": 0.6020352795541265}, {"Violin": 0.9023963971087151, "Double_Bass": 0.8325830325150321}, {"Violin": 0.85347249740386, "Cello": 0.8441735204462577}, {"Viola": 0.655828150395638, "Clarinet": 0.7312129069706628}, {"Flute": 0.7274294783180636, "Violin": 0.8617809823305205}, {"Saxophone": 0.6161629862761638, "Clarinet": 0.6286135051219482}, {"Saxophone": 0.6002161265463757, "Tuba": 0.5063368051058555}, {"Trombone": 0.7843167854698471, "Saxophone": 0.05874185056691073}, {"Oboe": 0.16904790956065618, "Saxophone": 0.5045632153130077}, {"Oboe": 0.49862727867612283, "Viola": 0.8420476979359018}, {"Flute": 0.749772458939221, "Cello": 0.9029408469382657}, {"Flute": 0.6965261134906171, "Trombone": 0.8743410396684531}, {"Clarinet": 0.8023153731963755, "Horn": 0.6379802064010492}, {"Clarinet": 0.6590131508457827, "Bassoon": 0.19266169823998555}, {"Trumpet": 0.8163087406871357, "Violin": 0.6805988386292915}, {"Trumpet": 0.779189274699307, "Violin": 0.7953052662993431}, {"Flute": 0.6858713295593329, "Clarinet": 0.6394640119662909}, {"Flute": 0.361464153013896, "Saxophone": 0.27013696842278306}, {"Bassoon": 0.1295922447106464, "Oboe": 0.41483241432495527}, {"Trumpet": 0.5394760704019307, "Trombone": 0.8404326662615018}, {"Trumpet": 0.5340195969708901, "Horn": 0.8339660108351642}, {"Tuba": 0.6373837525451183, "Trumpet": 0.9262914678420405}, {"Trumpet": 0.5844239518780254, "Trombone": 0.6841637552899233}, {"Trumpet": 0.894347838435463, "Horn": 0.8982667093394014}, {"Cello": 0.5867802468145809, "Oboe": 0.4249470496070424}, {"Trumpet": 0.9308400711008051, "Tuba": 0.8772392750723959}, {"Trumpet": 0.822151541716393, "Trombone": 0.7455067630608492}, {"Horn": 0.7603527053280081, "Trumpet": 0.8749233977537402}, {"Trumpet": 0.7489058833464853, "Horn": 0.7107329007029287}, {"Trumpet": 
0.6522310085481386, "Trombone": 0.7129749739935904}], "separation": [{"Violin": 6.325352212579337, "Saxophone": 2.355110866817485}, {"Violin": 8.34897201894815, "Double_Bass": 6.90577158222558}, {"Cello": 6.364271577020151, "Viola": 2.761319556998712}, {"Violin": 7.312206968333803, "Double_Bass": 7.135712603036613}, {"Violin": 9.239476776592227, "Cello": 10.573650766482412}, {"Viola": 3.0785353601470407, "Clarinet": 4.842173726849181}, {"Flute": 9.363223935687254, "Violin": 5.729148706950928}, {"Saxophone": -2.208011522001557, "Clarinet": 1.626256445716062}, {"Saxophone": 4.525261948318366, "Tuba": 3.8711151103564303}, {"Trombone": 6.492664804311835, "Saxophone": -11.460727198643177}, {"Oboe": -10.027083660030033, "Saxophone": -1.139859364905568}, {"Oboe": -0.7321952499639489, "Viola": 6.352269803908089}, {"Flute": 7.340293174681898, "Cello": 10.864129251096077}, {"Flute": 7.405251542695906, "Trombone": 6.453718828315837}, {"Clarinet": 7.7253063339332995, "Horn": 3.0519130218373407}, {"Clarinet": 3.703266105445524, "Bassoon": -9.716815854898881}, {"Trumpet": 5.317681612805037, "Violin": 5.199798037747085}, {"Trumpet": 2.998355765919683, "Violin": 4.131989840961902}, {"Flute": 7.762777102782823, "Clarinet": 4.703996405741363}, {"Flute": 0.5801766272758793, "Saxophone": -3.860017004957613}, {"Bassoon": -5.633327376626225, "Oboe": 2.258592482203077}, {"Trumpet": 2.9435727729082997, "Trombone": 6.033829754320279}, {"Trumpet": 1.143057616618786, "Horn": 10.391724584898698}, {"Tuba": 7.7080250328485995, "Trumpet": 10.114150126210586}, {"Trumpet": 0.7674672266700421, "Trombone": 1.687416843994157}, {"Trumpet": 7.345057692744197, "Horn": 9.057155022369821}, {"Cello": 6.588845805782206, "Oboe": 6.032289607470027}, {"Trumpet": 8.901532348797101, "Tuba": 8.219206081447766}, {"Trumpet": 5.2682827954752645, "Trombone": 4.651545632827356}, {"Horn": 5.209315846348989, "Trumpet": 2.7856418280090485}, {"Trumpet": 4.545532814942542, "Horn": 8.912036508120453}, {"Trumpet": 
5.926574193978961, "Trombone": 3.741061197900076}]}, "MSI-DIS-S": {"separation": [{"Violin": 6.251115470150459, "Saxophone": 7.884513713299848}, {"Violin": 9.545937699520385, "Double_Bass": 6.274510475531583}, {"Cello": 7.520606995582185, "Viola": 5.091017630289622}, {"Violin": 8.446044827338657, "Double_Bass": 6.704541008069734}, {"Violin": 8.815925149707262, "Cello": 10.285060072173414}, {"Viola": 3.93537553677161, "Clarinet": 7.18688438951165}, {"Flute": 9.176126413870065, "Violin": 8.631117934197498}, {"Saxophone": -0.416760570908894, "Clarinet": 2.5220909351338228}, {"Saxophone": 9.950375762115959, "Tuba": 5.730435566821114}, {"Trombone": 6.454590448160587, "Saxophone": 7.449444668538713}, {"Oboe": 4.316112154541536, "Saxophone": 4.449941165719888}, {"Oboe": 3.2000019129281148, "Viola": 8.441884769690777}, {"Flute": 7.805992272605051, "Cello": 10.262162684896087}, {"Flute": 11.03384090313118, "Trombone": 6.646034983367257}, {"Clarinet": 8.433194809497781, "Horn": 10.049260309925085}, {"Clarinet": 10.777067694414002, "Bassoon": 5.146004407228368}, {"Trumpet": 6.7223716824656785, "Violin": 4.880086896497433}, {"Trumpet": 4.310304117108964, "Violin": 4.668883798978594}, {"Flute": 14.575404019619626, "Clarinet": 11.447146603983708}, {"Flute": 10.130888516483523, "Saxophone": 5.835447981381627}, {"Bassoon": 8.873670119317286, "Oboe": -1.0286265947346116}, {"Trumpet": 5.036149730312115, "Trombone": 5.701037197167351}, {"Trumpet": 10.591534119844182, "Horn": 11.966472953913696}, {"Tuba": 6.209705267184028, "Trumpet": 10.136869665970057}, {"Trumpet": 4.7186076680299, "Trombone": 5.900529522286721}, {"Trumpet": 10.492795400271014, "Horn": 10.092508133035746}, {"Cello": 6.810037610486766, "Oboe": 6.5655674162955915}, {"Trumpet": 9.362600020193808, "Tuba": 8.580276277609844}, {"Trumpet": 5.741049414964444, "Trombone": 6.672906658283532}, {"Horn": 7.547199325887254, "Trumpet": 3.9334232529851043}, {"Trumpet": 8.808437799868987, "Horn": 11.225484812838339}, {"Trumpet": 
6.127368961149795, "Trombone": 3.8140906216543238}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-190.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9251442848839847, "Saxophone": 0.5181546733929234}, {"Violin": 0.9102259102261561, "Double_Bass": 0.7752092276121464}, {"Cello": 0.7342586937107307, "Viola": 0.660170327861245}, {"Violin": 0.9251839827225579, "Double_Bass": 0.8344817984677688}, {"Violin": 0.842339533810597, "Cello": 0.8264666805679032}, {"Viola": 0.5810072333732746, "Clarinet": 0.826391147551746}, {"Flute": 0.6762144965062589, "Violin": 0.8821178788976707}, {"Saxophone": 0.36505175708200155, "Clarinet": 0.4453237702960427}, {"Saxophone": 0.4161524545734008, "Tuba": 0.6226111577170401}, {"Trombone": 0.8470594673956615, "Saxophone": 0.04481943770003019}, {"Oboe": 0.21653351657261455, "Saxophone": 0.41426634626983655}, {"Oboe": 0.4947882381063612, "Viola": 0.8485804231841394}, {"Flute": 0.8339634517873997, "Cello": 0.8887994690569462}, {"Flute": 0.8502812959857168, "Trombone": 0.8971240200923347}, {"Clarinet": 0.6925028500035234, "Horn": 0.7483736580321206}, {"Clarinet": 0.6781474899875646, "Bassoon": 0.27571901696779744}, {"Trumpet": 0.8272035769572158, "Violin": 0.7757573880818966}, {"Trumpet": 0.5326242283264728, "Violin": 0.8003995420530146}, {"Flute": 0.9279064164461931, "Clarinet": 0.7840266192166879}, {"Flute": 0.6886910481868935, "Saxophone": 0.3169213187862774}, {"Bassoon": 0.4821477123530618, "Oboe": 0.6760352634599481}, {"Trumpet": 0.7052701286449962, "Trombone": 0.8653153802713082}, {"Trumpet": 0.6114493326396717, "Horn": 0.8155378876173611}, {"Tuba": 0.5685467696113435, "Trumpet": 0.8873762467049575}, {"Trumpet": 0.555759829105926, "Trombone": 0.7134012839272565}, {"Trumpet": 0.8722578460034435, "Horn": 0.8315576718149527}, {"Cello": 0.5379875469801705, "Oboe": 0.46708727812363726}, {"Trumpet": 0.9388084157973589, "Tuba": 0.7997813713165582}, {"Trumpet": 0.8094475973296866, "Trombone": 0.817587893111617}, {"Horn": 0.7366386092690937, "Trumpet": 0.7193563199798007}, {"Trumpet": 0.7286939706138229, "Horn": 0.7777772835095649}, {"Trumpet": 
0.5909634654700726, "Trombone": 0.7460937395291403}], "separation": [{"Violin": 8.269233007379697, "Saxophone": 4.338338830854572}, {"Violin": 12.314969374903939, "Double_Bass": 7.6731520945521625}, {"Cello": 7.1786076072832525, "Viola": 5.455080331849846}, {"Violin": 11.564712403474966, "Double_Bass": 9.535666801636365}, {"Violin": 11.897168006384668, "Cello": 14.222602067451138}, {"Viola": 2.858339755857327, "Clarinet": 4.749034659478985}, {"Flute": 9.798047106498677, "Violin": 7.742842039750225}, {"Saxophone": -4.10994966895947, "Clarinet": -0.510236744234285}, {"Saxophone": 2.4867257664885334, "Tuba": 7.670025500290858}, {"Trombone": 9.953810177748572, "Saxophone": -14.633298241968387}, {"Oboe": -6.0121962473655755, "Saxophone": 0.21062296802478192}, {"Oboe": 2.751310231411171, "Viola": 9.04517801509609}, {"Flute": 10.686735511524347, "Cello": 14.489174954666776}, {"Flute": 12.960043824038092, "Trombone": 10.423056178069965}, {"Clarinet": 5.753313819560761, "Horn": 6.125051513272348}, {"Clarinet": 6.279371860208991, "Bassoon": -6.863060952103935}, {"Trumpet": 5.120465674999285, "Violin": 5.457680133469218}, {"Trumpet": 2.5654578322302335, "Violin": 4.727559531763638}, {"Flute": 15.390505258701117, "Clarinet": 5.970558634988674}, {"Flute": 7.703407225619773, "Saxophone": -3.3552999126603145}, {"Bassoon": 1.8454882133004364, "Oboe": -3.17993499403644}, {"Trumpet": 3.753078501509269, "Trombone": 8.42561357225662}, {"Trumpet": 3.8896826431125686, "Horn": 12.226250401841225}, {"Tuba": 14.757338725547184, "Trumpet": 13.31441398221505}, {"Trumpet": 3.9283419011189036, "Trombone": 5.991385298752856}, {"Trumpet": 12.717369148135552, "Horn": 6.889294885051784}, {"Cello": 9.24997801631479, "Oboe": 4.860141959890007}, {"Trumpet": 13.253915449643292, "Tuba": 12.540433594208348}, {"Trumpet": 5.412086610263316, "Trombone": 5.894642477378911}, {"Horn": 5.898436892267443, "Trumpet": 0.5702562952756055}, {"Trumpet": 4.704932092525263, "Horn": 13.872607594278936}, {"Trumpet": 
6.991438713476075, "Trombone": 6.252403355896494}]}, "MSI-S": {"separation": [{"Violin": 8.047104792156668, "Saxophone": 11.082545926622885}, {"Violin": 12.254939614677266, "Double_Bass": 7.084260135390188}, {"Cello": 8.090960511081427, "Viola": 8.489943533014928}, {"Violin": 11.90780876514177, "Double_Bass": 9.215632665423524}, {"Violin": 11.5783171325271, "Cello": 13.791491534640787}, {"Viola": 4.779626823128284, "Clarinet": 5.408722034029381}, {"Flute": 11.581006935994145, "Violin": 10.985776365051885}, {"Saxophone": -1.129362234105362, "Clarinet": 2.333325936004596}, {"Saxophone": 10.674102958801466, "Tuba": 7.358436178472214}, {"Trombone": 9.61610795592729, "Saxophone": 9.55901277509283}, {"Oboe": 6.80628971034443, "Saxophone": 8.908976079166214}, {"Oboe": 6.700097512853127, "Viola": 11.208968318780395}, {"Flute": 9.570129792185067, "Cello": 13.362766990158176}, {"Flute": 12.691962102046027, "Trombone": 10.189738615391864}, {"Clarinet": 7.834684026598236, "Horn": 11.563370802266476}, {"Clarinet": 10.880525574443634, "Bassoon": 7.882743254788611}, {"Trumpet": 7.284965517549401, "Violin": 2.7439416545145368}, {"Trumpet": 4.03834539591089, "Violin": 4.212580273261593}, {"Flute": 16.37407676787338, "Clarinet": 9.436727226021798}, {"Flute": 10.560042926605568, "Saxophone": 6.080197374269638}, {"Bassoon": 12.409777167959962, "Oboe": -1.6961181766260438}, {"Trumpet": 6.202559620169078, "Trombone": 8.812139820850447}, {"Trumpet": 14.492299618580871, "Horn": 15.380975663013894}, {"Tuba": 10.677507967472371, "Trumpet": 13.558397891064136}, {"Trumpet": 5.682573641723998, "Trombone": 8.711709478940147}, {"Trumpet": 14.038187417915813, "Horn": 13.438976991818933}, {"Cello": 9.458398085562012, "Oboe": 11.75103396136266}, {"Trumpet": 13.496684207835393, "Tuba": 13.11574071724432}, {"Trumpet": 7.143966893547329, "Trombone": 7.917465641322568}, {"Horn": 7.811413761486648, "Trumpet": 4.654760900442918}, {"Trumpet": 13.47688997743125, "Horn": 14.262065694914334}, {"Trumpet": 
6.791381271377515, "Trombone": 6.652098560078631}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-198.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9240562021852543, "Saxophone": 0.568345650753969}, {"Violin": 0.9153148660051246, "Double_Bass": 0.7994580054631747}, {"Cello": 0.7797932546933432, "Viola": 0.3411302390173449}, {"Violin": 0.935079046530209, "Double_Bass": 0.8508446265902565}, {"Violin": 0.8564015890267973, "Cello": 0.845216630717393}, {"Viola": 0.5266569254542182, "Clarinet": 0.7522504919731647}, {"Flute": 0.7519803820564964, "Violin": 0.8934413945894634}, {"Saxophone": 0.4267941253868336, "Clarinet": 0.49727687494180345}, {"Saxophone": 0.5452988319584595, "Tuba": 0.614111579122122}, {"Trombone": 0.8369091908958469, "Saxophone": 0.08985526373178969}, {"Oboe": 0.201920692248492, "Saxophone": 0.491948627463316}, {"Oboe": 0.5103982991392773, "Viola": 0.8597633753248017}, {"Flute": 0.8316819636282551, "Cello": 0.8987670747298576}, {"Flute": 0.8362251384042749, "Trombone": 0.8909972197453055}, {"Clarinet": 0.7737352378049958, "Horn": 0.7270063773787437}, {"Clarinet": 0.6204042637633179, "Bassoon": 0.16005388783463503}, {"Trumpet": 0.7933395590797395, "Violin": 0.7277376829143688}, {"Trumpet": 0.6323500316919551, "Violin": 0.8004611683406033}, {"Flute": 0.789976686889872, "Clarinet": 0.6629963256196488}, {"Flute": 0.5984597138031175, "Saxophone": 0.29622156656139526}, {"Bassoon": 0.13883301411653517, "Oboe": 0.4217819109596345}, {"Trumpet": 0.6233525069297288, "Trombone": 0.6099843207527753}, {"Trumpet": 0.6981588091099461, "Horn": 0.8077292657217913}, {"Tuba": 0.6201647473836734, "Trumpet": 0.8703178800773663}, {"Trumpet": 0.5622130959324757, "Trombone": 0.5981636749990653}, {"Trumpet": 0.9095472503123851, "Horn": 0.8284052753642809}, {"Cello": 0.5836042853064146, "Oboe": 0.4960642966330491}, {"Trumpet": 0.9355778596502329, "Tuba": 0.8967261479799133}, {"Trumpet": 0.7815979967611193, "Trombone": 0.4576627148514712}, {"Horn": 0.8180549040812352, "Trumpet": 0.8863887265077279}, {"Trumpet": 0.7166515478524493, "Horn": 0.713591448879571}, {"Trumpet": 
0.6001084958620146, "Trombone": 0.6376698553394347}], "separation": [{"Violin": 8.386771834035775, "Saxophone": 5.522756190424323}, {"Violin": 12.169255833990105, "Double_Bass": 7.981836996822372}, {"Cello": 6.579897082846177, "Viola": -0.7035558467680549}, {"Violin": 11.69476599859357, "Double_Bass": 9.465504867785675}, {"Violin": 12.165202497123168, "Cello": 14.06702005907595}, {"Viola": 2.7398959596282735, "Clarinet": 4.611783879054412}, {"Flute": 11.537470311708898, "Violin": 7.443221096318304}, {"Saxophone": -3.6859830881988036, "Clarinet": -0.4217837325888902}, {"Saxophone": 2.789639424165515, "Tuba": 7.556183949703094}, {"Trombone": 9.718120830450998, "Saxophone": -10.193149098639969}, {"Oboe": -7.9815411605021875, "Saxophone": -0.07345546320580675}, {"Oboe": 2.559875461905407, "Viola": 9.175258636419654}, {"Flute": 11.031547415366758, "Cello": 15.232476023303335}, {"Flute": 12.707020263848621, "Trombone": 10.267912556859395}, {"Clarinet": 6.58710717636599, "Horn": 5.710329571082195}, {"Clarinet": 4.250195007720068, "Bassoon": -8.406621940455443}, {"Trumpet": 5.710614675776288, "Violin": 5.980082028893074}, {"Trumpet": 3.2978324858088275, "Violin": 4.680371555855011}, {"Flute": 5.93174309392687, "Clarinet": 3.9264205310572136}, {"Flute": 5.892199241324855, "Saxophone": -6.902002560602556}, {"Bassoon": -3.075369340566991, "Oboe": 0.6489817711444132}, {"Trumpet": 3.550870669026798, "Trombone": 2.6607934436111815}, {"Trumpet": 6.124741851435262, "Horn": 12.27683786942867}, {"Tuba": 14.227907044410157, "Trumpet": 12.73813507097433}, {"Trumpet": 1.3575086969477175, "Trombone": 3.850657818457792}, {"Trumpet": 12.292363731414081, "Horn": 4.59812739907047}, {"Cello": 8.85121571233573, "Oboe": 5.666712922357422}, {"Trumpet": 13.20722696238372, "Tuba": 13.01454748900403}, {"Trumpet": 6.081743024186396, "Trombone": -0.7244814781435521}, {"Horn": 6.688887567368357, "Trumpet": 5.0692576215816025}, {"Trumpet": 5.605975954069844, "Horn": 12.086761492636295}, {"Trumpet": 
6.006112617990129, "Trombone": 3.127727828430526}]}, "MSI-S": {"separation": [{"Violin": 7.846971984480206, "Saxophone": 11.840984234692108}, {"Violin": 12.199577182314416, "Double_Bass": 7.339234580593179}, {"Cello": 8.094610470778449, "Viola": 8.380023057843912}, {"Violin": 11.994605075459859, "Double_Bass": 9.018348468222051}, {"Violin": 11.671633501743969, "Cello": 13.617765940315225}, {"Viola": 4.605064371175164, "Clarinet": 5.632290542476499}, {"Flute": 11.740663216922094, "Violin": 10.919509529255294}, {"Saxophone": -1.0504145664442202, "Clarinet": 2.289676507926674}, {"Saxophone": 10.511236086665365, "Tuba": 7.682885964617046}, {"Trombone": 9.555856755663225, "Saxophone": 9.711785831766424}, {"Oboe": 7.185899546280057, "Saxophone": 8.259465318401327}, {"Oboe": 8.495196948182219, "Viola": 11.34844936076465}, {"Flute": 9.677758361082482, "Cello": 13.584666741162664}, {"Flute": 12.875241996866798, "Trombone": 10.186197595570377}, {"Clarinet": 7.98697556188623, "Horn": 11.744457717441225}, {"Clarinet": 11.32172000112689, "Bassoon": 7.933447421333608}, {"Trumpet": 7.508277773835639, "Violin": 2.6515007007916096}, {"Trumpet": 4.565072849585745, "Violin": 4.481200581800574}, {"Flute": 16.357871360031247, "Clarinet": 9.663360794663701}, {"Flute": 10.451907650916578, "Saxophone": 3.817569645764915}, {"Bassoon": 12.510757948088587, "Oboe": -1.3101312956894295}, {"Trumpet": 6.265598031844025, "Trombone": 8.696425238852424}, {"Trumpet": 14.790516035289606, "Horn": 15.39216870224207}, {"Tuba": 10.21403522992911, "Trumpet": 13.51899736927772}, {"Trumpet": 5.8523196223401435, "Trombone": 8.47342382321218}, {"Trumpet": 14.096387957092968, "Horn": 13.34878418763118}, {"Cello": 9.026280239119291, "Oboe": 9.400195392066701}, {"Trumpet": 13.72595554393383, "Tuba": 13.655450513713985}, {"Trumpet": 7.2540770117647195, "Trombone": 8.057803830899136}, {"Horn": 7.933959756481133, "Trumpet": 5.72312652720202}, {"Trumpet": 13.288756957325207, "Horn": 14.153428805027133}, {"Trumpet": 
6.89548356389272, "Trombone": 6.643526295765945}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-193.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9244617632443347, "Saxophone": 0.5593304002240704}, {"Violin": 0.8802883026897393, "Double_Bass": 0.7754679668362998}, {"Cello": 0.7603695891087061, "Viola": 0.6643071161158848}, {"Violin": 0.9270978840386803, "Double_Bass": 0.8444560517550049}, {"Violin": 0.8568251950540356, "Cello": 0.848481725353147}, {"Viola": 0.5922023443112255, "Clarinet": 0.7647419615661581}, {"Flute": 0.7333100287275491, "Violin": 0.8748480863891732}, {"Saxophone": 0.45481391160900225, "Clarinet": 0.5175529484421656}, {"Saxophone": 0.4658603046829378, "Tuba": 0.6052916034343235}, {"Trombone": 0.7892719662869737, "Saxophone": 0.04501789003289155}, {"Oboe": 0.20939084354369777, "Saxophone": 0.4863235005508187}, {"Oboe": 0.4875299695286682, "Viola": 0.8718464435830005}, {"Flute": 0.8296203153651434, "Cello": 0.9124242246447343}, {"Flute": 0.8749812721954476, "Trombone": 0.871126976381628}, {"Clarinet": 0.7206862456499559, "Horn": 0.7523929360391318}, {"Clarinet": 0.7277053969679255, "Bassoon": 0.13487023368704545}, {"Trumpet": 0.8500349390322046, "Violin": 0.8359251990066111}, {"Trumpet": 0.7473010548250449, "Violin": 0.7935175929935436}, {"Flute": 0.812206314822883, "Clarinet": 0.782824278146499}, {"Flute": 0.6487360104355274, "Saxophone": 0.3398590624326701}, {"Bassoon": 0.38967696016873343, "Oboe": 0.5164408268642677}, {"Trumpet": 0.6067254018449528, "Trombone": 0.7907018079465361}, {"Trumpet": 0.7249312268694436, "Horn": 0.8274219464003405}, {"Tuba": 0.5199329896842052, "Trumpet": 0.9150869812041426}, {"Trumpet": 0.6871816044867367, "Trombone": 0.6425679201880953}, {"Trumpet": 0.9287733634858255, "Horn": 0.8820497498241525}, {"Cello": 0.5931549322048726, "Oboe": 0.49798267374355226}, {"Trumpet": 0.9308235898652567, "Tuba": 0.8668279307276997}, {"Trumpet": 0.8151225363573554, "Trombone": 0.5223681727010532}, {"Horn": 0.8078062559496695, "Trumpet": 0.9074014506798808}, {"Trumpet": 0.725017737699224, "Horn": 0.7360288831085608}, {"Trumpet": 
0.59419067005877, "Trombone": 0.728958473153738}], "separation": [{"Violin": 8.062433475902145, "Saxophone": 6.632652275985622}, {"Violin": 11.210475449369895, "Double_Bass": 7.899986969253366}, {"Cello": 7.081242809786832, "Viola": 5.643440360134631}, {"Violin": 11.285258758423566, "Double_Bass": 9.931102654562913}, {"Violin": 11.924270081959383, "Cello": 14.214379689529677}, {"Viola": 2.4846663968826843, "Clarinet": 5.228194480827861}, {"Flute": 11.31880368787244, "Violin": 7.414807163113229}, {"Saxophone": -2.715756852983728, "Clarinet": -0.3678961304236015}, {"Saxophone": 3.6946460282165066, "Tuba": 7.412322167796546}, {"Trombone": 9.698466073365768, "Saxophone": -15.497330773004538}, {"Oboe": -6.106251657090574, "Saxophone": 0.20709082742416168}, {"Oboe": 2.574190015518303, "Viola": 9.418361662602635}, {"Flute": 10.965249108259128, "Cello": 15.189084957341974}, {"Flute": 13.080513313721667, "Trombone": 10.081790892487469}, {"Clarinet": 5.059876944240438, "Horn": 7.282784159550497}, {"Clarinet": 6.355874033221949, "Bassoon": -10.729736541052727}, {"Trumpet": 8.213621192276381, "Violin": 4.017862800830307}, {"Trumpet": 4.488067663762125, "Violin": 3.8621095567805153}, {"Flute": 14.491527384457115, "Clarinet": 5.077140365957579}, {"Flute": 8.030498062212892, "Saxophone": -3.1071559146250367}, {"Bassoon": -2.056275497621944, "Oboe": 0.25591815859334616}, {"Trumpet": 2.8967283638727155, "Trombone": 5.783828536079467}, {"Trumpet": 7.061563461694228, "Horn": 12.031026423011031}, {"Tuba": 14.89447473239295, "Trumpet": 13.32577858314799}, {"Trumpet": 5.191119070077824, "Trombone": 4.884364936677561}, {"Trumpet": 13.150740351497873, "Horn": 10.5868342263642}, {"Cello": 9.348906368774601, "Oboe": 7.60741088677431}, {"Trumpet": 13.116784240845602, "Tuba": 13.121297919364919}, {"Trumpet": 6.378649869407855, "Trombone": 2.5902547290008164}, {"Horn": 6.582751990647305, "Trumpet": 5.384880754109012}, {"Trumpet": 6.81170316210022, "Horn": 12.827565777743557}, {"Trumpet": 
6.92676404635733, "Trombone": 6.140498429386195}]}, "MSI-S": {"separation": [{"Violin": 7.768353967035834, "Saxophone": 11.881263271197495}, {"Violin": 12.136068949431422, "Double_Bass": 7.339712349846161}, {"Cello": 8.070068071395633, "Viola": 8.399629904740626}, {"Violin": 11.95181974429685, "Double_Bass": 9.442938575690508}, {"Violin": 11.695659977318611, "Cello": 13.674640718172611}, {"Viola": 4.52066326476576, "Clarinet": 5.793204005078499}, {"Flute": 11.793642357410135, "Violin": 10.941707438597607}, {"Saxophone": -1.0017104708275384, "Clarinet": 2.2641173749920043}, {"Saxophone": 10.729177820650445, "Tuba": 7.668074321368641}, {"Trombone": 9.679304025745719, "Saxophone": 9.695588345119255}, {"Oboe": 6.894391782359195, "Saxophone": 8.071664543377919}, {"Oboe": 7.046523757367044, "Viola": 11.090837781055429}, {"Flute": 9.698968801296576, "Cello": 13.491428084831869}, {"Flute": 12.828485732118729, "Trombone": 10.213177215573923}, {"Clarinet": 7.632633550468762, "Horn": 11.666913543764586}, {"Clarinet": 10.837185913195206, "Bassoon": 8.313142741361405}, {"Trumpet": 7.630824149744368, "Violin": 2.7820441398583378}, {"Trumpet": 4.659280468670007, "Violin": 4.260745985891942}, {"Flute": 16.43037047221107, "Clarinet": 9.331572546951783}, {"Flute": 10.329852582723538, "Saxophone": 5.5579704230338045}, {"Bassoon": 12.508181490198577, "Oboe": -1.4740326161664474}, {"Trumpet": 6.276552008350533, "Trombone": 8.768047995998938}, {"Trumpet": 14.495060879044797, "Horn": 15.256659352788454}, {"Tuba": 10.744841386681848, "Trumpet": 13.605201555088215}, {"Trumpet": 5.968837051263861, "Trombone": 8.631734033990318}, {"Trumpet": 13.754927032917056, "Horn": 13.292014290275873}, {"Cello": 9.459960745462983, "Oboe": 11.811345313619263}, {"Trumpet": 13.445614797822909, "Tuba": 14.033333436065528}, {"Trumpet": 7.039480478980648, "Trombone": 8.096547329984286}, {"Horn": 7.978715516483349, "Trumpet": 5.57277668152468}, {"Trumpet": 13.45905368817337, "Horn": 14.064533272839627}, 
{"Trumpet": 6.994979719818818, "Trombone": 6.504297724623181}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-195.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9313184236630168, "Saxophone": 0.46996658809063735}, {"Violin": 0.9210162175461973, "Double_Bass": 0.7302736521381672}, {"Cello": 0.7968504264474542, "Viola": 0.5227482038994987}, {"Violin": 0.9442940875535218, "Double_Bass": 0.8605585050315773}, {"Violin": 0.8336126844464975, "Cello": 0.8316162303397849}, {"Viola": 0.549609129450183, "Clarinet": 0.7380018176215098}, {"Flute": 0.7445581030774061, "Violin": 0.9001301632979073}, {"Saxophone": 0.46519719707323026, "Clarinet": 0.4653305045541827}, {"Saxophone": 0.35933518310562296, "Tuba": 0.5620447771406816}, {"Trombone": 0.8041519719225962, "Saxophone": 0.03558864333118414}, {"Oboe": 0.2611975077798584, "Saxophone": 0.5097463896606519}, {"Oboe": 0.5653900639672017, "Viola": 0.8556946188367348}, {"Flute": 0.7841410033756995, "Cello": 0.9076025430127005}, {"Flute": 0.7862050982394203, "Trombone": 0.885939215595156}, {"Clarinet": 0.7776934336968994, "Horn": 0.7850422941413437}, {"Clarinet": 0.6805191005088744, "Bassoon": 0.13921125832793405}, {"Trumpet": 0.8181137587095445, "Violin": 0.7676506793811783}, {"Trumpet": 0.6724228099860647, "Violin": 0.8139921321681473}, {"Flute": 0.7812536803812524, "Clarinet": 0.6866810986415959}, {"Flute": 0.7269052292430188, "Saxophone": 0.2573889180215115}, {"Bassoon": 0.29884020401449585, "Oboe": 0.6135526627444909}, {"Trumpet": 0.6264949979835742, "Trombone": 0.7283157397128702}, {"Trumpet": 0.7301707162623018, "Horn": 0.8001272565392903}, {"Tuba": 0.5738075141943123, "Trumpet": 0.903847567563141}, {"Trumpet": 0.6775496324188846, "Trombone": 0.6564596040919183}, {"Trumpet": 0.9034094718018223, "Horn": 0.9177432917240086}, {"Cello": 0.5938054648265988, "Oboe": 0.4560081730079119}, {"Trumpet": 0.9333509087536362, "Tuba": 0.8091743055281851}, {"Trumpet": 0.8000314329221302, "Trombone": 0.44705697055317656}, {"Horn": 0.770416396438021, "Trumpet": 0.9031447042215406}, {"Trumpet": 0.6849772732105041, "Horn": 0.7989662489170208}, {"Trumpet": 
0.5848076093103303, "Trombone": 0.7163187887803806}], "separation": [{"Violin": 8.599516733404187, "Saxophone": 4.565529863975607}, {"Violin": 12.078862894795781, "Double_Bass": 6.631751591756752}, {"Cello": 7.782602030948783, "Viola": 1.8597794081838914}, {"Violin": 11.858880004988261, "Double_Bass": 10.018772089189799}, {"Violin": 11.802061725603341, "Cello": 14.457851945595229}, {"Viola": 2.9262384731602404, "Clarinet": 4.950946681737335}, {"Flute": 11.441580055973368, "Violin": 6.767176530445998}, {"Saxophone": -3.713483088688661, "Clarinet": 0.5021289812469275}, {"Saxophone": -0.695447384900048, "Tuba": 6.679994334583949}, {"Trombone": 9.81800616640154, "Saxophone": -15.198697230965319}, {"Oboe": -5.612506908162345, "Saxophone": -0.23623160013655217}, {"Oboe": 3.0964091685538753, "Viola": 8.902287182475058}, {"Flute": 10.983898139809247, "Cello": 15.066685839379424}, {"Flute": 12.207856158070836, "Trombone": 10.558356066014845}, {"Clarinet": 8.248258609274728, "Horn": 7.394979612559222}, {"Clarinet": 6.351219893180584, "Bassoon": -9.71699068786476}, {"Trumpet": 6.3635717564040934, "Violin": 6.545304163632222}, {"Trumpet": 3.786498854570637, "Violin": 3.5969724079842047}, {"Flute": 8.91437069192924, "Clarinet": 3.6121257849461452}, {"Flute": 8.197440148416499, "Saxophone": -3.8047734656299452}, {"Bassoon": -0.334587651487473, "Oboe": -1.2422828604582257}, {"Trumpet": 3.1048114084214875, "Trombone": 4.1588943180602005}, {"Trumpet": 6.9870005003795495, "Horn": 10.995937846209397}, {"Tuba": 15.137204305784671, "Trumpet": 13.197341712985281}, {"Trumpet": 4.472134389445005, "Trombone": 4.492206591153951}, {"Trumpet": 12.55558930062177, "Horn": 12.850836093893756}, {"Cello": 9.284261564111539, "Oboe": 5.965147991423256}, {"Trumpet": 13.272054478348993, "Tuba": 11.817007087518686}, {"Trumpet": 5.572915553810688, "Trombone": -0.38531703673293477}, {"Horn": 6.5583486715593375, "Trumpet": 5.579780297702247}, {"Trumpet": 5.383004769727327, "Horn": 12.443750219098838}, 
{"Trumpet": 6.020048986236221, "Trombone": 4.885177377004129}]}, "MSI-S": {"separation": [{"Violin": 7.921994675490519, "Saxophone": 11.366995507112584}, {"Violin": 12.038662534248733, "Double_Bass": 6.87432627096078}, {"Cello": 8.148104027180775, "Viola": 8.198127609300254}, {"Violin": 11.98011099201215, "Double_Bass": 9.57358878274719}, {"Violin": 11.440761908393497, "Cello": 14.038834583543329}, {"Viola": 4.512211469691159, "Clarinet": 5.761657332214858}, {"Flute": 11.82297688743914, "Violin": 10.699554244420602}, {"Saxophone": -0.900564981418269, "Clarinet": 2.1366051044109433}, {"Saxophone": 10.817807572920216, "Tuba": 7.384066774990411}, {"Trombone": 9.639162857416927, "Saxophone": 9.847265694382466}, {"Oboe": 7.24825301014474, "Saxophone": 8.196116213661215}, {"Oboe": 7.33642414337921, "Viola": 11.28020507942852}, {"Flute": 9.836888357692988, "Cello": 13.547636857648124}, {"Flute": 12.851097443619395, "Trombone": 10.46500369330717}, {"Clarinet": 7.602902274230314, "Horn": 11.470063225714943}, {"Clarinet": 10.62103690474582, "Bassoon": 7.760127638920407}, {"Trumpet": 7.7634571031588955, "Violin": 2.9190470289012875}, {"Trumpet": 4.60425092234097, "Violin": 4.20663034563419}, {"Flute": 16.511524378044133, "Clarinet": 9.250314957907685}, {"Flute": 10.43917612868238, "Saxophone": 4.033993692631519}, {"Bassoon": 11.92044101412928, "Oboe": -1.3083684861412412}, {"Trumpet": 6.329311750758518, "Trombone": 8.392445113853464}, {"Trumpet": 14.6180165071682, "Horn": 15.383653025218747}, {"Tuba": 10.919045547788544, "Trumpet": 13.595617518172238}, {"Trumpet": 5.842772995624461, "Trombone": 8.631171729225189}, {"Trumpet": 13.997703905521472, "Horn": 13.39407154565187}, {"Cello": 9.458064356688796, "Oboe": 10.297229748451484}, {"Trumpet": 13.699367001890622, "Tuba": 12.900278382966349}, {"Trumpet": 7.09601690995039, "Trombone": 7.9695257687255605}, {"Horn": 7.991149056594455, "Trumpet": 5.643293305203569}, {"Trumpet": 13.623362925687237, "Horn": 13.872989573238371}, 
{"Trumpet": 6.899729635581686, "Trombone": 6.636503030035552}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-199.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9175314552414292, "Saxophone": 0.48769407210316695}, {"Violin": 0.9125718826083175, "Double_Bass": 0.754509249710048}, {"Cello": 0.7713371339231954, "Viola": 0.6277673929358589}, {"Violin": 0.9340282738648057, "Double_Bass": 0.8466029079140432}, {"Violin": 0.8494433532640323, "Cello": 0.8234397744682567}, {"Viola": 0.5366152114501777, "Clarinet": 0.7683789788698187}, {"Flute": 0.7083169398695166, "Violin": 0.8639839564463551}, {"Saxophone": 0.3742301357547141, "Clarinet": 0.38706905722611123}, {"Saxophone": 0.46958433150836665, "Tuba": 0.5933034451715863}, {"Trombone": 0.8087472148848392, "Saxophone": 0.07448094325683406}, {"Oboe": 0.17444751034314088, "Saxophone": 0.4909253731724391}, {"Oboe": 0.4527031595659372, "Viola": 0.8467251228972997}, {"Flute": 0.8121309121549076, "Cello": 0.8678964295542637}, {"Flute": 0.8047517314950919, "Trombone": 0.8825534858507422}, {"Clarinet": 0.7924821801722192, "Horn": 0.7196349405764514}, {"Clarinet": 0.7184619019101741, "Bassoon": 0.11655248309864526}, {"Trumpet": 0.8377585225687513, "Violin": 0.7346748743292829}, {"Trumpet": 0.7569191947743098, "Violin": 0.763493180421253}, {"Flute": 0.7590852320900267, "Clarinet": 0.8305817016784037}, {"Flute": 0.4969204676729673, "Saxophone": 0.47542269192461206}, {"Bassoon": 0.2340276142622117, "Oboe": 0.3578672053054495}, {"Trumpet": 0.4901797343765197, "Trombone": 0.8338687860332582}, {"Trumpet": 0.6757474185491802, "Horn": 0.8491059958764835}, {"Tuba": 0.35710567506371227, "Trumpet": 0.9129449504149315}, {"Trumpet": 0.6746590921706681, "Trombone": 0.7290862938843112}, {"Trumpet": 0.9292135663189819, "Horn": 0.9253320796367778}, {"Cello": 0.5220304794979947, "Oboe": 0.44216339198203447}, {"Trumpet": 0.9334312937132726, "Tuba": 0.8776233750716557}, {"Trumpet": 0.7911654731180092, "Trombone": 0.7138301954536044}, {"Horn": 0.7311518064512271, "Trumpet": 0.8873737601414587}, {"Trumpet": 0.7436783101899987, "Horn": 0.7830704970476989}, {"Trumpet": 
0.6309310886292882, "Trombone": 0.7596549826631929}], "separation": [{"Violin": 8.08709214609101, "Saxophone": 6.555664851425415}, {"Violin": 11.90966558226069, "Double_Bass": 7.747678694483349}, {"Cello": 7.77647331155909, "Viola": 4.852155013844731}, {"Violin": 11.179336714889512, "Double_Bass": 10.067168316108175}, {"Violin": 11.783430884995793, "Cello": 14.254165502121692}, {"Viola": 2.628063367590781, "Clarinet": 5.16276811240536}, {"Flute": 10.869537156063183, "Violin": 7.012412764325196}, {"Saxophone": -2.407752088189431, "Clarinet": -1.3683124339386485}, {"Saxophone": 2.441953291791611, "Tuba": 6.891359665004966}, {"Trombone": 9.798673998715264, "Saxophone": -11.355150084282501}, {"Oboe": -7.885302648665503, "Saxophone": 0.3099868598701906}, {"Oboe": 1.3327031147881367, "Viola": 8.95329764450916}, {"Flute": 10.881682586912627, "Cello": 13.802538881030733}, {"Flute": 12.336582339883348, "Trombone": 10.02528058427761}, {"Clarinet": 8.303029706412266, "Horn": 5.517934814663721}, {"Clarinet": 6.527387551018312, "Bassoon": -11.963376001210104}, {"Trumpet": 5.471768743166365, "Violin": 5.7851901570987785}, {"Trumpet": 3.9167689944269792, "Violin": 3.9082345174224353}, {"Flute": 5.8651942195801965, "Clarinet": 8.775183633566566}, {"Flute": 4.739660064268817, "Saxophone": 0.5893582766712064}, {"Bassoon": -3.000419743638128, "Oboe": -0.28510062155767174}, {"Trumpet": 0.39431452410672374, "Trombone": 8.168607144699726}, {"Trumpet": 5.604162812705422, "Horn": 13.781713052164195}, {"Tuba": 13.080317110352137, "Trumpet": 13.309915685058245}, {"Trumpet": 2.719614848081245, "Trombone": 5.318071927118502}, {"Trumpet": 12.608098456874316, "Horn": 12.631853241319515}, {"Cello": 8.634561592386294, "Oboe": 5.546655711792797}, {"Trumpet": 13.155388595998534, "Tuba": 12.916895993884925}, {"Trumpet": 5.293807862302776, "Trombone": 3.675216010197393}, {"Horn": 6.2579938655145035, "Trumpet": 4.900601445005213}, {"Trumpet": 6.6474611897278155, "Horn": 13.518361343720166}, 
{"Trumpet": 6.7032216324148655, "Trombone": 6.485716015377154}]}, "MSI-S": {"separation": [{"Violin": 7.937513142912341, "Saxophone": 11.55675068110638}, {"Violin": 12.044297056235887, "Double_Bass": 7.18065780993134}, {"Cello": 8.027604024765587, "Viola": 8.461314060352336}, {"Violin": 12.052432728610816, "Double_Bass": 9.642349442558482}, {"Violin": 11.395965786083572, "Cello": 13.810815089053055}, {"Viola": 4.811770591930236, "Clarinet": 5.54433901070463}, {"Flute": 11.755518729804098, "Violin": 10.613372691403066}, {"Saxophone": -0.9763683650580944, "Clarinet": 2.1833665856149436}, {"Saxophone": 10.559369300612094, "Tuba": 7.765526213160637}, {"Trombone": 9.628565024978922, "Saxophone": 9.6995372840224}, {"Oboe": 7.230960833837253, "Saxophone": 8.484284564944574}, {"Oboe": 7.49956264549693, "Viola": 11.332564007565434}, {"Flute": 9.698356254840114, "Cello": 13.579258190713686}, {"Flute": 12.842999876763852, "Trombone": 9.998067046663229}, {"Clarinet": 7.8312030055070805, "Horn": 11.972107833328895}, {"Clarinet": 11.18344974759945, "Bassoon": 8.473246428304677}, {"Trumpet": 7.865342805391078, "Violin": 2.667928647181053}, {"Trumpet": 4.747579747689764, "Violin": 4.3046334481368875}, {"Flute": 16.40655145007629, "Clarinet": 9.262744009739345}, {"Flute": 10.377361410342804, "Saxophone": 5.306469115191316}, {"Bassoon": 12.188541999425693, "Oboe": -1.076195020871555}, {"Trumpet": 6.285112677130419, "Trombone": 8.997985714863413}, {"Trumpet": 14.68934227936489, "Horn": 15.478047758302576}, {"Tuba": 10.494971364324421, "Trumpet": 13.546534858848688}, {"Trumpet": 5.810183418243813, "Trombone": 8.709189037784375}, {"Trumpet": 13.22125363272086, "Horn": 13.891869171330526}, {"Cello": 9.003097183764366, "Oboe": 9.381588572045331}, {"Trumpet": 13.733627860348003, "Tuba": 14.301435544182882}, {"Trumpet": 6.752398712768088, "Trombone": 8.213741901518414}, {"Horn": 7.926709353273152, "Trumpet": 5.562194430363451}, {"Trumpet": 13.49145953025394, "Horn": 14.38378907539935}, 
{"Trumpet": 6.976230801366472, "Trombone": 6.633688997831351}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-191.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9225995974022633, "Saxophone": 0.4413402624758425}, {"Violin": 0.8965189516830728, "Double_Bass": 0.778812303669732}, {"Cello": 0.776771166865604, "Viola": 0.6504239865273502}, {"Violin": 0.9379424538854333, "Double_Bass": 0.7950399690956629}, {"Violin": 0.8528224865033466, "Cello": 0.8132685818579029}, {"Viola": 0.6010818320528796, "Clarinet": 0.7713203383428671}, {"Flute": 0.7411899494477089, "Violin": 0.869715179058957}, {"Saxophone": 0.46249917307384036, "Clarinet": 0.564021325083742}, {"Saxophone": 0.40124368490171236, "Tuba": 0.5377399580292576}, {"Trombone": 0.7807928833803116, "Saxophone": 0.06475725422229726}, {"Oboe": 0.20345567253976457, "Saxophone": 0.4024644539655617}, {"Oboe": 0.4696658964626575, "Viola": 0.8330448386842768}, {"Flute": 0.8301076591073561, "Cello": 0.8903297150292006}, {"Flute": 0.8270575501094422, "Trombone": 0.8659796240875405}, {"Clarinet": 0.7863956683240846, "Horn": 0.7104868580270978}, {"Clarinet": 0.6623751519060289, "Bassoon": 0.17852589856112544}, {"Trumpet": 0.8470153178176278, "Violin": 0.7644443388036903}, {"Trumpet": 0.62422625057308, "Violin": 0.7981823788746835}, {"Flute": 0.8123535129249135, "Clarinet": 0.7954479379532579}, {"Flute": 0.6363008401149348, "Saxophone": 0.17696277556740142}, {"Bassoon": 0.15745949880183926, "Oboe": 0.44964809064298616}, {"Trumpet": 0.6463454976689511, "Trombone": 0.7045269069491815}, {"Trumpet": 0.6715687371107681, "Horn": 0.8110018974198121}, {"Tuba": 0.42894527507104047, "Trumpet": 0.8948483231210234}, {"Trumpet": 0.6810341217160938, "Trombone": 0.5869211662416288}, {"Trumpet": 0.8863997237204001, "Horn": 0.8094233386349049}, {"Cello": 0.5416675360452202, "Oboe": 0.45073995114267856}, {"Trumpet": 0.9323715444558518, "Tuba": 0.6958199654759223}, {"Trumpet": 0.7918967215753786, "Trombone": 0.41000858558037356}, {"Horn": 0.7691291259427434, "Trumpet": 0.8246173600125221}, {"Trumpet": 0.7036394693330495, "Horn": 0.7799222501354737}, {"Trumpet": 
0.5708066420161441, "Trombone": 0.6646036706855561}], "separation": [{"Violin": 8.010686133864272, "Saxophone": 5.7059231746903745}, {"Violin": 11.412973564496355, "Double_Bass": 7.44488281960354}, {"Cello": 7.8087021714671305, "Viola": 5.693851191953469}, {"Violin": 11.279039421823535, "Double_Bass": 9.543076033687353}, {"Violin": 12.09763572117872, "Cello": 14.236084570739997}, {"Viola": 2.887391280202843, "Clarinet": 4.626664402895886}, {"Flute": 11.373303674937578, "Violin": 7.192455790593489}, {"Saxophone": -3.6793791198524852, "Clarinet": 0.3761958254556904}, {"Saxophone": 1.6529985803142446, "Tuba": 6.448297972243405}, {"Trombone": 9.178616316342712, "Saxophone": -12.448816458301774}, {"Oboe": -7.579621857742906, "Saxophone": -0.1819368294493734}, {"Oboe": 1.975123984247302, "Viola": 7.569245606506126}, {"Flute": 10.730196481340442, "Cello": 14.739950965163695}, {"Flute": 12.1179403676805, "Trombone": 10.151080636526661}, {"Clarinet": 7.881543039515142, "Horn": 6.258165410831528}, {"Clarinet": 5.0461383457412055, "Bassoon": -8.312944091244551}, {"Trumpet": 7.947997093246549, "Violin": 5.884329046057044}, {"Trumpet": 3.4906013329792343, "Violin": 3.6077726731608752}, {"Flute": 8.245184507095422, "Clarinet": 6.455376574643435}, {"Flute": 6.967520803117491, "Saxophone": -4.210696035008051}, {"Bassoon": -2.8476055884598015, "Oboe": -0.006435557364543816}, {"Trumpet": 4.183759313263778, "Trombone": 4.268229363219645}, {"Trumpet": 4.777202797899097, "Horn": 12.053542828272674}, {"Tuba": 14.302752278968413, "Trumpet": 13.121161124966605}, {"Trumpet": 3.696199432779127, "Trombone": 3.57996545991643}, {"Trumpet": 13.456693278874974, "Horn": 7.541233738326532}, {"Cello": 9.037666502751014, "Oboe": 5.69123497221756}, {"Trumpet": 13.230985878737357, "Tuba": 11.82208802969761}, {"Trumpet": 5.692344163639756, "Trombone": -0.038399038262363794}, {"Horn": 5.903787143558194, "Trumpet": 3.08322878637523}, {"Trumpet": 5.248489193325934, "Horn": 12.878399840314591}, {"Trumpet": 
6.528325678804467, "Trombone": 4.8500217174309626}]}, "MSI-S": {"separation": [{"Violin": 7.7712022629507, "Saxophone": 11.879543348997084}, {"Violin": 11.862272299049312, "Double_Bass": 6.710834439112427}, {"Cello": 8.06272626751225, "Viola": 8.228429781836986}, {"Violin": 11.946702083927734, "Double_Bass": 9.172743970655542}, {"Violin": 11.678365786497842, "Cello": 13.685135246519824}, {"Viola": 4.452087563331184, "Clarinet": 5.825598120242668}, {"Flute": 11.50759588568928, "Violin": 11.107727201137013}, {"Saxophone": -1.0948985708102497, "Clarinet": 2.220853498447357}, {"Saxophone": 10.316269363423228, "Tuba": 7.088573053292623}, {"Trombone": 9.52883622977521, "Saxophone": 9.580226833317868}, {"Oboe": 7.010895756972238, "Saxophone": 8.371373101228416}, {"Oboe": 6.514136774331385, "Viola": 10.99006723094876}, {"Flute": 9.545664593582433, "Cello": 13.359713009237717}, {"Flute": 12.716952828053634, "Trombone": 10.145914433770287}, {"Clarinet": 7.873963892807194, "Horn": 11.72812423264064}, {"Clarinet": 11.014678205075759, "Bassoon": 7.609387583640848}, {"Trumpet": 7.542684484617423, "Violin": 2.6379567803326336}, {"Trumpet": 4.310216170431006, "Violin": 4.236945778903439}, {"Flute": 16.062547537119237, "Clarinet": 9.928857509873666}, {"Flute": 10.312884567636221, "Saxophone": 5.453272679650074}, {"Bassoon": 12.195897364642267, "Oboe": -1.521294084942422}, {"Trumpet": 6.336855903200195, "Trombone": 7.2988605846237755}, {"Trumpet": 14.50169328499394, "Horn": 15.152331516978727}, {"Tuba": 10.707504268762825, "Trumpet": 13.421095436502908}, {"Trumpet": 5.718330399682571, "Trombone": 8.631778047776123}, {"Trumpet": 13.888643078213514, "Horn": 13.208683036515936}, {"Cello": 9.019806693647674, "Oboe": 12.267152607035483}, {"Trumpet": 13.503274953540728, "Tuba": 13.067487388605866}, {"Trumpet": 7.147299158708008, "Trombone": 7.954129082434135}, {"Horn": 7.898629493604814, "Trumpet": 5.030873918407536}, {"Trumpet": 13.354506527643613, "Horn": 14.274400281460217}, 
{"Trumpet": 6.883123810746039, "Trombone": 6.634738555340189}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-194.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9375940630372809, "Saxophone": 0.49135964909513513}, {"Violin": 0.9204274451764851, "Double_Bass": 0.8026947011939985}, {"Cello": 0.7766548060889136, "Viola": 0.7101577024552148}, {"Violin": 0.9310114771516497, "Double_Bass": 0.8574413489575807}, {"Violin": 0.8547593864553509, "Cello": 0.8264246600908538}, {"Viola": 0.611081827156737, "Clarinet": 0.7242727882404661}, {"Flute": 0.7402821856780738, "Violin": 0.8915553576920556}, {"Saxophone": 0.4392051419128846, "Clarinet": 0.5144519006508103}, {"Saxophone": 0.3886397824657571, "Tuba": 0.6588977093308064}, {"Trombone": 0.8193023914520491, "Saxophone": 0.04064798674595933}, {"Oboe": 0.19981415172669795, "Saxophone": 0.4824252273851331}, {"Oboe": 0.47061331612327073, "Viola": 0.8774851068267819}, {"Flute": 0.8049109652344533, "Cello": 0.9034680571259537}, {"Flute": 0.859714808272614, "Trombone": 0.8836876074583174}, {"Clarinet": 0.7541413441381483, "Horn": 0.7832292504648372}, {"Clarinet": 0.6473470238551157, "Bassoon": 0.20083958739221222}, {"Trumpet": 0.8221573423071687, "Violin": 0.7894467345091507}, {"Trumpet": 0.602981962425549, "Violin": 0.8610861947652021}, {"Flute": 0.9236481962290084, "Clarinet": 0.5889199894549356}, {"Flute": 0.7365865838269245, "Saxophone": 0.1982776521823885}, {"Bassoon": 0.16251598602743972, "Oboe": 0.46202436922869206}, {"Trumpet": 0.7069121672913086, "Trombone": 0.8058424324183276}, {"Trumpet": 0.6983959422070949, "Horn": 0.8700894396791217}, {"Tuba": 0.4385437239619477, "Trumpet": 0.9103355662260838}, {"Trumpet": 0.6487868772174991, "Trombone": 0.6987178169209547}, {"Trumpet": 0.8905323160647035, "Horn": 0.8957125769322402}, {"Cello": 0.575197279400998, "Oboe": 0.5512478100402811}, {"Trumpet": 0.9352919029048294, "Tuba": 0.8563289638682463}, {"Trumpet": 0.7931538565783495, "Trombone": 0.6715140804353453}, {"Horn": 0.8246675832913997, "Trumpet": 0.8437704074209696}, {"Trumpet": 0.7186214550903973, "Horn": 0.7379764895783653}, {"Trumpet": 
0.6402499063873751, "Trombone": 0.7293742983599567}], "separation": [{"Violin": 8.019243857830281, "Saxophone": 6.084410039134168}, {"Violin": 12.291504397421864, "Double_Bass": 6.893395157264827}, {"Cello": 5.872747188527417, "Viola": 6.027574715941437}, {"Violin": 11.930149343542308, "Double_Bass": 9.511122449682867}, {"Violin": 12.117178569455557, "Cello": 14.182675714548276}, {"Viola": 2.1710059147171887, "Clarinet": 4.804987573257236}, {"Flute": 10.638133869277176, "Violin": 7.365847455558377}, {"Saxophone": -2.6914681330661128, "Clarinet": -0.6824840728513982}, {"Saxophone": 1.0920076863703598, "Tuba": 6.939882568142684}, {"Trombone": 9.814917700770534, "Saxophone": -14.53845484904912}, {"Oboe": -6.150614494725222, "Saxophone": -0.15621172737245215}, {"Oboe": 2.3086248170437313, "Viola": 10.090207473293766}, {"Flute": 10.446685047090718, "Cello": 14.722213316667894}, {"Flute": 12.60931641823548, "Trombone": 10.261812098740396}, {"Clarinet": 6.250616514622534, "Horn": 7.811019421396226}, {"Clarinet": 5.130268501184643, "Bassoon": -8.235064680431892}, {"Trumpet": 7.639549927162783, "Violin": 6.217349408060809}, {"Trumpet": 2.609762934418039, "Violin": 4.465008460357421}, {"Flute": 13.857288201688522, "Clarinet": 3.3110710035993534}, {"Flute": 8.298480167998596, "Saxophone": -7.4192074491963265}, {"Bassoon": -3.315419660080834, "Oboe": 0.49375370492935}, {"Trumpet": 4.090566280530151, "Trombone": 6.335770401372324}, {"Trumpet": 5.287027637008959, "Horn": 13.115500476223561}, {"Tuba": 13.94276752458131, "Trumpet": 13.292996962746768}, {"Trumpet": 2.6969141854194385, "Trombone": 4.7219729407511215}, {"Trumpet": 10.262674025505012, "Horn": 10.824899787631628}, {"Cello": 8.956043817065645, "Oboe": 7.4399326830211}, {"Trumpet": 12.549469598163183, "Tuba": 12.778470682550491}, {"Trumpet": 5.112171218608262, "Trombone": 3.4491767714463437}, {"Horn": 6.688794211878191, "Trumpet": 2.468812309829052}, {"Trumpet": 5.075766261968998, "Horn": 12.77943987540247}, {"Trumpet": 
7.040214323689675, "Trombone": 5.142482248811982}]}, "MSI-S": {"separation": [{"Violin": 7.8950576964321275, "Saxophone": 11.521535774124656}, {"Violin": 12.240288133360849, "Double_Bass": 6.497806769422036}, {"Cello": 7.7822597994491405, "Viola": 8.575431014391944}, {"Violin": 11.958714811793573, "Double_Bass": 9.145814303483121}, {"Violin": 11.730606133674407, "Cello": 13.643400190278655}, {"Viola": 4.44436604691258, "Clarinet": 5.536169706585381}, {"Flute": 11.371363531264699, "Violin": 10.638486840955492}, {"Saxophone": -1.1151071015972513, "Clarinet": 2.296042573808309}, {"Saxophone": 10.721859396330341, "Tuba": 7.341200198812241}, {"Trombone": 9.500192793170658, "Saxophone": 9.622594153992289}, {"Oboe": 7.108914956425903, "Saxophone": 8.468950116778764}, {"Oboe": 6.652418859594963, "Viola": 11.215939930036203}, {"Flute": 9.432282884747181, "Cello": 13.311502657758838}, {"Flute": 12.682504008147259, "Trombone": 10.205703975267792}, {"Clarinet": 7.717821517358312, "Horn": 11.745515356943827}, {"Clarinet": 10.740431145625532, "Bassoon": 7.478575059208223}, {"Trumpet": 7.0566084751709734, "Violin": 3.0432672004822585}, {"Trumpet": 4.332294454333908, "Violin": 4.017711090710906}, {"Flute": 16.057979031556883, "Clarinet": 9.645061313578129}, {"Flute": 10.361405092447598, "Saxophone": 5.518675946101787}, {"Bassoon": 12.125062838233081, "Oboe": -1.4505518577008023}, {"Trumpet": 6.362135940471588, "Trombone": 8.453877856717932}, {"Trumpet": 14.604379512195251, "Horn": 15.325512796932465}, {"Tuba": 10.553781314042398, "Trumpet": 13.472689768587038}, {"Trumpet": 5.755090232812073, "Trombone": 8.85094200618022}, {"Trumpet": 13.875039064305895, "Horn": 13.328900533649673}, {"Cello": 9.147060798045763, "Oboe": 12.25954311873494}, {"Trumpet": 13.56540942855366, "Tuba": 13.619410746788734}, {"Trumpet": 7.2019274271069165, "Trombone": 7.8215518405000095}, {"Horn": 7.84924285160136, "Trumpet": 5.1065603274290545}, {"Trumpet": 13.56727642360838, "Horn": 13.897197495367887}, 
{"Trumpet": 6.851062263683259, "Trombone": 6.519800705711802}]}}


--------------------------------------------------------------------------------
/evaluation/MSI/scores-196.json:
--------------------------------------------------------------------------------
1 | {"MSI": {"transcription": [{"Violin": 0.9274906863337553, "Saxophone": 0.5043368474157827}, {"Violin": 0.914386969149301, "Double_Bass": 0.7730874989655719}, {"Cello": 0.7739149409285708, "Viola": 0.6573918072270389}, {"Violin": 0.9341836426263121, "Double_Bass": 0.8482583407528802}, {"Violin": 0.8420241341423795, "Cello": 0.8469197040403452}, {"Viola": 0.5588747697109471, "Clarinet": 0.7887064087570265}, {"Flute": 0.7256794977633976, "Violin": 0.8846825615282289}, {"Saxophone": 0.41758428008822285, "Clarinet": 0.4745905919360384}, {"Saxophone": 0.5008825092141026, "Tuba": 0.6009273497542329}, {"Trombone": 0.8237454859698395, "Saxophone": 0.037795896748970326}, {"Oboe": 0.17629782871849398, "Saxophone": 0.5489591573760875}, {"Oboe": 0.4470377803679896, "Viola": 0.8775972488727447}, {"Flute": 0.8245298887051747, "Cello": 0.9098069836136474}, {"Flute": 0.8085748162858232, "Trombone": 0.887140237057048}, {"Clarinet": 0.8218144367199967, "Horn": 0.7584396533479583}, {"Clarinet": 0.7580983123470278, "Bassoon": 0.1449074010723677}, {"Trumpet": 0.8052751226931595, "Violin": 0.6587967356664012}, {"Trumpet": 0.6744847172777865, "Violin": 0.7994543216362688}, {"Flute": 0.7706943945052336, "Clarinet": 0.7933323991162853}, {"Flute": 0.497779125049499, "Saxophone": 0.5439810323511594}, {"Bassoon": 0.09778074874342473, "Oboe": 0.6243635353635927}, {"Trumpet": 0.5270488542066626, "Trombone": 0.623102735409506}, {"Trumpet": 0.5914551924008531, "Horn": 0.8337775629995053}, {"Tuba": 0.683974960034785, "Trumpet": 0.9026053388651559}, {"Trumpet": 0.6599779681018643, "Trombone": 0.6193870310948638}, {"Trumpet": 0.8942374923024897, "Horn": 0.9352429590162552}, {"Cello": 0.5853176435234323, "Oboe": 0.5584756747453624}, {"Trumpet": 0.9364077281688825, "Tuba": 0.8992721462995069}, {"Trumpet": 0.7871728550809822, "Trombone": 0.3085604821426051}, {"Horn": 0.7992328076443653, "Trumpet": 0.8735621504109806}, {"Trumpet": 0.7397214853489611, "Horn": 0.7391133259916535}, {"Trumpet": 
0.5625548825248867, "Trombone": 0.6839988061598271}], "separation": [{"Violin": 8.002384350102595, "Saxophone": 5.451628907209875}, {"Violin": 12.104245950152052, "Double_Bass": 7.448966351944486}, {"Cello": 6.6859480871167465, "Viola": 5.200347895991829}, {"Violin": 11.102478884432239, "Double_Bass": 10.032329776858885}, {"Violin": 11.889955175893839, "Cello": 14.180611149691524}, {"Viola": 3.7982159793782424, "Clarinet": 4.7174038599921895}, {"Flute": 11.069693917056645, "Violin": 7.364196869323203}, {"Saxophone": -3.164941206815505, "Clarinet": -2.0062585695287103}, {"Saxophone": 2.6221343692888506, "Tuba": 6.83415768262533}, {"Trombone": 9.558411751715411, "Saxophone": -14.789737378334515}, {"Oboe": -8.675431616942674, "Saxophone": 1.668428640979334}, {"Oboe": 1.0431281882152108, "Viola": 9.705655649674934}, {"Flute": 10.654960335024237, "Cello": 15.046428454678441}, {"Flute": 12.521223809661198, "Trombone": 10.224332377154543}, {"Clarinet": 7.474110284591646, "Horn": 6.721126014335286}, {"Clarinet": 7.219275456942902, "Bassoon": -10.224139685412252}, {"Trumpet": 5.939763951566752, "Violin": 5.957894962709236}, {"Trumpet": 3.0868434340069455, "Violin": 4.642075483548619}, {"Flute": 6.259745551692517, "Clarinet": 6.818697549020795}, {"Flute": 4.941118922955765, "Saxophone": 0.1954592801971419}, {"Bassoon": -3.448178985167047, "Oboe": -0.27161265937604895}, {"Trumpet": 1.2723559004955045, "Trombone": 2.3506275151405003}, {"Trumpet": 2.740302664280981, "Horn": 11.871646489756532}, {"Tuba": 14.886345187021178, "Trumpet": 13.26696557903243}, {"Trumpet": 1.5628933516633805, "Trombone": 3.276492702484322}, {"Trumpet": 9.442530332209113, "Horn": 12.705992978512022}, {"Cello": 9.137813694083905, "Oboe": 4.823742745856226}, {"Trumpet": 12.682455929463304, "Tuba": 12.919274738450994}, {"Trumpet": 5.582079973697156, "Trombone": -1.856029538771877}, {"Horn": 6.1622367098751285, "Trumpet": 2.945647568426861}, {"Trumpet": 6.2673377917483775, "Horn": 13.575113452101357}, 
{"Trumpet": 6.152750484815411, "Trombone": 5.147797855183857}]}, "MSI-S": {"separation": [{"Violin": 7.843305068898716, "Saxophone": 11.929191243917652}, {"Violin": 11.966525037474744, "Double_Bass": 6.953270915241092}, {"Cello": 7.99183965297512, "Viola": 8.784201514471224}, {"Violin": 11.863988477785286, "Double_Bass": 9.647516816581131}, {"Violin": 11.61399832787309, "Cello": 13.681708381172886}, {"Viola": 4.695670230946728, "Clarinet": 5.425892433455645}, {"Flute": 11.457398633516359, "Violin": 10.825109962905238}, {"Saxophone": -1.0970764928261632, "Clarinet": 2.2864066634822864}, {"Saxophone": 10.715997355229979, "Tuba": 7.28990878475631}, {"Trombone": 9.454668995454501, "Saxophone": 9.67125970610323}, {"Oboe": 6.911185742493357, "Saxophone": 8.05445637932222}, {"Oboe": 6.445197425179235, "Viola": 11.101000867905714}, {"Flute": 9.448056963711478, "Cello": 13.591085361388993}, {"Flute": 12.648575125646753, "Trombone": 10.071433827411067}, {"Clarinet": 7.820966719407945, "Horn": 11.666964306059898}, {"Clarinet": 10.737167531779043, "Bassoon": 7.641218293749594}, {"Trumpet": 7.425035449903609, "Violin": 3.0107595965766714}, {"Trumpet": 4.385935545620114, "Violin": 3.9094550435989115}, {"Flute": 15.978379782076061, "Clarinet": 9.550151237463139}, {"Flute": 10.587900116515016, "Saxophone": 5.126442933388766}, {"Bassoon": 12.42846602342926, "Oboe": -1.4857967442847158}, {"Trumpet": 6.281325663604525, "Trombone": 7.519311618295204}, {"Trumpet": 14.580846573807111, "Horn": 15.242704647130925}, {"Tuba": 10.676579735080168, "Trumpet": 13.508922484162513}, {"Trumpet": 5.573236423848283, "Trombone": 8.413966273693276}, {"Trumpet": 13.99350522687913, "Horn": 13.242641617389108}, {"Cello": 9.144709678550118, "Oboe": 11.358703796489953}, {"Trumpet": 13.442816644984163, "Tuba": 13.565610990043393}, {"Trumpet": 7.333466654352797, "Trombone": 7.801098855812278}, {"Horn": 7.869435886175751, "Trumpet": 5.078572594050752}, {"Trumpet": 13.35644524716921, "Horn": 
14.13488797378907}, {"Trumpet": 6.8030524763534785, "Trombone": 6.300163096039944}]}}


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # A Unified Model for Zero-shot Music Source Separation, Transcription and Synthesis
  2 | This is the code repository for the paper: A Unified Model for Zero-shot Music Source Separation, Transcription and Synthesis. The paper is available [here](https://arxiv.org/abs/2108.03456).
  3 | 
  4 | ## Introduction
  5 | We propose a unified model for three inter-related tasks:
  6 | - to *separate* individual sound sources from a mixed music audio;
  7 | - to *transcribe* each sound source to MIDI notes;
  8 | - to *synthesize* new pieces based on the timbre of separated sources.
  9 | 
 10 | The model is inspired by the fact that when humans listen to music, our minds can not only separate the sounds of different instruments, but also at the same time perceive high-level representations such as score and timbre.
 11 | 
 12 | ## Model architecture
 13 | ### - Components of the proposed model
 14 | The proposed model comprises four components:
 15 | - a query-by-example (QBE) network
 16 | - a pitch-timbre disentanglement module
 17 | - a transcriptor
 18 | - an audio encoder-decoder network
 19 | 
 20 | ![](https://github.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/blob/main/imgs/model-fig-1-ab.png)
 21 | >The baseline models and the proposed model. In the left figure, the large orange and gray box indicate a QBE
 22 | transcription-only and QBE separation-only model respectively. The whole figure indicates a QBE multi-task model.
 23 | 
 24 | 
 25 | ### - Training losses
 26 | The model is trained with separation loss, transcription loss and contrastive loss. See details in [our paper](https://arxiv.org/abs/2108.03456).
 27 | 
 28 | ### - Pitch-translation Invariance Loss
 29 | To further improve the timbre disentanglement performance, we propose a *pitch-translation invariance loss*. We term the model without the pitch-translation invariance loss the `multi-task informed (MSI) model`, and we term the MSI model with further disentanglement via the pitch-translation invariance loss the `MSI-DIS model`.
 30 | 
 31 | ### - Detailed hyper-parameters of the proposed model
 32 | ![](https://github.com/Kikyo-16/A-unified-model-for-zero-shot-musical-source-separation-transcription-and-synthesis/blob/main/imgs/model-fig-3.png)
 33 | 
 34 | ## Experimental results
 35 | 
 36 | |            Model|MSS-only|        Multi-task|       MSI (ours)| MSI-DIS (ours)|
 37 | |                  ----|        ----|        ----|        ----|        ----|
 38 | |  Seen|        4.69 ± 0.31| 3.32 ± 0.1|   **6.33 ± 0.17**|     5.04 ± 0.16|
 39 | |   Unseen|    **6.20 ± 0.26**|   4.63 ± 0.34|   5.53 ± 0.11|      3.99 ± 0.22| 
 40 | |   **Overall**|     5.07 ± 0.22|   3.65 ± 0.22|   **6.13 ± 0.15**|     4.77 ± 0.14|  
 41 | 
 42 | 
 43 | ## Demos
 44 | The initial version of the demo page is available [here](https://kikyo-16.github.io/demo-page-of-a-unified-model-for-separation-transcriptiion-synthesis/). New demo page with more demos will be updated soon.
 45 | 
 46 | ## Quick start
 47 | 
 48 | ### Requirements
 49 | You will need at least Python 3.6 and PyTorch. See requirements.txt for the full list of requirements. Install dependencies with pip:
 50 | ```
 51 | pip install -r requirements.txt
 52 | ```
 53 | 
 54 | ### Data preparation
 55 | 1. Download URMP Dataset from [URMP homepage](http://www2.ece.rochester.edu/projects/air/projects/URMP.html).
 56 | 2. Run the following command to generate your feature and annotations.
 57 | ```
 58 |  python src/dataset/urmp/urmp_feature.py --dataset_dir=ur_unzipped_dataset_folder --feature_dir=dataset/hdf5s/urmp --process_num=1
 59 | ```
 60 | **NOTE** that `ur_unzipped_dataset_folder` is your unzipped data folder and it should contain directories of songs:
 61 | > .
 62 | ├── `ur_unzipped_dataset_folder`  
 63 |   &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── `0_song0`  
 64 |   &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── `1_song1`  
 65 |   &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;├── ...  
 66 |   &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;...  
 67 | 
 68 | ### Training
 69 | Run the following command to train the proposed MSI-DIS Model:
 70 | ```
 71 | python train.py --model_name=MSI_DIS --model_folder=folder_to_store_model_weights --epoch=200
 72 | ```
 73 | 
 74 | ### Evaluation
 75 | Download models weights [here](https://drive.google.com/drive/folders/1fT3Fva5JywhpYnOhsORbDkLQ9Vnhv_Lj?usp=sharing).  
 76 | Run the following command to evaluate the proposed MSI-DIS Model on the test set:
 77 | ```
 78 | python evaluate.py --model_name=MSI_DIS --model_path=path_of_model_weights --evaluation_folder=folder_to_store_evaluation_results --epoch=199
 79 | ```
 80 | **NOTE:** Since we do not hold out a validation set to choose the best-performing model among all the training epochs, we report average results with a 95% confidence interval (CI) of models at the last 10 epochs.
 81 | Therefore, if you want to reproduce the results of our paper, please
 82 | 1. Evaluate all last-10-epoch models.
 83 | 2. Run the following command to print experimental result tables:
 84 | ```
 85 | python src/analyze/draw_table.py --evaluation_folder=folder_to_store_evaluation_results
 86 | ```
 87 | 
 88 | ### Synthesis
 89 | Run the following command to synthesize audios using the given midi, the test set, and the proposed MSI-DIS Model:
 90 | ```
 91 | python synthesis.py --model_name=MSI-DIS --model_path=path_of_model_weights --evaluation_folder=folder_to_store_synthesis_results
 92 | ```
 93 | 
 94 | ## Citation
 95 | Please cite our work as:
 96 | 
 97 | >@inproceedings{lin2021unified,  
 98 | >title={A Unified Model for Zero-shot Music Source Separation, Transcription and Synthesis},   
 99 | >author={Liwei Lin and Qiuqiang Kong and Junyan Jiang and Gus Xia},  
100 | >booktitle = {Proceedings of 22nd International Conference on Music Information Retrieval, {ISMIR}},  
101 | >year = {2021}  
102 | >}
103 | 
104 | ## License
105 | This code is released under the MIT license as found in the LICENSE file.
106 | 


--------------------------------------------------------------------------------
/src/utils/utilities.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | import numpy as np
  5 | import configparser
  6 | import json
  7 | 
# Small epsilon added to the standard deviation in `factorized_fft` so the
# normalization never divides by zero.
et = 1e-8
  9 | 
 10 | def load_json(path):
 11 | 	with open(path,'r') as load_f:
 12 | 		load_dict = json.load(load_f)
 13 | 	return load_dict
 14 | 
 15 | def save_json(path, data):
 16 | 	with open(path,'w') as f:
 17 | 		json.dump(data,f) 
 18 | 	
 19 | def print_dict(x):
 20 | 	for key in x:
 21 | 		print(key, x[key])
 22 | 
 23 | def factorized_fft(fft, onset_offset):
 24 | 	st = -1
 25 | 	curve_fft = np.zeros_like(fft)
 26 | 	mean_fft = np.zeros_like(fft)
 27 | 	for i in range(fft.shape[-1]):
 28 | 		if onset_offset[i] == 1 and st == -1:
 29 | 			st = i
 30 | 		elif not onset_offset[i] == 0:
 31 | 			if st == -1:
 32 | 				out_fft[i] = 0
 33 | 				mean_fft = fft[i]
 34 | 			else:
 35 | 				ave = np.mean(fft[st : i + 1])
 36 | 				std = np.std(fft[st : i + 1])
 37 | 				mean_fft[st : i + 1] = ave
 38 | 				curve_fft[st : i + 1] = (fft[st : i + 1] - ave) / (std + et)
 39 | 
 40 | 			if onset_offset[i] == 2:
 41 | 				st = -1
 42 | 
 43 | 	return curve_fft, mean_fft
 44 | 
 45 | 
 46 | 
 47 | def compute_time(event, pre_time):
 48 | 	cur_time = time.time()
 49 | 	print(f'{event} use', cur_time - pre_time)
 50 | 	return cur_time
 51 | 
 52 | def encode_mu_law(x, mu=256):
 53 | 	mu = mu - 1
 54 | 	fx = np.sign(x) * np.log(1 + mu * np.abs(x)) / np.log(1 + mu)
 55 | 	return np.floor((fx + 1) / 2 * mu + 0.5).astype(np.int64)
 56 | 
 57 | 
 58 | def decode_mu_law(y, mu=256):
 59 | 	mu = mu - 1
 60 | 	fx = (y - 0.5) / mu * 2 - 1
 61 | 	x = np.sign(fx) / mu * ((1 + mu) ** np.abs(fx) - 1)
 62 | 	return x
 63 | 
 64 | 
 65 | def read_config(config_path, name):
 66 | 	config = configparser.ConfigParser()
 67 | 	config.read(config_path)
 68 | 	return config[name]
 69 | 
 70 | 
 71 | def dict2str(dic, pre):
 72 | 	res = ''
 73 | 	for i, d in enumerate(dic):
 74 | 		if i == 0:
 75 | 			res += pre
 76 | 		res += d + ' :'
 77 | 		val = dic[d]
 78 | 		if type(val) is dict:
 79 | 			res += '\n' + dict2str(val, pre + '\t') + '\n'
 80 | 		else:
 81 | 			res += f'\t{val}\t'
 82 | 
 83 | 	return res		
 84 | 
 85 | def save_score(path, score):
 86 | 	mkdir(path, is_file=True)
 87 | 	res = dict2str(score, '')
 88 | 	write_lst(path, [res])
 89 | 	return res
 90 | 		
 91 | def get_process_groups(audio_num, process_num):
 92 | 	assert audio_num > 0 and process_num > 0
 93 | 	if process_num > audio_num:
 94 | 		process_num = audio_num
 95 | 	audio_num_per_process = (audio_num + process_num - 1) // process_num
 96 | 
 97 | 	reduce_id = process_num - (audio_num_per_process * process_num - audio_num)
 98 | 
 99 | 	groups = []
100 | 	cur = 0
101 | 	for i in range(process_num):
102 | 		if i == reduce_id:
103 | 			audio_num_per_process -= 1
104 | 		groups += [[cur, cur + audio_num_per_process]]
105 | 		cur += audio_num_per_process
106 | 	return groups
107 | 
108 | 
def mkdir(fd, is_file=False):
	"""Create the folder `fd`, including any missing parent folders.

	Parameters
	----------
	fd : str
		'/'-separated path
	is_file : boolean
		default : False
		when true, `fd` is a file path and only its parent folder is created

	An empty resulting path is ignored. Creation uses `exist_ok=True` so that
	several processes creating the same folder at the same time do not fail
	with FileExistsError between the existence check and the creation.
	"""
	parts = fd.split('/')
	if is_file:
		parts = parts[:-1]
	target = "/".join(parts)
	if target and not os.path.exists(target):
		os.makedirs(target, exist_ok=True)
118 | 		
119 | 		
def get_filename(path):
	"""Return the last component of the resolved `path` without its final extension."""
	resolved = os.path.realpath(path)
	last_component = resolved.split('/')[-1]
	stem, _extension = os.path.splitext(last_component)
	return stem
125 | 
126 | 
def traverse_folder(folder):
	"""Walk `folder` recursively and collect every file.

	Returns
	-------
	names : list of str
		file names, in walk order
	paths : list of str
		matching paths (walk root joined with the file name)
	"""
	names, paths = [], []

	for root, _dirs, files in os.walk(folder):
		names.extend(files)
		paths.extend(os.path.join(root, name) for name in files)

	return names, paths
138 | 
139 | 
def note_to_freq(piano_note):
	"""Convert a piano note index to a frequency in Hz, with index 39 mapped to 440 Hz."""
	semitones_from_reference = piano_note - 39
	return 2 ** (semitones_from_reference / 12) * 440
142 | 
143 | 	
def create_logging(log_dir, filemode):
	"""Configure root logging to a new numbered file in `log_dir` and to the console.

	The log file is named `NNNN.log`, using the first four-digit index for
	which no file exists yet. The file receives DEBUG and above; the console
	handler receives INFO and above.

	Parameters
	----------
	log_dir : str
		folder for the log files; created if missing
	filemode : str
		file mode passed to `logging.basicConfig`

	Returns
	-------
	the `logging` module, so callers can call `logging.info(...)` on the result
	"""
	# Fixed: this file never imports `logging` at module level, so the
	# original body failed with NameError. A local import keeps this block
	# self-contained.
	import logging

	mkdir(log_dir)
	i1 = 0

	# Find the first unused log index.
	while os.path.isfile(os.path.join(log_dir, '{:04d}.log'.format(i1))):
		i1 += 1

	log_path = os.path.join(log_dir, '{:04d}.log'.format(i1))
	logging.basicConfig(
		level=logging.DEBUG,
		format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
		datefmt='%a, %d %b %Y %H:%M:%S',
		filename=log_path,
		filemode=filemode)

	# Print to console
	console = logging.StreamHandler()
	console.setLevel(logging.INFO)
	formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
	console.setFormatter(formatter)
	logging.getLogger('').addHandler(console)

	return logging
167 | 
168 | 
def float32_to_int16(x):
	"""Clip `x` to [-1, 1] and convert it to int16 samples scaled by 32767."""
	clipped = np.clip(x, -1, 1)
	assert np.max(np.abs(clipped)) <= 1.
	scaled = clipped * 32767.
	return scaled.astype(np.int16)
173 | 
174 | 
def int16_to_float32(x):
	"""Divide int16 samples by 32767 and return them as float32."""
	normalized = x / 32767.
	return normalized.astype(np.float32)
177 | 	
178 | 
def pad_truncate_sequence(x, max_len):
	"""Return `x` zero-padded at the end, or truncated, to exactly `max_len` items."""
	current = len(x)
	if current >= max_len:
		return x[0 : max_len]
	padding = np.zeros(max_len - current)
	return np.concatenate((x, padding))
184 | 
def read_lst(lst_path):
	"""Read `lst_path` and return its lines with trailing whitespace removed."""
	with open(lst_path) as f:
		return [line.rstrip() for line in f]
190 | 
def write_lst(lst_path, lst):
	"""Write the items of `lst` to `lst_path`, one per line, with no trailing newline."""
	text = '\n'.join(str(item) for item in lst)
	with open(lst_path, 'w') as f:
		f.write(text)
195 | 
def freq2note(freq):
	"""Convert a frequency in Hz to the nearest note index, with 440 Hz mapped to 48."""
	ratio = float(freq) / 440
	semitones = round(12 * np.log2(ratio))
	return semitones + 48
200 | 
def note2freq(note):
	"""Convert a note index to a frequency in Hz, with index 48 mapped to 440 Hz."""
	octaves = (float(note) - 48) / 12
	return (2**octaves) * 440
205 | 	
206 | 	
def parse_frameroll2annotation(frame_roll, frames_per_second=100, notes_num=88):
	"""Convert a frame-level note sequence into note annotations.

	Consecutive frames holding the same value form one note. Frames equal to
	`notes_num` are silence and produce no annotation.

	Parameters
	----------
	frame_roll : np.ndarray
		1-D array with one note index (or `notes_num` for silence) per frame
	frames_per_second : int
		frame rate used to convert frame indices to seconds
	notes_num : int
		value that marks a silent frame

	Returns
	-------
	est : list of str
		one 'onset\\toffset\\tnote' line per note, times in seconds
	"""
	# Append one sentinel silence frame so a note still sounding in the last
	# frame is closed and emitted. The original built this padded array but
	# iterated the unpadded one, dropping that final note.
	preds = np.pad(frame_roll, (0, 1), 'constant', constant_values=(0, notes_num))

	pre = notes_num
	st = -1
	est = []
	for i in range(preds.shape[0]):
		cur = preds[i]
		if not cur == pre:
			# The value changed: flush the previous run if it was a note.
			if st > -1 and not pre == notes_num:
				est.append(
					'%f\t%f\t%d' % (st * 1.0 / frames_per_second, i * 1.0 / frames_per_second, pre))
			st = i
		pre = cur
	return est
220 | 


--------------------------------------------------------------------------------
/src/dataset/urmp/urmp_generate_dataset.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import random
  4 | import argparse
  5 | 
  6 | sys.path.insert(1, os.path.join(sys.path[0], '../..'))
  7 | from utils.utilities import (mkdir, write_lst)
  8 | 
  9 | random.seed(1234)
 10 | 
 11 | instr_tags = "vn,vc,va,fl,cl,sax,tpt,tbn,bn,hn,tba,db,ob"
 12 | instrs = "Violin,Cello,Viola,Flute,Clarinet,Saxophone,Trumpet,Trombone,Bassoon,Horn,Tuba,Double_Bass,Oboe"
 13 | tag2instr = {}
 14 | 
 15 | seen = "Violin,Cello,Viola,Flute,Clarinet,Saxophone,Trumpet,Trombone"
 16 | unseen = "Horn,Tuba,Double_Bass,Bassoon,Oboe"
 17 | 
 18 | skips = ""
 19 | 
 20 | instr_tags = instr_tags.split(',')
 21 | instrs = instrs.split(',')
 22 | seen = seen.split(',')
 23 | unseen = unseen.split(',')
 24 | skips = skips.split(',')
 25 | 
 26 | for i, tag in enumerate(instr_tags):
 27 | 	tag2instr[tag] = instrs[i]
 28 | 
def get_all_audios(folder):
	"""Index the tracks under `folder` and build train/test/query lists.

	Every sub-folder of `folder` is treated as one sample. Tracks are found
	through files ending in "ref.txt" and grouped by song name and instrument.
	Songs containing at least one instrument that is not in `seen` go to the
	test set, where tracks are paired into two-source mixtures; all other
	songs contribute their tracks to the per-instrument training lists.
	Finally, for both tracks of each test pair, a query track of the same
	instrument is drawn at random from a different test song.

	Parameters
	----------
	folder : str
		directory containing one sub-folder per sample

	Returns
	-------
	audios : dict
		audios[song name][instrument][track name] -> track path
	train_lst : dict
		train_lst[instrument] -> list of track paths
	test_lst : dict
		test_lst[song name] -> {"test": pairs, "query": queries}; each entry
		is [instr_a, instr_b, path_a, path_b]
	"""
	audios = {}
	tracks_num = 0
	sample_folders = os.listdir(folder)
	for sample in sample_folders:
		sample_path = os.path.join(folder, sample)
		# NOTE(review): this listing runs before the name check below, so a
		# plain file directly under `folder` would raise here - confirm that
		# `folder` only contains directories.
		tracks = os.listdir(sample_path)
		if len(sample.split('_')) < 2:
			continue
		# Second underscore-separated field of the folder name is the song name.
		sampleName = sample.split('_')[1]
		sample_instrs = sample.split('_')[2:]
		if sampleName not in audios:
			audios[sampleName] = {}
		for track in tracks:
			if not str.endswith(track, "ref.txt"):
				continue
			# Derive the feature file name from the reference annotation name.
			track = str.replace(track, "_ref.txt", ".h5")
			#track = str.replace(track, "_TRAIN.h5", "_TEST.h5")
			track_path = os.path.join(sample_path, track)
			# File name fields: index 1 is the track name, index 2 the instrument tag.
			track_name = track.split("_")[1]
			instr = tag2instr[track.split("_")[2]]
			if instr not in audios[sampleName]:
				audios[sampleName][instr] = {}
			if track_name not in audios[sampleName][instr]:
				tracks_num += 1
			audios[sampleName][instr][track_name] = track_path
	# A song is appended once per instrument, so both lists may hold
	# duplicates; a song with any instrument outside `seen` ends up in
	# `unseen_audios`.
	seen_audios = []
	unseen_audios = []
	for songName in audios:
		for instr in audios[songName]:
			if instr in seen:
				seen_audios.append(songName)
			else:
				unseen_audios.append(songName)


	train_lst = {}
	test_lst = {}

	for songName in audios:
		if songName in unseen_audios:
			# Test song: collect the track paths of every instrument.
			instrs = {}
			instrs_num = 0
			for instr in audios[songName]:
				if instr not in instrs:
					instrs[instr] = []
				for track in audios[songName][instr]:
					instrs[instr].append(audios[songName][instr][track])
				instrs_num += len(instrs[instr])
			# Instruments with the most tracks first.
			instrs = sorted(instrs.items(), key=lambda d: -len(d[1]))
			show = [{instr[0]:len(instr[1])} for instr in instrs]
			print(show)
			data_lst = []
			for instr in instrs:
				# Debug marker printed when one instrument owns more than half
				# of the song's tracks.
				if len(instr[1]) > instrs_num // 2:
					print("aaaaaaaaaaaaaaaaaaaaaaaah")
				for track in instr[1]:
					data_lst.append([instr[0], track])
			
			# Pair the i-th track with the i-th track from the end; the middle
			# track of an odd-length list is paired with the first track.
			total = len(data_lst)
			pairs = []
			for i, track in enumerate(data_lst):
				j = total - 1- i
				if j == i:
					j = 0
				pairs.append([track[0], data_lst[j][0],	track[1],data_lst[j][1]])
				if i + 1 >=	(total + 1)// 2:
					break
			test_lst[songName] = {"test" : pairs, "query" : []}

		else:
			# Training song: add every track to its instrument's list.
			for instr in audios[songName]:
				if instr not in train_lst:
					train_lst[instr] = []
				for track in audios[songName][instr]:
					# NOTE(review): paths built above end in ".h5", so this
					# replacement looks like a no-op here; if it did match,
					# the result would lack the dot before "h5" - confirm intent.
					train_lst[instr].append(str.replace(audios[songName][instr][track], "_TEST.h5", "h5"))

	
	
	print("\nseen:\n")
	compute_instr_samples(audios, songNames=None, skipNames=unseen_audios)	
	print("\nunseen:\n")
	compute_instr_samples(audios, songNames=unseen_audios)

	print("\nall:\n")
	compute_instr_samples(audios)


	query_lst = []

	songs_lst = []
	songs_num = len(test_lst)
	for test in test_lst:
		songs_lst.append(test)

	for i, test in enumerate(test_lst):
		for pair in test_lst[test]["test"]:
			# A query starts with the two instrument names of the pair and is
			# completed with one query path per instrument.
			query = []
			query += pair[:2]
			for j in range(2):
				path = None
				# NOTE(review): this retries until a match is found, so it does
				# not terminate if no other test song holds a different track
				# of the same instrument.
				while path is None:
					song_id = random.randint(0, songs_num - 1)
					if song_id == i:
						continue
					query_pairs = test_lst[songs_lst[song_id]]["test"]
					for query_pair in query_pairs:
						for k in range(2):
							# Same instrument, different track path.
							if query_pair[k] == pair[j] and not query_pair[k + 2] == pair[j + 2]:
								path = query_pair[k + 2]
								query.append(path)
								break
						if path is not None:
							break
			test_lst[test]["query"] += [query]

	return audios, train_lst, test_lst
146 | 
def compute_instr_samples(audios, songNames=None, skipNames=None):
	"""Count and print the number of tracks per instrument.

	Parameters
	----------
	audios : dict
		audios[song name][instrument] -> collection of tracks
	songNames : collection or None
		when given, only songs in this collection are counted
	skipNames : collection or None
		when given, songs in this collection are ignored

	Returns
	-------
	samples : dict
		instrument -> number of tracks
	"""
	samples = {}
	num = 0
	for songName, by_instr in audios.items():
		selected = songNames is None or songName in songNames
		skipped = skipNames is not None and songName in skipNames
		if not selected or skipped:
			continue
		for instr, tracks in by_instr.items():
			count = len(tracks)
			samples[instr] = samples.get(instr, 0) + count
			num += count

	for instr, count in samples.items():
		print(instr, count)
	# Both totals are printed; they are accumulated independently.
	print(sum(samples.values()), num)
	return samples
167 | 
def save_train_lst(data, output_folder):
	"""Write one `train.lst` per instrument under `output_folder/<instrument>/`.

	Parameters
	----------
	data : dict
		instrument -> list of track paths
	output_folder : str
	"""
	for instr in data:
		target_folder = os.path.join(output_folder, instr)
		mkdir(target_folder)
		write_lst(os.path.join(target_folder, "train.lst"), data[instr])
174 | 
def save_test_lst(data, output_folder):
	"""Write `test.lst` and `query.lst` under `output_folder/testset/`.

	Parameters
	----------
	data : dict
		song name -> {"test": entries, "query": entries}; each entry holds
		two instrument names followed by two paths
	output_folder : str
	"""
	testset_folder = os.path.join(output_folder, "testset")
	mkdir(testset_folder)

	def as_line(t):
		# Instruments and paths are comma-joined; the two halves are tab-separated.
		return f"{t[0]},{t[1]}\t{t[2]},{t[3]}"

	test_entries = []
	query_entries = []
	for songName in data:
		test_entries += data[songName]["test"]
		query_entries += data[songName]["query"]
	test_lines = [as_line(entry) for entry in test_entries]
	query_lines = [as_line(entry) for entry in query_entries]
	print("test set", len(test_lines))
	write_lst(os.path.join(testset_folder, "test.lst"), test_lines)
	write_lst(os.path.join(testset_folder, "query.lst"), query_lines)
190 | 
191 | 
if __name__ == "__main__":
	# Command-line entry point: index the features, then write the list files.
	cli = argparse.ArgumentParser(description='')
	cli.add_argument('--feature_dir', type=str, required=True, help='Directory of generated dataset.')
	cli.add_argument('--data_dir', type=str, required=True, help='Directory to store generated files.')
	options = cli.parse_args()

	audios, train_lst, test_lst = get_all_audios(options.feature_dir)
	save_train_lst(train_lst, options.data_dir)
	save_test_lst(test_lst, options.data_dir)
	instr_samples = compute_instr_samples(audios)
205 | 			
206 | 			
207 | 


--------------------------------------------------------------------------------
/src/models/layers.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | import sys
  6 | import numpy as np
  7 | import time
  8 | import h5py
  9 | import math
 10 | 
 11 | EPS = 1e-8
 12 | 
 13 | 
 14 | def init_layer(layer):
 15 | 	"""Initialize a Linear or Convolutional layer. """
 16 | 	nn.init.xavier_uniform_(layer.weight)
 17 | 
 18 | 	if hasattr(layer, 'bias'):
 19 | 		if layer.bias is not None:
 20 | 			layer.bias.data.fill_(0.)
 21 | 
 22 | 
 23 | def init_bn(bn):
 24 | 	"""Initialize a Batchnorm layer. """
 25 | 	bn.bias.data.fill_(0.)
 26 | 	bn.weight.data.fill_(1.)
 27 | 
 28 | 
 29 | 
 30 | class ConvBlock(nn.Module):
 31 | 	"""A Convolutional Layer Followed by a Batchnorm Layer and a ReLU Activation Layer.
 32 | 
 33 | 		Input : [B x in_channels x T x F]
 34 | 		Output : [B x out_chanels x T x F]
 35 | 
 36 | 		Parameters
 37 | 		-----------
 38 | 		in_channels : int
 39 | 		out_channels : int
 40 | 		momentum : float
 41 | 		
 42 | 	"""
 43 | 	def __init__(self, in_channels, out_channels, momentum=0.01):
 44 | 		super(ConvBlock, self).__init__()
 45 | 
 46 | 		self.conv = nn.Conv2d(in_channels=in_channels,
 47 | 								out_channels=out_channels,
 48 | 								kernel_size=(3, 3), stride=(1, 1),
 49 | 								padding=(1, 1), bias=False)
 50 | 
 51 | 		self.bn = nn.BatchNorm2d(out_channels, momentum=momentum)
 52 | 		
 53 | 		self.init_weights()		
 54 | 
 55 | 	def init_weights(self):
 56 | 		init_layer(self.conv)
 57 | 		init_bn(self.bn)
 58 | 
 59 | 
 60 | 	def forward(self, input):
 61 | 		"""
 62 | 		Parameters
 63 | 		----------
 64 | 		input : [B x in_channels x T x F]
 65 | 		
 66 | 		Returns
 67 | 		-------
 68 | 		x : [B x out_chanels x T x F]
 69 | 
 70 | 		"""	
 71 | 		x = input
 72 | 		x = F.relu_(self.bn(self.conv(x)))
 73 | 		return x
 74 | 
 75 | 
 76 | class DeepConvBlock(nn.Module):
 77 | 	"""2 Convolutional Layers, each of which is followed by a Batchnorm Layer and a ReLU Activation Layer.
 78 | 
 79 | 		Input : [B x in_channels x T x F]
 80 | 		Output : [B x out_chanels x T x F]
 81 | 
 82 | 		Parameters
 83 | 		-----------
 84 | 		in_channels : int
 85 | 		out_channels : int
 86 | 		momentum : float
 87 | 		
 88 | 	"""
 89 | 
 90 | 	def __init__(self, in_channels, out_channels, momentum=0.01):
 91 | 		super(DeepConvBlock, self).__init__()
 92 | 
 93 | 		self.conv1 = ConvBlock(in_channels, out_channels, momentum)
 94 | 		self.conv2 = ConvBlock(out_channels, out_channels, momentum)
 95 | 
 96 | 
 97 | 	def forward(self, input):
 98 | 		"""
 99 | 
100 | 		Parameters
101 | 		----------
102 | 		input : [B x in_channels x T x F]
103 | 
104 | 		Returns
105 | 		-------
106 | 			:	[B x out_chanels x T x F]
107 | 		"""
108 | 		x = input
109 | 		return self.conv2(self.conv1(x))
110 | 
111 | 
class LinearBlock2D(nn.Module):
	"""A single 2D 1x1 convolution with bias.

		Input : [B x in_channels x T x F]
		Output : [B x out_channels x T x F]

		Parameters
		-----------
		in_channels : int
		out_channels : int

	"""

	def __init__(self, in_channels, out_channels):
		super().__init__()
		self.conv = nn.Conv2d(
			in_channels=in_channels,
			out_channels=out_channels,
			kernel_size=(1, 1),
			stride=(1, 1),
			bias=True)

		self.init_weights()

	def init_weights(self):
		"""Initialize the convolution."""
		init_layer(self.conv)

	def forward(self, input):
		"""
		Parameters
		----------
		input : [B x in_channels x T x F]

		Returns
		-------
		x : [B x out_channels x T x F]
		"""
		return self.conv(input)
150 | 	
class LinearBlock1D(nn.Module):
	"""A single 1D convolution with kernel size 1.

		Input : [B x in_channels x T]
		Output : [B x out_channels x T]

		Parameters
		-----------
		in_channels : int
		out_channels : int
		bias : boolean
			default : true
			the convolution has a bias term if true

	"""

	def __init__(self, in_channels, out_channels, bias=True):
		super().__init__()
		self.conv = nn.Conv1d(
			in_channels=in_channels,
			out_channels=out_channels,
			kernel_size=1,
			stride=1,
			bias=bias)

		self.init_weights()

	def init_weights(self):
		"""Initialize the convolution."""
		init_layer(self.conv)

	def forward(self, input):
		"""
		Parameters
		-----------
		input : [B x in_channels x T]

		Returns
		-----------
		x : [B x out_channels x T]
		"""
		return self.conv(input)
194 | 	
class EncoderBlock(nn.Module):
	"""Encoder stage made of a single `ConvBlock` (conv + batch norm + ReLU).

		See `ConvBlock` for the input/output shapes and parameters.

	"""

	def __init__(self, in_channels, out_channels, momentum = 0.01):
		super().__init__()

		self.conv_block = ConvBlock(in_channels, out_channels, momentum)

	def forward(self, input):
		"""Apply the convolution block; no pooling is performed here."""
		return self.conv_block(input)
212 | 
class DecoderBlock(nn.Module):
	"""Upsampling stage: 3x3 transposed conv + batch norm + ReLU, concatenation
	with a skip tensor, then a `DeepConvBlock` or `ConvBlock`.

		Input: [B x in_channels x T x F], [B x (out_channels // 2) x (T* strides[0]) x (F * strides[1])]
		Output: [B x out_channels x (T* strides[0]) x (F * strides[1])]
				(strides other than (2, 2) might require extra consideration of the padding operation)

		Parameters
		----------
		in_channels : int
		out_channels : int
		strides : tuple
		momentum : float
		deep : boolean
			default: False
			the transposed conv is followed by a `DeepConvBlock` if true else a `ConvBlock`

	"""

	def __init__(self, in_channels, out_channels, strides, momentum=0.01, deep=False):
		super().__init__()

		self.conv = torch.nn.ConvTranspose2d(
			in_channels=in_channels,
			out_channels=out_channels,
			kernel_size=(3, 3),
			stride=strides,
			padding=(0, 0),
			output_padding=(0, 0),
			bias=False)

		self.bn = nn.BatchNorm2d(out_channels, momentum=momentum)

		# After concatenation with the skip tensor the channel count doubles.
		block_type = DeepConvBlock if deep else ConvBlock
		self.conv_block = block_type(out_channels * 2, out_channels, momentum)

		self.init_weights()

		# True when the time axis is upsampled (time stride other than 1).
		self.prune_temporal = (not strides[-2] == 1)

	def init_weights(self):
		"""Initialize the transposed convolution and its batch-norm layer."""
		init_layer(self.conv)
		init_bn(self.bn)

	def prune(self, x):
		"""Trim the extra border produced by the transposed convolution.

		The last frequency bin is always dropped. On the time axis the last
		frame is dropped when time is upsampled, otherwise one frame is
		dropped at each end.
		"""
		time_slice = slice(None, -1) if self.prune_temporal else slice(1, -1)
		return x[:, :, time_slice, : -1]

	def forward(self, input_tensor, concat_tensor):
		"""
		Parameters
		----------
		input_tensor : tensor
			[B x in_channels x T x F]
		concat_tensor : tensor
			[B x (out_channels // 2) x (T* strides[0]) x (F * strides[1])]

		Returns
		---------
		x : tensor
			[B x out_channels x (T* strides[0]) x (F * strides[1])]

		"""
		upsampled = F.relu_(self.bn(self.conv(input_tensor)))
		upsampled = self.prune(upsampled)
		merged = torch.cat((upsampled, concat_tensor), dim=1)
		return self.conv_block(merged)
283 | 
284 | 
285 | 
286 | 


--------------------------------------------------------------------------------
/src/dataset/urmp/urmp_feature.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import argparse
  3 | import csv
  4 | import os
  5 | import time
  6 | import h5py
  7 | import librosa
  8 | import multiprocessing
  9 | import sys
 10 | 
 11 | sys.path.insert(1, os.path.join(sys.path[0], '../..'))
 12 | 
 13 | from utils.utilities import (mkdir, float32_to_int16, freq2note, get_filename, get_process_groups, read_lst, write_lst)
 14 | from utils.target_process import TargetProcessor
 15 | 
 16 | from conf.feature import *
 17 | 
 18 | et = 1e-8
 19 | 
 20 | 
 21 | def remove_empty_segment(wav, frame_roll, sample_rate):
 22 | 	segments = []
 23 | 	samples_per_frame = sample_rate * 1. / FRAMES_PER_SEC
 24 | 	for i in range(frame_roll.shape[-1]):
 25 | 		if not frame_roll[i] == NOTES_NUM_EXCLUDE_SILENCE:
 26 | 			st = int(i * samples_per_frame)
 27 | 			ed = int((i + 1)* samples_per_frame)
 28 | 			if ed > wav.shape[-1]:
 29 | 				ed = wav.shape[-1]
 30 | 			segments.append(wav[st : ed])
 31 | 			if ed == wav.shape[-1]:
 32 | 				break
 33 | 	return np.concatenate(segments, -1)
 34 | 
 35 | 
 36 | def pack_urmp_dataset_to_hdf5(args):
 37 | 
 38 | 	dataset_dir = args.dataset_dir
 39 | 	feature_dir = args.feature_dir
 40 | 	process_num = args.process_num
 41 | 
 42 | 	mkdir(feature_dir)
 43 | 
 44 | 	meta_dict = {}
 45 | 	meta_dict['audio_filename'] = []
 46 | 	audios_num = 0
 47 | 
 48 | 	for folder in os.listdir(dataset_dir):
 49 | 		if str.startswith(folder, "._"):
 50 | 			continue
 51 | 		meta_data = folder.split('_')
 52 | 		if len(meta_data) < 4:
 53 | 			continue	
 54 | 		audios_num += 1
 55 | 		id = meta_data[0]
 56 | 		name = meta_data[1]
 57 | 		sources = meta_data[2:]
 58 | 		audio = {}
 59 | 		audio['mix'] = os.path.join(folder, f'AuMix_{folder}.wav')
 60 | 		audio['separated_sources'] = []
 61 | 		audio['note_annotations'] = []
 62 | 		for j, s in enumerate(sources):
 63 | 			audio['separated_sources'] += [os.path.join(folder, f'AuSep_{j + 1}_{s}_{id}_{name}.wav')]
 64 | 			audio['note_annotations'] += [os.path.join(folder, f'Notes_{j + 1}_{s}_{id}_{name}.txt')]
 65 | 	
 66 | 		meta_dict['audio_filename'] += [audio]
 67 | 
 68 | 	feature_time = time.time()
 69 | 	print(f"The total number of the mixture audio is {audios_num}")
 70 | 	def process_unit(n):
 71 | 	
 72 | 		name = meta_dict['audio_filename'][n]['mix']
 73 | 		print(name)
 74 | 		audio_path = os.path.join(dataset_dir, name)
 75 | 		(audio, _) = librosa.core.load(audio_path, sr=SAMPLE_RATE, mono=True)
 76 | 		packed_hdf5_path = os.path.join(feature_dir, '{}.h5'.format(os.path.splitext(name)[0]))
 77 | 		mkdir(os.path.dirname(packed_hdf5_path))
 78 | 		with h5py.File(packed_hdf5_path, 'w') as hf:
 79 | 			#hf.attrs.create('midi_filename', data=meta_dict['midi_filename'][n].encode(), dtype='S100')
 80 | 			hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
 81 | 
 82 | 		for i, name in enumerate(meta_dict['audio_filename'][n]['separated_sources']):
 83 | 			audio_path = os.path.join(dataset_dir, name)
 84 | 
 85 | 			(audio, _) = librosa.core.load(audio_path, sr=SAMPLE_RATE, mono=True)
 86 | 			(hq_audio, _) = librosa.core.load(audio_path, sr=SAMPLE_RATE * 2, mono=True)
 87 | 
 88 | 			note_annotations_path = os.path.join(dataset_dir, meta_dict['audio_filename'][n]['note_annotations'][i])
 89 | 			note_annotations = read_lst(note_annotations_path)
 90 | 			note_annotations = [notes.split('\t\t') for notes in note_annotations]
 91 | 			note_annotations = [[notes[0], float(notes[2]) + float(notes[0]), float(freq2note(notes[1]))] for notes in note_annotations]
 92 | 			note_annotations = np.array(note_annotations, dtype = np.float32)
 93 | 			note_annotations_lst = ['%s\t%s\t%s' % (notes[0], str(notes[1]), str(notes[2])) for notes in note_annotations]
 94 | 			ref_path = os.path.join(feature_dir, '{}_ref.txt'.format(os.path.splitext(name)[0]))
 95 | 			mkdir(os.path.dirname(packed_hdf5_path))
 96 | 			write_lst(ref_path, note_annotations_lst)
 97 | 
 98 | 			duration = (audio.shape[-1] + SAMPLE_RATE - 1) // SAMPLE_RATE
 99 | 			target_processor = TargetProcessor(duration, FRAMES_PER_SEC, BEGIN_NOTE, NOTES_NUM_EXCLUDE_SILENCE)
100 | 			target_dict = target_processor.process(0, note_annotations)
101 | 			frame_roll = np.array(target_dict['frame_roll'], dtype=np.int16)
102 | 			
103 | 
104 | 			train_packed_hdf5_path = os.path.join(feature_dir, '{}._TRAIN.h5'.format(os.path.splitext(name)[0]))
105 | 			test_packed_hdf5_path = os.path.join(feature_dir, '{}._TEST.h5'.format(os.path.splitext(name)[0]))
106 | 
107 | 			scale = 9
108 | 			dense_audio = remove_empty_segment(audio, frame_roll, SAMPLE_RATE)
109 | 			dense_hq_audio = remove_empty_segment(hq_audio, frame_roll, SAMPLE_RATE * 2)
110 | 
111 | 			for i in range(scale):
112 | 				shift_pitch = i - (scale // 2)
113 | 				packed_hdf5_path = os.path.join(feature_dir, '{}._TRAIN_shift_pitch_{}.h5'.format(os.path.splitext(name)[0], shift_pitch))
114 | 				if os.path.exists(packed_hdf5_path):
115 | 					continue
116 | 
117 | 				if shift_pitch == 0:
118 | 					shift_audio = audio
119 | 					shift_dense_audio = dense_audio
120 | 				else:
121 | 					shift_audio = librosa.effects.pitch_shift(hq_audio, SAMPLE_RATE * 2, n_steps=shift_pitch)	
122 | 					shift_audio = librosa.core.resample(shift_audio, SAMPLE_RATE * 2, SAMPLE_RATE)	
123 | 					shift_dense_audio = librosa.effects.pitch_shift(dense_hq_audio, SAMPLE_RATE * 2, n_steps=shift_pitch)
124 | 					shift_dense_audio = librosa.core.resample(shift_dense_audio, SAMPLE_RATE * 2, SAMPLE_RATE)
125 | 
126 | 				shift_frame_roll = frame_roll.copy() + shift_pitch
127 | 				shift_frame_roll[shift_frame_roll == NOTES_NUM_EXCLUDE_SILENCE + shift_pitch] = NOTES_NUM_EXCLUDE_SILENCE
128 | 				shift_frame_roll = np.clip(shift_frame_roll, 0, NOTES_NUM_EXCLUDE_SILENCE)
129 | 
130 | 				with h5py.File(packed_hdf5_path, 'w') as hf:
131 | 					hf.create_dataset(name='shift_waveform', data=float32_to_int16(shift_audio), dtype=np.int16)
132 | 					hf.create_dataset(name='shift_dense_waveform', data=float32_to_int16(shift_dense_audio), dtype=np.int16)
133 | 					hf.create_dataset(name='frame_roll', data=shift_frame_roll, dtype=np.int16)
134 | 
135 | 			with h5py.File(train_packed_hdf5_path, 'w') as hf:
136 | 				hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
137 | 				hf.create_dataset(name='frame_roll', data=frame_roll, dtype=np.int16)
138 | 
139 | 			with h5py.File(test_packed_hdf5_path, 'w') as hf:				
140 | 				hf.create_dataset(name='waveform', data=float32_to_int16(audio), dtype=np.int16)
141 | 				hf.create_dataset(name='waveform_path', data=[audio_path.encode()], dtype='S200')
142 | 				hf.create_dataset(name='note_annotations_txt', data=[ref_path.encode()], dtype='S200')
143 | 				hf.create_dataset(name='frame_roll', data=frame_roll, dtype=np.int16)	
144 | 
145 | 	def process_group(st, ed, total_num, pid):
146 | 		print(f"process {pid + 1} starts")
147 | 		for n in range(st, ed):
148 | 			process_unit(n)
149 | 			print(f"process {pid + 1} : {n + 1}/{total_num} done.")
150 | 		print(f"process {pid + 1} ends")
151 | 
152 | 
153 | 	audio_groups = get_process_groups(audios_num, process_num)
154 | 	for pid, (st, ed) in enumerate(audio_groups):
155 | 		p = multiprocessing.Process(target = process_group, args = (st, ed, audios_num, pid))
156 | 		p.start()
157 | 
if __name__ == '__main__':

	# Command-line entry point: all three options are mandatory and are handed
	# to pack_urmp_dataset_to_hdf5 as a single argparse namespace.
	parser = argparse.ArgumentParser(description='')
	cli_options = (
		('--dataset_dir', str, 'Directory of dataset.'),
		('--feature_dir', str, 'Directory to store generated files.'),
		('--process_num', int, 'Number of processes.'),
	)
	for flag, value_type, help_text in cli_options:
		parser.add_argument(flag, type=value_type, required=True, help=help_text)

	args = parser.parse_args()
	pack_urmp_dataset_to_hdf5(args)
167 | 		
168 | 


--------------------------------------------------------------------------------
/src/dataset/urmp/urmp_sample.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import numpy as np
  4 | import torch
  5 | from torch.utils.data import Dataset
  6 | from torch.utils.data.dataloader import default_collate
  7 | import random
  8 | import h5py
  9 | from prefetch_generator import BackgroundGenerator
 10 | 
 11 | from utils.utilities import (read_lst, read_config, int16_to_float32, encode_mu_law)
 12 | from utils.audio_utilities import write_audio
 13 | 
 14 | from conf.feature import *
 15 | from conf.sample import *
 16 | from conf.urmp import *
 17 | 
 18 | 
# Module-level random streams with a fixed seed (1234).
# `shuffle_rng` shuffles the per-instrument track-id arrays; `sample_rng` draws
# pitch shifts, segment offsets and instrument indices. Both are shared by
# every dataset object created in this module.
shuffle_rng = np.random.RandomState(1234)
sample_rng = np.random.RandomState(1234)
 21 | 
 22 | 
 23 | class UrmpDataset():
 24 | 	def __init__(self, instr_name):
 25 | 		self._file_lst = read_lst(str.replace(TRAINING_FILE_LST_PATH, "INSTR_NAME", instr_name))
 26 | 		audios_num = len(self._file_lst)
 27 | 		self._data = [None] * audios_num
 28 | 		self._tracks_id = np.arange(audios_num)
 29 | 		self._audios_num = audios_num
 30 | 		self._current_id = 0
 31 | 		self.tag = -1
 32 | 
 33 | 
 34 | 
 35 | 	def __get_next_track_id__(self, pos):
 36 | 		audios_num = self._audios_num
 37 | 		current_id = pos % audios_num
 38 | 		shuffle_rng.shuffle(self._tracks_id)
 39 | 		nid = self._tracks_id[current_id]
 40 | 		return nid
 41 | 
 42 | 	def next_sample(self, pos=None, is_query=False):
 43 | 
 44 | 		def is_silence(x):
 45 | 			return x.shape[-1] * 88 == x.sum()
 46 | 
 47 | 		def frame_roll_mask(x, y):
 48 | 			mask = np.ones_like(x)
 49 | 			mask[x == 88] = 0
 50 | 			mask[y == 88] = 1
 51 | 			return mask
 52 | 
 53 | 		def load_file(pos, track_id, shift_pitch):
 54 | 			if self._data[track_id] is None:
 55 | 				hdf5_path = self._file_lst[track_id]
 56 | 				datas = []
 57 | 				for i in range(POS_SHIFT_SEMITONE):
 58 | 					data = {}
 59 | 					train_hdf5_path = str.replace(hdf5_path, '.h5', f'._TRAIN_shift_pitch_{i - SHIFT_SEMITONE}.h5')
 60 | 					hf = h5py.File(train_hdf5_path, 'r')
 61 | 					data = {'shift_waveform': int16_to_float32(hf['shift_waveform'][:])[None, :],
 62 | 						'shift_dense_waveform' : int16_to_float32(hf['shift_dense_waveform'][:])[None, :],
 63 | 						'frame_roll': hf['frame_roll'][:].astype(np.int)}
 64 | 					datas.append(data)
 65 | 				self._data[track_id] = datas
 66 | 			return self._data[track_id][shift_pitch + SHIFT_SEMITONE]
 67 | 
 68 | 		def load_cache_data(pos, track_id, other_nid, another_nid, is_query):
 69 | 
 70 | 			if is_query:
 71 | 				shift_pitch = sample_rng.randint(0, POS_SHIFT_SEMITONE) - SHIFT_SEMITONE 
 72 | 				hf = load_file(pos, other_nid, shift_pitch)
 73 | 				shift_dense_waveform = hf['shift_dense_waveform']
 74 | 				st = sample_rng.randint(0, shift_dense_waveform.shape[1] - SAMPLE_DURATION)
 75 | 				query_waveform = shift_dense_waveform[:, st : st + SAMPLE_DURATION].copy()
 76 | 
 77 | 				shift_pitch = sample_rng.randint(0, POS_SHIFT_SEMITONE) - SHIFT_SEMITONE
 78 | 				hf = load_file(pos, another_nid, shift_pitch)
 79 | 				shift_dense_waveform = hf['shift_dense_waveform']
 80 | 				st = sample_rng.randint(0, shift_dense_waveform.shape[1] - SAMPLE_DURATION)
 81 | 				another_query_waveform = shift_dense_waveform[:, st : st + SAMPLE_DURATION].copy()
 82 | 
 83 | 				return query_waveform, another_query_waveform
 84 | 
 85 | 			else:
 86 | 
 87 | 				shift_pitch = sample_rng.randint(0, POS_SHIFT_SEMITONE) - SHIFT_SEMITONE
 88 | 				hf = load_file(pos, track_id, shift_pitch)
 89 | 				waveform = hf['shift_waveform']
 90 | 				frame_roll = hf['frame_roll']
 91 | 
 92 | 				shift_pitch = sample_rng.randint(0, POS_SHIFT_SEMITONE) - SHIFT_SEMITONE
 93 | 				hf = load_file(pos, track_id, shift_pitch)
 94 | 				strong_waveform = hf['shift_waveform']
 95 | 				another_frame_roll = hf['frame_roll']
 96 | 
 97 | 				start_time = sample_rng.randint(0, int((waveform.shape[-1] - SAMPLE_DURATION) / SAMPLE_RATE))
 98 | 				st = start_time * SAMPLE_RATE
 99 | 				frame_roll_st = int(start_time * FRAMES_PER_SEC)
100 | 				ed = frame_roll_st + FRAME_DURATION + 1
101 | 				obj_frame_roll = frame_roll[frame_roll_st : ed].copy()
102 | 					
103 | 				another_start_time = sample_rng.randint(0, int((waveform.shape[-1] - SAMPLE_DURATION) / SAMPLE_RATE)) if is_silence(obj_frame_roll) else start_time
104 | 				another_st = another_start_time * SAMPLE_RATE
105 | 				another_frame_roll_st = int(another_start_time * FRAMES_PER_SEC)
106 | 				another_ed = another_frame_roll_st + FRAME_DURATION + 1
107 | 				another_frame_roll = another_frame_roll[another_frame_roll_st : another_ed].copy()
108 | 
109 | 				ori_waveform = waveform[:, st : st + SAMPLE_DURATION].copy()
110 | 				strong_waveform = strong_waveform[:, another_st : another_st + SAMPLE_DURATION].copy()
111 | 		
112 | 				return (ori_waveform, strong_waveform, obj_frame_roll, another_frame_roll)
113 | 
114 | 		def get_next_track(pos=None, is_query=False):
115 | 			nid = self.__get_next_track_id__(pos)
116 | 			other_nid = self.__get_next_track_id__(pos + 1)
117 | 			another_nid = self.__get_next_track_id__(pos + 2)
118 | 			return load_cache_data(pos, nid, other_nid, another_nid, is_query)
119 | 
120 | 		tracks = get_next_track(pos, is_query)
121 | 		return tracks
122 | 	
123 | 	def get_samples_num(self):
124 | 		return len(self._file_lst)
125 | 
126 | 
127 | 
class UrmpSample(Dataset):
	"""Map-style dataset that assembles random multi-instrument training examples.

	One UrmpDataset is created per entry of SEEN_INSTRUMENTS. Each item mixes
	excerpts from several distinct instruments, chosen at random with
	probability proportional to their number of tracks.
	"""

	def __init__(self):
		super(UrmpSample, self).__init__()

		self._datasets = {instr: UrmpDataset(instr) for instr in SEEN_INSTRUMENTS}
		self._datasets_index = list(self._datasets)

		# Running totals of track counts: entry k is the number of tracks in
		# the first k instruments, so the last entry is the grand total.
		running_totals = [0]
		for instr_name in self._datasets_index:
			running_totals.append(running_totals[-1] + self._datasets[instr_name].get_samples_num())
		self.datasets_samples_num = running_totals

	def __iter__(self):
		# NOTE(review): relies on the parent class providing __iter__ - confirm
		# that torch's Dataset does before iterating this object directly.
		parent_iterator = super().__iter__()
		return BackgroundGenerator(parent_iterator)

	def __get_train_sample__(self, index, instr_indexs, is_query):
		"""Sample every instrument in `instr_indexs` and stack the results per output slot.

		Returns a list with one array per element of the tuple produced by
		`next_sample`; axis 0 of each array runs over the instruments.
		"""
		per_slot = []
		for instr in instr_indexs:
			source = self._datasets[self._datasets_index[instr]]
			for slot, item in enumerate(source.next_sample(index, is_query)):
				if slot == len(per_slot):
					per_slot.append([])
				per_slot[slot].append(item)

		# Stacking adds the leading instrument axis.
		return [np.stack(items, 0) for items in per_slot]

	def __sample_seen_instruments__(self):
		"""Draw an instrument index with probability proportional to its track count."""
		bounds = self.datasets_samples_num
		draw = sample_rng.randint(bounds[-1])
		for slot, upper in enumerate(bounds[1:]):
			if draw < upper:
				return slot

		assert False

	def __getitem__(self, index = 0):
		"""Build one training example.

		Returns `(mix, strong_mix, batch)` where `batch` is
		`(separated, query_separated, another_query_separated, target, another_target)`.
		"""
		up_bound = min(UP_BOUND, SEEN_INSTRUMENTS_NUM)

		# Rejection-sample until enough distinct instruments are chosen.
		selected_ids = []
		while len(selected_ids) < up_bound:
			candidate = self.__sample_seen_instruments__()
			if candidate in selected_ids:
				continue
			selected_ids.append(candidate)

		# Only the first SOURCES_NUM_OF_MIXTURE instruments contribute to the
		# mixtures; queries are drawn for every selected instrument.
		mixture_ids = selected_ids[ :SOURCES_NUM_OF_MIXTURE]
		(separated, strong_separated, target, another_target) = self.__get_train_sample__(index, mixture_ids, is_query=False)
		(query_separated, another_query_separated) = self.__get_train_sample__(index, selected_ids, is_query=True)

		separated = torch.from_numpy(separated).float()
		strong_separated = torch.from_numpy(strong_separated).float()
		mix = separated.sum(0)
		strong_mix = strong_separated.sum(0)

		batch = (
			separated,
			torch.from_numpy(query_separated).float(),
			torch.from_numpy(another_query_separated).float(),
			torch.from_numpy(target).long(),
			torch.from_numpy(another_target).long(),
		)
		return mix, strong_mix, batch

	def __len__(self):
		return SAMPLES_NUM

	def get_len(self):
		"""Return the number of items per epoch (same as `len(self)`)."""
		return len(self)

	def get_collate_fn(self):
		"""Return the collate function to pass to the DataLoader."""
		return default_collate
207 | 
208 | 


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import time
  4 | import numpy as np
  5 | import random
  6 | import argparse
  7 | import torch
  8 | import torch.nn as nn
  9 | import torch.nn.functional as F
 10 | 
 11 | import src
 12 | from dataset.urmp.urmp_sample import UrmpSample
 13 | from models.model_factory import ModelFactory
 14 | from utils.utilities import (compute_time, save_score, mkdir)
 15 | from utils.multiEpochsDataLoader import MultiEpochsDataLoader as DataLoader
 16 | from conf.sample import *
 17 | from conf.feature import *
 18 | 
 19 | def seed_torch(seed=1234):
 20 | 	os.environ['PYTHONHASHSEED'] = str(seed)
 21 | 	torch.manual_seed(seed)
 22 | 	torch.cuda.manual_seed_all(seed)
 23 | 	np.random.seed(seed)
 24 | 	random.seed(seed)
 25 | 	torch.cuda.manual_seed_all(seed)
 26 | 	torch.backends.cudnn.deterministic = True
 27 | 	torch.backends.cudnn.benchmark = False
 28 | 
 29 | def mae(input, target):
 30 | 	return torch.mean(torch.abs(input - target))
 31 | 
 32 | def align(a, b, dim):
 33 | 	return a.transpose(0, dim)[:b.shape[dim]].transpose(0, dim)
 34 | 
 35 | def onehot(x, dim, classes_num):
 36 | 	x = x.unsqueeze(dim)
 37 | 	shape = list(x.shape)
 38 | 	shape[dim] = classes_num
 39 | 	y = torch.zeros(shape).to(x.device).scatter_(dim, x, 1)
 40 | 	return y
 41 | 
 42 | def move_data2cuda(urmp_batch):
 43 | 	mix, another_mix, batch = urmp_batch
 44 | 	separated, query, another_query, pitch_target, another_pitch_target = batch
 45 | 	batch = [separated, query, another_query, pitch_target, another_pitch_target]
 46 | 	for i, b in enumerate(batch):
 47 | 		batch[i] = b.cuda()
 48 | 	mix = mix.cuda()
 49 | 	another_mix = another_mix.cuda()
 50 | 	return mix, another_mix, batch	
 51 | 
 52 | def train_step(network, urmp_batch, mode, adv_id=0):
 53 | 	mix, another_mix, batch = urmp_batch
 54 | 	separated, query, another_query, pitch_target, another_pitch_target = batch
 55 | 
 56 | 	a = 1./ 8.
 57 | 	if mode == 'query':
 58 | 		
 59 | 		#contrastive loss
 60 | 
 61 | 		latent_vectors = []
 62 | 		hQuery = []
 63 | 		for i in range(query.shape[1]):
 64 | 			query_spec = network(query[:, i], 'wav2spec')
 65 | 			another_query_spec = network(another_query[:, i], 'wav2spec')
 66 | 			h = network(query_spec, 'query')
 67 | 			hc = network(another_query_spec, 'query')
 68 | 			latent_vectors.append([h, hc])
 69 | 		sim = 0.
 70 | 		sep_num = query.shape[1]
 71 | 		batch_size = query.shape[0]
 72 | 		for i in range(sep_num):
 73 | 			next_i = (i + 1) % sep_num
 74 | 			sim += torch.mean((latent_vectors[i][0] - latent_vectors[i][1])**2, dim=-1) + \
 75 | 				torch.relu(a - torch.mean((latent_vectors[i][0] - latent_vectors[next_i][1])**2, dim=-1))
 76 | 		sim_loss = sim.mean() / sep_num
 77 | 		return sim_loss, f'{sim_loss.item()}'
 78 | 
 79 | 
 80 | 	elif mode == 'AMT':
 81 | 
 82 | 		# transcription loss for AMT-only baseline		
 83 | 
 84 | 		pitch_transcription = []
 85 | 		mix_spec = network(mix, 'wav2spec')
 86 | 		for i in range(separated.shape[1]):
 87 | 			query_spec = network(query[:, i], 'wav2spec')
 88 | 			hQuery = network(query_spec, "query")
 89 | 			args = (mix_spec, hQuery)
 90 | 			prob = network(args, 'transcribe')
 91 | 			pitch_transcription.append(prob)
 92 | 
 93 | 		transcription = torch.stack(pitch_transcription, 2)
 94 | 		pitch_loss = nn.CrossEntropyLoss()(transcription, align(pitch_target, transcription, -1))
 95 | 		return pitch_loss, f'{pitch_loss.item()}'
 96 | 	
 97 | 	elif mode == 'MSS':
 98 | 
 99 | 		# separation loss for MSS-only baseline
100 | 
101 | 		spec_losses = []
102 | 		mix_spec = network(mix, 'wav2spec')
103 | 		for i in range(separated.shape[1]):
104 | 			query_spec = network(query[:, i], 'wav2spec')
105 | 			hQuery = network(query_spec, "query")
106 | 			source_spec = network(separated[:, i], 'wav2spec')
107 | 			args = (mix_spec, hQuery)
108 | 			est_spec = network(args, 'separate')
109 | 			spec_loss = torch.abs(est_spec - align(source_spec, est_spec, -2))
110 | 			spec_losses.append(spec_loss)
111 | 
112 | 		spec_loss = torch.stack(spec_losses, 1)
113 | 		spec_loss = spec_loss.mean()
114 | 		return spec_loss, f'{spec_loss.item()}'
115 | 
116 | 
117 | 	elif mode == 'MSS-AMT':
118 | 
119 | 		# separation and transcription loss for muli-task baseline and multi-task score-informed (MSI) model
120 | 
121 | 		spec_losses = []
122 | 		pitch_transcription = []
123 | 		mix_spec = network(mix, 'wav2spec')
124 | 		for i in range(separated.shape[1]):
125 | 			source_spec = network(separated[:, i], 'wav2spec')
126 | 			query_spec = network(query[:, i], 'wav2spec')
127 | 			hQuery = network(query_spec, "query")
128 | 			args = (mix_spec, hQuery)
129 | 			est_spec, prob = network(args, 'multiTask')
130 | 			pitch_transcription.append(prob)
131 | 			spec_loss = torch.abs(est_spec - align(source_spec, est_spec, -2))
132 | 			spec_losses.append(spec_loss)
133 | 
134 | 		transcription = torch.stack(pitch_transcription, 2)
135 | 		pitch_loss = nn.CrossEntropyLoss()(transcription, align(pitch_target, transcription, -1))
136 | 
137 | 		spec_loss = torch.stack(spec_losses, 1)
138 | 		spec_loss = spec_loss.mean()
139 | 		return spec_loss + pitch_loss, f'{spec_loss.item()} {pitch_loss.item()}'
140 | 
141 | 
142 | 
143 | 	elif mode == 'MSI-DIS':
144 | 
145 | 		# transcription loss and pitch-translation invariance loss for MSI-DIS model
146 | 
147 | 		spec_losses = []
148 | 		another_mix_spec = network(another_mix, 'wav2spec')
149 | 		mix_spec = network(mix, 'wav2spec')
150 | 		target = onehot(pitch_target, 1, NOTES_NUM)
151 | 		another_target = onehot(another_pitch_target, 1, NOTES_NUM)
152 | 
153 | 		pitch_transcription = []
154 | 		another_pitch_transcription = []
155 | 
156 | 		for i in range(separated.shape[1]):
157 | 			source_spec = network(separated[:, i], 'wav2spec')
158 | 
159 | 			query_spec = network(query[:, i], 'wav2spec')
160 | 			hQuery = network(query_spec, "query")
161 | 
162 | 			args = (mix_spec, another_mix_spec, hQuery)
163 | 			est_spec, target_prob = network(args, 'transfer')
164 | 
165 | 			pitch_transcription.append(target_prob)
166 | 			spec_loss = torch.abs(est_spec - align(source_spec, est_spec, -2))
167 | 			spec_losses.append(spec_loss)
168 | 
169 | 		spec_loss = torch.stack(spec_losses, 1)
170 | 		spec_loss = spec_loss.mean()
171 | 
172 | 		transcription = torch.stack(pitch_transcription, 2)
173 | 		pitch_loss = nn.CrossEntropyLoss()(transcription, align(pitch_target, transcription, -1))
174 | 
175 | 		return spec_loss + pitch_loss, f'{spec_loss.item()} {pitch_loss.item()}'
176 | 
177 | 
178 | 
179 | 
def train(model_name, load_epoch, epoch, model_folder):
	"""Train a model on URMP and save a checkpoint after every epoch.

	Args:
		model_name: name passed to ModelFactory; it also selects which
			objectives are optimised (see `get_parameters`).
		load_epoch: epoch whose checkpoint is loaded before training; a
			negative value starts from scratch. Training resumes at
			`load_epoch + 1`.
		epoch: exclusive upper bound of the epoch index.
		model_folder: directory holding `params_epoch-<n>.pkl` checkpoints.
	"""

	nnet = ModelFactory(model_name)
	nnet = nnet.cuda()

	learning_rate = LEARNING_RATE

	mkdir(model_folder)

	if load_epoch >= 0:
		model_path = f'{model_folder}/params_epoch-{load_epoch}.pkl'
		nnet.load_state_dict(torch.load(model_path), strict=True)

	resume_epoch = load_epoch + 1

	urmp_data = UrmpSample()

	urmp_loader = DataLoader(urmp_data,
		batch_size=TRAINING_BATCH_SIZE, shuffle=False, num_workers=1, pin_memory=True, persistent_workers=False,
		collate_fn=urmp_data.get_collate_fn())

	def get_parameters(nnet, model_name):
		"""Map each training mode used for `model_name` to the parameters it updates.

		Every mode receives the full parameter list of `nnet.network`; the
		'query' mode is always included.
		"""
		parameters = {}
		parameters['query'] = list(nnet.network.parameters())

		if model_name in ['MSI']:
			parameters['MSS-AMT'] = list(nnet.network.parameters())
		if model_name in ['UNET']:
			parameters['MSS'] = list(nnet.network.parameters())
		if model_name in ['MSI-DIS', 'AMT', 'MSS', 'MSS-AMT']:
			parameters[model_name] = list(nnet.network.parameters())

		return parameters

	def get_optimizer(r_epoch, parameters):
		"""Create one Adam optimizer per mode, with the learning rate halved every DECAY epochs."""
		optimizers = []
		for param in parameters:
			optimizer = torch.optim.Adam(parameters[param], lr=learning_rate / (2**(r_epoch // DECAY)), \
					betas=(0.9, 0.999), eps=1e-08, weight_decay=0., amsgrad=True)
			optimizers.append({'mode' : param, 'opt': optimizer, 'name' : param})
		return optimizers

	parameters = get_parameters(nnet, model_name)
	optimizer = get_optimizer(resume_epoch, parameters)
	step_per_epoch = urmp_data.get_len() // TRAINING_BATCH_SIZE

	pre_time = time.time()
	pre_time = compute_time(f'begin train...', pre_time)
	nnet.train()
	pre_time = compute_time(f'train done', pre_time)
	for i in range(resume_epoch, epoch):
		if i % DECAY == 0:
			pre_time = compute_time(f'begin update op...', pre_time)
			# Rebuild the optimizers for the CURRENT epoch so that the decayed
			# rate printed below is the one actually used. The original passed
			# `resume_epoch`, which left the learning rate constant.
			optimizer = get_optimizer(i, parameters)
			print('learning rate', learning_rate / (2**(i // DECAY)))

		for i_batch, urmp_batch in enumerate(urmp_loader):
			urmp_batch = move_data2cuda(urmp_batch)
			# One optimisation step per training mode on the same batch.
			for entry in optimizer:
				op = entry['opt']
				op.zero_grad()
				loss, loss_text = train_step(nnet, urmp_batch, entry['mode'])
				loss.backward()
				op.step()
				print(f"update {entry['mode']} network epoch {i} loss: {i_batch}/{step_per_epoch}", loss_text)
				del loss
		torch.save(nnet.state_dict(), f"{model_folder}/params_epoch-{i}.pkl")
if __name__ == "__main__":

	# Fix all random seeds before any model or data object is created.
	seed_torch(1234)

	parser = argparse.ArgumentParser(description='')
	parser.add_argument('--model_name', type=str, required=True, help='Model name in [`AMT` for trainscription-only baseline, \
			`MSS` for separation-only baseline, \
			`MSS-AMT` for multi-task baseline, \
			`MSI` for the proposed multi-task score-informed model, \
			`MSI-DIS` for the proposed multi-task score-informed with further disentanglement model].')
	parser.add_argument('--resume_epoch', type=int, default=-1, help='Epoch to resume training.')
	parser.add_argument('--model_folder', type=str, required=True, help='Directory to store model weights.')
	parser.add_argument('--epoch', type=int, default=200, help='Number of total training epochs.')

	args = parser.parse_args()

	# Validate explicitly: an `assert` would be stripped under `python -O`
	# and let an unsupported model name reach the training code.
	valid_model_names = ["AMT", "MSS", "MSS-AMT", "MSI", "MSI-DIS"]
	if args.model_name not in valid_model_names:
		parser.error(f"--model_name must be one of {valid_model_names}, got {args.model_name!r}")

	train(model_name=args.model_name,
		load_epoch=args.resume_epoch,
		epoch=args.epoch,
		model_folder=args.model_folder)
272 | 


--------------------------------------------------------------------------------