├── README.md ├── attention_layer.py ├── config.py ├── data ├── test_adc.txt ├── test_melody.txt ├── test_mirex.txt ├── train_data.txt ├── train_data_extra.txt ├── train_data_small.txt └── train_data_small_extra.txt ├── data_generator.py ├── feature_extraction.py ├── ftanet.py ├── main.py ├── mcdnn.py ├── model_backup ├── 0_84.48420698924731_best.txt ├── 0_86.91616263440861_best.txt ├── 1_89.19454225352112_best.txt ├── 1_89.7593896713615_best.txt ├── 2_73.70216679030662_best.txt └── 2_74.52630440158259_best.txt ├── msnet.py ├── multi_dr.py ├── piano_net.py ├── tonet.py └── util.py /README.md: -------------------------------------------------------------------------------- 1 | ### KKNet 2 | 3 | An implementation of "[Towards Improving Harmonic Sensitivity and Prediction Stability for Singing Melody Extraction](https://arxiv.org/abs/2308.02723)", in ISMIR 2023 4 | 5 | Will update training/inference instructions soon. Basically ``python feature_extraction.py`` for caching CFP/z-CFP before training. Then ``python main.py train`` will call ``tonet.py`` and start the main training loop. ``tonet.py`` in turn calls the PianoNet model in ``piano_net.py``. 
# ===== README.md (continued) =====
#
# Standalone testing can be done using ``python main.py test``
#
# The data used for the experiments can be found here:
# https://drive.google.com/file/d/1QKX6rpuRxMPt54HOqNQztmLQqGlCALZ4/view?usp=sharing

# ===== attention_layer.py =====
# Attention layer
# from https://github.com/jadore801120/attention-is-all-you-need-pytorch
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention: softmax(q·k^T / temperature)·v."""

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)

    def forward(self, q, k, v, mask=None):
        # Raw attention scores, scaled by the temperature before the matmul.
        scores = torch.matmul(q / self.temperature, k.transpose(2, 3))

        # Positions where mask == 0 are pushed to -1e9 so softmax zeroes them.
        if mask is not None:
            scores = scores.masked_fill(mask == 0, -1e9)

        attn = self.dropout(F.softmax(scores, dim=-1))
        output = torch.matmul(attn, v)
        return output, attn


class MultiHeadAttention(nn.Module):
    """Multi-head attention with pre-layer-norm on the queries and a residual add."""

    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
        super().__init__()

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        # Per-head projections are fused into single linear layers.
        self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False)
        self.fc = nn.Linear(n_head * d_v, d_model, bias=False)

        self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5)

        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

    def forward(self, q, k, v, mask=None):
        residual = q
        batch = q.size(0)
        q_len, k_len, v_len = q.size(1), k.size(1), v.size(1)

        # Pre-norm design: only the query stream is normalized here.
        q = self.layer_norm(q)

        # Project, split into heads (b x len x n x d), then move the head
        # axis forward for the attention matmul (b x n x len x d).
        q = self.w_qs(q).view(batch, q_len, self.n_head, self.d_k).transpose(1, 2)
        k = self.w_ks(k).view(batch, k_len, self.n_head, self.d_k).transpose(1, 2)
        v = self.w_vs(v).view(batch, v_len, self.n_head, self.d_v).transpose(1, 2)

        if mask is not None:
            mask = mask.unsqueeze(1)  # broadcast over the head axis

        out, attn = self.attention(q, k, v, mask=mask)

        # Merge heads back: b x lq x (n*dv), then project and add the residual.
        out = out.transpose(1, 2).contiguous().view(batch, q_len, -1)
        out = self.dropout(self.fc(out)) + residual
        return out, attn


class PositionwiseFeedForward(nn.Module):
    """Two-layer position-wise feed-forward block (pre-norm, residual)."""

    def __init__(self, d_in, d_hid, dropout=0.1):
        super().__init__()
        self.w_1 = nn.Linear(d_in, d_hid)  # position-wise
        self.w_2 = nn.Linear(d_hid, d_in)  # position-wise
        self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        residual = x
        hidden = F.relu(self.w_1(self.layer_norm(x)))
        return self.dropout(self.w_2(hidden)) + residual


class PositionalEncoding(nn.Module):
    """Additive sinusoidal positional encoding (fixed, not learned)."""

    def __init__(self, d_hid, n_position=200):
        super(PositionalEncoding, self).__init__()
        # Registered as a buffer so it moves with .to(device) but is not trained.
        self.register_buffer('pos_table', self._get_sinusoid_encoding_table(n_position, d_hid))

    def _get_sinusoid_encoding_table(self, n_position, d_hid):
        """Sinusoid position encoding table of shape (1, n_position, d_hid)."""
        # TODO: make it with torch instead of numpy
        positions = np.arange(n_position, dtype=float)[:, None]
        dims = np.arange(d_hid)[None, :]
        # angle[pos, j] = pos / 10000^(2*(j//2)/d_hid); sin on even dims, cos on odd.
        table = positions / np.power(10000, 2 * (dims // 2) / d_hid)
        table[:, 0::2] = np.sin(table[:, 0::2])  # dim 2i
        table[:, 1::2] = np.cos(table[:, 1::2])  # dim 2i+1
        return torch.FloatTensor(table).unsqueeze(0)

    def forward(self, x):
        pe = self.pos_table[:, :x.size(1)].clone().detach()
        return x + pe


class CombineLayer(nn.Module):
    """Transformer encoder component: self-attention followed by a FFN."""

    def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
        super(CombineLayer, self).__init__()
        self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
        self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)

    def forward(self, enc_input, slf_attn_mask=None):
        enc_output, enc_slf_attn = self.slf_attn(
            enc_input, enc_input, enc_input, mask=slf_attn_mask)
        enc_output = self.pos_ffn(enc_output)
        return enc_output, enc_slf_attn


# ===== config.py (first part) =====

# exp_name = "dummy"
# file path
model_type = "dummy"  # MCDNN, FTANet, MSNet, MLDRNet, dummy
data_path = "data"
train_file = "data/train_data.txt"
test_file = [
    "data/test_adc.txt",
    "data/test_mirex.txt",
    "data/test_melody.txt"
]

save_path = "model_backup"
resume_checkpoint = "model_backup/bestk_0.ckpt"
# resume_checkpoint = "model_backup/TO-FTANet_mirex_best.ckpt"
# "model_backup/TO-FTANet_adc_best.ckpt" # the model checkpoint

# train config
batch_size = 10
lr = 1e-4
epochs = 1000
n_workers = 4
save_period = 1
tone_class = 12
# 60 25 | octave_class = 8 # 6 26 | random_seed = 19961206 27 | max_epoch = 500 28 | freq_bin = 360 29 | 30 | ablation_mode = "single" # single, tcfp, spl, spat, all, a parameter inherited from TONet's code, and remain single for our simplified model 31 | 32 | include_model_tweak = False # small tweak on vocal detection bin, unsure of its effectiveness 33 | include_loss_component = False # loss component for prediction stability 34 | include_adjusted_exp = False # z-transform 35 | apply_median_filter = True # median filter baseline 36 | 37 | startfreq = 32 38 | stopfreq = 2050 39 | cfp_dir = "cfp_360_new" 40 | 41 | # feature config 42 | fs = 44100.0 43 | hop = 441.0 44 | octave_res = 60 45 | seg_dur = 1.28 # sec 46 | seg_frame = int(seg_dur * fs // hop) 47 | shift_dur = 1.28 # sec 48 | shift_frame = int(shift_dur * fs // hop) 49 | 50 | network_time_shrink_size = 8 51 | -------------------------------------------------------------------------------- /data/test_adc.txt: -------------------------------------------------------------------------------- 1 | daisy1.npy 2 | daisy2.npy 3 | daisy3.npy 4 | daisy4.npy 5 | opera_fem2.npy 6 | opera_fem4.npy 7 | opera_male3.npy 8 | opera_male5.npy 9 | pop1.npy 10 | pop2.npy 11 | pop3.npy 12 | pop4.npy -------------------------------------------------------------------------------- /data/test_melody.txt: -------------------------------------------------------------------------------- 1 | AClassicEducation_NightOwl_MIX.npy 2 | Auctioneer_OurFutureFaces_MIX.npy 3 | CelestialShore_DieForUs_MIX.npy 4 | Creepoid_OldTree_MIX.npy 5 | Debussy_LenfantProdigue_MIX.npy 6 | MatthewEntwistle_DontYouEver_MIX.npy 7 | MatthewEntwistle_Lontano_MIX.npy 8 | Mozart_BesterJungling_MIX.npy 9 | MusicDelta_Gospel_MIX.npy 10 | PortStWillow_StayEven_MIX.npy 11 | Schubert_Erstarrung_MIX.npy 12 | StrandOfOaks_Spacestation_MIX.npy -------------------------------------------------------------------------------- /data/test_mirex.txt: 
-------------------------------------------------------------------------------- 1 | train01.npy 2 | train02.npy 3 | train03.npy 4 | train04.npy 5 | train05.npy 6 | train06.npy 7 | train07.npy 8 | train08.npy 9 | train09.npy -------------------------------------------------------------------------------- /data/train_data.txt: -------------------------------------------------------------------------------- 1 | tammy_1_07.npy 2 | ariel_3_03.npy 3 | heycat_2_07.npy 4 | amy_5_06.npy 5 | geniusturtle_4_09.npy 6 | abjones_4_04.npy 7 | Ani_1_06.npy 8 | bug_2_07.npy 9 | leon_3_12.npy 10 | leon_3_06.npy 11 | bobon_3_03.npy 12 | jmzen_4_06.npy 13 | davidson_4_05.npy 14 | fdps_1_12.npy 15 | khair_1_07.npy 16 | fdps_1_06.npy 17 | leon_1_03.npy 18 | bobon_1_06.npy 19 | davidson_1_09.npy 20 | yifen_5_05.npy 21 | yifen_5_11.npy 22 | khair_3_02.npy 23 | fdps_3_03.npy 24 | ariel_1_06.npy 25 | titon_4_03.npy 26 | amy_7_03.npy 27 | amy_9_06.npy 28 | geniusturtle_1_05.npy 29 | Ani_3_03.npy 30 | amy_15_11.npy 31 | amy_15_05.npy 32 | annar_2_02.npy 33 | stool_2_01.npy 34 | stool_5_08.npy 35 | annar_2_03.npy 36 | amy_15_04.npy 37 | Ani_3_02.npy 38 | Kenshin_5_01.npy 39 | geniusturtle_1_04.npy 40 | amy_9_07.npy 41 | titon_4_02.npy 42 | amy_7_02.npy 43 | geniusturtle_8_08.npy 44 | ariel_1_07.npy 45 | yifen_5_10.npy 46 | fdps_3_02.npy 47 | khair_3_03.npy 48 | davidson_1_08.npy 49 | yifen_5_04.npy 50 | bobon_1_07.npy 51 | leon_1_02.npy 52 | fdps_1_07.npy 53 | khair_1_06.npy 54 | fdps_1_13.npy 55 | davidson_4_04.npy 56 | bobon_3_02.npy 57 | jmzen_4_07.npy 58 | leon_3_07.npy 59 | leon_3_13.npy 60 | bug_2_06.npy 61 | amy_10_08.npy 62 | Ani_1_07.npy 63 | geniusturtle_4_08.npy 64 | abjones_4_05.npy 65 | geniusturtle_3_01.npy 66 | amy_5_07.npy 67 | ariel_3_02.npy 68 | tammy_1_06.npy 69 | heycat_2_06.npy 70 | amy_5_05.npy 71 | heycat_2_04.npy 72 | tammy_1_04.npy 73 | geniusturtle_3_03.npy 74 | abjones_4_07.npy 75 | Ani_1_05.npy 76 | bug_2_04.npy 77 | leon_3_05.npy 78 | leon_3_11.npy 79 | 
bobon_4_09.npy 80 | jmzen_4_05.npy 81 | khair_1_04.npy 82 | fdps_1_05.npy 83 | davidson_4_06.npy 84 | fdps_1_11.npy 85 | leon_6_09.npy 86 | jmzen_1_09.npy 87 | bobon_1_05.npy 88 | khair_3_01.npy 89 | khair_4_08.npy 90 | yifen_5_06.npy 91 | ariel_1_05.npy 92 | geniusturtle_1_06.npy 93 | amy_9_05.npy 94 | amy_15_06.npy 95 | amy_15_12.npy 96 | Ani_4_09.npy 97 | Kenshin_5_03.npy 98 | annar_5_08.npy 99 | annar_2_01.npy 100 | stool_2_02.npy 101 | annar_5_09.npy 102 | stool_2_03.npy 103 | Ani_4_08.npy 104 | amy_15_13.npy 105 | Ani_3_01.npy 106 | Kenshin_5_02.npy 107 | amy_15_07.npy 108 | amy_9_04.npy 109 | geniusturtle_1_07.npy 110 | ariel_1_04.npy 111 | amy_7_01.npy 112 | titon_3_08.npy 113 | titon_4_01.npy 114 | yifen_5_07.npy 115 | fdps_3_01.npy 116 | bobon_1_04.npy 117 | jmzen_1_08.npy 118 | bobon_1_10.npy 119 | leon_1_01.npy 120 | leon_6_08.npy 121 | fdps_1_10.npy 122 | davidson_4_07.npy 123 | fdps_1_04.npy 124 | khair_1_05.npy 125 | jmzen_4_04.npy 126 | bobon_4_08.npy 127 | bobon_3_01.npy 128 | jmzen_4_10.npy 129 | leon_3_10.npy 130 | leon_3_04.npy 131 | bug_2_05.npy 132 | Ani_1_04.npy 133 | abjones_4_06.npy 134 | geniusturtle_3_02.npy 135 | ariel_4_08.npy 136 | heycat_2_05.npy 137 | ariel_3_01.npy 138 | tammy_1_05.npy 139 | amy_5_04.npy 140 | geniusturtle_3_06.npy 141 | abjones_4_02.npy 142 | ariel_3_05.npy 143 | tammy_1_01.npy 144 | heycat_5_08.npy 145 | heycat_2_01.npy 146 | bug_5_08.npy 147 | bug_2_01.npy 148 | jmzen_3_09.npy 149 | bobon_3_05.npy 150 | khair_1_01.npy 151 | fdps_1_14.npy 152 | davidson_4_03.npy 153 | leon_8_09.npy 154 | leon_1_05.npy 155 | leon_1_11.npy 156 | khair_3_04.npy 157 | fdps_3_05.npy 158 | yifen_5_03.npy 159 | geniusturtle_1_03.npy 160 | titon_4_05.npy 161 | amy_7_05.npy 162 | titon_4_11.npy 163 | stool_2_07.npy 164 | annar_2_04.npy 165 | amy_15_03.npy 166 | Kenshin_5_12.npy 167 | Kenshin_5_06.npy 168 | Ani_3_05.npy 169 | Ani_3_04.npy 170 | Kenshin_5_07.npy 171 | amy_15_02.npy 172 | Kenshin_5_13.npy 173 | annar_2_05.npy 174 | 
stool_2_06.npy 175 | titon_4_10.npy 176 | ariel_1_01.npy 177 | titon_4_04.npy 178 | amy_7_04.npy 179 | amy_9_01.npy 180 | geniusturtle_1_02.npy 181 | yifen_5_02.npy 182 | fdps_3_04.npy 183 | khair_3_05.npy 184 | leon_1_10.npy 185 | leon_1_04.npy 186 | bobon_1_01.npy 187 | leon_8_08.npy 188 | davidson_4_02.npy 189 | fdps_1_01.npy 190 | leon_3_01.npy 191 | leon_4_08.npy 192 | bobon_3_04.npy 193 | jmzen_3_08.npy 194 | jmzen_4_01.npy 195 | bobon_3_10.npy 196 | Ani_1_01.npy 197 | bug_5_09.npy 198 | ariel_3_04.npy 199 | titon_1_08.npy 200 | amy_5_01.npy 201 | abjones_4_03.npy 202 | geniusturtle_3_07.npy 203 | geniusturtle_3_05.npy 204 | abjones_4_01.npy 205 | heycat_2_02.npy 206 | ariel_3_06.npy 207 | tammy_1_02.npy 208 | amy_5_03.npy 209 | bug_2_02.npy 210 | Ani_1_03.npy 211 | jmzen_4_03.npy 212 | bobon_3_06.npy 213 | leon_3_03.npy 214 | davidson_3_09.npy 215 | khair_1_02.npy 216 | fdps_1_03.npy 217 | bobon_1_03.npy 218 | leon_1_12.npy 219 | leon_1_06.npy 220 | yifen_2_09.npy 221 | khair_3_07.npy 222 | fdps_3_06.npy 223 | amy_9_03.npy 224 | ariel_1_03.npy 225 | amy_7_06.npy 226 | titon_4_06.npy 227 | stool_2_04.npy 228 | annar_2_07.npy 229 | Kenshin_5_05.npy 230 | Ani_3_06.npy 231 | Kenshin_5_11.npy 232 | Kenshin_5_10.npy 233 | amy_15_01.npy 234 | Kenshin_5_04.npy 235 | annar_2_06.npy 236 | stool_2_05.npy 237 | amy_7_07.npy 238 | titon_4_07.npy 239 | ariel_1_02.npy 240 | geniusturtle_1_01.npy 241 | amy_9_02.npy 242 | fdps_3_07.npy 243 | khair_3_06.npy 244 | yifen_5_01.npy 245 | yifen_2_08.npy 246 | leon_1_07.npy 247 | bobon_1_02.npy 248 | fdps_1_02.npy 249 | khair_1_03.npy 250 | davidson_4_01.npy 251 | davidson_3_08.npy 252 | leon_3_02.npy 253 | jmzen_4_02.npy 254 | bobon_3_07.npy 255 | Ani_1_02.npy 256 | bug_2_03.npy 257 | amy_5_02.npy 258 | heycat_2_03.npy 259 | tammy_1_03.npy 260 | ariel_3_07.npy 261 | geniusturtle_3_04.npy 262 | abjones_3_09.npy 263 | leon_7_01.npy 264 | leon_9_04.npy 265 | yifen_3_07.npy 266 | fdps_5_01.npy 267 | fdps_2_08.npy 268 | amy_1_01.npy 
269 | titon_5_08.npy 270 | amy_6_08.npy 271 | titon_2_01.npy 272 | geniusturtle_7_13.npy 273 | geniusturtle_7_07.npy 274 | Kenshin_3_02.npy 275 | Ani_5_01.npy 276 | amy_13_07.npy 277 | bug_1_09.npy 278 | stool_4_03.npy 279 | ariel_2_08.npy 280 | heycat_4_05.npy 281 | ariel_5_01.npy 282 | amy_3_04.npy 283 | abjones_2_12.npy 284 | geniusturtle_5_02.npy 285 | abjones_2_06.npy 286 | Kenshin_1_07.npy 287 | amy_11_02.npy 288 | leon_5_10.npy 289 | leon_5_04.npy 290 | bobon_2_08.npy 291 | jmzen_2_04.npy 292 | bobon_5_01.npy 293 | jmzen_2_10.npy 294 | davidson_2_07.npy 295 | yifen_1_02.npy 296 | yifen_1_16.npy 297 | davidson_2_06.npy 298 | yifen_1_03.npy 299 | jmzen_2_11.npy 300 | leon_5_05.npy 301 | leon_5_11.npy 302 | amy_11_03.npy 303 | Kenshin_1_06.npy 304 | abjones_2_07.npy 305 | geniusturtle_5_03.npy 306 | amy_3_05.npy 307 | heycat_4_04.npy 308 | ariel_2_09.npy 309 | bug_1_08.npy 310 | annar_3_08.npy 311 | annar_4_01.npy 312 | stool_4_02.npy 313 | amy_13_06.npy 314 | Kenshin_3_03.npy 315 | geniusturtle_7_06.npy 316 | geniusturtle_7_12.npy 317 | titon_5_09.npy 318 | amy_6_09.npy 319 | heycat_1_08.npy 320 | khair_5_01.npy 321 | yifen_3_12.npy 322 | fdps_2_09.npy 323 | yifen_3_06.npy 324 | leon_9_05.npy 325 | leon_7_02.npy 326 | yifen_3_10.npy 327 | fdps_5_02.npy 328 | khair_5_03.npy 329 | yifen_3_04.npy 330 | titon_2_02.npy 331 | amy_1_02.npy 332 | geniusturtle_7_04.npy 333 | geniusturtle_7_10.npy 334 | amy_13_04.npy 335 | Kenshin_3_01.npy 336 | Ani_5_02.npy 337 | Kenshin_4_08.npy 338 | annar_4_03.npy 339 | stool_3_09.npy 340 | amy_3_07.npy 341 | ariel_5_02.npy 342 | heycat_4_06.npy 343 | geniusturtle_2_08.npy 344 | geniusturtle_5_01.npy 345 | abjones_2_05.npy 346 | abjones_2_11.npy 347 | Kenshin_1_10.npy 348 | amy_16_08.npy 349 | amy_11_01.npy 350 | Kenshin_1_04.npy 351 | leon_5_07.npy 352 | bobon_5_02.npy 353 | jmzen_2_07.npy 354 | davidson_2_10.npy 355 | yifen_1_15.npy 356 | yifen_1_01.npy 357 | davidson_2_04.npy 358 | davidson_2_05.npy 359 | yifen_1_14.npy 360 | 
bobon_5_03.npy 361 | jmzen_2_06.npy 362 | jmzen_2_12.npy 363 | leon_5_12.npy 364 | leon_5_06.npy 365 | Kenshin_1_05.npy 366 | Kenshin_1_11.npy 367 | abjones_2_10.npy 368 | abjones_2_04.npy 369 | ariel_5_03.npy 370 | heycat_4_07.npy 371 | amy_3_06.npy 372 | annar_4_02.npy 373 | stool_4_01.npy 374 | stool_3_08.npy 375 | Ani_5_03.npy 376 | Kenshin_4_09.npy 377 | amy_13_05.npy 378 | geniusturtle_7_11.npy 379 | geniusturtle_7_05.npy 380 | titon_2_03.npy 381 | amy_1_03.npy 382 | yifen_3_05.npy 383 | yifen_3_11.npy 384 | khair_5_02.npy 385 | fdps_5_03.npy 386 | leon_9_06.npy 387 | leon_7_03.npy 388 | leon_9_02.npy 389 | leon_7_07.npy 390 | leon_7_13.npy 391 | fdps_5_07.npy 392 | khair_5_06.npy 393 | yifen_3_01.npy 394 | yifen_4_08.npy 395 | geniusturtle_7_01.npy 396 | geniusturtle_7_15.npy 397 | amy_1_07.npy 398 | titon_2_07.npy 399 | annar_4_06.npy 400 | stool_4_05.npy 401 | amy_13_01.npy 402 | Kenshin_3_04.npy 403 | Ani_5_07.npy 404 | geniusturtle_5_04.npy 405 | abjones_5_09.npy 406 | amy_3_02.npy 407 | heycat_4_03.npy 408 | ariel_5_07.npy 409 | bug_4_03.npy 410 | stool_1_09.npy 411 | amy_11_04.npy 412 | Kenshin_1_01.npy 413 | jmzen_2_02.npy 414 | bobon_5_07.npy 415 | leon_5_02.npy 416 | yifen_1_10.npy 417 | davidson_2_01.npy 418 | davidson_5_08.npy 419 | yifen_1_04.npy 420 | davidson_5_09.npy 421 | yifen_1_05.npy 422 | yifen_1_11.npy 423 | leon_5_03.npy 424 | jmzen_2_03.npy 425 | bobon_5_06.npy 426 | bobon_5_12.npy 427 | amy_11_05.npy 428 | bug_4_02.npy 429 | stool_1_08.npy 430 | heycat_4_02.npy 431 | ariel_5_06.npy 432 | amy_3_03.npy 433 | abjones_2_01.npy 434 | abjones_5_08.npy 435 | Ani_5_06.npy 436 | Kenshin_3_05.npy 437 | stool_4_10.npy 438 | stool_4_04.npy 439 | annar_4_07.npy 440 | amy_1_06.npy 441 | titon_2_06.npy 442 | geniusturtle_7_14.npy 443 | yifen_4_09.npy 444 | khair_5_07.npy 445 | fdps_5_06.npy 446 | leon_7_12.npy 447 | leon_7_06.npy 448 | leon_9_03.npy 449 | leon_9_01.npy 450 | leon_7_10.npy 451 | leon_7_04.npy 452 | yifen_3_02.npy 453 | fdps_5_04.npy 
454 | khair_5_05.npy 455 | amy_8_08.npy 456 | geniusturtle_7_02.npy 457 | titon_2_04.npy 458 | amy_1_04.npy 459 | annar_4_05.npy 460 | stool_4_06.npy 461 | Kenshin_3_07.npy 462 | Ani_5_04.npy 463 | amy_13_02.npy 464 | abjones_2_03.npy 465 | ariel_5_04.npy 466 | amy_3_01.npy 467 | amy_4_08.npy 468 | bug_3_09.npy 469 | Kenshin_1_02.npy 470 | amy_11_07.npy 471 | jmzen_5_08.npy 472 | bobon_5_04.npy 473 | jmzen_2_01.npy 474 | bobon_5_10.npy 475 | leon_5_01.npy 476 | leon_2_08.npy 477 | yifen_1_07.npy 478 | davidson_2_02.npy 479 | yifen_1_13.npy 480 | yifen_1_12.npy 481 | yifen_1_06.npy 482 | davidson_2_03.npy 483 | leon_2_09.npy 484 | bobon_5_11.npy 485 | bobon_5_05.npy 486 | jmzen_5_09.npy 487 | amy_11_06.npy 488 | Kenshin_1_03.npy 489 | bug_3_08.npy 490 | annar_1_08.npy 491 | bug_4_01.npy 492 | amy_4_09.npy 493 | ariel_5_05.npy 494 | heycat_3_08.npy 495 | heycat_4_01.npy 496 | abjones_2_02.npy 497 | amy_13_03.npy 498 | Ani_5_05.npy 499 | Kenshin_3_06.npy 500 | stool_4_07.npy 501 | annar_4_04.npy 502 | titon_2_05.npy 503 | amy_1_05.npy 504 | geniusturtle_7_03.npy 505 | khair_5_04.npy 506 | fdps_5_05.npy 507 | yifen_3_03.npy 508 | leon_7_05.npy 509 | leon_7_11.npy 510 | leon_7_08.npy 511 | fdps_2_01.npy 512 | fdps_5_08.npy 513 | yifen_4_07.npy 514 | amy_8_04.npy 515 | amy_6_01.npy 516 | titon_2_08.npy 517 | titon_5_01.npy 518 | annar_4_09.npy 519 | stool_3_03.npy 520 | Ani_5_08.npy 521 | Ani_2_01.npy 522 | Kenshin_4_02.npy 523 | abjones_5_06.npy 524 | geniusturtle_2_02.npy 525 | amy_4_04.npy 526 | ariel_5_08.npy 527 | amy_4_10.npy 528 | heycat_3_05.npy 529 | ariel_2_01.npy 530 | bug_3_05.npy 531 | stool_1_06.npy 532 | annar_1_05.npy 533 | amy_16_02.npy 534 | jmzen_5_04.npy 535 | bobon_5_08.npy 536 | bobon_2_01.npy 537 | leon_2_04.npy 538 | leon_2_10.npy 539 | davidson_5_07.npy 540 | davidson_5_06.npy 541 | leon_2_11.npy 542 | leon_2_05.npy 543 | bobon_5_09.npy 544 | jmzen_5_05.npy 545 | amy_16_03.npy 546 | annar_1_04.npy 547 | stool_1_07.npy 548 | bug_3_04.npy 549 | 
heycat_3_04.npy 550 | amy_4_11.npy 551 | amy_4_05.npy 552 | abjones_5_07.npy 553 | Kenshin_4_03.npy 554 | amy_14_06.npy 555 | annar_4_08.npy 556 | annar_3_01.npy 557 | stool_3_02.npy 558 | heycat_1_01.npy 559 | titon_2_09.npy 560 | amy_8_05.npy 561 | yifen_4_06.npy 562 | khair_2_01.npy 563 | fdps_5_09.npy 564 | leon_7_09.npy 565 | yifen_4_04.npy 566 | yifen_4_10.npy 567 | fdps_2_02.npy 568 | khair_2_03.npy 569 | amy_8_07.npy 570 | heycat_1_03.npy 571 | titon_5_02.npy 572 | amy_6_02.npy 573 | annar_3_03.npy 574 | stool_4_09.npy 575 | Ani_2_02.npy 576 | Kenshin_4_01.npy 577 | Kenshin_3_08.npy 578 | amy_14_04.npy 579 | abjones_5_05.npy 580 | geniusturtle_2_01.npy 581 | ariel_2_02.npy 582 | heycat_3_06.npy 583 | amy_4_07.npy 584 | bug_3_06.npy 585 | stool_1_05.npy 586 | annar_1_06.npy 587 | amy_16_01.npy 588 | bobon_2_02.npy 589 | jmzen_5_07.npy 590 | leon_2_07.npy 591 | yifen_1_08.npy 592 | davidson_5_04.npy 593 | davidson_5_10.npy 594 | davidson_5_11.npy 595 | yifen_1_09.npy 596 | davidson_5_05.npy 597 | leon_2_06.npy 598 | bobon_2_03.npy 599 | jmzen_5_06.npy 600 | annar_1_07.npy 601 | stool_1_04.npy 602 | bug_3_07.npy 603 | amy_4_06.npy 604 | ariel_2_03.npy 605 | heycat_3_07.npy 606 | abjones_5_04.npy 607 | amy_14_05.npy 608 | Ani_2_03.npy 609 | annar_3_02.npy 610 | stool_3_01.npy 611 | stool_4_08.npy 612 | titon_5_03.npy 613 | amy_6_03.npy 614 | heycat_1_02.npy 615 | amy_8_06.npy 616 | yifen_4_11.npy 617 | khair_2_02.npy 618 | fdps_2_03.npy 619 | yifen_4_05.npy 620 | yifen_4_01.npy 621 | yifen_3_08.npy 622 | fdps_2_07.npy 623 | khair_2_06.npy 624 | heycat_1_06.npy 625 | amy_6_07.npy 626 | titon_5_07.npy 627 | amy_8_02.npy 628 | geniusturtle_7_08.npy 629 | Ani_2_07.npy 630 | Kenshin_4_04.npy 631 | Kenshin_4_10.npy 632 | amy_14_01.npy 633 | annar_3_06.npy 634 | stool_3_05.npy 635 | heycat_3_03.npy 636 | ariel_2_07.npy 637 | amy_4_02.npy 638 | abjones_2_09.npy 639 | Kenshin_1_08.npy 640 | amy_16_04.npy 641 | bug_3_03.npy 642 | annar_1_03.npy 643 | leon_2_02.npy 644 | 
jmzen_5_02.npy 645 | bobon_2_07.npy 646 | davidson_5_01.npy 647 | davidson_2_08.npy 648 | davidson_2_09.npy 649 | jmzen_5_03.npy 650 | bobon_2_06.npy 651 | leon_2_03.npy 652 | stool_1_01.npy 653 | annar_1_02.npy 654 | bug_3_02.npy 655 | amy_16_05.npy 656 | Kenshin_1_09.npy 657 | geniusturtle_2_05.npy 658 | abjones_5_01.npy 659 | abjones_2_08.npy 660 | amy_4_03.npy 661 | heycat_3_02.npy 662 | ariel_2_06.npy 663 | stool_3_04.npy 664 | annar_3_07.npy 665 | bug_1_07.npy 666 | stool_3_10.npy 667 | Kenshin_4_11.npy 668 | Kenshin_4_05.npy 669 | Ani_2_06.npy 670 | geniusturtle_7_09.npy 671 | amy_8_03.npy 672 | amy_6_06.npy 673 | titon_5_06.npy 674 | heycat_1_07.npy 675 | khair_2_07.npy 676 | fdps_2_06.npy 677 | fdps_2_12.npy 678 | yifen_3_09.npy 679 | fdps_2_04.npy 680 | khair_2_05.npy 681 | fdps_2_10.npy 682 | yifen_4_02.npy 683 | titon_5_04.npy 684 | amy_6_04.npy 685 | heycat_1_05.npy 686 | amy_6_10.npy 687 | amy_8_01.npy 688 | amy_14_02.npy 689 | Ani_2_04.npy 690 | Kenshin_4_07.npy 691 | annar_3_05.npy 692 | stool_3_06.npy 693 | amy_4_01.npy 694 | amy_3_08.npy 695 | heycat_4_09.npy 696 | ariel_2_04.npy 697 | abjones_5_03.npy 698 | geniusturtle_2_07.npy 699 | amy_16_07.npy 700 | stool_1_03.npy 701 | leon_2_01.npy 702 | leon_5_08.npy 703 | bobon_2_04.npy 704 | jmzen_2_08.npy 705 | jmzen_5_01.npy 706 | davidson_5_02.npy 707 | davidson_5_03.npy 708 | jmzen_2_09.npy 709 | bobon_2_05.npy 710 | leon_5_09.npy 711 | stool_1_02.npy 712 | annar_1_01.npy 713 | bug_3_01.npy 714 | amy_16_06.npy 715 | geniusturtle_2_06.npy 716 | abjones_5_02.npy 717 | ariel_2_05.npy 718 | heycat_4_08.npy 719 | heycat_3_01.npy 720 | bug_1_10.npy 721 | stool_3_07.npy 722 | annar_3_04.npy 723 | Kenshin_4_06.npy 724 | Ani_2_05.npy 725 | amy_14_03.npy 726 | amy_6_11.npy 727 | heycat_1_04.npy 728 | titon_5_05.npy 729 | amy_6_05.npy 730 | fdps_2_11.npy 731 | yifen_4_03.npy 732 | khair_2_04.npy 733 | fdps_2_05.npy 734 | abjones_3_04.npy 735 | abjones_3_10.npy 736 | amy_2_06.npy 737 | titon_1_06.npy 738 | 
ariel_4_03.npy 739 | heycat_5_07.npy 740 | bug_5_07.npy 741 | bug_5_13.npy 742 | bobon_4_03.npy 743 | jmzen_3_06.npy 744 | leon_4_06.npy 745 | khair_6_07.npy 746 | davidson_3_11.npy 747 | davidson_3_05.npy 748 | leon_8_12.npy 749 | jmzen_1_03.npy 750 | leon_8_06.npy 751 | leon_6_03.npy 752 | yifen_2_11.npy 753 | khair_4_02.npy 754 | fdps_4_03.npy 755 | yifen_2_05.npy 756 | abjones_1_01.npy 757 | geniusturtle_6_05.npy 758 | titon_3_03.npy 759 | annar_5_02.npy 760 | stool_5_01.npy 761 | stool_2_08.npy 762 | amy_12_05.npy 763 | Ani_4_03.npy 764 | Kenshin_5_09.npy 765 | Kenshin_2_01.npy 766 | Ani_4_02.npy 767 | Kenshin_5_08.npy 768 | amy_12_04.npy 769 | annar_5_03.npy 770 | geniusturtle_8_01.npy 771 | titon_3_02.npy 772 | geniusturtle_6_04.npy 773 | davidson_1_01.npy 774 | yifen_2_04.npy 775 | yifen_2_10.npy 776 | fdps_4_02.npy 777 | khair_4_03.npy 778 | leon_6_02.npy 779 | leon_8_07.npy 780 | jmzen_1_02.npy 781 | leon_8_13.npy 782 | davidson_3_04.npy 783 | davidson_3_10.npy 784 | khair_6_06.npy 785 | leon_4_07.npy 786 | bobon_4_02.npy 787 | jmzen_3_07.npy 788 | amy_10_01.npy 789 | bug_5_12.npy 790 | bug_5_06.npy 791 | ariel_4_02.npy 792 | heycat_5_06.npy 793 | amy_2_07.npy 794 | titon_1_07.npy 795 | abjones_3_11.npy 796 | geniusturtle_3_08.npy 797 | geniusturtle_4_01.npy 798 | geniusturtle_4_03.npy 799 | heycat_5_04.npy 800 | titon_1_05.npy 801 | amy_2_05.npy 802 | bug_5_10.npy 803 | bug_5_04.npy 804 | amy_10_03.npy 805 | jmzen_3_05.npy 806 | bobon_3_09.npy 807 | jmzen_3_11.npy 808 | leon_4_05.npy 809 | davidson_3_06.npy 810 | khair_6_04.npy 811 | davidson_3_12.npy 812 | leon_8_05.npy 813 | leon_8_11.npy 814 | leon_1_09.npy 815 | yifen_2_06.npy 816 | davidson_1_03.npy 817 | khair_4_01.npy 818 | yifen_2_12.npy 819 | abjones_1_02.npy 820 | geniusturtle_6_06.npy 821 | geniusturtle_8_03.npy 822 | titon_4_09.npy 823 | annar_2_08.npy 824 | annar_5_01.npy 825 | stool_5_02.npy 826 | Kenshin_2_03.npy 827 | amy_12_06.npy 828 | amy_12_07.npy 829 | Kenshin_2_02.npy 830 | 
Ani_4_01.npy 831 | stool_5_03.npy 832 | titon_4_08.npy 833 | amy_7_08.npy 834 | titon_3_01.npy 835 | geniusturtle_8_02.npy 836 | geniusturtle_6_07.npy 837 | abjones_1_03.npy 838 | fdps_4_01.npy 839 | yifen_2_13.npy 840 | yifen_2_07.npy 841 | davidson_1_02.npy 842 | leon_6_01.npy 843 | leon_1_08.npy 844 | leon_8_10.npy 845 | leon_8_04.npy 846 | jmzen_1_01.npy 847 | davidson_3_13.npy 848 | khair_6_05.npy 849 | davidson_3_07.npy 850 | leon_4_04.npy 851 | jmzen_3_10.npy 852 | bobon_3_08.npy 853 | jmzen_3_04.npy 854 | bobon_4_01.npy 855 | amy_10_02.npy 856 | bug_5_05.npy 857 | bug_5_11.npy 858 | titon_1_04.npy 859 | amy_2_04.npy 860 | ariel_3_08.npy 861 | heycat_5_05.npy 862 | ariel_4_01.npy 863 | geniusturtle_4_02.npy 864 | abjones_3_06.npy 865 | abjones_3_12.npy 866 | ariel_4_05.npy 867 | heycat_5_01.npy 868 | tammy_1_08.npy 869 | geniusturtle_4_12.npy 870 | abjones_3_02.npy 871 | geniusturtle_4_06.npy 872 | amy_10_06.npy 873 | bug_2_08.npy 874 | bug_5_01.npy 875 | leon_3_09.npy 876 | bobon_4_05.npy 877 | jmzen_4_09.npy 878 | davidson_3_03.npy 879 | khair_6_01.npy 880 | fdps_1_09.npy 881 | khair_1_08.npy 882 | leon_6_05.npy 883 | jmzen_1_05.npy 884 | bobon_1_09.npy 885 | jmzen_1_11.npy 886 | davidson_1_06.npy 887 | yifen_2_03.npy 888 | khair_4_04.npy 889 | fdps_4_05.npy 890 | geniusturtle_8_06.npy 891 | titon_3_05.npy 892 | amy_9_09.npy 893 | geniusturtle_6_03.npy 894 | Ani_4_05.npy 895 | Kenshin_2_06.npy 896 | amy_12_03.npy 897 | stool_5_07.npy 898 | annar_5_04.npy 899 | annar_5_05.npy 900 | stool_5_06.npy 901 | amy_12_02.npy 902 | Ani_4_10.npy 903 | Kenshin_2_07.npy 904 | Ani_4_04.npy 905 | geniusturtle_6_02.npy 906 | amy_9_08.npy 907 | titon_3_04.npy 908 | geniusturtle_8_07.npy 909 | fdps_4_04.npy 910 | khair_4_05.npy 911 | davidson_1_07.npy 912 | yifen_2_02.npy 913 | jmzen_1_10.npy 914 | bobon_1_08.npy 915 | leon_8_01.npy 916 | jmzen_1_04.npy 917 | leon_6_04.npy 918 | fdps_1_08.npy 919 | davidson_3_02.npy 920 | bobon_4_10.npy 921 | jmzen_4_08.npy 922 | 
bobon_4_04.npy 923 | jmzen_3_01.npy 924 | leon_4_01.npy 925 | leon_3_08.npy 926 | bug_5_14.npy 927 | amy_10_07.npy 928 | geniusturtle_4_07.npy 929 | abjones_3_03.npy 930 | amy_2_01.npy 931 | titon_1_01.npy 932 | amy_5_08.npy 933 | ariel_4_04.npy 934 | titon_1_03.npy 935 | amy_2_03.npy 936 | heycat_5_02.npy 937 | ariel_4_06.npy 938 | abjones_3_01.npy 939 | geniusturtle_4_05.npy 940 | abjones_4_08.npy 941 | geniusturtle_4_11.npy 942 | amy_10_05.npy 943 | bug_5_02.npy 944 | leon_4_03.npy 945 | jmzen_3_03.npy 946 | bobon_4_06.npy 947 | khair_6_02.npy 948 | davidson_3_14.npy 949 | leon_6_06.npy 950 | jmzen_1_12.npy 951 | leon_8_03.npy 952 | khair_4_07.npy 953 | fdps_4_06.npy 954 | yifen_2_14.npy 955 | yifen_5_09.npy 956 | davidson_1_05.npy 957 | titon_3_06.npy 958 | geniusturtle_8_05.npy 959 | geniusturtle_1_09.npy 960 | abjones_1_04.npy 961 | amy_15_09.npy 962 | Kenshin_2_11.npy 963 | Ani_4_06.npy 964 | Kenshin_2_05.npy 965 | stool_5_04.npy 966 | annar_5_07.npy 967 | annar_5_06.npy 968 | stool_5_05.npy 969 | Kenshin_2_04.npy 970 | Ani_4_07.npy 971 | Kenshin_2_10.npy 972 | amy_15_08.npy 973 | amy_12_01.npy 974 | geniusturtle_1_08.npy 975 | geniusturtle_6_01.npy 976 | geniusturtle_8_04.npy 977 | titon_3_07.npy 978 | yifen_2_01.npy 979 | yifen_5_08.npy 980 | davidson_1_04.npy 981 | davidson_1_10.npy 982 | khair_4_06.npy 983 | yifen_2_15.npy 984 | leon_8_02.npy 985 | jmzen_1_07.npy 986 | leon_6_07.npy 987 | davidson_3_01.npy 988 | khair_6_03.npy 989 | jmzen_3_02.npy 990 | bobon_4_07.npy 991 | leon_4_02.npy 992 | bug_5_03.npy 993 | amy_10_04.npy 994 | geniusturtle_4_10.npy 995 | abjones_3_14.npy 996 | geniusturtle_4_04.npy 997 | heycat_5_03.npy 998 | ariel_4_07.npy 999 | titon_1_02.npy 1000 | amy_2_02.npy 1001 | AimeeNorwich_Child_MIX.npy 1002 | AlexanderRoss_GoodbyeBolero_MIX.npy 1003 | AlexanderRoss_VelvetCurtain_MIX.npy 1004 | AvaLuna_Waterduct_MIX.npy 1005 | BigTroubles_Phantom_MIX.npy 1006 | DreamersOfTheGhetto_HeavyLove_MIX.npy 1007 | FacesOnFilm_WaitingForGa_MIX.npy 
1008 | FamilyBand_Again_MIX.npy 1009 | Handel_TornamiAVagheggiar_MIX.npy 1010 | HeladoNegro_MitadDelMundo_MIX.npy 1011 | HopAlong_SisterCities_MIX.npy 1012 | LizNelson_Coldwar_MIX.npy 1013 | LizNelson_ImComingHome_MIX.npy 1014 | LizNelson_Rainfall_MIX.npy 1015 | Meaxic_TakeAStep_MIX.npy 1016 | Meaxic_YouListen_MIX.npy 1017 | MusicDelta_80sRock_MIX.npy 1018 | MusicDelta_Beatles_MIX.npy 1019 | MusicDelta_Britpop_MIX.npy 1020 | MusicDelta_Country1_MIX.npy 1021 | MusicDelta_Country2_MIX.npy 1022 | MusicDelta_Disco_MIX.npy 1023 | MusicDelta_Grunge_MIX.npy 1024 | MusicDelta_Hendrix_MIX.npy 1025 | MusicDelta_Punk_MIX.npy 1026 | MusicDelta_Reggae_MIX.npy 1027 | MusicDelta_Rock_MIX.npy 1028 | MusicDelta_Rockabilly_MIX.npy 1029 | PurlingHiss_Lolita_MIX.npy 1030 | StevenClark_Bounty_MIX.npy 1031 | SweetLights_YouLetMeDown_MIX.npy 1032 | TheDistricts_Vermont_MIX.npy 1033 | TheScarletBrand_LesFleursDuMal_MIX.npy 1034 | TheSoSoGlos_Emergency_MIX.npy 1035 | Wolf_DieBekherte_MIX.npy -------------------------------------------------------------------------------- /data/train_data_extra.txt: -------------------------------------------------------------------------------- 1 | tammy_1_07.npy 2 | tammy_1_07_vocal_only.npy 3 | ariel_3_03.npy 4 | ariel_3_03_vocal_only.npy 5 | heycat_2_07.npy 6 | heycat_2_07_vocal_only.npy 7 | amy_5_06.npy 8 | amy_5_06_vocal_only.npy 9 | geniusturtle_4_09.npy 10 | geniusturtle_4_09_vocal_only.npy 11 | abjones_4_04.npy 12 | abjones_4_04_vocal_only.npy 13 | Ani_1_06.npy 14 | Ani_1_06_vocal_only.npy 15 | bug_2_07.npy 16 | bug_2_07_vocal_only.npy 17 | leon_3_12.npy 18 | leon_3_12_vocal_only.npy 19 | leon_3_06.npy 20 | leon_3_06_vocal_only.npy 21 | bobon_3_03.npy 22 | bobon_3_03_vocal_only.npy 23 | jmzen_4_06.npy 24 | jmzen_4_06_vocal_only.npy 25 | davidson_4_05.npy 26 | davidson_4_05_vocal_only.npy 27 | fdps_1_12.npy 28 | fdps_1_12_vocal_only.npy 29 | khair_1_07.npy 30 | khair_1_07_vocal_only.npy 31 | fdps_1_06.npy 32 | fdps_1_06_vocal_only.npy 33 | 
leon_1_03.npy 34 | leon_1_03_vocal_only.npy 35 | bobon_1_06.npy 36 | bobon_1_06_vocal_only.npy 37 | davidson_1_09.npy 38 | davidson_1_09_vocal_only.npy 39 | yifen_5_05.npy 40 | yifen_5_05_vocal_only.npy 41 | yifen_5_11.npy 42 | yifen_5_11_vocal_only.npy 43 | khair_3_02.npy 44 | khair_3_02_vocal_only.npy 45 | fdps_3_03.npy 46 | fdps_3_03_vocal_only.npy 47 | ariel_1_06.npy 48 | ariel_1_06_vocal_only.npy 49 | titon_4_03.npy 50 | titon_4_03_vocal_only.npy 51 | amy_7_03.npy 52 | amy_7_03_vocal_only.npy 53 | amy_9_06.npy 54 | amy_9_06_vocal_only.npy 55 | geniusturtle_1_05.npy 56 | geniusturtle_1_05_vocal_only.npy 57 | Ani_3_03.npy 58 | Ani_3_03_vocal_only.npy 59 | amy_15_11.npy 60 | amy_15_11_vocal_only.npy 61 | amy_15_05.npy 62 | amy_15_05_vocal_only.npy 63 | annar_2_02.npy 64 | annar_2_02_vocal_only.npy 65 | stool_2_01.npy 66 | stool_2_01_vocal_only.npy 67 | stool_5_08.npy 68 | stool_5_08_vocal_only.npy 69 | annar_2_03.npy 70 | annar_2_03_vocal_only.npy 71 | amy_15_04.npy 72 | amy_15_04_vocal_only.npy 73 | Ani_3_02.npy 74 | Ani_3_02_vocal_only.npy 75 | Kenshin_5_01.npy 76 | Kenshin_5_01_vocal_only.npy 77 | geniusturtle_1_04.npy 78 | geniusturtle_1_04_vocal_only.npy 79 | amy_9_07.npy 80 | amy_9_07_vocal_only.npy 81 | titon_4_02.npy 82 | titon_4_02_vocal_only.npy 83 | amy_7_02.npy 84 | amy_7_02_vocal_only.npy 85 | geniusturtle_8_08.npy 86 | geniusturtle_8_08_vocal_only.npy 87 | ariel_1_07.npy 88 | ariel_1_07_vocal_only.npy 89 | yifen_5_10.npy 90 | yifen_5_10_vocal_only.npy 91 | fdps_3_02.npy 92 | fdps_3_02_vocal_only.npy 93 | khair_3_03.npy 94 | khair_3_03_vocal_only.npy 95 | davidson_1_08.npy 96 | davidson_1_08_vocal_only.npy 97 | yifen_5_04.npy 98 | yifen_5_04_vocal_only.npy 99 | bobon_1_07.npy 100 | bobon_1_07_vocal_only.npy 101 | leon_1_02.npy 102 | leon_1_02_vocal_only.npy 103 | fdps_1_07.npy 104 | fdps_1_07_vocal_only.npy 105 | khair_1_06.npy 106 | khair_1_06_vocal_only.npy 107 | fdps_1_13.npy 108 | fdps_1_13_vocal_only.npy 109 | davidson_4_04.npy 110 | 
davidson_4_04_vocal_only.npy 111 | bobon_3_02.npy 112 | bobon_3_02_vocal_only.npy 113 | jmzen_4_07.npy 114 | jmzen_4_07_vocal_only.npy 115 | leon_3_07.npy 116 | leon_3_07_vocal_only.npy 117 | leon_3_13.npy 118 | leon_3_13_vocal_only.npy 119 | bug_2_06.npy 120 | bug_2_06_vocal_only.npy 121 | amy_10_08.npy 122 | amy_10_08_vocal_only.npy 123 | Ani_1_07.npy 124 | Ani_1_07_vocal_only.npy 125 | geniusturtle_4_08.npy 126 | geniusturtle_4_08_vocal_only.npy 127 | abjones_4_05.npy 128 | abjones_4_05_vocal_only.npy 129 | geniusturtle_3_01.npy 130 | geniusturtle_3_01_vocal_only.npy 131 | amy_5_07.npy 132 | amy_5_07_vocal_only.npy 133 | ariel_3_02.npy 134 | ariel_3_02_vocal_only.npy 135 | tammy_1_06.npy 136 | tammy_1_06_vocal_only.npy 137 | heycat_2_06.npy 138 | heycat_2_06_vocal_only.npy 139 | amy_5_05.npy 140 | amy_5_05_vocal_only.npy 141 | heycat_2_04.npy 142 | heycat_2_04_vocal_only.npy 143 | tammy_1_04.npy 144 | tammy_1_04_vocal_only.npy 145 | geniusturtle_3_03.npy 146 | geniusturtle_3_03_vocal_only.npy 147 | abjones_4_07.npy 148 | abjones_4_07_vocal_only.npy 149 | Ani_1_05.npy 150 | Ani_1_05_vocal_only.npy 151 | bug_2_04.npy 152 | bug_2_04_vocal_only.npy 153 | leon_3_05.npy 154 | leon_3_05_vocal_only.npy 155 | leon_3_11.npy 156 | leon_3_11_vocal_only.npy 157 | bobon_4_09.npy 158 | bobon_4_09_vocal_only.npy 159 | jmzen_4_05.npy 160 | jmzen_4_05_vocal_only.npy 161 | khair_1_04.npy 162 | khair_1_04_vocal_only.npy 163 | fdps_1_05.npy 164 | fdps_1_05_vocal_only.npy 165 | davidson_4_06.npy 166 | davidson_4_06_vocal_only.npy 167 | fdps_1_11.npy 168 | fdps_1_11_vocal_only.npy 169 | leon_6_09.npy 170 | leon_6_09_vocal_only.npy 171 | jmzen_1_09.npy 172 | jmzen_1_09_vocal_only.npy 173 | bobon_1_05.npy 174 | bobon_1_05_vocal_only.npy 175 | khair_3_01.npy 176 | khair_3_01_vocal_only.npy 177 | khair_4_08.npy 178 | khair_4_08_vocal_only.npy 179 | yifen_5_06.npy 180 | yifen_5_06_vocal_only.npy 181 | ariel_1_05.npy 182 | ariel_1_05_vocal_only.npy 183 | geniusturtle_1_06.npy 184 | 
geniusturtle_1_06_vocal_only.npy 185 | amy_9_05.npy 186 | amy_9_05_vocal_only.npy 187 | amy_15_06.npy 188 | amy_15_06_vocal_only.npy 189 | amy_15_12.npy 190 | amy_15_12_vocal_only.npy 191 | Ani_4_09.npy 192 | Ani_4_09_vocal_only.npy 193 | Kenshin_5_03.npy 194 | Kenshin_5_03_vocal_only.npy 195 | annar_5_08.npy 196 | annar_5_08_vocal_only.npy 197 | annar_2_01.npy 198 | annar_2_01_vocal_only.npy 199 | stool_2_02.npy 200 | stool_2_02_vocal_only.npy 201 | annar_5_09.npy 202 | annar_5_09_vocal_only.npy 203 | stool_2_03.npy 204 | stool_2_03_vocal_only.npy 205 | Ani_4_08.npy 206 | Ani_4_08_vocal_only.npy 207 | amy_15_13.npy 208 | amy_15_13_vocal_only.npy 209 | Ani_3_01.npy 210 | Ani_3_01_vocal_only.npy 211 | Kenshin_5_02.npy 212 | Kenshin_5_02_vocal_only.npy 213 | amy_15_07.npy 214 | amy_15_07_vocal_only.npy 215 | amy_9_04.npy 216 | amy_9_04_vocal_only.npy 217 | geniusturtle_1_07.npy 218 | geniusturtle_1_07_vocal_only.npy 219 | ariel_1_04.npy 220 | ariel_1_04_vocal_only.npy 221 | amy_7_01.npy 222 | amy_7_01_vocal_only.npy 223 | titon_3_08.npy 224 | titon_3_08_vocal_only.npy 225 | titon_4_01.npy 226 | titon_4_01_vocal_only.npy 227 | yifen_5_07.npy 228 | yifen_5_07_vocal_only.npy 229 | fdps_3_01.npy 230 | fdps_3_01_vocal_only.npy 231 | bobon_1_04.npy 232 | bobon_1_04_vocal_only.npy 233 | jmzen_1_08.npy 234 | jmzen_1_08_vocal_only.npy 235 | bobon_1_10.npy 236 | bobon_1_10_vocal_only.npy 237 | leon_1_01.npy 238 | leon_1_01_vocal_only.npy 239 | leon_6_08.npy 240 | leon_6_08_vocal_only.npy 241 | fdps_1_10.npy 242 | fdps_1_10_vocal_only.npy 243 | davidson_4_07.npy 244 | davidson_4_07_vocal_only.npy 245 | fdps_1_04.npy 246 | fdps_1_04_vocal_only.npy 247 | khair_1_05.npy 248 | khair_1_05_vocal_only.npy 249 | jmzen_4_04.npy 250 | jmzen_4_04_vocal_only.npy 251 | bobon_4_08.npy 252 | bobon_4_08_vocal_only.npy 253 | bobon_3_01.npy 254 | bobon_3_01_vocal_only.npy 255 | jmzen_4_10.npy 256 | jmzen_4_10_vocal_only.npy 257 | leon_3_10.npy 258 | leon_3_10_vocal_only.npy 259 | leon_3_04.npy 
260 | leon_3_04_vocal_only.npy 261 | bug_2_05.npy 262 | bug_2_05_vocal_only.npy 263 | Ani_1_04.npy 264 | Ani_1_04_vocal_only.npy 265 | abjones_4_06.npy 266 | abjones_4_06_vocal_only.npy 267 | geniusturtle_3_02.npy 268 | geniusturtle_3_02_vocal_only.npy 269 | ariel_4_08.npy 270 | ariel_4_08_vocal_only.npy 271 | heycat_2_05.npy 272 | heycat_2_05_vocal_only.npy 273 | ariel_3_01.npy 274 | ariel_3_01_vocal_only.npy 275 | tammy_1_05.npy 276 | tammy_1_05_vocal_only.npy 277 | amy_5_04.npy 278 | amy_5_04_vocal_only.npy 279 | geniusturtle_3_06.npy 280 | geniusturtle_3_06_vocal_only.npy 281 | abjones_4_02.npy 282 | abjones_4_02_vocal_only.npy 283 | ariel_3_05.npy 284 | ariel_3_05_vocal_only.npy 285 | tammy_1_01.npy 286 | tammy_1_01_vocal_only.npy 287 | heycat_5_08.npy 288 | heycat_5_08_vocal_only.npy 289 | heycat_2_01.npy 290 | heycat_2_01_vocal_only.npy 291 | bug_5_08.npy 292 | bug_5_08_vocal_only.npy 293 | bug_2_01.npy 294 | bug_2_01_vocal_only.npy 295 | jmzen_3_09.npy 296 | jmzen_3_09_vocal_only.npy 297 | bobon_3_05.npy 298 | bobon_3_05_vocal_only.npy 299 | khair_1_01.npy 300 | khair_1_01_vocal_only.npy 301 | fdps_1_14.npy 302 | fdps_1_14_vocal_only.npy 303 | davidson_4_03.npy 304 | davidson_4_03_vocal_only.npy 305 | leon_8_09.npy 306 | leon_8_09_vocal_only.npy 307 | leon_1_05.npy 308 | leon_1_05_vocal_only.npy 309 | leon_1_11.npy 310 | leon_1_11_vocal_only.npy 311 | khair_3_04.npy 312 | khair_3_04_vocal_only.npy 313 | fdps_3_05.npy 314 | fdps_3_05_vocal_only.npy 315 | yifen_5_03.npy 316 | yifen_5_03_vocal_only.npy 317 | geniusturtle_1_03.npy 318 | geniusturtle_1_03_vocal_only.npy 319 | titon_4_05.npy 320 | titon_4_05_vocal_only.npy 321 | amy_7_05.npy 322 | amy_7_05_vocal_only.npy 323 | titon_4_11.npy 324 | titon_4_11_vocal_only.npy 325 | stool_2_07.npy 326 | stool_2_07_vocal_only.npy 327 | annar_2_04.npy 328 | annar_2_04_vocal_only.npy 329 | amy_15_03.npy 330 | amy_15_03_vocal_only.npy 331 | Kenshin_5_12.npy 332 | Kenshin_5_12_vocal_only.npy 333 | Kenshin_5_06.npy 334 | 
Kenshin_5_06_vocal_only.npy 335 | Ani_3_05.npy 336 | Ani_3_05_vocal_only.npy 337 | Ani_3_04.npy 338 | Ani_3_04_vocal_only.npy 339 | Kenshin_5_07.npy 340 | Kenshin_5_07_vocal_only.npy 341 | amy_15_02.npy 342 | amy_15_02_vocal_only.npy 343 | Kenshin_5_13.npy 344 | Kenshin_5_13_vocal_only.npy 345 | annar_2_05.npy 346 | annar_2_05_vocal_only.npy 347 | stool_2_06.npy 348 | stool_2_06_vocal_only.npy 349 | titon_4_10.npy 350 | titon_4_10_vocal_only.npy 351 | ariel_1_01.npy 352 | ariel_1_01_vocal_only.npy 353 | titon_4_04.npy 354 | titon_4_04_vocal_only.npy 355 | amy_7_04.npy 356 | amy_7_04_vocal_only.npy 357 | amy_9_01.npy 358 | amy_9_01_vocal_only.npy 359 | geniusturtle_1_02.npy 360 | geniusturtle_1_02_vocal_only.npy 361 | yifen_5_02.npy 362 | yifen_5_02_vocal_only.npy 363 | fdps_3_04.npy 364 | fdps_3_04_vocal_only.npy 365 | khair_3_05.npy 366 | khair_3_05_vocal_only.npy 367 | leon_1_10.npy 368 | leon_1_10_vocal_only.npy 369 | leon_1_04.npy 370 | leon_1_04_vocal_only.npy 371 | bobon_1_01.npy 372 | bobon_1_01_vocal_only.npy 373 | leon_8_08.npy 374 | leon_8_08_vocal_only.npy 375 | davidson_4_02.npy 376 | davidson_4_02_vocal_only.npy 377 | fdps_1_01.npy 378 | fdps_1_01_vocal_only.npy 379 | leon_3_01.npy 380 | leon_3_01_vocal_only.npy 381 | leon_4_08.npy 382 | leon_4_08_vocal_only.npy 383 | bobon_3_04.npy 384 | bobon_3_04_vocal_only.npy 385 | jmzen_3_08.npy 386 | jmzen_3_08_vocal_only.npy 387 | jmzen_4_01.npy 388 | jmzen_4_01_vocal_only.npy 389 | bobon_3_10.npy 390 | bobon_3_10_vocal_only.npy 391 | Ani_1_01.npy 392 | Ani_1_01_vocal_only.npy 393 | bug_5_09.npy 394 | bug_5_09_vocal_only.npy 395 | ariel_3_04.npy 396 | ariel_3_04_vocal_only.npy 397 | titon_1_08.npy 398 | titon_1_08_vocal_only.npy 399 | amy_5_01.npy 400 | amy_5_01_vocal_only.npy 401 | abjones_4_03.npy 402 | abjones_4_03_vocal_only.npy 403 | geniusturtle_3_07.npy 404 | geniusturtle_3_07_vocal_only.npy 405 | geniusturtle_3_05.npy 406 | geniusturtle_3_05_vocal_only.npy 407 | abjones_4_01.npy 408 | 
abjones_4_01_vocal_only.npy 409 | heycat_2_02.npy 410 | heycat_2_02_vocal_only.npy 411 | ariel_3_06.npy 412 | ariel_3_06_vocal_only.npy 413 | tammy_1_02.npy 414 | tammy_1_02_vocal_only.npy 415 | amy_5_03.npy 416 | amy_5_03_vocal_only.npy 417 | bug_2_02.npy 418 | bug_2_02_vocal_only.npy 419 | Ani_1_03.npy 420 | Ani_1_03_vocal_only.npy 421 | jmzen_4_03.npy 422 | jmzen_4_03_vocal_only.npy 423 | bobon_3_06.npy 424 | bobon_3_06_vocal_only.npy 425 | leon_3_03.npy 426 | leon_3_03_vocal_only.npy 427 | davidson_3_09.npy 428 | davidson_3_09_vocal_only.npy 429 | khair_1_02.npy 430 | khair_1_02_vocal_only.npy 431 | fdps_1_03.npy 432 | fdps_1_03_vocal_only.npy 433 | bobon_1_03.npy 434 | bobon_1_03_vocal_only.npy 435 | leon_1_12.npy 436 | leon_1_12_vocal_only.npy 437 | leon_1_06.npy 438 | leon_1_06_vocal_only.npy 439 | yifen_2_09.npy 440 | yifen_2_09_vocal_only.npy 441 | khair_3_07.npy 442 | khair_3_07_vocal_only.npy 443 | fdps_3_06.npy 444 | fdps_3_06_vocal_only.npy 445 | amy_9_03.npy 446 | amy_9_03_vocal_only.npy 447 | ariel_1_03.npy 448 | ariel_1_03_vocal_only.npy 449 | amy_7_06.npy 450 | amy_7_06_vocal_only.npy 451 | titon_4_06.npy 452 | titon_4_06_vocal_only.npy 453 | stool_2_04.npy 454 | stool_2_04_vocal_only.npy 455 | annar_2_07.npy 456 | annar_2_07_vocal_only.npy 457 | Kenshin_5_05.npy 458 | Kenshin_5_05_vocal_only.npy 459 | Ani_3_06.npy 460 | Ani_3_06_vocal_only.npy 461 | Kenshin_5_11.npy 462 | Kenshin_5_11_vocal_only.npy 463 | Kenshin_5_10.npy 464 | Kenshin_5_10_vocal_only.npy 465 | amy_15_01.npy 466 | amy_15_01_vocal_only.npy 467 | Kenshin_5_04.npy 468 | Kenshin_5_04_vocal_only.npy 469 | annar_2_06.npy 470 | annar_2_06_vocal_only.npy 471 | stool_2_05.npy 472 | stool_2_05_vocal_only.npy 473 | amy_7_07.npy 474 | amy_7_07_vocal_only.npy 475 | titon_4_07.npy 476 | titon_4_07_vocal_only.npy 477 | ariel_1_02.npy 478 | ariel_1_02_vocal_only.npy 479 | geniusturtle_1_01.npy 480 | geniusturtle_1_01_vocal_only.npy 481 | amy_9_02.npy 482 | amy_9_02_vocal_only.npy 483 | 
fdps_3_07.npy 484 | fdps_3_07_vocal_only.npy 485 | khair_3_06.npy 486 | khair_3_06_vocal_only.npy 487 | yifen_5_01.npy 488 | yifen_5_01_vocal_only.npy 489 | yifen_2_08.npy 490 | yifen_2_08_vocal_only.npy 491 | leon_1_07.npy 492 | leon_1_07_vocal_only.npy 493 | bobon_1_02.npy 494 | bobon_1_02_vocal_only.npy 495 | fdps_1_02.npy 496 | fdps_1_02_vocal_only.npy 497 | khair_1_03.npy 498 | khair_1_03_vocal_only.npy 499 | davidson_4_01.npy 500 | davidson_4_01_vocal_only.npy 501 | davidson_3_08.npy 502 | davidson_3_08_vocal_only.npy 503 | leon_3_02.npy 504 | leon_3_02_vocal_only.npy 505 | jmzen_4_02.npy 506 | jmzen_4_02_vocal_only.npy 507 | bobon_3_07.npy 508 | bobon_3_07_vocal_only.npy 509 | Ani_1_02.npy 510 | Ani_1_02_vocal_only.npy 511 | bug_2_03.npy 512 | bug_2_03_vocal_only.npy 513 | amy_5_02.npy 514 | amy_5_02_vocal_only.npy 515 | heycat_2_03.npy 516 | heycat_2_03_vocal_only.npy 517 | tammy_1_03.npy 518 | tammy_1_03_vocal_only.npy 519 | ariel_3_07.npy 520 | ariel_3_07_vocal_only.npy 521 | geniusturtle_3_04.npy 522 | geniusturtle_3_04_vocal_only.npy 523 | abjones_3_09.npy 524 | abjones_3_09_vocal_only.npy 525 | leon_7_01.npy 526 | leon_7_01_vocal_only.npy 527 | leon_9_04.npy 528 | leon_9_04_vocal_only.npy 529 | yifen_3_07.npy 530 | yifen_3_07_vocal_only.npy 531 | fdps_5_01.npy 532 | fdps_5_01_vocal_only.npy 533 | fdps_2_08.npy 534 | fdps_2_08_vocal_only.npy 535 | amy_1_01.npy 536 | amy_1_01_vocal_only.npy 537 | titon_5_08.npy 538 | titon_5_08_vocal_only.npy 539 | amy_6_08.npy 540 | amy_6_08_vocal_only.npy 541 | titon_2_01.npy 542 | titon_2_01_vocal_only.npy 543 | geniusturtle_7_13.npy 544 | geniusturtle_7_13_vocal_only.npy 545 | geniusturtle_7_07.npy 546 | geniusturtle_7_07_vocal_only.npy 547 | Kenshin_3_02.npy 548 | Kenshin_3_02_vocal_only.npy 549 | Ani_5_01.npy 550 | Ani_5_01_vocal_only.npy 551 | amy_13_07.npy 552 | amy_13_07_vocal_only.npy 553 | bug_1_09.npy 554 | bug_1_09_vocal_only.npy 555 | stool_4_03.npy 556 | stool_4_03_vocal_only.npy 557 | ariel_2_08.npy 558 | 
ariel_2_08_vocal_only.npy 559 | heycat_4_05.npy 560 | heycat_4_05_vocal_only.npy 561 | ariel_5_01.npy 562 | ariel_5_01_vocal_only.npy 563 | amy_3_04.npy 564 | amy_3_04_vocal_only.npy 565 | abjones_2_12.npy 566 | abjones_2_12_vocal_only.npy 567 | geniusturtle_5_02.npy 568 | geniusturtle_5_02_vocal_only.npy 569 | abjones_2_06.npy 570 | abjones_2_06_vocal_only.npy 571 | Kenshin_1_07.npy 572 | Kenshin_1_07_vocal_only.npy 573 | amy_11_02.npy 574 | amy_11_02_vocal_only.npy 575 | leon_5_10.npy 576 | leon_5_10_vocal_only.npy 577 | leon_5_04.npy 578 | leon_5_04_vocal_only.npy 579 | bobon_2_08.npy 580 | bobon_2_08_vocal_only.npy 581 | jmzen_2_04.npy 582 | jmzen_2_04_vocal_only.npy 583 | bobon_5_01.npy 584 | bobon_5_01_vocal_only.npy 585 | jmzen_2_10.npy 586 | jmzen_2_10_vocal_only.npy 587 | davidson_2_07.npy 588 | davidson_2_07_vocal_only.npy 589 | yifen_1_02.npy 590 | yifen_1_02_vocal_only.npy 591 | yifen_1_16.npy 592 | yifen_1_16_vocal_only.npy 593 | davidson_2_06.npy 594 | davidson_2_06_vocal_only.npy 595 | yifen_1_03.npy 596 | yifen_1_03_vocal_only.npy 597 | jmzen_2_11.npy 598 | jmzen_2_11_vocal_only.npy 599 | leon_5_05.npy 600 | leon_5_05_vocal_only.npy 601 | leon_5_11.npy 602 | leon_5_11_vocal_only.npy 603 | amy_11_03.npy 604 | amy_11_03_vocal_only.npy 605 | Kenshin_1_06.npy 606 | Kenshin_1_06_vocal_only.npy 607 | abjones_2_07.npy 608 | abjones_2_07_vocal_only.npy 609 | geniusturtle_5_03.npy 610 | geniusturtle_5_03_vocal_only.npy 611 | amy_3_05.npy 612 | amy_3_05_vocal_only.npy 613 | heycat_4_04.npy 614 | heycat_4_04_vocal_only.npy 615 | ariel_2_09.npy 616 | ariel_2_09_vocal_only.npy 617 | bug_1_08.npy 618 | bug_1_08_vocal_only.npy 619 | annar_3_08.npy 620 | annar_3_08_vocal_only.npy 621 | annar_4_01.npy 622 | annar_4_01_vocal_only.npy 623 | stool_4_02.npy 624 | stool_4_02_vocal_only.npy 625 | amy_13_06.npy 626 | amy_13_06_vocal_only.npy 627 | Kenshin_3_03.npy 628 | Kenshin_3_03_vocal_only.npy 629 | geniusturtle_7_06.npy 630 | geniusturtle_7_06_vocal_only.npy 631 | 
geniusturtle_7_12.npy 632 | geniusturtle_7_12_vocal_only.npy 633 | titon_5_09.npy 634 | titon_5_09_vocal_only.npy 635 | amy_6_09.npy 636 | amy_6_09_vocal_only.npy 637 | heycat_1_08.npy 638 | heycat_1_08_vocal_only.npy 639 | khair_5_01.npy 640 | khair_5_01_vocal_only.npy 641 | yifen_3_12.npy 642 | yifen_3_12_vocal_only.npy 643 | fdps_2_09.npy 644 | fdps_2_09_vocal_only.npy 645 | yifen_3_06.npy 646 | yifen_3_06_vocal_only.npy 647 | leon_9_05.npy 648 | leon_9_05_vocal_only.npy 649 | leon_7_02.npy 650 | leon_7_02_vocal_only.npy 651 | yifen_3_10.npy 652 | yifen_3_10_vocal_only.npy 653 | fdps_5_02.npy 654 | fdps_5_02_vocal_only.npy 655 | khair_5_03.npy 656 | khair_5_03_vocal_only.npy 657 | yifen_3_04.npy 658 | yifen_3_04_vocal_only.npy 659 | titon_2_02.npy 660 | titon_2_02_vocal_only.npy 661 | amy_1_02.npy 662 | amy_1_02_vocal_only.npy 663 | geniusturtle_7_04.npy 664 | geniusturtle_7_04_vocal_only.npy 665 | geniusturtle_7_10.npy 666 | geniusturtle_7_10_vocal_only.npy 667 | amy_13_04.npy 668 | amy_13_04_vocal_only.npy 669 | Kenshin_3_01.npy 670 | Kenshin_3_01_vocal_only.npy 671 | Ani_5_02.npy 672 | Ani_5_02_vocal_only.npy 673 | Kenshin_4_08.npy 674 | Kenshin_4_08_vocal_only.npy 675 | annar_4_03.npy 676 | annar_4_03_vocal_only.npy 677 | stool_3_09.npy 678 | stool_3_09_vocal_only.npy 679 | amy_3_07.npy 680 | amy_3_07_vocal_only.npy 681 | ariel_5_02.npy 682 | ariel_5_02_vocal_only.npy 683 | heycat_4_06.npy 684 | heycat_4_06_vocal_only.npy 685 | geniusturtle_2_08.npy 686 | geniusturtle_2_08_vocal_only.npy 687 | geniusturtle_5_01.npy 688 | geniusturtle_5_01_vocal_only.npy 689 | abjones_2_05.npy 690 | abjones_2_05_vocal_only.npy 691 | abjones_2_11.npy 692 | abjones_2_11_vocal_only.npy 693 | Kenshin_1_10.npy 694 | Kenshin_1_10_vocal_only.npy 695 | amy_16_08.npy 696 | amy_16_08_vocal_only.npy 697 | amy_11_01.npy 698 | amy_11_01_vocal_only.npy 699 | Kenshin_1_04.npy 700 | Kenshin_1_04_vocal_only.npy 701 | leon_5_07.npy 702 | leon_5_07_vocal_only.npy 703 | bobon_5_02.npy 704 | 
bobon_5_02_vocal_only.npy 705 | jmzen_2_07.npy 706 | jmzen_2_07_vocal_only.npy 707 | davidson_2_10.npy 708 | davidson_2_10_vocal_only.npy 709 | yifen_1_15.npy 710 | yifen_1_15_vocal_only.npy 711 | yifen_1_01.npy 712 | yifen_1_01_vocal_only.npy 713 | davidson_2_04.npy 714 | davidson_2_04_vocal_only.npy 715 | davidson_2_05.npy 716 | davidson_2_05_vocal_only.npy 717 | yifen_1_14.npy 718 | yifen_1_14_vocal_only.npy 719 | bobon_5_03.npy 720 | bobon_5_03_vocal_only.npy 721 | jmzen_2_06.npy 722 | jmzen_2_06_vocal_only.npy 723 | jmzen_2_12.npy 724 | jmzen_2_12_vocal_only.npy 725 | leon_5_12.npy 726 | leon_5_12_vocal_only.npy 727 | leon_5_06.npy 728 | leon_5_06_vocal_only.npy 729 | Kenshin_1_05.npy 730 | Kenshin_1_05_vocal_only.npy 731 | Kenshin_1_11.npy 732 | Kenshin_1_11_vocal_only.npy 733 | abjones_2_10.npy 734 | abjones_2_10_vocal_only.npy 735 | abjones_2_04.npy 736 | abjones_2_04_vocal_only.npy 737 | ariel_5_03.npy 738 | ariel_5_03_vocal_only.npy 739 | heycat_4_07.npy 740 | heycat_4_07_vocal_only.npy 741 | amy_3_06.npy 742 | amy_3_06_vocal_only.npy 743 | annar_4_02.npy 744 | annar_4_02_vocal_only.npy 745 | stool_4_01.npy 746 | stool_4_01_vocal_only.npy 747 | stool_3_08.npy 748 | stool_3_08_vocal_only.npy 749 | Ani_5_03.npy 750 | Ani_5_03_vocal_only.npy 751 | Kenshin_4_09.npy 752 | Kenshin_4_09_vocal_only.npy 753 | amy_13_05.npy 754 | amy_13_05_vocal_only.npy 755 | geniusturtle_7_11.npy 756 | geniusturtle_7_11_vocal_only.npy 757 | geniusturtle_7_05.npy 758 | geniusturtle_7_05_vocal_only.npy 759 | titon_2_03.npy 760 | titon_2_03_vocal_only.npy 761 | amy_1_03.npy 762 | amy_1_03_vocal_only.npy 763 | yifen_3_05.npy 764 | yifen_3_05_vocal_only.npy 765 | yifen_3_11.npy 766 | yifen_3_11_vocal_only.npy 767 | khair_5_02.npy 768 | khair_5_02_vocal_only.npy 769 | fdps_5_03.npy 770 | fdps_5_03_vocal_only.npy 771 | leon_9_06.npy 772 | leon_9_06_vocal_only.npy 773 | leon_7_03.npy 774 | leon_7_03_vocal_only.npy 775 | leon_9_02.npy 776 | leon_9_02_vocal_only.npy 777 | leon_7_07.npy 778 
| leon_7_07_vocal_only.npy 779 | leon_7_13.npy 780 | leon_7_13_vocal_only.npy 781 | fdps_5_07.npy 782 | fdps_5_07_vocal_only.npy 783 | khair_5_06.npy 784 | khair_5_06_vocal_only.npy 785 | yifen_3_01.npy 786 | yifen_3_01_vocal_only.npy 787 | yifen_4_08.npy 788 | yifen_4_08_vocal_only.npy 789 | geniusturtle_7_01.npy 790 | geniusturtle_7_01_vocal_only.npy 791 | geniusturtle_7_15.npy 792 | geniusturtle_7_15_vocal_only.npy 793 | amy_1_07.npy 794 | amy_1_07_vocal_only.npy 795 | titon_2_07.npy 796 | titon_2_07_vocal_only.npy 797 | annar_4_06.npy 798 | annar_4_06_vocal_only.npy 799 | stool_4_05.npy 800 | stool_4_05_vocal_only.npy 801 | amy_13_01.npy 802 | amy_13_01_vocal_only.npy 803 | Kenshin_3_04.npy 804 | Kenshin_3_04_vocal_only.npy 805 | Ani_5_07.npy 806 | Ani_5_07_vocal_only.npy 807 | geniusturtle_5_04.npy 808 | geniusturtle_5_04_vocal_only.npy 809 | abjones_5_09.npy 810 | abjones_5_09_vocal_only.npy 811 | amy_3_02.npy 812 | amy_3_02_vocal_only.npy 813 | heycat_4_03.npy 814 | heycat_4_03_vocal_only.npy 815 | ariel_5_07.npy 816 | ariel_5_07_vocal_only.npy 817 | bug_4_03.npy 818 | bug_4_03_vocal_only.npy 819 | stool_1_09.npy 820 | stool_1_09_vocal_only.npy 821 | amy_11_04.npy 822 | amy_11_04_vocal_only.npy 823 | Kenshin_1_01.npy 824 | Kenshin_1_01_vocal_only.npy 825 | jmzen_2_02.npy 826 | jmzen_2_02_vocal_only.npy 827 | bobon_5_07.npy 828 | bobon_5_07_vocal_only.npy 829 | leon_5_02.npy 830 | leon_5_02_vocal_only.npy 831 | yifen_1_10.npy 832 | yifen_1_10_vocal_only.npy 833 | davidson_2_01.npy 834 | davidson_2_01_vocal_only.npy 835 | davidson_5_08.npy 836 | davidson_5_08_vocal_only.npy 837 | yifen_1_04.npy 838 | yifen_1_04_vocal_only.npy 839 | davidson_5_09.npy 840 | davidson_5_09_vocal_only.npy 841 | yifen_1_05.npy 842 | yifen_1_05_vocal_only.npy 843 | yifen_1_11.npy 844 | yifen_1_11_vocal_only.npy 845 | leon_5_03.npy 846 | leon_5_03_vocal_only.npy 847 | jmzen_2_03.npy 848 | jmzen_2_03_vocal_only.npy 849 | bobon_5_06.npy 850 | bobon_5_06_vocal_only.npy 851 | 
bobon_5_12.npy 852 | bobon_5_12_vocal_only.npy 853 | amy_11_05.npy 854 | amy_11_05_vocal_only.npy 855 | bug_4_02.npy 856 | bug_4_02_vocal_only.npy 857 | stool_1_08.npy 858 | stool_1_08_vocal_only.npy 859 | heycat_4_02.npy 860 | heycat_4_02_vocal_only.npy 861 | ariel_5_06.npy 862 | ariel_5_06_vocal_only.npy 863 | amy_3_03.npy 864 | amy_3_03_vocal_only.npy 865 | abjones_2_01.npy 866 | abjones_2_01_vocal_only.npy 867 | abjones_5_08.npy 868 | abjones_5_08_vocal_only.npy 869 | Ani_5_06.npy 870 | Ani_5_06_vocal_only.npy 871 | Kenshin_3_05.npy 872 | Kenshin_3_05_vocal_only.npy 873 | stool_4_10.npy 874 | stool_4_10_vocal_only.npy 875 | stool_4_04.npy 876 | stool_4_04_vocal_only.npy 877 | annar_4_07.npy 878 | annar_4_07_vocal_only.npy 879 | amy_1_06.npy 880 | amy_1_06_vocal_only.npy 881 | titon_2_06.npy 882 | titon_2_06_vocal_only.npy 883 | geniusturtle_7_14.npy 884 | geniusturtle_7_14_vocal_only.npy 885 | yifen_4_09.npy 886 | yifen_4_09_vocal_only.npy 887 | khair_5_07.npy 888 | khair_5_07_vocal_only.npy 889 | fdps_5_06.npy 890 | fdps_5_06_vocal_only.npy 891 | leon_7_12.npy 892 | leon_7_12_vocal_only.npy 893 | leon_7_06.npy 894 | leon_7_06_vocal_only.npy 895 | leon_9_03.npy 896 | leon_9_03_vocal_only.npy 897 | leon_9_01.npy 898 | leon_9_01_vocal_only.npy 899 | leon_7_10.npy 900 | leon_7_10_vocal_only.npy 901 | leon_7_04.npy 902 | leon_7_04_vocal_only.npy 903 | yifen_3_02.npy 904 | yifen_3_02_vocal_only.npy 905 | fdps_5_04.npy 906 | fdps_5_04_vocal_only.npy 907 | khair_5_05.npy 908 | khair_5_05_vocal_only.npy 909 | amy_8_08.npy 910 | amy_8_08_vocal_only.npy 911 | geniusturtle_7_02.npy 912 | geniusturtle_7_02_vocal_only.npy 913 | titon_2_04.npy 914 | titon_2_04_vocal_only.npy 915 | amy_1_04.npy 916 | amy_1_04_vocal_only.npy 917 | annar_4_05.npy 918 | annar_4_05_vocal_only.npy 919 | stool_4_06.npy 920 | stool_4_06_vocal_only.npy 921 | Kenshin_3_07.npy 922 | Kenshin_3_07_vocal_only.npy 923 | Ani_5_04.npy 924 | Ani_5_04_vocal_only.npy 925 | amy_13_02.npy 926 | 
amy_13_02_vocal_only.npy 927 | abjones_2_03.npy 928 | abjones_2_03_vocal_only.npy 929 | ariel_5_04.npy 930 | ariel_5_04_vocal_only.npy 931 | amy_3_01.npy 932 | amy_3_01_vocal_only.npy 933 | amy_4_08.npy 934 | amy_4_08_vocal_only.npy 935 | bug_3_09.npy 936 | bug_3_09_vocal_only.npy 937 | Kenshin_1_02.npy 938 | Kenshin_1_02_vocal_only.npy 939 | amy_11_07.npy 940 | amy_11_07_vocal_only.npy 941 | jmzen_5_08.npy 942 | jmzen_5_08_vocal_only.npy 943 | bobon_5_04.npy 944 | bobon_5_04_vocal_only.npy 945 | jmzen_2_01.npy 946 | jmzen_2_01_vocal_only.npy 947 | bobon_5_10.npy 948 | bobon_5_10_vocal_only.npy 949 | leon_5_01.npy 950 | leon_5_01_vocal_only.npy 951 | leon_2_08.npy 952 | leon_2_08_vocal_only.npy 953 | yifen_1_07.npy 954 | yifen_1_07_vocal_only.npy 955 | davidson_2_02.npy 956 | davidson_2_02_vocal_only.npy 957 | yifen_1_13.npy 958 | yifen_1_13_vocal_only.npy 959 | yifen_1_12.npy 960 | yifen_1_12_vocal_only.npy 961 | yifen_1_06.npy 962 | yifen_1_06_vocal_only.npy 963 | davidson_2_03.npy 964 | davidson_2_03_vocal_only.npy 965 | leon_2_09.npy 966 | leon_2_09_vocal_only.npy 967 | bobon_5_11.npy 968 | bobon_5_11_vocal_only.npy 969 | bobon_5_05.npy 970 | bobon_5_05_vocal_only.npy 971 | jmzen_5_09.npy 972 | jmzen_5_09_vocal_only.npy 973 | amy_11_06.npy 974 | amy_11_06_vocal_only.npy 975 | Kenshin_1_03.npy 976 | Kenshin_1_03_vocal_only.npy 977 | bug_3_08.npy 978 | bug_3_08_vocal_only.npy 979 | annar_1_08.npy 980 | annar_1_08_vocal_only.npy 981 | bug_4_01.npy 982 | bug_4_01_vocal_only.npy 983 | amy_4_09.npy 984 | amy_4_09_vocal_only.npy 985 | ariel_5_05.npy 986 | ariel_5_05_vocal_only.npy 987 | heycat_3_08.npy 988 | heycat_3_08_vocal_only.npy 989 | heycat_4_01.npy 990 | heycat_4_01_vocal_only.npy 991 | abjones_2_02.npy 992 | abjones_2_02_vocal_only.npy 993 | amy_13_03.npy 994 | amy_13_03_vocal_only.npy 995 | Ani_5_05.npy 996 | Ani_5_05_vocal_only.npy 997 | Kenshin_3_06.npy 998 | Kenshin_3_06_vocal_only.npy 999 | stool_4_07.npy 1000 | stool_4_07_vocal_only.npy 1001 | 
annar_4_04.npy 1002 | annar_4_04_vocal_only.npy 1003 | titon_2_05.npy 1004 | amy_1_05.npy 1005 | geniusturtle_7_03.npy 1006 | khair_5_04.npy 1007 | fdps_5_05.npy 1008 | yifen_3_03.npy 1009 | leon_7_05.npy 1010 | leon_7_11.npy 1011 | leon_7_08.npy 1012 | fdps_2_01.npy 1013 | fdps_5_08.npy 1014 | yifen_4_07.npy 1015 | amy_8_04.npy 1016 | amy_6_01.npy 1017 | titon_2_08.npy 1018 | titon_5_01.npy 1019 | annar_4_09.npy 1020 | stool_3_03.npy 1021 | Ani_5_08.npy 1022 | Ani_2_01.npy 1023 | Kenshin_4_02.npy 1024 | abjones_5_06.npy 1025 | geniusturtle_2_02.npy 1026 | amy_4_04.npy 1027 | ariel_5_08.npy 1028 | amy_4_10.npy 1029 | heycat_3_05.npy 1030 | ariel_2_01.npy 1031 | bug_3_05.npy 1032 | stool_1_06.npy 1033 | annar_1_05.npy 1034 | amy_16_02.npy 1035 | jmzen_5_04.npy 1036 | bobon_5_08.npy 1037 | bobon_2_01.npy 1038 | leon_2_04.npy 1039 | leon_2_10.npy 1040 | davidson_5_07.npy 1041 | davidson_5_06.npy 1042 | leon_2_11.npy 1043 | leon_2_05.npy 1044 | bobon_5_09.npy 1045 | jmzen_5_05.npy 1046 | amy_16_03.npy 1047 | annar_1_04.npy 1048 | stool_1_07.npy 1049 | bug_3_04.npy 1050 | heycat_3_04.npy 1051 | amy_4_11.npy 1052 | amy_4_05.npy 1053 | abjones_5_07.npy 1054 | Kenshin_4_03.npy 1055 | amy_14_06.npy 1056 | annar_4_08.npy 1057 | annar_3_01.npy 1058 | stool_3_02.npy 1059 | heycat_1_01.npy 1060 | titon_2_09.npy 1061 | amy_8_05.npy 1062 | yifen_4_06.npy 1063 | khair_2_01.npy 1064 | fdps_5_09.npy 1065 | leon_7_09.npy 1066 | yifen_4_04.npy 1067 | yifen_4_10.npy 1068 | fdps_2_02.npy 1069 | khair_2_03.npy 1070 | amy_8_07.npy 1071 | heycat_1_03.npy 1072 | titon_5_02.npy 1073 | amy_6_02.npy 1074 | annar_3_03.npy 1075 | stool_4_09.npy 1076 | Ani_2_02.npy 1077 | Kenshin_4_01.npy 1078 | Kenshin_3_08.npy 1079 | amy_14_04.npy 1080 | abjones_5_05.npy 1081 | geniusturtle_2_01.npy 1082 | ariel_2_02.npy 1083 | heycat_3_06.npy 1084 | amy_4_07.npy 1085 | bug_3_06.npy 1086 | stool_1_05.npy 1087 | annar_1_06.npy 1088 | amy_16_01.npy 1089 | bobon_2_02.npy 1090 | jmzen_5_07.npy 1091 | leon_2_07.npy 
1092 | yifen_1_08.npy 1093 | davidson_5_04.npy 1094 | davidson_5_10.npy 1095 | davidson_5_11.npy 1096 | yifen_1_09.npy 1097 | davidson_5_05.npy 1098 | leon_2_06.npy 1099 | bobon_2_03.npy 1100 | jmzen_5_06.npy 1101 | annar_1_07.npy 1102 | stool_1_04.npy 1103 | bug_3_07.npy 1104 | amy_4_06.npy 1105 | ariel_2_03.npy 1106 | heycat_3_07.npy 1107 | abjones_5_04.npy 1108 | amy_14_05.npy 1109 | Ani_2_03.npy 1110 | annar_3_02.npy 1111 | stool_3_01.npy 1112 | stool_4_08.npy 1113 | titon_5_03.npy 1114 | amy_6_03.npy 1115 | heycat_1_02.npy 1116 | amy_8_06.npy 1117 | yifen_4_11.npy 1118 | khair_2_02.npy 1119 | fdps_2_03.npy 1120 | yifen_4_05.npy 1121 | yifen_4_01.npy 1122 | yifen_3_08.npy 1123 | fdps_2_07.npy 1124 | khair_2_06.npy 1125 | heycat_1_06.npy 1126 | amy_6_07.npy 1127 | titon_5_07.npy 1128 | amy_8_02.npy 1129 | geniusturtle_7_08.npy 1130 | Ani_2_07.npy 1131 | Kenshin_4_04.npy 1132 | Kenshin_4_10.npy 1133 | amy_14_01.npy 1134 | annar_3_06.npy 1135 | stool_3_05.npy 1136 | heycat_3_03.npy 1137 | ariel_2_07.npy 1138 | amy_4_02.npy 1139 | abjones_2_09.npy 1140 | Kenshin_1_08.npy 1141 | amy_16_04.npy 1142 | bug_3_03.npy 1143 | annar_1_03.npy 1144 | leon_2_02.npy 1145 | jmzen_5_02.npy 1146 | bobon_2_07.npy 1147 | davidson_5_01.npy 1148 | davidson_2_08.npy 1149 | davidson_2_09.npy 1150 | jmzen_5_03.npy 1151 | bobon_2_06.npy 1152 | leon_2_03.npy 1153 | stool_1_01.npy 1154 | annar_1_02.npy 1155 | bug_3_02.npy 1156 | amy_16_05.npy 1157 | Kenshin_1_09.npy 1158 | geniusturtle_2_05.npy 1159 | abjones_5_01.npy 1160 | abjones_2_08.npy 1161 | amy_4_03.npy 1162 | heycat_3_02.npy 1163 | ariel_2_06.npy 1164 | stool_3_04.npy 1165 | annar_3_07.npy 1166 | bug_1_07.npy 1167 | stool_3_10.npy 1168 | Kenshin_4_11.npy 1169 | Kenshin_4_05.npy 1170 | Ani_2_06.npy 1171 | geniusturtle_7_09.npy 1172 | amy_8_03.npy 1173 | amy_6_06.npy 1174 | titon_5_06.npy 1175 | heycat_1_07.npy 1176 | khair_2_07.npy 1177 | fdps_2_06.npy 1178 | fdps_2_12.npy 1179 | yifen_3_09.npy 1180 | fdps_2_04.npy 1181 | 
khair_2_05.npy 1182 | fdps_2_10.npy 1183 | yifen_4_02.npy 1184 | titon_5_04.npy 1185 | amy_6_04.npy 1186 | heycat_1_05.npy 1187 | amy_6_10.npy 1188 | amy_8_01.npy 1189 | amy_14_02.npy 1190 | Ani_2_04.npy 1191 | Kenshin_4_07.npy 1192 | annar_3_05.npy 1193 | stool_3_06.npy 1194 | amy_4_01.npy 1195 | amy_3_08.npy 1196 | heycat_4_09.npy 1197 | ariel_2_04.npy 1198 | abjones_5_03.npy 1199 | geniusturtle_2_07.npy 1200 | amy_16_07.npy 1201 | stool_1_03.npy 1202 | leon_2_01.npy 1203 | leon_5_08.npy 1204 | bobon_2_04.npy 1205 | jmzen_2_08.npy 1206 | jmzen_5_01.npy 1207 | davidson_5_02.npy 1208 | davidson_5_03.npy 1209 | jmzen_2_09.npy 1210 | bobon_2_05.npy 1211 | leon_5_09.npy 1212 | stool_1_02.npy 1213 | annar_1_01.npy 1214 | bug_3_01.npy 1215 | amy_16_06.npy 1216 | geniusturtle_2_06.npy 1217 | abjones_5_02.npy 1218 | ariel_2_05.npy 1219 | heycat_4_08.npy 1220 | heycat_3_01.npy 1221 | bug_1_10.npy 1222 | stool_3_07.npy 1223 | annar_3_04.npy 1224 | Kenshin_4_06.npy 1225 | Ani_2_05.npy 1226 | amy_14_03.npy 1227 | amy_6_11.npy 1228 | heycat_1_04.npy 1229 | titon_5_05.npy 1230 | amy_6_05.npy 1231 | fdps_2_11.npy 1232 | yifen_4_03.npy 1233 | khair_2_04.npy 1234 | fdps_2_05.npy 1235 | abjones_3_04.npy 1236 | abjones_3_10.npy 1237 | amy_2_06.npy 1238 | titon_1_06.npy 1239 | ariel_4_03.npy 1240 | heycat_5_07.npy 1241 | bug_5_07.npy 1242 | bug_5_13.npy 1243 | bobon_4_03.npy 1244 | jmzen_3_06.npy 1245 | leon_4_06.npy 1246 | khair_6_07.npy 1247 | davidson_3_11.npy 1248 | davidson_3_05.npy 1249 | leon_8_12.npy 1250 | jmzen_1_03.npy 1251 | leon_8_06.npy 1252 | leon_6_03.npy 1253 | yifen_2_11.npy 1254 | khair_4_02.npy 1255 | fdps_4_03.npy 1256 | yifen_2_05.npy 1257 | abjones_1_01.npy 1258 | geniusturtle_6_05.npy 1259 | titon_3_03.npy 1260 | annar_5_02.npy 1261 | stool_5_01.npy 1262 | stool_2_08.npy 1263 | amy_12_05.npy 1264 | Ani_4_03.npy 1265 | Kenshin_5_09.npy 1266 | Kenshin_2_01.npy 1267 | Ani_4_02.npy 1268 | Kenshin_5_08.npy 1269 | amy_12_04.npy 1270 | annar_5_03.npy 1271 | 
geniusturtle_8_01.npy 1272 | titon_3_02.npy 1273 | geniusturtle_6_04.npy 1274 | davidson_1_01.npy 1275 | yifen_2_04.npy 1276 | yifen_2_10.npy 1277 | fdps_4_02.npy 1278 | khair_4_03.npy 1279 | leon_6_02.npy 1280 | leon_8_07.npy 1281 | jmzen_1_02.npy 1282 | leon_8_13.npy 1283 | davidson_3_04.npy 1284 | davidson_3_10.npy 1285 | khair_6_06.npy 1286 | leon_4_07.npy 1287 | bobon_4_02.npy 1288 | jmzen_3_07.npy 1289 | amy_10_01.npy 1290 | bug_5_12.npy 1291 | bug_5_06.npy 1292 | ariel_4_02.npy 1293 | heycat_5_06.npy 1294 | amy_2_07.npy 1295 | titon_1_07.npy 1296 | abjones_3_11.npy 1297 | geniusturtle_3_08.npy 1298 | geniusturtle_4_01.npy 1299 | geniusturtle_4_03.npy 1300 | heycat_5_04.npy 1301 | titon_1_05.npy 1302 | amy_2_05.npy 1303 | bug_5_10.npy 1304 | bug_5_04.npy 1305 | amy_10_03.npy 1306 | jmzen_3_05.npy 1307 | bobon_3_09.npy 1308 | jmzen_3_11.npy 1309 | leon_4_05.npy 1310 | davidson_3_06.npy 1311 | khair_6_04.npy 1312 | davidson_3_12.npy 1313 | leon_8_05.npy 1314 | leon_8_11.npy 1315 | leon_1_09.npy 1316 | yifen_2_06.npy 1317 | davidson_1_03.npy 1318 | khair_4_01.npy 1319 | yifen_2_12.npy 1320 | abjones_1_02.npy 1321 | geniusturtle_6_06.npy 1322 | geniusturtle_8_03.npy 1323 | titon_4_09.npy 1324 | annar_2_08.npy 1325 | annar_5_01.npy 1326 | stool_5_02.npy 1327 | Kenshin_2_03.npy 1328 | amy_12_06.npy 1329 | amy_12_07.npy 1330 | Kenshin_2_02.npy 1331 | Ani_4_01.npy 1332 | stool_5_03.npy 1333 | titon_4_08.npy 1334 | amy_7_08.npy 1335 | titon_3_01.npy 1336 | geniusturtle_8_02.npy 1337 | geniusturtle_6_07.npy 1338 | abjones_1_03.npy 1339 | fdps_4_01.npy 1340 | yifen_2_13.npy 1341 | yifen_2_07.npy 1342 | davidson_1_02.npy 1343 | leon_6_01.npy 1344 | leon_1_08.npy 1345 | leon_8_10.npy 1346 | leon_8_04.npy 1347 | jmzen_1_01.npy 1348 | davidson_3_13.npy 1349 | khair_6_05.npy 1350 | davidson_3_07.npy 1351 | leon_4_04.npy 1352 | jmzen_3_10.npy 1353 | bobon_3_08.npy 1354 | jmzen_3_04.npy 1355 | bobon_4_01.npy 1356 | amy_10_02.npy 1357 | bug_5_05.npy 1358 | bug_5_11.npy 1359 | 
titon_1_04.npy 1360 | amy_2_04.npy 1361 | ariel_3_08.npy 1362 | heycat_5_05.npy 1363 | ariel_4_01.npy 1364 | geniusturtle_4_02.npy 1365 | abjones_3_06.npy 1366 | abjones_3_12.npy 1367 | ariel_4_05.npy 1368 | heycat_5_01.npy 1369 | tammy_1_08.npy 1370 | geniusturtle_4_12.npy 1371 | abjones_3_02.npy 1372 | geniusturtle_4_06.npy 1373 | amy_10_06.npy 1374 | bug_2_08.npy 1375 | bug_5_01.npy 1376 | leon_3_09.npy 1377 | bobon_4_05.npy 1378 | jmzen_4_09.npy 1379 | davidson_3_03.npy 1380 | khair_6_01.npy 1381 | fdps_1_09.npy 1382 | khair_1_08.npy 1383 | leon_6_05.npy 1384 | jmzen_1_05.npy 1385 | bobon_1_09.npy 1386 | jmzen_1_11.npy 1387 | davidson_1_06.npy 1388 | yifen_2_03.npy 1389 | khair_4_04.npy 1390 | fdps_4_05.npy 1391 | geniusturtle_8_06.npy 1392 | titon_3_05.npy 1393 | amy_9_09.npy 1394 | geniusturtle_6_03.npy 1395 | Ani_4_05.npy 1396 | Kenshin_2_06.npy 1397 | amy_12_03.npy 1398 | stool_5_07.npy 1399 | annar_5_04.npy 1400 | annar_5_05.npy 1401 | stool_5_06.npy 1402 | amy_12_02.npy 1403 | Ani_4_10.npy 1404 | Kenshin_2_07.npy 1405 | Ani_4_04.npy 1406 | geniusturtle_6_02.npy 1407 | amy_9_08.npy 1408 | titon_3_04.npy 1409 | geniusturtle_8_07.npy 1410 | fdps_4_04.npy 1411 | khair_4_05.npy 1412 | davidson_1_07.npy 1413 | yifen_2_02.npy 1414 | jmzen_1_10.npy 1415 | bobon_1_08.npy 1416 | leon_8_01.npy 1417 | jmzen_1_04.npy 1418 | leon_6_04.npy 1419 | fdps_1_08.npy 1420 | davidson_3_02.npy 1421 | bobon_4_10.npy 1422 | jmzen_4_08.npy 1423 | bobon_4_04.npy 1424 | jmzen_3_01.npy 1425 | leon_4_01.npy 1426 | leon_3_08.npy 1427 | bug_5_14.npy 1428 | amy_10_07.npy 1429 | geniusturtle_4_07.npy 1430 | abjones_3_03.npy 1431 | amy_2_01.npy 1432 | titon_1_01.npy 1433 | amy_5_08.npy 1434 | ariel_4_04.npy 1435 | titon_1_03.npy 1436 | amy_2_03.npy 1437 | heycat_5_02.npy 1438 | ariel_4_06.npy 1439 | abjones_3_01.npy 1440 | geniusturtle_4_05.npy 1441 | abjones_4_08.npy 1442 | geniusturtle_4_11.npy 1443 | amy_10_05.npy 1444 | bug_5_02.npy 1445 | leon_4_03.npy 1446 | jmzen_3_03.npy 1447 | 
bobon_4_06.npy 1448 | khair_6_02.npy 1449 | davidson_3_14.npy 1450 | leon_6_06.npy 1451 | jmzen_1_12.npy 1452 | leon_8_03.npy 1453 | khair_4_07.npy 1454 | fdps_4_06.npy 1455 | yifen_2_14.npy 1456 | yifen_5_09.npy 1457 | davidson_1_05.npy 1458 | titon_3_06.npy 1459 | geniusturtle_8_05.npy 1460 | geniusturtle_1_09.npy 1461 | abjones_1_04.npy 1462 | amy_15_09.npy 1463 | Kenshin_2_11.npy 1464 | Ani_4_06.npy 1465 | Kenshin_2_05.npy 1466 | stool_5_04.npy 1467 | annar_5_07.npy 1468 | annar_5_06.npy 1469 | stool_5_05.npy 1470 | Kenshin_2_04.npy 1471 | Ani_4_07.npy 1472 | Kenshin_2_10.npy 1473 | amy_15_08.npy 1474 | amy_12_01.npy 1475 | geniusturtle_1_08.npy 1476 | geniusturtle_6_01.npy 1477 | geniusturtle_8_04.npy 1478 | titon_3_07.npy 1479 | yifen_2_01.npy 1480 | yifen_5_08.npy 1481 | davidson_1_04.npy 1482 | davidson_1_10.npy 1483 | khair_4_06.npy 1484 | yifen_2_15.npy 1485 | leon_8_02.npy 1486 | jmzen_1_07.npy 1487 | leon_6_07.npy 1488 | davidson_3_01.npy 1489 | khair_6_03.npy 1490 | jmzen_3_02.npy 1491 | bobon_4_07.npy 1492 | leon_4_02.npy 1493 | bug_5_03.npy 1494 | amy_10_04.npy 1495 | geniusturtle_4_10.npy 1496 | abjones_3_14.npy 1497 | geniusturtle_4_04.npy 1498 | heycat_5_03.npy 1499 | ariel_4_07.npy 1500 | titon_1_02.npy 1501 | amy_2_02.npy 1502 | AimeeNorwich_Child_MIX.npy 1503 | AlexanderRoss_GoodbyeBolero_MIX.npy 1504 | AlexanderRoss_VelvetCurtain_MIX.npy 1505 | AvaLuna_Waterduct_MIX.npy 1506 | BigTroubles_Phantom_MIX.npy 1507 | DreamersOfTheGhetto_HeavyLove_MIX.npy 1508 | FacesOnFilm_WaitingForGa_MIX.npy 1509 | FamilyBand_Again_MIX.npy 1510 | Handel_TornamiAVagheggiar_MIX.npy 1511 | HeladoNegro_MitadDelMundo_MIX.npy 1512 | HopAlong_SisterCities_MIX.npy 1513 | LizNelson_Coldwar_MIX.npy 1514 | LizNelson_ImComingHome_MIX.npy 1515 | LizNelson_Rainfall_MIX.npy 1516 | Meaxic_TakeAStep_MIX.npy 1517 | Meaxic_YouListen_MIX.npy 1518 | MusicDelta_80sRock_MIX.npy 1519 | MusicDelta_Beatles_MIX.npy 1520 | MusicDelta_Britpop_MIX.npy 1521 | MusicDelta_Country1_MIX.npy 1522 | 
def reorganize(x, octave_res):
    """Regroup the frequency axis (axis 1) of a (C, F, T) array so bins that
    share the same position within an octave become adjacent.

    The new bin order is [0, octave_res, 2*octave_res, ..., 1, 1+octave_res, ...].
    Returns a new array of the same shape.
    """
    num_bins = x.shape[1]
    order = [b for offset in range(octave_res)
             for b in range(offset, num_bins, octave_res)]
    # fancy indexing copies, matching the original's np.array(...) behavior
    return np.asarray(x)[:, order, :]
class TONetTrainDataset(Dataset):
    """Training dataset: slices cached CFP features and f0 labels into
    fixed-length, zero-padded segments.

    Each segment is padded by ``network_time_shrink_size`` frames on both
    sides so the network's temporal shrinking still yields ``seg_frame``
    output frames per segment.
    """

    def __init__(self, data_list, config):
        # hoisted out of the inner loop: was re-imported every iteration
        from config import network_time_shrink_size
        self.config = config
        # NOTE(review): hard-coded local paths — presumably should come from
        # config (the commented-out lines suggest so); confirm before reuse.
        self.cfp_dir = "/home/ken/Downloads/cfp_saved/"
        self.f0_dir = "/home/ken/Downloads/labels_and_waveform/"
        self.data_list = data_list
        self.cent_f = np.array(get_CenFreq(config.startfreq, config.stopfreq, config.octave_res))
        self.data_cfp = []
        self.data_gd = []
        seg_frame = config.seg_frame
        shift_frame = config.shift_frame
        print("Data List:", data_list)
        with open(data_list, "r") as f:
            data_txt = f.readlines()
        data_txt = [d.split(".")[0] for d in data_txt]
        print("Song Size:", len(data_txt))
        for filename in tqdm(data_txt):
            cfp_file = os.path.join(self.cfp_dir, filename + ".npy")
            ref_file = os.path.join(self.f0_dir, filename + ".txt")
            temp_cfp = np.load(cfp_file, allow_pickle=True)
            temp_freq = np.loadtxt(ref_file)[:, 1]
            # align feature / label lengths (whichever is longer is cut)
            if temp_freq.shape[0] > temp_cfp.shape[2]:
                temp_freq = temp_freq[:temp_cfp.shape[2]]
            else:
                temp_cfp = temp_cfp[:, :, :temp_freq.shape[0]]
            for bgnt in range(0, temp_cfp.shape[2], shift_frame):
                endt = bgnt + seg_frame
                temp_gd = index2centf(temp_freq[bgnt:endt], self.cent_f)
                # take the segment plus shrink-size context on each side
                temp_x = temp_cfp[:, :, max(0, bgnt - network_time_shrink_size):
                                  min(endt + network_time_shrink_size, temp_cfp.shape[2])]
                if bgnt - network_time_shrink_size < 0:
                    left = network_time_shrink_size - bgnt
                    temp_x = np.concatenate(
                        [np.zeros((temp_cfp.shape[0], temp_cfp.shape[1], left)), temp_x],
                        axis=2)
                if endt + network_time_shrink_size > temp_cfp.shape[2]:
                    if endt >= temp_cfp.shape[2]:
                        right = network_time_shrink_size
                    else:
                        right = endt + network_time_shrink_size - temp_cfp.shape[2]
                    temp_x = np.concatenate(
                        [temp_x, np.zeros((temp_cfp.shape[0], temp_cfp.shape[1], right))],
                        axis=2)
                # last segment of a song may still be short: zero-pad to full size
                if temp_x.shape[2] < seg_frame + 2 * network_time_shrink_size:
                    rl = temp_x.shape[2]
                    pad_x = np.zeros((temp_x.shape[0], temp_x.shape[1],
                                      seg_frame + 2 * network_time_shrink_size))
                    pad_gd = np.zeros(seg_frame)
                    pad_gd[:rl - 2 * network_time_shrink_size] = temp_gd
                    pad_x[:, :, :rl] = temp_x
                    temp_x = pad_x
                    temp_gd = pad_gd
                assert temp_x.shape[2] - len(temp_gd) == 2 * network_time_shrink_size
                # BUG FIX: the original computed reorganize(temp_x, ...) here and
                # discarded the result (tcfp is deliberately not cached to save
                # memory) — the expensive call is removed.
                self.data_cfp.append(temp_x)
                self.data_gd.append(temp_gd)
        self.data_cfp = np.array(self.data_cfp)
        # index placeholder keeps __getitem__'s interface unchanged; built once
        # instead of rebuilding a 50000-element list per segment
        self.data_tcfp = np.arange(50000) if len(self.data_cfp) else np.array([])
        self.data_gd = np.array(self.data_gd)
        print("Total Datasize:", self.data_cfp.shape)

    def __len__(self):
        return len(self.data_cfp)

    def __getitem__(self, index):
        return {
            "cfp": self.data_cfp[index].astype(np.float32),
            "tcfp": self.data_tcfp[index].astype(np.float32),
            "gd": self.data_gd[index],
        }
class TONetTestDataset(Dataset):
    """Test dataset: like TONetTrainDataset but keeps the segments grouped per
    song and additionally stores raw-Hz targets, original song lengths and the
    reference file names for evaluation."""

    def __init__(self, data_list, config):
        # hoisted out of the inner loop: was re-imported every iteration
        from config import network_time_shrink_size
        self.config = config
        # NOTE(review): hard-coded local paths — confirm against config.
        self.cfp_dir = "/home/ken/Downloads/cfp_saved/"
        self.f0_dir = "/home/ken/Downloads/labels_and_waveform/"
        self.data_list = data_list
        self.cent_f = np.array(get_CenFreq(config.startfreq, config.stopfreq, config.octave_res))
        self.data_names = []
        self.data_cfp = []
        self.data_gd = []
        self.data_len = []
        self.data_tcfp = []
        seg_frame = config.seg_frame
        shift_frame = config.shift_frame
        print("Data List:", data_list)
        with open(data_list, "r") as f:
            data_txt = f.readlines()
        data_txt = [d.split(".")[0] for d in data_txt]
        print("Song Size:", len(data_txt))
        for filename in tqdm(data_txt):
            group_cfp = []
            group_gd = []
            group_tcfp = []
            cfp_file = os.path.join(self.cfp_dir, filename + ".npy")
            ref_file = os.path.join(self.f0_dir, filename + ".txt")
            temp_cfp = np.load(cfp_file, allow_pickle=True)
            temp_freq = np.loadtxt(ref_file)[:, 1]
            self.data_len.append(len(temp_freq))
            # align feature / label lengths (whichever is longer is cut)
            if temp_freq.shape[0] > temp_cfp.shape[2]:
                temp_freq = temp_freq[:temp_cfp.shape[2]]
            else:
                temp_cfp = temp_cfp[:, :, :temp_freq.shape[0]]
            for bgnt in range(0, temp_cfp.shape[2], shift_frame):
                endt = bgnt + seg_frame
                temp_gd = temp_freq[bgnt:endt]  # raw Hz targets (not quantized)
                # segment plus shrink-size context on each side, zero-padded at
                # the song boundaries (the all-zero label 0 matches the padding)
                temp_x = temp_cfp[:, :, max(0, bgnt - network_time_shrink_size):
                                  min(endt + network_time_shrink_size, temp_cfp.shape[2])]
                if bgnt - network_time_shrink_size < 0:
                    left = network_time_shrink_size - bgnt
                    temp_x = np.concatenate(
                        [np.zeros((temp_cfp.shape[0], temp_cfp.shape[1], left)), temp_x],
                        axis=2)
                if endt + network_time_shrink_size > temp_cfp.shape[2]:
                    if endt >= temp_cfp.shape[2]:
                        right = network_time_shrink_size
                    else:
                        right = endt + network_time_shrink_size - temp_cfp.shape[2]
                    temp_x = np.concatenate(
                        [temp_x, np.zeros((temp_cfp.shape[0], temp_cfp.shape[1], right))],
                        axis=2)
                if temp_x.shape[2] < seg_frame + 2 * network_time_shrink_size:
                    rl = temp_x.shape[2]
                    pad_x = np.zeros((temp_x.shape[0], temp_x.shape[1],
                                      seg_frame + 2 * network_time_shrink_size))
                    pad_gd = np.zeros(seg_frame)
                    pad_gd[:rl - 2 * network_time_shrink_size] = temp_gd
                    pad_x[:, :, :rl] = temp_x
                    temp_x = pad_x
                    temp_gd = pad_gd
                assert temp_x.shape[2] - len(temp_gd) == 2 * network_time_shrink_size
                group_tcfp.append(reorganize(temp_x[:], config.octave_res))
                group_cfp.append(temp_x)
                group_gd.append(temp_gd)
            self.data_names.append(ref_file)
            self.data_tcfp.append(np.array(group_tcfp))
            self.data_cfp.append(np.array(group_cfp))
            self.data_gd.append(np.array(group_gd))

    def __len__(self):
        return len(self.data_cfp)

    def __getitem__(self, index):
        return {
            "cfp": self.data_cfp[index].astype(np.float32),
            "tcfp": self.data_tcfp[index].astype(np.float32),
            "gd": self.data_gd[index],
            "length": self.data_len[index],
            "name": self.data_names[index],
        }
def nonlinear_func(X, g, cutoff):
    """Gamma-compress a spectral matrix after zeroing `cutoff` rows at both
    ends; g == 0 means logarithmic compression instead of a power law.

    For g != 0 negatives are rectified to 0 first. Note: the rectification and
    row zeroing mutate X in place before the final power/log.
    """
    cutoff = int(cutoff)
    if g == 0:
        X = np.log(X)
        X[:cutoff, :] = 0
        X[-cutoff:, :] = 0
        return X
    X[X < 0] = 0
    X[:cutoff, :] = 0
    X[-cutoff:, :] = 0
    return np.power(X, g)
def Freq2LogFreqMapping(tfr, f, fr, fc, tc, NumPerOct):
    """Map a linear-frequency spectrogram onto a log-frequency axis.

    Center frequencies are log-spaced from fc up to (exclusive) 1/tc with
    NumPerOct bins per octave; each linear bin contributes triangular
    interpolation weights to its two neighboring centers.

    Returns (tfrL, central_freq): the remapped spectrogram of shape
    (len(central_freq) - 1, T) and the list of center frequencies.
    """
    StartFreq = fc
    StopFreq = 1 / tc
    Nest = int(np.ceil(np.log2(StopFreq / StartFreq)) * NumPerOct)
    central_freq = []
    for i in range(0, Nest):
        CenFreq = StartFreq * pow(2, float(i) / NumPerOct)
        if CenFreq < StopFreq:
            central_freq.append(CenFreq)
        else:
            break

    Nest = len(central_freq)
    freq_band_transformation = np.zeros((Nest - 1, len(f)), dtype=np.float32)
    for i in range(1, Nest - 1):
        # linear bins spanned by the triangle centered at central_freq[i]
        l = int(round(central_freq[i - 1] / fr))
        r = int(round(central_freq[i + 1] / fr) + 1)
        if l >= r - 1:
            # triangle narrower than one linear bin: full weight to bin l
            freq_band_transformation[i, l] = 1
        else:
            for j in range(l, r):
                # BUG FIX: was `entral_freq[i]` — a NameError whenever this
                # branch executed
                if f[j] > central_freq[i - 1] and f[j] <= central_freq[i]:
                    freq_band_transformation[i, j] = (f[j] - central_freq[i - 1]) / (
                        central_freq[i] - central_freq[i - 1])
                elif f[j] > central_freq[i] and f[j] < central_freq[i + 1]:
                    freq_band_transformation[i, j] = (central_freq[i + 1] - f[j]) / (
                        central_freq[i + 1] - central_freq[i])
    tfrL = np.dot(freq_band_transformation, tfr)
    return tfrL, central_freq
def Quef2LogFreqMapping(ceps, q, fs, fc, tc, NumPerOct):
    """Map the cepstrum (quefrency axis q, in seconds) onto the same
    log-frequency axis used for the spectrogram.

    Each quefrency bin j corresponds to frequency fs / j; it contributes
    triangular weights to the neighboring log-spaced centers (fc up to 1/tc,
    NumPerOct per octave). Returns (tfrL, central_freq).
    """
    low, high = fc, 1 / tc
    n_est = int(np.ceil(np.log2(high / low)) * NumPerOct)
    central_freq = []
    for k in range(n_est):
        freq_k = low * 2.0 ** (float(k) / NumPerOct)
        if freq_k >= high:
            break
        central_freq.append(freq_k)

    f = 1 / q  # quefrency -> frequency; f[0] is inf but is never selected below
    n_centers = len(central_freq)
    weights = np.zeros((n_centers - 1, len(f)), dtype=np.float32)
    for i in range(1, n_centers - 1):
        lo_c, mid_c, hi_c = central_freq[i - 1], central_freq[i], central_freq[i + 1]
        # quefrency indices whose frequencies fall inside (lo_c, hi_c)
        for j in range(int(round(fs / hi_c)), int(round(fs / lo_c) + 1)):
            if lo_c < f[j] < mid_c:
                weights[i, j] = (f[j] - lo_c) / (mid_c - lo_c)
            elif mid_c < f[j] < hi_c:
                weights[i, j] = (hi_c - f[j]) / (hi_c - mid_c)

    tfrL = np.dot(weights, ceps)
    return tfrL, central_freq
def CFP_filterbank(x, fr, fs, Hop, h, fc, tc, g, NumPerOctave):
    """Compute the Combined Frequency and Periodicity (CFP) representations.

    Alternates FFT passes with nonlinear_func gamma compression to obtain the
    gamma-compressed spectrum (tfr0), the generalized cepstrum (ceps) and the
    generalized cepstrum of spectrum (tfr), then maps each onto a
    log-frequency axis.

    Returns (tfrL0, tfrLF, tfrLQ, f, q, t, central_frequencies).
    """
    NumofLayer = np.size(g)
    N = int(fs / float(fr))
    [tfr, f, t, N] = STFT(x, fr, fs, Hop, h)
    tfr = np.power(abs(tfr), g[0])
    tfr0 = tfr  # original (gamma-compressed) STFT
    ceps = np.zeros(tfr.shape)

    from config import include_adjusted_exp

    # Optional frequency-dependent exponential re-weighting applied to the
    # spectrum before the cepstral FFT; a zero exponent yields all-ones
    # (no re-weighting). The weight vector is mirrored for the full FFT band.
    if include_adjusted_exp:
        exp_rate = np.exp(0.0006 * f)
    else:
        exp_rate = np.exp(0.0000 * f)
    z_trans = np.concatenate([exp_rate, np.flip(exp_rate)], axis=0)

    if NumofLayer >= 2:
        for gc in range(1, NumofLayer):
            if np.remainder(gc, 2) == 1:
                # odd layer: (re-weighted) spectrum -> generalized cepstrum
                tc_idx = round(fs * tc)
                ceps = np.real(np.fft.fft(tfr * np.expand_dims(z_trans, axis=1),
                                          axis=0)) / np.sqrt(N)
                ceps = nonlinear_func(ceps, g[gc], tc_idx)
            else:
                # even layer: cepstrum -> generalized cepstrum of spectrum
                fc_idx = round(fc / fr)
                tfr = np.real(np.fft.fft(ceps, axis=0)) / np.sqrt(N)
                tfr = nonlinear_func(tfr, g[gc], fc_idx)

    # keep the positive-frequency half only
    tfr0 = tfr0[:int(round(N / 2)), :]
    tfr = tfr[:int(round(N / 2)), :]
    ceps = ceps[:int(round(N / 2)), :]

    # truncate to the analysis band [fc, 1/tc]
    HighFreqIdx = int(round((1 / tc) / fr) + 1)
    f = f[:HighFreqIdx]
    tfr0 = tfr0[:HighFreqIdx, :]
    tfr = tfr[:HighFreqIdx, :]
    HighQuefIdx = int(round(fs / fc) + 1)
    q = np.arange(HighQuefIdx) / float(fs)
    ceps = ceps[:HighQuefIdx, :]

    tfrL0, central_frequencies = Freq2LogFreqMapping(tfr0, f, fr, fc, tc, NumPerOctave)
    tfrLF, central_frequencies = Freq2LogFreqMapping(tfr, f, fr, fc, tc, NumPerOctave)
    tfrLQ, central_frequencies = Quef2LogFreqMapping(ceps, q, fs, fc, tc, NumPerOctave)

    return tfrL0, tfrLF, tfrLQ, f, q, t, central_frequencies
def load_audio(filepath, sr=None, mono=True, dtype='float32'):
    """Load an audio file (wav via soundfile; mp3 via a pydub wav round-trip).

    mono=True averages channels; mono="Left"/"Right" selects one channel.
    Optionally resamples to `sr`. Returns (samples, sample_rate).
    """
    if '.mp3' in filepath:
        from pydub import AudioSegment
        import tempfile
        import os
        mp3 = AudioSegment.from_mp3(filepath)
        fd, path = tempfile.mkstemp()
        os.close(fd)  # BUG FIX: mkstemp returns an open fd; close it to avoid a leak
        mp3.export(path, format="wav")
        del mp3
        x, fs = sf.read(path)
        os.remove(path)
    else:
        x, fs = sf.read(filepath)

    if mono == True and len(x.shape) > 1:
        x = np.mean(x, axis=1)
    elif mono == "Left" and len(x.shape) > 1:
        x = x[:, 0]
    elif mono == "Right" and len(x.shape) > 1:
        x = x[:, 1]

    if sr:
        x = scipy.signal.resample_poly(x, sr, fs)
        fs = sr
    x = x.astype(dtype)
    return x, fs


def feature_extraction(x, fs, Hop=512, Window=2049, StartFreq=80.0, StopFreq=1000.0, NumPerOct=48):
    """Run the CFP filterbank on waveform x.

    Returns (Z, time, CenFreq, tfrL0, tfrLF, tfrLQ) where Z = tfrLF * tfrLQ.
    """
    fr = 2.0  # frequency resolution
    # BUG FIX: scipy.signal.blackmanharris was removed in SciPy 1.13; the
    # windows namespace has been the supported location since SciPy 1.1.
    h = scipy.signal.windows.blackmanharris(Window)  # analysis window
    g = np.array([0.24, 0.6, 1])  # gamma value

    tfrL0, tfrLF, tfrLQ, f, q, t, CenFreq = CFP_filterbank(x, fr, fs, Hop, h,
                                                           StartFreq, 1 / StopFreq, g, NumPerOct)
    Z = tfrLF * tfrLQ
    time = t / fs
    return Z, time, CenFreq, tfrL0, tfrLF, tfrLQ


def midi2hz(midi):
    """MIDI note number -> frequency in Hz (A4 = 69 = 440 Hz)."""
    return 2 ** ((midi - 69) / 12.0) * 440


def hz2midi(hz):
    """Frequency in Hz -> (fractional) MIDI note number."""
    return 69 + 12 * np.log2(hz / 440.0)


def get_CenFreq(StartFreq=80, StopFreq=1000, NumPerOct=48):
    """Log-spaced center frequencies from StartFreq (inclusive) up to
    StopFreq (exclusive), NumPerOct bins per octave."""
    Nest = int(np.ceil(np.log2(StopFreq / StartFreq)) * NumPerOct)
    central_freq = []
    for i in range(0, Nest):
        CenFreq = StartFreq * pow(2, float(i) / NumPerOct)
        if CenFreq < StopFreq:
            central_freq.append(CenFreq)
        else:
            break
    return central_freq


def get_time(fs, Hop, end):
    """Frame time stamps (seconds) with hop size Hop, up to `end`."""
    return np.arange(Hop / fs, end, Hop / fs)


def lognorm(x):
    """log(1 + x) compression."""
    return np.log(1 + x)


def norm(x):
    """Min-max normalize x to [0, 1]."""
    return (x - np.min(x)) / (np.max(x) - np.min(x))
def cfp_process(fpath, ypath=None, csv=False, sr=None, hop=hop, model_type='vocal', mono=True):
    """Compute the stacked CFP feature (spectrogram, GCoS, GC) for one file.

    Returns (W, CenFreq, time), or (W, gt, CenFreq, time) when `ypath` is
    given (read as CSV when csv=True, else whitespace-separated text). W has
    shape (3, freq_bins, frames); each channel is log-compressed and min-max
    normalized.
    """
    print('CFP process in ' + str(fpath) + ' ... (It may take some times)')
    y, sr = load_audio(fpath, sr=sr, mono=mono)
    if 'vocal' in model_type:
        # window length scales with the sampling rate (768 samples at 8 kHz)
        Z, time, CenFreq, tfrL0, tfrLF, tfrLQ = feature_extraction(
            y, sr, Hop=hop, Window=int(768 * fs / 8000),
            StartFreq=32, StopFreq=2050, NumPerOct=60)
    if 'melody' in model_type:
        raise NotImplementedError
    if 'vocal' not in model_type:
        # BUG FIX: an unknown model_type previously fell through and crashed
        # later with NameError on the undefined features
        raise ValueError("unknown model_type: %s" % model_type)

    tfrL0 = norm(lognorm(tfrL0))[np.newaxis, :, :]
    tfrLF = norm(lognorm(tfrLF))[np.newaxis, :, :]
    tfrLQ = norm(lognorm(tfrLQ))[np.newaxis, :, :]
    W = np.concatenate((tfrL0, tfrLF, tfrLQ), axis=0)
    print('Done!')
    print('Data shape: ' + str(W.shape))
    if ypath:
        if csv:
            ycsv = pd.read_csv(ypath, names=["time", "freq"])
            gt0 = ycsv['time'].values[1:, np.newaxis]
            gt1 = ycsv['freq'].values[1:, np.newaxis]
            gt = np.concatenate((gt0, gt1), axis=1)
        else:
            gt = np.loadtxt(ypath)
        return W, gt, CenFreq, time
    else:
        return W, CenFreq, time
"Right" 354 | original_f0path = data_dir + filename.replace('_vocal_only.npy', '.txt') 355 | 356 | elif "_instrumental_only" in filename: 357 | wavpath = data_dir + filename.replace('_instrumental_only.npy', '.wav') 358 | original_f0path = data_dir + filename.replace('_instrumental_only.npy', '.txt') 359 | mono = "Left" 360 | else: 361 | wavpath = data_dir + filename.replace('.npy', '.wav') 362 | mono = True 363 | 364 | 365 | import shutil 366 | f0path = data_dir + filename.replace('.npy', '.txt') 367 | if "_vocal_only" in filename and not os.path.isfile(f0path): 368 | shutil.copyfile(original_f0path, f0path) 369 | elif "_instrumental_only" in filename and not os.path.isfile(f0path): 370 | ref_temp = np.loadtxt(original_f0path) 371 | ref_time = ref_temp[:, 0] 372 | empty_ref_freq = np.zeros(len(ref_time)) 373 | np.savetxt(f0path, np.c_[ref_time, empty_ref_freq], fmt = "%.3f") 374 | 375 | 376 | 377 | magfile = cfp_save_dir + filename 378 | print(magfile) 379 | 380 | 381 | if not os.path.exists(f0path): 382 | raise Exception("Not f0 file!! 
for %s" %(f0path)) 383 | 384 | 385 | 386 | successfully_loaded = False 387 | if os.path.exists(magfile): 388 | try: 389 | np.load(magfile) 390 | print("Exist:", filename) 391 | successfully_loaded = True 392 | except: 393 | pass 394 | 395 | if successfully_loaded == False: 396 | W, CenFreq, _ = cfp_process(wavpath, sr=fs, mono=mono) 397 | 398 | np.save(magfile, W) 399 | -------------------------------------------------------------------------------- /ftanet.py: -------------------------------------------------------------------------------- 1 | # FTANet 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | 6 | 7 | 8 | class SF_Module(nn.Module): 9 | def __init__(self, input_num, n_channel, reduction, limitation): 10 | super(SF_Module, self).__init__() 11 | # Fuse Layer 12 | self.f_avg = nn.AdaptiveAvgPool2d((1,1)) 13 | self.f_bn = nn.BatchNorm1d(n_channel) 14 | self.f_linear = nn.Sequential( 15 | nn.Linear(n_channel, max(n_channel // reduction, limitation)), 16 | nn.SELU() 17 | ) 18 | # Select Layer 19 | self.s_linear = nn.ModuleList([ 20 | nn.Linear(max(n_channel // reduction, limitation), n_channel) for _ in range(input_num) 21 | ]) 22 | 23 | 24 | def forward(self, x): 25 | # x [3, bs, c, h, w] 26 | fused = None 27 | for x_s in x: 28 | if fused is None: 29 | fused = x_s 30 | else: 31 | fused = fused + x_s 32 | # [bs, c, h, w] 33 | fused = self.f_avg(fused) # bs,c,1,1 34 | fused = fused.view(fused.shape[0], fused.shape[1]) 35 | fused = self.f_bn(fused) 36 | fused = self.f_linear(fused) 37 | 38 | masks = [] 39 | for i in range(len(x)): 40 | masks.append(self.s_linear[i](fused)) 41 | # [3, bs, c] 42 | mask_stack = torch.stack(masks, dim = -1) # bs, c, 3 43 | mask_stack = nn.Softmax(dim = -2)(mask_stack) 44 | 45 | selected = None 46 | for i, x_s in enumerate(x): 47 | mask = mask_stack[:, :, i][:,:, None, None] # bs,c,1,1 48 | x_s = x_s * mask 49 | if selected is None: 50 | selected = x_s 51 | else: 52 | selected = selected + x_s 53 | # 
class FTA_Module(nn.Module):
    """Frequency-Time Attention module.

    Produces three candidate feature maps from one input: a plain 1x1
    projection, a time-attended map and a frequency-attended map, to be fused
    by an SF_Module. `shape` is (freq, time, in_channels, out_channels).
    """

    def __init__(self, shape, kt, kf):
        super(FTA_Module, self).__init__()
        c_in, c_out = shape[2], shape[3]

        def conv1d_selu(ci, co, k):
            return nn.Sequential(nn.Conv1d(ci, co, k, padding=(k - 1) // 2), nn.SELU())

        def conv2d_selu(ci, co, k, pad):
            return nn.Sequential(nn.Conv2d(ci, co, k, padding=pad), nn.SELU())

        # NOTE: submodules are created in the same order as the original so
        # parameter initialization draws from the RNG identically.
        self.bn = nn.BatchNorm2d(c_in)
        # residual branch: 1x1 conv + ReLU
        self.r_cn = nn.Sequential(nn.Conv2d(c_in, c_out, (1, 1)), nn.ReLU())
        # time-attention branch
        self.ta_cn1 = conv1d_selu(c_in, c_out, kt)
        self.ta_cn2 = conv1d_selu(c_out, c_out, kt)
        self.ta_cn3 = conv2d_selu(c_in, c_out, 3, 1)
        self.ta_cn4 = conv2d_selu(c_out, c_out, 5, 2)
        # frequency-attention branch
        self.fa_cn1 = conv1d_selu(c_in, c_out, kf)
        self.fa_cn2 = conv1d_selu(c_out, c_out, kf)
        self.fa_cn3 = conv2d_selu(c_in, c_out, 3, 1)
        self.fa_cn4 = conv2d_selu(c_out, c_out, 5, 2)

    def forward(self, x):
        x = self.bn(x)
        x_r = self.r_cn(x)

        # time attention: average over frequency, softmax along time
        attn_t = F.softmax(self.ta_cn2(self.ta_cn1(x.mean(dim=-2))), dim=-1)
        x_t = self.ta_cn4(self.ta_cn3(x)) * attn_t.unsqueeze(-2)

        # frequency attention: average over time, softmax along frequency
        attn_f = F.softmax(self.fa_cn2(self.fa_cn1(x.mean(dim=-1))), dim=-1)
        x_f = self.fa_cn4(self.fa_cn3(x)) * attn_f.unsqueeze(-1)

        return x_r, x_t, x_f
bm 130 | self.bm_layer = nn.Sequential( 131 | nn.Conv2d(3, 16, (4,1), stride=(4,1)), 132 | nn.SELU(), 133 | nn.Conv2d(16, 16, (3,1), stride=(3,1)), 134 | nn.SELU(), 135 | nn.Conv2d(16, 16, (6,1), stride=(6,1)), 136 | nn.SELU(), 137 | nn.Conv2d(16, 1, (5,1), stride=(5,1)), 138 | nn.SELU() 139 | ) 140 | 141 | # fta_module 142 | self.fta_1 = FTA_Module((freq_bin, time_segment, 3, 32), 3, 3) 143 | self.fta_2 = FTA_Module((freq_bin // 2, time_segment // 2, 32, 64), 3, 3) 144 | self.fta_3 = FTA_Module((freq_bin // 4, time_segment // 4, 64, 128), 3, 3) 145 | self.fta_4 = FTA_Module((freq_bin // 4, time_segment // 4, 128, 128), 3, 3) 146 | self.fta_5 = FTA_Module((freq_bin // 2, time_segment // 2, 128, 64), 3, 3) 147 | self.fta_6 = FTA_Module((freq_bin, time_segment, 64, 32), 3, 3) 148 | self.fta_7 = FTA_Module((freq_bin, time_segment, 32, 1), 3, 3) 149 | 150 | # sf_module 151 | self.sf_1 = SF_Module(3, 32, 4, 4) 152 | self.sf_2 = SF_Module(3, 64, 4, 4) 153 | self.sf_3 = SF_Module(3, 128, 4, 4) 154 | self.sf_4 = SF_Module(3, 128, 4, 4) 155 | self.sf_5 = SF_Module(3, 64, 4, 4) 156 | self.sf_6 = SF_Module(3, 32, 4, 4) 157 | self.sf_7 = SF_Module(3, 1, 4, 4) 158 | 159 | # maxpool 160 | self.mp_1 = nn.MaxPool2d((2,2), (2,2)) 161 | self.mp_2 = nn.MaxPool2d((2,2), (2,2)) 162 | self.up_1 = nn.Upsample(scale_factor=2) 163 | self.up_2 = nn.Upsample(scale_factor=2) 164 | 165 | def forward(self, x): 166 | x = self.bn_layer(x) 167 | bm = x 168 | bm = self.bm_layer(bm) 169 | 170 | x_r, x_t, x_f = self.fta_1(x) 171 | x = self.sf_1([x_r, x_t, x_f]) 172 | x = self.mp_1(x) 173 | 174 | x_r, x_t, x_f = self.fta_2(x) 175 | x = self.sf_2([x_r, x_t, x_f]) 176 | x = self.mp_2(x) 177 | 178 | x_r, x_t, x_f = self.fta_3(x) 179 | x = self.sf_3([x_r, x_t, x_f]) 180 | 181 | x_r, x_t, x_f = self.fta_4(x) 182 | x = self.sf_4([x_r, x_t, x_f]) 183 | 184 | x = self.up_1(x) 185 | x_r, x_t, x_f = self.fta_5(x) 186 | x = self.sf_5([x_r, x_t, x_f]) 187 | x = self.up_2(x) 188 | x_r, x_t, x_f = self.fta_6(x) 189 
| x = self.sf_6([x_r, x_t, x_f]) 190 | 191 | x_r, x_t, x_f = self.fta_7(x) 192 | x = self.sf_7([x_r, x_t, x_f]) 193 | 194 | output_pre = torch.cat([bm, x], dim = 2) 195 | output = nn.Softmax(dim=-2)(output_pre) 196 | 197 | return output, output_pre -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ke Chen knutchen@ucsd.edu 3 | 4 | Tone-Octave Network - main file 5 | 6 | This file contains the main script 7 | 8 | """ 9 | import os 10 | import random 11 | import numpy as np 12 | import argparse 13 | 14 | import torch 15 | from torch import nn 16 | import torch.nn.functional as F 17 | from torch.utils.data import DataLoader 18 | import pytorch_lightning as pl 19 | 20 | import config 21 | from data_generator import TONetTrainDataset, TONetTestDataset 22 | from msnet import MSnet 23 | from tonet import TONet 24 | from multi_dr import MLDRnet 25 | from ftanet import FTAnet 26 | from mcdnn import MCDNN 27 | 28 | from util import tonpy_fn 29 | 30 | 31 | def train(): 32 | train_dataset = TONetTrainDataset( 33 | data_list = config.train_file, 34 | config = config 35 | ) 36 | train_dataloader = DataLoader( 37 | dataset = train_dataset, 38 | shuffle = True, 39 | num_workers = config.n_workers, 40 | batch_size = config.batch_size, 41 | drop_last=True 42 | ) 43 | test_datasets = [ 44 | TONetTestDataset( 45 | data_list = d, 46 | config = config 47 | ) for d in config.test_file 48 | ] 49 | test_dataloaders = [ 50 | DataLoader( 51 | dataset = d, 52 | shuffle = False, 53 | batch_size = 1, 54 | collate_fn=tonpy_fn 55 | ) for d in test_datasets 56 | ] 57 | loss_func = nn.BCELoss() 58 | 59 | if config.model_type == "MCDNN": 60 | me_model = MCDNN() 61 | me_model_r = MCDNN() 62 | elif config.model_type == "MLDRNet": 63 | me_model = MLDRnet() 64 | me_model_r = MLDRnet() 65 | elif config.model_type == "FTANet": 66 | me_model = FTAnet(freq_bin = 
config.freq_bin, time_segment=config.seg_frame)
        me_model_r = FTAnet(freq_bin = config.freq_bin, time_segment=config.seg_frame)
    elif config.model_type == "MSNet":
        me_model = MSnet()
        me_model_r = MSnet()
    else: # AcousticModelCRnn8Dropout
        from piano_net import AcousticModelCRnn8Dropout
        me_model = AcousticModelCRnn8Dropout()
        me_model_r = AcousticModelCRnn8Dropout()

    # single-branch ablations do not use the Tone-CFP model
    if config.ablation_mode == "single" or config.ablation_mode == "spl" or config.ablation_mode == "spat":
        me_model_r = None
    model = TONet(
        l_model = me_model,
        r_model = me_model_r,
        config = config,
        loss_func = loss_func,
        mode = config.ablation_mode
    )
    trainer = pl.Trainer(
        # deterministic = True,
        gpus = 1,
        # checkpoint_callback = False,
        max_epochs = config.max_epoch,
        auto_lr_find = True,
        sync_batchnorm=True,
        # check_val_every_n_epoch = 1,
        # val_check_interval = 0.25,
        num_sanity_val_steps=0
    )
    trainer.fit(model, train_dataloader, test_dataloaders)


def test():
    """Standalone evaluation: rebuild the model, load config.resume_checkpoint
    (a plain state dict), and run Trainer.test over every test set."""
    test_datasets = [
        TONetTestDataset(
            data_list = d,
            config = config
        ) for d in config.test_file
    ]
    test_dataloaders = [
        DataLoader(
            dataset = d,
            shuffle = False,
            batch_size = 1,
            collate_fn=tonpy_fn
        ) for d in test_datasets
    ]
    loss_func = nn.BCELoss()

    if config.model_type == "MCDNN":
        me_model = MCDNN()
        me_model_r = MCDNN()
    elif config.model_type == "MLDRNet":
        me_model = MLDRnet()
        me_model_r = MLDRnet()
    elif config.model_type == "FTANet":
        me_model = FTAnet(freq_bin = config.freq_bin, time_segment=config.seg_frame)
        me_model_r = FTAnet(freq_bin = config.freq_bin, time_segment=config.seg_frame)
    elif config.model_type == "MSNet":
        me_model = MSnet()
        me_model_r = MSnet()
    else: # AcousticModelCRnn8Dropout
        from piano_net import AcousticModelCRnn8Dropout
        me_model = AcousticModelCRnn8Dropout()
        me_model_r = AcousticModelCRnn8Dropout()

    if config.ablation_mode == "single" or config.ablation_mode == "spl" or config.ablation_mode == "spat":
        me_model_r = None
    model = TONet(
        l_model = me_model,
        r_model = me_model_r,
        config = config,
        loss_func = loss_func,
        mode = config.ablation_mode
    )
    trainer = pl.Trainer(
        # deterministic = True,
        gpus = 1,
        # checkpoint_callback = False,
        max_epochs = config.max_epoch,
        auto_lr_find = True,
        sync_batchnorm=True,
        # check_val_every_n_epoch = 1,
        # val_check_interval = 0.25,
    )
    # load the checkpoint (expects a raw state_dict, not a Lightning ckpt dict)
    ckpt = torch.load(config.resume_checkpoint, map_location="cpu")
    model.load_state_dict(ckpt)
    trainer.test(model, test_dataloaders)


if __name__ == "__main__":
    # usage: python main.py {train|test}
    parser = argparse.ArgumentParser(prog = "TONET for Singing Melody Extraction")
    subparsers = parser.add_subparsers(dest = "mode")
    parser_train = subparsers.add_parser("train")
    parser_test = subparsers.add_parser("test")
    args = parser.parse_args()
    pl.seed_everything(config.random_seed)
    if args.mode == "train":
        train()
    elif args.mode == "test":
        test()

# -------------------------------------------------------------------------------- /mcdnn.py --------------------------------------------------------------------------------
# MCDNN from https://github.com/LqNoob/MelodyExtraction-MCDNN/blob/master/MelodyExtraction_SCDNN.py
import torch
import torch.nn as nn
import torch.nn.functional as F

class MCDNN(nn.Module):
    """Fully-connected melody-extraction baseline over flattened CFP frames."""
    def __init__(self):
        super(MCDNN, self).__init__()

        # per-frame pitch classifier: 3*360 CFP features -> 360 pitch bins
        self.mcdnn = nn.Sequential(
            nn.Linear(360 * 3, 2048),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(2048, 1024),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(1024,
512),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(512, 360)
        )
        # bm_layer: per-frame scalar for the "no melody" bin that gets
        # prepended to the 360 pitch bins before the softmax
        self.bm_layer = nn.Sequential(
            nn.Linear(360 * 3, 512),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(512, 128),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(128, 1),
            nn.SELU()
        )

    def forward(self, x):
        # [bs, 3, f, t] -> flatten channels+freq so each frame is one vector
        x = x.view(x.shape[0], -1, x.shape[-1])
        x = x.permute(0,2,1) # [bs, t, f * 3]
        output_pre = self.mcdnn(x)
        bm = self.bm_layer(x)
        output_pre = output_pre.permute(0,2,1)
        output_pre = output_pre.unsqueeze(dim=1)
        bm = bm.permute(0,2,1)
        bm = bm.unsqueeze(dim=1)
        # prepend non-melody row -> [bs, 1, 361, t], softmax over pitch axis
        output_pre = torch.cat((bm, output_pre), dim=2)
        output = nn.Softmax(dim=2)(output_pre)

        return output, output_pre

# -------------------------------------------------------------------------------- /msnet.py --------------------------------------------------------------------------------
# MSNet from https://github.com/bill317996/Melody-extraction-with-melodic-segnet/blob/master/MSnet/model.py
# We only use the vocal version
import torch
import torch.nn as nn
import torch.nn.functional as F

class MSnet(nn.Module):
    """Melodic SegNet: conv/pool encoder with index-preserving unpool decoder.

    Pool strides 4*3*6 = 72... NOTE(review): with a 360-bin input the bottleneck
    height is 5 and the bottom conv (kernel 5, no freq padding) yields the
    1-row bm branch -- assumes freq_bin == 360, confirm against config.
    """
    def __init__(self):
        super(MSnet, self).__init__()

        self.conv1 = nn.Sequential(
            nn.BatchNorm2d(3),
            nn.Conv2d(3, 32, 5, padding=2),
            nn.SELU()
        )
        # return_indices so the decoder can unpool at the exact argmax positions
        self.pool1 = nn.MaxPool2d((4,1), return_indices=True)

        self.conv2 = nn.Sequential(
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.SELU()
        )
        self.pool2 = nn.MaxPool2d((3,1), return_indices=True)

        self.conv3 = nn.Sequential(
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 128, 5, padding=2),
            nn.SELU()
        )
        self.pool3 = nn.MaxPool2d((6,1), return_indices=True)

        # bottom: collapses the remaining freq rows to the single bm row
        self.bottom = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 1, 5, padding=(0,2)),
            nn.SELU()
        )

        self.up_pool3 = nn.MaxUnpool2d((6,1))
        self.up_conv3 = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 64, 5, padding=2),
            nn.SELU()
        )

        self.up_pool2 = nn.MaxUnpool2d((3,1))
        self.up_conv2 = nn.Sequential(
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 32, 5, padding=2),
            nn.SELU()
        )

        self.up_pool1 = nn.MaxUnpool2d((4,1))
        self.up_conv1 = nn.Sequential(
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 1, 5, padding=2),
            nn.SELU()
        )

        self.softmax = nn.Softmax(dim=2)

    def forward(self, x):
        """x: [bs, 3, freq, time] -> (softmaxed [bs, 1, freq+1, time], raw logits)."""
        c1, ind1 = self.pool1(self.conv1(x))
        c2, ind2 = self.pool2(self.conv2(c1))
        c3, ind3 = self.pool3(self.conv3(c2))
        bm = self.bottom(c3)
        u3 = self.up_conv3(self.up_pool3(c3, ind3))
        u2 = self.up_conv2(self.up_pool2(u3, ind2))
        u1 = self.up_conv1(self.up_pool1(u2, ind1))
        output_pre = torch.cat((bm, u1), dim=2)
        output = self.softmax(torch.cat((bm, u1), dim=2))
        # output = torch.cat((bm, u1), dim=2)

        return output, output_pre

# -------------------------------------------------------------------------------- /multi_dr.py --------------------------------------------------------------------------------
# Multi-Dilation Model by self-implementation
import torch
import torch.nn as nn
import torch.nn.functional as F

class MLDRnet(nn.Module):
    """Multi-dilation melody extractor: a three-scale encoder whose scales
    exchange features, followed by a conv decoder with a pooled bm head."""
    def __init__(self, freq_bin = 360):
        super(MLDRnet, self).__init__()

        # Encoder: c2_1/c3_1 downsample to build three resolution scales
        self.encoder_bn = nn.BatchNorm2d(3)
        self.encoder_c2_1 = nn.Conv2d(3, 3, 3, padding=1, stride=2)
        self.encoder_c3_1 = nn.Conv2d(3, 3, 3, padding=1, stride=2)

        # cross-scale exchange convs (cX_Y: from scale X toward scale Y)
        self.encoder_c1_1 = nn.Conv2d(10, 10, 3, padding=1, stride=2)
        self.encoder_c1_2 = nn.Conv2d(10, 10, 3, padding=1, stride=2)

        self.encoder_c2_2 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)
        self.encoder_c2_3 = nn.Conv2d(10, 10, 3, padding=1, stride=2)

        self.encoder_c3_2 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)
        self.encoder_c3_3
= nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)

        # upsamplers that bring scales 2 and 3 back to full resolution
        self.encoder_c2_4 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)
        self.encoder_c3_4 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)
        self.encoder_c3_5 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)

        self.encoder_final = nn.Conv2d(30, 10, 1)

        # Decoder
        self.decoder_bn = nn.BatchNorm2d(10)
        self.decoder_c1 = nn.Sequential(
            nn.Conv2d(10, 10, 3, padding=1),
            nn.SELU()
        )

        # bm head: average-pool the whole frequency axis down to one row
        self.decoder_bm = nn.Sequential(
            nn.AvgPool2d((freq_bin, 1)),
            nn.BatchNorm2d(10),
            nn.Conv2d(10, 1, 3, padding=1),
            nn.SELU()
        )

        self.decoder_final = nn.Sequential(
            nn.BatchNorm2d(10),
            nn.Conv2d(10, 10, 3, padding=1),
            nn.SELU(),
            nn.Conv2d(10, 1, 3, padding=1),
            nn.SELU()
        )
        # Multi-Dilation ModuleList: six call sites of multi_dilation()
        # (indices 0-2 take 3-channel input, 3-5 take 30-channel input)
        self.md_bn_1 = nn.ModuleList([
            nn.BatchNorm2d(3),
            nn.BatchNorm2d(3),
            nn.BatchNorm2d(3),
            nn.BatchNorm2d(30),
            nn.BatchNorm2d(30),
            nn.BatchNorm2d(30)
        ])
        self.md_bn_2 = nn.ModuleList([
            nn.BatchNorm2d(13),
            nn.BatchNorm2d(13),
            nn.BatchNorm2d(13),
            nn.BatchNorm2d(40),
            nn.BatchNorm2d(40),
            nn.BatchNorm2d(40)
        ])
        self.md_bn_3 = nn.ModuleList([
            nn.BatchNorm2d(23),
            nn.BatchNorm2d(23),
            nn.BatchNorm2d(23),
            nn.BatchNorm2d(50),
            nn.BatchNorm2d(50),
            nn.BatchNorm2d(50)
        ])
        self.md_c1 = nn.ModuleList([
            nn.Conv2d(3, 10, 3, padding=3, dilation=3),
            nn.Conv2d(3, 10, 3, padding=3, dilation=3),
            nn.Conv2d(3, 10, 3, padding=3, dilation=3),
            nn.Conv2d(30, 10, 3, padding=3, dilation=3),
            nn.Conv2d(30, 10, 3, padding=3, dilation=3),
            nn.Conv2d(30, 10, 3, padding=3, dilation=3)
        ])
        self.md_c2 = nn.ModuleList([
            nn.Conv2d(13, 10, 3, padding=6, dilation=6),
            nn.Conv2d(13, 10, 3, padding=6, dilation=6),
            nn.Conv2d(13, 10, 3, padding=6, dilation=6),
            nn.Conv2d(40, 10, 3, padding=6, dilation=6),
            nn.Conv2d(40, 10, 3, padding=6, dilation=6),
            nn.Conv2d(40, 10, 3, padding=6, dilation=6)

        ])
        self.md_c3 = nn.ModuleList([
            nn.Conv2d(23, 10, 3, padding=6, dilation=6),
            nn.Conv2d(23, 10, 3, padding=6, dilation=6),
            nn.Conv2d(23, 10, 3, padding=6, dilation=6),
            nn.Conv2d(50, 10, 3, padding=6, dilation=6),
            nn.Conv2d(50, 10, 3, padding=6, dilation=6),
            nn.Conv2d(50, 10, 3, padding=6, dilation=6)
        ])
        self.md_act1 = nn.SELU()
        self.md_act2 = nn.SELU()
        self.md_act3 = nn.SELU()

        self.softmax = nn.Softmax(dim=2)

    def encoder(self, x):
        """Three-scale encoder with cross-scale feature exchange.

        f1/f2/f3 are full/half/quarter resolution; each is refined by
        multi_dilation, exchanged with the other scales, refined again,
        upsampled back, concatenated and projected to 10 channels.
        """
        x = self.encoder_bn(x)
        f1 = x
        f2 = self.encoder_c2_1(f1)
        f3 = self.encoder_c3_1(f2)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)
        f1 = self.multi_dilation(f1, 0)
        f2 = self.multi_dilation(f2, 1)
        f3 = self.multi_dilation(f3, 2)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)

        f1_2 = self.encoder_c1_1(f1)
        f1_3 = self.encoder_c1_2(f1_2)
        # print("f1_3", f1_3.shape)

        f2_1 = self.encoder_c2_2(f2)
        f2_3 = self.encoder_c2_3(f2)
        # print("f2_1 f2_3", f2_1.shape, f2_3.shape)

        f3_2 = self.encoder_c3_2(f3)
        f3_1 = self.encoder_c3_3(f3_2)
        # print("f3_2 f3_1", f3_2.shape, f3_1.shape)

        f1 = torch.cat([f1, f2_1, f3_1], dim = 1)
        f2 = torch.cat([f2, f1_2, f3_2], dim = 1)
        f3 = torch.cat([f3, f1_3, f2_3], dim = 1)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)

        f1 = self.multi_dilation(f1, 3)
        f2 = self.multi_dilation(f2, 4)
        f3 = self.multi_dilation(f3, 5)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)

        f2 = self.encoder_c2_4(f2)
        f3 = self.encoder_c3_4(f3)
        f3 = self.encoder_c3_5(f3)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)
        final_x = torch.cat([f1, f2, f3], dim = 1)
        final_x = self.encoder_final(final_x)
        # print("final_x:", final_x.shape)
        return final_x

    def decoder(self, x):
        """Decode to (pitch logits with bm row prepended, bm row)."""
        x = self.decoder_bn(x)
        x = self.decoder_c1(x)

        bm = self.decoder_bm(x)
        # print("bm:", bm.shape)

        final_x = self.decoder_final(x)
        final_x = torch.cat([bm, final_x], dim = -2)
        # print("final_x", final_x.shape)
        return final_x, bm

    def multi_dilation(self, x, i):
        """Densely-connected block of three dilated convs using bank index i."""
        x0 = x
        x1 = self.md_bn_1[i](x0)
        x1 = self.md_c1[i](x1)
        x1 = self.md_act1(x1)
        # print("x1:", x1.shape)

        x2 = torch.cat([x0, x1], dim = 1)
        x2 = self.md_bn_2[i](x2)
        x2 = self.md_c2[i](x2)
        x2 = self.md_act2(x2)
        # print("x2:", x2.shape)

        x3 = torch.cat([x0, x1, x2], dim = 1)
        x3 = self.md_bn_3[i](x3)
        x3 = self.md_c3[i](x3)
        x3 = self.md_act3(x3)
        # print("x3:", x3.shape)

        return x3

    def forward(self, x):
        x = self.encoder(x)
        output_pre, bm = self.decoder(x)
        output = self.softmax(output_pre)
        # print("output bm:", output.shape, bm.shape)
        # exit()
        return output, output_pre

# -------------------------------------------------------------------------------- /piano_net.py --------------------------------------------------------------------------------
import os
import sys
import math
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

def init_layer(layer):
    """Initialize a Linear or Convolutional layer. """
    nn.init.xavier_uniform_(layer.weight)

    if hasattr(layer, 'bias'):
        if layer.bias is not None:
            layer.bias.data.fill_(0.)


def init_bn(bn):
    """Initialize a Batchnorm layer. """
    bn.bias.data.fill_(0.)
    bn.weight.data.fill_(1.)


def init_gru(rnn):
    """Initialize a GRU layer.
""" 27 | 28 | def _concat_init(tensor, init_funcs): 29 | (length, fan_out) = tensor.shape 30 | fan_in = length // len(init_funcs) 31 | 32 | for (i, init_func) in enumerate(init_funcs): 33 | init_func(tensor[i * fan_in : (i + 1) * fan_in, :]) 34 | 35 | def _inner_uniform(tensor): 36 | fan_in = nn.init._calculate_correct_fan(tensor, 'fan_in') 37 | nn.init.uniform_(tensor, -math.sqrt(3 / fan_in), math.sqrt(3 / fan_in)) 38 | 39 | for i in range(rnn.num_layers): 40 | _concat_init( 41 | getattr(rnn, 'weight_ih_l{}'.format(i)), 42 | [_inner_uniform, _inner_uniform, _inner_uniform] 43 | ) 44 | torch.nn.init.constant_(getattr(rnn, 'bias_ih_l{}'.format(i)), 0) 45 | 46 | _concat_init( 47 | getattr(rnn, 'weight_hh_l{}'.format(i)), 48 | [_inner_uniform, _inner_uniform, nn.init.orthogonal_] 49 | ) 50 | torch.nn.init.constant_(getattr(rnn, 'bias_hh_l{}'.format(i)), 0) 51 | 52 | 53 | class ConvBlock(nn.Module): 54 | def __init__(self, in_channels, out_channels, momentum): 55 | 56 | super(ConvBlock, self).__init__() 57 | 58 | self.conv1 = nn.Conv2d(in_channels=in_channels, 59 | out_channels=out_channels, 60 | kernel_size=(3, 5), stride=(1, 1), 61 | padding=(0, 1), bias=False) 62 | 63 | self.conv2 = nn.Conv2d(in_channels=out_channels, 64 | out_channels=out_channels, 65 | kernel_size=(3, 3), stride=(1, 1), 66 | padding=(0, 1), bias=False) 67 | 68 | self.bn1 = nn.BatchNorm2d(out_channels, momentum) 69 | self.bn2 = nn.BatchNorm2d(out_channels, momentum) 70 | 71 | self.init_weight() 72 | 73 | def init_weight(self): 74 | init_layer(self.conv1) 75 | init_layer(self.conv2) 76 | init_bn(self.bn1) 77 | init_bn(self.bn2) 78 | 79 | 80 | def forward(self, input, pool_size=(2, 2), pool_type='avg'): 81 | """ 82 | Args: 83 | input: (batch_size, in_channels, time_steps, freq_bins) 84 | 85 | Outputs: 86 | output: (batch_size, out_channels, classes_num) 87 | """ 88 | 89 | x = F.relu_(self.bn1(self.conv1(input))) 90 | x = F.relu_(self.bn2(self.conv2(x))) 91 | 92 | # x = F.selu(self.conv1(input)) 93 | 
# x = F.selu(self.conv2(x)) 94 | 95 | if pool_type == 'avg': 96 | x = F.avg_pool2d(x, kernel_size=pool_size) 97 | 98 | return x 99 | 100 | 101 | if torch.cuda.is_available(): 102 | device = torch.device("cuda") 103 | print("Using cuda") 104 | else: 105 | device = torch.device("cpu") 106 | print("Using cpu") 107 | 108 | 109 | from config import include_model_tweak 110 | class AcousticModelCRnn8Dropout(nn.Module): 111 | def __init__(self, classes_num = 361, midfeat = 2560, momentum = 0.01): 112 | super(AcousticModelCRnn8Dropout, self).__init__() 113 | 114 | self.conv_block1 = ConvBlock(in_channels=3, out_channels=48, momentum=momentum) 115 | self.conv_block2 = ConvBlock(in_channels=48, out_channels=64, momentum=momentum) 116 | self.conv_block3 = ConvBlock(in_channels=64, out_channels=96, momentum=momentum) 117 | self.conv_block4 = ConvBlock(in_channels=96, out_channels=128, momentum=momentum) 118 | 119 | self.fc5 = nn.Linear(midfeat, 768, bias=False) 120 | self.bn5 = nn.BatchNorm1d(768, momentum=momentum) 121 | 122 | self.gru = nn.GRU(input_size=768, hidden_size=256, num_layers=2, 123 | bias=True, batch_first=True, dropout=0., bidirectional=True) 124 | 125 | self.fc = nn.Linear(512, classes_num, bias=True) 126 | 127 | self.sfmax = torch.nn.Softmax(dim = 2) 128 | self.init_weight() 129 | 130 | def init_weight(self): 131 | init_layer(self.fc5) 132 | init_bn(self.bn5) 133 | init_gru(self.gru) 134 | init_layer(self.fc) 135 | 136 | def forward(self, x): 137 | """ 138 | Args: 139 | input: (batch_size, channels_num, time_steps, freq_bins) 140 | 141 | Outputs: 142 | output: (batch_size, time_steps, classes_num) 143 | """ 144 | 145 | x = self.conv_block1(x.transpose(2,3), pool_size=(1, 2), pool_type='avg') 146 | x = F.dropout(x, p=0.2, training=self.training) 147 | x = self.conv_block2(x, pool_size=(1, 2), pool_type='avg') 148 | x = F.dropout(x, p=0.2, training=self.training) 149 | x = self.conv_block3(x, pool_size=(1, 2), pool_type='avg') 150 | x = F.dropout(x, p=0.2, 
training=self.training)
        x = self.conv_block4(x, pool_size=(1, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)

        # merge channel and frequency dims into one feature vector per time step
        x = x.transpose(1, 2).flatten(2)

        x = F.relu(self.bn5(self.fc5(x).transpose(1, 2)).transpose(1, 2))
        x = F.dropout(x, p=0.5, training=self.training, inplace=False)

        (x, _) = self.gru(x)
        x = F.dropout(x, p=0.5, training=self.training, inplace=False)
        x = self.fc(x)

        if include_model_tweak:
            # x[:, :, -1] = np.log(x.shape[-1]) - x[:, :, -1]
            # x[:, :, 0] = np.log(49*(x.shape[-1]-1)) - x[:, :, 0]
            # flip the logit of class 0 (the "no melody" bin) before the softmax
            x[:, :, 0] = 1 - x[:, :, 0]

        output = self.sfmax(x)
        return output.transpose(1,2), None


if __name__ == "__main__":
    # smoke test:
    # (batch_size, 3 -> CFP, 360 -> FREQ_BINS, 144 -> TIME_STEPS)
    x = torch.randn(2, 3, 360, 144, device = device)
    print(AcousticModelCRnn8Dropout().to(device)(x)[0].shape)

# -------------------------------------------------------------------------------- /tonet.py --------------------------------------------------------------------------------
"""
Ke Chen knutchen@ucsd.edu

Tone-Octave Network - model

This file contains the TONet core code

"""
import os
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torchvision import transforms
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader, random_split
import pytorch_lightning as pl

from util import melody_eval, freq2octave, freq2tone, tofreq
from attention_layer import CombineLayer, PositionalEncoding
from feature_extraction import get_CenFreq


class TONet(pl.LightningModule):
    """Tone-Octave Network LightningModule.

    Args:
        mode: ablation mode -- "single", "tcfp", "spl", "spat" or "all".
    """
    def __init__(self, l_model, r_model, config, loss_func, mode = "single"):
        super().__init__()
        self.config = config
        # l_model for original-CFP
        self.l_model = l_model
        # r_model for Tone-CFP
        self.r_model = r_model
        self.mode = mode
        # center frequency of each pitch bin; bin 0 is reserved for "no melody"
        self.centf = np.array(get_CenFreq(config.startfreq, config.stopfreq, config.octave_res))
        self.centf[0] = 0
        self.loss_func = loss_func
        # best-so-far evaluation metrics per test set
        self.max_metric = np.zeros((3, 6))
        if self.mode == "all" or self.mode == "tcfp":
            assert r_model is not None, "Enabling TONet needs two-branch models!"

        self.gru_dim = 512
        self.attn_dim = 2048
        # define hyperparameter: sp_dim = backbone feature width fed to the
        # decoders, linear_dim = width entering the tone/octave linear heads
        if self.mode == "tcfp":
            self.sp_dim = self.config.freq_bin * 2
            self.linear_dim = self.config.freq_bin * 2
        elif self.mode == "spl":
            self.sp_dim = self.config.freq_bin
            self.linear_dim = self.gru_dim * 2
        elif self.mode == "spat":
            self.sp_dim = self.config.freq_bin
            self.linear_dim = self.attn_dim
        elif self.mode == "all":
            self.sp_dim = self.config.freq_bin * 2
            self.linear_dim = self.attn_dim

        # Network Architecture
        if self.mode == "spl":
            # linear stand-ins for the original GRU encoders (kept commented)
            self.tone_gru = nn.Linear(self.sp_dim, self.linear_dim)
            # nn.GRU(
            #     self.sp_dim, self.gru_dim, 1,
            #     batch_first=True, bidirectional=True
            # )
            self.octave_gru = nn.Linear(self.sp_dim, self.linear_dim)
            # nn.GRU(
            #     self.sp_dim, self.gru_dim, 1,
            #     batch_first=True, bidirectional=True
            # )
        elif self.mode == "spat" or self.mode == "all":
            # self-attention encoders for tone and octave branches
            self.tone_in = nn.Linear(self.sp_dim, self.attn_dim)
            self.tone_posenc = PositionalEncoding(self.attn_dim, n_position = self.config.seg_frame)
            self.tone_dropout = nn.Dropout(p = 0.2)
            self.tone_norm = nn.LayerNorm(self.attn_dim, eps = 1e-6)
            self.tone_attn = nn.ModuleList([
                CombineLayer(self.attn_dim, self.attn_dim * 2, 8,
                             self.attn_dim // 8, self.attn_dim // 8, dropout = 0.2)
                for _ in range(2)]
            )
            self.octave_in = nn.Linear(self.sp_dim, self.attn_dim)
            self.octave_posenc = PositionalEncoding(self.attn_dim, n_position = self.config.seg_frame)
            self.octave_dropout = nn.Dropout(p = 0.2)
            self.octave_norm = nn.LayerNorm(self.attn_dim, eps = 1e-6)
            self.octave_attn = nn.ModuleList([
                CombineLayer(self.attn_dim, self.attn_dim * 2, 8,
                             self.attn_dim // 8, self.attn_dim // 8, dropout = 0.2)
                for _ in range(2)]
            )
        if self.mode != "single" and self.mode != "tcfp":
            # tone / octave classification heads
            self.tone_linear = nn.Sequential(
                nn.Linear(self.linear_dim, 512),
                nn.Dropout(p = 0.2),
                nn.SELU(),
                nn.Linear(512, 128),
                nn.Dropout(p = 0.2),
                nn.SELU(),
                nn.Linear(128, self.config.tone_class),
                nn.Dropout(p = 0.2),
                nn.SELU()
            )
            self.octave_linear = nn.Sequential(
                nn.Linear(self.linear_dim, 256),
                nn.Dropout(p = 0.2),
                nn.SELU(),
                nn.Linear(256, 64),
                nn.Dropout(p = 0.2),
                nn.SELU(),
                nn.Linear(64, self.config.octave_class),
                nn.Dropout(p = 0.2),
                nn.SELU()
            )
            # merge the two branches' non-melody (bm) rows into one
            self.tone_bm = nn.Sequential(
                nn.Linear(2, 1),
                nn.SELU()
            )
            self.octave_bm = nn.Sequential(
                nn.Linear(2, 1),
                nn.SELU()
            )
            # [bs, 361 + 13 + 9, 128]
            self.tcfp_linear = nn.Sequential(
                nn.Conv1d(self.config.freq_bin * 2, self.config.freq_bin,
                          5, padding=2),
                nn.SELU()
            )
            self.tcfp_bm = nn.Sequential(
                nn.Conv1d(2,1,5,padding=2),
                nn.SELU()
            )
            # fuses tone + octave + pitch features into the final pitch map
            self.final_linear = nn.Sequential(
                nn.Conv1d(
                    self.config.tone_class + self.config.octave_class + self.config.freq_bin + 3,
                    self.config.freq_bin, 5, padding=2),
                nn.SELU()
            )
        elif self.mode == "tcfp":
            self.final_linear = nn.Sequential(
                nn.Linear(self.linear_dim, self.config.freq_bin),
                nn.SELU()
            )
            self.final_bm = nn.Sequential(
                nn.Linear(2, 1),
                nn.SELU()
            )
    """
    Args:
        x: [bs, 3, frequency_bin, time_frame]
    """
    def tone_decoder(self,
tone_feature):
        """Map per-frame features to tone-class logits with the mode's decoder."""
        if self.mode == "all" or self.mode == "spat":
            tone_h = self.tone_dropout(self.tone_posenc(self.tone_in(tone_feature)))
            tone_h = self.tone_norm(tone_h)
            for tone_layer in self.tone_attn:
                tone_h, tone_weight = tone_layer(tone_h, slf_attn_mask = None)
            tone_prob = self.tone_linear(tone_h)
            tone_prob = tone_prob.permute(0, 2, 1).contiguous()
        elif self.mode == "spl":
            tone_h = self.tone_gru(tone_feature)
            tone_prob = self.tone_linear(tone_h)
            tone_prob = tone_prob.permute(0, 2, 1).contiguous()
        return tone_prob

    def octave_decoder(self, octave_feature):
        """Map per-frame features to octave-class logits (mirror of tone_decoder)."""
        if self.mode == "all" or self.mode == "spat":
            octave_h = self.octave_dropout(self.octave_posenc(self.octave_in(octave_feature)))
            octave_h = self.octave_norm(octave_h)
            for octave_layer in self.octave_attn:
                octave_h, octave_weight = octave_layer(octave_h, slf_attn_mask = None)
            octave_prob = self.octave_linear(octave_h)
            octave_prob = octave_prob.permute(0, 2, 1).contiguous()
        elif self.mode == "spl":
            octave_h = self.octave_gru(octave_feature)
            octave_prob = self.octave_linear(octave_h)
            octave_prob = octave_prob.permute(0, 2, 1).contiguous()
        return octave_prob

    def forward(self, x, tx = None):
        """Run the mode-specific pipeline.

        Args:
            x: original CFP, [bs, 3, freq_bin, time_frame].
            tx: Tone-CFP for the two-branch modes ("all" / "tcfp"), else None.
        """
        if self.mode == "single":
            output, _ = self.l_model(x)
            return output
        elif self.mode == "all":
            # backbone logits; row 0 of each map is the non-melody (bm) row
            _, output_l = self.l_model(x)
            _, output_r = self.r_model(tx)
            bm_l = output_l[:, :, 0, :].unsqueeze(dim = 2)
            output_l = output_l[:,:, 1:,:]
            bm_r = output_r[:, :, 0, :].unsqueeze(dim = 2)
            output_r = output_r[:,:, 1:,:]
            feature_agg = torch.cat((output_l, output_r), dim = 2)
            feature_agg = feature_agg.squeeze(dim = 1)
            feature_agg_mi = self.tcfp_linear(feature_agg) # [bs, 360, 128]
            bm_agg = torch.cat((bm_l, bm_r), dim = 2)
            bm_agg = bm_agg.squeeze(dim = 1)
            bm_agg_mi = self.tcfp_bm(bm_agg)
            bm_agg = bm_agg.permute(0,2,1)
            # decoders consume [bs, time, feature]
            tone_feature = feature_agg.permute(0,2,1).contiguous()
            octave_feature = feature_agg.permute(0,2,1).contiguous()
            tone_prob = self.tone_decoder(tone_feature)
            octave_prob = self.octave_decoder(octave_feature)

            tone_bm = self.tone_bm(bm_agg)
            octave_bm = self.octave_bm(bm_agg)
            tone_bm = tone_bm.permute(0,2,1)
            octave_bm = octave_bm.permute(0,2,1)

            # append the fused bm row to each class map
            tone_prob = torch.cat((tone_prob, tone_bm), dim = 1)
            octave_prob = torch.cat((octave_prob, octave_bm), dim = 1)

            final_feature = torch.cat((tone_prob, octave_prob, feature_agg_mi, bm_agg_mi), dim = 1)
            final_feature = self.final_linear(final_feature)
            final_feature = torch.cat((bm_agg_mi, final_feature), dim=1)
            final_feature = nn.Softmax(dim = 1)(final_feature)
            tone_prob = nn.Softmax(dim = 1)(tone_prob)
            octave_prob = nn.Softmax(dim = 1)(octave_prob)
            return tone_prob, octave_prob, final_feature
        elif self.mode == "tcfp":
            _, output_l = self.l_model(x)
            _, output_r = self.r_model(tx)
            bm_l = output_l[:, :, 0, :].unsqueeze(dim = 2)
            output_l = output_l[:,:, 1:,:]
            bm_r = output_r[:, :, 0, :].unsqueeze(dim = 2)
            output_r = output_r[:,:, 1:,:]
            feature_agg = torch.cat((output_l, output_r), dim = 2)
            feature_agg = feature_agg.permute(0, 1, 3, 2)
            bm_agg = torch.cat((bm_l, bm_r), dim = 2)
            bm_agg = bm_agg.permute(0, 1, 3, 2)
            final_x = self.final_linear(feature_agg)
            final_bm = self.final_bm(bm_agg)
            final_x = final_x.permute(0,1,3,2)
            final_bm = final_bm.permute(0,1,3,2)
            final_output = nn.Softmax(dim = 2)(torch.cat((final_bm, final_x), dim = 2))
            return final_output
        elif self.mode == "spl" or self.mode == "spat":
            # single-branch variant: only the original-CFP backbone
            _, output_l = self.l_model(x)
            bm_l = output_l[:, :, 0, :].unsqueeze(dim = 2)
            output_l = output_l[:,:, 1:,:]
            feature_agg = output_l
            feature_agg = feature_agg.squeeze(dim = 1)
            bm_agg = bm_l
            bm_agg = bm_agg.squeeze(dim = 1)
            tone_feature = feature_agg.permute(0,2,1).contiguous()
            octave_feature = feature_agg.permute(0,2,1).contiguous()
            tone_prob = self.tone_decoder(tone_feature)
            octave_prob = self.octave_decoder(octave_feature)
            tone_bm = bm_agg
            octave_bm = bm_agg

            tone_prob = torch.cat((tone_prob, tone_bm), dim = 1)
            octave_prob = torch.cat((octave_prob, octave_bm), dim = 1)

            final_feature = torch.cat((tone_prob, octave_prob, feature_agg, bm_agg), dim = 1)
            final_feature = self.final_linear(final_feature)
            final_feature = torch.cat((bm_agg, final_feature), dim=1)
            final_feature = nn.Softmax(dim = 1)(final_feature)
            tone_prob = nn.Softmax(dim = 1)(tone_prob)
            octave_prob = nn.Softmax(dim = 1)(octave_prob)
            return tone_prob, octave_prob, final_feature
    """
    Args:
        batch: {
            "cfp": [bs, 3, frequency_bin, time_frame],
            "gd": [bs, time_frame]
        }
    """
    def training_step(self, batch, batch_idx):
        device_type = next(self.parameters()).device

        # gds (batch_size, time_steps)
        # cfps (batch_size, channel_num, freq_bins, time_steps)

        cfps = batch["cfp"]
        tcfps = batch["tcfp"]
        gds = batch["gd"]
        if self.mode == "single":
            # one-hot-ish target map with label smoothing: target bin 0.98,
            # everything else shares the remaining 0.02
            # gd_maps = torch.zeros((cfps.shape[0], cfps.shape[-2] + 1, cfps.shape[-1])).to( device_type)
            gd_maps = torch.zeros((cfps.shape[0], cfps.shape[-2] + 1, gds.shape[-1])).to( device_type)

            # for each item in batch
            # switch to 0.98 and 0.02/rest
            for i in range(len(gds)):
                # gd_maps[i, gds[i].long(), torch.arange(gds.shape[-1])] = 1.0
                gd_maps[i, gds[i].long(), torch.arange(gds.shape[-1])] = 0.98
                gd_maps[gd_maps == 0] = 0.02/(gd_maps.shape[1]-1)
            # print(gd_maps.shape, (gd_maps.shape[1]-1))
            # sys.exit()
            # print(torch.sum(gd_maps[0, :, 0]))
| # print("\n\n\n\n\n") 291 | # sys.exit() 292 | 293 | # print(i, gds[i].long(), gds.shape[-1]) 294 | 295 | # print(gds.shape, gd_maps.shape, cfps.shape) 296 | # sys.exit() 297 | 298 | 299 | output = self(cfps) 300 | output = torch.squeeze(output, dim = 1) 301 | loss = self.loss_func(output, gd_maps) 302 | 303 | # now add the polynomial loss 304 | # assume (batch_size, freq_bins, time_steps) -> (???, 361, 128) 305 | 306 | from config import include_loss_component 307 | if include_loss_component: 308 | from util import area_punish 309 | from util import reverse_area_punish 310 | 311 | 312 | # 0. compute the loss for silence part. (101, 1001...) 313 | # now (batch_size, time_steps), and throw it into area punish 314 | special_output = output[:, 0, :] 315 | 316 | # print(special_output.shape) 317 | 318 | special_loss_temp = [] 319 | for area_len in range(3,31): 320 | special_loss_temp.append(reverse_area_punish(special_output, area_len)) 321 | 322 | # special_loss_temp_2 = [] 323 | for area_len in range(3,6): 324 | special_loss_temp.append(area_punish(special_output, area_len)) 325 | # cat along the time_step dim as we need to do tweak and mean across all area_len 326 | special_loss_temp = torch.cat(special_loss_temp, dim = 1) 327 | # special_loss_temp_2 = torch.cat(special_loss_temp_2, dim = 1) 328 | # print(special_loss_temp.shape) 329 | 330 | # now tweak using -> x^3/(x^3 + (1-x)^3) 331 | special_loss_temp = special_loss_temp**5/(special_loss_temp**5 + (1-special_loss_temp)**5) 332 | 333 | # special_loss_temp_2 = /special_loss_temp_2**5/(special_loss_temp_2**5 + (1-special_loss_temp_2)**5) 334 | 335 | loss += torch.mean(special_loss_temp) 336 | # loss += 0.3*torch.mean(special_loss_temp_2) 337 | 338 | 339 | # verification necessary, shall try to avoid double count. 
340 | self.log('loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=False) 341 | elif self.mode == "all": 342 | # from pure pitch estimation 343 | gd_maps = torch.zeros((cfps.shape[0], cfps.shape[-2] + 1, cfps.shape[-1])).to( device_type) 344 | tone_maps = torch.zeros((cfps.shape[0], self.config.tone_class + 1, cfps.shape[-1])).to(device_type) 345 | octave_maps = torch.zeros((cfps.shape[0], self.config.octave_class + 1, cfps.shape[-1])).to(device_type) 346 | tone_index = ((gds % 60) * self.config.tone_class / 60).long() 347 | octave_index = (gds // 60 + 2).long() 348 | tone_index[gds < 1.0] = self.config.tone_class 349 | octave_index[gds < 1.0] = self.config.octave_class 350 | for i in range(len(tone_maps)): 351 | tone_maps[i, tone_index[i], torch.arange(gds.shape[-1])] = 1.0 352 | octave_maps[i, octave_index[i], torch.arange(gds.shape[-1])] = 1.0 353 | gd_maps[i, gds[i].long(), torch.arange(gds.shape[-1])] = 1.0 354 | tone_prob, octave_prob, final_prob = self(cfps, tcfps) 355 | pred_map = torch.cat((tone_prob, octave_prob , final_prob), dim = 1) 356 | gd_map = torch.cat([tone_maps, octave_maps, gd_maps], dim = 1) 357 | loss = self.loss_func(pred_map, gd_map) 358 | self.log('loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=False) 359 | return loss 360 | 361 | 362 | def write_prediction(self, pred, filename): 363 | time_frame = np.arange(len(pred)) * 0.01 364 | with open(filename, "w") as f: 365 | for i in range(len(time_frame)): 366 | f.write(str(np.round(time_frame[i], 4)) + "\t" + str(pred[i]) + "\n") 367 | 368 | 369 | def validation_step(self, batch, batch_idx, dataset_idx): 370 | device_type = next(self.parameters()).device 371 | mini_batch = self.config.batch_size 372 | mini_batch = 1 373 | 374 | # array of length 1 anyway. 
def validation_step(self, batch, batch_idx, dataset_idx):
    """Run the model over one song and gather framewise outputs.

    The dataloader delivers one song per step with an outer batch
    dimension of length 1, hence the [0] unwrapping below.

    Returns:
        [output, gds, lens, name]: network probabilities concatenated
        over segments, ground truth, valid length, and source file name.
    """
    device_type = next(self.parameters()).device
    # One segment per forward pass.  (The original also read
    # self.config.batch_size here but immediately overrode it with 1,
    # so the dead assignment is dropped.)
    mini_batch = 1

    cfps = batch["cfp"][0]
    tcfps = batch["tcfp"][0]
    gds = batch["gd"][0]
    lens = batch["length"][0]
    name = batch["name"][0]

    output = []
    # Index 0 of cfps/gds is the segment axis.
    for i in range(0, len(cfps), mini_batch):
        temp_cfp = torch.from_numpy(cfps[i:i + mini_batch]).to(device_type)
        temp_tcfp = torch.from_numpy(tcfps[i:i + mini_batch]).to(device_type)
        if self.mode == "single":
            temp_output = self(temp_cfp)
            temp_output = torch.squeeze(temp_output, dim=1)
        elif self.mode == "all":
            _, _, temp_output = self(temp_cfp, temp_tcfp)
        output.append(temp_output.detach().cpu().numpy())
    # Concatenate the per-segment arrays directly; the original wrapped
    # the list in np.array() first, which only adds a redundant copy.
    output = np.concatenate(output, axis=0)
    return [
        output,
        gds,
        lens,
        name
    ]


def validation_epoch_end(self, validation_step_outputs, test_flag=False):
    """Aggregate per-song outputs, compute melody metrics per dataset,
    and checkpoint predictions/weights whenever OA improves.

    Args:
        validation_step_outputs: one list of validation_step results per
            validation dataset.
        test_flag: when True, always dump results and never overwrite
            the saved model checkpoint.
    """
    for i, dataset_d in enumerate(validation_step_outputs):
        preds = []
        gds = []
        special_outputs = []
        for d in dataset_d:
            pred, gd, rl, name = d

            # Channel 0 holds the per-frame silence probability.
            special_output = pred[:, 0, :]

            pred = np.argmax(pred, axis=1)
            pred = np.concatenate(pred, axis=0)
            # Map bin indices back to frequencies.
            pred = self.centf[pred]

            gd = np.concatenate(gd, axis=0)
            preds.append(pred)
            gds.append(gd)
            special_outputs.append(special_output)
        preds = np.concatenate(preds, axis=0)
        gds = np.concatenate(gds, axis=0)
        special_outputs = np.concatenate(special_outputs, axis=0)

        metric = melody_eval(preds, gds)
        self.print("\n")
        self.print("Dataset ", i, " OA:", metric[-1])
        if test_flag or metric[-1] > self.max_metric[i, -1]:
            # Persist the best predictions for this dataset.
            os.system("rm -rf model_backup/" + str(i) + "*_best.txt")
            with open("model_backup/" + str(i) + "_" + str(metric[-1]) + "_best.txt", "a+") as f:
                np.savetxt(f, np.c_[preds, gds])

            with open("model_backup/" + str(i) + "_vocal_prob.txt", "w") as f:
                np.savetxt(f, special_outputs)

            # Record the new best metrics.  (The original assigned
            # metric[j] twice per index; once is sufficient.)
            for j in range(len(self.max_metric[i])):
                self.max_metric[i, j] = metric[j]
            if not test_flag:
                torch.save(self.state_dict(), "model_backup/bestk_" + str(i) + ".ckpt")
        self.print("Best ", i, ":", self.max_metric[i])


def test_step(self, batch, batch_idx, dataset_idx):
    """Testing reuses the validation forward pass unchanged."""
    return self.validation_step(batch, batch_idx, dataset_idx)


def test_epoch_end(self, test_step_outputs):
    """Testing reuses the validation aggregation, but with test_flag so
    results are always written and no checkpoint is overwritten."""
    self.validation_epoch_end(test_step_outputs, test_flag=True)


def configure_optimizers(self):
    """Adam for all modes; multi-task modes additionally get a warm-up
    LR schedule (0.5x for 5 epochs, then 0.98 decay per epoch)."""
    optimizer = torch.optim.Adam(self.parameters(), lr=self.config.lr)

    def lr_foo(epoch):
        if epoch < 5:
            # warm up lr
            lr_scale = 0.5
        else:
            lr_scale = 0.5 * 0.98 ** (epoch - 5)
        return lr_scale

    if self.mode == "single" or self.mode == "tcfp":
        return optimizer
    elif self.mode == "all" or self.mode == "spl" or self.mode == "spat":
        scheduler = torch.optim.lr_scheduler.LambdaLR(
            optimizer,
            lr_lambda=lr_foo
        )
        return [optimizer], [scheduler]
lr_lambda=lr_foo 493 | ) 494 | return [optimizer], [scheduler] 495 | -------------------------------------------------------------------------------- /util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Ke Chen knutchen@ucsd.edu 3 | 4 | Tone-Octave Network - utils file 5 | 6 | This file contains useful common methods 7 | 8 | """ 9 | import os 10 | import numpy as np 11 | import torch 12 | import mir_eval 13 | import config 14 | 15 | def index2centf(seq, centfreq): 16 | centfreq[0] = 0 17 | re = np.zeros(len(seq)) 18 | for i in range(len(seq)): 19 | for j in range(len(centfreq)): 20 | if seq[i] < 0.1: 21 | re[i] = 0 22 | break 23 | elif centfreq[j] > seq[i]: 24 | # re[i] = j 25 | 26 | if j > 1: 27 | if abs(centfreq[j]/seq[i]) <= abs(seq[i]/centfreq[j - 1]): 28 | re[i] = j 29 | else: 30 | re[i] = j - 1 31 | # print(seq[i], "got", j-1, centfreq[j - 1], "instead of", j, centfreq[j]) 32 | else: 33 | re[i] = j 34 | 35 | break 36 | return re 37 | 38 | 39 | def freq2octave(freq): 40 | if freq < 1.0 or freq > 2050: 41 | return config.octave_class 42 | else: 43 | return int(np.round(69 + 12 * np.log2(freq/440)) // 12) 44 | 45 | def freq2tone(freq): 46 | if freq < 1.0 or freq > 2050: 47 | return config.tone_class 48 | else: 49 | return int(np.round(69 + 12 * np.log2(freq/440)) % 12) 50 | 51 | def tofreq(tone, octave): 52 | if tone >= config.tone_class or octave >= config.octave_class or octave < 2: 53 | return 0.0 54 | else: 55 | return 440 * 2 ** ((12 * octave + tone * 12 / config.tone_class - 69) / 12) 56 | 57 | 58 | def pos_weight(data, freq_bins): 59 | frames = data.shape[-1] 60 | non_vocal = float(len(data[data == 0])) 61 | vocal = float(data.size - non_vocal) 62 | z = np.zeros((freq_bins, frames)) 63 | z[1:,:] += (non_vocal / vocal) 64 | z[0,:] += vocal / non_vocal 65 | print(non_vocal, vocal) 66 | return torch.from_numpy(z).float() 67 | 68 | def freq2octave(freq): 69 | if freq < 1.0 or freq > 1990: 70 | return 0 71 | 
pitch = round(69 + 12 * np.log2(freq / 440)) 72 | return int(pitch // 12) 73 | 74 | def compute_roa(pred, gd): 75 | pred = pred[gd > 0.1] 76 | gd = gd[gd > 0.1] 77 | pred = np.array([freq2octave(d) for d in pred]) 78 | gd = np.array([freq2octave(d) for d in gd]) 79 | return np.sum(pred == gd) / len(pred) 80 | 81 | 82 | def melody_eval(pred, gd): 83 | ref_time = np.arange(len(gd)) * 0.01 84 | ref_freq = gd 85 | 86 | est_time = np.arange(len(pred)) * 0.01 87 | est_freq = pred 88 | 89 | output_eval = mir_eval.melody.evaluate(ref_time,ref_freq,est_time,est_freq) 90 | VR = output_eval['Voicing Recall']*100.0 91 | VFA = output_eval['Voicing False Alarm']*100.0 92 | RPA = output_eval['Raw Pitch Accuracy']*100.0 93 | RCA = output_eval['Raw Chroma Accuracy']*100.0 94 | ROA = compute_roa(est_freq, ref_freq) * 100.0 95 | OA = output_eval['Overall Accuracy']*100.0 96 | eval_arr = np.array([VR, VFA, RPA, RCA, ROA, OA]) 97 | return eval_arr 98 | 99 | def tonpy_fn(batch): 100 | dict_key = batch[0].keys() 101 | output_batch = {} 102 | for dk in dict_key: 103 | output_batch[dk] = np.array([d[dk] for d in batch]) 104 | return output_batch 105 | 106 | # for 010, 0110 etc. 107 | def area_punish(nn_output, area_len = 3): 108 | assert area_len >= 3 109 | ''' 110 | product = 1 - nn_output[:, :-area_len+1] 111 | for index in range(1, area_len-1): 112 | product = product*nn_output[:, index:-area_len+1+index] 113 | product = product*(1 - nn_output[:, area_len-1:]) 114 | ''' 115 | 116 | product = (1 - nn_output[:, :-area_len+1])*(1 - nn_output[:, area_len-1:]) 117 | 118 | temp = 1 119 | for index in range(1, area_len-1): 120 | temp = temp*(1 - nn_output[:, index:-area_len+1+index]) 121 | 122 | product = product*(1 - temp) 123 | return product 124 | 125 | 126 | # for 101, 1001 etc. 
def reverse_area_punish(nn_output, area_len=3):
    """Penalty for isolated dropouts (patterns 101, 1001, ...): both
    window endpoints ON with at least one OFF frame in between.

    Args:
        nn_output: (batch, time) activation probabilities in [0, 1].
        area_len: window length (>= 3).

    Returns:
        (batch, time - area_len + 1) tensor, close to 1 where a dropout
        of the given length occurs.
    """
    assert area_len >= 3

    # Both endpoints ON ...
    product = nn_output[:, :-area_len + 1] * nn_output[:, area_len - 1:]
    # ... and the interior not entirely ON.
    temp = 1
    for index in range(1, area_len - 1):
        temp = temp * nn_output[:, index:-area_len + 1 + index]

    product = product * (1 - temp)
    return product


def play_sequence(audio_chunk, f_s):
    """Blocking audio playback of *audio_chunk* at sample rate *f_s*."""
    # Imported lazily: the original module-level `import sounddevice`
    # made this whole module unimportable on machines without an audio
    # stack, even though nothing else here needs it.
    import sounddevice as sd
    sd.play(audio_chunk, f_s, blocking=True)


# ys: list of y sequences sharing the same x axis.
def plot_multi_sequences(x, ys, y_names, title="", initial_visibility=True):
    """Interactive plotly line plot of several named sequences."""
    import plotly.graph_objects as go

    # hovertemplate does not expose `name`, hence the `meta` workaround:
    # https://community.plotly.com/t/hovertemplate-does-not-show-name-property/36139/2
    fig = go.Figure(data=[
        go.Scatter(x=x, y=ys[i], name=y_names[i], meta=[y_names[i]],
                   hovertemplate='%{meta}<br>x=%{x}<br>y=%{y}')
        for i in range(len(ys))
    ])

    fig.update_layout(
        title=title,
        xaxis_title="",
        yaxis_title="",
        font=dict(size=25),
        hoverlabel=dict(font_size=25),
        margin={"l": 40, "r": 40, "t": 40, "b": 40},
        autosize=True
    )

    if not initial_visibility:
        fig.update_traces(visible='legendonly')

    fig.show(config={'showTips': False})


# Pick the compute device once at import time.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using cuda")
else:
    device = torch.device("cpu")
    print("Using cpu")


# Only dealing with vocal existence.
def median_filter(preds, filter_size=21):
    """Median-filter the voiced/unvoiced decision of a pitch track.

    A frame's value is replaced by the windowed median only where the
    filtering flips its voiced (non-zero) / unvoiced (zero) state; frames
    whose voicing is unchanged keep their original value.  The sequence
    is zero-padded at both ends, so voicing near the edges is biased
    toward unvoiced.

    Args:
        preds: 1-D numpy array of frame values (0 = unvoiced).
        filter_size: median window length (odd or even both supported).

    Returns:
        Filtered numpy array of the same shape.
    """
    import torch.nn.functional as F
    preds = torch.from_numpy(preds).float().to(device)
    # Single padding formula replacing the original odd/even if-else:
    # odd  n -> (n//2, n//2); even n -> (n//2, n//2 - 1).  Identical.
    pad_left = filter_size // 2
    pad_right = (filter_size - 1) // 2
    temp = F.pad(preds, (pad_left, pad_right), "constant")
    preds_filtered = torch.median(
        temp.unfold(dimension=-1, size=filter_size, step=1), dim=-1).values

    assert preds.shape == preds_filtered.shape

    preds_on_off = (preds != 0).int()
    preds_filtered_on_off = (preds_filtered != 0).int()

    # 0 -> 0 and 1 -> 1: keep the original value.
    # 0 -> 1 and 1 -> 0: take the filtered value.
    # Using different sizes for onsets vs offsets would be inconsistent,
    # hence a single window for both directions.
    should_replace = preds_on_off * (1 - preds_filtered_on_off) \
        + (1 - preds_on_off) * preds_filtered_on_off

    return ((1 - should_replace) * preds + should_replace * preds_filtered).cpu().numpy()
"2"]) 212 | 213 | return ((1 - should_replace)*preds + should_replace*preds_filtered).cpu().numpy() 214 | 215 | if __name__ == "__main__": 216 | 217 | x = torch.randn(2222) 218 | x = torch.arange(2).repeat(200).numpy() 219 | print(median_filter(x, filter_size = 20)) 220 | 221 | plot_multi_sequences(torch.arange(2222), [x, median_filter(x)], ["1", "2"]) 222 | 223 | --------------------------------------------------------------------------------