├── README.md
├── attention_layer.py
├── config.py
├── data
├── test_adc.txt
├── test_melody.txt
├── test_mirex.txt
├── train_data.txt
├── train_data_extra.txt
├── train_data_small.txt
└── train_data_small_extra.txt
├── data_generator.py
├── feature_extraction.py
├── ftanet.py
├── main.py
├── mcdnn.py
├── model_backup
├── 0_84.48420698924731_best.txt
├── 0_86.91616263440861_best.txt
├── 1_89.19454225352112_best.txt
├── 1_89.7593896713615_best.txt
├── 2_73.70216679030662_best.txt
└── 2_74.52630440158259_best.txt
├── msnet.py
├── multi_dr.py
├── piano_net.py
├── tonet.py
└── util.py
/README.md:
--------------------------------------------------------------------------------
1 | ### KKNet
2 |
3 | An implementation of "[Towards Improving Harmonic Sensitivity and Prediction Stability for Singing Melody Extraction](https://arxiv.org/abs/2308.02723)", in ISMIR 2023
4 |
5 | Detailed training/inference instructions will be added soon. In short: first run ``python feature_extraction.py`` to cache the CFP/z-CFP features, then run ``python main.py train`` to start the main training loop, which builds the model in ``tonet.py``; ``tonet.py`` in turn instantiates the PianoNet model from ``piano_net.py``.
6 |
7 | Standalone testing can be done using ``python main.py test``
8 |
9 | The data used for the experiments can be found here: https://drive.google.com/file/d/1QKX6rpuRxMPt54HOqNQztmLQqGlCALZ4/view?usp=sharing
10 |
11 |
--------------------------------------------------------------------------------
/attention_layer.py:
--------------------------------------------------------------------------------
1 | # Attention layer
2 | # from https://github.com/jadore801120/attention-is-all-you-need-pytorch
3 | import numpy as np
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.functional as F
7 |
class ScaledDotProductAttention(nn.Module):
    """Scaled dot-product attention: softmax(q·kᵀ / temperature)·v.

    Args:
        temperature: scaling divisor applied to the query (typically sqrt(d_k)).
        attn_dropout: dropout probability applied to the attention weights.
    """

    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature
        self.dropout = nn.Dropout(attn_dropout)

    def forward(self, q, k, v, mask=None):
        # Similarity scores between queries and keys, scaled by temperature.
        # Shapes follow the caller's b x n_head x len x d layout.
        scores = torch.matmul(q / self.temperature, k.transpose(2, 3))

        if mask is not None:
            # Push masked positions toward -inf so softmax zeroes them out.
            scores = scores.masked_fill(mask == 0, -1e9)

        weights = self.dropout(F.softmax(scores, dim=-1))
        context = torch.matmul(weights, v)

        return context, weights
27 |
class MultiHeadAttention(nn.Module):
    """Multi-head attention with pre-layer-norm on the query and a residual add.

    Args:
        n_head: number of attention heads.
        d_model: model (input/output) dimensionality.
        d_k: per-head query/key dimensionality.
        d_v: per-head value dimensionality.
        dropout: dropout probability on the output projection.
    """

    def __init__(self, n_head, d_model, d_k, d_v, dropout=0.1):
        super().__init__()

        self.n_head = n_head
        self.d_k = d_k
        self.d_v = d_v

        # One fused projection per stream covers all heads at once.
        self.w_qs = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_ks = nn.Linear(d_model, n_head * d_k, bias=False)
        self.w_vs = nn.Linear(d_model, n_head * d_v, bias=False)
        self.fc = nn.Linear(n_head * d_v, d_model, bias=False)

        self.attention = ScaledDotProductAttention(temperature=d_k ** 0.5)

        self.dropout = nn.Dropout(dropout)
        self.layer_norm = nn.LayerNorm(d_model, eps=1e-6)

    def forward(self, q, k, v, mask=None):
        n_head, d_k, d_v = self.n_head, self.d_k, self.d_v
        sz_b = q.size(0)
        len_q, len_k, len_v = q.size(1), k.size(1), v.size(1)

        residual = q
        # NOTE(review): only the query stream is normalized here; k and v are
        # projected from the raw inputs — kept as-is from the upstream repo.
        q = self.layer_norm(q)

        # Project, split heads (b x len x n x d), then move heads forward
        # to b x n x len x d for the attention matmuls.
        q = self.w_qs(q).view(sz_b, len_q, n_head, d_k).transpose(1, 2)
        k = self.w_ks(k).view(sz_b, len_k, n_head, d_k).transpose(1, 2)
        v = self.w_vs(v).view(sz_b, len_v, n_head, d_v).transpose(1, 2)

        if mask is not None:
            mask = mask.unsqueeze(1)  # broadcast over the head axis

        out, attn = self.attention(q, k, v, mask=mask)

        # Merge heads back: b x len_q x (n_head * d_v), project to d_model.
        out = out.transpose(1, 2).contiguous().view(sz_b, len_q, -1)
        out = self.dropout(self.fc(out))
        out = out + residual

        return out, attn
78 |
79 |
80 | class PositionwiseFeedForward(nn.Module):
81 | ''' A two-feed-forward-layer module '''
82 |
83 | def __init__(self, d_in, d_hid, dropout=0.1):
84 | super().__init__()
85 | self.w_1 = nn.Linear(d_in, d_hid) # position-wise
86 | self.w_2 = nn.Linear(d_hid, d_in) # position-wise
87 | self.layer_norm = nn.LayerNorm(d_in, eps=1e-6)
88 | self.dropout = nn.Dropout(dropout)
89 |
90 | def forward(self, x):
91 | residual = x
92 | x = self.layer_norm(x)
93 | x = self.w_2(F.relu(self.w_1(x)))
94 | x = self.dropout(x)
95 | x += residual
96 | return x
97 |
class PositionalEncoding(nn.Module):
    """Adds the fixed sinusoidal position encoding table to its input.

    Args:
        d_hid: feature dimensionality of the encoding.
        n_position: maximum sequence length the table covers.
    """

    def __init__(self, d_hid, n_position=200):
        super(PositionalEncoding, self).__init__()

        # Registered as a buffer: saved with the model, never trained.
        self.register_buffer('pos_table', self._get_sinusoid_encoding_table(n_position, d_hid))

    def _get_sinusoid_encoding_table(self, n_position, d_hid):
        ''' Sinusoid position encoding table '''
        # Vectorized: angle[p, j] = p / 10000^(2*(j//2)/d_hid);
        # even dims take sin, odd dims take cos.
        positions = np.arange(n_position)[:, None]  # (n_position, 1)
        dims = np.arange(d_hid)[None, :]            # (1, d_hid)
        angles = positions / np.power(10000, 2 * (dims // 2) / d_hid)

        table = angles.copy()
        table[:, 0::2] = np.sin(angles[:, 0::2])  # dim 2i
        table[:, 1::2] = np.cos(angles[:, 1::2])  # dim 2i+1

        return torch.FloatTensor(table).unsqueeze(0)

    def forward(self, x):
        # Add the first x.size(1) positions; detached so no gradient
        # ever flows into the (constant) table.
        return x + self.pos_table[:, :x.size(1)].clone().detach()
121 |
class CombineLayer(nn.Module):
    """One transformer encoder layer: self-attention then a position-wise FFN.

    Args:
        d_model: model dimensionality.
        d_inner: FFN hidden dimensionality.
        n_head, d_k, d_v: multi-head attention configuration.
        dropout: dropout probability passed to both sub-layers.
    """

    def __init__(self, d_model, d_inner, n_head, d_k, d_v, dropout=0.1):
        super(CombineLayer, self).__init__()
        self.slf_attn = MultiHeadAttention(n_head, d_model, d_k, d_v, dropout=dropout)
        self.pos_ffn = PositionwiseFeedForward(d_model, d_inner, dropout=dropout)

    def forward(self, enc_input, slf_attn_mask=None):
        # Self-attention: the input serves as query, key and value alike.
        attended, attn_weights = self.slf_attn(
            enc_input, enc_input, enc_input, mask=slf_attn_mask)
        return self.pos_ffn(attended), attn_weights
135 |
136 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------

# exp_name = "dummy"
# file path
model_type = "dummy" # MCDNN, FTANet, MSNet, MLDRNet, dummy
data_path = "data"  # root folder holding the cached features / file lists
train_file = "data/train_data.txt"  # list of training .npy feature files
test_file = [
    "data/test_adc.txt",     # test split file lists (one .npy name per line)
    "data/test_mirex.txt",
    "data/test_melody.txt"
]

save_path = "model_backup"  # directory where checkpoints are written
resume_checkpoint = "model_backup/bestk_0.ckpt"  # checkpoint used when resuming / testing
# resume_checkpoint = "model_backup/TO-FTANet_mirex_best.ckpt"
# "model_backup/TO-FTANet_adc_best.ckpt" # the model checkpoint

# train config
batch_size = 10
lr = 1e-4  # learning rate
epochs = 1000
n_workers = 4  # dataloader worker processes
save_period = 1  # validate/save every N epochs
tone_class = 12 # 60
octave_class = 8 # 6
random_seed = 19961206
max_epoch = 500
freq_bin = 360  # frequency bins in the input feature (matches cfp_dir below)

ablation_mode = "single" # single, tcfp, spl, spat, all, a parameter inherited from TONet's code, and remain single for our simplified model

include_model_tweak = False # small tweak on vocal detection bin, unsure of its effectiveness
include_loss_component = False # loss component for prediction stability
include_adjusted_exp = False # z-transform
apply_median_filter = True # median filter baseline

startfreq = 32    # analysis band lower bound — presumably Hz; confirm in feature_extraction.py
stopfreq = 2050   # analysis band upper bound — presumably Hz; confirm in feature_extraction.py
cfp_dir = "cfp_360_new"  # directory of precomputed CFP features

# feature config
fs = 44100.0  # sample rate (Hz)
hop = 441.0   # hop size in samples (10 ms at 44.1 kHz)
octave_res = 60  # frequency bins per octave
seg_dur = 1.28 # sec
seg_frame = int(seg_dur * fs // hop)  # frames per training segment (128)
shift_dur = 1.28 # sec
shift_frame = int(shift_dur * fs // hop)  # frame shift between segments (128)

network_time_shrink_size = 8  # temporal downsampling factor of the network
--------------------------------------------------------------------------------
/data/test_adc.txt:
--------------------------------------------------------------------------------
1 | daisy1.npy
2 | daisy2.npy
3 | daisy3.npy
4 | daisy4.npy
5 | opera_fem2.npy
6 | opera_fem4.npy
7 | opera_male3.npy
8 | opera_male5.npy
9 | pop1.npy
10 | pop2.npy
11 | pop3.npy
12 | pop4.npy
--------------------------------------------------------------------------------
/data/test_melody.txt:
--------------------------------------------------------------------------------
1 | AClassicEducation_NightOwl_MIX.npy
2 | Auctioneer_OurFutureFaces_MIX.npy
3 | CelestialShore_DieForUs_MIX.npy
4 | Creepoid_OldTree_MIX.npy
5 | Debussy_LenfantProdigue_MIX.npy
6 | MatthewEntwistle_DontYouEver_MIX.npy
7 | MatthewEntwistle_Lontano_MIX.npy
8 | Mozart_BesterJungling_MIX.npy
9 | MusicDelta_Gospel_MIX.npy
10 | PortStWillow_StayEven_MIX.npy
11 | Schubert_Erstarrung_MIX.npy
12 | StrandOfOaks_Spacestation_MIX.npy
--------------------------------------------------------------------------------
/data/test_mirex.txt:
--------------------------------------------------------------------------------
1 | train01.npy
2 | train02.npy
3 | train03.npy
4 | train04.npy
5 | train05.npy
6 | train06.npy
7 | train07.npy
8 | train08.npy
9 | train09.npy
--------------------------------------------------------------------------------
/data/train_data.txt:
--------------------------------------------------------------------------------
1 | tammy_1_07.npy
2 | ariel_3_03.npy
3 | heycat_2_07.npy
4 | amy_5_06.npy
5 | geniusturtle_4_09.npy
6 | abjones_4_04.npy
7 | Ani_1_06.npy
8 | bug_2_07.npy
9 | leon_3_12.npy
10 | leon_3_06.npy
11 | bobon_3_03.npy
12 | jmzen_4_06.npy
13 | davidson_4_05.npy
14 | fdps_1_12.npy
15 | khair_1_07.npy
16 | fdps_1_06.npy
17 | leon_1_03.npy
18 | bobon_1_06.npy
19 | davidson_1_09.npy
20 | yifen_5_05.npy
21 | yifen_5_11.npy
22 | khair_3_02.npy
23 | fdps_3_03.npy
24 | ariel_1_06.npy
25 | titon_4_03.npy
26 | amy_7_03.npy
27 | amy_9_06.npy
28 | geniusturtle_1_05.npy
29 | Ani_3_03.npy
30 | amy_15_11.npy
31 | amy_15_05.npy
32 | annar_2_02.npy
33 | stool_2_01.npy
34 | stool_5_08.npy
35 | annar_2_03.npy
36 | amy_15_04.npy
37 | Ani_3_02.npy
38 | Kenshin_5_01.npy
39 | geniusturtle_1_04.npy
40 | amy_9_07.npy
41 | titon_4_02.npy
42 | amy_7_02.npy
43 | geniusturtle_8_08.npy
44 | ariel_1_07.npy
45 | yifen_5_10.npy
46 | fdps_3_02.npy
47 | khair_3_03.npy
48 | davidson_1_08.npy
49 | yifen_5_04.npy
50 | bobon_1_07.npy
51 | leon_1_02.npy
52 | fdps_1_07.npy
53 | khair_1_06.npy
54 | fdps_1_13.npy
55 | davidson_4_04.npy
56 | bobon_3_02.npy
57 | jmzen_4_07.npy
58 | leon_3_07.npy
59 | leon_3_13.npy
60 | bug_2_06.npy
61 | amy_10_08.npy
62 | Ani_1_07.npy
63 | geniusturtle_4_08.npy
64 | abjones_4_05.npy
65 | geniusturtle_3_01.npy
66 | amy_5_07.npy
67 | ariel_3_02.npy
68 | tammy_1_06.npy
69 | heycat_2_06.npy
70 | amy_5_05.npy
71 | heycat_2_04.npy
72 | tammy_1_04.npy
73 | geniusturtle_3_03.npy
74 | abjones_4_07.npy
75 | Ani_1_05.npy
76 | bug_2_04.npy
77 | leon_3_05.npy
78 | leon_3_11.npy
79 | bobon_4_09.npy
80 | jmzen_4_05.npy
81 | khair_1_04.npy
82 | fdps_1_05.npy
83 | davidson_4_06.npy
84 | fdps_1_11.npy
85 | leon_6_09.npy
86 | jmzen_1_09.npy
87 | bobon_1_05.npy
88 | khair_3_01.npy
89 | khair_4_08.npy
90 | yifen_5_06.npy
91 | ariel_1_05.npy
92 | geniusturtle_1_06.npy
93 | amy_9_05.npy
94 | amy_15_06.npy
95 | amy_15_12.npy
96 | Ani_4_09.npy
97 | Kenshin_5_03.npy
98 | annar_5_08.npy
99 | annar_2_01.npy
100 | stool_2_02.npy
101 | annar_5_09.npy
102 | stool_2_03.npy
103 | Ani_4_08.npy
104 | amy_15_13.npy
105 | Ani_3_01.npy
106 | Kenshin_5_02.npy
107 | amy_15_07.npy
108 | amy_9_04.npy
109 | geniusturtle_1_07.npy
110 | ariel_1_04.npy
111 | amy_7_01.npy
112 | titon_3_08.npy
113 | titon_4_01.npy
114 | yifen_5_07.npy
115 | fdps_3_01.npy
116 | bobon_1_04.npy
117 | jmzen_1_08.npy
118 | bobon_1_10.npy
119 | leon_1_01.npy
120 | leon_6_08.npy
121 | fdps_1_10.npy
122 | davidson_4_07.npy
123 | fdps_1_04.npy
124 | khair_1_05.npy
125 | jmzen_4_04.npy
126 | bobon_4_08.npy
127 | bobon_3_01.npy
128 | jmzen_4_10.npy
129 | leon_3_10.npy
130 | leon_3_04.npy
131 | bug_2_05.npy
132 | Ani_1_04.npy
133 | abjones_4_06.npy
134 | geniusturtle_3_02.npy
135 | ariel_4_08.npy
136 | heycat_2_05.npy
137 | ariel_3_01.npy
138 | tammy_1_05.npy
139 | amy_5_04.npy
140 | geniusturtle_3_06.npy
141 | abjones_4_02.npy
142 | ariel_3_05.npy
143 | tammy_1_01.npy
144 | heycat_5_08.npy
145 | heycat_2_01.npy
146 | bug_5_08.npy
147 | bug_2_01.npy
148 | jmzen_3_09.npy
149 | bobon_3_05.npy
150 | khair_1_01.npy
151 | fdps_1_14.npy
152 | davidson_4_03.npy
153 | leon_8_09.npy
154 | leon_1_05.npy
155 | leon_1_11.npy
156 | khair_3_04.npy
157 | fdps_3_05.npy
158 | yifen_5_03.npy
159 | geniusturtle_1_03.npy
160 | titon_4_05.npy
161 | amy_7_05.npy
162 | titon_4_11.npy
163 | stool_2_07.npy
164 | annar_2_04.npy
165 | amy_15_03.npy
166 | Kenshin_5_12.npy
167 | Kenshin_5_06.npy
168 | Ani_3_05.npy
169 | Ani_3_04.npy
170 | Kenshin_5_07.npy
171 | amy_15_02.npy
172 | Kenshin_5_13.npy
173 | annar_2_05.npy
174 | stool_2_06.npy
175 | titon_4_10.npy
176 | ariel_1_01.npy
177 | titon_4_04.npy
178 | amy_7_04.npy
179 | amy_9_01.npy
180 | geniusturtle_1_02.npy
181 | yifen_5_02.npy
182 | fdps_3_04.npy
183 | khair_3_05.npy
184 | leon_1_10.npy
185 | leon_1_04.npy
186 | bobon_1_01.npy
187 | leon_8_08.npy
188 | davidson_4_02.npy
189 | fdps_1_01.npy
190 | leon_3_01.npy
191 | leon_4_08.npy
192 | bobon_3_04.npy
193 | jmzen_3_08.npy
194 | jmzen_4_01.npy
195 | bobon_3_10.npy
196 | Ani_1_01.npy
197 | bug_5_09.npy
198 | ariel_3_04.npy
199 | titon_1_08.npy
200 | amy_5_01.npy
201 | abjones_4_03.npy
202 | geniusturtle_3_07.npy
203 | geniusturtle_3_05.npy
204 | abjones_4_01.npy
205 | heycat_2_02.npy
206 | ariel_3_06.npy
207 | tammy_1_02.npy
208 | amy_5_03.npy
209 | bug_2_02.npy
210 | Ani_1_03.npy
211 | jmzen_4_03.npy
212 | bobon_3_06.npy
213 | leon_3_03.npy
214 | davidson_3_09.npy
215 | khair_1_02.npy
216 | fdps_1_03.npy
217 | bobon_1_03.npy
218 | leon_1_12.npy
219 | leon_1_06.npy
220 | yifen_2_09.npy
221 | khair_3_07.npy
222 | fdps_3_06.npy
223 | amy_9_03.npy
224 | ariel_1_03.npy
225 | amy_7_06.npy
226 | titon_4_06.npy
227 | stool_2_04.npy
228 | annar_2_07.npy
229 | Kenshin_5_05.npy
230 | Ani_3_06.npy
231 | Kenshin_5_11.npy
232 | Kenshin_5_10.npy
233 | amy_15_01.npy
234 | Kenshin_5_04.npy
235 | annar_2_06.npy
236 | stool_2_05.npy
237 | amy_7_07.npy
238 | titon_4_07.npy
239 | ariel_1_02.npy
240 | geniusturtle_1_01.npy
241 | amy_9_02.npy
242 | fdps_3_07.npy
243 | khair_3_06.npy
244 | yifen_5_01.npy
245 | yifen_2_08.npy
246 | leon_1_07.npy
247 | bobon_1_02.npy
248 | fdps_1_02.npy
249 | khair_1_03.npy
250 | davidson_4_01.npy
251 | davidson_3_08.npy
252 | leon_3_02.npy
253 | jmzen_4_02.npy
254 | bobon_3_07.npy
255 | Ani_1_02.npy
256 | bug_2_03.npy
257 | amy_5_02.npy
258 | heycat_2_03.npy
259 | tammy_1_03.npy
260 | ariel_3_07.npy
261 | geniusturtle_3_04.npy
262 | abjones_3_09.npy
263 | leon_7_01.npy
264 | leon_9_04.npy
265 | yifen_3_07.npy
266 | fdps_5_01.npy
267 | fdps_2_08.npy
268 | amy_1_01.npy
269 | titon_5_08.npy
270 | amy_6_08.npy
271 | titon_2_01.npy
272 | geniusturtle_7_13.npy
273 | geniusturtle_7_07.npy
274 | Kenshin_3_02.npy
275 | Ani_5_01.npy
276 | amy_13_07.npy
277 | bug_1_09.npy
278 | stool_4_03.npy
279 | ariel_2_08.npy
280 | heycat_4_05.npy
281 | ariel_5_01.npy
282 | amy_3_04.npy
283 | abjones_2_12.npy
284 | geniusturtle_5_02.npy
285 | abjones_2_06.npy
286 | Kenshin_1_07.npy
287 | amy_11_02.npy
288 | leon_5_10.npy
289 | leon_5_04.npy
290 | bobon_2_08.npy
291 | jmzen_2_04.npy
292 | bobon_5_01.npy
293 | jmzen_2_10.npy
294 | davidson_2_07.npy
295 | yifen_1_02.npy
296 | yifen_1_16.npy
297 | davidson_2_06.npy
298 | yifen_1_03.npy
299 | jmzen_2_11.npy
300 | leon_5_05.npy
301 | leon_5_11.npy
302 | amy_11_03.npy
303 | Kenshin_1_06.npy
304 | abjones_2_07.npy
305 | geniusturtle_5_03.npy
306 | amy_3_05.npy
307 | heycat_4_04.npy
308 | ariel_2_09.npy
309 | bug_1_08.npy
310 | annar_3_08.npy
311 | annar_4_01.npy
312 | stool_4_02.npy
313 | amy_13_06.npy
314 | Kenshin_3_03.npy
315 | geniusturtle_7_06.npy
316 | geniusturtle_7_12.npy
317 | titon_5_09.npy
318 | amy_6_09.npy
319 | heycat_1_08.npy
320 | khair_5_01.npy
321 | yifen_3_12.npy
322 | fdps_2_09.npy
323 | yifen_3_06.npy
324 | leon_9_05.npy
325 | leon_7_02.npy
326 | yifen_3_10.npy
327 | fdps_5_02.npy
328 | khair_5_03.npy
329 | yifen_3_04.npy
330 | titon_2_02.npy
331 | amy_1_02.npy
332 | geniusturtle_7_04.npy
333 | geniusturtle_7_10.npy
334 | amy_13_04.npy
335 | Kenshin_3_01.npy
336 | Ani_5_02.npy
337 | Kenshin_4_08.npy
338 | annar_4_03.npy
339 | stool_3_09.npy
340 | amy_3_07.npy
341 | ariel_5_02.npy
342 | heycat_4_06.npy
343 | geniusturtle_2_08.npy
344 | geniusturtle_5_01.npy
345 | abjones_2_05.npy
346 | abjones_2_11.npy
347 | Kenshin_1_10.npy
348 | amy_16_08.npy
349 | amy_11_01.npy
350 | Kenshin_1_04.npy
351 | leon_5_07.npy
352 | bobon_5_02.npy
353 | jmzen_2_07.npy
354 | davidson_2_10.npy
355 | yifen_1_15.npy
356 | yifen_1_01.npy
357 | davidson_2_04.npy
358 | davidson_2_05.npy
359 | yifen_1_14.npy
360 | bobon_5_03.npy
361 | jmzen_2_06.npy
362 | jmzen_2_12.npy
363 | leon_5_12.npy
364 | leon_5_06.npy
365 | Kenshin_1_05.npy
366 | Kenshin_1_11.npy
367 | abjones_2_10.npy
368 | abjones_2_04.npy
369 | ariel_5_03.npy
370 | heycat_4_07.npy
371 | amy_3_06.npy
372 | annar_4_02.npy
373 | stool_4_01.npy
374 | stool_3_08.npy
375 | Ani_5_03.npy
376 | Kenshin_4_09.npy
377 | amy_13_05.npy
378 | geniusturtle_7_11.npy
379 | geniusturtle_7_05.npy
380 | titon_2_03.npy
381 | amy_1_03.npy
382 | yifen_3_05.npy
383 | yifen_3_11.npy
384 | khair_5_02.npy
385 | fdps_5_03.npy
386 | leon_9_06.npy
387 | leon_7_03.npy
388 | leon_9_02.npy
389 | leon_7_07.npy
390 | leon_7_13.npy
391 | fdps_5_07.npy
392 | khair_5_06.npy
393 | yifen_3_01.npy
394 | yifen_4_08.npy
395 | geniusturtle_7_01.npy
396 | geniusturtle_7_15.npy
397 | amy_1_07.npy
398 | titon_2_07.npy
399 | annar_4_06.npy
400 | stool_4_05.npy
401 | amy_13_01.npy
402 | Kenshin_3_04.npy
403 | Ani_5_07.npy
404 | geniusturtle_5_04.npy
405 | abjones_5_09.npy
406 | amy_3_02.npy
407 | heycat_4_03.npy
408 | ariel_5_07.npy
409 | bug_4_03.npy
410 | stool_1_09.npy
411 | amy_11_04.npy
412 | Kenshin_1_01.npy
413 | jmzen_2_02.npy
414 | bobon_5_07.npy
415 | leon_5_02.npy
416 | yifen_1_10.npy
417 | davidson_2_01.npy
418 | davidson_5_08.npy
419 | yifen_1_04.npy
420 | davidson_5_09.npy
421 | yifen_1_05.npy
422 | yifen_1_11.npy
423 | leon_5_03.npy
424 | jmzen_2_03.npy
425 | bobon_5_06.npy
426 | bobon_5_12.npy
427 | amy_11_05.npy
428 | bug_4_02.npy
429 | stool_1_08.npy
430 | heycat_4_02.npy
431 | ariel_5_06.npy
432 | amy_3_03.npy
433 | abjones_2_01.npy
434 | abjones_5_08.npy
435 | Ani_5_06.npy
436 | Kenshin_3_05.npy
437 | stool_4_10.npy
438 | stool_4_04.npy
439 | annar_4_07.npy
440 | amy_1_06.npy
441 | titon_2_06.npy
442 | geniusturtle_7_14.npy
443 | yifen_4_09.npy
444 | khair_5_07.npy
445 | fdps_5_06.npy
446 | leon_7_12.npy
447 | leon_7_06.npy
448 | leon_9_03.npy
449 | leon_9_01.npy
450 | leon_7_10.npy
451 | leon_7_04.npy
452 | yifen_3_02.npy
453 | fdps_5_04.npy
454 | khair_5_05.npy
455 | amy_8_08.npy
456 | geniusturtle_7_02.npy
457 | titon_2_04.npy
458 | amy_1_04.npy
459 | annar_4_05.npy
460 | stool_4_06.npy
461 | Kenshin_3_07.npy
462 | Ani_5_04.npy
463 | amy_13_02.npy
464 | abjones_2_03.npy
465 | ariel_5_04.npy
466 | amy_3_01.npy
467 | amy_4_08.npy
468 | bug_3_09.npy
469 | Kenshin_1_02.npy
470 | amy_11_07.npy
471 | jmzen_5_08.npy
472 | bobon_5_04.npy
473 | jmzen_2_01.npy
474 | bobon_5_10.npy
475 | leon_5_01.npy
476 | leon_2_08.npy
477 | yifen_1_07.npy
478 | davidson_2_02.npy
479 | yifen_1_13.npy
480 | yifen_1_12.npy
481 | yifen_1_06.npy
482 | davidson_2_03.npy
483 | leon_2_09.npy
484 | bobon_5_11.npy
485 | bobon_5_05.npy
486 | jmzen_5_09.npy
487 | amy_11_06.npy
488 | Kenshin_1_03.npy
489 | bug_3_08.npy
490 | annar_1_08.npy
491 | bug_4_01.npy
492 | amy_4_09.npy
493 | ariel_5_05.npy
494 | heycat_3_08.npy
495 | heycat_4_01.npy
496 | abjones_2_02.npy
497 | amy_13_03.npy
498 | Ani_5_05.npy
499 | Kenshin_3_06.npy
500 | stool_4_07.npy
501 | annar_4_04.npy
502 | titon_2_05.npy
503 | amy_1_05.npy
504 | geniusturtle_7_03.npy
505 | khair_5_04.npy
506 | fdps_5_05.npy
507 | yifen_3_03.npy
508 | leon_7_05.npy
509 | leon_7_11.npy
510 | leon_7_08.npy
511 | fdps_2_01.npy
512 | fdps_5_08.npy
513 | yifen_4_07.npy
514 | amy_8_04.npy
515 | amy_6_01.npy
516 | titon_2_08.npy
517 | titon_5_01.npy
518 | annar_4_09.npy
519 | stool_3_03.npy
520 | Ani_5_08.npy
521 | Ani_2_01.npy
522 | Kenshin_4_02.npy
523 | abjones_5_06.npy
524 | geniusturtle_2_02.npy
525 | amy_4_04.npy
526 | ariel_5_08.npy
527 | amy_4_10.npy
528 | heycat_3_05.npy
529 | ariel_2_01.npy
530 | bug_3_05.npy
531 | stool_1_06.npy
532 | annar_1_05.npy
533 | amy_16_02.npy
534 | jmzen_5_04.npy
535 | bobon_5_08.npy
536 | bobon_2_01.npy
537 | leon_2_04.npy
538 | leon_2_10.npy
539 | davidson_5_07.npy
540 | davidson_5_06.npy
541 | leon_2_11.npy
542 | leon_2_05.npy
543 | bobon_5_09.npy
544 | jmzen_5_05.npy
545 | amy_16_03.npy
546 | annar_1_04.npy
547 | stool_1_07.npy
548 | bug_3_04.npy
549 | heycat_3_04.npy
550 | amy_4_11.npy
551 | amy_4_05.npy
552 | abjones_5_07.npy
553 | Kenshin_4_03.npy
554 | amy_14_06.npy
555 | annar_4_08.npy
556 | annar_3_01.npy
557 | stool_3_02.npy
558 | heycat_1_01.npy
559 | titon_2_09.npy
560 | amy_8_05.npy
561 | yifen_4_06.npy
562 | khair_2_01.npy
563 | fdps_5_09.npy
564 | leon_7_09.npy
565 | yifen_4_04.npy
566 | yifen_4_10.npy
567 | fdps_2_02.npy
568 | khair_2_03.npy
569 | amy_8_07.npy
570 | heycat_1_03.npy
571 | titon_5_02.npy
572 | amy_6_02.npy
573 | annar_3_03.npy
574 | stool_4_09.npy
575 | Ani_2_02.npy
576 | Kenshin_4_01.npy
577 | Kenshin_3_08.npy
578 | amy_14_04.npy
579 | abjones_5_05.npy
580 | geniusturtle_2_01.npy
581 | ariel_2_02.npy
582 | heycat_3_06.npy
583 | amy_4_07.npy
584 | bug_3_06.npy
585 | stool_1_05.npy
586 | annar_1_06.npy
587 | amy_16_01.npy
588 | bobon_2_02.npy
589 | jmzen_5_07.npy
590 | leon_2_07.npy
591 | yifen_1_08.npy
592 | davidson_5_04.npy
593 | davidson_5_10.npy
594 | davidson_5_11.npy
595 | yifen_1_09.npy
596 | davidson_5_05.npy
597 | leon_2_06.npy
598 | bobon_2_03.npy
599 | jmzen_5_06.npy
600 | annar_1_07.npy
601 | stool_1_04.npy
602 | bug_3_07.npy
603 | amy_4_06.npy
604 | ariel_2_03.npy
605 | heycat_3_07.npy
606 | abjones_5_04.npy
607 | amy_14_05.npy
608 | Ani_2_03.npy
609 | annar_3_02.npy
610 | stool_3_01.npy
611 | stool_4_08.npy
612 | titon_5_03.npy
613 | amy_6_03.npy
614 | heycat_1_02.npy
615 | amy_8_06.npy
616 | yifen_4_11.npy
617 | khair_2_02.npy
618 | fdps_2_03.npy
619 | yifen_4_05.npy
620 | yifen_4_01.npy
621 | yifen_3_08.npy
622 | fdps_2_07.npy
623 | khair_2_06.npy
624 | heycat_1_06.npy
625 | amy_6_07.npy
626 | titon_5_07.npy
627 | amy_8_02.npy
628 | geniusturtle_7_08.npy
629 | Ani_2_07.npy
630 | Kenshin_4_04.npy
631 | Kenshin_4_10.npy
632 | amy_14_01.npy
633 | annar_3_06.npy
634 | stool_3_05.npy
635 | heycat_3_03.npy
636 | ariel_2_07.npy
637 | amy_4_02.npy
638 | abjones_2_09.npy
639 | Kenshin_1_08.npy
640 | amy_16_04.npy
641 | bug_3_03.npy
642 | annar_1_03.npy
643 | leon_2_02.npy
644 | jmzen_5_02.npy
645 | bobon_2_07.npy
646 | davidson_5_01.npy
647 | davidson_2_08.npy
648 | davidson_2_09.npy
649 | jmzen_5_03.npy
650 | bobon_2_06.npy
651 | leon_2_03.npy
652 | stool_1_01.npy
653 | annar_1_02.npy
654 | bug_3_02.npy
655 | amy_16_05.npy
656 | Kenshin_1_09.npy
657 | geniusturtle_2_05.npy
658 | abjones_5_01.npy
659 | abjones_2_08.npy
660 | amy_4_03.npy
661 | heycat_3_02.npy
662 | ariel_2_06.npy
663 | stool_3_04.npy
664 | annar_3_07.npy
665 | bug_1_07.npy
666 | stool_3_10.npy
667 | Kenshin_4_11.npy
668 | Kenshin_4_05.npy
669 | Ani_2_06.npy
670 | geniusturtle_7_09.npy
671 | amy_8_03.npy
672 | amy_6_06.npy
673 | titon_5_06.npy
674 | heycat_1_07.npy
675 | khair_2_07.npy
676 | fdps_2_06.npy
677 | fdps_2_12.npy
678 | yifen_3_09.npy
679 | fdps_2_04.npy
680 | khair_2_05.npy
681 | fdps_2_10.npy
682 | yifen_4_02.npy
683 | titon_5_04.npy
684 | amy_6_04.npy
685 | heycat_1_05.npy
686 | amy_6_10.npy
687 | amy_8_01.npy
688 | amy_14_02.npy
689 | Ani_2_04.npy
690 | Kenshin_4_07.npy
691 | annar_3_05.npy
692 | stool_3_06.npy
693 | amy_4_01.npy
694 | amy_3_08.npy
695 | heycat_4_09.npy
696 | ariel_2_04.npy
697 | abjones_5_03.npy
698 | geniusturtle_2_07.npy
699 | amy_16_07.npy
700 | stool_1_03.npy
701 | leon_2_01.npy
702 | leon_5_08.npy
703 | bobon_2_04.npy
704 | jmzen_2_08.npy
705 | jmzen_5_01.npy
706 | davidson_5_02.npy
707 | davidson_5_03.npy
708 | jmzen_2_09.npy
709 | bobon_2_05.npy
710 | leon_5_09.npy
711 | stool_1_02.npy
712 | annar_1_01.npy
713 | bug_3_01.npy
714 | amy_16_06.npy
715 | geniusturtle_2_06.npy
716 | abjones_5_02.npy
717 | ariel_2_05.npy
718 | heycat_4_08.npy
719 | heycat_3_01.npy
720 | bug_1_10.npy
721 | stool_3_07.npy
722 | annar_3_04.npy
723 | Kenshin_4_06.npy
724 | Ani_2_05.npy
725 | amy_14_03.npy
726 | amy_6_11.npy
727 | heycat_1_04.npy
728 | titon_5_05.npy
729 | amy_6_05.npy
730 | fdps_2_11.npy
731 | yifen_4_03.npy
732 | khair_2_04.npy
733 | fdps_2_05.npy
734 | abjones_3_04.npy
735 | abjones_3_10.npy
736 | amy_2_06.npy
737 | titon_1_06.npy
738 | ariel_4_03.npy
739 | heycat_5_07.npy
740 | bug_5_07.npy
741 | bug_5_13.npy
742 | bobon_4_03.npy
743 | jmzen_3_06.npy
744 | leon_4_06.npy
745 | khair_6_07.npy
746 | davidson_3_11.npy
747 | davidson_3_05.npy
748 | leon_8_12.npy
749 | jmzen_1_03.npy
750 | leon_8_06.npy
751 | leon_6_03.npy
752 | yifen_2_11.npy
753 | khair_4_02.npy
754 | fdps_4_03.npy
755 | yifen_2_05.npy
756 | abjones_1_01.npy
757 | geniusturtle_6_05.npy
758 | titon_3_03.npy
759 | annar_5_02.npy
760 | stool_5_01.npy
761 | stool_2_08.npy
762 | amy_12_05.npy
763 | Ani_4_03.npy
764 | Kenshin_5_09.npy
765 | Kenshin_2_01.npy
766 | Ani_4_02.npy
767 | Kenshin_5_08.npy
768 | amy_12_04.npy
769 | annar_5_03.npy
770 | geniusturtle_8_01.npy
771 | titon_3_02.npy
772 | geniusturtle_6_04.npy
773 | davidson_1_01.npy
774 | yifen_2_04.npy
775 | yifen_2_10.npy
776 | fdps_4_02.npy
777 | khair_4_03.npy
778 | leon_6_02.npy
779 | leon_8_07.npy
780 | jmzen_1_02.npy
781 | leon_8_13.npy
782 | davidson_3_04.npy
783 | davidson_3_10.npy
784 | khair_6_06.npy
785 | leon_4_07.npy
786 | bobon_4_02.npy
787 | jmzen_3_07.npy
788 | amy_10_01.npy
789 | bug_5_12.npy
790 | bug_5_06.npy
791 | ariel_4_02.npy
792 | heycat_5_06.npy
793 | amy_2_07.npy
794 | titon_1_07.npy
795 | abjones_3_11.npy
796 | geniusturtle_3_08.npy
797 | geniusturtle_4_01.npy
798 | geniusturtle_4_03.npy
799 | heycat_5_04.npy
800 | titon_1_05.npy
801 | amy_2_05.npy
802 | bug_5_10.npy
803 | bug_5_04.npy
804 | amy_10_03.npy
805 | jmzen_3_05.npy
806 | bobon_3_09.npy
807 | jmzen_3_11.npy
808 | leon_4_05.npy
809 | davidson_3_06.npy
810 | khair_6_04.npy
811 | davidson_3_12.npy
812 | leon_8_05.npy
813 | leon_8_11.npy
814 | leon_1_09.npy
815 | yifen_2_06.npy
816 | davidson_1_03.npy
817 | khair_4_01.npy
818 | yifen_2_12.npy
819 | abjones_1_02.npy
820 | geniusturtle_6_06.npy
821 | geniusturtle_8_03.npy
822 | titon_4_09.npy
823 | annar_2_08.npy
824 | annar_5_01.npy
825 | stool_5_02.npy
826 | Kenshin_2_03.npy
827 | amy_12_06.npy
828 | amy_12_07.npy
829 | Kenshin_2_02.npy
830 | Ani_4_01.npy
831 | stool_5_03.npy
832 | titon_4_08.npy
833 | amy_7_08.npy
834 | titon_3_01.npy
835 | geniusturtle_8_02.npy
836 | geniusturtle_6_07.npy
837 | abjones_1_03.npy
838 | fdps_4_01.npy
839 | yifen_2_13.npy
840 | yifen_2_07.npy
841 | davidson_1_02.npy
842 | leon_6_01.npy
843 | leon_1_08.npy
844 | leon_8_10.npy
845 | leon_8_04.npy
846 | jmzen_1_01.npy
847 | davidson_3_13.npy
848 | khair_6_05.npy
849 | davidson_3_07.npy
850 | leon_4_04.npy
851 | jmzen_3_10.npy
852 | bobon_3_08.npy
853 | jmzen_3_04.npy
854 | bobon_4_01.npy
855 | amy_10_02.npy
856 | bug_5_05.npy
857 | bug_5_11.npy
858 | titon_1_04.npy
859 | amy_2_04.npy
860 | ariel_3_08.npy
861 | heycat_5_05.npy
862 | ariel_4_01.npy
863 | geniusturtle_4_02.npy
864 | abjones_3_06.npy
865 | abjones_3_12.npy
866 | ariel_4_05.npy
867 | heycat_5_01.npy
868 | tammy_1_08.npy
869 | geniusturtle_4_12.npy
870 | abjones_3_02.npy
871 | geniusturtle_4_06.npy
872 | amy_10_06.npy
873 | bug_2_08.npy
874 | bug_5_01.npy
875 | leon_3_09.npy
876 | bobon_4_05.npy
877 | jmzen_4_09.npy
878 | davidson_3_03.npy
879 | khair_6_01.npy
880 | fdps_1_09.npy
881 | khair_1_08.npy
882 | leon_6_05.npy
883 | jmzen_1_05.npy
884 | bobon_1_09.npy
885 | jmzen_1_11.npy
886 | davidson_1_06.npy
887 | yifen_2_03.npy
888 | khair_4_04.npy
889 | fdps_4_05.npy
890 | geniusturtle_8_06.npy
891 | titon_3_05.npy
892 | amy_9_09.npy
893 | geniusturtle_6_03.npy
894 | Ani_4_05.npy
895 | Kenshin_2_06.npy
896 | amy_12_03.npy
897 | stool_5_07.npy
898 | annar_5_04.npy
899 | annar_5_05.npy
900 | stool_5_06.npy
901 | amy_12_02.npy
902 | Ani_4_10.npy
903 | Kenshin_2_07.npy
904 | Ani_4_04.npy
905 | geniusturtle_6_02.npy
906 | amy_9_08.npy
907 | titon_3_04.npy
908 | geniusturtle_8_07.npy
909 | fdps_4_04.npy
910 | khair_4_05.npy
911 | davidson_1_07.npy
912 | yifen_2_02.npy
913 | jmzen_1_10.npy
914 | bobon_1_08.npy
915 | leon_8_01.npy
916 | jmzen_1_04.npy
917 | leon_6_04.npy
918 | fdps_1_08.npy
919 | davidson_3_02.npy
920 | bobon_4_10.npy
921 | jmzen_4_08.npy
922 | bobon_4_04.npy
923 | jmzen_3_01.npy
924 | leon_4_01.npy
925 | leon_3_08.npy
926 | bug_5_14.npy
927 | amy_10_07.npy
928 | geniusturtle_4_07.npy
929 | abjones_3_03.npy
930 | amy_2_01.npy
931 | titon_1_01.npy
932 | amy_5_08.npy
933 | ariel_4_04.npy
934 | titon_1_03.npy
935 | amy_2_03.npy
936 | heycat_5_02.npy
937 | ariel_4_06.npy
938 | abjones_3_01.npy
939 | geniusturtle_4_05.npy
940 | abjones_4_08.npy
941 | geniusturtle_4_11.npy
942 | amy_10_05.npy
943 | bug_5_02.npy
944 | leon_4_03.npy
945 | jmzen_3_03.npy
946 | bobon_4_06.npy
947 | khair_6_02.npy
948 | davidson_3_14.npy
949 | leon_6_06.npy
950 | jmzen_1_12.npy
951 | leon_8_03.npy
952 | khair_4_07.npy
953 | fdps_4_06.npy
954 | yifen_2_14.npy
955 | yifen_5_09.npy
956 | davidson_1_05.npy
957 | titon_3_06.npy
958 | geniusturtle_8_05.npy
959 | geniusturtle_1_09.npy
960 | abjones_1_04.npy
961 | amy_15_09.npy
962 | Kenshin_2_11.npy
963 | Ani_4_06.npy
964 | Kenshin_2_05.npy
965 | stool_5_04.npy
966 | annar_5_07.npy
967 | annar_5_06.npy
968 | stool_5_05.npy
969 | Kenshin_2_04.npy
970 | Ani_4_07.npy
971 | Kenshin_2_10.npy
972 | amy_15_08.npy
973 | amy_12_01.npy
974 | geniusturtle_1_08.npy
975 | geniusturtle_6_01.npy
976 | geniusturtle_8_04.npy
977 | titon_3_07.npy
978 | yifen_2_01.npy
979 | yifen_5_08.npy
980 | davidson_1_04.npy
981 | davidson_1_10.npy
982 | khair_4_06.npy
983 | yifen_2_15.npy
984 | leon_8_02.npy
985 | jmzen_1_07.npy
986 | leon_6_07.npy
987 | davidson_3_01.npy
988 | khair_6_03.npy
989 | jmzen_3_02.npy
990 | bobon_4_07.npy
991 | leon_4_02.npy
992 | bug_5_03.npy
993 | amy_10_04.npy
994 | geniusturtle_4_10.npy
995 | abjones_3_14.npy
996 | geniusturtle_4_04.npy
997 | heycat_5_03.npy
998 | ariel_4_07.npy
999 | titon_1_02.npy
1000 | amy_2_02.npy
1001 | AimeeNorwich_Child_MIX.npy
1002 | AlexanderRoss_GoodbyeBolero_MIX.npy
1003 | AlexanderRoss_VelvetCurtain_MIX.npy
1004 | AvaLuna_Waterduct_MIX.npy
1005 | BigTroubles_Phantom_MIX.npy
1006 | DreamersOfTheGhetto_HeavyLove_MIX.npy
1007 | FacesOnFilm_WaitingForGa_MIX.npy
1008 | FamilyBand_Again_MIX.npy
1009 | Handel_TornamiAVagheggiar_MIX.npy
1010 | HeladoNegro_MitadDelMundo_MIX.npy
1011 | HopAlong_SisterCities_MIX.npy
1012 | LizNelson_Coldwar_MIX.npy
1013 | LizNelson_ImComingHome_MIX.npy
1014 | LizNelson_Rainfall_MIX.npy
1015 | Meaxic_TakeAStep_MIX.npy
1016 | Meaxic_YouListen_MIX.npy
1017 | MusicDelta_80sRock_MIX.npy
1018 | MusicDelta_Beatles_MIX.npy
1019 | MusicDelta_Britpop_MIX.npy
1020 | MusicDelta_Country1_MIX.npy
1021 | MusicDelta_Country2_MIX.npy
1022 | MusicDelta_Disco_MIX.npy
1023 | MusicDelta_Grunge_MIX.npy
1024 | MusicDelta_Hendrix_MIX.npy
1025 | MusicDelta_Punk_MIX.npy
1026 | MusicDelta_Reggae_MIX.npy
1027 | MusicDelta_Rock_MIX.npy
1028 | MusicDelta_Rockabilly_MIX.npy
1029 | PurlingHiss_Lolita_MIX.npy
1030 | StevenClark_Bounty_MIX.npy
1031 | SweetLights_YouLetMeDown_MIX.npy
1032 | TheDistricts_Vermont_MIX.npy
1033 | TheScarletBrand_LesFleursDuMal_MIX.npy
1034 | TheSoSoGlos_Emergency_MIX.npy
1035 | Wolf_DieBekherte_MIX.npy
--------------------------------------------------------------------------------
/data/train_data_extra.txt:
--------------------------------------------------------------------------------
1 | tammy_1_07.npy
2 | tammy_1_07_vocal_only.npy
3 | ariel_3_03.npy
4 | ariel_3_03_vocal_only.npy
5 | heycat_2_07.npy
6 | heycat_2_07_vocal_only.npy
7 | amy_5_06.npy
8 | amy_5_06_vocal_only.npy
9 | geniusturtle_4_09.npy
10 | geniusturtle_4_09_vocal_only.npy
11 | abjones_4_04.npy
12 | abjones_4_04_vocal_only.npy
13 | Ani_1_06.npy
14 | Ani_1_06_vocal_only.npy
15 | bug_2_07.npy
16 | bug_2_07_vocal_only.npy
17 | leon_3_12.npy
18 | leon_3_12_vocal_only.npy
19 | leon_3_06.npy
20 | leon_3_06_vocal_only.npy
21 | bobon_3_03.npy
22 | bobon_3_03_vocal_only.npy
23 | jmzen_4_06.npy
24 | jmzen_4_06_vocal_only.npy
25 | davidson_4_05.npy
26 | davidson_4_05_vocal_only.npy
27 | fdps_1_12.npy
28 | fdps_1_12_vocal_only.npy
29 | khair_1_07.npy
30 | khair_1_07_vocal_only.npy
31 | fdps_1_06.npy
32 | fdps_1_06_vocal_only.npy
33 | leon_1_03.npy
34 | leon_1_03_vocal_only.npy
35 | bobon_1_06.npy
36 | bobon_1_06_vocal_only.npy
37 | davidson_1_09.npy
38 | davidson_1_09_vocal_only.npy
39 | yifen_5_05.npy
40 | yifen_5_05_vocal_only.npy
41 | yifen_5_11.npy
42 | yifen_5_11_vocal_only.npy
43 | khair_3_02.npy
44 | khair_3_02_vocal_only.npy
45 | fdps_3_03.npy
46 | fdps_3_03_vocal_only.npy
47 | ariel_1_06.npy
48 | ariel_1_06_vocal_only.npy
49 | titon_4_03.npy
50 | titon_4_03_vocal_only.npy
51 | amy_7_03.npy
52 | amy_7_03_vocal_only.npy
53 | amy_9_06.npy
54 | amy_9_06_vocal_only.npy
55 | geniusturtle_1_05.npy
56 | geniusturtle_1_05_vocal_only.npy
57 | Ani_3_03.npy
58 | Ani_3_03_vocal_only.npy
59 | amy_15_11.npy
60 | amy_15_11_vocal_only.npy
61 | amy_15_05.npy
62 | amy_15_05_vocal_only.npy
63 | annar_2_02.npy
64 | annar_2_02_vocal_only.npy
65 | stool_2_01.npy
66 | stool_2_01_vocal_only.npy
67 | stool_5_08.npy
68 | stool_5_08_vocal_only.npy
69 | annar_2_03.npy
70 | annar_2_03_vocal_only.npy
71 | amy_15_04.npy
72 | amy_15_04_vocal_only.npy
73 | Ani_3_02.npy
74 | Ani_3_02_vocal_only.npy
75 | Kenshin_5_01.npy
76 | Kenshin_5_01_vocal_only.npy
77 | geniusturtle_1_04.npy
78 | geniusturtle_1_04_vocal_only.npy
79 | amy_9_07.npy
80 | amy_9_07_vocal_only.npy
81 | titon_4_02.npy
82 | titon_4_02_vocal_only.npy
83 | amy_7_02.npy
84 | amy_7_02_vocal_only.npy
85 | geniusturtle_8_08.npy
86 | geniusturtle_8_08_vocal_only.npy
87 | ariel_1_07.npy
88 | ariel_1_07_vocal_only.npy
89 | yifen_5_10.npy
90 | yifen_5_10_vocal_only.npy
91 | fdps_3_02.npy
92 | fdps_3_02_vocal_only.npy
93 | khair_3_03.npy
94 | khair_3_03_vocal_only.npy
95 | davidson_1_08.npy
96 | davidson_1_08_vocal_only.npy
97 | yifen_5_04.npy
98 | yifen_5_04_vocal_only.npy
99 | bobon_1_07.npy
100 | bobon_1_07_vocal_only.npy
101 | leon_1_02.npy
102 | leon_1_02_vocal_only.npy
103 | fdps_1_07.npy
104 | fdps_1_07_vocal_only.npy
105 | khair_1_06.npy
106 | khair_1_06_vocal_only.npy
107 | fdps_1_13.npy
108 | fdps_1_13_vocal_only.npy
109 | davidson_4_04.npy
110 | davidson_4_04_vocal_only.npy
111 | bobon_3_02.npy
112 | bobon_3_02_vocal_only.npy
113 | jmzen_4_07.npy
114 | jmzen_4_07_vocal_only.npy
115 | leon_3_07.npy
116 | leon_3_07_vocal_only.npy
117 | leon_3_13.npy
118 | leon_3_13_vocal_only.npy
119 | bug_2_06.npy
120 | bug_2_06_vocal_only.npy
121 | amy_10_08.npy
122 | amy_10_08_vocal_only.npy
123 | Ani_1_07.npy
124 | Ani_1_07_vocal_only.npy
125 | geniusturtle_4_08.npy
126 | geniusturtle_4_08_vocal_only.npy
127 | abjones_4_05.npy
128 | abjones_4_05_vocal_only.npy
129 | geniusturtle_3_01.npy
130 | geniusturtle_3_01_vocal_only.npy
131 | amy_5_07.npy
132 | amy_5_07_vocal_only.npy
133 | ariel_3_02.npy
134 | ariel_3_02_vocal_only.npy
135 | tammy_1_06.npy
136 | tammy_1_06_vocal_only.npy
137 | heycat_2_06.npy
138 | heycat_2_06_vocal_only.npy
139 | amy_5_05.npy
140 | amy_5_05_vocal_only.npy
141 | heycat_2_04.npy
142 | heycat_2_04_vocal_only.npy
143 | tammy_1_04.npy
144 | tammy_1_04_vocal_only.npy
145 | geniusturtle_3_03.npy
146 | geniusturtle_3_03_vocal_only.npy
147 | abjones_4_07.npy
148 | abjones_4_07_vocal_only.npy
149 | Ani_1_05.npy
150 | Ani_1_05_vocal_only.npy
151 | bug_2_04.npy
152 | bug_2_04_vocal_only.npy
153 | leon_3_05.npy
154 | leon_3_05_vocal_only.npy
155 | leon_3_11.npy
156 | leon_3_11_vocal_only.npy
157 | bobon_4_09.npy
158 | bobon_4_09_vocal_only.npy
159 | jmzen_4_05.npy
160 | jmzen_4_05_vocal_only.npy
161 | khair_1_04.npy
162 | khair_1_04_vocal_only.npy
163 | fdps_1_05.npy
164 | fdps_1_05_vocal_only.npy
165 | davidson_4_06.npy
166 | davidson_4_06_vocal_only.npy
167 | fdps_1_11.npy
168 | fdps_1_11_vocal_only.npy
169 | leon_6_09.npy
170 | leon_6_09_vocal_only.npy
171 | jmzen_1_09.npy
172 | jmzen_1_09_vocal_only.npy
173 | bobon_1_05.npy
174 | bobon_1_05_vocal_only.npy
175 | khair_3_01.npy
176 | khair_3_01_vocal_only.npy
177 | khair_4_08.npy
178 | khair_4_08_vocal_only.npy
179 | yifen_5_06.npy
180 | yifen_5_06_vocal_only.npy
181 | ariel_1_05.npy
182 | ariel_1_05_vocal_only.npy
183 | geniusturtle_1_06.npy
184 | geniusturtle_1_06_vocal_only.npy
185 | amy_9_05.npy
186 | amy_9_05_vocal_only.npy
187 | amy_15_06.npy
188 | amy_15_06_vocal_only.npy
189 | amy_15_12.npy
190 | amy_15_12_vocal_only.npy
191 | Ani_4_09.npy
192 | Ani_4_09_vocal_only.npy
193 | Kenshin_5_03.npy
194 | Kenshin_5_03_vocal_only.npy
195 | annar_5_08.npy
196 | annar_5_08_vocal_only.npy
197 | annar_2_01.npy
198 | annar_2_01_vocal_only.npy
199 | stool_2_02.npy
200 | stool_2_02_vocal_only.npy
201 | annar_5_09.npy
202 | annar_5_09_vocal_only.npy
203 | stool_2_03.npy
204 | stool_2_03_vocal_only.npy
205 | Ani_4_08.npy
206 | Ani_4_08_vocal_only.npy
207 | amy_15_13.npy
208 | amy_15_13_vocal_only.npy
209 | Ani_3_01.npy
210 | Ani_3_01_vocal_only.npy
211 | Kenshin_5_02.npy
212 | Kenshin_5_02_vocal_only.npy
213 | amy_15_07.npy
214 | amy_15_07_vocal_only.npy
215 | amy_9_04.npy
216 | amy_9_04_vocal_only.npy
217 | geniusturtle_1_07.npy
218 | geniusturtle_1_07_vocal_only.npy
219 | ariel_1_04.npy
220 | ariel_1_04_vocal_only.npy
221 | amy_7_01.npy
222 | amy_7_01_vocal_only.npy
223 | titon_3_08.npy
224 | titon_3_08_vocal_only.npy
225 | titon_4_01.npy
226 | titon_4_01_vocal_only.npy
227 | yifen_5_07.npy
228 | yifen_5_07_vocal_only.npy
229 | fdps_3_01.npy
230 | fdps_3_01_vocal_only.npy
231 | bobon_1_04.npy
232 | bobon_1_04_vocal_only.npy
233 | jmzen_1_08.npy
234 | jmzen_1_08_vocal_only.npy
235 | bobon_1_10.npy
236 | bobon_1_10_vocal_only.npy
237 | leon_1_01.npy
238 | leon_1_01_vocal_only.npy
239 | leon_6_08.npy
240 | leon_6_08_vocal_only.npy
241 | fdps_1_10.npy
242 | fdps_1_10_vocal_only.npy
243 | davidson_4_07.npy
244 | davidson_4_07_vocal_only.npy
245 | fdps_1_04.npy
246 | fdps_1_04_vocal_only.npy
247 | khair_1_05.npy
248 | khair_1_05_vocal_only.npy
249 | jmzen_4_04.npy
250 | jmzen_4_04_vocal_only.npy
251 | bobon_4_08.npy
252 | bobon_4_08_vocal_only.npy
253 | bobon_3_01.npy
254 | bobon_3_01_vocal_only.npy
255 | jmzen_4_10.npy
256 | jmzen_4_10_vocal_only.npy
257 | leon_3_10.npy
258 | leon_3_10_vocal_only.npy
259 | leon_3_04.npy
260 | leon_3_04_vocal_only.npy
261 | bug_2_05.npy
262 | bug_2_05_vocal_only.npy
263 | Ani_1_04.npy
264 | Ani_1_04_vocal_only.npy
265 | abjones_4_06.npy
266 | abjones_4_06_vocal_only.npy
267 | geniusturtle_3_02.npy
268 | geniusturtle_3_02_vocal_only.npy
269 | ariel_4_08.npy
270 | ariel_4_08_vocal_only.npy
271 | heycat_2_05.npy
272 | heycat_2_05_vocal_only.npy
273 | ariel_3_01.npy
274 | ariel_3_01_vocal_only.npy
275 | tammy_1_05.npy
276 | tammy_1_05_vocal_only.npy
277 | amy_5_04.npy
278 | amy_5_04_vocal_only.npy
279 | geniusturtle_3_06.npy
280 | geniusturtle_3_06_vocal_only.npy
281 | abjones_4_02.npy
282 | abjones_4_02_vocal_only.npy
283 | ariel_3_05.npy
284 | ariel_3_05_vocal_only.npy
285 | tammy_1_01.npy
286 | tammy_1_01_vocal_only.npy
287 | heycat_5_08.npy
288 | heycat_5_08_vocal_only.npy
289 | heycat_2_01.npy
290 | heycat_2_01_vocal_only.npy
291 | bug_5_08.npy
292 | bug_5_08_vocal_only.npy
293 | bug_2_01.npy
294 | bug_2_01_vocal_only.npy
295 | jmzen_3_09.npy
296 | jmzen_3_09_vocal_only.npy
297 | bobon_3_05.npy
298 | bobon_3_05_vocal_only.npy
299 | khair_1_01.npy
300 | khair_1_01_vocal_only.npy
301 | fdps_1_14.npy
302 | fdps_1_14_vocal_only.npy
303 | davidson_4_03.npy
304 | davidson_4_03_vocal_only.npy
305 | leon_8_09.npy
306 | leon_8_09_vocal_only.npy
307 | leon_1_05.npy
308 | leon_1_05_vocal_only.npy
309 | leon_1_11.npy
310 | leon_1_11_vocal_only.npy
311 | khair_3_04.npy
312 | khair_3_04_vocal_only.npy
313 | fdps_3_05.npy
314 | fdps_3_05_vocal_only.npy
315 | yifen_5_03.npy
316 | yifen_5_03_vocal_only.npy
317 | geniusturtle_1_03.npy
318 | geniusturtle_1_03_vocal_only.npy
319 | titon_4_05.npy
320 | titon_4_05_vocal_only.npy
321 | amy_7_05.npy
322 | amy_7_05_vocal_only.npy
323 | titon_4_11.npy
324 | titon_4_11_vocal_only.npy
325 | stool_2_07.npy
326 | stool_2_07_vocal_only.npy
327 | annar_2_04.npy
328 | annar_2_04_vocal_only.npy
329 | amy_15_03.npy
330 | amy_15_03_vocal_only.npy
331 | Kenshin_5_12.npy
332 | Kenshin_5_12_vocal_only.npy
333 | Kenshin_5_06.npy
334 | Kenshin_5_06_vocal_only.npy
335 | Ani_3_05.npy
336 | Ani_3_05_vocal_only.npy
337 | Ani_3_04.npy
338 | Ani_3_04_vocal_only.npy
339 | Kenshin_5_07.npy
340 | Kenshin_5_07_vocal_only.npy
341 | amy_15_02.npy
342 | amy_15_02_vocal_only.npy
343 | Kenshin_5_13.npy
344 | Kenshin_5_13_vocal_only.npy
345 | annar_2_05.npy
346 | annar_2_05_vocal_only.npy
347 | stool_2_06.npy
348 | stool_2_06_vocal_only.npy
349 | titon_4_10.npy
350 | titon_4_10_vocal_only.npy
351 | ariel_1_01.npy
352 | ariel_1_01_vocal_only.npy
353 | titon_4_04.npy
354 | titon_4_04_vocal_only.npy
355 | amy_7_04.npy
356 | amy_7_04_vocal_only.npy
357 | amy_9_01.npy
358 | amy_9_01_vocal_only.npy
359 | geniusturtle_1_02.npy
360 | geniusturtle_1_02_vocal_only.npy
361 | yifen_5_02.npy
362 | yifen_5_02_vocal_only.npy
363 | fdps_3_04.npy
364 | fdps_3_04_vocal_only.npy
365 | khair_3_05.npy
366 | khair_3_05_vocal_only.npy
367 | leon_1_10.npy
368 | leon_1_10_vocal_only.npy
369 | leon_1_04.npy
370 | leon_1_04_vocal_only.npy
371 | bobon_1_01.npy
372 | bobon_1_01_vocal_only.npy
373 | leon_8_08.npy
374 | leon_8_08_vocal_only.npy
375 | davidson_4_02.npy
376 | davidson_4_02_vocal_only.npy
377 | fdps_1_01.npy
378 | fdps_1_01_vocal_only.npy
379 | leon_3_01.npy
380 | leon_3_01_vocal_only.npy
381 | leon_4_08.npy
382 | leon_4_08_vocal_only.npy
383 | bobon_3_04.npy
384 | bobon_3_04_vocal_only.npy
385 | jmzen_3_08.npy
386 | jmzen_3_08_vocal_only.npy
387 | jmzen_4_01.npy
388 | jmzen_4_01_vocal_only.npy
389 | bobon_3_10.npy
390 | bobon_3_10_vocal_only.npy
391 | Ani_1_01.npy
392 | Ani_1_01_vocal_only.npy
393 | bug_5_09.npy
394 | bug_5_09_vocal_only.npy
395 | ariel_3_04.npy
396 | ariel_3_04_vocal_only.npy
397 | titon_1_08.npy
398 | titon_1_08_vocal_only.npy
399 | amy_5_01.npy
400 | amy_5_01_vocal_only.npy
401 | abjones_4_03.npy
402 | abjones_4_03_vocal_only.npy
403 | geniusturtle_3_07.npy
404 | geniusturtle_3_07_vocal_only.npy
405 | geniusturtle_3_05.npy
406 | geniusturtle_3_05_vocal_only.npy
407 | abjones_4_01.npy
408 | abjones_4_01_vocal_only.npy
409 | heycat_2_02.npy
410 | heycat_2_02_vocal_only.npy
411 | ariel_3_06.npy
412 | ariel_3_06_vocal_only.npy
413 | tammy_1_02.npy
414 | tammy_1_02_vocal_only.npy
415 | amy_5_03.npy
416 | amy_5_03_vocal_only.npy
417 | bug_2_02.npy
418 | bug_2_02_vocal_only.npy
419 | Ani_1_03.npy
420 | Ani_1_03_vocal_only.npy
421 | jmzen_4_03.npy
422 | jmzen_4_03_vocal_only.npy
423 | bobon_3_06.npy
424 | bobon_3_06_vocal_only.npy
425 | leon_3_03.npy
426 | leon_3_03_vocal_only.npy
427 | davidson_3_09.npy
428 | davidson_3_09_vocal_only.npy
429 | khair_1_02.npy
430 | khair_1_02_vocal_only.npy
431 | fdps_1_03.npy
432 | fdps_1_03_vocal_only.npy
433 | bobon_1_03.npy
434 | bobon_1_03_vocal_only.npy
435 | leon_1_12.npy
436 | leon_1_12_vocal_only.npy
437 | leon_1_06.npy
438 | leon_1_06_vocal_only.npy
439 | yifen_2_09.npy
440 | yifen_2_09_vocal_only.npy
441 | khair_3_07.npy
442 | khair_3_07_vocal_only.npy
443 | fdps_3_06.npy
444 | fdps_3_06_vocal_only.npy
445 | amy_9_03.npy
446 | amy_9_03_vocal_only.npy
447 | ariel_1_03.npy
448 | ariel_1_03_vocal_only.npy
449 | amy_7_06.npy
450 | amy_7_06_vocal_only.npy
451 | titon_4_06.npy
452 | titon_4_06_vocal_only.npy
453 | stool_2_04.npy
454 | stool_2_04_vocal_only.npy
455 | annar_2_07.npy
456 | annar_2_07_vocal_only.npy
457 | Kenshin_5_05.npy
458 | Kenshin_5_05_vocal_only.npy
459 | Ani_3_06.npy
460 | Ani_3_06_vocal_only.npy
461 | Kenshin_5_11.npy
462 | Kenshin_5_11_vocal_only.npy
463 | Kenshin_5_10.npy
464 | Kenshin_5_10_vocal_only.npy
465 | amy_15_01.npy
466 | amy_15_01_vocal_only.npy
467 | Kenshin_5_04.npy
468 | Kenshin_5_04_vocal_only.npy
469 | annar_2_06.npy
470 | annar_2_06_vocal_only.npy
471 | stool_2_05.npy
472 | stool_2_05_vocal_only.npy
473 | amy_7_07.npy
474 | amy_7_07_vocal_only.npy
475 | titon_4_07.npy
476 | titon_4_07_vocal_only.npy
477 | ariel_1_02.npy
478 | ariel_1_02_vocal_only.npy
479 | geniusturtle_1_01.npy
480 | geniusturtle_1_01_vocal_only.npy
481 | amy_9_02.npy
482 | amy_9_02_vocal_only.npy
483 | fdps_3_07.npy
484 | fdps_3_07_vocal_only.npy
485 | khair_3_06.npy
486 | khair_3_06_vocal_only.npy
487 | yifen_5_01.npy
488 | yifen_5_01_vocal_only.npy
489 | yifen_2_08.npy
490 | yifen_2_08_vocal_only.npy
491 | leon_1_07.npy
492 | leon_1_07_vocal_only.npy
493 | bobon_1_02.npy
494 | bobon_1_02_vocal_only.npy
495 | fdps_1_02.npy
496 | fdps_1_02_vocal_only.npy
497 | khair_1_03.npy
498 | khair_1_03_vocal_only.npy
499 | davidson_4_01.npy
500 | davidson_4_01_vocal_only.npy
501 | davidson_3_08.npy
502 | davidson_3_08_vocal_only.npy
503 | leon_3_02.npy
504 | leon_3_02_vocal_only.npy
505 | jmzen_4_02.npy
506 | jmzen_4_02_vocal_only.npy
507 | bobon_3_07.npy
508 | bobon_3_07_vocal_only.npy
509 | Ani_1_02.npy
510 | Ani_1_02_vocal_only.npy
511 | bug_2_03.npy
512 | bug_2_03_vocal_only.npy
513 | amy_5_02.npy
514 | amy_5_02_vocal_only.npy
515 | heycat_2_03.npy
516 | heycat_2_03_vocal_only.npy
517 | tammy_1_03.npy
518 | tammy_1_03_vocal_only.npy
519 | ariel_3_07.npy
520 | ariel_3_07_vocal_only.npy
521 | geniusturtle_3_04.npy
522 | geniusturtle_3_04_vocal_only.npy
523 | abjones_3_09.npy
524 | abjones_3_09_vocal_only.npy
525 | leon_7_01.npy
526 | leon_7_01_vocal_only.npy
527 | leon_9_04.npy
528 | leon_9_04_vocal_only.npy
529 | yifen_3_07.npy
530 | yifen_3_07_vocal_only.npy
531 | fdps_5_01.npy
532 | fdps_5_01_vocal_only.npy
533 | fdps_2_08.npy
534 | fdps_2_08_vocal_only.npy
535 | amy_1_01.npy
536 | amy_1_01_vocal_only.npy
537 | titon_5_08.npy
538 | titon_5_08_vocal_only.npy
539 | amy_6_08.npy
540 | amy_6_08_vocal_only.npy
541 | titon_2_01.npy
542 | titon_2_01_vocal_only.npy
543 | geniusturtle_7_13.npy
544 | geniusturtle_7_13_vocal_only.npy
545 | geniusturtle_7_07.npy
546 | geniusturtle_7_07_vocal_only.npy
547 | Kenshin_3_02.npy
548 | Kenshin_3_02_vocal_only.npy
549 | Ani_5_01.npy
550 | Ani_5_01_vocal_only.npy
551 | amy_13_07.npy
552 | amy_13_07_vocal_only.npy
553 | bug_1_09.npy
554 | bug_1_09_vocal_only.npy
555 | stool_4_03.npy
556 | stool_4_03_vocal_only.npy
557 | ariel_2_08.npy
558 | ariel_2_08_vocal_only.npy
559 | heycat_4_05.npy
560 | heycat_4_05_vocal_only.npy
561 | ariel_5_01.npy
562 | ariel_5_01_vocal_only.npy
563 | amy_3_04.npy
564 | amy_3_04_vocal_only.npy
565 | abjones_2_12.npy
566 | abjones_2_12_vocal_only.npy
567 | geniusturtle_5_02.npy
568 | geniusturtle_5_02_vocal_only.npy
569 | abjones_2_06.npy
570 | abjones_2_06_vocal_only.npy
571 | Kenshin_1_07.npy
572 | Kenshin_1_07_vocal_only.npy
573 | amy_11_02.npy
574 | amy_11_02_vocal_only.npy
575 | leon_5_10.npy
576 | leon_5_10_vocal_only.npy
577 | leon_5_04.npy
578 | leon_5_04_vocal_only.npy
579 | bobon_2_08.npy
580 | bobon_2_08_vocal_only.npy
581 | jmzen_2_04.npy
582 | jmzen_2_04_vocal_only.npy
583 | bobon_5_01.npy
584 | bobon_5_01_vocal_only.npy
585 | jmzen_2_10.npy
586 | jmzen_2_10_vocal_only.npy
587 | davidson_2_07.npy
588 | davidson_2_07_vocal_only.npy
589 | yifen_1_02.npy
590 | yifen_1_02_vocal_only.npy
591 | yifen_1_16.npy
592 | yifen_1_16_vocal_only.npy
593 | davidson_2_06.npy
594 | davidson_2_06_vocal_only.npy
595 | yifen_1_03.npy
596 | yifen_1_03_vocal_only.npy
597 | jmzen_2_11.npy
598 | jmzen_2_11_vocal_only.npy
599 | leon_5_05.npy
600 | leon_5_05_vocal_only.npy
601 | leon_5_11.npy
602 | leon_5_11_vocal_only.npy
603 | amy_11_03.npy
604 | amy_11_03_vocal_only.npy
605 | Kenshin_1_06.npy
606 | Kenshin_1_06_vocal_only.npy
607 | abjones_2_07.npy
608 | abjones_2_07_vocal_only.npy
609 | geniusturtle_5_03.npy
610 | geniusturtle_5_03_vocal_only.npy
611 | amy_3_05.npy
612 | amy_3_05_vocal_only.npy
613 | heycat_4_04.npy
614 | heycat_4_04_vocal_only.npy
615 | ariel_2_09.npy
616 | ariel_2_09_vocal_only.npy
617 | bug_1_08.npy
618 | bug_1_08_vocal_only.npy
619 | annar_3_08.npy
620 | annar_3_08_vocal_only.npy
621 | annar_4_01.npy
622 | annar_4_01_vocal_only.npy
623 | stool_4_02.npy
624 | stool_4_02_vocal_only.npy
625 | amy_13_06.npy
626 | amy_13_06_vocal_only.npy
627 | Kenshin_3_03.npy
628 | Kenshin_3_03_vocal_only.npy
629 | geniusturtle_7_06.npy
630 | geniusturtle_7_06_vocal_only.npy
631 | geniusturtle_7_12.npy
632 | geniusturtle_7_12_vocal_only.npy
633 | titon_5_09.npy
634 | titon_5_09_vocal_only.npy
635 | amy_6_09.npy
636 | amy_6_09_vocal_only.npy
637 | heycat_1_08.npy
638 | heycat_1_08_vocal_only.npy
639 | khair_5_01.npy
640 | khair_5_01_vocal_only.npy
641 | yifen_3_12.npy
642 | yifen_3_12_vocal_only.npy
643 | fdps_2_09.npy
644 | fdps_2_09_vocal_only.npy
645 | yifen_3_06.npy
646 | yifen_3_06_vocal_only.npy
647 | leon_9_05.npy
648 | leon_9_05_vocal_only.npy
649 | leon_7_02.npy
650 | leon_7_02_vocal_only.npy
651 | yifen_3_10.npy
652 | yifen_3_10_vocal_only.npy
653 | fdps_5_02.npy
654 | fdps_5_02_vocal_only.npy
655 | khair_5_03.npy
656 | khair_5_03_vocal_only.npy
657 | yifen_3_04.npy
658 | yifen_3_04_vocal_only.npy
659 | titon_2_02.npy
660 | titon_2_02_vocal_only.npy
661 | amy_1_02.npy
662 | amy_1_02_vocal_only.npy
663 | geniusturtle_7_04.npy
664 | geniusturtle_7_04_vocal_only.npy
665 | geniusturtle_7_10.npy
666 | geniusturtle_7_10_vocal_only.npy
667 | amy_13_04.npy
668 | amy_13_04_vocal_only.npy
669 | Kenshin_3_01.npy
670 | Kenshin_3_01_vocal_only.npy
671 | Ani_5_02.npy
672 | Ani_5_02_vocal_only.npy
673 | Kenshin_4_08.npy
674 | Kenshin_4_08_vocal_only.npy
675 | annar_4_03.npy
676 | annar_4_03_vocal_only.npy
677 | stool_3_09.npy
678 | stool_3_09_vocal_only.npy
679 | amy_3_07.npy
680 | amy_3_07_vocal_only.npy
681 | ariel_5_02.npy
682 | ariel_5_02_vocal_only.npy
683 | heycat_4_06.npy
684 | heycat_4_06_vocal_only.npy
685 | geniusturtle_2_08.npy
686 | geniusturtle_2_08_vocal_only.npy
687 | geniusturtle_5_01.npy
688 | geniusturtle_5_01_vocal_only.npy
689 | abjones_2_05.npy
690 | abjones_2_05_vocal_only.npy
691 | abjones_2_11.npy
692 | abjones_2_11_vocal_only.npy
693 | Kenshin_1_10.npy
694 | Kenshin_1_10_vocal_only.npy
695 | amy_16_08.npy
696 | amy_16_08_vocal_only.npy
697 | amy_11_01.npy
698 | amy_11_01_vocal_only.npy
699 | Kenshin_1_04.npy
700 | Kenshin_1_04_vocal_only.npy
701 | leon_5_07.npy
702 | leon_5_07_vocal_only.npy
703 | bobon_5_02.npy
704 | bobon_5_02_vocal_only.npy
705 | jmzen_2_07.npy
706 | jmzen_2_07_vocal_only.npy
707 | davidson_2_10.npy
708 | davidson_2_10_vocal_only.npy
709 | yifen_1_15.npy
710 | yifen_1_15_vocal_only.npy
711 | yifen_1_01.npy
712 | yifen_1_01_vocal_only.npy
713 | davidson_2_04.npy
714 | davidson_2_04_vocal_only.npy
715 | davidson_2_05.npy
716 | davidson_2_05_vocal_only.npy
717 | yifen_1_14.npy
718 | yifen_1_14_vocal_only.npy
719 | bobon_5_03.npy
720 | bobon_5_03_vocal_only.npy
721 | jmzen_2_06.npy
722 | jmzen_2_06_vocal_only.npy
723 | jmzen_2_12.npy
724 | jmzen_2_12_vocal_only.npy
725 | leon_5_12.npy
726 | leon_5_12_vocal_only.npy
727 | leon_5_06.npy
728 | leon_5_06_vocal_only.npy
729 | Kenshin_1_05.npy
730 | Kenshin_1_05_vocal_only.npy
731 | Kenshin_1_11.npy
732 | Kenshin_1_11_vocal_only.npy
733 | abjones_2_10.npy
734 | abjones_2_10_vocal_only.npy
735 | abjones_2_04.npy
736 | abjones_2_04_vocal_only.npy
737 | ariel_5_03.npy
738 | ariel_5_03_vocal_only.npy
739 | heycat_4_07.npy
740 | heycat_4_07_vocal_only.npy
741 | amy_3_06.npy
742 | amy_3_06_vocal_only.npy
743 | annar_4_02.npy
744 | annar_4_02_vocal_only.npy
745 | stool_4_01.npy
746 | stool_4_01_vocal_only.npy
747 | stool_3_08.npy
748 | stool_3_08_vocal_only.npy
749 | Ani_5_03.npy
750 | Ani_5_03_vocal_only.npy
751 | Kenshin_4_09.npy
752 | Kenshin_4_09_vocal_only.npy
753 | amy_13_05.npy
754 | amy_13_05_vocal_only.npy
755 | geniusturtle_7_11.npy
756 | geniusturtle_7_11_vocal_only.npy
757 | geniusturtle_7_05.npy
758 | geniusturtle_7_05_vocal_only.npy
759 | titon_2_03.npy
760 | titon_2_03_vocal_only.npy
761 | amy_1_03.npy
762 | amy_1_03_vocal_only.npy
763 | yifen_3_05.npy
764 | yifen_3_05_vocal_only.npy
765 | yifen_3_11.npy
766 | yifen_3_11_vocal_only.npy
767 | khair_5_02.npy
768 | khair_5_02_vocal_only.npy
769 | fdps_5_03.npy
770 | fdps_5_03_vocal_only.npy
771 | leon_9_06.npy
772 | leon_9_06_vocal_only.npy
773 | leon_7_03.npy
774 | leon_7_03_vocal_only.npy
775 | leon_9_02.npy
776 | leon_9_02_vocal_only.npy
777 | leon_7_07.npy
778 | leon_7_07_vocal_only.npy
779 | leon_7_13.npy
780 | leon_7_13_vocal_only.npy
781 | fdps_5_07.npy
782 | fdps_5_07_vocal_only.npy
783 | khair_5_06.npy
784 | khair_5_06_vocal_only.npy
785 | yifen_3_01.npy
786 | yifen_3_01_vocal_only.npy
787 | yifen_4_08.npy
788 | yifen_4_08_vocal_only.npy
789 | geniusturtle_7_01.npy
790 | geniusturtle_7_01_vocal_only.npy
791 | geniusturtle_7_15.npy
792 | geniusturtle_7_15_vocal_only.npy
793 | amy_1_07.npy
794 | amy_1_07_vocal_only.npy
795 | titon_2_07.npy
796 | titon_2_07_vocal_only.npy
797 | annar_4_06.npy
798 | annar_4_06_vocal_only.npy
799 | stool_4_05.npy
800 | stool_4_05_vocal_only.npy
801 | amy_13_01.npy
802 | amy_13_01_vocal_only.npy
803 | Kenshin_3_04.npy
804 | Kenshin_3_04_vocal_only.npy
805 | Ani_5_07.npy
806 | Ani_5_07_vocal_only.npy
807 | geniusturtle_5_04.npy
808 | geniusturtle_5_04_vocal_only.npy
809 | abjones_5_09.npy
810 | abjones_5_09_vocal_only.npy
811 | amy_3_02.npy
812 | amy_3_02_vocal_only.npy
813 | heycat_4_03.npy
814 | heycat_4_03_vocal_only.npy
815 | ariel_5_07.npy
816 | ariel_5_07_vocal_only.npy
817 | bug_4_03.npy
818 | bug_4_03_vocal_only.npy
819 | stool_1_09.npy
820 | stool_1_09_vocal_only.npy
821 | amy_11_04.npy
822 | amy_11_04_vocal_only.npy
823 | Kenshin_1_01.npy
824 | Kenshin_1_01_vocal_only.npy
825 | jmzen_2_02.npy
826 | jmzen_2_02_vocal_only.npy
827 | bobon_5_07.npy
828 | bobon_5_07_vocal_only.npy
829 | leon_5_02.npy
830 | leon_5_02_vocal_only.npy
831 | yifen_1_10.npy
832 | yifen_1_10_vocal_only.npy
833 | davidson_2_01.npy
834 | davidson_2_01_vocal_only.npy
835 | davidson_5_08.npy
836 | davidson_5_08_vocal_only.npy
837 | yifen_1_04.npy
838 | yifen_1_04_vocal_only.npy
839 | davidson_5_09.npy
840 | davidson_5_09_vocal_only.npy
841 | yifen_1_05.npy
842 | yifen_1_05_vocal_only.npy
843 | yifen_1_11.npy
844 | yifen_1_11_vocal_only.npy
845 | leon_5_03.npy
846 | leon_5_03_vocal_only.npy
847 | jmzen_2_03.npy
848 | jmzen_2_03_vocal_only.npy
849 | bobon_5_06.npy
850 | bobon_5_06_vocal_only.npy
851 | bobon_5_12.npy
852 | bobon_5_12_vocal_only.npy
853 | amy_11_05.npy
854 | amy_11_05_vocal_only.npy
855 | bug_4_02.npy
856 | bug_4_02_vocal_only.npy
857 | stool_1_08.npy
858 | stool_1_08_vocal_only.npy
859 | heycat_4_02.npy
860 | heycat_4_02_vocal_only.npy
861 | ariel_5_06.npy
862 | ariel_5_06_vocal_only.npy
863 | amy_3_03.npy
864 | amy_3_03_vocal_only.npy
865 | abjones_2_01.npy
866 | abjones_2_01_vocal_only.npy
867 | abjones_5_08.npy
868 | abjones_5_08_vocal_only.npy
869 | Ani_5_06.npy
870 | Ani_5_06_vocal_only.npy
871 | Kenshin_3_05.npy
872 | Kenshin_3_05_vocal_only.npy
873 | stool_4_10.npy
874 | stool_4_10_vocal_only.npy
875 | stool_4_04.npy
876 | stool_4_04_vocal_only.npy
877 | annar_4_07.npy
878 | annar_4_07_vocal_only.npy
879 | amy_1_06.npy
880 | amy_1_06_vocal_only.npy
881 | titon_2_06.npy
882 | titon_2_06_vocal_only.npy
883 | geniusturtle_7_14.npy
884 | geniusturtle_7_14_vocal_only.npy
885 | yifen_4_09.npy
886 | yifen_4_09_vocal_only.npy
887 | khair_5_07.npy
888 | khair_5_07_vocal_only.npy
889 | fdps_5_06.npy
890 | fdps_5_06_vocal_only.npy
891 | leon_7_12.npy
892 | leon_7_12_vocal_only.npy
893 | leon_7_06.npy
894 | leon_7_06_vocal_only.npy
895 | leon_9_03.npy
896 | leon_9_03_vocal_only.npy
897 | leon_9_01.npy
898 | leon_9_01_vocal_only.npy
899 | leon_7_10.npy
900 | leon_7_10_vocal_only.npy
901 | leon_7_04.npy
902 | leon_7_04_vocal_only.npy
903 | yifen_3_02.npy
904 | yifen_3_02_vocal_only.npy
905 | fdps_5_04.npy
906 | fdps_5_04_vocal_only.npy
907 | khair_5_05.npy
908 | khair_5_05_vocal_only.npy
909 | amy_8_08.npy
910 | amy_8_08_vocal_only.npy
911 | geniusturtle_7_02.npy
912 | geniusturtle_7_02_vocal_only.npy
913 | titon_2_04.npy
914 | titon_2_04_vocal_only.npy
915 | amy_1_04.npy
916 | amy_1_04_vocal_only.npy
917 | annar_4_05.npy
918 | annar_4_05_vocal_only.npy
919 | stool_4_06.npy
920 | stool_4_06_vocal_only.npy
921 | Kenshin_3_07.npy
922 | Kenshin_3_07_vocal_only.npy
923 | Ani_5_04.npy
924 | Ani_5_04_vocal_only.npy
925 | amy_13_02.npy
926 | amy_13_02_vocal_only.npy
927 | abjones_2_03.npy
928 | abjones_2_03_vocal_only.npy
929 | ariel_5_04.npy
930 | ariel_5_04_vocal_only.npy
931 | amy_3_01.npy
932 | amy_3_01_vocal_only.npy
933 | amy_4_08.npy
934 | amy_4_08_vocal_only.npy
935 | bug_3_09.npy
936 | bug_3_09_vocal_only.npy
937 | Kenshin_1_02.npy
938 | Kenshin_1_02_vocal_only.npy
939 | amy_11_07.npy
940 | amy_11_07_vocal_only.npy
941 | jmzen_5_08.npy
942 | jmzen_5_08_vocal_only.npy
943 | bobon_5_04.npy
944 | bobon_5_04_vocal_only.npy
945 | jmzen_2_01.npy
946 | jmzen_2_01_vocal_only.npy
947 | bobon_5_10.npy
948 | bobon_5_10_vocal_only.npy
949 | leon_5_01.npy
950 | leon_5_01_vocal_only.npy
951 | leon_2_08.npy
952 | leon_2_08_vocal_only.npy
953 | yifen_1_07.npy
954 | yifen_1_07_vocal_only.npy
955 | davidson_2_02.npy
956 | davidson_2_02_vocal_only.npy
957 | yifen_1_13.npy
958 | yifen_1_13_vocal_only.npy
959 | yifen_1_12.npy
960 | yifen_1_12_vocal_only.npy
961 | yifen_1_06.npy
962 | yifen_1_06_vocal_only.npy
963 | davidson_2_03.npy
964 | davidson_2_03_vocal_only.npy
965 | leon_2_09.npy
966 | leon_2_09_vocal_only.npy
967 | bobon_5_11.npy
968 | bobon_5_11_vocal_only.npy
969 | bobon_5_05.npy
970 | bobon_5_05_vocal_only.npy
971 | jmzen_5_09.npy
972 | jmzen_5_09_vocal_only.npy
973 | amy_11_06.npy
974 | amy_11_06_vocal_only.npy
975 | Kenshin_1_03.npy
976 | Kenshin_1_03_vocal_only.npy
977 | bug_3_08.npy
978 | bug_3_08_vocal_only.npy
979 | annar_1_08.npy
980 | annar_1_08_vocal_only.npy
981 | bug_4_01.npy
982 | bug_4_01_vocal_only.npy
983 | amy_4_09.npy
984 | amy_4_09_vocal_only.npy
985 | ariel_5_05.npy
986 | ariel_5_05_vocal_only.npy
987 | heycat_3_08.npy
988 | heycat_3_08_vocal_only.npy
989 | heycat_4_01.npy
990 | heycat_4_01_vocal_only.npy
991 | abjones_2_02.npy
992 | abjones_2_02_vocal_only.npy
993 | amy_13_03.npy
994 | amy_13_03_vocal_only.npy
995 | Ani_5_05.npy
996 | Ani_5_05_vocal_only.npy
997 | Kenshin_3_06.npy
998 | Kenshin_3_06_vocal_only.npy
999 | stool_4_07.npy
1000 | stool_4_07_vocal_only.npy
1001 | annar_4_04.npy
1002 | annar_4_04_vocal_only.npy
1003 | titon_2_05.npy
1004 | amy_1_05.npy
1005 | geniusturtle_7_03.npy
1006 | khair_5_04.npy
1007 | fdps_5_05.npy
1008 | yifen_3_03.npy
1009 | leon_7_05.npy
1010 | leon_7_11.npy
1011 | leon_7_08.npy
1012 | fdps_2_01.npy
1013 | fdps_5_08.npy
1014 | yifen_4_07.npy
1015 | amy_8_04.npy
1016 | amy_6_01.npy
1017 | titon_2_08.npy
1018 | titon_5_01.npy
1019 | annar_4_09.npy
1020 | stool_3_03.npy
1021 | Ani_5_08.npy
1022 | Ani_2_01.npy
1023 | Kenshin_4_02.npy
1024 | abjones_5_06.npy
1025 | geniusturtle_2_02.npy
1026 | amy_4_04.npy
1027 | ariel_5_08.npy
1028 | amy_4_10.npy
1029 | heycat_3_05.npy
1030 | ariel_2_01.npy
1031 | bug_3_05.npy
1032 | stool_1_06.npy
1033 | annar_1_05.npy
1034 | amy_16_02.npy
1035 | jmzen_5_04.npy
1036 | bobon_5_08.npy
1037 | bobon_2_01.npy
1038 | leon_2_04.npy
1039 | leon_2_10.npy
1040 | davidson_5_07.npy
1041 | davidson_5_06.npy
1042 | leon_2_11.npy
1043 | leon_2_05.npy
1044 | bobon_5_09.npy
1045 | jmzen_5_05.npy
1046 | amy_16_03.npy
1047 | annar_1_04.npy
1048 | stool_1_07.npy
1049 | bug_3_04.npy
1050 | heycat_3_04.npy
1051 | amy_4_11.npy
1052 | amy_4_05.npy
1053 | abjones_5_07.npy
1054 | Kenshin_4_03.npy
1055 | amy_14_06.npy
1056 | annar_4_08.npy
1057 | annar_3_01.npy
1058 | stool_3_02.npy
1059 | heycat_1_01.npy
1060 | titon_2_09.npy
1061 | amy_8_05.npy
1062 | yifen_4_06.npy
1063 | khair_2_01.npy
1064 | fdps_5_09.npy
1065 | leon_7_09.npy
1066 | yifen_4_04.npy
1067 | yifen_4_10.npy
1068 | fdps_2_02.npy
1069 | khair_2_03.npy
1070 | amy_8_07.npy
1071 | heycat_1_03.npy
1072 | titon_5_02.npy
1073 | amy_6_02.npy
1074 | annar_3_03.npy
1075 | stool_4_09.npy
1076 | Ani_2_02.npy
1077 | Kenshin_4_01.npy
1078 | Kenshin_3_08.npy
1079 | amy_14_04.npy
1080 | abjones_5_05.npy
1081 | geniusturtle_2_01.npy
1082 | ariel_2_02.npy
1083 | heycat_3_06.npy
1084 | amy_4_07.npy
1085 | bug_3_06.npy
1086 | stool_1_05.npy
1087 | annar_1_06.npy
1088 | amy_16_01.npy
1089 | bobon_2_02.npy
1090 | jmzen_5_07.npy
1091 | leon_2_07.npy
1092 | yifen_1_08.npy
1093 | davidson_5_04.npy
1094 | davidson_5_10.npy
1095 | davidson_5_11.npy
1096 | yifen_1_09.npy
1097 | davidson_5_05.npy
1098 | leon_2_06.npy
1099 | bobon_2_03.npy
1100 | jmzen_5_06.npy
1101 | annar_1_07.npy
1102 | stool_1_04.npy
1103 | bug_3_07.npy
1104 | amy_4_06.npy
1105 | ariel_2_03.npy
1106 | heycat_3_07.npy
1107 | abjones_5_04.npy
1108 | amy_14_05.npy
1109 | Ani_2_03.npy
1110 | annar_3_02.npy
1111 | stool_3_01.npy
1112 | stool_4_08.npy
1113 | titon_5_03.npy
1114 | amy_6_03.npy
1115 | heycat_1_02.npy
1116 | amy_8_06.npy
1117 | yifen_4_11.npy
1118 | khair_2_02.npy
1119 | fdps_2_03.npy
1120 | yifen_4_05.npy
1121 | yifen_4_01.npy
1122 | yifen_3_08.npy
1123 | fdps_2_07.npy
1124 | khair_2_06.npy
1125 | heycat_1_06.npy
1126 | amy_6_07.npy
1127 | titon_5_07.npy
1128 | amy_8_02.npy
1129 | geniusturtle_7_08.npy
1130 | Ani_2_07.npy
1131 | Kenshin_4_04.npy
1132 | Kenshin_4_10.npy
1133 | amy_14_01.npy
1134 | annar_3_06.npy
1135 | stool_3_05.npy
1136 | heycat_3_03.npy
1137 | ariel_2_07.npy
1138 | amy_4_02.npy
1139 | abjones_2_09.npy
1140 | Kenshin_1_08.npy
1141 | amy_16_04.npy
1142 | bug_3_03.npy
1143 | annar_1_03.npy
1144 | leon_2_02.npy
1145 | jmzen_5_02.npy
1146 | bobon_2_07.npy
1147 | davidson_5_01.npy
1148 | davidson_2_08.npy
1149 | davidson_2_09.npy
1150 | jmzen_5_03.npy
1151 | bobon_2_06.npy
1152 | leon_2_03.npy
1153 | stool_1_01.npy
1154 | annar_1_02.npy
1155 | bug_3_02.npy
1156 | amy_16_05.npy
1157 | Kenshin_1_09.npy
1158 | geniusturtle_2_05.npy
1159 | abjones_5_01.npy
1160 | abjones_2_08.npy
1161 | amy_4_03.npy
1162 | heycat_3_02.npy
1163 | ariel_2_06.npy
1164 | stool_3_04.npy
1165 | annar_3_07.npy
1166 | bug_1_07.npy
1167 | stool_3_10.npy
1168 | Kenshin_4_11.npy
1169 | Kenshin_4_05.npy
1170 | Ani_2_06.npy
1171 | geniusturtle_7_09.npy
1172 | amy_8_03.npy
1173 | amy_6_06.npy
1174 | titon_5_06.npy
1175 | heycat_1_07.npy
1176 | khair_2_07.npy
1177 | fdps_2_06.npy
1178 | fdps_2_12.npy
1179 | yifen_3_09.npy
1180 | fdps_2_04.npy
1181 | khair_2_05.npy
1182 | fdps_2_10.npy
1183 | yifen_4_02.npy
1184 | titon_5_04.npy
1185 | amy_6_04.npy
1186 | heycat_1_05.npy
1187 | amy_6_10.npy
1188 | amy_8_01.npy
1189 | amy_14_02.npy
1190 | Ani_2_04.npy
1191 | Kenshin_4_07.npy
1192 | annar_3_05.npy
1193 | stool_3_06.npy
1194 | amy_4_01.npy
1195 | amy_3_08.npy
1196 | heycat_4_09.npy
1197 | ariel_2_04.npy
1198 | abjones_5_03.npy
1199 | geniusturtle_2_07.npy
1200 | amy_16_07.npy
1201 | stool_1_03.npy
1202 | leon_2_01.npy
1203 | leon_5_08.npy
1204 | bobon_2_04.npy
1205 | jmzen_2_08.npy
1206 | jmzen_5_01.npy
1207 | davidson_5_02.npy
1208 | davidson_5_03.npy
1209 | jmzen_2_09.npy
1210 | bobon_2_05.npy
1211 | leon_5_09.npy
1212 | stool_1_02.npy
1213 | annar_1_01.npy
1214 | bug_3_01.npy
1215 | amy_16_06.npy
1216 | geniusturtle_2_06.npy
1217 | abjones_5_02.npy
1218 | ariel_2_05.npy
1219 | heycat_4_08.npy
1220 | heycat_3_01.npy
1221 | bug_1_10.npy
1222 | stool_3_07.npy
1223 | annar_3_04.npy
1224 | Kenshin_4_06.npy
1225 | Ani_2_05.npy
1226 | amy_14_03.npy
1227 | amy_6_11.npy
1228 | heycat_1_04.npy
1229 | titon_5_05.npy
1230 | amy_6_05.npy
1231 | fdps_2_11.npy
1232 | yifen_4_03.npy
1233 | khair_2_04.npy
1234 | fdps_2_05.npy
1235 | abjones_3_04.npy
1236 | abjones_3_10.npy
1237 | amy_2_06.npy
1238 | titon_1_06.npy
1239 | ariel_4_03.npy
1240 | heycat_5_07.npy
1241 | bug_5_07.npy
1242 | bug_5_13.npy
1243 | bobon_4_03.npy
1244 | jmzen_3_06.npy
1245 | leon_4_06.npy
1246 | khair_6_07.npy
1247 | davidson_3_11.npy
1248 | davidson_3_05.npy
1249 | leon_8_12.npy
1250 | jmzen_1_03.npy
1251 | leon_8_06.npy
1252 | leon_6_03.npy
1253 | yifen_2_11.npy
1254 | khair_4_02.npy
1255 | fdps_4_03.npy
1256 | yifen_2_05.npy
1257 | abjones_1_01.npy
1258 | geniusturtle_6_05.npy
1259 | titon_3_03.npy
1260 | annar_5_02.npy
1261 | stool_5_01.npy
1262 | stool_2_08.npy
1263 | amy_12_05.npy
1264 | Ani_4_03.npy
1265 | Kenshin_5_09.npy
1266 | Kenshin_2_01.npy
1267 | Ani_4_02.npy
1268 | Kenshin_5_08.npy
1269 | amy_12_04.npy
1270 | annar_5_03.npy
1271 | geniusturtle_8_01.npy
1272 | titon_3_02.npy
1273 | geniusturtle_6_04.npy
1274 | davidson_1_01.npy
1275 | yifen_2_04.npy
1276 | yifen_2_10.npy
1277 | fdps_4_02.npy
1278 | khair_4_03.npy
1279 | leon_6_02.npy
1280 | leon_8_07.npy
1281 | jmzen_1_02.npy
1282 | leon_8_13.npy
1283 | davidson_3_04.npy
1284 | davidson_3_10.npy
1285 | khair_6_06.npy
1286 | leon_4_07.npy
1287 | bobon_4_02.npy
1288 | jmzen_3_07.npy
1289 | amy_10_01.npy
1290 | bug_5_12.npy
1291 | bug_5_06.npy
1292 | ariel_4_02.npy
1293 | heycat_5_06.npy
1294 | amy_2_07.npy
1295 | titon_1_07.npy
1296 | abjones_3_11.npy
1297 | geniusturtle_3_08.npy
1298 | geniusturtle_4_01.npy
1299 | geniusturtle_4_03.npy
1300 | heycat_5_04.npy
1301 | titon_1_05.npy
1302 | amy_2_05.npy
1303 | bug_5_10.npy
1304 | bug_5_04.npy
1305 | amy_10_03.npy
1306 | jmzen_3_05.npy
1307 | bobon_3_09.npy
1308 | jmzen_3_11.npy
1309 | leon_4_05.npy
1310 | davidson_3_06.npy
1311 | khair_6_04.npy
1312 | davidson_3_12.npy
1313 | leon_8_05.npy
1314 | leon_8_11.npy
1315 | leon_1_09.npy
1316 | yifen_2_06.npy
1317 | davidson_1_03.npy
1318 | khair_4_01.npy
1319 | yifen_2_12.npy
1320 | abjones_1_02.npy
1321 | geniusturtle_6_06.npy
1322 | geniusturtle_8_03.npy
1323 | titon_4_09.npy
1324 | annar_2_08.npy
1325 | annar_5_01.npy
1326 | stool_5_02.npy
1327 | Kenshin_2_03.npy
1328 | amy_12_06.npy
1329 | amy_12_07.npy
1330 | Kenshin_2_02.npy
1331 | Ani_4_01.npy
1332 | stool_5_03.npy
1333 | titon_4_08.npy
1334 | amy_7_08.npy
1335 | titon_3_01.npy
1336 | geniusturtle_8_02.npy
1337 | geniusturtle_6_07.npy
1338 | abjones_1_03.npy
1339 | fdps_4_01.npy
1340 | yifen_2_13.npy
1341 | yifen_2_07.npy
1342 | davidson_1_02.npy
1343 | leon_6_01.npy
1344 | leon_1_08.npy
1345 | leon_8_10.npy
1346 | leon_8_04.npy
1347 | jmzen_1_01.npy
1348 | davidson_3_13.npy
1349 | khair_6_05.npy
1350 | davidson_3_07.npy
1351 | leon_4_04.npy
1352 | jmzen_3_10.npy
1353 | bobon_3_08.npy
1354 | jmzen_3_04.npy
1355 | bobon_4_01.npy
1356 | amy_10_02.npy
1357 | bug_5_05.npy
1358 | bug_5_11.npy
1359 | titon_1_04.npy
1360 | amy_2_04.npy
1361 | ariel_3_08.npy
1362 | heycat_5_05.npy
1363 | ariel_4_01.npy
1364 | geniusturtle_4_02.npy
1365 | abjones_3_06.npy
1366 | abjones_3_12.npy
1367 | ariel_4_05.npy
1368 | heycat_5_01.npy
1369 | tammy_1_08.npy
1370 | geniusturtle_4_12.npy
1371 | abjones_3_02.npy
1372 | geniusturtle_4_06.npy
1373 | amy_10_06.npy
1374 | bug_2_08.npy
1375 | bug_5_01.npy
1376 | leon_3_09.npy
1377 | bobon_4_05.npy
1378 | jmzen_4_09.npy
1379 | davidson_3_03.npy
1380 | khair_6_01.npy
1381 | fdps_1_09.npy
1382 | khair_1_08.npy
1383 | leon_6_05.npy
1384 | jmzen_1_05.npy
1385 | bobon_1_09.npy
1386 | jmzen_1_11.npy
1387 | davidson_1_06.npy
1388 | yifen_2_03.npy
1389 | khair_4_04.npy
1390 | fdps_4_05.npy
1391 | geniusturtle_8_06.npy
1392 | titon_3_05.npy
1393 | amy_9_09.npy
1394 | geniusturtle_6_03.npy
1395 | Ani_4_05.npy
1396 | Kenshin_2_06.npy
1397 | amy_12_03.npy
1398 | stool_5_07.npy
1399 | annar_5_04.npy
1400 | annar_5_05.npy
1401 | stool_5_06.npy
1402 | amy_12_02.npy
1403 | Ani_4_10.npy
1404 | Kenshin_2_07.npy
1405 | Ani_4_04.npy
1406 | geniusturtle_6_02.npy
1407 | amy_9_08.npy
1408 | titon_3_04.npy
1409 | geniusturtle_8_07.npy
1410 | fdps_4_04.npy
1411 | khair_4_05.npy
1412 | davidson_1_07.npy
1413 | yifen_2_02.npy
1414 | jmzen_1_10.npy
1415 | bobon_1_08.npy
1416 | leon_8_01.npy
1417 | jmzen_1_04.npy
1418 | leon_6_04.npy
1419 | fdps_1_08.npy
1420 | davidson_3_02.npy
1421 | bobon_4_10.npy
1422 | jmzen_4_08.npy
1423 | bobon_4_04.npy
1424 | jmzen_3_01.npy
1425 | leon_4_01.npy
1426 | leon_3_08.npy
1427 | bug_5_14.npy
1428 | amy_10_07.npy
1429 | geniusturtle_4_07.npy
1430 | abjones_3_03.npy
1431 | amy_2_01.npy
1432 | titon_1_01.npy
1433 | amy_5_08.npy
1434 | ariel_4_04.npy
1435 | titon_1_03.npy
1436 | amy_2_03.npy
1437 | heycat_5_02.npy
1438 | ariel_4_06.npy
1439 | abjones_3_01.npy
1440 | geniusturtle_4_05.npy
1441 | abjones_4_08.npy
1442 | geniusturtle_4_11.npy
1443 | amy_10_05.npy
1444 | bug_5_02.npy
1445 | leon_4_03.npy
1446 | jmzen_3_03.npy
1447 | bobon_4_06.npy
1448 | khair_6_02.npy
1449 | davidson_3_14.npy
1450 | leon_6_06.npy
1451 | jmzen_1_12.npy
1452 | leon_8_03.npy
1453 | khair_4_07.npy
1454 | fdps_4_06.npy
1455 | yifen_2_14.npy
1456 | yifen_5_09.npy
1457 | davidson_1_05.npy
1458 | titon_3_06.npy
1459 | geniusturtle_8_05.npy
1460 | geniusturtle_1_09.npy
1461 | abjones_1_04.npy
1462 | amy_15_09.npy
1463 | Kenshin_2_11.npy
1464 | Ani_4_06.npy
1465 | Kenshin_2_05.npy
1466 | stool_5_04.npy
1467 | annar_5_07.npy
1468 | annar_5_06.npy
1469 | stool_5_05.npy
1470 | Kenshin_2_04.npy
1471 | Ani_4_07.npy
1472 | Kenshin_2_10.npy
1473 | amy_15_08.npy
1474 | amy_12_01.npy
1475 | geniusturtle_1_08.npy
1476 | geniusturtle_6_01.npy
1477 | geniusturtle_8_04.npy
1478 | titon_3_07.npy
1479 | yifen_2_01.npy
1480 | yifen_5_08.npy
1481 | davidson_1_04.npy
1482 | davidson_1_10.npy
1483 | khair_4_06.npy
1484 | yifen_2_15.npy
1485 | leon_8_02.npy
1486 | jmzen_1_07.npy
1487 | leon_6_07.npy
1488 | davidson_3_01.npy
1489 | khair_6_03.npy
1490 | jmzen_3_02.npy
1491 | bobon_4_07.npy
1492 | leon_4_02.npy
1493 | bug_5_03.npy
1494 | amy_10_04.npy
1495 | geniusturtle_4_10.npy
1496 | abjones_3_14.npy
1497 | geniusturtle_4_04.npy
1498 | heycat_5_03.npy
1499 | ariel_4_07.npy
1500 | titon_1_02.npy
1501 | amy_2_02.npy
1502 | AimeeNorwich_Child_MIX.npy
1503 | AlexanderRoss_GoodbyeBolero_MIX.npy
1504 | AlexanderRoss_VelvetCurtain_MIX.npy
1505 | AvaLuna_Waterduct_MIX.npy
1506 | BigTroubles_Phantom_MIX.npy
1507 | DreamersOfTheGhetto_HeavyLove_MIX.npy
1508 | FacesOnFilm_WaitingForGa_MIX.npy
1509 | FamilyBand_Again_MIX.npy
1510 | Handel_TornamiAVagheggiar_MIX.npy
1511 | HeladoNegro_MitadDelMundo_MIX.npy
1512 | HopAlong_SisterCities_MIX.npy
1513 | LizNelson_Coldwar_MIX.npy
1514 | LizNelson_ImComingHome_MIX.npy
1515 | LizNelson_Rainfall_MIX.npy
1516 | Meaxic_TakeAStep_MIX.npy
1517 | Meaxic_YouListen_MIX.npy
1518 | MusicDelta_80sRock_MIX.npy
1519 | MusicDelta_Beatles_MIX.npy
1520 | MusicDelta_Britpop_MIX.npy
1521 | MusicDelta_Country1_MIX.npy
1522 | MusicDelta_Country2_MIX.npy
1523 | MusicDelta_Disco_MIX.npy
1524 | MusicDelta_Grunge_MIX.npy
1525 | MusicDelta_Hendrix_MIX.npy
1526 | MusicDelta_Punk_MIX.npy
1527 | MusicDelta_Reggae_MIX.npy
1528 | MusicDelta_Rock_MIX.npy
1529 | MusicDelta_Rockabilly_MIX.npy
1530 | PurlingHiss_Lolita_MIX.npy
1531 | StevenClark_Bounty_MIX.npy
1532 | SweetLights_YouLetMeDown_MIX.npy
1533 | TheDistricts_Vermont_MIX.npy
1534 | TheScarletBrand_LesFleursDuMal_MIX.npy
1535 | TheSoSoGlos_Emergency_MIX.npy
1536 | Wolf_DieBekherte_MIX.npy
--------------------------------------------------------------------------------
/data/train_data_small.txt:
--------------------------------------------------------------------------------
1 | titon_1_02.npy
2 | amy_2_02.npy
3 | TheScarletBrand_LesFleursDuMal_MIX.npy
4 | TheSoSoGlos_Emergency_MIX.npy
5 | Wolf_DieBekherte_MIX.npy
6 |
--------------------------------------------------------------------------------
/data/train_data_small_extra.txt:
--------------------------------------------------------------------------------
1 | titon_1_02.npy
2 | titon_1_02_vocal_only.npy
3 | titon_1_02_instrumental_only.npy
4 | amy_2_02.npy
5 | amy_2_02_vocal_only.npy
6 | amy_2_02_instrumental_only.npy
7 | TheScarletBrand_LesFleursDuMal_MIX.npy
8 | TheSoSoGlos_Emergency_MIX.npy
9 | Wolf_DieBekherte_MIX.npy
10 |
--------------------------------------------------------------------------------
/data_generator.py:
--------------------------------------------------------------------------------
1 | """
2 | Ke Chen knutchen@ucsd.edu
3 |
4 | Tone-Octave Network - data_generator file
5 |
6 | This file contains the dataset and data generator classes
7 |
8 | """
9 | import os
10 | import torch
11 | import numpy as np
12 | from tqdm import tqdm
13 | from torch.utils.data import Dataset, DataLoader
14 |
15 | from util import index2centf
16 | from feature_extraction import get_CenFreq
17 |
def reorganize(x, octave_res):
    """Regroup the frequency axis (axis 1) so bins sharing the same offset
    modulo ``octave_res`` become adjacent: all bins at offset 0 first, then
    offset 1, and so on. Returns a new array with the same shape as ``x``.
    """
    num_bins = x.shape[1]
    # Build the permutation [0, r, 2r, ..., 1, 1+r, ...] of bin indices.
    order = [b for offset in range(octave_res)
             for b in range(offset, num_bins, octave_res)]
    # Fancy indexing yields a fresh (copied) array, matching the original
    # list/transpose construction.
    return x[:, order, :]
27 |
28 |
class TONetTrainDataset(Dataset):
    """Training dataset: slices cached CFP features and f0 labels into
    fixed-length, zero-padded segments.

    Each item is a dict with:
      "cfp":  float32 array, (channels, freq_bins, seg_frame + 2*network_time_shrink_size)
      "tcfp": a scalar placeholder (the tone-chroma CFP is disabled to save
              memory — see NOTE in __init__)
      "gd":   (seg_frame,) ground-truth pitch-bin indices from index2centf
    """

    def __init__(self, data_list, config):
        # data_list: path to a text file listing one cached .npy feature per line.
        # config: project config (seg_frame, shift_frame, startfreq, stopfreq, octave_res, ...).
        self.config = config
        # self.cfp_dir = os.path.join(config.data_path,config.cfp_dir)
        # self.f0_dir = os.path.join(config.data_path,"f0ref")
        # NOTE(review): hard-coded local paths override the config-based ones above.
        self.cfp_dir = "/home/ken/Downloads/cfp_saved/"
        self.f0_dir = "/home/ken/Downloads/labels_and_waveform/"
        self.data_list = data_list
        # Center frequencies of the target pitch bins (Hz).
        self.cent_f = np.array(get_CenFreq(config.startfreq, config.stopfreq, config.octave_res))
        # init data array
        self.data_cfp = []
        self.data_gd = []
        self.data_tcfp = []
        seg_frame = config.seg_frame
        shift_frame = config.shift_frame
        print("Data List:", data_list)
        with open(data_list, "r") as f:
            data_txt = f.readlines()
        # Strip the ".npy" extension (and trailing newline with it).
        data_txt = [d.split(".")[0] for d in data_txt]
        # data_txt = data_txt[:100]
        print("Song Size:", len(data_txt))
        # process cfp
        for i, filename in enumerate(tqdm(data_txt)):
            # file set
            cfp_file = os.path.join(self.cfp_dir, filename + ".npy")
            ref_file = os.path.join(self.f0_dir, filename + ".txt")
            # get raw cfp and freq
            temp_cfp = np.load(cfp_file, allow_pickle = True)
            # temp_cfp[0, :, :] = temp_cfp[1, :, :] * temp_cfp[2, :, :]
            temp_freq = np.loadtxt(ref_file)
            # Keep only the frequency column (column 0 is time).
            temp_freq = temp_freq[:,1]
            # check length: truncate whichever of (labels, features) is longer
            if temp_freq.shape[0] > temp_cfp.shape[2]:
                temp_freq = temp_freq[:temp_cfp.shape[2]]
            else:
                temp_cfp = temp_cfp[:,:,:temp_freq.shape[0]]
            # build data: hop through the song in shift_frame steps
            for j in range(0, temp_cfp.shape[2], shift_frame):
                bgnt = j
                endt = j + seg_frame
                # temp_x = temp_cfp[:, :, bgnt:endt]
                # Quantize the f0 curve to pitch-bin indices for this segment.
                temp_gd = index2centf(temp_freq[bgnt:endt], self.cent_f)

                # left and right pad temp_x to counter shrinking
                # we hope that bgnt - network_time_shrink_size >= 0 and endt + network_time_shrink_size <= temp_cfp.shape[2]
                from config import network_time_shrink_size
                temp_x = temp_cfp[:, :, max(0, bgnt - network_time_shrink_size):min(endt + network_time_shrink_size, temp_cfp.shape[2])]

                # print(temp_x.shape[2])

                # Zero-pad on the left when the context window starts before frame 0.
                if bgnt - network_time_shrink_size < 0:
                    left_padding_size = abs(bgnt - network_time_shrink_size)
                    temp_x = np.concatenate([np.zeros((temp_cfp.shape[0], temp_cfp.shape[1], left_padding_size)), temp_x], axis = 2)

                # print(temp_x.shape[2])
                # Zero-pad on the right when the context window runs past the end.
                if endt + network_time_shrink_size > temp_cfp.shape[2]:
                    # in this temp_gds will have everything at the right end
                    if endt >= temp_cfp.shape[2]:
                        right_padding_size = network_time_shrink_size
                    else:
                        right_padding_size = endt + network_time_shrink_size - temp_cfp.shape[2]

                    temp_x = np.concatenate([temp_x, np.zeros((temp_cfp.shape[0], temp_cfp.shape[1], right_padding_size))], axis = 2)
                    # print(right_padding_size, endt, temp_cfp.shape[2])

                # print(temp_x.shape[2], len(temp_gd), 2*network_time_shrink_size)

                # Final tail segment: pad both features and labels to full size.
                if temp_x.shape[2] < seg_frame + 2*network_time_shrink_size:
                    rl = temp_x.shape[2]
                    # pad_x = np.zeros((temp_x.shape[0], temp_x.shape[1], seg_frame))
                    pad_x = np.zeros((temp_x.shape[0], temp_x.shape[1], seg_frame + 2*network_time_shrink_size))
                    pad_gd = np.zeros(seg_frame)
                    # pad_gd[:rl] = temp_gd
                    pad_gd[:rl - 2*network_time_shrink_size] = temp_gd
                    pad_x[:,:, :rl] = temp_x
                    temp_x = pad_x
                    temp_gd = pad_gd

                # Invariant: features carry exactly the extra context the network shrinks away.
                assert temp_x.shape[2] - len(temp_gd) == 2*network_time_shrink_size


                temp_tx = reorganize(temp_x[:], config.octave_res)
                # self.data_tcfp.append(temp_tx)
                # to save memory
                # NOTE(review): data_tcfp is deliberately replaced by a dummy
                # 0..49999 range each iteration; __getitem__ then returns a
                # scalar for "tcfp". temp_tx above is computed but unused.
                self.data_tcfp = list(range(50000))
                self.data_cfp.append(temp_x)


                # print(temp_gd.shape, temp_freq[bgnt:endt].shape)
                self.data_gd.append(temp_gd)
        self.data_cfp = np.array(self.data_cfp)
        self.data_tcfp = np.array(self.data_tcfp)
        # no need for tcfp for now (to save space)
        self.data_gd = np.array(self.data_gd)
        print("Total Datasize:", self.data_cfp.shape)

    def __len__(self):
        # Number of fixed-length segments across all songs.
        return len(self.data_cfp)

    def __getitem__(self,index):
        # Returns one training segment; see class docstring for shapes.
        temp_dict = {
            "cfp": self.data_cfp[index].astype(np.float32),
            "tcfp": self.data_tcfp[index].astype(np.float32),
            "gd": self.data_gd[index]
        }
        # print("Haaa", temp_dict["gd"].shape)
        return temp_dict
137 |
138 |
class TONetTestDataset(Dataset):
    """Test dataset: one item per SONG (not per segment).

    Each item is a dict with:
      "cfp":    float32 array (n_segments, channels, freq_bins, padded_frames)
      "tcfp":   float32 array of reorganized (tone-chroma) segments
      "gd":     (n_segments, seg_frame) raw f0 values in Hz (not quantized)
      "length": number of label frames before truncation
      "name":   path of the reference f0 file
    """

    def __init__(self, data_list, config):
        # data_list: path to a text file listing one cached .npy feature per line.
        self.config = config
        # self.cfp_dir = os.path.join(config.data_path,config.cfp_dir)
        # self.f0_dir = os.path.join(config.data_path,"f0ref")
        # NOTE(review): hard-coded local paths override the config-based ones above.
        self.cfp_dir = "/home/ken/Downloads/cfp_saved/"
        self.f0_dir = "/home/ken/Downloads/labels_and_waveform/"

        self.data_list = data_list
        # Center frequencies of the target pitch bins (Hz).
        self.cent_f = np.array(get_CenFreq(config.startfreq, config.stopfreq, config.octave_res))
        # init data array
        self.data_names = []
        self.data_cfp = []
        self.data_gd = []
        self.data_len = []
        self.data_tcfp = []
        seg_frame = config.seg_frame
        shift_frame = config.shift_frame
        print("Data List:", data_list)
        with open(data_list, "r") as f:
            data_txt = f.readlines()
        # Strip the ".npy" extension (and trailing newline with it).
        data_txt = [d.split(".")[0] for d in data_txt]
        print("Song Size:", len(data_txt))
        # process cfp
        for i, filename in enumerate(tqdm(data_txt)):

            # All segments of one song are grouped together.
            group_cfp = []
            group_gd = []
            group_tcfp = []
            # file set
            cfp_file = os.path.join(self.cfp_dir, filename + ".npy")
            ref_file = os.path.join(self.f0_dir, filename + ".txt")


            # get raw cfp and freq
            temp_cfp = np.load(cfp_file, allow_pickle = True)
            # temp_cfp[0, :, :] = temp_cfp[1, :, :] * temp_cfp[2, :, :]
            temp_freq = np.loadtxt(ref_file)
            # Keep only the frequency column (column 0 is time).
            temp_freq = temp_freq[:,1]
            # Record the original label length before any truncation.
            self.data_len.append(len(temp_freq))
            # check length: truncate whichever of (labels, features) is longer
            if temp_freq.shape[0] > temp_cfp.shape[2]:
                temp_freq = temp_freq[:temp_cfp.shape[2]]
            else:
                temp_cfp = temp_cfp[:,:,:temp_freq.shape[0]]
            # build data: hop through the song in shift_frame steps
            for j in range(0, temp_cfp.shape[2], shift_frame):
                bgnt = j
                endt = j + seg_frame
                # temp_x = temp_cfp[:, :, bgnt:endt]
                # Unlike the train set, keep raw Hz values for evaluation.
                temp_gd = temp_freq[bgnt:endt]


                # left and right pad temp_x to counter shrinking
                # we hope that bgnt - network_time_shrink_size >= 0 and endt + network_time_shrink_size <= temp_cfp.shape[2]
                from config import network_time_shrink_size
                temp_x = temp_cfp[:, :, max(0, bgnt - network_time_shrink_size):min(endt + network_time_shrink_size, temp_cfp.shape[2])]

                # Zero-pad on the left when the context window starts before frame 0.
                if bgnt - network_time_shrink_size < 0:
                    left_padding_size = abs(bgnt - network_time_shrink_size)
                    temp_x = np.concatenate([np.zeros((temp_cfp.shape[0], temp_cfp.shape[1], left_padding_size)), temp_x], axis = 2)

                # Zero-pad on the right when the context window runs past the end.
                if endt + network_time_shrink_size > temp_cfp.shape[2]:
                    # in this temp_gds will have everything at the right end
                    if endt >= temp_cfp.shape[2]:
                        right_padding_size = network_time_shrink_size
                    else:
                        right_padding_size = endt + network_time_shrink_size - temp_cfp.shape[2]

                    temp_x = np.concatenate([temp_x, np.zeros((temp_cfp.shape[0], temp_cfp.shape[1], right_padding_size))], axis = 2)

                # print(temp_x.shape[2], len(temp_gd), 2*network_time_shrink_size)

                # not enough only when we are already at the right end, hence padding gds by 0, it will correspond to white padding which is also 0
                if temp_x.shape[2] < seg_frame + 2*network_time_shrink_size:
                    rl = temp_x.shape[2]
                    # pad_x = np.zeros((temp_x.shape[0], temp_x.shape[1], seg_frame))
                    pad_x = np.zeros((temp_x.shape[0], temp_x.shape[1], seg_frame + 2*network_time_shrink_size))
                    pad_gd = np.zeros(seg_frame)
                    # pad_gd[:rl] = temp_gd
                    pad_gd[:rl - 2*network_time_shrink_size] = temp_gd
                    pad_x[:,:, :rl] = temp_x
                    temp_x = pad_x
                    temp_gd = pad_gd

                # Invariant: features carry exactly the extra context the network shrinks away.
                assert temp_x.shape[2] - len(temp_gd) == 2*network_time_shrink_size


                temp_tx = reorganize(temp_x[:], config.octave_res)
                group_tcfp.append(temp_tx)
                group_cfp.append(temp_x)
                group_gd.append(temp_gd)
            group_tcfp = np.array(group_tcfp)
            group_cfp = np.array(group_cfp)
            group_gd = np.array(group_gd)

            self.data_names.append(ref_file)
            self.data_tcfp.append(group_tcfp)
            self.data_cfp.append(group_cfp)
            self.data_gd.append(group_gd)

    def __len__(self):
        # Number of songs.
        return len(self.data_cfp)

    def __getitem__(self,index):
        # Returns all segments of one song; see class docstring for shapes.
        temp_dict = {
            "cfp": self.data_cfp[index].astype(np.float32),
            "tcfp": self.data_tcfp[index].astype(np.float32),
            "gd": self.data_gd[index],
            "length": self.data_len[index],
            "name": self.data_names[index]
        }
        return temp_dict
252 |
--------------------------------------------------------------------------------
/feature_extraction.py:
--------------------------------------------------------------------------------
1 | import soundfile as sf
2 | import numpy as np
3 | import os
4 | import time
5 |
6 | np.seterr(divide='ignore', invalid='ignore')
7 | import scipy
8 | import scipy.signal
9 | import scipy.fftpack
10 | import pandas as pd
11 | import config
12 |
def STFT(x, fr, fs, Hop, h):
    """Short-time Fourier transform magnitude (MATLAB-style port).

    Parameters
    ----------
    x   : 1-D signal.
    fr  : frequency resolution in Hz; the FFT size is N = fs / fr.
    fs  : sampling rate in Hz.
    Hop : hop size in samples.
    h   : analysis window (1-D array); its length sets the frame span.

    Returns
    -------
    tfr : (N, n_frames) magnitude spectrogram (full N bins, pre-fft layout
          filled circularly around bin 0).
    f   : (N/2,) frequencies in Hz for the lower half of the bins.
    t   : frame start positions in samples.
    N   : FFT size.
    """
    t = np.arange(0, np.ceil(len(x) / float(Hop)) * Hop, Hop)
    N = int(fs / float(fr))
    window_size = len(h)
    f = fs * np.linspace(0, 0.5, int(np.round(N / 2)), endpoint=True)
    Lh = int(np.floor(float(window_size - 1) / 2))  # half window length
    tfr = np.zeros((int(N), len(t)), dtype=np.float32)

    for icol in range(0, len(t)):
        ti = int(t[icol])
        # tau spans the window samples that fit both the signal and the FFT
        # frame; the -1/+1 offsets preserve the original 1-based indexing.
        tau = np.arange(int(-min([round(N / 2.0) - 1, Lh, ti - 1])), \
                        int(min([round(N / 2.0) - 1, Lh, len(x) - ti])))
        # Wrap negative lags circularly into the N-point frame.
        indices = np.mod(N + tau, N) + 1
        # Window and L2-normalize the frame before the FFT.
        tfr[indices - 1, icol] = x[ti + tau - 1] * h[Lh + tau - 1] \
                                 / np.linalg.norm(h[Lh + tau - 1])
    start = time.time()
    tfr = abs(scipy.fftpack.fft(tfr, n=N, axis=0))
    print('fft time:', time.time() - start)
    return tfr, f, t, N
32 |
33 |
def nonlinear_func(X, g, cutoff):
    """Apply the CFP layer nonlinearity.

    For g != 0: rectify negatives (in place on X), zero `cutoff` rows at both
    spectral ends, then raise to the power g. For g == 0: take the log and
    zero the same edge rows. Returns the transformed array.
    """
    cutoff = int(cutoff)
    if g == 0:
        out = np.log(X)
        out[:cutoff, :] = 0
        out[-cutoff:, :] = 0
    else:
        # Note: these edits mutate the caller's array, matching the original.
        X[X < 0] = 0
        X[:cutoff, :] = 0
        X[-cutoff:, :] = 0
        out = np.power(X, g)
    return out
46 |
47 |
def Freq2LogFreqMapping(tfr, f, fr, fc, tc, NumPerOct):
    """Map a linear-frequency spectrogram onto a log-frequency axis.

    Parameters
    ----------
    tfr : (n_bins, n_frames) linear-frequency magnitudes.
    f   : (n_bins,) bin center frequencies in Hz, one per row of tfr.
    fr  : frequency resolution (Hz per linear bin).
    fc  : lowest log-axis center frequency in Hz.
    tc  : shortest period in seconds; 1 / tc is the stop frequency.
    NumPerOct : number of log-frequency bins per octave.

    Returns
    -------
    tfrL : (len(central_freq) - 1, n_frames) log-frequency spectrogram.
    central_freq : list of geometrically spaced center frequencies (Hz).
    """
    StartFreq = fc
    StopFreq = 1 / tc
    # Geometric series of centers from StartFreq up to (but below) StopFreq.
    Nest = int(np.ceil(np.log2(StopFreq / StartFreq)) * NumPerOct)
    central_freq = []
    for i in range(0, Nest):
        CenFreq = StartFreq * pow(2, float(i) / NumPerOct)
        if CenFreq < StopFreq:
            central_freq.append(CenFreq)
        else:
            break

    Nest = len(central_freq)
    # Triangular interpolation weights from linear bins to log bins.
    freq_band_transformation = np.zeros((Nest - 1, len(f)), dtype=np.float32)
    for i in range(1, Nest - 1):
        l = int(round(central_freq[i - 1] / fr))
        r = int(round(central_freq[i + 1] / fr) + 1)
        if l >= r - 1:
            # At low frequencies neighboring centers collapse onto a single
            # linear bin; give it full weight.
            freq_band_transformation[i, l] = 1
        else:
            for j in range(l, r):
                # BUG FIX: `entral_freq[i]` was a NameError typo for
                # `central_freq[i]`, crashing the rising-slope branch.
                if f[j] > central_freq[i - 1] and f[j] <= central_freq[i]:
                    freq_band_transformation[i, j] = (f[j] - central_freq[i - 1]) / (
                        central_freq[i] - central_freq[i - 1])
                elif f[j] > central_freq[i] and f[j] < central_freq[i + 1]:
                    freq_band_transformation[i, j] = (central_freq[i + 1] - f[j]) / (
                        central_freq[i + 1] - central_freq[i])
    tfrL = np.dot(freq_band_transformation, tfr)
    return tfrL, central_freq
94 |
95 |
def Quef2LogFreqMapping(ceps, q, fs, fc, tc, NumPerOct):
    """Map a cepstrum (quefrency axis) onto the same log-frequency axis as
    Freq2LogFreqMapping, using triangular interpolation.

    ceps : (n_quef, n_frames) cepstral magnitudes.
    q    : (n_quef,) quefrencies in seconds; quefrency q maps to 1/q Hz.
    fs   : sampling rate in Hz.
    fc, tc, NumPerOct : log-axis parameters as in Freq2LogFreqMapping.

    Returns (tfrL, central_freq): the remapped cepstrum and the list of
    log-spaced center frequencies in Hz.
    """
    start_freq = fc
    stop_freq = 1 / tc
    n_est = int(np.ceil(np.log2(stop_freq / start_freq)) * NumPerOct)
    central_freq = []
    for k in range(n_est):
        cen = start_freq * pow(2, float(k) / NumPerOct)
        if cen >= stop_freq:
            break
        central_freq.append(cen)
    # Frequency of each quefrency bin (q[0] = 0 maps to inf; never selected).
    f = 1 / q

    n_centers = len(central_freq)
    weights = np.zeros((n_centers - 1, len(f)), dtype=np.float32)
    for i in range(1, n_centers - 1):
        lo = central_freq[i - 1]
        mid = central_freq[i]
        hi = central_freq[i + 1]
        # Quefrency indices covering the (lo, hi) frequency interval.
        for j in range(int(round(fs / hi)), int(round(fs / lo) + 1)):
            if lo < f[j] < mid:
                weights[i, j] = (f[j] - lo) / (mid - lo)
            elif mid < f[j] < hi:
                weights[i, j] = (hi - f[j]) / (hi - mid)

    tfrL = np.dot(weights, ceps)
    return tfrL, central_freq
131 |
132 |
def CFP_filterbank(x, fr, fs, Hop, h, fc, tc, g, NumPerOctave):
    """Combined Frequency and Periodicity (CFP) analysis of signal ``x``.

    Alternates FFT layers between the spectral and cepstral domains,
    applying the power/log nonlinearity with exponents ``g`` after each
    layer, then maps all three representations to a log-frequency axis.

    Parameters
    ----------
    x    : 1-D audio signal.
    fr   : frequency resolution in Hz (FFT size = fs / fr).
    fs   : sampling rate in Hz.
    Hop  : hop size in samples.
    h    : analysis window.
    fc   : lowest center frequency (Hz); also the cepstral cutoff.
    tc   : shortest period (s); 1/tc is the highest center frequency.
    g    : array of per-layer gamma exponents (len >= 1).
    NumPerOctave : log-frequency bins per octave.

    Returns
    -------
    tfrL0, tfrLF, tfrLQ : log-frequency spectrogram, generalized spectrum
        (GCoS) and generalized cepstrum (GC), all (n_log_bins - 1, n_frames).
    f, q : retained frequency (Hz) and quefrency (s) axes.
    t    : frame positions in samples.
    central_frequencies : log-axis center frequencies in Hz.
    """
    NumofLayer = np.size(g)
    N = int(fs / float(fr))
    [tfr, f, t, N] = STFT(x, fr, fs, Hop, h)
    tfr = np.power(abs(tfr), g[0])
    tfr0 = tfr # original STFT
    ceps = np.zeros(tfr.shape)

    from config import include_adjusted_exp

    # Optional exponential frequency weighting (the paper's z-CFP variant);
    # with the flag off the weight is exp(0) == 1 everywhere (a no-op).
    if include_adjusted_exp:
        exp_rate = np.exp(0.0006*f)
    else:
        exp_rate = np.exp(0.0000*f)
    # Mirror the weight so it covers the full (symmetric) N-point spectrum.
    z_trans = np.concatenate([exp_rate, np.flip(exp_rate)], axis = 0)

    # print(f[:10], f[-10:])
    # print(exp_rate[:10], exp_rate[-10:])

    # print(z_trans.shape)
    # sys.exit()

    if NumofLayer >= 2:
        for gc in range(1, NumofLayer):
            if np.remainder(gc, 2) == 1:
                # Odd layer: spectrum -> (weighted) cepstrum, cutoff in quefrency.
                tc_idx = round(fs * tc)
                # ceps = np.real(np.fft.fft(tfr, axis=0)) / np.sqrt(N)
                ceps = np.real(np.fft.fft(tfr*np.expand_dims(z_trans, axis = 1), axis=0)) / np.sqrt(N)
                # ceps_2 = np.real(np.fft.fft(tfr, axis=0)) / np.sqrt(N)

                ceps = nonlinear_func(ceps, g[gc], tc_idx)
                # ceps_2 = nonlinear_func(ceps_2, g[gc], tc_idx)
            else:
                # Even layer: cepstrum -> spectrum, cutoff in frequency.
                fc_idx = round(fc / fr)
                tfr = np.real(np.fft.fft(ceps, axis=0)) / np.sqrt(N)
                tfr = nonlinear_func(tfr, g[gc], fc_idx)

    # Keep only the non-redundant lower half of each representation.
    tfr0 = tfr0[:int(round(N / 2)), :]
    tfr = tfr[:int(round(N / 2)), :]
    ceps = ceps[:int(round(N / 2)), :]

    # Trim the frequency axis to [0, 1/tc] and the quefrency axis to [0, 1/fc].
    HighFreqIdx = int(round((1 / tc) / fr) + 1)
    f = f[:HighFreqIdx]
    tfr0 = tfr0[:HighFreqIdx, :]
    tfr = tfr[:HighFreqIdx, :]
    HighQuefIdx = int(round(fs / fc) + 1)

    # print(f[:10], f[-10:])
    # print(exp_rate[:HighFreqIdx][:10], exp_rate[:HighFreqIdx][-10:])
    # sys.exit()

    q = np.arange(HighQuefIdx) / float(fs)
    # print("q len", len(q), fs, fc)
    # sys.exit()

    ceps = ceps[:HighQuefIdx, :]
    # ceps_2 = ceps_2[:HighQuefIdx, :]

    tfrL0, central_frequencies = Freq2LogFreqMapping(tfr0, f, fr, fc, tc, NumPerOctave)
    tfrLF, central_frequencies = Freq2LogFreqMapping(tfr, f, fr, fc, tc, NumPerOctave)
    tfrLQ, central_frequencies = Quef2LogFreqMapping(ceps, q, fs, fc, tc, NumPerOctave)

    # from dummy_utils import plot_multi_sequences
    # time_index = 200
    # print(np.array(central_frequencies).shape, tfrL0.shape)
    # sys.exit()
    # plot_multi_sequences(central_frequencies[:-1], [tfrL0[:, time_index], tfrLF[:, time_index], tfrLQ[:, time_index]], ["spec", "GCoS", "GC"])
    # plot_multi_sequences(f, [(tfr0**(1/g[0]))[:, time_index], (tfr**(1/g[2]))[:, time_index], (ceps**(1/g[1]))[:, time_index], (tfr0**(1/g[0])*np.expand_dims(np.exp(0.0015*f), axis = 1))[:, time_index]], ["spec", "GCoS", "GC", "spec2"])

    # plot_multi_sequences(f, [tfr0[:, time_index], tfr[:, time_index], ceps[:, time_index], ceps_2[:, time_index], (tfr0*np.expand_dims(np.exp(0.00036*f), axis = 1))[:, time_index], ((tfr0**(1/g[0])*np.expand_dims(np.exp(0.0015*f), axis = 1))**g[0])[:, time_index]], ["spec", "GCoS", "GC", "GC2", "spec2", "spec3"])

    # from dummy_utils import plot_sequence
    # plot_sequence(list(range(len(z_trans))), z_trans)

    # sys.exit()


    return tfrL0, tfrLF, tfrLQ, f, q, t, central_frequencies
211 |
212 |
def load_audio(filepath, sr=None, mono=True, dtype='float32'):
    """Load an audio file, optionally down-mixing and resampling.

    mono=True averages all channels; mono="Left"/"Right" keeps channel 0/1
    of a multi-channel file. If ``sr`` is given, the signal is resampled to
    that rate. Returns (samples, sample_rate).
    """
    if '.mp3' in filepath:
        # soundfile cannot read mp3 here; decode via pydub to a temp wav.
        from pydub import AudioSegment
        import tempfile
        import os
        decoded = AudioSegment.from_mp3(filepath)
        _, tmp_path = tempfile.mkstemp()
        decoded.export(tmp_path, format="wav")
        del decoded
        x, fs = sf.read(tmp_path)
        os.remove(tmp_path)
    else:
        x, fs = sf.read(filepath)

    multichannel = len(x.shape) > 1
    if multichannel:
        if mono == True:
            x = np.mean(x, axis=1)
        elif mono == "Left":
            x = x[:, 0]
        elif mono == "Right":
            x = x[:, 1]

    if sr:
        x = scipy.signal.resample_poly(x, sr, fs)
        fs = sr

    x = x.astype(dtype)
    return x, fs
246 |
247 |
def feature_extraction(x, fs, Hop=512, Window=2049, StartFreq=80.0, StopFreq=1000.0, NumPerOct=48):
    """Run the full CFP pipeline on signal ``x`` at sampling rate ``fs``.

    Returns (Z, time, CenFreq, tfrL0, tfrLF, tfrLQ) where Z = tfrLF * tfrLQ
    is the combined frequency/periodicity salience, time is in seconds, and
    CenFreq lists the log-axis center frequencies in Hz.
    """
    fr = 2.0  # frequency resolution (Hz per linear bin)
    # FIX: blackmanharris lives in scipy.signal.windows; the bare
    # scipy.signal alias was deprecated and removed in SciPy 1.13.
    try:
        h = scipy.signal.windows.blackmanharris(Window)  # analysis window
    except AttributeError:
        h = scipy.signal.blackmanharris(Window)  # very old SciPy fallback
    g = np.array([0.24, 0.6, 1])  # per-layer gamma value

    tfrL0, tfrLF, tfrLQ, f, q, t, CenFreq = CFP_filterbank(x, fr, fs, Hop, h, StartFreq, 1 / StopFreq, g, NumPerOct)
    Z = tfrLF * tfrLQ
    time = t / fs
    return Z, time, CenFreq, tfrL0, tfrLF, tfrLQ
257 |
258 |
def midi2hz(midi):
    """Convert a (possibly fractional) MIDI note number to Hz (A4 = 69 = 440 Hz)."""
    semitone_offset = (midi - 69) / 12.0
    return 440 * 2 ** semitone_offset
261 |
262 |
def hz2midi(hz):
    """Convert a frequency in Hz to a (fractional) MIDI note number (440 Hz = 69)."""
    octaves_above_a4 = np.log2(hz / 440.0)
    return 69 + 12 * octaves_above_a4
265 |
266 |
def get_CenFreq(StartFreq=80, StopFreq=1000, NumPerOct=48):
    """Return geometrically spaced center frequencies from StartFreq up to
    (but strictly below) StopFreq, NumPerOct bins per octave."""
    upper_bound = int(np.ceil(np.log2(StopFreq / StartFreq)) * NumPerOct)
    freqs = []
    for idx in range(upper_bound):
        candidate = StartFreq * pow(2, float(idx) / NumPerOct)
        if candidate >= StopFreq:
            break
        freqs.append(candidate)
    return freqs
277 |
278 |
def get_time(fs, Hop, end):
    """Frame timestamps in seconds: Hop/fs, 2*Hop/fs, ... strictly below `end`."""
    step = Hop / fs
    return np.arange(step, end, step)
281 |
282 |
def lognorm(x):
    """Compress dynamic range via log(1 + x)."""
    shifted = 1 + x
    return np.log(shifted)
285 |
286 |
def norm(x):
    """Min-max normalize x into [0, 1]."""
    lo = np.min(x)
    hi = np.max(x)
    return (x - lo) / (hi - lo)
289 |
# Project-wide sampling rate and hop size; cast to int because they are
# used below for window sizing and (by callers) array indexing.
from config import fs, hop
fs = int(fs)
hop = int(hop)
293 |
def cfp_process(fpath, ypath=None, csv=False, sr=None, hop=hop, model_type='vocal', mono=True):
    """Load an audio file, compute its 3-channel CFP feature, and optionally
    load the ground-truth f0 annotation.

    Parameters
    ----------
    fpath : audio file path.
    ypath : optional path to an f0 annotation (txt, or csv when csv=True).
    csv   : parse ypath as a 2-column time/freq CSV instead of loadtxt.
    sr    : target sampling rate passed to load_audio.
    hop   : hop size in samples (defaults to the config value).
    model_type : must contain 'vocal'; 'melody' raises NotImplementedError.
    mono  : channel selection forwarded to load_audio (True/"Left"/"Right").

    Returns
    -------
    (W, CenFreq, time) or (W, gt, CenFreq, time) when ypath is given, where
    W is (3, n_log_bins, n_frames): normalized spectrogram, GCoS and GC.
    """
    print('CFP process in ' + str(fpath) + ' ... (It may take some times)')
    y, sr = load_audio(fpath, sr=sr, mono=mono)
    # NOTE(review): if model_type contains neither 'vocal' nor 'melody',
    # tfrL0/... below are never assigned and a NameError follows — confirm
    # callers only pass 'vocal' variants.
    if 'vocal' in model_type:
        # 1250
        # 32 2050
        # Z, time, CenFreq, tfrL0, tfrLF, tfrLQ = feature_extraction(y, sr, Hop=hop, Window=768, StartFreq=32, StopFreq=2050, NumPerOct=60)
        # Window is scaled so its duration matches a 768-sample window at 8 kHz.
        Z, time, CenFreq, tfrL0, tfrLF, tfrLQ = feature_extraction(y, sr, Hop=hop, Window=int(768*fs/8000), StartFreq=32, StopFreq=2050, NumPerOct=60)
    if 'melody' in model_type:
        # Z, time, CenFreq, tfrL0, tfrLF, tfrLQ = feature_extraction(y, sr, Hop=hop, Window=768, StartFreq=20.0, StopFreq=2048.0, NumPerOct=60)
        raise NotImplementedError

    # Log-compress then min-max normalize each representation; stack as channels.
    tfrL0 = norm(lognorm(tfrL0))[np.newaxis, :, :]
    tfrLF = norm(lognorm(tfrLF))[np.newaxis, :, :]
    tfrLQ = norm(lognorm(tfrLQ))[np.newaxis, :, :]
    W = np.concatenate((tfrL0, tfrLF, tfrLQ), axis=0)
    print('Done!')
    print('Data shape: ' + str(W.shape))
    if ypath:
        if csv:
            # First CSV row is treated as a header and skipped via [1:].
            ycsv = pd.read_csv(ypath, names=["time", "freq"])
            gt0 = ycsv['time'].values
            gt0 = gt0[1:, np.newaxis]

            gt1 = ycsv['freq'].values
            gt1 = gt1[1:, np.newaxis]
            gt = np.concatenate((gt0, gt1), axis=1)
        else:
            gt = np.loadtxt(ypath)
        return W, gt, CenFreq, time
    else:
        return W, CenFreq, time
326 |
327 |
if __name__ == '__main__':
    # Pre-compute and cache CFP features for every file listed in the train
    # and test split files, skipping entries whose cache already loads cleanly.
    datasets = [config.train_file] + config.test_file
    data_dir = "/home/ken/Downloads/labels_and_waveform/"
    cfp_save_dir = "/home/ken/Downloads/cfp_saved/"

    print(datasets)


    # load VOICED version
    # load INSTRUMENTAL version


    for dataset_index, item in enumerate(datasets):
        txtpath = item
        f = open(txtpath)
        filelists = f.readlines()

        for i, file in enumerate(filelists):

            print(i)
            filename = file.rstrip('\n')


            # "_vocal_only" / "_instrumental_only" entries share the same wav
            # as the base entry; the stem channel is selected via `mono`
            # (vocals on the right channel, accompaniment on the left).
            if "_vocal_only" in filename:
                wavpath = data_dir + filename.replace('_vocal_only.npy', '.wav')
                mono = "Right"
                original_f0path = data_dir + filename.replace('_vocal_only.npy', '.txt')

            elif "_instrumental_only" in filename:
                wavpath = data_dir + filename.replace('_instrumental_only.npy', '.wav')
                original_f0path = data_dir + filename.replace('_instrumental_only.npy', '.txt')
                mono = "Left"
            else:
                wavpath = data_dir + filename.replace('.npy', '.wav')
                mono = True


            import shutil
            # Synthesize a label file for the stem variants if missing:
            # vocals reuse the base f0; instrumentals get an all-zero f0.
            f0path = data_dir + filename.replace('.npy', '.txt')
            if "_vocal_only" in filename and not os.path.isfile(f0path):
                shutil.copyfile(original_f0path, f0path)
            elif "_instrumental_only" in filename and not os.path.isfile(f0path):
                ref_temp = np.loadtxt(original_f0path)
                ref_time = ref_temp[:, 0]
                empty_ref_freq = np.zeros(len(ref_time))
                np.savetxt(f0path, np.c_[ref_time, empty_ref_freq], fmt = "%.3f")



            magfile = cfp_save_dir + filename
            print(magfile)


            if not os.path.exists(f0path):
                raise Exception("Not f0 file!! for %s" %(f0path))



            # Only recompute when the cached .npy is absent or unreadable
            # (a bare np.load doubles as a corruption check).
            successfully_loaded = False
            if os.path.exists(magfile):
                try:
                    np.load(magfile)
                    print("Exist:", filename)
                    successfully_loaded = True
                except:
                    pass

            if successfully_loaded == False:
                W, CenFreq, _ = cfp_process(wavpath, sr=fs, mono=mono)

                np.save(magfile, W)
--------------------------------------------------------------------------------
/ftanet.py:
--------------------------------------------------------------------------------
1 | # FTANet
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
5 |
6 |
7 |
class SF_Module(nn.Module):
    """Selective-fusion module: sums the input branches, squeezes them into a
    bottleneck descriptor, and re-weights each branch with learned
    per-channel attention before summing again (SKNet-style)."""

    def __init__(self, input_num, n_channel, reduction, limitation):
        super(SF_Module, self).__init__()
        # Fuse: global average pool -> BatchNorm -> bottleneck projection.
        self.f_avg = nn.AdaptiveAvgPool2d((1, 1))
        self.f_bn = nn.BatchNorm1d(n_channel)
        self.f_linear = nn.Sequential(
            nn.Linear(n_channel, max(n_channel // reduction, limitation)),
            nn.SELU()
        )
        # Select: one projection head back to n_channel per input branch.
        self.s_linear = nn.ModuleList([
            nn.Linear(max(n_channel // reduction, limitation), n_channel)
            for _ in range(input_num)
        ])


    def forward(self, x):
        # x: sequence of branch tensors, each [bs, c, h, w]
        fused = x[0]
        for branch in x[1:]:
            fused = fused + branch

        # Squeeze the fused map to a per-channel descriptor [bs, c].
        descriptor = self.f_avg(fused)
        descriptor = descriptor.view(descriptor.shape[0], descriptor.shape[1])
        descriptor = self.f_linear(self.f_bn(descriptor))

        # One attention logit vector per branch -> [bs, c, input_num].
        logits = torch.stack(
            [head(descriptor) for head in self.s_linear], dim=-1)
        # NOTE(review): softmax runs over dim=-2 (the channel axis), not over
        # the branch axis — preserved as-is; confirm this is intentional.
        weights = nn.Softmax(dim=-2)(logits)

        # Weighted sum of the branches -> [bs, c, h, w].
        out = None
        for idx, branch in enumerate(x):
            scaled = branch * weights[:, :, idx][:, :, None, None]
            out = scaled if out is None else out + scaled
        return out
55 |
56 |
57 |
class FTA_Module(nn.Module):
    """Frequency-Temporal Attention module.

    Returns three parallel views of the (batch-normed) input: a 1x1-conv
    residual branch, a time-attention branch and a frequency-attention
    branch.  `shape` is (freq, time, in_channels, out_channels); `kt`/`kf`
    are the 1-D kernel sizes of the time/frequency attention convs.
    """

    def __init__(self, shape, kt, kf):
        super(FTA_Module, self).__init__()
        self.bn = nn.BatchNorm2d(shape[2])
        # Residual branch: plain 1x1 projection.
        self.r_cn = nn.Sequential(
            nn.Conv2d(shape[2], shape[3], (1,1)),
            nn.ReLU()
        )
        # Time-attention branch: 1-D convs over frames + 2-D feature convs.
        self.ta_cn1 = nn.Sequential(
            nn.Conv1d(shape[2], shape[3], kt, padding=(kt - 1) // 2),
            nn.SELU()
        )
        self.ta_cn2 = nn.Sequential(
            nn.Conv1d(shape[3], shape[3], kt, padding=(kt - 1) // 2),
            nn.SELU()
        )
        self.ta_cn3 = nn.Sequential(
            nn.Conv2d(shape[2], shape[3], 3, padding=1),
            nn.SELU()
        )
        self.ta_cn4 = nn.Sequential(
            nn.Conv2d(shape[3], shape[3], 5, padding=2),
            nn.SELU()
        )
        # Frequency-attention branch: mirror of the time branch over bins.
        self.fa_cn1 = nn.Sequential(
            nn.Conv1d(shape[2], shape[3], kf, padding=(kf - 1) // 2),
            nn.SELU()
        )
        self.fa_cn2 = nn.Sequential(
            nn.Conv1d(shape[3], shape[3], kf, padding=(kf - 1) // 2),
            nn.SELU()
        )
        self.fa_cn3 = nn.Sequential(
            nn.Conv2d(shape[2], shape[3], 3, padding=1),
            nn.SELU()
        )
        self.fa_cn4 = nn.Sequential(
            nn.Conv2d(shape[3], shape[3], 5, padding=2),
            nn.SELU()
        )

    def _time_attended(self, x):
        """Attention over frames: pool out frequency, conv, softmax, reweight."""
        att = torch.mean(x, dim=-2)                      # [bs, c, t]
        att = self.ta_cn2(self.ta_cn1(att))
        att = F.softmax(att, dim=-1).unsqueeze(dim=-2)   # [bs, c', 1, t]
        feat = self.ta_cn4(self.ta_cn3(x))
        return feat * att

    def _freq_attended(self, x):
        """Attention over bins: pool out time, conv, softmax, reweight."""
        att = torch.mean(x, dim=-1)                      # [bs, c, f]
        att = self.fa_cn2(self.fa_cn1(att))
        att = F.softmax(att, dim=-1).unsqueeze(dim=-1)   # [bs, c', f, 1]
        feat = self.fa_cn4(self.fa_cn3(x))
        return feat * att

    def forward(self, x):
        """x: [bs, in_ch, f, t] -> (residual, time-attended, freq-attended)."""
        x = self.bn(x)
        return self.r_cn(x), self._time_attended(x), self._freq_attended(x)
123 |
124 |
class FTAnet(nn.Module):
    """FTANet: U-shaped stack of FTA/SF modules for melody extraction.

    Output is [bs, 1, 1 + freq_bin, time]: one "non-melody" row produced by
    `bm_layer` concatenated above the per-bin saliency map, followed by a
    softmax over the frequency axis.  Returns (softmax output, raw logits).
    """

    def __init__(self, freq_bin = 360, time_segment = 128):
        super(FTAnet, self).__init__()
        self.bn_layer = nn.BatchNorm2d(3)
        # bm: collapses all frequency bins into a single "non-melody" row.
        # NOTE(review): strides 4*3*6*5 = 360 — assumes freq_bin == 360.
        self.bm_layer = nn.Sequential(
            nn.Conv2d(3, 16, (4,1), stride=(4,1)),
            nn.SELU(),
            nn.Conv2d(16, 16, (3,1), stride=(3,1)),
            nn.SELU(),
            nn.Conv2d(16, 16, (6,1), stride=(6,1)),
            nn.SELU(),
            nn.Conv2d(16, 1, (5,1), stride=(5,1)),
            nn.SELU()
        )

        # fta_module (encoder / bottleneck / decoder resolutions)
        self.fta_1 = FTA_Module((freq_bin, time_segment, 3, 32), 3, 3)
        self.fta_2 = FTA_Module((freq_bin // 2, time_segment // 2, 32, 64), 3, 3)
        self.fta_3 = FTA_Module((freq_bin // 4, time_segment // 4, 64, 128), 3, 3)
        self.fta_4 = FTA_Module((freq_bin // 4, time_segment // 4, 128, 128), 3, 3)
        self.fta_5 = FTA_Module((freq_bin // 2, time_segment // 2, 128, 64), 3, 3)
        self.fta_6 = FTA_Module((freq_bin, time_segment, 64, 32), 3, 3)
        self.fta_7 = FTA_Module((freq_bin, time_segment, 32, 1), 3, 3)

        # sf_module: fuses the three FTA branches at every stage.
        self.sf_1 = SF_Module(3, 32, 4, 4)
        self.sf_2 = SF_Module(3, 64, 4, 4)
        self.sf_3 = SF_Module(3, 128, 4, 4)
        self.sf_4 = SF_Module(3, 128, 4, 4)
        self.sf_5 = SF_Module(3, 64, 4, 4)
        self.sf_6 = SF_Module(3, 32, 4, 4)
        self.sf_7 = SF_Module(3, 1, 4, 4)

        # maxpool / upsample between resolutions
        self.mp_1 = nn.MaxPool2d((2,2), (2,2))
        self.mp_2 = nn.MaxPool2d((2,2), (2,2))
        self.up_1 = nn.Upsample(scale_factor=2)
        self.up_2 = nn.Upsample(scale_factor=2)

    def forward(self, x):
        x = self.bn_layer(x)
        bm = self.bm_layer(x)

        # Each stage: FTA attention -> selective fusion -> optional resize.
        stages = (
            (self.fta_1, self.sf_1, self.mp_1),
            (self.fta_2, self.sf_2, self.mp_2),
            (self.fta_3, self.sf_3, None),
            (self.fta_4, self.sf_4, self.up_1),
            (self.fta_5, self.sf_5, self.up_2),
            (self.fta_6, self.sf_6, None),
            (self.fta_7, self.sf_7, None),
        )
        for fta, sf, resize in stages:
            x_r, x_t, x_f = fta(x)
            x = sf([x_r, x_t, x_f])
            if resize is not None:
                x = resize(x)

        output_pre = torch.cat([bm, x], dim = 2)
        output = F.softmax(output_pre, dim=-2)

        return output, output_pre
--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
1 | """
2 | Ke Chen knutchen@ucsd.edu
3 |
4 | Tone-Octave Network - main file
5 |
6 | This file contains the main script
7 |
8 | """
9 | import os
10 | import random
11 | import numpy as np
12 | import argparse
13 |
14 | import torch
15 | from torch import nn
16 | import torch.nn.functional as F
17 | from torch.utils.data import DataLoader
18 | import pytorch_lightning as pl
19 |
20 | import config
21 | from data_generator import TONetTrainDataset, TONetTestDataset
22 | from msnet import MSnet
23 | from tonet import TONet
24 | from multi_dr import MLDRnet
25 | from ftanet import FTAnet
26 | from mcdnn import MCDNN
27 |
28 | from util import tonpy_fn
29 |
30 |
def train():
    """Assemble dataloaders and the TONet model from `config`, then fit."""
    train_dataloader = DataLoader(
        dataset=TONetTrainDataset(data_list=config.train_file, config=config),
        shuffle=True,
        num_workers=config.n_workers,
        batch_size=config.batch_size,
        drop_last=True,
    )
    # One evaluation dataloader per test split (batch size 1, numpy collate).
    test_dataloaders = [
        DataLoader(
            dataset=TONetTestDataset(data_list=split, config=config),
            shuffle=False,
            batch_size=1,
            collate_fn=tonpy_fn,
        )
        for split in config.test_file
    ]

    def build_backbone():
        # A fresh melody-extraction backbone of the configured type.
        if config.model_type == "MCDNN":
            return MCDNN()
        if config.model_type == "MLDRNet":
            return MLDRnet()
        if config.model_type == "FTANet":
            return FTAnet(freq_bin=config.freq_bin, time_segment=config.seg_frame)
        if config.model_type == "MSNet":
            return MSnet()
        # Default backbone: the piano-transcription CRNN.
        from piano_net import AcousticModelCRnn8Dropout
        return AcousticModelCRnn8Dropout()

    # Always build both branches (keeps RNG consumption identical across
    # ablation modes), then drop the Tone-CFP branch where it is unused.
    me_model = build_backbone()
    me_model_r = build_backbone()
    if config.ablation_mode in ("single", "spl", "spat"):
        me_model_r = None

    model = TONet(
        l_model=me_model,
        r_model=me_model_r,
        config=config,
        loss_func=nn.BCELoss(),
        mode=config.ablation_mode,
    )
    trainer = pl.Trainer(
        gpus=1,
        max_epochs=config.max_epoch,
        auto_lr_find=True,
        sync_batchnorm=True,
        num_sanity_val_steps=0,
    )
    trainer.fit(model, train_dataloader, test_dataloaders)
98 |
99 |
def test():
    """Rebuild the model from `config`, load `config.resume_checkpoint`, evaluate."""
    # One evaluation dataloader per test split (batch size 1, numpy collate).
    test_dataloaders = [
        DataLoader(
            dataset=TONetTestDataset(data_list=split, config=config),
            shuffle=False,
            batch_size=1,
            collate_fn=tonpy_fn,
        )
        for split in config.test_file
    ]

    def build_backbone():
        # A fresh melody-extraction backbone of the configured type.
        if config.model_type == "MCDNN":
            return MCDNN()
        if config.model_type == "MLDRNet":
            return MLDRnet()
        if config.model_type == "FTANet":
            return FTAnet(freq_bin=config.freq_bin, time_segment=config.seg_frame)
        if config.model_type == "MSNet":
            return MSnet()
        # Default backbone: the piano-transcription CRNN.
        from piano_net import AcousticModelCRnn8Dropout
        return AcousticModelCRnn8Dropout()

    me_model = build_backbone()
    me_model_r = build_backbone()
    if config.ablation_mode in ("single", "spl", "spat"):
        me_model_r = None

    model = TONet(
        l_model=me_model,
        r_model=me_model_r,
        config=config,
        loss_func=nn.BCELoss(),
        mode=config.ablation_mode,
    )
    trainer = pl.Trainer(
        gpus=1,
        max_epochs=config.max_epoch,
        auto_lr_find=True,
        sync_batchnorm=True,
    )
    # Restore the weights saved by a previous training run.
    ckpt = torch.load(config.resume_checkpoint, map_location="cpu")
    model.load_state_dict(ckpt)
    trainer.test(model, test_dataloaders)
158 |
159 |
if __name__ == "__main__":
    # CLI entry point: `python main.py train` or `python main.py test`.
    parser = argparse.ArgumentParser(prog = "TONET for Singing Melody Extraction")
    subparsers = parser.add_subparsers(dest = "mode")
    subparsers.add_parser("train")
    subparsers.add_parser("test")
    args = parser.parse_args()
    pl.seed_everything(config.random_seed)
    if args.mode == "train":
        train()
    elif args.mode == "test":
        test()
    else:
        # No subcommand given: show usage instead of exiting silently.
        parser.print_help()
171 |
172 |
--------------------------------------------------------------------------------
/mcdnn.py:
--------------------------------------------------------------------------------
1 | # MCDNN from https://github.com/LqNoob/MelodyExtraction-MCDNN/blob/master/MelodyExtraction_SCDNN.py
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
class MCDNN(nn.Module):
    """Fully-connected melody extractor (MCDNN/SCDNN).

    Each time frame's stacked CFP columns (3 * 360 values) are mapped to a
    360-bin saliency vector plus one "non-melody" scalar; the two are
    concatenated along the frequency axis and softmax-normalised.
    """

    def __init__(self):
        super(MCDNN, self).__init__()

        # Per-frame saliency head: 1080 -> 360 bins.
        self.mcdnn = nn.Sequential(
            nn.Linear(360 * 3, 2048),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(2048, 1024),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(1024, 512),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(512, 360)
        )
        # Per-frame "non-melody" head: 1080 -> 1 scalar.
        self.bm_layer = nn.Sequential(
            nn.Linear(360 * 3, 512),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(512, 128),
            nn.Dropout(0.2),
            nn.SELU(),
            nn.Linear(128, 1),
            nn.SELU()
        )

    def forward(self, x):
        """x: [bs, 3, 360, t] -> (softmax over dim 2, raw logits), each [bs, 1, 361, t]."""
        batch, frames = x.shape[0], x.shape[-1]
        per_frame = x.reshape(batch, -1, frames).permute(0, 2, 1)  # [bs, t, 3*360]

        saliency = self.mcdnn(per_frame)        # [bs, t, 360]
        non_melody = self.bm_layer(per_frame)   # [bs, t, 1]

        saliency = saliency.permute(0, 2, 1).unsqueeze(dim=1)      # [bs, 1, 360, t]
        non_melody = non_melody.permute(0, 2, 1).unsqueeze(dim=1)  # [bs, 1, 1, t]

        output_pre = torch.cat((non_melody, saliency), dim=2)      # [bs, 1, 361, t]
        output = F.softmax(output_pre, dim=2)
        return output, output_pre
--------------------------------------------------------------------------------
/msnet.py:
--------------------------------------------------------------------------------
1 | # MSNEt from https://github.com/bill317996/Melody-extraction-with-melodic-segnet/blob/master/MSnet/model.py
2 | # We only use the vocal version
3 | import torch
4 | import torch.nn as nn
5 | import torch.nn.functional as F
class MSnet(nn.Module):
    """Melodic SegNet (vocal variant).

    Conv encoder with index-tracking max pooling over the frequency axis,
    a "bottom" head that collapses frequency into one non-melody row, and
    an unpooling decoder that restores the 360-bin saliency map.  Returns
    (softmax over the frequency axis, raw logits), each [bs, 1, 361, t].
    """

    def __init__(self):
        super(MSnet, self).__init__()

        self.conv1 = nn.Sequential(
            nn.BatchNorm2d(3),
            nn.Conv2d(3, 32, 5, padding=2),
            nn.SELU()
        )
        # return_indices so the decoder can unpool to the exact positions.
        self.pool1 = nn.MaxPool2d((4,1), return_indices=True)

        self.conv2 = nn.Sequential(
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 64, 5, padding=2),
            nn.SELU()
        )
        self.pool2 = nn.MaxPool2d((3,1), return_indices=True)

        self.conv3 = nn.Sequential(
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 128, 5, padding=2),
            nn.SELU()
        )
        self.pool3 = nn.MaxPool2d((6,1), return_indices=True)

        # Collapses the remaining 5 frequency rows into the non-melody row
        # (kernel 5 with no frequency padding).
        self.bottom = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 1, 5, padding=(0,2)),
            nn.SELU()
        )

        self.up_pool3 = nn.MaxUnpool2d((6,1))
        self.up_conv3 = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Conv2d(128, 64, 5, padding=2),
            nn.SELU()
        )

        self.up_pool2 = nn.MaxUnpool2d((3,1))
        self.up_conv2 = nn.Sequential(
            nn.BatchNorm2d(64),
            nn.Conv2d(64, 32, 5, padding=2),
            nn.SELU()
        )

        self.up_pool1 = nn.MaxUnpool2d((4,1))
        self.up_conv1 = nn.Sequential(
            nn.BatchNorm2d(32),
            nn.Conv2d(32, 1, 5, padding=2),
            nn.SELU()
        )

        self.softmax = nn.Softmax(dim=2)

    def forward(self, x):
        """x: [bs, 3, 360, t] CFP feature -> (output, output_pre)."""
        c1, ind1 = self.pool1(self.conv1(x))
        c2, ind2 = self.pool2(self.conv2(c1))
        c3, ind3 = self.pool3(self.conv3(c2))
        bm = self.bottom(c3)
        u3 = self.up_conv3(self.up_pool3(c3, ind3))
        u2 = self.up_conv2(self.up_pool2(u3, ind2))
        u1 = self.up_conv1(self.up_pool1(u2, ind1))
        # Build the [non-melody row | saliency] tensor once and reuse it;
        # the original recomputed torch.cat for the softmax input.
        output_pre = torch.cat((bm, u1), dim=2)
        output = self.softmax(output_pre)

        return output, output_pre
--------------------------------------------------------------------------------
/multi_dr.py:
--------------------------------------------------------------------------------
1 | # Multi-Dilation Model by self-implementation
2 | import torch
3 | import torch.nn as nn
4 | import torch.nn.functional as F
class MLDRnet(nn.Module):
    """Multi-Level Dilated-Residual network (self-implemented).

    The encoder processes the input CFP at three resolutions (full, 1/2,
    1/4 via stride-2 convs), runs each through a densely-connected
    dilated-conv block, exchanges features between resolutions, and fuses
    everything back at full resolution.  The decoder emits a per-bin
    saliency map with a "non-melody" row prepended along the frequency
    axis.  `forward` returns (softmax over dim 2, raw logits).
    """
    def __init__(self, freq_bin = 360):
        super(MLDRnet, self).__init__()

        # Encoder
        self.encoder_bn = nn.BatchNorm2d(3)
        # Stride-2 convs produce the 1/2 and 1/4 resolution branches.
        self.encoder_c2_1 = nn.Conv2d(3, 3, 3, padding=1, stride=2)
        self.encoder_c3_1 = nn.Conv2d(3, 3, 3, padding=1, stride=2)

        # Downsampling paths between branches after the first dilation stage.
        self.encoder_c1_1 = nn.Conv2d(10, 10, 3, padding=1, stride=2)
        self.encoder_c1_2 = nn.Conv2d(10, 10, 3, padding=1, stride=2)

        # 1x1 transposed convs (stride 2, output_padding 1) double H and W.
        self.encoder_c2_2 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)
        self.encoder_c2_3 = nn.Conv2d(10, 10, 3, padding=1, stride=2)

        self.encoder_c3_2 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)
        self.encoder_c3_3 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)

        self.encoder_c2_4 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)
        self.encoder_c3_4 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)
        self.encoder_c3_5 = nn.ConvTranspose2d(10, 10, 1, output_padding=1, stride=2)

        # Fuses the three upsampled branches (3 x 10 channels) down to 10.
        self.encoder_final = nn.Conv2d(30, 10, 1)

        # Decoder
        self.decoder_bn = nn.BatchNorm2d(10)
        self.decoder_c1 = nn.Sequential(
            nn.Conv2d(10, 10, 3, padding=1),
            nn.SELU()
        )

        # Collapses the whole frequency axis into one "non-melody" row.
        self.decoder_bm = nn.Sequential(
            nn.AvgPool2d((freq_bin, 1)),
            nn.BatchNorm2d(10),
            nn.Conv2d(10, 1, 3, padding=1),
            nn.SELU()
        )

        self.decoder_final = nn.Sequential(
            nn.BatchNorm2d(10),
            nn.Conv2d(10, 10, 3, padding=1),
            nn.SELU(),
            nn.Conv2d(10, 1, 3, padding=1),
            nn.SELU()
        )
        # Multi-Dilation ModuleList
        # Indices 0-2 serve the pre-fusion stage (3-channel inputs);
        # indices 3-5 serve the post-fusion stage (30-channel inputs).
        self.md_bn_1 = nn.ModuleList([
            nn.BatchNorm2d(3),
            nn.BatchNorm2d(3),
            nn.BatchNorm2d(3),
            nn.BatchNorm2d(30),
            nn.BatchNorm2d(30),
            nn.BatchNorm2d(30)
        ])
        self.md_bn_2 = nn.ModuleList([
            nn.BatchNorm2d(13),
            nn.BatchNorm2d(13),
            nn.BatchNorm2d(13),
            nn.BatchNorm2d(40),
            nn.BatchNorm2d(40),
            nn.BatchNorm2d(40)
        ])
        self.md_bn_3 = nn.ModuleList([
            nn.BatchNorm2d(23),
            nn.BatchNorm2d(23),
            nn.BatchNorm2d(23),
            nn.BatchNorm2d(50),
            nn.BatchNorm2d(50),
            nn.BatchNorm2d(50)
        ])
        # Dilated convs; padding == dilation keeps H/W unchanged (kernel 3).
        self.md_c1 = nn.ModuleList([
            nn.Conv2d(3, 10, 3, padding=3, dilation=3),
            nn.Conv2d(3, 10, 3, padding=3, dilation=3),
            nn.Conv2d(3, 10, 3, padding=3, dilation=3),
            nn.Conv2d(30, 10, 3, padding=3, dilation=3),
            nn.Conv2d(30, 10, 3, padding=3, dilation=3),
            nn.Conv2d(30, 10, 3, padding=3, dilation=3)
        ])
        self.md_c2 = nn.ModuleList([
            nn.Conv2d(13, 10, 3, padding=6, dilation=6),
            nn.Conv2d(13, 10, 3, padding=6, dilation=6),
            nn.Conv2d(13, 10, 3, padding=6, dilation=6),
            nn.Conv2d(40, 10, 3, padding=6, dilation=6),
            nn.Conv2d(40, 10, 3, padding=6, dilation=6),
            nn.Conv2d(40, 10, 3, padding=6, dilation=6)

        ])
        self.md_c3 = nn.ModuleList([
            nn.Conv2d(23, 10, 3, padding=6, dilation=6),
            nn.Conv2d(23, 10, 3, padding=6, dilation=6),
            nn.Conv2d(23, 10, 3, padding=6, dilation=6),
            nn.Conv2d(50, 10, 3, padding=6, dilation=6),
            nn.Conv2d(50, 10, 3, padding=6, dilation=6),
            nn.Conv2d(50, 10, 3, padding=6, dilation=6)
        ])
        self.md_act1 = nn.SELU()
        self.md_act2 = nn.SELU()
        self.md_act3 = nn.SELU()

        self.softmax = nn.Softmax(dim=2)

    def encoder(self, x):
        """Fuse three resolutions of x into a 10-channel full-resolution map.

        NOTE(review): f3 is derived from f2 (a second stride-2 of the half
        resolution), i.e. 1/4 resolution overall — confirm this is intended
        rather than encoder_c3_1(f1).
        """
        x = self.encoder_bn(x)
        f1 = x
        f2 = self.encoder_c2_1(f1)
        f3 = self.encoder_c3_1(f2)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)
        # First dilation stage, one parameter set per resolution.
        f1 = self.multi_dilation(f1, 0)
        f2 = self.multi_dilation(f2, 1)
        f3 = self.multi_dilation(f3, 2)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)

        # Resample each branch towards the other resolutions.
        f1_2 = self.encoder_c1_1(f1)
        f1_3 = self.encoder_c1_2(f1_2)
        # print("f1_3", f1_3.shape)

        f2_1 = self.encoder_c2_2(f2)
        f2_3 = self.encoder_c2_3(f2)
        # print("f2_1 f2_3", f2_1.shape, f2_3.shape)

        f3_2 = self.encoder_c3_2(f3)
        f3_1 = self.encoder_c3_3(f3_2)
        # print("f3_2 f3_1", f3_2.shape, f3_1.shape)

        # Concatenate the three views available at each resolution (30 ch).
        f1 = torch.cat([f1, f2_1, f3_1], dim = 1)
        f2 = torch.cat([f2, f1_2, f3_2], dim = 1)
        f3 = torch.cat([f3, f1_3, f2_3], dim = 1)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)

        # Second dilation stage on the fused 30-channel maps.
        f1 = self.multi_dilation(f1, 3)
        f2 = self.multi_dilation(f2, 4)
        f3 = self.multi_dilation(f3, 5)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)

        # Upsample the coarse branches back to full resolution before fusion.
        f2 = self.encoder_c2_4(f2)
        f3 = self.encoder_c3_4(f3)
        f3 = self.encoder_c3_5(f3)
        # print("f1 f2 f3:", f1.shape, f2.shape, f3.shape)
        final_x = torch.cat([f1, f2, f3], dim = 1)
        final_x = self.encoder_final(final_x)
        # print("final_x:", final_x.shape)
        return final_x

    def decoder(self, x):
        """Return (logits with the non-melody row prepended on dim -2, that row)."""
        x = self.decoder_bn(x)
        x = self.decoder_c1(x)

        bm = self.decoder_bm(x)
        # print("bm:", bm.shape)

        final_x = self.decoder_final(x)
        final_x = torch.cat([bm, final_x], dim = -2)
        # print("final_x", final_x.shape)
        return final_x, bm


    def multi_dilation(self, x, i):
        """Densely-connected stack of three dilated convs.

        `i` selects the parameter set sized for this call site's channel
        count (see the ModuleLists above).  Each stage sees the channel
        concatenation of the input and all previous stage outputs.
        """
        x0 = x
        x1 = self.md_bn_1[i](x0)
        x1 = self.md_c1[i](x1)
        x1 = self.md_act1(x1)
        # print("x1:", x1.shape)

        x2 = torch.cat([x0, x1], dim = 1)
        x2 = self.md_bn_2[i](x2)
        x2 = self.md_c2[i](x2)
        x2 = self.md_act2(x2)
        # print("x2:", x2.shape)

        x3 = torch.cat([x0, x1, x2], dim = 1)
        x3 = self.md_bn_3[i](x3)
        x3 = self.md_c3[i](x3)
        x3 = self.md_act3(x3)
        # print("x3:", x3.shape)

        return x3

    def forward(self, x):
        """x: [bs, 3, freq_bin, t] -> (softmax over dim 2, raw logits).

        The decoder's separate non-melody row (`bm`) is discarded here.
        """
        x = self.encoder(x)
        output_pre, bm = self.decoder(x)
        output = self.softmax(output_pre)
        # print("output bm:", output.shape, bm.shape)
        # exit()
        return output, output_pre
--------------------------------------------------------------------------------
/piano_net.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import math
4 | import time
5 | import numpy as np
6 | import torch
7 | import torch.nn as nn
8 | import torch.nn.functional as F
9 |
def init_layer(layer):
    """Xavier-initialize a Linear/Conv layer's weight; zero its bias if present."""
    nn.init.xavier_uniform_(layer.weight)

    bias = getattr(layer, 'bias', None)
    if bias is not None:
        bias.data.fill_(0.)
17 |
18 |
def init_bn(bn):
    """Reset a Batchnorm layer's affine params to the identity (gamma=1, beta=0)."""
    nn.init.zeros_(bn.bias)
    nn.init.ones_(bn.weight)
23 |
24 |
def init_gru(rnn):
    """Initialize a GRU layer: per-gate uniform weights, orthogonal recurrent
    "new" gate, zero biases.

    PyTorch GRU weights concatenate the three gate matrices (reset|update|new)
    along dim 0; `_concat_init` initializes each third with its own function.
    """

    def _concat_init(tensor, init_funcs):
        # Split the stacked gate matrix into len(init_funcs) equal row blocks
        # and initialize each block independently.
        (length, fan_out) = tensor.shape
        fan_in = length // len(init_funcs)

        for (i, init_func) in enumerate(init_funcs):
            init_func(tensor[i * fan_in : (i + 1) * fan_in, :])

    def _inner_uniform(tensor):
        # Uniform(-sqrt(3/fan_in), sqrt(3/fan_in)).
        # NOTE(review): _calculate_correct_fan is a private torch.nn.init
        # helper and may change across torch versions.
        fan_in = nn.init._calculate_correct_fan(tensor, 'fan_in')
        nn.init.uniform_(tensor, -math.sqrt(3 / fan_in), math.sqrt(3 / fan_in))

    for i in range(rnn.num_layers):
        # Input-to-hidden: uniform init for all three gate blocks; zero bias.
        _concat_init(
            getattr(rnn, 'weight_ih_l{}'.format(i)),
            [_inner_uniform, _inner_uniform, _inner_uniform]
        )
        torch.nn.init.constant_(getattr(rnn, 'bias_ih_l{}'.format(i)), 0)

        # Hidden-to-hidden: orthogonal init for the third (candidate) block.
        _concat_init(
            getattr(rnn, 'weight_hh_l{}'.format(i)),
            [_inner_uniform, _inner_uniform, nn.init.orthogonal_]
        )
        torch.nn.init.constant_(getattr(rnn, 'bias_hh_l{}'.format(i)), 0)
51 |
52 |
class ConvBlock(nn.Module):
    """Two Conv2d + BatchNorm + ReLU stages with optional trailing avg-pool.

    Neither conv pads the time axis (padding=(0, 1)), so each conv shrinks
    the time dimension by 2.
    """

    def __init__(self, in_channels, out_channels, momentum):
        super(ConvBlock, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=in_channels,
                              out_channels=out_channels,
                              kernel_size=(3, 5), stride=(1, 1),
                              padding=(0, 1), bias=False)

        self.conv2 = nn.Conv2d(in_channels=out_channels,
                              out_channels=out_channels,
                              kernel_size=(3, 3), stride=(1, 1),
                              padding=(0, 1), bias=False)

        # BUGFIX: BatchNorm2d's second positional parameter is `eps`, not
        # `momentum` — the original `nn.BatchNorm2d(out_channels, momentum)`
        # set eps=0.01 and left momentum at its 0.1 default.  Pass it by
        # keyword, matching `nn.BatchNorm1d(768, momentum=momentum)` in
        # AcousticModelCRnn8Dropout.
        self.bn1 = nn.BatchNorm2d(out_channels, momentum=momentum)
        self.bn2 = nn.BatchNorm2d(out_channels, momentum=momentum)

        self.init_weight()

    def init_weight(self):
        # Xavier weights, identity batch norm (module-level helpers).
        init_layer(self.conv1)
        init_layer(self.conv2)
        init_bn(self.bn1)
        init_bn(self.bn2)


    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        """
        Args:
            input: (batch_size, in_channels, time_steps, freq_bins)
            pool_size: kernel of the trailing average pool.
            pool_type: pooling applied only when this equals 'avg'.

        Outputs:
            (batch_size, out_channels, reduced_time_steps, reduced_freq_bins)
        """

        x = F.relu_(self.bn1(self.conv1(input)))
        x = F.relu_(self.bn2(self.conv2(x)))

        if pool_type == 'avg':
            x = F.avg_pool2d(x, kernel_size=pool_size)

        return x
99 |
100 |
# Select the default compute device for this module's standalone run.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using " + device.type)
107 |
108 |
109 | from config import include_model_tweak
class AcousticModelCRnn8Dropout(nn.Module):
    """CRNN acoustic model: 4 ConvBlocks -> FC bottleneck -> 2-layer BiGRU ->
    per-frame class posteriors.

    With the default input (bs, 3, 360, 144): after transposing to
    (bs, 3, 144, 360), each ConvBlock shrinks time by 4 (two unpadded-time
    convs) and halves frequency via its (1, 2) pooling, giving
    (bs, 128, 128, 20); hence midfeat = 128 * 20 = 2560.
    classes_num = 361 — presumably 360 pitch bins plus one non-melody bin
    (confirm against the data generator).
    """
    def __init__(self, classes_num = 361, midfeat = 2560, momentum = 0.01):
        super(AcousticModelCRnn8Dropout, self).__init__()

        self.conv_block1 = ConvBlock(in_channels=3, out_channels=48, momentum=momentum)
        self.conv_block2 = ConvBlock(in_channels=48, out_channels=64, momentum=momentum)
        self.conv_block3 = ConvBlock(in_channels=64, out_channels=96, momentum=momentum)
        self.conv_block4 = ConvBlock(in_channels=96, out_channels=128, momentum=momentum)

        # Projects the flattened (channels x freq) feature of each frame to 768.
        self.fc5 = nn.Linear(midfeat, 768, bias=False)
        self.bn5 = nn.BatchNorm1d(768, momentum=momentum)

        # Bidirectional -> 2 * 256 = 512 features per frame for the classifier.
        self.gru = nn.GRU(input_size=768, hidden_size=256, num_layers=2,
            bias=True, batch_first=True, dropout=0., bidirectional=True)

        self.fc = nn.Linear(512, classes_num, bias=True)

        self.sfmax = torch.nn.Softmax(dim = 2)
        self.init_weight()

    def init_weight(self):
        init_layer(self.fc5)
        init_bn(self.bn5)
        init_gru(self.gru)
        init_layer(self.fc)

    def forward(self, x):
        """
        Args:
            input: (batch_size, channels_num, time_steps, freq_bins)

        Outputs:
            output: (batch_size, time_steps, classes_num)

        Note: the returned tensor is transposed to
        (batch_size, classes_num, reduced_time_steps); the second return
        value is always None (kept for interface parity with the other
        backbones, which return (output, output_pre)).
        """

        # Swap to (bs, ch, time, freq) for the ConvBlocks.
        x = self.conv_block1(x.transpose(2,3), pool_size=(1, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block2(x, pool_size=(1, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block3(x, pool_size=(1, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv_block4(x, pool_size=(1, 2), pool_type='avg')
        x = F.dropout(x, p=0.2, training=self.training)

        # (bs, ch, t, f) -> (bs, t, ch * f) so fc5 acts per frame.
        x = x.transpose(1, 2).flatten(2)

        # BatchNorm1d expects (bs, features, t), hence the double transpose.
        x = F.relu(self.bn5(self.fc5(x).transpose(1, 2)).transpose(1, 2))
        x = F.dropout(x, p=0.5, training=self.training, inplace=False)

        (x, _) = self.gru(x)
        x = F.dropout(x, p=0.5, training=self.training, inplace=False)
        x = self.fc(x)


        if include_model_tweak:
            # x[:, :, -1] = np.log(x.shape[-1]) - x[:, :, -1]
            # x[:, :, 0] = np.log(49*(x.shape[-1]-1)) - x[:, :, 0]
            # Flip the first class's logit before the softmax — presumably
            # class 0 is the non-melody bin; confirm against config/training.
            x[:, :, 0] = 1 - x[:, :, 0]


        output = self.sfmax(x)
        return output.transpose(1,2), None
172 |
173 |
174 |
175 |
if __name__ == "__main__":
    # Standalone smoke test: forward one random CFP batch and print the
    # posteriorgram shape (batch, classes_num, reduced_time_steps).
    # (batch_size, 3 -> CFP, 360 -> FREQ_BINS, 144 -> TIME_STEPS)
    x = torch.randn(2, 3, 360, 144, device = device)
    print(AcousticModelCRnn8Dropout().to(device)(x)[0].shape)
180 |
181 |
--------------------------------------------------------------------------------
/tonet.py:
--------------------------------------------------------------------------------
1 | """
2 | Ke Chen knutchen@ucsd.edu
3 |
4 | Tone-Octave Network - model
5 |
6 | This file contains the TONet core code
7 |
8 | """
9 | import os
10 | import numpy as np
11 | import torch
12 | from torch import nn
13 | import torch.nn.functional as F
14 | from torchvision import transforms
15 | from torchvision.datasets import MNIST
16 | from torch.utils.data import DataLoader, random_split
17 | import pytorch_lightning as pl
18 |
19 | from util import melody_eval, freq2octave, freq2tone, tofreq
20 | from attention_layer import CombineLayer, PositionalEncoding
21 | from feature_extraction import get_CenFreq
22 |
23 |
24 | class TONet(pl.LightningModule):
25 | """
26 | Args:
27 | mode: ["disable", "enable"]
28 | """
    def __init__(self, l_model, r_model, config, loss_func, mode = "single"):
        """Build the TONet wrapper around one or two melody-extraction backbones.

        Args:
            l_model: backbone fed the original CFP feature.
            r_model: backbone fed the Tone-CFP feature; None for the
                single-branch ablations ("single", "spl", "spat").
            config: project config module (freq_bin, seg_frame, tone_class,
                octave_class, startfreq, stopfreq, octave_res, ...).
            loss_func: loss applied to the predictions (BCELoss in main.py).
            mode: ablation mode — "single", "tcfp", "spl", "spat" or "all".
        """
        super().__init__()
        self.config = config
        # l_model for original-CFP
        self.l_model = l_model
        # r_model for Tone-CFP
        self.r_model = r_model
        self.mode = mode
        # Center frequency of each output bin; bin 0 is forced to 0 Hz
        # (the non-melody bin).
        self.centf = np.array(get_CenFreq(config.startfreq, config.stopfreq, config.octave_res))
        self.centf[0] = 0
        self.loss_func = loss_func
        # Best-so-far evaluation metrics.
        # NOTE(review): assumes 3 test splits x 6 metrics — confirm against
        # the evaluation loop.
        self.max_metric = np.zeros((3, 6))
        if self.mode == "all" or self.mode == "tcfp":
            assert r_model is not None, "Enabling TONet needs two-branch models!"


        self.gru_dim = 512
        self.attn_dim = 2048
        # define hyperparameter: spectral input width (sp_dim) and the width
        # fed into the tone/octave classification heads (linear_dim).
        if self.mode == "tcfp":
            self.sp_dim = self.config.freq_bin * 2
            self.linear_dim = self.config.freq_bin * 2
        elif self.mode == "spl":
            self.sp_dim = self.config.freq_bin
            self.linear_dim = self.gru_dim * 2
        elif self.mode == "spat":
            self.sp_dim = self.config.freq_bin
            self.linear_dim = self.attn_dim
        elif self.mode == "all":
            self.sp_dim = self.config.freq_bin * 2
            self.linear_dim = self.attn_dim

        # Network Architecture
        if self.mode == "spl":
            # Linear encoders (GRU variant kept commented out below).
            self.tone_gru = nn.Linear(self.sp_dim, self.linear_dim)
            # nn.GRU(
            #     self.sp_dim, self.gru_dim, 1,
            #     batch_first=True, bidirectional=True
            # )
            self.octave_gru = nn.Linear(self.sp_dim, self.linear_dim)
            # nn.GRU(
            #     self.sp_dim, self.gru_dim, 1,
            #     batch_first=True, bidirectional=True
            # )
        elif self.mode == "spat" or self.mode == "all":
            # Self-attention encoders: input projection + positional encoding
            # + two CombineLayer blocks, separately for tone and octave.
            self.tone_in = nn.Linear(self.sp_dim, self.attn_dim)
            self.tone_posenc = PositionalEncoding(self.attn_dim, n_position = self.config.seg_frame)
            self.tone_dropout = nn.Dropout(p = 0.2)
            self.tone_norm = nn.LayerNorm(self.attn_dim, eps = 1e-6)
            self.tone_attn = nn.ModuleList([
                CombineLayer(self.attn_dim, self.attn_dim * 2, 8,
                    self.attn_dim // 8, self.attn_dim // 8, dropout = 0.2)
                for _ in range(2)]
            )
            self.octave_in = nn.Linear(self.sp_dim, self.attn_dim)
            self.octave_posenc = PositionalEncoding(self.attn_dim, n_position = self.config.seg_frame)
            self.octave_dropout = nn.Dropout(p = 0.2)
            self.octave_norm = nn.LayerNorm(self.attn_dim, eps = 1e-6)
            self.octave_attn = nn.ModuleList([
                CombineLayer(self.attn_dim, self.attn_dim * 2, 8,
                    self.attn_dim // 8, self.attn_dim // 8, dropout = 0.2)
                for _ in range(2)]
            )
        if self.mode != "single" and self.mode != "tcfp":
            # Tone / octave classification heads.
            self.tone_linear = nn.Sequential(
                nn.Linear(self.linear_dim, 512),
                nn.Dropout(p = 0.2),
                nn.SELU(),
                nn.Linear(512, 128),
                nn.Dropout(p = 0.2),
                nn.SELU(),
                nn.Linear(128, self.config.tone_class),
                nn.Dropout(p = 0.2),
                nn.SELU()
            )
            self.octave_linear = nn.Sequential(
                nn.Linear(self.linear_dim, 256),
                nn.Dropout(p = 0.2),
                nn.SELU(),
                nn.Linear(256, 64),
                nn.Dropout(p = 0.2),
                nn.SELU(),
                nn.Linear(64, self.config.octave_class),
                nn.Dropout(p = 0.2),
                nn.SELU()
            )
            # Reduce the two branches' non-melody rows to one value each.
            self.tone_bm = nn.Sequential(
                nn.Linear(2, 1),
                nn.SELU()
            )
            self.octave_bm = nn.Sequential(
                nn.Linear(2, 1),
                nn.SELU()
            )
            # [bs, 361 + 13 + 9, 128]
            # Fuses the concatenated CFP/T-CFP saliency back to freq_bin.
            self.tcfp_linear = nn.Sequential(
                nn.Conv1d(self.config.freq_bin * 2, self.config.freq_bin,
                    5, padding=2),
                nn.SELU()
            )
            self.tcfp_bm = nn.Sequential(
                nn.Conv1d(2,1,5,padding=2),
                nn.SELU()
            )
            # Final fusion of tone + octave + saliency (+ 3 bm rows).
            self.final_linear = nn.Sequential(
                nn.Conv1d(
                    self.config.tone_class + self.config.octave_class + self.config.freq_bin + 3,
                    self.config.freq_bin, 5, padding=2),
                nn.SELU()
            )
        elif self.mode == "tcfp":
            self.final_linear = nn.Sequential(
                nn.Linear(self.linear_dim, self.config.freq_bin),
                nn.SELU()
            )
            self.final_bm = nn.Sequential(
                nn.Linear(2, 1),
                nn.SELU()
            )
148 | """
149 | Args:
150 | x: [bs, 3, freuqncy_bin, time_frame]
151 | """
152 | def tone_decoder(self, tone_feature):
153 | if self.mode == "all" or self.mode == "spat":
154 | tone_h = self.tone_dropout(self.tone_posenc(self.tone_in(tone_feature)))
155 | tone_h = self.tone_norm(tone_h)
156 | for tone_layer in self.tone_attn:
157 | tone_h, tone_weight = tone_layer(tone_h, slf_attn_mask = None)
158 | tone_prob = self.tone_linear(tone_h)
159 | tone_prob = tone_prob.permute(0, 2, 1).contiguous()
160 | elif self.mode == "spl":
161 | tone_h = self.tone_gru(tone_feature)
162 | tone_prob = self.tone_linear(tone_h)
163 | tone_prob = tone_prob.permute(0, 2, 1).contiguous()
164 | return tone_prob
165 |
166 | def octave_decoder(self, octave_feature):
167 | if self.mode == "all" or self.mode == "spat":
168 | octave_h = self.octave_dropout(self.octave_posenc(self.octave_in(octave_feature)))
169 | octave_h = self.octave_norm(octave_h)
170 | for octave_layer in self.octave_attn:
171 | octave_h, octave_weight = octave_layer(octave_h, slf_attn_mask = None)
172 | octave_prob = self.octave_linear(octave_h)
173 | octave_prob = octave_prob.permute(0, 2, 1).contiguous()
174 | elif self.mode == "spl":
175 | octave_h = self.octave_gru(octave_feature)
176 | octave_prob = self.octave_linear(octave_h)
177 | octave_prob = octave_prob.permute(0, 2, 1).contiguous()
178 | return octave_prob
179 |
180 |
    def forward(self, x, tx = None):
        """Run the network in the configured mode.

        Args:
            x: CFP input; presumably (bs, 3, freq_bin, time) per the class
               docstring — TODO confirm.
            tx: harmonic-shifted CFP (z-CFP) for the two-branch modes; unused
               when mode == "single".

        Returns:
            mode "single": raw activation map from the left branch.
            mode "all" / "spl" / "spat": (tone_prob, octave_prob, final_feature),
                each softmax-normalized over dim 1 (the class axis).
            mode "tcfp": softmax-normalized frequency map.
        """
        if self.mode == "single":
            output, _ = self.l_model(x)
            return output
        elif self.mode == "all":
            # two encoder branches: l_model on CFP, r_model on the shifted CFP
            _, output_l = self.l_model(x)
            _, output_r = self.r_model(tx)
            # row 0 of each map is split off as "bm" (presumably the
            # non-melody/voicing channel — confirm against the encoders)
            bm_l = output_l[:, :, 0, :].unsqueeze(dim = 2)
            output_l = output_l[:,:, 1:,:]
            bm_r = output_r[:, :, 0, :].unsqueeze(dim = 2)
            output_r = output_r[:,:, 1:,:]
            # stack both branches along the frequency axis, then fuse by conv
            feature_agg = torch.cat((output_l, output_r), dim = 2)
            feature_agg = feature_agg.squeeze(dim = 1)
            feature_agg_mi = self.tcfp_linear(feature_agg) # [bs, 360, 128]
            bm_agg = torch.cat((bm_l, bm_r), dim = 2)
            bm_agg = bm_agg.squeeze(dim = 1)
            bm_agg_mi = self.tcfp_bm(bm_agg)
            bm_agg = bm_agg.permute(0,2,1)
            # decoders consume (bs, time, feat)
            tone_feature = feature_agg.permute(0,2,1).contiguous()
            octave_feature = feature_agg.permute(0,2,1).contiguous()
            tone_prob = self.tone_decoder(tone_feature)
            octave_prob = self.octave_decoder(octave_feature)

            # blank-channel logits appended to each decoder head
            tone_bm = self.tone_bm(bm_agg)
            octave_bm = self.octave_bm(bm_agg)
            tone_bm = tone_bm.permute(0,2,1)
            octave_bm = octave_bm.permute(0,2,1)

            tone_prob = torch.cat((tone_prob, tone_bm), dim = 1)
            octave_prob = torch.cat((octave_prob, octave_bm), dim = 1)

            # fuse tone / octave / frequency evidence into the final pitch map
            final_feature = torch.cat((tone_prob, octave_prob, feature_agg_mi, bm_agg_mi), dim = 1)
            final_feature = self.final_linear(final_feature)
            final_feature = torch.cat((bm_agg_mi, final_feature), dim=1)
            final_feature = nn.Softmax(dim = 1)(final_feature)
            tone_prob = nn.Softmax(dim = 1)(tone_prob)
            octave_prob = nn.Softmax(dim = 1)(octave_prob)
            return tone_prob, octave_prob, final_feature
        elif self.mode == "tcfp":
            # two branches fused by a plain linear head (no tone/octave decoders)
            _, output_l = self.l_model(x)
            _, output_r = self.r_model(tx)
            bm_l = output_l[:, :, 0, :].unsqueeze(dim = 2)
            output_l = output_l[:,:, 1:,:]
            bm_r = output_r[:, :, 0, :].unsqueeze(dim = 2)
            output_r = output_r[:,:, 1:,:]
            feature_agg = torch.cat((output_l, output_r), dim = 2)
            feature_agg = feature_agg.permute(0, 1, 3, 2)
            bm_agg = torch.cat((bm_l, bm_r), dim = 2)
            bm_agg = bm_agg.permute(0, 1, 3, 2)
            final_x = self.final_linear(feature_agg)
            final_bm = self.final_bm(bm_agg)
            final_x = final_x.permute(0,1,3,2)
            final_bm = final_bm.permute(0,1,3,2)
            final_output = nn.Softmax(dim = 2)(torch.cat((final_bm, final_x), dim = 2))
            return final_output
        elif self.mode == "spl" or self.mode == "spat":
            # single-branch variants: same head as "all" but without r_model,
            # and the raw bm channel is reused directly instead of tone/octave_bm
            _, output_l = self.l_model(x)
            bm_l = output_l[:, :, 0, :].unsqueeze(dim = 2)
            output_l = output_l[:,:, 1:,:]
            feature_agg = output_l
            feature_agg = feature_agg.squeeze(dim = 1)
            bm_agg = bm_l
            bm_agg = bm_agg.squeeze(dim = 1)
            tone_feature = feature_agg.permute(0,2,1).contiguous()
            octave_feature = feature_agg.permute(0,2,1).contiguous()
            tone_prob = self.tone_decoder(tone_feature)
            octave_prob = self.octave_decoder(octave_feature)
            tone_bm = bm_agg
            octave_bm = bm_agg

            tone_prob = torch.cat((tone_prob, tone_bm), dim = 1)
            octave_prob = torch.cat((octave_prob, octave_bm), dim = 1)

            final_feature = torch.cat((tone_prob, octave_prob, feature_agg, bm_agg), dim = 1)
            final_feature = self.final_linear(final_feature)
            final_feature = torch.cat((bm_agg, final_feature), dim=1)
            final_feature = nn.Softmax(dim = 1)(final_feature)
            tone_prob = nn.Softmax(dim = 1)(tone_prob)
            octave_prob = nn.Softmax(dim = 1)(octave_prob)
            return tone_prob, octave_prob, final_feature
261 | """
262 | Args:
263 | batch: {
264 | "cfp": [bs, 3, frequency_bin, time_frame],
265 | "gd": [bs, time_frame]
266 | }
267 | """
    def training_step(self, batch, batch_idx):
        """One training step.

        batch:
            "cfp":  (bs, channels, freq_bins, time) input features
            "tcfp": harmonic-shifted CFP, only consumed in mode "all"
            "gd":   (bs, time) ground-truth bin indices (used to index gd_maps;
                    0 presumably means unvoiced — confirm against data loader)

        mode "single": soft one-hot target map (0.98 for the true bin, the
        remaining 0.02 spread over the other bins) plus an optional penalty on
        short voicing flips. mode "all": joint tone/octave/pitch targets.
        Returns the scalar loss.
        """
        device_type = next(self.parameters()).device

        # gds (batch_size, time_steps)
        # cfps (batch_size, channel_num, freq_bins, time_steps)

        cfps = batch["cfp"]
        tcfps = batch["tcfp"]
        gds = batch["gd"]
        if self.mode == "single":
            # gd_maps = torch.zeros((cfps.shape[0], cfps.shape[-2] + 1, cfps.shape[-1])).to( device_type)
            gd_maps = torch.zeros((cfps.shape[0], cfps.shape[-2] + 1, gds.shape[-1])).to( device_type)

            # for each item in batch
            # switch to 0.98 and 0.02/rest
            for i in range(len(gds)):
                # gd_maps[i, gds[i].long(), torch.arange(gds.shape[-1])] = 1.0
                gd_maps[i, gds[i].long(), torch.arange(gds.shape[-1])] = 0.98
            # spread the leftover 0.02 mass uniformly over the non-target bins
            gd_maps[gd_maps == 0] = 0.02/(gd_maps.shape[1]-1)
            # print(gd_maps.shape, (gd_maps.shape[1]-1))
            # sys.exit()
            # print(torch.sum(gd_maps[0, :, 0]))
            # print("\n\n\n\n\n")
            # sys.exit()

            # print(i, gds[i].long(), gds.shape[-1])

            # print(gds.shape, gd_maps.shape, cfps.shape)
            # sys.exit()


            output = self(cfps)
            output = torch.squeeze(output, dim = 1)
            loss = self.loss_func(output, gd_maps)

            # now add the polynomial loss
            # assume (batch_size, freq_bins, time_steps) -> (???, 361, 128)

            from config import include_loss_component
            if include_loss_component:
                from util import area_punish
                from util import reverse_area_punish


                # 0. compute the loss for silence part. (101, 1001...)
                # now (batch_size, time_steps), and throw it into area punish
                special_output = output[:, 0, :]

                # print(special_output.shape)

                # penalize short unvoiced gaps (1..0..1) for windows 3..30
                special_loss_temp = []
                for area_len in range(3,31):
                    special_loss_temp.append(reverse_area_punish(special_output, area_len))

                # special_loss_temp_2 = []
                # and short voiced bursts (0..1..0) for windows 3..5
                for area_len in range(3,6):
                    special_loss_temp.append(area_punish(special_output, area_len))
                # cat along the time_step dim as we need to do tweak and mean across all area_len
                special_loss_temp = torch.cat(special_loss_temp, dim = 1)
                # special_loss_temp_2 = torch.cat(special_loss_temp_2, dim = 1)
                # print(special_loss_temp.shape)

                # sharpen with the sigmoid-like polynomial x^5/(x^5 + (1-x)^5)
                special_loss_temp = special_loss_temp**5/(special_loss_temp**5 + (1-special_loss_temp)**5)

                # special_loss_temp_2 = /special_loss_temp_2**5/(special_loss_temp_2**5 + (1-special_loss_temp_2)**5)

                loss += torch.mean(special_loss_temp)
                # loss += 0.3*torch.mean(special_loss_temp_2)


            # verification necessary, shall try to avoid double count.
            self.log('loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=False)
        elif self.mode == "all":
            # from pure pitch estimation
            gd_maps = torch.zeros((cfps.shape[0], cfps.shape[-2] + 1, cfps.shape[-1])).to( device_type)
            tone_maps = torch.zeros((cfps.shape[0], self.config.tone_class + 1, cfps.shape[-1])).to(device_type)
            octave_maps = torch.zeros((cfps.shape[0], self.config.octave_class + 1, cfps.shape[-1])).to(device_type)
            # 60 bins per octave; unvoiced frames (gd < 1) map to the extra
            # "no class" bin of each head
            tone_index = ((gds % 60) * self.config.tone_class / 60).long()
            octave_index = (gds // 60 + 2).long()
            tone_index[gds < 1.0] = self.config.tone_class
            octave_index[gds < 1.0] = self.config.octave_class
            for i in range(len(tone_maps)):
                tone_maps[i, tone_index[i], torch.arange(gds.shape[-1])] = 1.0
                octave_maps[i, octave_index[i], torch.arange(gds.shape[-1])] = 1.0
                gd_maps[i, gds[i].long(), torch.arange(gds.shape[-1])] = 1.0
            tone_prob, octave_prob, final_prob = self(cfps, tcfps)
            # single loss over the three stacked heads
            pred_map = torch.cat((tone_prob, octave_prob , final_prob), dim = 1)
            gd_map = torch.cat([tone_maps, octave_maps, gd_maps], dim = 1)
            loss = self.loss_func(pred_map, gd_map)
            self.log('loss', loss, on_step=True, on_epoch=True, prog_bar=False, logger=False)
        return loss
360 |
361 |
362 | def write_prediction(self, pred, filename):
363 | time_frame = np.arange(len(pred)) * 0.01
364 | with open(filename, "w") as f:
365 | for i in range(len(time_frame)):
366 | f.write(str(np.round(time_frame[i], 4)) + "\t" + str(pred[i]) + "\n")
367 |
368 |
369 | def validation_step(self, batch, batch_idx, dataset_idx):
370 | device_type = next(self.parameters()).device
371 | mini_batch = self.config.batch_size
372 | mini_batch = 1
373 |
374 | # array of length 1 anyway. (for each song,
375 | cfps = batch["cfp"][0]
376 | tcfps = batch["tcfp"][0]
377 | gds = batch["gd"][0]
378 | lens = batch["length"][0]
379 | name = batch["name"][0]
380 |
381 | # print(len(batch["cfp"]), len(batch["gd"]), cfps.shape, gds.shape, lens, name)
382 |
383 | output = []
384 | # index 0 of cfps and gds is batch_size.
385 | for i in range(0, len(cfps), mini_batch):
386 | temp_cfp = torch.from_numpy(cfps[i:i + mini_batch]).to(device_type)
387 | temp_tcfp = torch.from_numpy(tcfps[i:i + mini_batch]).to(device_type)
388 | # import sys
389 | # print(name)
390 | # print(temp_cfp.shape)
391 | # sys.exit()
392 | if self.mode == "single":
393 | temp_output = self(temp_cfp)
394 | temp_output = torch.squeeze(temp_output, dim = 1)
395 | elif self.mode == "all":
396 | _, _, temp_output = self(temp_cfp, temp_tcfp)
397 |
398 | temp_output = temp_output.detach().cpu().numpy()
399 | output.append(temp_output)
400 | output = np.concatenate(np.array(output),axis = 0)
401 | return [
402 | output,
403 | gds,
404 | lens,
405 | name
406 | ]
407 |
408 |
409 | def validation_epoch_end(self, validation_step_outputs, test_flag = False):
410 | for i, dataset_d in enumerate(validation_step_outputs):
411 | metric = np.array([0.,0.,0.,0.,0.,0.])
412 | preds = []
413 | gds = []
414 | special_outputs = []
415 | for d in dataset_d:
416 | pred, gd, rl, name = d
417 |
418 | special_output = pred[:, 0, :]
419 |
420 | pred = np.argmax(pred, axis = 1)
421 | pred = np.concatenate(pred, axis = 0)
422 | pred = self.centf[pred]
423 |
424 | # at this point should be ready to
425 | # import sys
426 | # print(name, pred.shape)
427 | # new_name = name.replace("labels_and_waveform", "preds")
428 | # if new_name == name:
429 | # sys.exit()
430 | # np.savetxt(new_name, np.c_[pred])
431 |
432 |
433 | gd = np.concatenate(gd, axis = 0)
434 | preds.append(pred)
435 | gds.append(gd)
436 | special_outputs.append(special_output)
437 | preds = np.concatenate(preds, axis = 0)
438 | gds = np.concatenate(gds, axis = 0)
439 | special_outputs = np.concatenate(special_outputs, axis = 0)
440 |
441 | metric = melody_eval(preds, gds)
442 | self.print("\n")
443 | self.print("Dataset ", i, " OA:", metric[-1])
444 | if test_flag or metric[-1] > self.max_metric[i, -1]:
445 |
446 | # write the result down
447 | os.system("rm -rf model_backup/" + str(i) + "*_best.txt")
448 | with open("model_backup/" + str(i) + "_" + str(metric[-1]) + "_best.txt", "a+") as f:
449 | np.savetxt(f, np.c_[preds, gds])
450 |
451 | with open("model_backup/" + str(i) + "_vocal_prob.txt", "w") as f:
452 | np.savetxt(f, special_outputs)
453 |
454 |
455 | for j in range(len(self.max_metric[i])):
456 | self.max_metric[i,j] = metric[j]
457 | self.max_metric[i,j] = metric[j]
458 | if not test_flag:
459 | torch.save(self.state_dict(), "model_backup/bestk_" + str(i) + ".ckpt")
460 | self.print("Best ",i,":", self.max_metric[i])
461 |
462 |
    def test_step(self, batch, batch_idx, dataset_idx):
        """Testing reuses the validation logic verbatim (one song per step)."""
        return self.validation_step(batch, batch_idx, dataset_idx)
465 |
466 | def test_epoch_end(self, test_step_outputs):
467 | self.validation_epoch_end(test_step_outputs, test_flag = True)
468 | # for i, dataset_d in enumerate(test_step_outputs):
469 | # for j, d in enumerate(dataset_d):
470 | # pred, _, rl = d
471 | # pred = np.argmax(pred, axis = 1)
472 | # pred = np.concatenate(pred, axis = 0)[:rl]
473 | # pred = self.centf[pred]
474 | # self.write_prediction(pred, "prediction/" + str(i) + "_" + str(j) + ".txt")
475 |
476 | def configure_optimizers(self):
477 | optimizer = torch.optim.Adam(self.parameters(), lr=self.config.lr)
478 | def lr_foo(epoch):
479 | if epoch < 5:
480 | # warm up lr
481 | lr_scale = 0.5
482 | else:
483 | lr_scale = 0.5 * 0.98 ** (epoch - 5)
484 |
485 | return lr_scale
486 |
487 | if self.mode == "single" or self.mode == "tcfp":
488 | return optimizer
489 | elif self.mode == "all" or self.mode == "spl" or self.mode == "spat":
490 | scheduler = torch.optim.lr_scheduler.LambdaLR(
491 | optimizer,
492 | lr_lambda=lr_foo
493 | )
494 | return [optimizer], [scheduler]
495 |
--------------------------------------------------------------------------------
/util.py:
--------------------------------------------------------------------------------
1 | """
2 | Ke Chen knutchen@ucsd.edu
3 |
4 | Tone-Octave Network - utils file
5 |
6 | This file contains useful common methods
7 |
8 | """
9 | import os
10 | import numpy as np
11 | import torch
12 | import mir_eval
13 | import config
14 |
def index2centf(seq, centfreq):
    """Map each frequency in `seq` to the index of its nearest bin in `centfreq`.

    The nearest bin is chosen by log-scale (ratio) distance between the two
    bracketing bins. Values below 0.1 Hz map to bin 0 (unvoiced), as do values
    above the last bin. NOTE: mutates centfreq[0] to 0 in place.
    """
    centfreq[0] = 0
    out = np.zeros(len(seq))
    for i, freq in enumerate(seq):
        if freq < 0.1:
            out[i] = 0
            continue
        for j, cf in enumerate(centfreq):
            if cf > freq:
                # pick whichever neighbor is closer in ratio terms
                if j > 1 and abs(centfreq[j] / freq) > abs(freq / centfreq[j - 1]):
                    out[i] = j - 1
                else:
                    out[i] = j
                break
    return out
37 |
38 |
def freq2octave(freq):
    """Map a frequency in Hz to an octave index (MIDI pitch // 12);
    out-of-range input maps to config.octave_class (the "no octave" bin).

    NOTE(review): this definition is shadowed by a second `freq2octave`
    defined later in this module, so at import time the later one wins and
    this version is effectively dead code.
    """
    if freq < 1.0 or freq > 2050:
        return config.octave_class
    else:
        return int(np.round(69 + 12 * np.log2(freq/440)) // 12)
44 |
def freq2tone(freq):
    """Map a frequency in Hz to its pitch class (MIDI pitch mod 12);
    out-of-range input maps to config.tone_class (the "no tone" bin)."""
    out_of_range = freq < 1.0 or freq > 2050
    if out_of_range:
        return config.tone_class
    midi = np.round(69 + 12 * np.log2(freq / 440))
    return int(midi % 12)
50 |
def tofreq(tone, octave):
    """Reconstruct a frequency in Hz from (tone, octave) class indices.

    Returns 0.0 (unvoiced) for the "no class" bins or octaves below 2.
    """
    invalid = (
        tone >= config.tone_class
        or octave >= config.octave_class
        or octave < 2
    )
    if invalid:
        return 0.0
    midi_like = 12 * octave + tone * 12 / config.tone_class - 69
    return 440 * 2 ** (midi_like / 12)
56 |
57 |
def pos_weight(data, freq_bins):
    """Per-bin class weights balancing vocal vs. non-vocal frames.

    Row 0 (the unvoiced bin) is weighted vocal/non_vocal; every pitch row gets
    the inverse ratio. Returns a float32 tensor of shape (freq_bins, frames).
    """
    frames = data.shape[-1]
    non_vocal = float(np.count_nonzero(data == 0))
    vocal = float(data.size - non_vocal)
    weights = np.zeros((freq_bins, frames))
    weights[0, :] = vocal / non_vocal
    weights[1:, :] = non_vocal / vocal
    print(non_vocal, vocal)  # kept: original debug output
    return torch.from_numpy(weights).float()
67 |
def freq2octave(freq):
    """Map a frequency in Hz to its octave number (MIDI pitch // 12).

    Frequencies below 1.0 Hz or above 1990 Hz map to 0. This redefinition
    shadows the earlier freq2octave in this module.
    """
    if freq < 1.0 or freq > 1990:
        return 0
    midi_pitch = 69 + 12 * np.log2(freq / 440)
    return int(round(midi_pitch) // 12)
73 |
def compute_roa(pred, gd):
    """Raw Octave Accuracy: fraction of voiced frames (gd > 0.1) whose
    predicted octave matches the ground-truth octave.

    Returns 0.0 when the ground truth has no voiced frames (the original
    divided by zero in that case).
    """
    voiced = gd > 0.1
    pred = pred[voiced]
    gd = gd[voiced]
    if len(pred) == 0:
        return 0.0
    pred_oct = np.array([freq2octave(d) for d in pred])
    gd_oct = np.array([freq2octave(d) for d in gd])
    return np.sum(pred_oct == gd_oct) / len(pred_oct)
80 |
81 |
def melody_eval(pred, gd):
    """Standard melody-extraction metrics via mir_eval plus our octave accuracy.

    Frames are placed on a 10 ms grid. Returns
    np.array([VR, VFA, RPA, RCA, ROA, OA]), all in percent.
    """
    ref_time = np.arange(len(gd)) * 0.01
    est_time = np.arange(len(pred)) * 0.01

    scores = mir_eval.melody.evaluate(ref_time, gd, est_time, pred)
    eval_arr = np.array([
        scores['Voicing Recall'] * 100.0,
        scores['Voicing False Alarm'] * 100.0,
        scores['Raw Pitch Accuracy'] * 100.0,
        scores['Raw Chroma Accuracy'] * 100.0,
        compute_roa(pred, gd) * 100.0,
        scores['Overall Accuracy'] * 100.0,
    ])
    return eval_arr
98 |
def tonpy_fn(batch):
    """Collate function: turn a list of same-keyed dicts into one dict whose
    values are numpy arrays stacked across the batch."""
    keys = batch[0].keys()
    return {k: np.array([sample[k] for sample in batch]) for k in keys}
105 |
106 | # for 010, 0110 etc.
def area_punish(nn_output, area_len = 3):
    """Penalty for short activation bursts (patterns like 0 1 0, 0 1 1 0).

    For each window of `area_len` consecutive frames the score approaches 1
    when both window edges are inactive (~0) while at least one interior frame
    is active. Input (batch, time); output (batch, time - area_len + 1).
    """
    assert area_len >= 3

    # both edges of the window are "off"
    edges_off = (1 - nn_output[:, :-area_len + 1]) * (1 - nn_output[:, area_len - 1:])

    # product over the interior: 1 only when every interior frame is "off"
    interior_all_off = 1
    for offset in range(1, area_len - 1):
        interior_all_off = interior_all_off * (1 - nn_output[:, offset:-area_len + 1 + offset])

    # punish windows whose interior is NOT fully off
    return edges_off * (1 - interior_all_off)
124 |
125 |
126 | # for 101, 1001 etc.
def reverse_area_punish(nn_output, area_len = 3):
    """Penalty for short gaps (patterns like 1 0 1, 1 0 0 1).

    High when both window edges are active while at least one interior frame
    is inactive. Input (batch, time); output (batch, time - area_len + 1).
    """
    assert area_len >= 3

    # both edges of the window are "on"
    edges_on = nn_output[:, :-area_len + 1] * nn_output[:, area_len - 1:]

    # 1 only when every interior frame is "on"
    interior_all_on = 1
    for offset in range(1, area_len - 1):
        interior_all_on = interior_all_on * nn_output[:, offset:-area_len + 1 + offset]

    return edges_on * (1 - interior_all_on)
138 |
139 |
def play_sequence(audio_chunk, f_s):
    """Blocking playback of an audio buffer at sample rate f_s.

    sounddevice is imported lazily so that importing this module does not
    require an audio backend — the original module-level import made every
    `import util` fail on headless machines without PortAudio.
    """
    import sounddevice as sd
    sd.play(audio_chunk, f_s, blocking = True)
143 |
144 |
145 |
146 | # ys list of y sequences
def plot_multi_sequences(x, ys, y_names, title = "", initial_visibility = True):
    """Interactive plotly line plot of several sequences over a shared x axis.

    Args:
        x: shared x values.
        ys: list of y sequences (each paired element-wise with x).
        y_names: legend / hover label per sequence.
        title: figure title.
        initial_visibility: when False, traces start hidden (legend-only).
    """
    import plotly.graph_objects as go

    # route the trace name through `meta` so it shows in the hover box, see
    # https://community.plotly.com/t/hovertemplate-does-not-show-name-property/36139/2
    # NOTE(review): the template literal was garbled in this copy of the file;
    # reconstructed with plotly's <br> line separators — confirm against history.
    fig = go.Figure(data = [
        go.Scatter(
            x = x, y = ys[i], name = y_names[i], meta = [y_names[i]],
            hovertemplate = '%{meta}<br>x=%{x}<br>y=%{y}',
        )
        for i in range(len(ys))
    ])

    fig.update_layout(
        title=title,
        xaxis_title="",
        yaxis_title="",
        font=dict(size=25),
        hoverlabel=dict(font_size=25),
        margin={"l":40, "r":40, "t":40, "b":40},
        autosize=True
    )

    if not initial_visibility:
        fig.update_traces(visible = 'legendonly')

    fig.show(config = {'showTips':False})
171 |
172 |
173 |
174 |
# pick the compute device once at import time; median_filter below relies on it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using {device.type}")
181 |
182 | # only dealing with vocal existence
def median_filter(preds, filter_size = 21):
    """Median filter used to stabilize the voiced/unvoiced decision.

    The sequence is median-filtered with a sliding window of `filter_size`,
    but the filtered value is only adopted at frames where it flips the
    on/off (nonzero/zero) state; everywhere else the original value is kept.

    Args:
        preds: 1-D numpy array of per-frame values (0 == unvoiced).
        filter_size: window length; even sizes pad one frame less on the right.

    Returns:
        numpy array with the same shape as `preds`.
    """
    # import sys
    # print(preds.shape)
    # oddness
    # assert filter_size % 2 == 1

    import torch.nn.functional as F
    preds = torch.from_numpy(preds).float().to(device)
    # pad so unfold() yields exactly one window per input frame
    if filter_size % 2 == 1:
        temp = F.pad(preds, (int(filter_size/2), int(filter_size/2)), "constant")
    else:
        temp = F.pad(preds, (int(filter_size/2), int(filter_size/2) - 1), "constant")
    # print(temp.shape, temp.unfold(dimension = -1, size = filter_size, step = 1).shape)
    preds_filtered = torch.median(temp.unfold(dimension = -1, size = filter_size, step = 1), dim = -1).values

    assert preds.shape == preds_filtered.shape

    preds_on_off = (preds != 0).int()
    preds_filtered_on_off = (preds_filtered != 0).int()

    # 0 -> 0, do not change
    # 1 -> 1, do not change
    # 0 -> 1, take the value
    # 1 -> 0, take the value
    # using multiple sizes (one for up and one for down) will cause inconsistency, hence avoid
    should_replace = preds_on_off*(1 - preds_filtered_on_off) + (1 - preds_on_off)*preds_filtered_on_off
    # print("Here")

    # plot_multi_sequences(torch.arange(len(preds)), [preds.cpu().numpy(), ((1 - should_replace)*preds + should_replace*preds_filtered).cpu().numpy()], ["1", "2"])

    return ((1 - should_replace)*preds + should_replace*preds_filtered).cpu().numpy()
214 |
if __name__ == "__main__":
    # Manual smoke test of median_filter / plot_multi_sequences.
    # NOTE(review): the randn result is immediately overwritten by the
    # alternating 0/1 pattern below, and the plot's x axis (2222 points) does
    # not match x's actual length (400) — leftover experimentation, it seems.
    x = torch.randn(2222)
    x = torch.arange(2).repeat(200).numpy()
    print(median_filter(x, filter_size = 20))

    plot_multi_sequences(torch.arange(2222), [x, median_filter(x)], ["1", "2"])
222 |
223 |
--------------------------------------------------------------------------------