├── fig
    ├── readme.md
    └── x1.png
├── util_esc50
    ├── readme.md
    ├── SpecAugment
    │   ├── __init__.py
    │   ├── readme.md
    │   ├── spec_augment_pytorch.py
    │   ├── spec_augment_tensorflow.py
    │   └── sparse_image_warp_pytorch.py
    ├── __pycache__
    │   ├── config.cpython-37.pyc
    │   └── utils.cpython-37.pyc
    ├── losses.py
    ├── cross_folds.py
    ├── config.py
    ├── pytorch_utils.py
    ├── evaluate.py
    ├── utils.py
    ├── data_generator.py
    ├── test.py
    ├── feature.py
    ├── main.py
    ├── audio.py
    ├── net.py
    ├── fold1_test.csv
    ├── fold2_test.csv
    ├── fold3_test.csv
    ├── fold4_test.csv
    └── fold5_test.csv
├── workspace
    └── readme.md
├── wrong_list
    └── readme.md
├── runme.sh
└── README.md


/fig/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/util_esc50/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/workspace/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/wrong_list/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/util_esc50/SpecAugment/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/util_esc50/SpecAugment/readme.md:
--------------------------------------------------------------------------------
1 | 
2 | 


--------------------------------------------------------------------------------
/fig/x1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Hadryan/TFNet-for-Environmental-Sound-Classification/HEAD/fig/x1.png


--------------------------------------------------------------------------------
/util_esc50/__pycache__/config.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Hadryan/TFNet-for-Environmental-Sound-Classification/HEAD/util_esc50/__pycache__/config.cpython-37.pyc


--------------------------------------------------------------------------------
/util_esc50/__pycache__/utils.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Hadryan/TFNet-for-Environmental-Sound-Classification/HEAD/util_esc50/__pycache__/utils.cpython-37.pyc


--------------------------------------------------------------------------------
/util_esc50/losses.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | import torch.nn.functional as F
 3 | 
 4 | 
 5 | def nll_loss(output, target):
 6 |     '''Negative likelihood loss. The output should be obtained using F.log_softmax(x). 
 7 |     
 8 |     Args:
 9 |       output: (N, classes_num)
10 |       target: (N, classes_num)
11 |     '''
12 |     loss = - torch.mean(target * output)
13 |     return loss


--------------------------------------------------------------------------------
/runme.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # You need to modify this path to your downloaded dataset directory
 3 | # Download ESC-10 and ESC-50 : https://github.com/karolpiczak/ESC-50
 4 | # Download UrbanSound8k : https://urbansounddataset.weebly.com/urbansound8k.html
 5 | DATASET_DIR='/.../ESC-50'
 6 | #DATASET_DIR='/.../ESC-10'
 7 | #DATASET_DIR='/.../UrbanSound8k'
 8 | 
 9 | # You need to modify this path to your workspace to store features and models
10 | WORKSPACE='/workspace'
11 | 
12 | # Hyper-parameters
13 | GPU_ID=0
14 | MODEL_TYPE='TFNet'
15 | BATCH_SIZE=32
16 | 
17 | ############ Train and test on ESC50 dataset ############
18 | # Calculate feature
19 | python util_esc50/feature.py calculate_feature_for_all_audio_files --dataset_dir=$DATASET_DIR --workspace=$WORKSPACE
20 | 
21 | # Train and test
22 | CUDA_VISIBLE_DEVICES=$GPU_ID python util/main.py train --dataset_dir=$DATASET_DIR --workspace=$WORKSPACE --holdout_fold=1 --model_type=$MODEL_TYPE --batch_size=$BATCH_SIZE --cuda
23 | 


--------------------------------------------------------------------------------
/util_esc50/cross_folds.py:
--------------------------------------------------------------------------------
 1 | # Cross-fold lists generator for ESC-50 Dataset.
 2 | 
 3 | import pandas as pd
 4 | 
 5 | def read_meta(metadata_path):
 6 |     df = pd.read_csv(metadata_path, sep=',')
 7 |     return df
 8 | 
 9 | def make_folds(df, test_fold, train_fold):
10 |     test_path = 'fold'+ str(test_fold)+ '_test.csv'
11 |     train_path = 'fold'+ str(test_fold)+ '_train.csv'
12 |     data = df
13 |     data_test = data[data['fold'].isin([test_fold])]
14 |     fp_test = open(test_path, 'w')
15 |     fp_test.write(data_test.to_csv(header=True, index=False))
16 |     
17 |     data_train = data[data['fold'].isin(train_fold)]
18 |     fp_train = open(train_path, 'w')
19 |     fp_train.write(data_train.to_csv(header=True, index=False))
20 |   
21 | 
if __name__ == '__main__':
    # You need to modify this path to your downloaded dataset directory
    metadata_path = '/.../ESC-50/meta/esc50.csv'
    df = read_meta(metadata_path)

    # Hold out each of the five folds in turn; the other four form the train list.
    all_folds = [1, 2, 3, 4, 5]
    for held_out in all_folds:
        make_folds(df, held_out, [fold for fold in all_folds if fold != held_out])


--------------------------------------------------------------------------------
/util_esc50/config.py:
--------------------------------------------------------------------------------
 1 | sample_rate = 44100
 2 | window_size = 1764
 3 | hop_size = 882
 4 | mel_bins = 40
 5 | fmin = 50       # Hz
 6 | fmax = int(sample_rate/4)    # Hz
 7 | 
 8 | frames_per_second = sample_rate // hop_size
 9 | audio_duration = 5     # Audio recordings in ESC-50 are all 5 seconds
10 | total_samples = sample_rate * audio_duration
11 | audio_duration_clip = 5
12 | audio_stride_clip = 1
13 | total_frames = frames_per_second * audio_duration
14 | frames_num_clip = int(frames_per_second * audio_duration_clip)
15 | total_samples_clip = int(sample_rate * audio_duration_clip)
16 | frames_num = frames_per_second * audio_duration_clip
17 | audio_num = (audio_duration-audio_duration_clip)//audio_stride_clip + 1
18 | labels = [ 'dog', 'rooster', 'pig', 'cow', 'frog', 'cat', 'hen', 'insects',
19 |           'sheep', 'crow', 'rain', 'sea_waves', 'crackling_fire', 'crickets', 
20 |           'chirping_birds', 'water_drops', 'wind', 'pouring_water', 'toilet_flush',
21 |           'thunderstorm', 'crying_baby', 'sneezing', 'clapping', 'breathing', 'coughing',
22 |           'footsteps', 'laughing', 'brushing_teeth', 'snoring', 'drinking_sipping',
23 |           'door_wood_knock', 'mouse_click', 'keyboard_typing', 'door_wood_creaks',
24 |           'can_opening', 'washing_machine', 'vacuum_cleaner', 'clock_alarm',
25 |           'clock_tick', 'glass_breaking', 'helicopter', 'chainsaw', 'siren', 'car_horn',
26 |           'engine', 'train', 'church_bells', 'airplane', 'fireworks', 'hand_saw']
27 | classes_num = len(labels)
28 | lb_to_idx = {lb: idx for idx, lb in enumerate(labels)}
29 | idx_to_lb = {idx: lb for idx, lb in enumerate(labels)}
30 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # TFNet-for-Environmental-Sound-Classification
 2 | 
 3 | by *Helin Wang*, *Yuexian Zou*, *Dading Chong*, *Wenwu Wang*
 4 | 
 5 | ## Abstract
 6 | Convolutional neural networks (CNN) are one of the best-performing neural network architectures for environmental sound classification (ESC). Recently, attention mechanisms have been used in CNN to capture the useful information from the audio signal for sound classification, especially for weakly labelled data where the timing information about the acoustic events is not available in the training data, apart from the availability of sound class labels. In these methods, however, the inherent time-frequency characteristics and variations are not explicitly exploited when obtaining the deep features. In this paper, we propose a new method, called time-frequency enhancement block (TFBlock), in which temporal attention and frequency attention are employed to enhance the features from relevant frames and frequency bands. Compared with other attention mechanisms, in our method, parallel branches are constructed which allow the temporal and frequency features to be attended respectively in order to mitigate interference from the sections where no sound events happened in the acoustic environments. The experiments on three benchmark ESC datasets show that our method improves the classification performance and also exhibits robustness to noise.
 7 | 
 8 | ## Introduction
 9 | This repository is for the ICME 2020 paper (submitted), '[Learning discriminative and robust time-frequency representations for environmental sound classification](https://arxiv.org/pdf/1912.06808.pdf)'.
10 | 
11 | ## Our network
12 | 
13 | <img src="./fig/x1.png" width="100%" alt="TFNet">
14 | 


--------------------------------------------------------------------------------
/util_esc50/pytorch_utils.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import torch
 3 | import config
 4 | 
 5 | def move_data_to_gpu(x, cuda):
 6 |     if 'float' in str(x.dtype):
 7 |         x = torch.Tensor(x)
 8 |     elif 'int' in str(x.dtype):
 9 |         x = torch.LongTensor(x)
10 |     else:
11 |         raise Exception("Error!")
12 | 
13 |     if cuda:
14 |         x = x.cuda()
15 | 
16 |     return x
17 |     
18 |     
def append_to_dict(dict, key, value):
    '''Append `value` to the list stored under `key`, creating the list on first use.'''
    dict.setdefault(key, []).append(value)
24 |     
25 |     
def forward(model, generate_func, cuda, return_input=False, 
    return_target=False):
    '''Forward data to model in mini-batch. 

    For every mini-batch the model is applied to each of the
    `config.audio_num` clips found along axis 1 of the 'feature' array, and
    the per-clip outputs are summed.
    
    Args: 
      model: object
      generate_func: iterable yielding dicts with 'feature' and 'filename'
          entries (and optionally 'target')
      cuda: bool
      return_input: bool, also collect the 'feature' arrays
      return_target: bool, also collect the 'target' arrays when present

    Returns:
      dict mapping 'filename', 'output' (and optionally 'feature', 'target')
      to arrays concatenated over all mini-batches along axis 0.
    '''
    output_dict = {}
    audio_num = config.audio_num

    # Switching to evaluation mode once is enough; it used to be repeated
    # for every clip of every mini-batch.
    model.eval()

    # Evaluate on mini-batch
    for batch_data_dict in generate_func:
        batch_output = None
        for i in range(audio_num):
            # Predict on the i-th clip of every item in the batch
            batch_feature = move_data_to_gpu(batch_data_dict['feature'][:, i, :, :, :], cuda)

            with torch.no_grad():
                clip_output = model(batch_feature)

            # Accumulate the outputs over the clips
            if batch_output is None:
                batch_output = clip_output
            else:
                batch_output = batch_output + clip_output

        append_to_dict(output_dict, 'filename', batch_data_dict['filename'])

        append_to_dict(output_dict, 'output', batch_output.data.cpu().numpy())

        if return_input:
            append_to_dict(output_dict, 'feature', batch_data_dict['feature'])

        if return_target and 'target' in batch_data_dict:
            append_to_dict(output_dict, 'target', batch_data_dict['target'])

    for key in output_dict:
        output_dict[key] = np.concatenate(output_dict[key], axis=0)

    return output_dict
70 | 
71 | 


--------------------------------------------------------------------------------
/util_esc50/evaluate.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import numpy as np
  4 | import time
  5 | import logging
  6 | import matplotlib.pyplot as plt
  7 | from sklearn import metrics
  8 | import datetime
  9 | import _pickle as cPickle
 10 | import sed_eval
 11 | from utils import get_filename, inverse_scale
 12 | from pytorch_utils import forward
 13 | import config
 14 | 
 15 | 
 16 | class Evaluator(object):
 17 |     def __init__(self, model, data_generator, cuda=True):
 18 |         '''Evaluator to evaluate prediction performance. 
 19 |         
 20 |         Args: 
 21 |           model: object
 22 |           data_generator: object
 23 |           cuda: bool
 24 |         '''
 25 |         
 26 |         self.model = model
 27 |         self.data_generator = data_generator
 28 |         self.cuda = cuda
 29 |         
 30 |         self.frames_per_second = config.frames_per_second
 31 |         self.labels = config.labels
 32 |         self.in_domain_classes_num = len(config.labels) - 1
 33 |         self.all_classes_num = len(config.labels)
 34 |         self.idx_to_lb = config.idx_to_lb
 35 |         self.lb_to_idx = config.lb_to_idx
 36 | 
 37 |     def evaluate(self, data_type, iteration, max_iteration=None, verbose=False):
 38 |         '''Evaluate the performance. 
 39 |         
 40 |         Args: 
 41 |           data_type: 'train' | 'validate'
 42 |           max_iteration: None | int, maximum iteration to run to speed up evaluation
 43 |           verbose: bool
 44 |         '''
 45 | 
 46 |         generate_func = self.data_generator.generate_validate(
 47 |             data_type=data_type, 
 48 |             max_iteration=max_iteration)
 49 |         
 50 |         # Forward
 51 |         output_dict = forward(
 52 |             model=self.model, 
 53 |             generate_func=generate_func, 
 54 |             cuda=self.cuda, 
 55 |             return_target=True)
 56 |         file = 'wrong_list/'+ 'wrong_classification_' + str(iteration)
 57 |         output = output_dict['output']  # (audios_num, in_domain_classes_num)
 58 |         target = output_dict['target']  # (audios_num, in_domain_classes_num)
 59 |         filename = output_dict['filename']
 60 | 
 61 |         prob = np.exp(output)   # Subtask a, b use log softmax as output
 62 | 
 63 |         
 64 |         # Evaluate
 65 |         y_true = np.argmax(target, axis=-1)
 66 |         y_pred = np.argmax(prob, axis=-1)
 67 | #         print(y_pred)
 68 |         if data_type=='validate':
 69 |             for i in range(len(y_true)):
 70 |                 if y_true[i] != y_pred[i]:
 71 |                     with open(file,'a') as f:
 72 |                         audioname = filename[i]
 73 |                         true_idx = str(y_true[i])
 74 |                         pred_idx = str(y_pred[i])
 75 |                         true_label = self.idx_to_lb[y_true[i]]
 76 |                         pred_label = self.idx_to_lb[y_pred[i]]
 77 |                         f.write(audioname+'\t'+true_idx+'\t'+true_label+'\t'+pred_idx+'\t'+pred_label+'\n')
 78 |                 
 79 |     
 80 |         confusion_matrix = metrics.confusion_matrix(y_true, y_pred, labels=np.arange(self.in_domain_classes_num))
 81 |   
 82 |         classwise_accuracy = np.diag(confusion_matrix) \
 83 |             / np.sum(confusion_matrix, axis=-1)
 84 |         
 85 |         logging.info('Data type: {}'.format(data_type))
 86 |         
 87 | 
 88 |         logging.info('    Average ccuracy: {:.3f}'.format(np.mean(classwise_accuracy)))
 89 |         
 90 |         if verbose:
 91 |             classes_num = len(classwise_accuracy)
 92 |             for n in range(classes_num):
 93 |                 logging.info('{:<20}{:.3f}'.format(self.labels[n], 
 94 |                     classwise_accuracy[n]))
 95 |                     
 96 |             logging.info(confusion_matrix)
 97 | 
 98 |         statistics = {
 99 |             'accuracy': classwise_accuracy, 
100 |             'confusion_matrix': confusion_matrix}
101 | 
102 |         return statistics
103 | 
104 | 
105 | 
class StatisticsContainer(object):
    def __init__(self, statistics_path):
        '''Container of statistics during training. 
        
        Args:
          statistics_path: string, path to write out
        '''
        self.statistics_path = statistics_path

        # Timestamped sibling file, so that a later run writing to the same
        # statistics_path does not overwrite this run's record.
        self.backup_statistics_path = '{}_{}.pickle'.format(
            os.path.splitext(self.statistics_path)[0], 
                datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))

        self.statistics_dict = {'data': []}

    def append_and_dump(self, iteration, statistics):
        '''Append statistics to container and dump the container. 

        The `statistics` dict is modified in place: an 'iteration' key is
        added before it is stored.
        
        Args:
          iteration: int
          statistics: dict of statistics
        '''
        statistics['iteration'] = iteration
        self.statistics_dict['data'].append(statistics)

        # Context managers close (and flush) the pickle files; the handles
        # used to be left open.
        for path in (self.statistics_path, self.backup_statistics_path):
            with open(path, 'wb') as f:
                cPickle.dump(self.statistics_dict, f)

        logging.info('    Dump statistics to {}'.format(self.statistics_path))


--------------------------------------------------------------------------------
/util_esc50/SpecAugment/spec_augment_pytorch.py:
--------------------------------------------------------------------------------
  1 | # # Reference : https://github.com/DemisEom/SpecAugment
  2 | # Copyright 2019 RnD at Spoon Radio
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ==============================================================================
 16 | """SpecAugment implementation operating directly on array data (no TensorFlow required).
 17 | Related paper : https://arxiv.org/pdf/1904.08779.pdf
 18 | 
 19 | The paper summarizes the parameters used for each open dataset in Table 1.
 20 | -----------------------------------------
 21 | Policy | W  | F  | m_F |  T  |  p  | m_T
 22 | -----------------------------------------
 23 | None   |  0 |  0 |  -  |  0  |  -  |  -
 24 | -----------------------------------------
 25 | LB     | 80 | 27 |  1  | 100 | 1.0 | 1
 26 | -----------------------------------------
 27 | LD     | 80 | 27 |  2  | 100 | 1.0 | 2
 28 | -----------------------------------------
 29 | SM     | 40 | 15 |  2  |  70 | 0.2 | 2
 30 | -----------------------------------------
 31 | SS     | 40 | 27 |  2  |  70 | 0.2 | 2
 32 | -----------------------------------------
 33 | LB : LibriSpeech basic
 34 | LD : LibriSpeech double
 35 | SM : Switchboard mild
 36 | SS : Switchboard strong
 37 | """
 38 | 
 39 | import librosa
 40 | import librosa.display
 41 | import math
 42 | import numpy as np
 43 | import random
 44 | import matplotlib
 45 | # matplotlib.use('TkAgg')
 46 | import matplotlib.pyplot as plt
 47 | 
 48 | 
 49 | def spec_augment(mel_spectrogram, using_time_warping=False, using_frequency_masking=False, using_time_masking=False,
 50 |                  frequency_masking_para=4, time_masking_para=4,
 51 |                  frequency_mask_num=2, time_mask_num=2):
 52 |     """Spec augmentation Calculation Function.
 53 | 
 54 |     'SpecAugment' have 3 steps for audio data augmentation.
 55 |     first step is time warping using Tensorflow's image_sparse_warp function.
 56 |     Second step is frequency masking, last step is time masking.
 57 | 
 58 |     # Arguments:
 59 |       mel_spectrogram(numpy array): audio file path of you want to warping and masking.
 60 |       time_warping_para(float): Augmentation parameter, "time warp parameter W".
 61 |         If none, default = 80 for LibriSpeech.
 62 |       frequency_masking_para(float): Augmentation parameter, "frequency mask parameter F"
 63 |         If none, default = 100 for LibriSpeech.
 64 |       time_masking_para(float): Augmentation parameter, "time mask parameter T"
 65 |         If none, default = 27 for LibriSpeech.
 66 |       frequency_mask_num(float): number of frequency masking lines, "m_F".
 67 |         If none, default = 1 for LibriSpeech.
 68 |       time_mask_num(float): number of time masking lines, "m_T".
 69 |         If none, default = 1 for LibriSpeech.
 70 | 
 71 |     # Returns
 72 |       mel_spectrogram(numpy array): warped and masked mel spectrogram.
 73 |     """
 74 |     # mel_spectrogram:
 75 |     # (batch_size, times_steps, freq_bins)
 76 |     # v : freq_bins
 77 |     v = mel_spectrogram.shape[3]
 78 |     # tau : times_steps
 79 |     tau = mel_spectrogram.shape[2]
 80 |     num = mel_spectrogram.shape[0]
 81 | 
 82 | 
 83 |     warped_mel_spectrogram = mel_spectrogram
 84 |     # Step 1 : Time warping (TO DO...)
 85 |     if using_time_warping:
 86 |         for n in range(num):
 87 |             for i in range(tau):
 88 |                 for j in range(v):
 89 |                     offset_x = random.randint(0, i-1)
 90 |                     warped_mel_spectrogram[n, :, i, j] = mel_spectrogram[n, :, (i + offset_x) % tau, j]
 91 | 
 92 | 
 93 |     # Step 2 : Frequency masking
 94 |     if using_frequency_masking:
 95 |         for n in range(num):
 96 |             for i in range(frequency_mask_num):
 97 |                 f = np.random.uniform(low=0.0, high=frequency_masking_para)
 98 |                 f = int(f)
 99 |                 f0 = random.randint(0, v - f)
100 |                 warped_mel_spectrogram[n, :, :, f0:f0 + f] = 0
101 | 
102 |     # Step 3 : Time masking
103 |     if using_time_masking:
104 |         for n in range(num):
105 |             for i in range(time_mask_num):
106 |                 t = np.random.uniform(low=0.0, high=time_masking_para)
107 |                 t = int(t)
108 |                 t0 = random.randint(0, tau - t)
109 |                 warped_mel_spectrogram[n, :, t0:t0 + t, :] = 0
110 | 
111 |     return warped_mel_spectrogram
112 | 
113 | 
def visualization_spectrogram(mel_spectrogram, title):
    """Plot a mel spectrogram on a dB scale and show the figure.

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    plt.figure(figsize=(10, 4))
    # Convert power to dB relative to the maximum before drawing.
    spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
    librosa.display.specshow(
        spectrogram_db,
        y_axis='mel',
        fmax=14000,
        x_axis='time',
    )
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()
128 | 
129 | 


--------------------------------------------------------------------------------
/util_esc50/SpecAugment/spec_augment_tensorflow.py:
--------------------------------------------------------------------------------
  1 | # Reference : https://github.com/DemisEom/SpecAugment
  2 | # Copyright 2019 RnD at Spoon Radio
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ==============================================================================
 16 | """SpecAugment Implementation for Tensorflow.
 17 | Related paper : https://arxiv.org/pdf/1904.08779.pdf
 18 | 
 19 | The paper summarizes the parameters used for each open dataset in Table 1.
 20 | -----------------------------------------
 21 | Policy | W  | F  | m_F |  T  |  p  | m_T
 22 | -----------------------------------------
 23 | None   |  0 |  0 |  -  |  0  |  -  |  -
 24 | -----------------------------------------
 25 | LB     | 80 | 27 |  1  | 100 | 1.0 | 1
 26 | -----------------------------------------
 27 | LD     | 80 | 27 |  2  | 100 | 1.0 | 2
 28 | -----------------------------------------
 29 | SM     | 40 | 15 |  2  |  70 | 0.2 | 2
 30 | -----------------------------------------
 31 | SS     | 40 | 27 |  2  |  70 | 0.2 | 2
 32 | -----------------------------------------
 33 | LB : LibriSpeech basic
 34 | LD : LibriSpeech double
 35 | SM : Switchboard mild
 36 | SS : Switchboard strong
 37 | """
 38 | 
 39 | import librosa
 40 | import librosa.display
 41 | import tensorflow as tf
 42 | from tensorflow.contrib.image import sparse_image_warp
 43 | import numpy as np
 44 | import random
 45 | import matplotlib
 46 | # matplotlib.use('TkAgg')
 47 | import matplotlib.pyplot as plt
 48 | 
 49 | 
def spec_augment(mel_spectrogram, time_warping_para=80, frequency_masking_para=27,
                 time_masking_para=100, frequency_mask_num=1, time_mask_num=1):
    """Apply SpecAugment (time warp, frequency mask, time mask) to one spectrogram.

    'SpecAugment' has 3 steps for audio data augmentation.
    The first step is time warping using TensorFlow's sparse_image_warp function.
    The second step is frequency masking, the last step is time masking.

    # Arguments:
      mel_spectrogram(numpy array): 2-D mel spectrogram to warp and mask;
        axis 0 is treated as frequency (v) and axis 1 as time (tau).
      time_warping_para(float): Augmentation parameter, "time warp parameter W".
        Default = 80.
      frequency_masking_para(float): Augmentation parameter, "frequency mask parameter F".
        Default = 27.
      time_masking_para(float): Augmentation parameter, "time mask parameter T".
        Default = 100.
      frequency_mask_num(int): number of frequency masking lines, "m_F".
        Default = 1.
      time_mask_num(int): number of time masking lines, "m_T".
        Default = 1.

    # Returns
      mel_spectrogram(numpy array): warped and masked mel spectrogram of
        shape (v, tau).
    """
    v = mel_spectrogram.shape[0]
    tau = mel_spectrogram.shape[1]

    # Step 1 : Time warping
    # Image warping control point setting.
    mel_spectrogram_holder = tf.placeholder(tf.float32, shape=[1, v, tau, 1])
    location_holder = tf.placeholder(tf.float32, shape=[1, 1, 2])
    destination_holder = tf.placeholder(tf.float32, shape=[1, 1, 2])

    center_position = v/2
    # NOTE(review): requires tau > 2 * time_warping_para, otherwise the
    # randint range is empty and numpy raises.
    random_point = np.random.randint(low=time_warping_para, high=tau - time_warping_para)
    # Choose the warping distance.
    w = np.random.uniform(low=0, high=time_warping_para)

    # Single control point on the middle frequency row, moved by w along time.
    control_point_locations = [[center_position, random_point]]
    control_point_locations = np.float32(np.expand_dims(control_point_locations, 0))

    control_point_destination = [[center_position, random_point + w]]
    control_point_destination = np.float32(np.expand_dims(control_point_destination, 0))

    # Reshape the spectrogram to [1, v, tau, 1] float32 so it can be fed to the placeholder.
    mel_spectrogram = mel_spectrogram.reshape([1, mel_spectrogram.shape[0], mel_spectrogram.shape[1], 1])
    mel_spectrogram = np.float32(mel_spectrogram)

    warped_mel_spectrogram_op, _ = sparse_image_warp(mel_spectrogram_holder,
                                                     source_control_point_locations=location_holder,
                                                     dest_control_point_locations=destination_holder,
                                                     interpolation_order=2,
                                                     regularization_weight=0,
                                                     num_boundary_points=1
                                                     )

    # Change warp result's data type to numpy array for masking step.
    feed_dict = {mel_spectrogram_holder:mel_spectrogram,
                 location_holder:control_point_locations,
                 destination_holder:control_point_destination}

    # A new session is created (and new graph nodes are added above) on every call.
    with tf.Session() as sess:
        warped_mel_spectrogram = sess.run(warped_mel_spectrogram_op, feed_dict=feed_dict)

    # Drop the leading batch axis and trailing channel axis again: (v, tau).
    warped_mel_spectrogram = warped_mel_spectrogram.reshape([warped_mel_spectrogram.shape[1],
                                                             warped_mel_spectrogram.shape[2]])

    # Step 2 : Frequency masking
    for i in range(frequency_mask_num):
        f = np.random.uniform(low=0.0, high=frequency_masking_para)
        f = int(f)
        f0 = random.randint(0, v - f)
        warped_mel_spectrogram[f0:f0 + f, :] = 0

    # Step 3 : Time masking
    for i in range(time_mask_num):
        t = np.random.uniform(low=0.0, high=time_masking_para)
        t = int(t)
        t0 = random.randint(0, tau - t)
        warped_mel_spectrogram[:, t0:t0 + t] = 0

    return warped_mel_spectrogram
132 | 
133 | 
def visualization_spectrogram(mel_spectrogram, title):
    """Plot a mel spectrogram on a dB scale and show the figure.

    # Arguments:
      mel_spectrogram(ndarray): mel_spectrogram to visualize.
      title(String): plot figure's title
    """
    plt.figure(figsize=(10, 4))
    # Convert power to dB relative to the maximum before drawing.
    spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
    librosa.display.specshow(
        spectrogram_db,
        y_axis='mel',
        fmax=8000,
        x_axis='time',
    )
    # plt.colorbar(format='%+2.0f dB')
    plt.title(title)
    plt.tight_layout()
    plt.show()
148 | 
149 | 


--------------------------------------------------------------------------------
/util_esc50/utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import os
  3 | import sys
  4 | import numpy as np
  5 | import soundfile
  6 | import librosa
  7 | import h5py
  8 | import math
  9 | import pandas as pd
 10 | from sklearn import metrics
 11 | import logging
 12 | import matplotlib.pyplot as plt
 13 | import config
 14 | 
 15 | def mixup_data(x, y, alpha=0.2):
 16 | 
 17 |     '''Compute the mixup data. Return mixed inputs, pairs of targets, and lambda'''
 18 |     if alpha > 0.:
 19 |         while True:
 20 |             lam = np.random.beta(alpha, alpha)
 21 |             if lam > 0.65 or lam < 0.35 :
 22 |                 break
 23 |     else:
 24 |         lam = 1.
 25 |     batch_size = x.size()[0]
 26 |     index = torch.randperm(batch_size).cuda()
 27 |     mixed_x = lam * x + (1 - lam) * x[index, :]
 28 |     y_a, y_b = y, y[index]
 29 |     return mixed_x, y_a, y_b, lam
 30 | 
 31 | def mixup_criterion(class_criterion, pred, y_a, y_b, lam):
 32 |     return lam * class_criterion(pred, y_a) + (1 - lam) * class_criterion(pred, y_b)
 33 | def create_folder(fd):
 34 |     if not os.path.exists(fd):
 35 |         os.makedirs(fd)
 36 | 
 37 | def get_filename(path):
 38 |     path = os.path.realpath(path)
 39 |     name_ext = path.split('/')[-1]
 40 |     name = os.path.splitext(name_ext)[0]
 41 |     return name
 42 | 
 43 | 
 44 | def create_logging(log_dir, filemode):
 45 |     create_folder(log_dir)
 46 |     i1 = 0
 47 | 
 48 |     while os.path.isfile(os.path.join(log_dir, '{:04d}.log'.format(i1))):
 49 |         i1 += 1
 50 |         
 51 |     log_path = os.path.join(log_dir, '{:04d}.log'.format(i1))
 52 |     logging.basicConfig(
 53 |         level=logging.DEBUG,
 54 |         format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
 55 |         datefmt='%a, %d %b %Y %H:%M:%S',
 56 |         filename=log_path,
 57 |         filemode=filemode)
 58 | 
 59 |     # Print to console
 60 |     console = logging.StreamHandler()
 61 |     console.setLevel(logging.INFO)
 62 |     formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
 63 |     console.setFormatter(formatter)
 64 |     logging.getLogger('').addHandler(console)
 65 |     
 66 |     return logging
 67 |     
def data_pre(audio, audio_length, fs, audio_skip):
    '''Squeeze low-amplitude stretches out of `audio` and refill the end.

    The signal is scanned with windows of two strides that advance by one
    stride, where stride = int(audio_skip * fs / 2) samples. A window whose
    peak absolute amplitude is below 0.005 is dropped by shifting the samples
    that follow it two strides to the left. Afterwards the strides freed at
    the end of the first audio_length * fs samples are overwritten with
    samples copied from the start of the array.

    `audio` is modified in place (`out` is an alias, not a copy) and the same
    array object is returned.

    Args:
      audio: numpy array of samples, sliced along its first axis.
          NOTE(review): the slice assignments appear to assume at least
          audio_length * fs samples are present -- confirm for shorter clips.
      audio_length: clip length; multiplied by fs to get a sample count
          (presumably seconds)
      fs: sample rate
      audio_skip: length of one analysis window, in the same unit as
          audio_length (a window is two strides = audio_skip * fs samples)

    Returns:
      out: the input array after in-place editing

    NOTE(review): when every window is below the threshold, `loop` ends at 0
    or -1 and the recursive call below looks like it receives unchanged data,
    i.e. it would recurse until RecursionError -- confirm with an all-silent
    clip.
    '''
    stride = int(audio_skip * fs /2)
    # Number of overlapping two-stride windows in audio_length * fs samples.
    loop =  int((audio_length * fs) // stride - 1)
    area = 0  # unused
    maxamp = 0.
    i = 0
    out = audio  # alias: all edits below write into the caller's array
    while i < loop:
        win_data = out[i*stride: (i+2)*stride]
        maxamp = np.max(np.abs(win_data))
        if maxamp < 0.005:
            # Quiet window: shrink the valid region by two strides and pull
            # the following samples over it. `i` is not advanced so the data
            # that moved into this position is examined next.
            loop = loop - 2
            out[i*stride: (loop+1)*stride] = out[(i+2)*stride: (loop+3)*stride]
        else:
            i = i + 1
    # Number of strides that were removed above.
    length = (audio_length * fs) // stride - loop - 1
    if length == 0:
        return out
    else:
        # Fill the freed tail with a copy of the beginning of the signal.
        out[(loop + 1) * stride:(audio_length * fs // stride) * stride] = out[0:length * stride]
        if length < (audio_length * fs//stride)/2:
            # Less than half was removed: same assignment as above, then done.
            out[(loop+1)*stride:(audio_length * fs//stride)*stride] = out[0:length*stride]
            return out
        else:
            # At least half was removed: duplicate the kept part once and
            # run the whole procedure again on the result.
            out[(loop + 1) * stride:(loop + 1)*2  * stride] = out[0:(loop + 1) * stride]
            return data_pre(out, audio_length, fs, audio_skip)
 94 | 
 95 | def read_audio(audio_path, target_fs=None):
 96 |     (audio, fs) = soundfile.read(audio_path)
 97 | #     audio = data_pre(audio=audio, audio_length=5, fs=fs, audio_skip=0.1)
 98 |     if audio.ndim > 1:
 99 |         audio = np.mean(audio, axis=1)
100 |         
101 |     if target_fs is not None and fs != target_fs:
102 |         audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
103 |         fs = target_fs
104 |         
105 |     return audio, fs
def read_audio_1D(audio_path, target_fs=None):
    '''Load an audio file, run data_pre on it, then downmix and resample.

    data_pre is called with fixed settings (audio_length=5, audio_skip=0.1)
    on the raw samples before any channel averaging.

    Args:
      audio_path: string, path handed to soundfile.read
      target_fs: int or None, requested sample rate; None keeps the rate
          reported by soundfile

    Returns:
      (audio, fs): samples and the sample rate they are expressed in
    '''
    raw_audio, fs = soundfile.read(audio_path)
    audio = data_pre(audio=raw_audio, audio_length=5, fs=fs, audio_skip=0.1)

    # Data with more than one dimension is averaged over axis 1.
    has_channel_axis = audio.ndim > 1
    if has_channel_axis:
        audio = np.mean(audio, axis=1)

    needs_resampling = target_fs is not None and fs != target_fs
    if not needs_resampling:
        return audio, fs

    resampled = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
    return resampled, target_fs
117 | 
118 | 
def read_left_audio(audio_path, target_fs=None):
    '''Load the first (left) channel of an audio file, optionally resampled.

    Args:
      audio_path: string, path handed to soundfile.read
      target_fs: int or None, requested sample rate; None keeps the rate
          reported by soundfile

    Returns:
      (audio, fs): 1-D samples and the sample rate they are expressed in
    '''
    (audio, fs) = soundfile.read(audio_path)

    if audio.ndim > 1:
        # soundfile.read returns (frames, channels), so the channel is
        # selected on axis 1; audio[0] would be the first frame instead.
        # The copy makes the strided column contiguous for the resampler.
        audio = np.ascontiguousarray(audio[:, 0])

    if target_fs is not None and fs != target_fs:
        audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
        fs = target_fs

    return audio, fs
130 | 
def read_side_audio(audio_path, target_fs=None):
    '''Load the side signal (first minus second channel) of an audio file.

    Files that soundfile returns as 1-D data are passed through unchanged.

    Args:
      audio_path: string, path handed to soundfile.read
      target_fs: int or None, requested sample rate; None keeps the rate
          reported by soundfile

    Returns:
      (audio, fs): 1-D samples and the sample rate they are expressed in
    '''
    (audio, fs) = soundfile.read(audio_path)

    if audio.ndim > 1:
        # soundfile.read returns (frames, channels), so channels live on
        # axis 1; audio[0] - audio[1] would subtract two frames instead.
        audio = audio[:, 0] - audio[:, 1]

    if target_fs is not None and fs != target_fs:
        audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
        fs = target_fs

    return audio, fs
142 | 
def read_right_audio(audio_path, target_fs=None):
    '''Load the second (right) channel of an audio file, optionally resampled.

    Files that soundfile returns as 1-D data are passed through unchanged.

    Args:
      audio_path: string, path handed to soundfile.read
      target_fs: int or None, requested sample rate; None keeps the rate
          reported by soundfile

    Returns:
      (audio, fs): 1-D samples and the sample rate they are expressed in
    '''
    (audio, fs) = soundfile.read(audio_path)

    if audio.ndim > 1:
        # soundfile.read returns (frames, channels), so the channel is
        # selected on axis 1; audio[1] would be the second frame instead.
        # The copy makes the strided column contiguous for the resampler.
        audio = np.ascontiguousarray(audio[:, 1])

    if target_fs is not None and fs != target_fs:
        audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
        fs = target_fs

    return audio, fs
154 |     
def pad_truncate_sequence(x, max_len):
    '''Force a sequence to exactly `max_len` entries.

    Shorter input is extended with trailing zeros, longer input is cut off
    after the first `max_len` entries.
    '''
    missing = max_len - len(x)
    if missing > 0:
        padding = np.zeros(missing)
        return np.concatenate((x, padding))
    return x[0:max_len]
161 |     
162 |    
163 |     
def scale(x, mean, std):
    '''Standardize x: subtract `mean`, then divide by `std`.'''
    centered = x - mean
    return centered / std
166 |     
167 |     
def inverse_scale(x, mean, std):
    '''Undo `scale`: multiply by `std`, then add `mean`.'''
    rescaled = x * std
    return rescaled + mean
170 |     
171 |         
172 |         
def read_metadata(metadata_path):
    '''Read a metadata csv file into a dict of numpy arrays.

    The 'filename' column is required. The columns 'fold', 'target',
    'category', 'esc10', 'src_file' and 'take' are copied when present;
    all other columns are ignored.

    Returns:
      meta_dict: dict of meta data, e.g.:
         {'filename': np.array(['1-100032-A-0.wav', '1-100038-A-14.wav', ...]),
          'fold': np.array([1, 1, ...]),
          'target': np.array([0, 14, ...]),
          'category': np.array(['dog', 'chirping_birds', ...]),
          ...
         }
    '''
    table = pd.read_csv(metadata_path, sep=',')

    # File names go through a Python list before being turned into an array.
    meta_dict = {'filename': np.array(table['filename'].tolist())}

    optional_columns = (
        'fold', 'target', 'category', 'esc10', 'src_file', 'take')
    for column in optional_columns:
        if column in table.keys():
            meta_dict[column] = np.array(table[column])

    return meta_dict
207 |     
208 |     
def sparse_to_categorical(x, n_out):
    '''One-hot encode integer class indices along a new trailing axis.

    Args:
      x: numpy array of class indices, any shape; cast to int
      n_out: int, number of classes

    Returns:
      float array of shape x.shape + (n_out,)
    '''
    labels = x.astype(int)
    flat_labels = labels.flatten()
    # Row k of the identity matrix is the one-hot vector for class k.
    one_hot = np.eye(n_out)[flat_labels]
    return one_hot.reshape(labels.shape + (n_out,))
217 |     
218 |     
219 | 


--------------------------------------------------------------------------------
/util_esc50/data_generator.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import h5py
  3 | import csv
  4 | import time
  5 | import logging
  6 | import os
  7 | import glob
  8 | import matplotlib.pyplot as plt
  9 | import logging
 10 | 
 11 | from utils import read_metadata, sparse_to_categorical
 12 | import config
 13 | 
 14 | 
 15 | class Base(object):
 16 |     
 17 |     def __init__(self):
 18 |         '''Base class for data generator
 19 |         '''
 20 |         pass
 21 | 
 22 |     def load_hdf5(self, hdf5_path):
 23 |         '''Load hdf5 file. 
 24 |         
 25 |         Returns:
 26 |           data_dict: dict of data, e.g.:
 27 |             {'audio_name': np.array(['a.wav', 'b.wav', ...]), 
 28 |              'feature': (audios_num, frames_num, mel_bins)
 29 |              'target': (audios_num,), 
 30 |              ...}
 31 |         '''
 32 |         data_dict = {}
 33 |         
 34 |         with h5py.File(hdf5_path, 'r') as hf:
 35 |             data_dict['filename'] = np.array(
 36 |                 [filename.decode() for filename in hf['filename'][:]])
 37 | 
 38 |             data_dict['feature'] = hf['feature'][:].astype(np.float32)
 39 |             
 40 |             if 'category' in hf.keys():
 41 |                 data_dict['target'] = np.array(
 42 |                     [self.lb_to_idx[category.decode()] \
 43 |                         for category in hf['category'][:]])
 44 | 
 45 |             if 'category' in hf.keys():
 46 |                 data_dict['category'] = np.array(
 47 |                     [category.decode() \
 48 |                         for category in hf['category'][:]])
 49 |             if 'fold' in hf.keys():
 50 |                 data_dict['fold'] = np.array(
 51 |                     [fold \
 52 |                         for fold in hf['fold'][:]])
 53 |             
 54 |         return data_dict
 55 | 
 56 | 
 57 | 
 58 | class DataGenerator(Base):
 59 |     
 60 |     def __init__(self, feature_hdf5_path, train_csv, validate_csv, holdout_fold, batch_size, seed=1234):
 61 |         '''Data generator for training and validation. 
 62 |         
 63 |         Args:
 64 |           feature_hdf5_path: string, path of hdf5 feature file
 65 |           train_csv: string, path of train csv file
 66 |           validate_csv: string, path of validate csv file
 67 |           holdout_fold: set 1 for development and none for training 
 68 |               on all data without validation
 69 |           scalar: object, containing mean and std value
 70 |           batch_size: int
 71 |           seed: int, random seed
 72 |         '''
 73 | 
 74 |         self.batch_size = batch_size
 75 |         self.random_state = np.random.RandomState(seed)
 76 |         
 77 |         # self.classes_num = classes_num
 78 |         self.in_domain_classes_num = len(config.labels)
 79 |         self.all_classes_num = len(config.labels)
 80 |         self.lb_to_idx = config.lb_to_idx
 81 |         self.idx_to_lb = config.idx_to_lb
 82 |         
 83 |         # Load training data
 84 |         load_time = time.time()
 85 |         
 86 |         self.data_dict = self.load_hdf5(feature_hdf5_path)
 87 |         
 88 |         train_meta = read_metadata(train_csv)
 89 |         validate_meta = read_metadata(validate_csv)
 90 | 
 91 |         self.train_audio_indexes = self.get_audio_indexes(
 92 |             train_meta, self.data_dict, holdout_fold, 'train')
 93 |             
 94 |         self.validate_audio_indexes = self.get_audio_indexes(
 95 |             validate_meta, self.data_dict, holdout_fold, 'validate')
 96 |             
 97 |         if holdout_fold == 'none':
 98 |             self.train_audio_indexes = np.concatenate(
 99 |                 (self.train_audio_indexes, self.validate_audio_indexes), axis=0)
100 |                 
101 |             self.validate_audio_indexes = np.array([])
102 |         
103 |         logging.info('Load data time: {:.3f} s'.format(time.time() - load_time))
104 |         logging.info('Training audio num: {}'.format(len(self.train_audio_indexes)))            
105 |         logging.info('Validation audio num: {}'.format(len(self.validate_audio_indexes)))
106 |         
107 |         self.random_state.shuffle(self.train_audio_indexes)
108 |         self.pointer = 0
109 |         
110 |     def get_audio_indexes(self, meta_data, data_dict, holdout_fold, data_type):
111 |         '''Get train or validate indexes. 
112 |         '''
113 |         audio_indexes = []
114 |         
115 |         for name in meta_data['filename']:
116 |             loct = np.argwhere(data_dict['filename'] == name)
117 |             
118 |             if len(loct) > 0:
119 |                 index = loct[0, 0]
120 |                 label = self.idx_to_lb[self.data_dict['target'][index]]
121 |                 if holdout_fold != 'none':
122 |                     if data_type == 'train':
123 |                         audio_indexes.append(index)
124 |                     elif data_type == 'validate':
125 |                         audio_indexes.append(index)
126 |                 else:
127 |                     audio_indexes.append(index)
128 |             
129 |         return np.array(audio_indexes)
130 |         
131 |     def generate_train(self):
132 |         '''Generate mini-batch data for training. 
133 |         
134 |         Returns:
135 |           batch_data_dict: dict containing audio_name, feature and target
136 |         '''
137 | 
138 |         while True:
139 |             # Reset pointer
140 |             if self.pointer >= len(self.train_audio_indexes):
141 |                 self.pointer = 0
142 |                 self.random_state.shuffle(self.train_audio_indexes)
143 | 
144 |             # Get batch audio_indexes
145 |             batch_audio_indexes = self.train_audio_indexes[
146 |                 self.pointer: self.pointer + self.batch_size]
147 |                 
148 |             self.pointer += self.batch_size
149 | 
150 |             batch_data_dict = {}
151 | 
152 |             batch_data_dict['filename'] = \
153 |                 self.data_dict['filename'][batch_audio_indexes]
154 |             
155 |             batch_feature = self.data_dict['feature'][batch_audio_indexes]
156 |             batch_data_dict['feature'] = batch_feature
157 |             
158 |             sparse_target = self.data_dict['target'][batch_audio_indexes]
159 |             batch_data_dict['target'] = sparse_to_categorical(
160 |                 sparse_target, self.in_domain_classes_num)
161 |             
162 |             yield batch_data_dict
163 |             
164 |             
165 |     def generate_validate(self, data_type, max_iteration=None):
166 |         '''Generate mini-batch data for training. 
167 |         
168 |         Args:
169 |           data_type: 'train' | 'validate'
170 |           max_iteration: int, maximum iteration to validate to speed up validation
171 |         
172 |         Returns:
173 |           batch_data_dict: dict containing audio_name, feature and target
174 |         '''
175 |         
176 |         batch_size = self.batch_size
177 |         
178 |         if data_type == 'train':
179 |             audio_indexes = np.array(self.train_audio_indexes)
180 |         elif data_type == 'validate':
181 |             audio_indexes = np.array(self.validate_audio_indexes)
182 |         else:
183 |             raise Exception('Incorrect argument!')
184 |             
185 |             
186 |         iteration = 0
187 |         pointer = 0
188 |         
189 |         while True:
190 |             if iteration == max_iteration:
191 |                 break
192 | 
193 |             # Reset pointer
194 |             if pointer >= len(audio_indexes):
195 |                 break
196 | 
197 |             # Get batch audio_indexes
198 |             batch_audio_indexes = audio_indexes[pointer: pointer + batch_size]                
199 |             pointer += batch_size
200 |             iteration += 1
201 | 
202 |             batch_data_dict = {}
203 | 
204 |             batch_data_dict['filename'] = \
205 |                 self.data_dict['filename'][batch_audio_indexes]
206 |             
207 |             batch_feature = self.data_dict['feature'][batch_audio_indexes]
208 |             batch_data_dict['feature'] = batch_feature
209 |             
210 |             sparse_target = self.data_dict['target'][batch_audio_indexes]
211 |             batch_data_dict['target'] = sparse_to_categorical(
212 |                 sparse_target, self.all_classes_num)
213 | 
214 |             yield batch_data_dict
215 | 
216 |             


--------------------------------------------------------------------------------
/util_esc50/SpecAugment/sparse_image_warp_pytorch.py:
--------------------------------------------------------------------------------
  1 | # Reference : https://github.com/DemisEom/SpecAugment
  2 | # Copyright 2019 RnD at Spoon Radio
  3 | #
  4 | # Licensed under the Apache License, Version 2.0 (the "License");
  5 | # you may not use this file except in compliance with the License.
  6 | # You may obtain a copy of the License at
  7 | #
  8 | #     http://www.apache.org/licenses/LICENSE-2.0
  9 | #
 10 | # Unless required by applicable law or agreed to in writing, software
 11 | # distributed under the License is distributed on an "AS IS" BASIS,
 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 13 | # See the License for the specific language governing permissions and
 14 | # limitations under the License.
 15 | # ==============================================================================
 16 | 
 17 | import scipy as sp
 18 | import numpy as np
 19 | 
 20 | def _get_boundary_locations(image_height, image_width, num_points_per_edge):
 21 |   """Compute evenly-spaced indices along edge of image."""
 22 |   y_range = np.linspace(0, image_height - 1, num_points_per_edge + 2)
 23 |   x_range = np.linspace(0, image_width - 1, num_points_per_edge + 2)
 24 |   ys, xs = np.meshgrid(y_range, x_range, indexing='ij')
 25 |   is_boundary = np.logical_or(
 26 |       np.logical_or(xs == 0, xs == image_width - 1),
 27 |       np.logical_or(ys == 0, ys == image_height - 1))
 28 |   return np.stack([ys[is_boundary], xs[is_boundary]], axis=-1)
 29 | 
 30 | 
 31 | def _add_zero_flow_controls_at_boundary(control_point_locations,
 32 |                                         control_point_flows, image_height,
 33 |                                         image_width, boundary_points_per_edge):
 34 |   """Add control points for zero-flow boundary conditions.
 35 |    Augment the set of control points with extra points on the
 36 |    boundary of the image that have zero flow.
 37 |   Args:
 38 |     control_point_locations: input control points
 39 |     control_point_flows: their flows
 40 |     image_height: image height
 41 |     image_width: image width
 42 |     boundary_points_per_edge: number of points to add in the middle of each
 43 |                            edge (not including the corners).
 44 |                            The total number of points added is
 45 |                            4 + 4*(boundary_points_per_edge).
 46 |   Returns:
 47 |     merged_control_point_locations: augmented set of control point locations
 48 |     merged_control_point_flows: augmented set of control point flows
 49 |   """
 50 | 
 51 |   batch_size = tensor_shape.dimension_value(control_point_locations.shape[0])
 52 | 
 53 |   boundary_point_locations = _get_boundary_locations(image_height, image_width,
 54 |                                                      boundary_points_per_edge)
 55 | 
 56 |   boundary_point_flows = np.zeros([boundary_point_locations.shape[0], 2])
 57 | 
 58 |   type_to_use = control_point_locations.dtype
 59 |   boundary_point_locations = constant_op.constant(
 60 |       _expand_to_minibatch(boundary_point_locations, batch_size),
 61 |       dtype=type_to_use)
 62 | 
 63 |   boundary_point_flows = constant_op.constant(
 64 |       _expand_to_minibatch(boundary_point_flows, batch_size), dtype=type_to_use)
 65 | 
 66 |   merged_control_point_locations = array_ops.concat(
 67 |       [control_point_locations, boundary_point_locations], 1)
 68 | 
 69 |   merged_control_point_flows = array_ops.concat(
 70 |       [control_point_flows, boundary_point_flows], 1)
 71 | 
 72 |   return merged_control_point_locations, merged_control_point_flows
 73 | 
 74 | 
 75 | 
 76 | def _get_grid_locations(image_height, image_width):
 77 |   """Wrapper for np.meshgrid."""
 78 | 
 79 |   y_range = np.linspace(0, image_height - 1, image_height)
 80 |   x_range = np.linspace(0, image_width - 1, image_width)
 81 |   y_grid, x_grid = np.meshgrid(y_range, x_range, indexing='ij')
 82 |   return np.stack((y_grid, x_grid), -1)
 83 | 
 84 | def _expand_to_minibatch(np_array, batch_size):
 85 |   """Tile arbitrarily-sized np_array to include new batch dimension."""
 86 |   tiles = [batch_size] + [1] * np_array.ndim
 87 |   return np.tile(np.expand_dims(np_array, 0), tiles)
 88 | 
 89 | 
def sparse_image_warp(image,
                      source_control_point_locations,
                      dest_control_point_locations,
                      interpolation_order=2,
                      regularization_weight=0.0,
                      num_boundary_points=0):
  """Image warping using correspondences between sparse control points.

  NOTE(review): the body refers to `ops`, `constant_op`, `array_ops`,
  `interpolate_spline`, `dense_image_warp` and a variable `name`, none of
  which is imported, defined or passed in within this module (it only
  imports scipy and numpy). As written the function looks like it raises
  NameError on its first statement -- confirm and port or import as needed.

  Apply a non-linear warp to the image, where the warp is specified by
  the source and destination locations of a (potentially small) number of
  control points. First, we use a polyharmonic spline
  (`tf.contrib.image.interpolate_spline`) to interpolate the displacements
  between the corresponding control points to a dense flow field.
  Then, we warp the image using this dense flow field
  (`tf.contrib.image.dense_image_warp`).
  Let t index our control points. For regularization_weight=0, we have:
  warped_image[b, dest_control_point_locations[b, t, 0],
                  dest_control_point_locations[b, t, 1], :] =
  image[b, source_control_point_locations[b, t, 0],
           source_control_point_locations[b, t, 1], :].
  For regularization_weight > 0, this condition is met approximately, since
  regularized interpolation trades off smoothness of the interpolant vs.
  reconstruction of the interpolant at the control points.
  See `tf.contrib.image.interpolate_spline` for further documentation of the
  interpolation_order and regularization_weight arguments.
  Args:
    image: `[batch, height, width, channels]` float `Tensor`
    source_control_point_locations: `[batch, num_control_points, 2]` float
      `Tensor`
    dest_control_point_locations: `[batch, num_control_points, 2]` float
      `Tensor`
    interpolation_order: polynomial order used by the spline interpolation
    regularization_weight: weight on smoothness regularizer in interpolation
    num_boundary_points: How many zero-flow boundary points to include at
      each image edge. Usage:
        num_boundary_points=0: don't add zero-flow points
        num_boundary_points=1: 4 corners of the image
        num_boundary_points=2: 4 corners and one in the middle of each edge
          (8 points total)
        num_boundary_points=n: 4 corners and n-1 along each edge
    Note that image and offsets can be of type tf.half, tf.float32, or
    tf.float64, and do not necessarily have to be the same type.
  Returns:
    warped_image: `[batch, height, width, channels]` float `Tensor` with same
      type as input image.
    flow_field: `[batch, height, width, 2]` float `Tensor` containing the dense
      flow field produced by the interpolation.
  """

  image = ops.convert_to_tensor(image)
  source_control_point_locations = ops.convert_to_tensor(
      source_control_point_locations)
  dest_control_point_locations = ops.convert_to_tensor(
      dest_control_point_locations)

  # Displacement of every control point from its source to its destination.
  control_point_flows = (
      dest_control_point_locations - source_control_point_locations)

  clamp_boundaries = num_boundary_points > 0
  # One "boundary point" means corners only, hence the -1 for edge interiors.
  boundary_points_per_edge = num_boundary_points - 1

  # NOTE(review): `name` is not a parameter of this function.
  with ops.name_scope(name):

    batch_size, image_height, image_width, _ = image.get_shape().as_list()

    # This generates the dense locations where the interpolant
    # will be evaluated.
    grid_locations = _get_grid_locations(image_height, image_width)

    flattened_grid_locations = np.reshape(grid_locations,
                                          [image_height * image_width, 2])

    flattened_grid_locations = constant_op.constant(
        _expand_to_minibatch(flattened_grid_locations, batch_size), image.dtype)

    if clamp_boundaries:
      # Pin the image border by adding control points with zero flow.
      (dest_control_point_locations,
       control_point_flows) = _add_zero_flow_controls_at_boundary(
           dest_control_point_locations, control_point_flows, image_height,
           image_width, boundary_points_per_edge)

    # Interpolate the sparse flows to one flow vector per pixel.
    flattened_flows = interpolate_spline.interpolate_spline(
        dest_control_point_locations, control_point_flows,
        flattened_grid_locations, interpolation_order, regularization_weight)

    dense_flows = array_ops.reshape(flattened_flows,
                                    [batch_size, image_height, image_width, 2])

    warped_image = dense_image_warp.dense_image_warp(image, dense_flows)

    return warped_image, dense_flows
181 | 
182 | 


--------------------------------------------------------------------------------
/util_esc50/test.py:
--------------------------------------------------------------------------------
  1 | # visualize the feature maps
  2 | 
  3 | import torch
  4 | import numpy as np
  5 | from net import Cnns,Cnns2
  6 | from feature import *
  7 | from matplotlib import pyplot as plt
  8 | from net_vis import Cnns_deconv    
def data_pre(audio, audio_length, fs, audio_skip):
    """Squeeze low-amplitude stretches out of `audio` and refill the end.

    The signal is scanned with windows of two strides that advance by one
    stride, where stride = int(audio_skip * fs / 2) samples. A window whose
    peak absolute amplitude is below 0.005 is dropped by shifting the samples
    that follow it two strides to the left. Afterwards the strides freed at
    the end of the first audio_length * fs samples are overwritten with
    samples copied from the start of the array.

    `audio` is modified in place (`out` is an alias, not a copy) and the same
    array object is returned.

    NOTE(review): the slice assignments appear to assume `audio` holds at
    least audio_length * fs samples -- confirm for shorter clips.
    NOTE(review): when every window is below the threshold, `loop` ends at 0
    or -1 and the recursive call below looks like it receives unchanged data,
    i.e. it would recurse until RecursionError -- confirm with an all-silent
    clip.
    """
    stride = int(audio_skip * fs /2)
    # Number of overlapping two-stride windows in audio_length * fs samples.
    loop =  int((audio_length * fs) // stride - 1)
    area = 0  # unused
    maxamp = 0.
    i = 0
    out = audio  # alias: all edits below write into the caller's array
    while i < loop:
        win_data = out[i*stride: (i+2)*stride]
        maxamp = np.max(np.abs(win_data))
        if maxamp < 0.005:
            # Quiet window: shrink the valid region by two strides and pull
            # the following samples over it. `i` is not advanced so the data
            # that moved into this position is examined next.
            loop = loop - 2
            out[i*stride: (loop+1)*stride] = out[(i+2)*stride: (loop+3)*stride]
        else:
            i = i + 1
    # Number of strides that were removed above.
    length = (audio_length * fs) // stride - loop - 1
    if length == 0:
        return out
    else:
        # Fill the freed tail with a copy of the beginning of the signal.
        out[(loop + 1) * stride:(audio_length * fs // stride) * stride] = out[0:length * stride]
        if length < (audio_length * fs//stride)/2:
            # Less than half was removed: same assignment as above, then done.
            out[(loop+1)*stride:(audio_length * fs//stride)*stride] = out[0:length*stride]
            return out
        else:
            # At least half was removed: duplicate the kept part once and
            # run the whole procedure again on the result.
            out[(loop + 1) * stride:(loop + 1)*2  * stride] = out[0:(loop + 1) * stride]
            return data_pre(out, audio_length, fs, audio_skip)
 35 | def add_noise(audio, percent=0.2):
 36 |     random_values = np.random.randn(len(audio))
 37 |     print(np.mean(random_values))
 38 |     print(np.abs(np.mean(audio)))
 39 |     out = audio + percent * random_values
 40 |     return out
 41 | def fre_noise(feature, percent=0.2):
 42 |     out = feature
 43 |     for i in range(13):
 44 |         random_values = np.random.randn(feature.shape[1])
 45 |         out[50+i, :] = out[50+i, :] + percent*random_values
 46 |     return out
 47 | def time_noise(feature, percent=0.2):
 48 |     out = feature
 49 |     for i in range(5):
 50 |         random_values = np.random.randn(feature.shape[0])
 51 |         out[:, 25+i] = out[:, 25+i] + percent*random_values
 52 |     return out
 53 | 
 54 | if __name__ == '__main__':
 55 |     Model = eval('Cnns')
 56 |     model = Model(50, activation='logsoftmax')
 57 |     checkpoint_path = '3400_iterations.pth'
 58 |     checkpoint = torch.load(checkpoint_path)
 59 |     model.load_state_dict(checkpoint['model'])
 60 |     params=model.state_dict() 
 61 | 
 62 |     audio_path = '1-85362-A-0.wav'
 63 | 
 64 |     (audio, _) = read_audio(audio_path=audio_path, target_fs=44100)
 65 |     audio = pad_truncate_sequence(audio, 44100*5)
 66 | #     audio = add_noise(audio, percent=0.2)
 67 | 
 68 |     feature_extractor = LogMelExtractor(
 69 |         sample_rate=44100, 
 70 |         window_size=1764, 
 71 |         hop_size=882, 
 72 |         mel_bins=40, 
 73 |         fmin=50, 
 74 |         fmax=11025)
 75 |     feature = feature_extractor.transform(audio)
 76 |     feature = feature[0 : 250]
 77 |     feature = time_noise(feature, percent=3)
 78 |     x = np.transpose(feature, (1, 0))
 79 |     plt.imshow(x, cmap = plt.cm.jet)
 80 |     plt.axis('off')
 81 |     fig = plt.gcf()
 82 |     height, width = x.shape
 83 |     fig.set_size_inches(width/5./8.,height/5./8.)
 84 |     plt.gca().xaxis.set_major_locator(plt.NullLocator())
 85 |     plt.gca().yaxis.set_major_locator(plt.NullLocator())
 86 |     plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
 87 |     plt.margins(0,0)
 88 |     plt.savefig("ori_conv_noise2.png", dpi=500, pad_inches = 0)
 89 |     plt.show()
 90 |     feature = torch.from_numpy(feature[None, :, :])
 91 | #     x1 = model.show(feature)
 92 |     x1, x2, x3, x4, out1, out2, out3, out4, out5, out6, out7= model.show(feature)
 93 |     
 94 |     feature = torch.squeeze(out4)
 95 |     x1 = np.transpose(feature.detach().numpy(), (1, 0))
 96 |     plt.imshow(x1, cmap = plt.cm.jet)
 97 |     plt.axis('off')
 98 |     fig = plt.gcf()
 99 |     height, width = x1.shape
100 |     fig.set_size_inches(width/5./8.,height/5./8.)
101 |     plt.gca().xaxis.set_major_locator(plt.NullLocator())
102 |     plt.gca().yaxis.set_major_locator(plt.NullLocator())
103 |     plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
104 |     plt.margins(0,0)
105 |     plt.savefig("conv_noise2.png", dpi=500, pad_inches = 0)
106 |     plt.show()
107 |     
108 |     (audio, _) = read_audio(audio_path=audio_path, target_fs=44100)
109 |     audio = pad_truncate_sequence(audio, 44100*5)
110 | #     audio = add_noise(audio, percent=0.2)
111 | #     audio = data_pre(audio=audio, audio_length=5, fs=44100, audio_skip=0.1)
112 |     feature_extractor = LogMelExtractor(
113 |         sample_rate=44100, 
114 |         window_size=1764, 
115 |         hop_size=882, 
116 |         mel_bins=40, 
117 |         fmin=50, 
118 |         fmax=11025)
119 |     feature = feature_extractor.transform(audio)
120 |     feature = feature[0 : 250]
121 |     x = np.transpose(feature, (1, 0))
122 |     plt.imshow(x, cmap = plt.cm.jet)
123 |     plt.axis('off')
124 |     fig = plt.gcf()
125 |     height, width = x.shape
126 |     fig.set_size_inches(width/5./8.,height/5./8.)
127 |     plt.gca().xaxis.set_major_locator(plt.NullLocator())
128 |     plt.gca().yaxis.set_major_locator(plt.NullLocator())
129 |     plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
130 |     plt.margins(0,0)
131 |     plt.savefig("ori_conv.png", dpi=500, pad_inches = 0)
132 |     plt.show()
133 |     feature = torch.from_numpy(feature[None, :, :])
134 | #     x1 = model.show(feature)
135 |     x1, x2, x3, x4, out1, out2, out3, out4, out5, out6, out7= model.show(feature)
136 |     
137 |     feature = torch.squeeze(out4)
138 |     x1 = np.transpose(feature.detach().numpy(), (1, 0))
139 |     plt.imshow(x1, cmap = plt.cm.jet)
140 |     plt.axis('off')
141 |     fig = plt.gcf()
142 |     height, width = x1.shape
143 |     fig.set_size_inches(width/5./8.,height/5./8.)
144 |     plt.gca().xaxis.set_major_locator(plt.NullLocator())
145 |     plt.gca().yaxis.set_major_locator(plt.NullLocator())
146 |     plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
147 |     plt.margins(0,0)
148 |     plt.savefig("conv.png", dpi=500, pad_inches = 0)
149 |     plt.show()
150 |     
151 |     (audio, _) = read_audio(audio_path=audio_path, target_fs=44100)
152 |     audio = pad_truncate_sequence(audio, 44100*5)
153 | #     audio = add_noise(audio, percent=0.2)
154 | #     audio = data_pre(audio=audio, audio_length=5, fs=44100, audio_skip=0.1)
155 |     feature_extractor = LogMelExtractor(
156 |         sample_rate=44100, 
157 |         window_size=1764, 
158 |         hop_size=882, 
159 |         mel_bins=40, 
160 |         fmin=50, 
161 |         fmax=11025)
162 |     feature = feature_extractor.transform(audio)
163 |     feature = feature[0 : 250]
164 |     feature = fre_noise(feature, percent=3)
165 |     x = np.transpose(feature, (1, 0))
166 |     plt.imshow(x, cmap = plt.cm.jet)
167 |     plt.axis('off')
168 |     fig = plt.gcf()
169 |     height, width = x.shape
170 |     fig.set_size_inches(width/5./8.,height/5./8.)
171 |     plt.gca().xaxis.set_major_locator(plt.NullLocator())
172 |     plt.gca().yaxis.set_major_locator(plt.NullLocator())
173 |     plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
174 |     plt.margins(0,0)
175 |     plt.savefig("ori_conv_noise1.png", dpi=500, pad_inches = 0)
176 |     plt.show()
177 |     feature = torch.from_numpy(feature[None, :, :])
178 | #     x1 = model.show(feature)
179 |     x1, x2, x3, x4, out1, out2, out3, out4, out5, out6, out7= model.show(feature)
180 |     
181 |     feature = torch.squeeze(out4)
182 |     x1 = np.transpose(feature.detach().numpy(), (1, 0))
183 |     plt.imshow(x1, cmap = plt.cm.jet)
184 |     plt.axis('off')
185 |     fig = plt.gcf()
186 |     height, width = x1.shape
187 |     fig.set_size_inches(width/5./8.,height/5./8.)
188 |     plt.gca().xaxis.set_major_locator(plt.NullLocator())
189 |     plt.gca().yaxis.set_major_locator(plt.NullLocator())
190 |     plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
191 |     plt.margins(0,0)
192 |     plt.savefig("conv_noise1.png", dpi=500, pad_inches = 0)
193 |     plt.show()
194 |     
195 |     (audio, _) = read_audio(audio_path=audio_path, target_fs=44100)
196 |     audio = pad_truncate_sequence(audio, 44100*5)
197 |     audio = add_noise(audio, percent=0.08)
198 | #     audio = data_pre(audio=audio, audio_length=5, fs=44100, audio_skip=0.1)
199 |     feature_extractor = LogMelExtractor(
200 |         sample_rate=44100, 
201 |         window_size=1764, 
202 |         hop_size=882, 
203 |         mel_bins=40, 
204 |         fmin=50, 
205 |         fmax=11025)
206 |     feature = feature_extractor.transform(audio)
207 |     feature = feature[0 : 250]
208 |     x = np.transpose(feature, (1, 0))
209 |     plt.imshow(x, cmap = plt.cm.jet)
210 |     plt.axis('off')
211 |     fig = plt.gcf()
212 |     height, width = x.shape
213 |     fig.set_size_inches(width/5./8.,height/5./8.)
214 |     plt.gca().xaxis.set_major_locator(plt.NullLocator())
215 |     plt.gca().yaxis.set_major_locator(plt.NullLocator())
216 |     plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
217 |     plt.margins(0,0)
218 | #     plt.savefig("ori_conv_noise3.png", dpi=500, pad_inches = 0)
219 |     plt.show()
220 |     feature = torch.from_numpy(feature[None, :, :])
221 | #     x1 = model.show(feature)
222 |     x1, x2, x3, x4, out1, out2, out3, out4, out5, out6, out7= model.show(feature)
223 |     
224 |     feature = torch.squeeze(out4)
225 |     x1 = np.transpose(feature.detach().numpy(), (1, 0))
226 |     plt.imshow(x1, cmap = plt.cm.jet)
227 |     plt.axis('off')
228 |     fig = plt.gcf()
229 |     height, width = x1.shape
230 |     fig.set_size_inches(width/5./8.,height/5./8.)
231 |     plt.gca().xaxis.set_major_locator(plt.NullLocator())
232 |     plt.gca().yaxis.set_major_locator(plt.NullLocator())
233 |     plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
234 |     plt.margins(0,0)
235 |     plt.savefig("conv_noise3.png", dpi=500, pad_inches = 0)
236 |     plt.show()
237 | 
238 |     
239 | 


--------------------------------------------------------------------------------
/util_esc50/feature.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | sys.path.insert(1, os.path.join(sys.path[0], 'util'))
  4 | import numpy as np
  5 | import argparse
  6 | import h5py
  7 | import librosa
  8 | from scipy import signal
  9 | import matplotlib.pyplot as plt
 10 | import time
 11 | import math
 12 | import pandas as pd
 13 | import random
 14 | import torch
 15 | 
 16 | from utils import (create_folder, read_audio, pad_truncate_sequence, read_metadata)
 17 | import config
 18 | 
 19 | class LogMelExtractor(object):
 20 |     def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, fmax):
 21 |         '''Log mel feature extractor. 
 22 |         
 23 |         Args:
 24 |           sample_rate: int
 25 |           window_size: int
 26 |           hop_size: int
 27 |           mel_bins: int
 28 |           fmin: int, minimum frequency of mel filter banks
 29 |           fmax: int, maximum frequency of mel filter banks
 30 |         '''
 31 |         
 32 |         self.window_size = window_size
 33 |         self.hop_size = hop_size
 34 |         self.window_func = np.hanning(window_size)
 35 |         
 36 |         self.melW = librosa.filters.mel(
 37 |             sr=sample_rate, 
 38 |             n_fft=window_size, 
 39 |             n_mels=mel_bins, 
 40 |             fmin=fmin, 
 41 |             fmax=fmax).T
 42 |         '''(n_fft // 2 + 1, mel_bins)'''
 43 | 
 44 |     def transform(self, audio):
 45 |         '''Extract feature of a singlechannel audio file. 
 46 |         
 47 |         Args:
 48 |           audio: (samples,)
 49 |           
 50 |         Returns:
 51 |           feature: (frames_num, freq_bins)
 52 |         '''
 53 |     
 54 |         window_size = self.window_size
 55 |         hop_size = self.hop_size
 56 |         window_func = self.window_func
 57 |         
 58 |         # Compute short-time Fourier transform
 59 |         stft_matrix = librosa.core.stft(
 60 |             y=audio, 
 61 |             n_fft=window_size, 
 62 |             hop_length=hop_size, 
 63 |             window=window_func, 
 64 |             center=True, 
 65 |             dtype=np.complex64, 
 66 |             pad_mode='reflect').T
 67 |         '''(N, n_fft // 2 + 1)'''
 68 |     
 69 |         # Mel spectrogram
 70 |         mel_spectrogram = np.dot(np.abs(stft_matrix) ** 2, self.melW)
 71 |         
 72 |         # Log mel spectrogram
 73 |         logmel_spectrogram = librosa.core.power_to_db(
 74 |             mel_spectrogram, ref=1.0, amin=1e-10, 
 75 |             top_db=None)
 76 |         
 77 |         logmel_spectrogram = logmel_spectrogram.astype(np.float32)
 78 |         
 79 |         return logmel_spectrogram
 80 | 
 81 | def deltas(X_in):
 82 |     X_out = (X_in[:,2:]-X_in[:,:-2])/10.0
 83 |     X_out = X_out[:,1:-1]+(X_in[:,4:]-X_in[:,:-4])/5.0
 84 |     out = np.zeros((X_in.shape[0], X_in.shape[1]))
 85 |     out[:,2:-2] = X_out
 86 |     return out
 87 | 
 88 | def MaxMinNormalization(x):
 89 |     x = (x - np.min(x)) / (np.max(x) - np.min(x))
 90 |     return x
 91 |     
 92 | def calculate_feature_for_all_audio_files(args):
 93 |     '''Calculate feature of audio files and write out features to a hdf5 file. 
 94 |     
 95 |     Args:
 96 |       dataset_dir: string
 97 |       workspace: string
 98 |       mini_data: bool, set True for debugging on a small part of data
 99 |     '''
100 | 
101 |     # Arguments & parameters
102 |     dataset_dir = args.dataset_dir
103 |     workspace = args.workspace
104 |     mini_data = args.mini_data
105 |     
106 |     sample_rate = config.sample_rate
107 |     window_size = config.window_size
108 |     hop_size = config.hop_size
109 |     mel_bins = config.mel_bins
110 |     fmin = config.fmin
111 |     fmax = config.fmax
112 |     frames_per_second = config.frames_per_second
113 |     frames_num = config.frames_num
114 |     total_samples = config.total_samples
115 |     lb_to_idx = config.lb_to_idx
116 |     audio_duration_clip = config.audio_duration_clip
117 |     audio_stride_clip = config.audio_stride_clip
118 |     audio_duration = config.audio_duration
119 |     audio_num = config.audio_num
120 |     total_frames = config.total_frames
121 |     # Paths
122 |     if mini_data:
123 |         prefix = 'minidata_'
124 |     else:
125 |         prefix = ''
126 |         
127 |     audios_dir = os.path.join(dataset_dir, 'audio')
128 |     metadata_path = os.path.join(dataset_dir, 'meta', 'esc50.csv')
129 |     feature_path = os.path.join(workspace, 'features', 
130 |         '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second, mel_bins))
131 |     create_folder(os.path.dirname(feature_path))    
132 |     # Feature extractor
133 |     feature_extractor = LogMelExtractor(
134 |         sample_rate=sample_rate, 
135 |         window_size=window_size, 
136 |         hop_size=hop_size, 
137 |         mel_bins=mel_bins, 
138 |         fmin=fmin, 
139 |         fmax=fmax)
140 | 
141 |     # Read metadata
142 |     meta_dict = read_metadata(metadata_path)
143 | 
144 |     # Extract features and targets 
145 |     if mini_data:
146 |         mini_num = 10
147 |         total_num = len(meta_dict['filename'])
148 |         random_state = np.random.RandomState(1234)
149 |         indexes = random_state.choice(total_num, size=mini_num, replace=False)
150 |         for key in meta_dict.keys():
151 |             meta_dict[key] = meta_dict[key][indexes]
152 |         
153 |     print('Extracting features of all audio files ...')
154 |     extract_time = time.time()
155 |     # Hdf5 file for storing features and targets
156 |     hf = h5py.File(feature_path, 'w')
157 | 
158 |     hf.create_dataset(
159 |         name='filename', 
160 |         data=[filename.encode() for filename in meta_dict['filename']], 
161 |         dtype='S80')
162 | 
163 |     if 'fold' in meta_dict.keys():
164 |         hf.create_dataset(
165 |             name='fold', 
166 |             data=[fold for fold in meta_dict['fold']], 
167 |             dtype=np.int64)
168 |             
169 |     if 'target' in meta_dict.keys():
170 |         hf.create_dataset(
171 |             name='target', 
172 |             data=[target for target in meta_dict['target']], 
173 |             dtype=np.int64)
174 |             
175 |     if 'category' in meta_dict.keys():
176 |         hf.create_dataset(
177 |             name='category', 
178 |             data=[category.encode() for category in meta_dict['category']], 
179 |             dtype='S80')
180 |     if 'esc10' in meta_dict.keys():
181 |         hf.create_dataset(
182 |             name='esc10', 
183 |             data=[esc10 for esc10 in meta_dict['esc10']], 
184 |             dtype=np.bool)
185 |     if 'src_file' in meta_dict.keys():
186 |         hf.create_dataset(
187 |             name='src_file', 
188 |             data=[src_file for src_file in meta_dict['src_file']], 
189 |             dtype=np.int64)
190 |     if 'take' in meta_dict.keys():
191 |         hf.create_dataset(
192 |             name='take', 
193 |             data=[take.encode() for take in meta_dict['take']], 
194 |             dtype='S24')
195 |     
196 | 
197 |     hf.create_dataset(
198 |         name='feature', 
199 |         shape=(0, audio_num, 3, frames_num, mel_bins), 
200 |         maxshape=(None, audio_num, 3, frames_num, mel_bins), 
201 |         dtype=np.float32)
202 | 
203 |     for (n, filename) in enumerate(meta_dict['filename']):
204 |         audio_path = os.path.join(audios_dir, filename)
205 |         print(n, audio_path)
206 |         
207 |         # Read audio
208 |         (audio, _) = read_audio(
209 |             audio_path=audio_path, 
210 |             target_fs=sample_rate)
211 |         
212 |         # Pad or truncate audio recording to the same length
213 |         audio = pad_truncate_sequence(audio, total_samples)
214 |         # Extract feature
215 |         fea_list = []
216 | #         for i in range(audio_num):
217 | #             audio_clip = audio[i*sample_rate*audio_stride_clip: (i+2)*sample_rate*audio_stride_clip]
218 | #             feature = feature_extractor.transform(audio_clip)
219 | #             feature = feature[0 : frames_per_second*audio_duration_clip]
220 | #             fea_list.append(feature)
221 |         feature = feature_extractor.transform(audio)
222 | #         # Remove the extra log mel spectrogram frames caused by padding zero
223 |         feature = feature[0 : total_frames]
224 |         feature = MaxMinNormalization(feature)
225 |         feature_delta = deltas(feature)
226 |         feature_delta = MaxMinNormalization(feature_delta)
227 |         feature_delta2 = deltas(feature_delta)
228 |         feature_delta2 = MaxMinNormalization(feature_delta2)
229 |         for i in range(audio_num):
230 |             feature_clip = feature[i*frames_per_second*audio_stride_clip: (i+audio_duration_clip)*frames_per_second*audio_stride_clip]
231 |             feature_delta_clip = feature_delta[i*frames_per_second*audio_stride_clip: (i+audio_duration_clip)*frames_per_second*audio_stride_clip]
232 |             feature_delta2_clip = feature_delta2[i*frames_per_second*audio_stride_clip: (i+audio_duration_clip)*frames_per_second*audio_stride_clip]
233 |             feature_clip = feature_clip[None, :, :]
234 |             feature_delta_clip = feature_delta_clip[None, :, :]
235 |             feature_delta2_clip = feature_delta2_clip[None, :, :]
236 |             f = np.concatenate((feature_clip, feature_delta_clip, feature_delta2_clip), 0)
237 |             fea_list.append(f)
238 |         
239 |         hf['feature'].resize((n + 1, audio_num, 3, frames_num, mel_bins))
240 |         hf['feature'][n] = fea_list
241 |             
242 |     hf.close()
243 |         
244 |     print('Write hdf5 file to {} using {:.3f} s'.format(
245 |         feature_path, time.time() - extract_time))
246 |     
247 |     
248 | 
if __name__ == '__main__':
    # Command-line entry point: `feature.py calculate_feature_for_all_audio_files ...`

    parser = argparse.ArgumentParser(description='')
    subparsers = parser.add_subparsers(dest='mode')

    # Calculate feature for all audio files
    parser_logmel = subparsers.add_parser('calculate_feature_for_all_audio_files')
    parser_logmel.add_argument('--dataset_dir', type=str, required=True, help='Directory of dataset.')
    parser_logmel.add_argument('--workspace', type=str, required=True, help='Directory of your workspace.')
    parser_logmel.add_argument('--mini_data', action='store_true', default=False, help='Set True for debugging on a small part of data.')

    # Parse arguments
    args = parser.parse_args()

    # Without a sub-command `args` has no dataset_dir/workspace attributes, so
    # dispatch on the mode explicitly instead of failing with AttributeError.
    if args.mode == 'calculate_feature_for_all_audio_files':
        calculate_feature_for_all_audio_files(args)

    else:
        raise Exception('Error argument!')
264 | 
265 | 


--------------------------------------------------------------------------------
/util_esc50/main.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | import numpy as np
  4 | import argparse
  5 | import h5py
  6 | import math
  7 | import time
  8 | import logging
  9 | import matplotlib.pyplot as plt
 10 | import torch
 11 | import torch.nn as nn
 12 | import torch.nn.functional as F
 13 | import torch.optim as optim
 14 | from SpecAugment import spec_augment_pytorch
 15 | from utils import (create_folder, get_filename, create_logging, mixup_data, mixup_criterion)
 16 | from data_generator import DataGenerator
 17 | from net import TFNet
 18 | from losses import nll_loss
 19 | from evaluate import Evaluator, StatisticsContainer
 20 | from pytorch_utils import move_data_to_gpu, forward
 21 | import config
 22 | 
 23 | 
 24 | def train(args, i):
 25 |     '''Training. Model will be saved after several iterations. 
 26 |     
 27 |     Args: 
 28 |       dataset_dir: string, directory of dataset
 29 |       workspace: string, directory of workspace
 30 |       holdout_fold: '1' | 'none', set 1 for development and none for training 
 31 |           on all data without validation
 32 |       model_type: string, e.g. 'Cnn_9layers_AvgPooling'
 33 |       batch_size: int
 34 |       cuda: bool
 35 |       mini_data: bool, set True for debugging on a small part of data
 36 |     '''
 37 |     
 38 |     # Arugments & parameters
 39 |     dataset_dir = args.dataset_dir
 40 |     workspace = args.workspace
 41 |     holdout_fold = args.holdout_fold
 42 |     model_type = args.model_type
 43 |     batch_size = args.batch_size
 44 |     cuda = args.cuda and torch.cuda.is_available()
 45 |     mini_data = args.mini_data
 46 |     filename = args.filename
 47 |     audio_num = config.audio_num
 48 |     mel_bins = config.mel_bins
 49 |     frames_per_second = config.frames_per_second
 50 |     max_iteration = None      # Number of mini-batches to evaluate on training data
 51 |     reduce_lr = True
 52 |     in_domain_classes_num = len(config.labels)
 53 |     
 54 |     # Paths
 55 |     if mini_data:
 56 |         prefix = 'minidata_'
 57 |     else:
 58 |         prefix = ''
 59 |     
 60 |     train_csv = os.path.join(sys.path[0], 'fold'+str(i)+'_train.csv')
 61 |         
 62 |     validate_csv = os.path.join(sys.path[0], 'fold'+str(i)+'_test.csv')
 63 |                 
 64 |     feature_hdf5_path = os.path.join(workspace, 'features', 
 65 |         '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second, mel_bins))
 66 |         
 67 |     checkpoints_dir = os.path.join(workspace, 'checkpoints', filename, 
 68 |         '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second, mel_bins), 
 69 |         'holdout_fold={}'.format(holdout_fold), model_type)
 70 |     create_folder(checkpoints_dir)
 71 | 
 72 |     validate_statistics_path = os.path.join(workspace, 'statistics', filename, 
 73 |         '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins),
 74 |         'holdout_fold={}'.format(holdout_fold), 
 75 |         model_type, 'validate_statistics.pickle')
 76 |     
 77 |     create_folder(os.path.dirname(validate_statistics_path))
 78 |     
 79 |     logs_dir = os.path.join(workspace, 'logs', filename, args.mode, 
 80 |         '{}logmel_{}frames_{}melbins'.format(prefix, frames_per_second, mel_bins), 
 81 |         'holdout_fold={}'.format(holdout_fold), model_type)
 82 |     create_logging(logs_dir, 'w')
 83 |     logging.info(args)
 84 | 
 85 |     if cuda:
 86 |         logging.info('Using GPU.')
 87 |     else:
 88 |         logging.info('Using CPU. Set --cuda flag to use GPU.')
 89 |     
 90 |     # Model
 91 |     Model = eval(model_type)
 92 |    
 93 |     model = Model(in_domain_classes_num, activation='logsoftmax')
 94 |     loss_func = nll_loss
 95 | 
 96 |     if cuda:
 97 |         model.cuda()
 98 | 
 99 |     # Optimizer
100 |     optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999),
101 |         eps=1e-08, weight_decay=0., amsgrad=True)
102 |     # Data generator
103 |     data_generator = DataGenerator(
104 |         feature_hdf5_path=feature_hdf5_path, 
105 |         train_csv=train_csv, 
106 |         validate_csv=validate_csv, 
107 |         holdout_fold=holdout_fold, 
108 |         batch_size=batch_size)
109 |     
110 |     # Evaluator
111 |     evaluator = Evaluator(
112 |         model=model, 
113 |         data_generator=data_generator, 
114 |         cuda=cuda)
115 |     
116 |     # Statistics
117 |     validate_statistics_container = StatisticsContainer(validate_statistics_path)
118 | 
119 |     train_bgn_time = time.time()
120 |     iteration = 0
121 |     
122 |     # Train on mini batches
123 |     for batch_data_dict in data_generator.generate_train():
124 |         
125 |         # Evaluate
126 |         if iteration % 100 == 0 and iteration >= 1000:
127 |             logging.info('------------------------------------')
128 |             logging.info('Iteration: {}'.format(iteration))
129 | 
130 |             train_fin_time = time.time()
131 | 
132 |             
133 |             train_statistics = evaluator.evaluate(data_type='train', iteration= iteration,
134 |                                                   max_iteration=None, verbose=False)
135 |             
136 |             if holdout_fold != 'none':
137 |                 validate_statistics = evaluator.evaluate(data_type='validate', 
138 |                                                          iteration= iteration, max_iteration=None, verbose=False)
139 |                 validate_statistics_container.append_and_dump(iteration, validate_statistics)
140 | 
141 |             train_time = train_fin_time - train_bgn_time
142 |             validate_time = time.time() - train_fin_time
143 | 
144 |             logging.info(
145 |                 'Train time: {:.3f} s, validate time: {:.3f} s'
146 |                 ''.format(train_time, validate_time))
147 | 
148 |             train_bgn_time = time.time()
149 | 
150 | #         Save model
151 |         if iteration % 100 == 0 and iteration > 0:
152 |             checkpoint = {
153 |                 'iteration': iteration, 
154 |                 'model': model.state_dict(), 
155 |                 'optimizer': optimizer.state_dict()}
156 | 
157 |             checkpoint_path = os.path.join(
158 |                 checkpoints_dir, '{}_iterations.pth'.format(iteration))
159 |                 
160 |             torch.save(checkpoint, checkpoint_path)
161 |             logging.info('Model saved to {}'.format(checkpoint_path))
162 |             
163 |         # Reduce learning rate
164 |         if reduce_lr and iteration % 100 == 0 and iteration > 0:
165 |             for param_group in optimizer.param_groups:
166 |                 param_group['lr'] *= 0.9
167 |         
168 |         # Move data to GPU
169 |         for key in batch_data_dict.keys():
170 |             if key in ['feature', 'target']:
171 |                 batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)
172 |                 
173 |         if iteration % 3 == 0:
174 |             # Train 
175 |             for i in range(audio_num):
176 |                 model.train() 
177 |                 data, target_a, target_b, lam = mixup_data(x=batch_data_dict['feature'][:, i, :, :, :], y=batch_data_dict['target'], alpha=0.2)
178 |                 batch_output = model(data)
179 |         #         batch_output = model(batch_data_dict['feature'])
180 |                 # loss
181 | #                 loss = loss_func(batch_output, batch_data_dict['target'])
182 |                 loss = mixup_criterion(loss_func, batch_output, target_a, target_b, lam)
183 | 
184 |                 # Backward
185 |                 optimizer.zero_grad()
186 |                 loss.backward()
187 |                 optimizer.step()
188 |                 
189 |         if iteration % 3 == 1:
190 |             # Train 
191 |             for i in range(audio_num):
192 |                 model.train() 
193 |                 batch_output = model(batch_data_dict['feature'][:, i, :, :, :])
194 |                 # loss
195 |                 loss = loss_func(batch_output, batch_data_dict['target'])
196 | 
197 |                 # Backward
198 |                 optimizer.zero_grad()
199 |                 loss.backward()
200 |                 optimizer.step()
201 |                 
202 |         if iteration % 3 == 2:
203 |             # Train 
204 |             for i in range(audio_num):
205 |                 model.train() 
206 |                 data = spec_augment_pytorch.spec_augment(batch_data_dict['feature'][:, i, :, :, :], using_frequency_masking=True,
207 |                                                  using_time_masking=True)
208 |                 batch_output = model(data)
209 |                 # loss
210 |                 loss = loss_func(batch_output, batch_data_dict['target'])
211 | 
212 |                 # Backward
213 |                 optimizer.zero_grad()
214 |                 loss.backward()
215 |                 optimizer.step()
216 |                 
217 |         # Stop learning
218 |         if iteration == 4500:
219 |             break
220 |             
221 |         iteration += 1
222 | 
223 | 
224 | 
if __name__ == '__main__':
    # Command-line entry point: builds the 'train' and 'inference_validation'
    # sub-commands, then runs training on folds 1 to 5.
    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    # Train
    train_parser = subparsers.add_parser('train')
    train_parser.add_argument('--dataset_dir', type=str, required=True, help='Directory of dataset.')
    train_parser.add_argument('--workspace', type=str, required=True, help='Directory of your workspace.')
    train_parser.add_argument('--holdout_fold', type=str, choices=['1', 'none'], required=True, help='Set 1 for development and none for training on all data without validation.')
    train_parser.add_argument('--model_type', type=str, required=True, help='E.g., Cnn_9layers_AvgPooling.')
    train_parser.add_argument('--batch_size', type=int, required=True)
    train_parser.add_argument('--cuda', action='store_true', default=False)
    train_parser.add_argument('--audio_num', type=int, default=4)
    train_parser.add_argument('--mini_data', action='store_true', default=False, help='Set True for debugging on a small part of data.')

    # Inference validation data
    inference_parser = subparsers.add_parser('inference_validation')
    inference_parser.add_argument('--dataset_dir', type=str, required=True, help='Directory of dataset.')
    inference_parser.add_argument('--workspace', type=str, required=True, help='Directory of your workspace.')
    inference_parser.add_argument('--holdout_fold', type=str, choices=['1'], required=True)
    inference_parser.add_argument('--model_type', type=str, required=True, help='E.g., Cnn_9layers_AvgPooling.')
    inference_parser.add_argument('--iteration', type=int, required=True, help='Load model of this iteration.')
    inference_parser.add_argument('--batch_size', type=int, required=True)
    inference_parser.add_argument('--cuda', action='store_true', default=False)
    inference_parser.add_argument('--audio_num', type=int, default=4)
    inference_parser.add_argument('--visualize', action='store_true', default=False, help='Visualize log mel spectrogram of different sound classes.')
    inference_parser.add_argument('--mini_data', action='store_true', default=False, help='Set True for debugging on a small part of data.')

    # Parse arguments
    args = parser.parse_args()
    args.filename = get_filename(__file__)

    # Only the 'train' mode is dispatched; any other mode is rejected.
    if args.mode != 'train':
        raise Exception('Error argument!')

    # One training run per cross-validation fold, numbered from 1
    for fold in range(1, 6):
        train(args, fold)


--------------------------------------------------------------------------------
/util_esc50/audio.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.distributions import Normal, Uniform, HalfNormal
  6 | 
  7 | 
  8 | def torch_angle(t):
  9 |     return torch.atan2(t[...,1], t[...,0])
 10 | 
 11 | 
 12 | def spectrogram(sig, n_fft=2048, hop=None, window=None, **kwargs):
 13 |     """
 14 |     sig -> (batch, channel, time) or (channel, time)
 15 |     stft -> (batch, channel, freq, hop, complex) or (channel, freq, hop, complex)
 16 |     """
 17 |     if hop is None:
 18 |         hop = n_fft // 4
 19 |     if window is None:
 20 |         window = torch.hann_window(n_fft)
 21 | 
 22 |     if sig.dim() == 3:
 23 |         batch, channel, time = sig.size()
 24 |         out_shape = [batch, channel, n_fft//2+1, -1, 2]
 25 |     elif sig.dim() == 2:
 26 |         channel, time = sig.size()
 27 |         out_shape = [channel, n_fft//2+1, -1, 2]
 28 |     else:
 29 |         raise ValueError('Input tensor dim() must be either 2 or 3.')
 30 | 
 31 |     sig = sig.reshape(-1, time)
 32 | 
 33 |     stft = torch.stft(sig, n_fft, hop, window=window, **kwargs)
 34 | 
 35 |     stft = stft.reshape(out_shape)
 36 |     return stft
 37 | 
 38 | 
 39 | def _hertz_to_mel(f):
 40 |     '''
 41 |     Converting frequency into mel values using HTK formula
 42 |     '''
 43 |     return 2595. * torch.log10(torch.tensor(1.) + (f / 700.))
 44 | 
 45 | 
 46 | def _mel_to_hertz(mel):
 47 |     '''
 48 |     Converting mel values into frequency using HTK formula
 49 |     '''
 50 |     return 700. * (10**(mel / 2595.) - 1.)
 51 | 
 52 | 
 53 | def create_mel_filter(n_stft, sr, n_mels=128, f_min=0.0, f_max=None):
 54 |     '''
 55 |     Creates filter matrix to transform fft frequency bins into mel frequency bins.
 56 |     Equivalent to librosa.filters.mel(sr, n_fft, htk=True, norm=None).
 57 |     '''
 58 |     # Convert to find mel lower/upper bounds
 59 |     f_max = f_max if f_max else sr // 2   
 60 |     m_min = 0. if f_min == 0 else _hertz_to_mel(f_min)
 61 |     m_max = _hertz_to_mel(f_max)
 62 | 
 63 |     # Compute stft frequency values
 64 |     stft_freqs = torch.linspace(f_min, f_max, n_stft)
 65 | 
 66 |     # Find mel values, and convert them to frequency units
 67 |     m_pts = torch.linspace(m_min, m_max, n_mels + 2)
 68 |     f_pts = _mel_to_hertz(m_pts)
 69 | 
 70 |     f_diff = f_pts[1:] - f_pts[:-1]  # (n_mels + 1)
 71 |     slopes = f_pts.unsqueeze(0) - stft_freqs.unsqueeze(1)  # (n_stft, n_mels + 2)
 72 |     
 73 |     down_slopes = (-1. * slopes[:, :-2]) / f_diff[:-1]  # (n_stft, n_mels)
 74 |     up_slopes = slopes[:, 2:] / f_diff[1:]  # (n_stft, n_mels)
 75 |     fb = torch.clamp(torch.min(down_slopes, up_slopes), min=0.)
 76 | 
 77 |     return fb, f_pts[:-2]
 78 | 
 79 | 
 80 | def amplitude_to_db(spec, ref=1.0, amin=1e-10, top_db=80):
 81 |     """
 82 |     Amplitude spectrogram to the db scale
 83 |     """
 84 |     power = spec**2
 85 |     return power_to_db(power, ref, amin, top_db)
 86 | 
 87 | 
 88 | def power_to_db(spec, ref=1.0, amin=1e-10, top_db=80.0):
 89 |     """
 90 |     Power spectrogram to the db scale
 91 |     spec -> (*, freq, time)
 92 |     """
 93 |     if amin <= 0:
 94 |         raise ParameterError('amin must be strictly positive')
 95 | 
 96 |     if callable(ref):
 97 |         ref_value = ref(spec)
 98 |     else:
 99 |         ref_value = torch.tensor(ref)
100 | 
101 |     log_spec = 10*torch.log10( torch.clamp(spec, min=amin) )
102 |     log_spec -= 10*torch.log10( torch.clamp(ref_value, min=amin) )
103 |     
104 |     if top_db is not None:
105 |         if top_db < 0:
106 |             raise ParameterError('top_db must be non-negative')
107 |         
108 |         log_spec = torch.clamp(log_spec, min=(log_spec.max() - top_db))
109 | 
110 |     #log_spec /= log_spec.max()
111 |     return log_spec
112 |     
113 | 
def spec_whiten(spec, eps=1):
    """
    Standardise log10(spec + eps) per sample.

    The mean and standard deviation are taken over everything but the first
    dimension and reshaped to (-1, 1, 1, 1) before being applied, so they
    broadcast against a 4-dimensional input.
    """
    log_spec = torch.log10(spec + eps)

    # One row per sample: (batch, everything else)
    n_samples = log_spec.size(0)
    flat = log_spec.view(n_samples, -1)

    sample_mean = torch.mean(flat, dim=-1).view(-1, 1, 1, 1)
    sample_std = torch.std(flat, dim=-1).view(-1, 1, 1, 1)

    centered = log_spec - sample_mean
    return centered / sample_std
130 | 
131 | 
class Spectrogram(nn.Module):
    """
    Module that outputs the spectrogram of an audio signal.

    Input:  (batch, channel, signal)
    Output: (batch, channel, frequency_bins, time_hop)

    Its implemented as a layer so that the computation can be faster (done dynamically
    on GPU) and no need to store the transforms. More information:
        - https://github.com/keunwoochoi/kapre
        - https://arxiv.org/pdf/1706.05781.pdf

    Args:
     * hop: int > 0
       - Hop length between frames in sample,  should be <= n_fft.
       - Default: None (in which case n_fft // 4 is used)
     * n_fft: int > 0
       - Size of the fft.
       - Default: 2048
     * pad: int >= 0
       - Amount of two sided zero padding to apply.
       - Default: 0
     * window: torch.Tensor,
       -  Windowing used in the stft.
       -  Default: None (in which case torch.hann_window(n_fft) is used)
     * sr: int > 0
       -  Sampling rate of the audio signal. This may not be the same in all samples (?)
       -  Default: 44100
     * stretch_param: pair (prob, max_deviation) or None
       -  Enables random time stretching while the module is in training
          mode: with probability ``prob`` the STFT is resampled in time at
          a rate of ``1 - u``, ``u`` drawn uniformly from
          ``[-max_deviation, max_deviation]``.
       -  Default: None (no time stretching)
     * spec_kwargs:
       -  Any named arguments to be passed to the stft
    """

    def __init__(self, hop=None, n_fft=2048, pad=0, window=None, sr=44100, stretch_param=None, **spec_kwargs):

        super(Spectrogram, self).__init__()

        # n_fft has to be known before the window is built, because
        # _build_window falls back to a Hann window of that length.
        self.n_fft = n_fft
        self.window = self._build_window(window)
        self.hop = n_fft // 4 if hop is None else hop
        self.pad = pad

        # Not all samples will have the same sr
        self.sr = sr
        self.spec_kwargs = spec_kwargs

        self.stretch_param = stretch_param
        self.prob = 0
        if self.stretch_param is not None:
            self._build_pv()

    def _build_pv(self):
        """Prepare the state used by the phase vocoder (time stretching)."""
        fft_size = self.n_fft//2 + 1
        # Expected phase advance per hop for every frequency bin, stored
        # as (freq, 1) so it broadcasts over the time axis.
        self.phi_advance = nn.Parameter(torch.linspace(0,
            math.pi * self.hop,
            fft_size)[..., None], requires_grad=False)

        self.prob = self.stretch_param[0]
        self.dist = Uniform(-self.stretch_param[1], self.stretch_param[1])

    def _get_rate(self):
        """Sample a stretch rate ``1 - u`` with ``u`` drawn from ``self.dist``."""
        return 1 - self.dist.sample()

    def phase_vocoder(self, D):
        """
        Resample an STFT along time at a randomly drawn rate.

        Args:
            D: STFT indexed as (batch, channel, freq, old_time, 2); the
               last axis presumably holds the real and imaginary parts
               (that depends on `spectrogram`, defined elsewhere).

        Returns:
            (Dstretch, rate) where Dstretch is
            (batch, channel, freq, new_time, 2) and rate is the sampled
            stretch rate.
        """
        rate = self._get_rate()
        # Fractional positions on the original time axis -> (new_time)
        time_steps = torch.arange(0, D.size(3), rate, device=D.device)

        # Interpolation weight between neighbouring frames -> (new_time)
        alphas = (time_steps % 1)

        # Phase of the very first frame -> (batch, channel, freq, 1)
        phase_0 = torch_angle(D[:,:,:,:1])

        # Pad two frames at the end of the time axis so that
        # `time_steps + 1` never indexes out of range.
        pad_shape = [0,0]+[0,2]+[0]*6
        D = F.pad(D, pad_shape)

        D0 = D[:,:,:,time_steps.long()]       # (batch, channel, freq, new_time, 2)
        D1 = D[:,:,:,(time_steps + 1).long()] # (batch, channel, freq, new_time, 2)

        D0_angle = torch_angle(D0) # (batch, channel, freq, new_time)
        D1_angle = torch_angle(D1) # (batch, channel, freq, new_time)

        D0_norm = torch.norm(D0, dim=-1) # (batch, channel, freq, new_time)
        D1_norm = torch.norm(D1, dim=-1) # (batch, channel, freq, new_time)

        # Deviation from the expected phase advance, wrapped to [-pi, pi]
        Dphase = D1_angle - D0_angle - self.phi_advance
        Dphase = Dphase - 2 * math.pi * torch.round(Dphase / (2 * math.pi))

        # Compute Phase Accum
        phase = Dphase + self.phi_advance

        # Start the accumulation from the phase of the first frame
        phase = torch.cat([phase_0, phase[:,:,:,:-1]], dim=-1)

        phase_acc = torch.cumsum(phase, -1)

        # Linearly interpolated magnitude
        mag = alphas * D1_norm + (1-alphas) * D0_norm

        Dstretch_real = mag * torch.cos(phase_acc)
        Dstretch_imag = mag * torch.sin(phase_acc)

        Dstretch = torch.stack([Dstretch_real, Dstretch_imag], dim=-1)

        return Dstretch, rate

    def _build_window(self, window):
        """
        Wrap the analysis window in a non-trainable nn.Parameter.

        Args:
            window: torch.Tensor, or None to use torch.hann_window(self.n_fft).

        Raises:
            TypeError: if ``window`` is neither None nor a torch.Tensor.
        """
        if window is None:
            window = torch.hann_window(self.n_fft)
        if not isinstance(window, torch.Tensor):
            raise TypeError('window must be a of type torch.Tensor')
        # In order for the window to be added as one of the Module's
        # parameters it has to be a nn.Parameter
        return nn.Parameter(window, requires_grad=False)

    def __dim_stft_mod(self, arr):
        """Map signal lengths to frame counts: ``arr // hop + 1``."""
        if arr is None:
            return None
        return arr//self.hop+1

    def __dim_pv_mod(self, arr, rate):
        """Map frame counts to their value after stretching by ``rate``."""
        if arr is None:
            return None
        return (arr.float()/rate).long()+1

    def _out_dims(self, arr, rate=None):
        """
        Convert input sequence lengths into output (frame) lengths.

        Returns None when ``arr`` is None. When ``rate`` is given the
        frame counts are additionally rescaled for time stretching.
        """
        if arr is None:
            return None
        new_arr = self.__dim_stft_mod(arr)
        if rate is None:
            return new_arr
        return self.__dim_pv_mod(new_arr, rate)

    def _norm(self, stft):
        """L2 norm over the last axis of ``stft``."""
        #return stft.pow(2).sum(-1).pow(1.0 / 2.0)
        return torch.norm(stft, dim=-1, p=2)

    def forward(self, x, lengths=None):
        """
        If x is a padded tensor then lengths should have the
        corresponding sequence length for every element in the batch.
        Input: (batch, channel, signal)
        Output:(batch, channel, frequency_bins, time_hop)

        Returns ``spec`` alone when ``lengths`` is None, otherwise the
        tuple ``(spec, lengths)`` with lengths converted to frame counts.
        The whole computation runs under ``torch.no_grad()``.
        """
        with torch.no_grad():
            assert x.dim() == 3

            if self.pad > 0:
                x = F.pad(x, (self.pad, self.pad), "constant")

            spec = spectrogram(x,
                n_fft=self.n_fft,
                hop=self.hop,
                window=self.window,
                **self.spec_kwargs)

            rate = None

            # The random draw is made unconditionally so the RNG stream is
            # consumed the same way in train and eval mode. Stretching
            # additionally requires that the phase-vocoder state exists.
            draw = torch.rand(1)[0]
            if draw <= self.prob and self.training and self.stretch_param is not None:
                spec, rate = self.phase_vocoder(spec)

            lengths = self._out_dims(lengths, rate)

            spec = self._norm(spec)

            if lengths is not None:
                assert spec.size(0) == lengths.size(0)
                return spec, lengths

            return spec
308 | 
309 | 
class Melspectrogram(Spectrogram):
    """
    Spectrogram layer followed by a mel filter bank and optional normalisation.

    Args:
     * hop, n_fft, pad, window, sr:
       - Forwarded positionally to ``Spectrogram.__init__``.
     * n_mels: int
       - Number of mel bands requested from ``create_mel_filter``.
       - Default: 128
     * norm: 'whiten', 'db' or None
       - Selects ``spec_whiten`` or ``amplitude_to_db`` as post-processing;
         any other value disables normalisation.
       - Default: None
     * spec_kwargs:
       - Remaining keyword arguments, forwarded to ``Spectrogram.__init__``.
    """

    def __init__(self, hop=None, n_mels=128, n_fft=2048, pad=0, window=None, sr=44100, norm=None, **spec_kwargs):
        super(Melspectrogram, self).__init__(hop, n_fft, pad, window, sr, **spec_kwargs)

        self.n_fft = n_fft
        self.n_mels = n_mels
        self.mel_fb, self.mel_freq_vals = self._build_filter()

        # Unknown names fall through to None, i.e. no normalisation.
        normalisers = {'whiten': spec_whiten, 'db': amplitude_to_db}
        self.norm = normalisers.get(norm, None)

    def _build_filter(self):
        """Return the mel filter matrix (as a frozen nn.Parameter) and the mel frequency values."""
        n_freq_bins = self.n_fft // 2 + 1
        filter_bank, mel_freqs = create_mel_filter(n_freq_bins, self.sr, n_mels=self.n_mels)
        # Wrapping the matrix in nn.Parameter makes it follow the model's device.
        return nn.Parameter(filter_bank, requires_grad=False), mel_freqs

    def forward(self, x, lengths=None):
        """
        Compute the mel spectrogram of ``x``.

        Returns the spectrogram alone when ``lengths`` is None, otherwise
        the tuple ``(spec, lengths)`` as produced by the parent class.
        """
        parent_out = super(Melspectrogram, self).forward(x, lengths)
        if isinstance(parent_out, tuple):
            linear_spec, lengths = parent_out
        else:
            linear_spec = parent_out

        # Move frequency to the last axis, project it through the filter
        # bank, then restore the original axis order.
        mel_spec = torch.matmul(linear_spec.transpose(2, 3), self.mel_fb).transpose(2, 3)

        if self.norm is not None:
            mel_spec = self.norm(mel_spec)

        if lengths is None:
            return mel_spec
        return mel_spec, lengths


--------------------------------------------------------------------------------
/util_esc50/net.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import torch
  3 | import torch.nn as nn
  4 | import torch.nn.functional as F
  5 | from torch.autograd import Variable
  6 | from torch.nn.utils import weight_norm
  7 | 
  8 | class ConvBlock(nn.Module):
  9 |     def __init__(self, in_channels, out_channels):
 10 |         
 11 |         super(ConvBlock, self).__init__()
 12 |         self.conv1 = nn.Conv2d(in_channels=in_channels, 
 13 |                               out_channels=out_channels,
 14 |                               kernel_size=(3, 3), stride=(1, 1),
 15 |                               padding=(1, 1), bias=False)
 16 |                               
 17 |         self.conv2 = nn.Conv2d(in_channels=out_channels, 
 18 |                               out_channels=out_channels,
 19 |                               kernel_size=(3, 3), stride=(1, 1),
 20 |                               padding=(1, 1), bias=False)
 21 |         self.bn1 = nn.BatchNorm2d(out_channels)
 22 |         self.bn2 = nn.BatchNorm2d(out_channels)
 23 | 
 24 |         
 25 |     def show(self, input, pool_size=(2, 2), pool_type='avg'):
 26 |         
 27 |         x = input
 28 |         x = F.relu_(self.bn1(self.conv1(x)))
 29 |         x = F.relu_(self.bn2(self.conv2(x)))
 30 |         return x
 31 |         
 32 |     def forward(self, input, pool_size=(2, 2), pool_type='avg'):
 33 |         
 34 |         x = input
 35 |         x = F.relu_(self.bn1(self.conv1(x)))
 36 |         x = F.relu_(self.bn2(self.conv2(x)))
 37 |         if pool_type == 'max':
 38 |             x = F.max_pool2d(x, kernel_size=pool_size)
 39 |         elif pool_type == 'avg':
 40 |             x = F.avg_pool2d(x, kernel_size=pool_size)
 41 |         else:
 42 |             raise Exception('Incorrect argument!')
 43 |         
 44 |         return x
 45 | 
 46 |     
 47 | def conv3x3(in_planes, out_planes, stride=1):
 48 |     """3x3 convolution with padding"""
 49 |     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
 50 |                      padding=1, bias=False)
 51 | def conv1x1(in_planes, out_planes, stride=1):
 52 |     """1x1 convolution"""
 53 |     return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
 54 | 
 55 |     
 56 | class TFBlock(nn.Module):
 57 |     def __init__(self, in_channels, out_channels):
 58 |         
 59 |         super(TFBlock, self).__init__()
 60 |         self.conv1 = nn.Conv2d(in_channels=in_channels, 
 61 |                               out_channels=out_channels,
 62 |                               kernel_size=(3, 3), stride=(1, 1),
 63 |                               padding=(1, 1), bias=False)
 64 |                               
 65 |         self.conv2 = nn.Conv2d(in_channels=out_channels, 
 66 |                              out_channels=out_channels,
 67 |                                  kernel_size=(3, 3), stride=(1, 1),
 68 |                              padding=(1, 1), bias=False)
 69 |         self.bn1 = nn.BatchNorm2d(out_channels)
 70 |         self.bn2 = nn.BatchNorm2d(out_channels)
 71 |         self.alpha = nn.Parameter(torch.cuda.FloatTensor([.1, .1, .1]))
 72 |         self.bnx = nn.BatchNorm2d(1)
 73 |         self.bny = nn.BatchNorm2d(1)
 74 |         self.bnz = nn.BatchNorm2d(out_channels)
 75 |         self.bna = nn.BatchNorm2d(out_channels)
 76 |         self.bnb = nn.BatchNorm2d(out_channels)
 77 |         self.conv3 = nn.Conv2d(in_channels=out_channels, 
 78 |                               out_channels=1,
 79 |                               kernel_size=(1, 1), stride=(1, 1),
 80 |                               padding=(0, 0), bias=False)
 81 |         self.conv4 = nn.Conv2d(in_channels=out_channels, 
 82 |                               out_channels=1,
 83 |                               kernel_size=(1, 1), stride=(1, 1),
 84 |                               padding=(0, 0), bias=False)
 85 |         if out_channels == 64:
 86 |             self.globalAvgPool2 = nn.AvgPool2d((250,1), stride=1)
 87 |             self.globalAvgPool3 = nn.AvgPool2d((1,40), stride=1)
 88 |             self.globalMaxPool2 = nn.MaxPool2d((1,64), stride=1)
 89 |             self.globalMaxPool3 = nn.MaxPool2d((64,1), stride=1)
 90 |             self.fc1 = nn.Linear(in_features=40, out_features=40)
 91 |             self.fc2 = nn.Linear(in_features=250, out_features=250)
 92 |             
 93 |         elif out_channels == 128:
 94 |             self.globalAvgPool2 = nn.AvgPool2d((125,1), stride=1)
 95 |             self.globalAvgPool3 = nn.AvgPool2d((1,20), stride=1)
 96 |             self.globalMaxPool2 = nn.MaxPool2d((1,128), stride=1)
 97 |             self.globalMaxPool3 = nn.MaxPool2d((128,1), stride=1)
 98 |             self.fc1 = nn.Linear(in_features=20, out_features=20)
 99 |             self.fc2 = nn.Linear(in_features=125, out_features=125)
100 |         elif out_channels == 256:
101 |             self.globalAvgPool2 = nn.AvgPool2d((62,1), stride=1)
102 |             self.globalAvgPool3 = nn.AvgPool2d((1,10), stride=1)
103 |             self.globalMaxPool2 = nn.MaxPool2d((1,128), stride=1)
104 |             self.globalMaxPool3 = nn.MaxPool2d((128,1), stride=1)
105 |             self.fc1 = nn.Linear(in_features=10, out_features=10)
106 |             self.fc2 = nn.Linear(in_features=62, out_features=62)
107 |         elif out_channels == 512:
108 |             self.globalAvgPool2 = nn.AvgPool2d((31,1), stride=1)
109 |             self.globalAvgPool3 = nn.AvgPool2d((1,5), stride=1)
110 |             self.globalMaxPool2 = nn.MaxPool2d((1,128), stride=1)
111 |             self.globalMaxPool3 = nn.MaxPool2d((128,1), stride=1)
112 |             self.fc1 = nn.Linear(in_features=5, out_features=5)
113 |             self.fc2 = nn.Linear(in_features=31, out_features=31)
114 |         self.sigmoid = nn.Sigmoid()
115 |         self.sigmoid2 = nn.Sigmoid()
116 |         self.downsample = conv1x1(in_channels, out_channels)
117 |         self.bn = nn.BatchNorm2d(out_channels)
118 |         self.relu = nn.LeakyReLU(0.2)
119 | 
120 |     
121 |     def show(self, input, pool_size=(2, 2), pool_type='avg'):
122 |         
123 |         x = input
124 |         x = self.bn1(self.relu(self.conv1(x)))
125 |         x = self.bn2(self.relu(self.conv2(x)))
126 |         out1 = x.clone()
127 |         res = x.clone()
128 |         y = x.clone()
129 |         y = self.bnx(self.relu(self.conv3(y)))
130 |         out6 = y.clone()
131 |         res_2 = x.clone()
132 |         z = x.clone()
133 |         z = self.bny(self.relu(self.conv4(z)))
134 |         res_3 = x.clone()
135 |         out7 = z.clone()
136 |         h = x.clone()
137 |         
138 |         res_2 = res_2.transpose(1,3)
139 |         y = y.transpose(1,3)
140 |         y = self.globalAvgPool2(y)
141 |         y = y.view(y.size(0), -1)
142 |         y = self.sigmoid(y)
143 |         y = y.view(y.size(0), y.size(1), 1, 1)
144 |         y = y * res_2
145 |         y = y.transpose(1,3)
146 |         y = self.bna(y)
147 |         out2=y.clone()
148 |         res_3 = res_3.transpose(1,2)
149 |         z = z.transpose(1,2)
150 |         z = self.globalAvgPool3(z)
151 |         z = z.view(z.size(0), -1)
152 |         z = self.sigmoid(z)
153 |         z = z.view(z.size(0), z.size(1), 1, 1)
154 |         z = z * res_3
155 |         z = z.transpose(1,2)
156 |         z = self.bnb(z)
157 |         out3 = z.clone()
158 |         so_alpha = F.softmax(self.alpha,dim=0)
159 |         x = so_alpha[0]*h + so_alpha[1]*y + so_alpha[2]*z
160 |         x = self.relu(x)
161 |         out4 = x.clone()
162 |         if pool_type == 'max':
163 |             x = F.max_pool2d(x, kernel_size=pool_size)
164 |         elif pool_type == 'avg':
165 |             x = F.avg_pool2d(x, kernel_size=pool_size)
166 |         else:
167 |             raise Exception('Incorrect argument!')
168 |         out5 = x.clone()
169 |         out1 = torch.mean(out1, dim=1)
170 |         out2 = torch.mean(out2, dim=1)
171 |         out3 = torch.mean(out3, dim=1)
172 |         out4 = torch.mean(out4, dim=1)
173 |         out5 = torch.mean(out5, dim=1)
174 |         return out1, out2, out3, out4, out5, out6, out7
175 |     
176 |     def forward(self, input, pool_size=(2, 2), pool_type='avg'):
177 |         
178 |         x = input
179 |         x = self.bn1(self.relu(self.conv1(x)))
180 |         x = self.bn2(self.relu(self.conv2(x)))
181 |         res = x.clone()
182 |         y = x.clone()
183 |         y = self.bnx(self.relu(self.conv3(y)))
184 |         res_2 = x.clone()
185 |         z = x.clone()
186 |         z = self.bny(self.relu(self.conv4(z)))
187 |         res_3 = x.clone()
188 |         
189 |         h = x.clone()
190 |         
191 |         res_2 = res_2.transpose(1,3)
192 |         y = y.transpose(1,3)
193 |         y = self.globalAvgPool2(y)
194 |         y = y.view(y.size(0), -1)
195 |         y = self.sigmoid(y)
196 |         y = y.view(y.size(0), y.size(1), 1, 1)
197 |         y = y * res_2
198 |         y = y.transpose(1,3)
199 |         y = self.bna(y)
200 |         res_3 = res_3.transpose(1,2)
201 |         z = z.transpose(1,2)
202 |         z = self.globalAvgPool3(z)
203 |         z = z.view(z.size(0), -1)
204 |         z = self.sigmoid(z)
205 |         z = z.view(z.size(0), z.size(1), 1, 1)
206 |         z = z * res_3
207 |         z = z.transpose(1,2)
208 |         z = self.bnb(z)
209 |         so_alpha = F.softmax(self.alpha,dim=0)
210 |         x = so_alpha[0]*h + so_alpha[1]*y + so_alpha[2]*z
211 |         x = self.relu(x)
212 |         if pool_type == 'max':
213 |             x = F.max_pool2d(x, kernel_size=pool_size)
214 |         elif pool_type == 'avg':
215 |             x = F.avg_pool2d(x, kernel_size=pool_size)
216 |         else:
217 |             raise Exception('Incorrect argument!')
218 |         
219 |         return x
220 |     
class TFNet(nn.Module):
    """
    Classifier made of four ``TFBlock`` stages (64, 128, 256, 512 channels)
    followed by a linear layer.

    Args:
        classes_num: number of output classes.
        activation: 'logsoftmax' or 'sigmoid'; selects the output
            non-linearity applied in ``forward``.
    """

    def __init__(self, classes_num=10, activation='logsoftmax'):
        super(TFNet, self).__init__()

        self.activation = activation

        self.tfblock1 = TFBlock(in_channels=1, out_channels=64)
        self.tfblock2 = TFBlock(in_channels=64, out_channels=128)
        self.tfblock3 = TFBlock(in_channels=128, out_channels=256)
        self.tfblock4 = TFBlock(in_channels=256, out_channels=512)
        self.fc = nn.Linear(512, classes_num, bias=True)

    def show(self, input):
        """
        Return intermediate activations for inspection.

        Input: (batch_size, times_steps, freq_bins)

        Returns:
            (x1, x2, x3, x4, out1, ..., out7): x1-x4 are the channel means
            of the four block outputs, out1-out7 are the values returned
            by ``self.tfblock1.show``.
        """
        x = input[:, None, :, :]
        '''(batch_size, 1, times_steps, freq_bins)'''
        # The first-stage internals come from tfblock1 (this class has no
        # conv_block1 attribute).
        out1, out2, out3, out4, out5, out6, out7 = self.tfblock1.show(x)
        x = self.tfblock1(x, pool_size=(2, 2), pool_type='avg')
        x1 = torch.mean(x, dim=1)
        x = self.tfblock2(x, pool_size=(2, 2), pool_type='avg')
        x2 = torch.mean(x, dim=1)
        x = self.tfblock3(x, pool_size=(2, 2), pool_type='avg')
        x3 = torch.mean(x, dim=1)
        x = self.tfblock4(x, pool_size=(2, 2), pool_type='avg')
        x4 = torch.mean(x, dim=1)
        return x1, x2, x3, x4, out1, out2, out3, out4, out5, out6, out7

    def forward(self, input):
        '''
        Input: (batch_size, seq_number, times_steps, freq_bins)

        Only the first entry along seq_number is used.

        Raises:
            ValueError: if ``self.activation`` is neither 'logsoftmax'
                nor 'sigmoid'.
        '''

        x = input[:, 0, :, :]
        x = x[:, None, :, :]
        '''(batch_size, 1, times_steps, freq_bins)'''

        x = self.tfblock1(x, pool_size=(2, 2), pool_type='avg')
        x = self.tfblock2(x, pool_size=(2, 2), pool_type='avg')
        x = self.tfblock3(x, pool_size=(2, 2), pool_type='avg')
        x = self.tfblock4(x, pool_size=(2, 2), pool_type='avg')
        '''(batch_size, feature_maps, time_steps, freq_bins)'''
        x = torch.mean(x, dim=3)        # (batch_size, feature_maps, time_steps)
        (x, _) = torch.max(x, dim=2)    # (batch_size, feature_maps)
        x = self.fc(x)

        if self.activation == 'logsoftmax':
            return F.log_softmax(x, dim=-1)

        if self.activation == 'sigmoid':
            return torch.sigmoid(x)

        # Fail with a clear message instead of an unbound local variable.
        raise ValueError(
            "Unsupported activation %r; expected 'logsoftmax' or 'sigmoid'"
            % (self.activation,))
274 | 
275 |     
class Cnn(nn.Module):
    """
    Classifier made of four ``ConvBlock`` stages (64, 128, 256, 512
    channels) followed by a linear layer.

    ``fc2`` and ``dropout`` are created but not applied in ``forward``;
    they are kept so the module's attributes and parameter names stay
    unchanged.

    Args:
        classes_num: number of output classes.
        activation: 'logsoftmax' or 'sigmoid'; selects the output
            non-linearity applied in ``forward``.
    """

    def __init__(self, classes_num=50, activation='logsoftmax'):
        super(Cnn, self).__init__()

        self.activation = activation
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        self.fc2 = nn.Linear(512, 512, bias=True)
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(512, classes_num, bias=True)

    def forward(self, input):
        '''
        Input: (batch_size, seq_number, times_steps, freq_bins)

        Only the first entry along seq_number is used.

        Raises:
            ValueError: if ``self.activation`` is neither 'logsoftmax'
                nor 'sigmoid'.
        '''

        x = input[:, 0, :, :]
        x = x[:, None, :, :]
        '''(batch_size, 1, times_steps, freq_bins)'''

        x = self.conv_block1(x, pool_size=(2, 2), pool_type='avg')
        x = self.conv_block2(x, pool_size=(2, 2), pool_type='avg')
        x = self.conv_block3(x, pool_size=(2, 2), pool_type='avg')
        x = self.conv_block4(x, pool_size=(2, 2), pool_type='avg')
        '''(batch_size, feature_maps, time_steps, freq_bins)'''
        x = torch.mean(x, dim=3)  # (batch_size, feature_maps, time_steps)
        (x, _) = torch.max(x, dim=2)  # (batch_size, feature_maps)
        x = self.fc(x)

        if self.activation == 'logsoftmax':
            return F.log_softmax(x, dim=-1)

        if self.activation == 'sigmoid':
            return torch.sigmoid(x)

        # Fail with a clear message instead of an unbound local variable.
        raise ValueError(
            "Unsupported activation %r; expected 'logsoftmax' or 'sigmoid'"
            % (self.activation,))
314 | 


--------------------------------------------------------------------------------
/util_esc50/fold1_test.csv:
--------------------------------------------------------------------------------
  1 | filename,fold,target,category,esc10,src_file,take
  2 | 1-100032-A-0.wav,1,0,dog,True,100032,A
  3 | 1-100038-A-14.wav,1,14,chirping_birds,False,100038,A
  4 | 1-100210-A-36.wav,1,36,vacuum_cleaner,False,100210,A
  5 | 1-100210-B-36.wav,1,36,vacuum_cleaner,False,100210,B
  6 | 1-101296-A-19.wav,1,19,thunderstorm,False,101296,A
  7 | 1-101296-B-19.wav,1,19,thunderstorm,False,101296,B
  8 | 1-101336-A-30.wav,1,30,door_wood_knock,False,101336,A
  9 | 1-101404-A-34.wav,1,34,can_opening,False,101404,A
 10 | 1-103298-A-9.wav,1,9,crow,False,103298,A
 11 | 1-103995-A-30.wav,1,30,door_wood_knock,False,103995,A
 12 | 1-103999-A-30.wav,1,30,door_wood_knock,False,103999,A
 13 | 1-104089-A-22.wav,1,22,clapping,False,104089,A
 14 | 1-104089-B-22.wav,1,22,clapping,False,104089,B
 15 | 1-105224-A-22.wav,1,22,clapping,False,105224,A
 16 | 1-110389-A-0.wav,1,0,dog,True,110389,A
 17 | 1-110537-A-22.wav,1,22,clapping,False,110537,A
 18 | 1-115521-A-19.wav,1,19,thunderstorm,False,115521,A
 19 | 1-115545-A-48.wav,1,48,fireworks,False,115545,A
 20 | 1-115545-B-48.wav,1,48,fireworks,False,115545,B
 21 | 1-115545-C-48.wav,1,48,fireworks,False,115545,C
 22 | 1-115546-A-48.wav,1,48,fireworks,False,115546,A
 23 | 1-115920-A-22.wav,1,22,clapping,False,115920,A
 24 | 1-115920-B-22.wav,1,22,clapping,False,115920,B
 25 | 1-115921-A-22.wav,1,22,clapping,False,115921,A
 26 | 1-116765-A-41.wav,1,41,chainsaw,True,116765,A
 27 | 1-11687-A-47.wav,1,47,airplane,False,11687,A
 28 | 1-118206-A-31.wav,1,31,mouse_click,False,118206,A
 29 | 1-118559-A-17.wav,1,17,pouring_water,False,118559,A
 30 | 1-119125-A-45.wav,1,45,train,False,119125,A
 31 | 1-121951-A-8.wav,1,8,sheep,False,121951,A
 32 | 1-12653-A-15.wav,1,15,water_drops,False,12653,A
 33 | 1-12654-A-15.wav,1,15,water_drops,False,12654,A
 34 | 1-12654-B-15.wav,1,15,water_drops,False,12654,B
 35 | 1-13571-A-46.wav,1,46,church_bells,False,13571,A
 36 | 1-13572-A-46.wav,1,46,church_bells,False,13572,A
 37 | 1-13613-A-37.wav,1,37,clock_alarm,False,13613,A
 38 | 1-137-A-32.wav,1,32,keyboard_typing,False,137,A
 39 | 1-137296-A-16.wav,1,16,wind,False,137296,A
 40 | 1-14262-A-37.wav,1,37,clock_alarm,False,14262,A
 41 | 1-155858-A-25.wav,1,25,footsteps,False,155858,A
 42 | 1-155858-B-25.wav,1,25,footsteps,False,155858,B
 43 | 1-155858-C-25.wav,1,25,footsteps,False,155858,C
 44 | 1-155858-D-25.wav,1,25,footsteps,False,155858,D
 45 | 1-155858-E-25.wav,1,25,footsteps,False,155858,E
 46 | 1-155858-F-25.wav,1,25,footsteps,False,155858,F
 47 | 1-15689-A-4.wav,1,4,frog,False,15689,A
 48 | 1-15689-B-4.wav,1,4,frog,False,15689,B
 49 | 1-160563-A-48.wav,1,48,fireworks,False,160563,A
 50 | 1-160563-B-48.wav,1,48,fireworks,False,160563,B
 51 | 1-16568-A-3.wav,1,3,cow,False,16568,A
 52 | 1-16746-A-15.wav,1,15,water_drops,False,16746,A
 53 | 1-17092-A-27.wav,1,27,brushing_teeth,False,17092,A
 54 | 1-17092-B-27.wav,1,27,brushing_teeth,False,17092,B
 55 | 1-17124-A-43.wav,1,43,car_horn,False,17124,A
 56 | 1-17150-A-12.wav,1,12,crackling_fire,True,17150,A
 57 | 1-172649-A-40.wav,1,40,helicopter,True,172649,A
 58 | 1-172649-B-40.wav,1,40,helicopter,True,172649,B
 59 | 1-172649-C-40.wav,1,40,helicopter,True,172649,C
 60 | 1-172649-D-40.wav,1,40,helicopter,True,172649,D
 61 | 1-172649-E-40.wav,1,40,helicopter,True,172649,E
 62 | 1-172649-F-40.wav,1,40,helicopter,True,172649,F
 63 | 1-17295-A-29.wav,1,29,drinking_sipping,False,17295,A
 64 | 1-17367-A-10.wav,1,10,rain,True,17367,A
 65 | 1-17565-A-12.wav,1,12,crackling_fire,True,17565,A
 66 | 1-17585-A-7.wav,1,7,insects,False,17585,A
 67 | 1-17742-A-12.wav,1,12,crackling_fire,True,17742,A
 68 | 1-17808-A-12.wav,1,12,crackling_fire,True,17808,A
 69 | 1-17808-B-12.wav,1,12,crackling_fire,True,17808,B
 70 | 1-1791-A-26.wav,1,26,laughing,False,1791,A
 71 | 1-17970-A-4.wav,1,4,frog,False,17970,A
 72 | 1-18074-A-6.wav,1,6,hen,False,18074,A
 73 | 1-18074-B-6.wav,1,6,hen,False,18074,B
 74 | 1-181071-A-40.wav,1,40,helicopter,True,181071,A
 75 | 1-181071-B-40.wav,1,40,helicopter,True,181071,B
 76 | 1-18527-A-44.wav,1,44,engine,False,18527,A
 77 | 1-18527-B-44.wav,1,44,engine,False,18527,B
 78 | 1-18631-A-23.wav,1,23,breathing,False,18631,A
 79 | 1-18655-A-31.wav,1,31,mouse_click,False,18655,A
 80 | 1-187207-A-20.wav,1,20,crying_baby,True,187207,A
 81 | 1-18755-A-4.wav,1,4,frog,False,18755,A
 82 | 1-18755-B-4.wav,1,4,frog,False,18755,B
 83 | 1-18757-A-4.wav,1,4,frog,False,18757,A
 84 | 1-18810-A-49.wav,1,49,hand_saw,False,18810,A
 85 | 1-19026-A-43.wav,1,43,car_horn,False,19026,A
 86 | 1-19111-A-24.wav,1,24,coughing,False,19111,A
 87 | 1-19118-A-24.wav,1,24,coughing,False,19118,A
 88 | 1-19501-A-7.wav,1,7,insects,False,19501,A
 89 | 1-196660-A-8.wav,1,8,sheep,False,196660,A
 90 | 1-196660-B-8.wav,1,8,sheep,False,196660,B
 91 | 1-19840-A-36.wav,1,36,vacuum_cleaner,False,19840,A
 92 | 1-19872-A-36.wav,1,36,vacuum_cleaner,False,19872,A
 93 | 1-19872-B-36.wav,1,36,vacuum_cleaner,False,19872,B
 94 | 1-19898-A-41.wav,1,41,chainsaw,True,19898,A
 95 | 1-19898-B-41.wav,1,41,chainsaw,True,19898,B
 96 | 1-19898-C-41.wav,1,41,chainsaw,True,19898,C
 97 | 1-20133-A-39.wav,1,39,glass_breaking,False,20133,A
 98 | 1-202111-A-3.wav,1,3,cow,False,202111,A
 99 | 1-20545-A-28.wav,1,28,snoring,False,20545,A
100 | 1-20736-A-18.wav,1,18,toilet_flush,False,20736,A
101 | 1-208757-A-2.wav,1,2,pig,False,208757,A
102 | 1-208757-B-2.wav,1,2,pig,False,208757,B
103 | 1-208757-C-2.wav,1,2,pig,False,208757,C
104 | 1-208757-D-2.wav,1,2,pig,False,208757,D
105 | 1-208757-E-2.wav,1,2,pig,False,208757,E
106 | 1-211527-A-20.wav,1,20,crying_baby,True,211527,A
107 | 1-211527-B-20.wav,1,20,crying_baby,True,211527,B
108 | 1-211527-C-20.wav,1,20,crying_baby,True,211527,C
109 | 1-21189-A-10.wav,1,10,rain,True,21189,A
110 | 1-21421-A-46.wav,1,46,church_bells,False,21421,A
111 | 1-21896-A-35.wav,1,35,washing_machine,False,21896,A
112 | 1-21934-A-38.wav,1,38,clock_tick,True,21934,A
113 | 1-21935-A-38.wav,1,38,clock_tick,True,21935,A
114 | 1-223162-A-25.wav,1,25,footsteps,False,223162,A
115 | 1-22694-A-20.wav,1,20,crying_baby,True,22694,A
116 | 1-22694-B-20.wav,1,20,crying_baby,True,22694,B
117 | 1-22804-A-46.wav,1,46,church_bells,False,22804,A
118 | 1-22882-A-44.wav,1,44,engine,False,22882,A
119 | 1-23094-A-15.wav,1,15,water_drops,False,23094,A
120 | 1-23094-B-15.wav,1,15,water_drops,False,23094,B
121 | 1-23222-A-19.wav,1,19,thunderstorm,False,23222,A
122 | 1-23222-B-19.wav,1,19,thunderstorm,False,23222,B
123 | 1-23706-A-49.wav,1,49,hand_saw,False,23706,A
124 | 1-23996-A-35.wav,1,35,washing_machine,False,23996,A
125 | 1-23996-B-35.wav,1,35,washing_machine,False,23996,B
126 | 1-24074-A-43.wav,1,43,car_horn,False,24074,A
127 | 1-24076-A-43.wav,1,43,car_horn,False,24076,A
128 | 1-24524-A-19.wav,1,19,thunderstorm,False,24524,A
129 | 1-24524-B-19.wav,1,19,thunderstorm,False,24524,B
130 | 1-24524-C-19.wav,1,19,thunderstorm,False,24524,C
131 | 1-24796-A-47.wav,1,47,airplane,False,24796,A
132 | 1-254507-A-43.wav,1,43,car_horn,False,254507,A
133 | 1-25777-A-48.wav,1,48,fireworks,False,25777,A
134 | 1-25781-A-48.wav,1,48,fireworks,False,25781,A
135 | 1-260640-A-2.wav,1,2,pig,False,260640,A
136 | 1-260640-B-2.wav,1,2,pig,False,260640,B
137 | 1-260640-C-2.wav,1,2,pig,False,260640,C
138 | 1-26143-A-21.wav,1,21,sneezing,True,26143,A
139 | 1-26176-A-43.wav,1,43,car_horn,False,26176,A
140 | 1-26177-A-43.wav,1,43,car_horn,False,26177,A
141 | 1-26188-A-30.wav,1,30,door_wood_knock,False,26188,A
142 | 1-26222-A-10.wav,1,10,rain,True,26222,A
143 | 1-26806-A-1.wav,1,1,rooster,True,26806,A
144 | 1-27165-A-35.wav,1,35,washing_machine,False,27165,A
145 | 1-27166-A-35.wav,1,35,washing_machine,False,27166,A
146 | 1-27403-A-28.wav,1,28,snoring,False,27403,A
147 | 1-27405-A-28.wav,1,28,snoring,False,27405,A
148 | 1-27724-A-1.wav,1,1,rooster,True,27724,A
149 | 1-28005-A-18.wav,1,18,toilet_flush,False,28005,A
150 | 1-28135-A-11.wav,1,11,sea_waves,True,28135,A
151 | 1-28135-B-11.wav,1,11,sea_waves,True,28135,B
152 | 1-28808-A-43.wav,1,43,car_horn,False,28808,A
153 | 1-29532-A-16.wav,1,16,wind,False,29532,A
154 | 1-29561-A-10.wav,1,10,rain,True,29561,A
155 | 1-29680-A-21.wav,1,21,sneezing,True,29680,A
156 | 1-30039-A-26.wav,1,26,laughing,False,30039,A
157 | 1-30043-A-26.wav,1,26,laughing,False,30043,A
158 | 1-30214-A-18.wav,1,18,toilet_flush,False,30214,A
159 | 1-30226-A-0.wav,1,0,dog,True,30226,A
160 | 1-30344-A-0.wav,1,0,dog,True,30344,A
161 | 1-30709-A-23.wav,1,23,breathing,False,30709,A
162 | 1-30709-B-23.wav,1,23,breathing,False,30709,B
163 | 1-30709-C-23.wav,1,23,breathing,False,30709,C
164 | 1-30830-A-24.wav,1,24,coughing,False,30830,A
165 | 1-31251-A-6.wav,1,6,hen,False,31251,A
166 | 1-31251-B-6.wav,1,6,hen,False,31251,B
167 | 1-31482-A-42.wav,1,42,siren,False,31482,A
168 | 1-31482-B-42.wav,1,42,siren,False,31482,B
169 | 1-31748-A-21.wav,1,21,sneezing,True,31748,A
170 | 1-31836-A-4.wav,1,4,frog,False,31836,A
171 | 1-31836-B-4.wav,1,4,frog,False,31836,B
172 | 1-32318-A-0.wav,1,0,dog,True,32318,A
173 | 1-32373-A-35.wav,1,35,washing_machine,False,32373,A
174 | 1-32373-B-35.wav,1,35,washing_machine,False,32373,B
175 | 1-32579-A-29.wav,1,29,drinking_sipping,False,32579,A
176 | 1-33658-A-26.wav,1,26,laughing,False,33658,A
177 | 1-34094-A-5.wav,1,5,cat,False,34094,A
178 | 1-34094-B-5.wav,1,5,cat,False,34094,B
179 | 1-34119-A-1.wav,1,1,rooster,True,34119,A
180 | 1-34119-B-1.wav,1,1,rooster,True,34119,B
181 | 1-34495-A-14.wav,1,14,chirping_birds,False,34495,A
182 | 1-34497-A-14.wav,1,14,chirping_birds,False,34497,A
183 | 1-34853-A-37.wav,1,37,clock_alarm,False,34853,A
184 | 1-35687-A-38.wav,1,38,clock_tick,True,35687,A
185 | 1-36164-A-26.wav,1,26,laughing,False,36164,A
186 | 1-36164-B-26.wav,1,26,laughing,False,36164,B
187 | 1-36393-A-23.wav,1,23,breathing,False,36393,A
188 | 1-36397-A-23.wav,1,23,breathing,False,36397,A
189 | 1-36400-A-23.wav,1,23,breathing,False,36400,A
190 | 1-36402-A-23.wav,1,23,breathing,False,36402,A
191 | 1-36929-A-47.wav,1,47,airplane,False,36929,A
192 | 1-37226-A-29.wav,1,29,drinking_sipping,False,37226,A
193 | 1-38559-A-14.wav,1,14,chirping_birds,False,38559,A
194 | 1-38560-A-14.wav,1,14,chirping_birds,False,38560,A
195 | 1-39835-A-9.wav,1,9,crow,False,39835,A
196 | 1-39835-B-9.wav,1,9,crow,False,39835,B
197 | 1-39901-A-11.wav,1,11,sea_waves,True,39901,A
198 | 1-39901-B-11.wav,1,11,sea_waves,True,39901,B
199 | 1-39923-A-1.wav,1,1,rooster,True,39923,A
200 | 1-39937-A-28.wav,1,28,snoring,False,39937,A
201 | 1-40154-A-46.wav,1,46,church_bells,False,40154,A
202 | 1-40621-A-28.wav,1,28,snoring,False,40621,A
203 | 1-40730-A-1.wav,1,1,rooster,True,40730,A
204 | 1-40967-A-28.wav,1,28,snoring,False,40967,A
205 | 1-41615-A-34.wav,1,34,can_opening,False,41615,A
206 | 1-4211-A-12.wav,1,12,crackling_fire,True,4211,A
207 | 1-42139-A-38.wav,1,38,clock_tick,True,42139,A
208 | 1-43382-A-1.wav,1,1,rooster,True,43382,A
209 | 1-43760-A-11.wav,1,11,sea_waves,True,43760,A
210 | 1-43764-A-34.wav,1,34,can_opening,False,43764,A
211 | 1-43807-A-47.wav,1,47,airplane,False,43807,A
212 | 1-43807-B-47.wav,1,47,airplane,False,43807,B
213 | 1-43807-C-47.wav,1,47,airplane,False,43807,C
214 | 1-43807-D-47.wav,1,47,airplane,False,43807,D
215 | 1-44831-A-1.wav,1,1,rooster,True,44831,A
216 | 1-45641-A-27.wav,1,27,brushing_teeth,False,45641,A
217 | 1-45645-A-31.wav,1,31,mouse_click,False,45645,A
218 | 1-46040-A-14.wav,1,14,chirping_birds,False,46040,A
219 | 1-46272-A-12.wav,1,12,crackling_fire,True,46272,A
220 | 1-46274-A-18.wav,1,18,toilet_flush,False,46274,A
221 | 1-46353-A-49.wav,1,49,hand_saw,False,46353,A
222 | 1-46744-A-36.wav,1,36,vacuum_cleaner,False,46744,A
223 | 1-46938-A-7.wav,1,7,insects,False,46938,A
224 | 1-46938-B-7.wav,1,7,insects,False,46938,B
225 | 1-47250-A-41.wav,1,41,chainsaw,True,47250,A
226 | 1-47250-B-41.wav,1,41,chainsaw,True,47250,B
227 | 1-47273-A-21.wav,1,21,sneezing,True,47273,A
228 | 1-47274-A-21.wav,1,21,sneezing,True,47274,A
229 | 1-47709-A-16.wav,1,16,wind,False,47709,A
230 | 1-47714-A-16.wav,1,16,wind,False,47714,A
231 | 1-47819-A-5.wav,1,5,cat,False,47819,A
232 | 1-47819-B-5.wav,1,5,cat,False,47819,B
233 | 1-47819-C-5.wav,1,5,cat,False,47819,C
234 | 1-47923-A-28.wav,1,28,snoring,False,47923,A
235 | 1-48298-A-46.wav,1,46,church_bells,False,48298,A
236 | 1-48413-A-38.wav,1,38,clock_tick,True,48413,A
237 | 1-49098-A-35.wav,1,35,washing_machine,False,49098,A
238 | 1-49409-A-8.wav,1,8,sheep,False,49409,A
239 | 1-49409-B-8.wav,1,8,sheep,False,49409,B
240 | 1-50060-A-10.wav,1,10,rain,True,50060,A
241 | 1-50454-A-44.wav,1,44,engine,False,50454,A
242 | 1-50455-A-44.wav,1,44,engine,False,50455,A
243 | 1-50623-A-15.wav,1,15,water_drops,False,50623,A
244 | 1-50625-A-17.wav,1,17,pouring_water,False,50625,A
245 | 1-50661-A-44.wav,1,44,engine,False,50661,A
246 | 1-50688-A-17.wav,1,17,pouring_water,False,50688,A
247 | 1-51035-A-16.wav,1,16,wind,False,51035,A
248 | 1-51036-A-16.wav,1,16,wind,False,51036,A
249 | 1-51037-A-16.wav,1,16,wind,False,51037,A
250 | 1-51147-A-25.wav,1,25,footsteps,False,51147,A
251 | 1-51170-A-18.wav,1,18,toilet_flush,False,51170,A
252 | 1-51433-A-17.wav,1,17,pouring_water,False,51433,A
253 | 1-51436-A-17.wav,1,17,pouring_water,False,51436,A
254 | 1-51805-A-33.wav,1,33,door_wood_creaks,False,51805,A
255 | 1-51805-B-33.wav,1,33,door_wood_creaks,False,51805,B
256 | 1-51805-C-33.wav,1,33,door_wood_creaks,False,51805,C
257 | 1-51805-D-33.wav,1,33,door_wood_creaks,False,51805,D
258 | 1-51805-E-33.wav,1,33,door_wood_creaks,False,51805,E
259 | 1-51805-F-33.wav,1,33,door_wood_creaks,False,51805,F
260 | 1-51805-G-33.wav,1,33,door_wood_creaks,False,51805,G
261 | 1-51805-H-33.wav,1,33,door_wood_creaks,False,51805,H
262 | 1-52266-A-24.wav,1,24,coughing,False,52266,A
263 | 1-52290-A-30.wav,1,30,door_wood_knock,False,52290,A
264 | 1-52323-A-24.wav,1,24,coughing,False,52323,A
265 | 1-53444-A-28.wav,1,28,snoring,False,53444,A
266 | 1-53467-A-47.wav,1,47,airplane,False,53467,A
267 | 1-53501-A-32.wav,1,32,keyboard_typing,False,53501,A
268 | 1-53663-A-24.wav,1,24,coughing,False,53663,A
269 | 1-53670-A-18.wav,1,18,toilet_flush,False,53670,A
270 | 1-54065-A-45.wav,1,45,train,False,54065,A
271 | 1-54065-B-45.wav,1,45,train,False,54065,B
272 | 1-54084-A-42.wav,1,42,siren,False,54084,A
273 | 1-54505-A-21.wav,1,21,sneezing,True,54505,A
274 | 1-54747-A-46.wav,1,46,church_bells,False,54747,A
275 | 1-54752-A-18.wav,1,18,toilet_flush,False,54752,A
276 | 1-54918-A-14.wav,1,14,chirping_birds,False,54918,A
277 | 1-54918-B-14.wav,1,14,chirping_birds,False,54918,B
278 | 1-54958-A-10.wav,1,10,rain,True,54958,A
279 | 1-56233-A-9.wav,1,9,crow,False,56233,A
280 | 1-56234-A-9.wav,1,9,crow,False,56234,A
281 | 1-56269-A-18.wav,1,18,toilet_flush,False,56269,A
282 | 1-56270-A-29.wav,1,29,drinking_sipping,False,56270,A
283 | 1-56311-A-10.wav,1,10,rain,True,56311,A
284 | 1-56380-A-5.wav,1,5,cat,False,56380,A
285 | 1-56380-B-5.wav,1,5,cat,False,56380,B
286 | 1-56907-A-46.wav,1,46,church_bells,False,56907,A
287 | 1-57163-A-38.wav,1,38,clock_tick,True,57163,A
288 | 1-57316-A-13.wav,1,13,crickets,False,57316,A
289 | 1-57318-A-13.wav,1,13,crickets,False,57318,A
290 | 1-57795-A-8.wav,1,8,sheep,False,57795,A
291 | 1-58277-A-3.wav,1,3,cow,False,58277,A
292 | 1-58792-A-24.wav,1,24,coughing,False,58792,A
293 | 1-58846-A-34.wav,1,34,can_opening,False,58846,A
294 | 1-58923-A-27.wav,1,27,brushing_teeth,False,58923,A
295 | 1-58923-B-27.wav,1,27,brushing_teeth,False,58923,B
296 | 1-59102-A-13.wav,1,13,crickets,False,59102,A
297 | 1-59324-A-21.wav,1,21,sneezing,True,59324,A
298 | 1-59513-A-0.wav,1,0,dog,True,59513,A
299 | 1-5996-A-6.wav,1,6,hen,False,5996,A
300 | 1-60460-A-36.wav,1,36,vacuum_cleaner,False,60460,A
301 | 1-60676-A-34.wav,1,34,can_opening,False,60676,A
302 | 1-60997-A-20.wav,1,20,crying_baby,True,60997,A
303 | 1-60997-B-20.wav,1,20,crying_baby,True,60997,B
304 | 1-61212-A-32.wav,1,32,keyboard_typing,False,61212,A
305 | 1-61221-A-17.wav,1,17,pouring_water,False,61221,A
306 | 1-61252-A-11.wav,1,11,sea_waves,True,61252,A
307 | 1-61261-A-44.wav,1,44,engine,False,61261,A
308 | 1-61534-A-27.wav,1,27,brushing_teeth,False,61534,A
309 | 1-62509-A-45.wav,1,45,train,False,62509,A
310 | 1-62565-A-44.wav,1,44,engine,False,62565,A
311 | 1-62594-A-32.wav,1,32,keyboard_typing,False,62594,A
312 | 1-62849-A-38.wav,1,38,clock_tick,True,62849,A
313 | 1-62850-A-38.wav,1,38,clock_tick,True,62850,A
314 | 1-63679-A-24.wav,1,24,coughing,False,63679,A
315 | 1-63871-A-10.wav,1,10,rain,True,63871,A
316 | 1-64398-A-41.wav,1,41,chainsaw,True,64398,A
317 | 1-64398-B-41.wav,1,41,chainsaw,True,64398,B
318 | 1-64473-A-45.wav,1,45,train,False,64473,A
319 | 1-65483-A-13.wav,1,13,crickets,False,65483,A
320 | 1-67033-A-37.wav,1,37,clock_alarm,False,67033,A
321 | 1-67152-A-17.wav,1,17,pouring_water,False,67152,A
322 | 1-67230-A-29.wav,1,29,drinking_sipping,False,67230,A
323 | 1-67432-A-27.wav,1,27,brushing_teeth,False,67432,A
324 | 1-68628-A-27.wav,1,27,brushing_teeth,False,68628,A
325 | 1-68670-A-34.wav,1,34,can_opening,False,68670,A
326 | 1-68734-A-34.wav,1,34,can_opening,False,68734,A
327 | 1-69165-A-34.wav,1,34,can_opening,False,69165,A
328 | 1-69422-A-3.wav,1,3,cow,False,69422,A
329 | 1-69641-A-3.wav,1,3,cow,False,69641,A
330 | 1-69760-A-16.wav,1,16,wind,False,69760,A
331 | 1-70300-A-45.wav,1,45,train,False,70300,A
332 | 1-7057-A-12.wav,1,12,crackling_fire,True,7057,A
333 | 1-71030-A-6.wav,1,6,hen,False,71030,A
334 | 1-72195-A-37.wav,1,37,clock_alarm,False,72195,A
335 | 1-72195-B-37.wav,1,37,clock_alarm,False,72195,B
336 | 1-72229-A-6.wav,1,6,hen,False,72229,A
337 | 1-72229-B-6.wav,1,6,hen,False,72229,B
338 | 1-72695-A-26.wav,1,26,laughing,False,72695,A
339 | 1-73123-A-26.wav,1,26,laughing,False,73123,A
340 | 1-73585-A-7.wav,1,7,insects,False,73585,A
341 | 1-74517-A-37.wav,1,37,clock_alarm,False,74517,A
342 | 1-7456-A-13.wav,1,13,crickets,False,7456,A
343 | 1-75162-A-9.wav,1,9,crow,False,75162,A
344 | 1-75189-A-7.wav,1,7,insects,False,75189,A
345 | 1-75190-A-8.wav,1,8,sheep,False,75190,A
346 | 1-76831-A-42.wav,1,42,siren,False,76831,A
347 | 1-76831-B-42.wav,1,42,siren,False,76831,B
348 | 1-76831-C-42.wav,1,42,siren,False,76831,C
349 | 1-76831-D-42.wav,1,42,siren,False,76831,D
350 | 1-76831-E-42.wav,1,42,siren,False,76831,E
351 | 1-77160-A-3.wav,1,3,cow,False,77160,A
352 | 1-77241-A-3.wav,1,3,cow,False,77241,A
353 | 1-79113-A-5.wav,1,5,cat,False,79113,A
354 | 1-79146-A-29.wav,1,29,drinking_sipping,False,79146,A
355 | 1-79220-A-17.wav,1,17,pouring_water,False,79220,A
356 | 1-79236-A-36.wav,1,36,vacuum_cleaner,False,79236,A
357 | 1-79711-A-32.wav,1,32,keyboard_typing,False,79711,A
358 | 1-7973-A-7.wav,1,7,insects,False,7973,A
359 | 1-7974-A-49.wav,1,49,hand_saw,False,7974,A
360 | 1-7974-B-49.wav,1,49,hand_saw,False,7974,B
361 | 1-80785-A-7.wav,1,7,insects,False,80785,A
362 | 1-80840-A-13.wav,1,13,crickets,False,80840,A
363 | 1-80841-A-13.wav,1,13,crickets,False,80841,A
364 | 1-81001-A-30.wav,1,30,door_wood_knock,False,81001,A
365 | 1-81001-B-30.wav,1,30,door_wood_knock,False,81001,B
366 | 1-81269-A-3.wav,1,3,cow,False,81269,A
367 | 1-81851-A-31.wav,1,31,mouse_click,False,81851,A
368 | 1-81883-A-21.wav,1,21,sneezing,True,81883,A
369 | 1-82817-A-30.wav,1,30,door_wood_knock,False,82817,A
370 | 1-84393-A-32.wav,1,32,keyboard_typing,False,84393,A
371 | 1-84536-A-39.wav,1,39,glass_breaking,False,84536,A
372 | 1-84704-A-39.wav,1,39,glass_breaking,False,84704,A
373 | 1-84705-A-39.wav,1,39,glass_breaking,False,84705,A
374 | 1-85123-A-31.wav,1,31,mouse_click,False,85123,A
375 | 1-85168-A-39.wav,1,39,glass_breaking,False,85168,A
376 | 1-85184-A-39.wav,1,39,glass_breaking,False,85184,A
377 | 1-85362-A-0.wav,1,0,dog,True,85362,A
378 | 1-85909-A-29.wav,1,29,drinking_sipping,False,85909,A
379 | 1-87565-A-29.wav,1,29,drinking_sipping,False,87565,A
380 | 1-88409-A-45.wav,1,45,train,False,88409,A
381 | 1-88409-B-45.wav,1,45,train,False,88409,B
382 | 1-88574-A-8.wav,1,8,sheep,False,88574,A
383 | 1-88807-A-39.wav,1,39,glass_breaking,False,88807,A
384 | 1-90797-A-15.wav,1,15,water_drops,False,90797,A
385 | 1-91359-A-11.wav,1,11,sea_waves,True,91359,A
386 | 1-91359-B-11.wav,1,11,sea_waves,True,91359,B
387 | 1-94036-A-22.wav,1,22,clapping,False,94036,A
388 | 1-94231-A-32.wav,1,32,keyboard_typing,False,94231,A
389 | 1-94231-B-32.wav,1,32,keyboard_typing,False,94231,B
390 | 1-95563-A-31.wav,1,31,mouse_click,False,95563,A
391 | 1-96890-A-37.wav,1,37,clock_alarm,False,96890,A
392 | 1-96950-A-9.wav,1,9,crow,False,96950,A
393 | 1-96950-B-9.wav,1,9,crow,False,96950,B
394 | 1-97392-A-0.wav,1,0,dog,True,97392,A
395 | 1-977-A-39.wav,1,39,glass_breaking,False,977,A
396 | 1-97793-A-31.wav,1,31,mouse_click,False,97793,A
397 | 1-9841-A-13.wav,1,13,crickets,False,9841,A
398 | 1-9886-A-49.wav,1,49,hand_saw,False,9886,A
399 | 1-9887-A-49.wav,1,49,hand_saw,False,9887,A
400 | 1-9887-B-49.wav,1,49,hand_saw,False,9887,B
401 | 1-99958-A-31.wav,1,31,mouse_click,False,99958,A
402 | 


--------------------------------------------------------------------------------
/util_esc50/fold2_test.csv:
--------------------------------------------------------------------------------
  1 | filename,fold,target,category,esc10,src_file,take
  2 | 2-100648-A-43.wav,2,43,car_horn,False,100648,A
  3 | 2-100786-A-1.wav,2,1,rooster,True,100786,A
  4 | 2-101676-A-10.wav,2,10,rain,True,101676,A
  5 | 2-102414-A-17.wav,2,17,pouring_water,False,102414,A
  6 | 2-102414-B-17.wav,2,17,pouring_water,False,102414,B
  7 | 2-102414-C-17.wav,2,17,pouring_water,False,102414,C
  8 | 2-102414-D-17.wav,2,17,pouring_water,False,102414,D
  9 | 2-102414-E-17.wav,2,17,pouring_water,False,102414,E
 10 | 2-102414-F-17.wav,2,17,pouring_water,False,102414,F
 11 | 2-102414-G-17.wav,2,17,pouring_water,False,102414,G
 12 | 2-102435-A-37.wav,2,37,clock_alarm,False,102435,A
 13 | 2-102567-A-35.wav,2,35,washing_machine,False,102567,A
 14 | 2-102567-B-35.wav,2,35,washing_machine,False,102567,B
 15 | 2-102567-C-35.wav,2,35,washing_machine,False,102567,C
 16 | 2-102567-D-35.wav,2,35,washing_machine,False,102567,D
 17 | 2-102568-A-35.wav,2,35,washing_machine,False,102568,A
 18 | 2-102581-A-29.wav,2,29,drinking_sipping,False,102581,A
 19 | 2-102581-B-29.wav,2,29,drinking_sipping,False,102581,B
 20 | 2-102852-A-11.wav,2,11,sea_waves,True,102852,A
 21 | 2-103423-A-3.wav,2,3,cow,False,103423,A
 22 | 2-103424-A-3.wav,2,3,cow,False,103424,A
 23 | 2-103426-A-3.wav,2,3,cow,False,103426,A
 24 | 2-103427-A-3.wav,2,3,cow,False,103427,A
 25 | 2-103428-A-3.wav,2,3,cow,False,103428,A
 26 | 2-104105-A-19.wav,2,19,thunderstorm,False,104105,A
 27 | 2-104105-B-19.wav,2,19,thunderstorm,False,104105,B
 28 | 2-104168-A-32.wav,2,32,keyboard_typing,False,104168,A
 29 | 2-104475-A-37.wav,2,37,clock_alarm,False,104475,A
 30 | 2-104476-A-37.wav,2,37,clock_alarm,False,104476,A
 31 | 2-104877-A-3.wav,2,3,cow,False,104877,A
 32 | 2-104952-A-16.wav,2,16,wind,False,104952,A
 33 | 2-104952-B-16.wav,2,16,wind,False,104952,B
 34 | 2-105270-A-47.wav,2,47,airplane,False,105270,A
 35 | 2-106014-A-44.wav,2,44,engine,False,106014,A
 36 | 2-106015-A-44.wav,2,44,engine,False,106015,A
 37 | 2-106015-B-44.wav,2,44,engine,False,106015,B
 38 | 2-106019-A-13.wav,2,13,crickets,False,106019,A
 39 | 2-106072-A-36.wav,2,36,vacuum_cleaner,False,106072,A
 40 | 2-106073-A-36.wav,2,36,vacuum_cleaner,False,106073,A
 41 | 2-106486-A-44.wav,2,44,engine,False,106486,A
 42 | 2-106487-A-44.wav,2,44,engine,False,106487,A
 43 | 2-106849-A-47.wav,2,47,airplane,False,106849,A
 44 | 2-106881-A-39.wav,2,39,glass_breaking,False,106881,A
 45 | 2-107228-A-44.wav,2,44,engine,False,107228,A
 46 | 2-107228-B-44.wav,2,44,engine,False,107228,B
 47 | 2-107351-A-20.wav,2,20,crying_baby,True,107351,A
 48 | 2-107351-B-20.wav,2,20,crying_baby,True,107351,B
 49 | 2-108017-A-24.wav,2,24,coughing,False,108017,A
 50 | 2-108760-A-14.wav,2,14,chirping_birds,False,108760,A
 51 | 2-108760-B-14.wav,2,14,chirping_birds,False,108760,B
 52 | 2-108761-A-14.wav,2,14,chirping_birds,False,108761,A
 53 | 2-108763-A-9.wav,2,9,crow,False,108763,A
 54 | 2-108766-A-9.wav,2,9,crow,False,108766,A
 55 | 2-108767-A-9.wav,2,9,crow,False,108767,A
 56 | 2-108767-B-9.wav,2,9,crow,False,108767,B
 57 | 2-108767-C-9.wav,2,9,crow,False,108767,C
 58 | 2-109231-A-9.wav,2,9,crow,False,109231,A
 59 | 2-109231-B-9.wav,2,9,crow,False,109231,B
 60 | 2-109231-C-9.wav,2,9,crow,False,109231,C
 61 | 2-109316-A-32.wav,2,32,keyboard_typing,False,109316,A
 62 | 2-109371-A-16.wav,2,16,wind,False,109371,A
 63 | 2-109371-B-16.wav,2,16,wind,False,109371,B
 64 | 2-109371-C-16.wav,2,16,wind,False,109371,C
 65 | 2-109371-D-16.wav,2,16,wind,False,109371,D
 66 | 2-109374-A-16.wav,2,16,wind,False,109374,A
 67 | 2-109505-A-21.wav,2,21,sneezing,True,109505,A
 68 | 2-109759-A-26.wav,2,26,laughing,False,109759,A
 69 | 2-109759-B-26.wav,2,26,laughing,False,109759,B
 70 | 2-110010-A-5.wav,2,5,cat,False,110010,A
 71 | 2-110011-A-5.wav,2,5,cat,False,110011,A
 72 | 2-110417-A-28.wav,2,28,snoring,False,110417,A
 73 | 2-110417-B-28.wav,2,28,snoring,False,110417,B
 74 | 2-110613-A-13.wav,2,13,crickets,False,110613,A
 75 | 2-110614-A-8.wav,2,8,sheep,False,110614,A
 76 | 2-110614-B-8.wav,2,8,sheep,False,110614,B
 77 | 2-112213-A-39.wav,2,39,glass_breaking,False,112213,A
 78 | 2-112213-B-39.wav,2,39,glass_breaking,False,112213,B
 79 | 2-114254-A-30.wav,2,30,door_wood_knock,False,114254,A
 80 | 2-114280-A-0.wav,2,0,dog,True,114280,A
 81 | 2-114587-A-0.wav,2,0,dog,True,114587,A
 82 | 2-114609-A-28.wav,2,28,snoring,False,114609,A
 83 | 2-114609-B-28.wav,2,28,snoring,False,114609,B
 84 | 2-116400-A-0.wav,2,0,dog,True,116400,A
 85 | 2-117116-A-37.wav,2,37,clock_alarm,False,117116,A
 86 | 2-117271-A-0.wav,2,0,dog,True,117271,A
 87 | 2-117330-A-28.wav,2,28,snoring,False,117330,A
 88 | 2-117615-A-48.wav,2,48,fireworks,False,117615,A
 89 | 2-117615-B-48.wav,2,48,fireworks,False,117615,B
 90 | 2-117615-C-48.wav,2,48,fireworks,False,117615,C
 91 | 2-117615-D-48.wav,2,48,fireworks,False,117615,D
 92 | 2-117615-E-48.wav,2,48,fireworks,False,117615,E
 93 | 2-117616-A-48.wav,2,48,fireworks,False,117616,A
 94 | 2-117617-A-48.wav,2,48,fireworks,False,117617,A
 95 | 2-117625-A-10.wav,2,10,rain,True,117625,A
 96 | 2-117795-A-3.wav,2,3,cow,False,117795,A
 97 | 2-117795-B-3.wav,2,3,cow,False,117795,B
 98 | 2-118072-A-0.wav,2,0,dog,True,118072,A
 99 | 2-118104-A-21.wav,2,21,sneezing,True,118104,A
100 | 2-118459-A-32.wav,2,32,keyboard_typing,False,118459,A
101 | 2-118459-B-32.wav,2,32,keyboard_typing,False,118459,B
102 | 2-118624-A-30.wav,2,30,door_wood_knock,False,118624,A
103 | 2-118625-A-30.wav,2,30,door_wood_knock,False,118625,A
104 | 2-118817-A-32.wav,2,32,keyboard_typing,False,118817,A
105 | 2-118964-A-0.wav,2,0,dog,True,118964,A
106 | 2-119102-A-21.wav,2,21,sneezing,True,119102,A
107 | 2-119139-A-31.wav,2,31,mouse_click,False,119139,A
108 | 2-119161-A-8.wav,2,8,sheep,False,119161,A
109 | 2-119161-B-8.wav,2,8,sheep,False,119161,B
110 | 2-119161-C-8.wav,2,8,sheep,False,119161,C
111 | 2-119748-A-38.wav,2,38,clock_tick,True,119748,A
112 | 2-120218-A-30.wav,2,30,door_wood_knock,False,120218,A
113 | 2-120218-B-30.wav,2,30,door_wood_knock,False,120218,B
114 | 2-120333-A-32.wav,2,32,keyboard_typing,False,120333,A
115 | 2-120586-A-6.wav,2,6,hen,False,120586,A
116 | 2-120587-A-6.wav,2,6,hen,False,120587,A
117 | 2-121909-A-35.wav,2,35,washing_machine,False,121909,A
118 | 2-121978-A-29.wav,2,29,drinking_sipping,False,121978,A
119 | 2-122066-A-45.wav,2,45,train,False,122066,A
120 | 2-122066-B-45.wav,2,45,train,False,122066,B
121 | 2-122067-A-45.wav,2,45,train,False,122067,A
122 | 2-122067-B-45.wav,2,45,train,False,122067,B
123 | 2-122104-A-0.wav,2,0,dog,True,122104,A
124 | 2-122104-B-0.wav,2,0,dog,True,122104,B
125 | 2-122616-A-14.wav,2,14,chirping_birds,False,122616,A
126 | 2-122763-A-29.wav,2,29,drinking_sipping,False,122763,A
127 | 2-122763-B-29.wav,2,29,drinking_sipping,False,122763,B
128 | 2-122820-A-36.wav,2,36,vacuum_cleaner,False,122820,A
129 | 2-122820-B-36.wav,2,36,vacuum_cleaner,False,122820,B
130 | 2-123712-A-33.wav,2,33,door_wood_creaks,False,123712,A
131 | 2-123896-A-24.wav,2,24,coughing,False,123896,A
132 | 2-124564-A-15.wav,2,15,water_drops,False,124564,A
133 | 2-124662-A-11.wav,2,11,sea_waves,True,124662,A
134 | 2-125520-A-43.wav,2,43,car_horn,False,125520,A
135 | 2-125821-A-13.wav,2,13,crickets,False,125821,A
136 | 2-125875-A-13.wav,2,13,crickets,False,125875,A
137 | 2-125966-A-11.wav,2,11,sea_waves,True,125966,A
138 | 2-126433-A-17.wav,2,17,pouring_water,False,126433,A
139 | 2-126756-A-29.wav,2,29,drinking_sipping,False,126756,A
140 | 2-127108-A-38.wav,2,38,clock_tick,True,127108,A
141 | 2-127109-A-6.wav,2,6,hen,False,127109,A
142 | 2-128465-A-43.wav,2,43,car_horn,False,128465,A
143 | 2-128465-B-43.wav,2,43,car_horn,False,128465,B
144 | 2-128631-A-21.wav,2,21,sneezing,True,128631,A
145 | 2-130245-A-34.wav,2,34,can_opening,False,130245,A
146 | 2-130978-A-21.wav,2,21,sneezing,True,130978,A
147 | 2-130979-A-21.wav,2,21,sneezing,True,130979,A
148 | 2-131943-A-38.wav,2,38,clock_tick,True,131943,A
149 | 2-132157-A-11.wav,2,11,sea_waves,True,132157,A
150 | 2-132157-B-11.wav,2,11,sea_waves,True,132157,B
151 | 2-133863-A-11.wav,2,11,sea_waves,True,133863,A
152 | 2-133889-A-30.wav,2,30,door_wood_knock,False,133889,A
153 | 2-134049-A-6.wav,2,6,hen,False,134049,A
154 | 2-134700-A-38.wav,2,38,clock_tick,True,134700,A
155 | 2-134915-A-30.wav,2,30,door_wood_knock,False,134915,A
156 | 2-135649-A-45.wav,2,45,train,False,135649,A
157 | 2-135649-B-45.wav,2,45,train,False,135649,B
158 | 2-135649-C-45.wav,2,45,train,False,135649,C
159 | 2-135728-A-38.wav,2,38,clock_tick,True,135728,A
160 | 2-135860-A-49.wav,2,49,hand_saw,False,135860,A
161 | 2-137162-A-11.wav,2,11,sea_waves,True,137162,A
162 | 2-138257-A-31.wav,2,31,mouse_click,False,138257,A
163 | 2-138465-A-43.wav,2,43,car_horn,False,138465,A
164 | 2-139748-A-15.wav,2,15,water_drops,False,139748,A
165 | 2-139748-B-15.wav,2,15,water_drops,False,139748,B
166 | 2-139749-A-15.wav,2,15,water_drops,False,139749,A
167 | 2-140147-A-38.wav,2,38,clock_tick,True,140147,A
168 | 2-140841-A-30.wav,2,30,door_wood_knock,False,140841,A
169 | 2-141563-A-39.wav,2,39,glass_breaking,False,141563,A
170 | 2-141584-A-38.wav,2,38,clock_tick,True,141584,A
171 | 2-141681-A-36.wav,2,36,vacuum_cleaner,False,141681,A
172 | 2-141681-B-36.wav,2,36,vacuum_cleaner,False,141681,B
173 | 2-141682-A-36.wav,2,36,vacuum_cleaner,False,141682,A
174 | 2-141682-B-36.wav,2,36,vacuum_cleaner,False,141682,B
175 | 2-144031-A-34.wav,2,34,can_opening,False,144031,A
176 | 2-144137-A-43.wav,2,43,car_horn,False,144137,A
177 | 2-146877-A-31.wav,2,31,mouse_click,False,146877,A
178 | 2-146877-B-31.wav,2,31,mouse_click,False,146877,B
179 | 2-151079-A-20.wav,2,20,crying_baby,True,151079,A
180 | 2-152895-A-31.wav,2,31,mouse_click,False,152895,A
181 | 2-152964-A-31.wav,2,31,mouse_click,False,152964,A
182 | 2-153388-A-31.wav,2,31,mouse_click,False,153388,A
183 | 2-154688-A-31.wav,2,31,mouse_click,False,154688,A
184 | 2-155801-A-11.wav,2,11,sea_waves,True,155801,A
185 | 2-157488-A-6.wav,2,6,hen,False,157488,A
186 | 2-158746-A-2.wav,2,2,pig,False,158746,A
187 | 2-158746-B-2.wav,2,2,pig,False,158746,B
188 | 2-158746-C-2.wav,2,2,pig,False,158746,C
189 | 2-158746-D-2.wav,2,2,pig,False,158746,D
190 | 2-160128-A-7.wav,2,7,insects,False,160128,A
191 | 2-160888-A-47.wav,2,47,airplane,False,160888,A
192 | 2-165801-A-48.wav,2,48,fireworks,False,165801,A
193 | 2-166644-A-2.wav,2,2,pig,False,166644,A
194 | 2-166644-B-2.wav,2,2,pig,False,166644,B
195 | 2-166644-C-2.wav,2,2,pig,False,166644,C
196 | 2-173559-A-39.wav,2,39,glass_breaking,False,173559,A
197 | 2-173607-A-39.wav,2,39,glass_breaking,False,173607,A
198 | 2-173618-A-39.wav,2,39,glass_breaking,False,173618,A
199 | 2-182508-A-8.wav,2,8,sheep,False,182508,A
200 | 2-182508-B-8.wav,2,8,sheep,False,182508,B
201 | 2-184077-A-49.wav,2,49,hand_saw,False,184077,A
202 | 2-18766-A-12.wav,2,12,crackling_fire,True,18766,A
203 | 2-18766-B-12.wav,2,12,crackling_fire,True,18766,B
204 | 2-188822-A-40.wav,2,40,helicopter,True,188822,A
205 | 2-188822-B-40.wav,2,40,helicopter,True,188822,B
206 | 2-188822-C-40.wav,2,40,helicopter,True,188822,C
207 | 2-188822-D-40.wav,2,40,helicopter,True,188822,D
208 | 2-196688-A-8.wav,2,8,sheep,False,196688,A
209 | 2-205966-A-16.wav,2,16,wind,False,205966,A
210 | 2-209471-A-25.wav,2,25,footsteps,False,209471,A
211 | 2-209472-A-25.wav,2,25,footsteps,False,209472,A
212 | 2-209473-A-25.wav,2,25,footsteps,False,209473,A
213 | 2-209474-A-25.wav,2,25,footsteps,False,209474,A
214 | 2-209475-A-25.wav,2,25,footsteps,False,209475,A
215 | 2-209476-A-25.wav,2,25,footsteps,False,209476,A
216 | 2-209477-A-25.wav,2,25,footsteps,False,209477,A
217 | 2-209478-A-25.wav,2,25,footsteps,False,209478,A
218 | 2-250710-A-39.wav,2,39,glass_breaking,False,250710,A
219 | 2-25292-A-22.wav,2,22,clapping,False,25292,A
220 | 2-25293-A-22.wav,2,22,clapping,False,25293,A
221 | 2-262579-A-45.wav,2,45,train,False,262579,A
222 | 2-28314-A-12.wav,2,12,crackling_fire,True,28314,A
223 | 2-28314-B-12.wav,2,12,crackling_fire,True,28314,B
224 | 2-30322-A-12.wav,2,12,crackling_fire,True,30322,A
225 | 2-30322-B-12.wav,2,12,crackling_fire,True,30322,B
226 | 2-32515-A-4.wav,2,4,frog,False,32515,A
227 | 2-32515-B-4.wav,2,4,frog,False,32515,B
228 | 2-32515-C-4.wav,2,4,frog,False,32515,C
229 | 2-32515-D-4.wav,2,4,frog,False,32515,D
230 | 2-32834-A-4.wav,2,4,frog,False,32834,A
231 | 2-37806-A-40.wav,2,40,helicopter,True,37806,A
232 | 2-37806-B-40.wav,2,40,helicopter,True,37806,B
233 | 2-37806-C-40.wav,2,40,helicopter,True,37806,C
234 | 2-37806-D-40.wav,2,40,helicopter,True,37806,D
235 | 2-37870-A-2.wav,2,2,pig,False,37870,A
236 | 2-39441-A-19.wav,2,19,thunderstorm,False,39441,A
237 | 2-39443-A-19.wav,2,19,thunderstorm,False,39443,A
238 | 2-39443-B-19.wav,2,19,thunderstorm,False,39443,B
239 | 2-39945-A-19.wav,2,19,thunderstorm,False,39945,A
240 | 2-39945-B-19.wav,2,19,thunderstorm,False,39945,B
241 | 2-39945-C-19.wav,2,19,thunderstorm,False,39945,C
242 | 2-42101-A-43.wav,2,43,car_horn,False,42101,A
243 | 2-43802-A-42.wav,2,42,siren,False,43802,A
244 | 2-43806-A-42.wav,2,42,siren,False,43806,A
245 | 2-50665-A-20.wav,2,20,crying_baby,True,50665,A
246 | 2-50666-A-20.wav,2,20,crying_baby,True,50666,A
247 | 2-50667-A-41.wav,2,41,chainsaw,True,50667,A
248 | 2-50667-B-41.wav,2,41,chainsaw,True,50667,B
249 | 2-50668-A-41.wav,2,41,chainsaw,True,50668,A
250 | 2-50668-B-41.wav,2,41,chainsaw,True,50668,B
251 | 2-50774-A-23.wav,2,23,breathing,False,50774,A
252 | 2-51173-A-35.wav,2,35,washing_machine,False,51173,A
253 | 2-51630-A-49.wav,2,49,hand_saw,False,51630,A
254 | 2-51630-B-49.wav,2,49,hand_saw,False,51630,B
255 | 2-52001-A-28.wav,2,28,snoring,False,52001,A
256 | 2-52001-B-28.wav,2,28,snoring,False,52001,B
257 | 2-52085-A-4.wav,2,4,frog,False,52085,A
258 | 2-52085-B-4.wav,2,4,frog,False,52085,B
259 | 2-52789-A-4.wav,2,4,frog,False,52789,A
260 | 2-54086-A-43.wav,2,43,car_horn,False,54086,A
261 | 2-54961-A-23.wav,2,23,breathing,False,54961,A
262 | 2-54962-A-23.wav,2,23,breathing,False,54962,A
263 | 2-56926-A-46.wav,2,46,church_bells,False,56926,A
264 | 2-57191-A-46.wav,2,46,church_bells,False,57191,A
265 | 2-57733-A-22.wav,2,22,clapping,False,57733,A
266 | 2-59241-A-35.wav,2,35,washing_machine,False,59241,A
267 | 2-59321-A-49.wav,2,49,hand_saw,False,59321,A
268 | 2-59565-A-46.wav,2,46,church_bells,False,59565,A
269 | 2-59566-A-46.wav,2,46,church_bells,False,59566,A
270 | 2-60180-A-49.wav,2,49,hand_saw,False,60180,A
271 | 2-60180-B-49.wav,2,49,hand_saw,False,60180,B
272 | 2-60791-A-26.wav,2,26,laughing,False,60791,A
273 | 2-60794-A-26.wav,2,26,laughing,False,60794,A
274 | 2-60795-A-26.wav,2,26,laughing,False,60795,A
275 | 2-60900-A-22.wav,2,22,clapping,False,60900,A
276 | 2-61311-A-12.wav,2,12,crackling_fire,True,61311,A
277 | 2-61618-A-46.wav,2,46,church_bells,False,61618,A
278 | 2-62226-A-26.wav,2,26,laughing,False,62226,A
279 | 2-64332-A-18.wav,2,18,toilet_flush,False,64332,A
280 | 2-64962-A-15.wav,2,15,water_drops,False,64962,A
281 | 2-64963-A-15.wav,2,15,water_drops,False,64963,A
282 | 2-65484-A-18.wav,2,18,toilet_flush,False,65484,A
283 | 2-65747-A-12.wav,2,12,crackling_fire,True,65747,A
284 | 2-65750-A-1.wav,2,1,rooster,True,65750,A
285 | 2-66205-A-23.wav,2,23,breathing,False,66205,A
286 | 2-66637-A-20.wav,2,20,crying_baby,True,66637,A
287 | 2-66637-B-20.wav,2,20,crying_baby,True,66637,B
288 | 2-67422-A-18.wav,2,18,toilet_flush,False,67422,A
289 | 2-68391-A-41.wav,2,41,chainsaw,True,68391,A
290 | 2-68391-B-41.wav,2,41,chainsaw,True,68391,B
291 | 2-68595-A-15.wav,2,15,water_drops,False,68595,A
292 | 2-68595-B-15.wav,2,15,water_drops,False,68595,B
293 | 2-69131-A-5.wav,2,5,cat,False,69131,A
294 | 2-69131-B-5.wav,2,5,cat,False,69131,B
295 | 2-70052-A-42.wav,2,42,siren,False,70052,A
296 | 2-70052-B-42.wav,2,42,siren,False,70052,B
297 | 2-70280-A-18.wav,2,18,toilet_flush,False,70280,A
298 | 2-70344-A-33.wav,2,33,door_wood_creaks,False,70344,A
299 | 2-70366-A-33.wav,2,33,door_wood_creaks,False,70366,A
300 | 2-70367-A-33.wav,2,33,door_wood_creaks,False,70367,A
301 | 2-70936-A-42.wav,2,42,siren,False,70936,A
302 | 2-70938-A-42.wav,2,42,siren,False,70938,A
303 | 2-70939-A-42.wav,2,42,siren,False,70939,A
304 | 2-71162-A-1.wav,2,1,rooster,True,71162,A
305 | 2-72268-A-6.wav,2,6,hen,False,72268,A
306 | 2-72547-A-14.wav,2,14,chirping_birds,False,72547,A
307 | 2-72547-B-14.wav,2,14,chirping_birds,False,72547,B
308 | 2-72547-C-14.wav,2,14,chirping_birds,False,72547,C
309 | 2-72547-D-14.wav,2,14,chirping_birds,False,72547,D
310 | 2-72677-A-18.wav,2,18,toilet_flush,False,72677,A
311 | 2-72688-A-42.wav,2,42,siren,False,72688,A
312 | 2-72970-A-10.wav,2,10,rain,True,72970,A
313 | 2-73027-A-10.wav,2,10,rain,True,73027,A
314 | 2-7321-A-33.wav,2,33,door_wood_creaks,False,7321,A
315 | 2-73260-A-10.wav,2,10,rain,True,73260,A
316 | 2-73544-A-27.wav,2,27,brushing_teeth,False,73544,A
317 | 2-74361-A-47.wav,2,47,airplane,False,74361,A
318 | 2-74977-A-18.wav,2,18,toilet_flush,False,74977,A
319 | 2-75726-A-6.wav,2,6,hen,False,75726,A
320 | 2-76408-A-22.wav,2,22,clapping,False,76408,A
321 | 2-76408-B-22.wav,2,22,clapping,False,76408,B
322 | 2-76408-C-22.wav,2,22,clapping,False,76408,C
323 | 2-76408-D-22.wav,2,22,clapping,False,76408,D
324 | 2-76868-A-6.wav,2,6,hen,False,76868,A
325 | 2-77346-A-46.wav,2,46,church_bells,False,77346,A
326 | 2-77347-A-46.wav,2,46,church_bells,False,77347,A
327 | 2-77945-A-41.wav,2,41,chainsaw,True,77945,A
328 | 2-77945-B-41.wav,2,41,chainsaw,True,77945,B
329 | 2-78381-A-46.wav,2,46,church_bells,False,78381,A
330 | 2-78562-A-37.wav,2,37,clock_alarm,False,78562,A
331 | 2-78562-B-37.wav,2,37,clock_alarm,False,78562,B
332 | 2-78651-A-44.wav,2,44,engine,False,78651,A
333 | 2-78781-A-47.wav,2,47,airplane,False,78781,A
334 | 2-78799-A-47.wav,2,47,airplane,False,78799,A
335 | 2-79769-A-26.wav,2,26,laughing,False,79769,A
336 | 2-79775-A-26.wav,2,26,laughing,False,79775,A
337 | 2-80313-A-28.wav,2,28,snoring,False,80313,A
338 | 2-80482-A-20.wav,2,20,crying_baby,True,80482,A
339 | 2-80844-A-13.wav,2,13,crickets,False,80844,A
340 | 2-81112-A-34.wav,2,34,can_opening,False,81112,A
341 | 2-81190-A-34.wav,2,34,can_opening,False,81190,A
342 | 2-81270-A-1.wav,2,1,rooster,True,81270,A
343 | 2-81731-A-10.wav,2,10,rain,True,81731,A
344 | 2-81970-A-7.wav,2,7,insects,False,81970,A
345 | 2-81970-B-7.wav,2,7,insects,False,81970,B
346 | 2-81970-C-7.wav,2,7,insects,False,81970,C
347 | 2-82071-A-27.wav,2,27,brushing_teeth,False,82071,A
348 | 2-82077-A-7.wav,2,7,insects,False,82077,A
349 | 2-82274-A-5.wav,2,5,cat,False,82274,A
350 | 2-82274-B-5.wav,2,5,cat,False,82274,B
351 | 2-82367-A-10.wav,2,10,rain,True,82367,A
352 | 2-82455-A-23.wav,2,23,breathing,False,82455,A
353 | 2-82538-A-21.wav,2,21,sneezing,True,82538,A
354 | 2-83270-A-13.wav,2,13,crickets,False,83270,A
355 | 2-83536-A-27.wav,2,27,brushing_teeth,False,83536,A
356 | 2-83667-A-34.wav,2,34,can_opening,False,83667,A
357 | 2-83688-A-34.wav,2,34,can_opening,False,83688,A
358 | 2-83934-A-5.wav,2,5,cat,False,83934,A
359 | 2-83934-B-5.wav,2,5,cat,False,83934,B
360 | 2-84693-A-49.wav,2,49,hand_saw,False,84693,A
361 | 2-84943-A-18.wav,2,18,toilet_flush,False,84943,A
362 | 2-84965-A-23.wav,2,23,breathing,False,84965,A
363 | 2-85139-A-13.wav,2,13,crickets,False,85139,A
364 | 2-85292-A-24.wav,2,24,coughing,False,85292,A
365 | 2-85434-A-27.wav,2,27,brushing_teeth,False,85434,A
366 | 2-85471-A-34.wav,2,34,can_opening,False,85471,A
367 | 2-85945-A-18.wav,2,18,toilet_flush,False,85945,A
368 | 2-86160-A-27.wav,2,27,brushing_teeth,False,86160,A
369 | 2-87282-A-34.wav,2,34,can_opening,False,87282,A
370 | 2-87412-A-24.wav,2,24,coughing,False,87412,A
371 | 2-87780-A-33.wav,2,33,door_wood_creaks,False,87780,A
372 | 2-87781-A-10.wav,2,10,rain,True,87781,A
373 | 2-87794-A-24.wav,2,24,coughing,False,87794,A
374 | 2-87795-A-24.wav,2,24,coughing,False,87795,A
375 | 2-87799-A-24.wav,2,24,coughing,False,87799,A
376 | 2-88724-A-38.wav,2,38,clock_tick,True,88724,A
377 | 2-89516-A-37.wav,2,37,clock_alarm,False,89516,A
378 | 2-91912-A-33.wav,2,33,door_wood_creaks,False,91912,A
379 | 2-91912-B-33.wav,2,33,door_wood_creaks,False,91912,B
380 | 2-92627-A-27.wav,2,27,brushing_teeth,False,92627,A
381 | 2-92978-A-29.wav,2,29,drinking_sipping,False,92978,A
382 | 2-93030-A-21.wav,2,21,sneezing,True,93030,A
383 | 2-94230-A-27.wav,2,27,brushing_teeth,False,94230,A
384 | 2-94807-A-29.wav,2,29,drinking_sipping,False,94807,A
385 | 2-95035-A-1.wav,2,1,rooster,True,95035,A
386 | 2-95258-A-1.wav,2,1,rooster,True,95258,A
387 | 2-95258-B-1.wav,2,1,rooster,True,95258,B
388 | 2-95567-A-23.wav,2,23,breathing,False,95567,A
389 | 2-96033-A-13.wav,2,13,crickets,False,96033,A
390 | 2-96063-A-37.wav,2,37,clock_alarm,False,96063,A
391 | 2-96460-A-1.wav,2,1,rooster,True,96460,A
392 | 2-96654-A-47.wav,2,47,airplane,False,96654,A
393 | 2-96904-A-27.wav,2,27,brushing_teeth,False,96904,A
394 | 2-98392-A-23.wav,2,23,breathing,False,98392,A
395 | 2-98676-A-24.wav,2,24,coughing,False,98676,A
396 | 2-98866-A-47.wav,2,47,airplane,False,98866,A
397 | 2-99795-A-32.wav,2,32,keyboard_typing,False,99795,A
398 | 2-99796-A-32.wav,2,32,keyboard_typing,False,99796,A
399 | 2-99955-A-7.wav,2,7,insects,False,99955,A
400 | 2-99955-B-7.wav,2,7,insects,False,99955,B
401 | 2-99955-C-7.wav,2,7,insects,False,99955,C
402 | 


--------------------------------------------------------------------------------
/util_esc50/fold3_test.csv:
--------------------------------------------------------------------------------
  1 | filename,fold,target,category,esc10,src_file,take
  2 | 3-100018-A-18.wav,3,18,toilet_flush,False,100018,A
  3 | 3-100024-A-27.wav,3,27,brushing_teeth,False,100024,A
  4 | 3-100024-B-27.wav,3,27,brushing_teeth,False,100024,B
  5 | 3-101381-A-33.wav,3,33,door_wood_creaks,False,101381,A
  6 | 3-101381-B-33.wav,3,33,door_wood_creaks,False,101381,B
  7 | 3-102583-A-49.wav,3,49,hand_saw,False,102583,A
  8 | 3-102583-B-49.wav,3,49,hand_saw,False,102583,B
  9 | 3-102583-C-49.wav,3,49,hand_saw,False,102583,C
 10 | 3-102908-A-4.wav,3,4,frog,False,102908,A
 11 | 3-103050-A-19.wav,3,19,thunderstorm,False,103050,A
 12 | 3-103051-A-19.wav,3,19,thunderstorm,False,103051,A
 13 | 3-103051-B-19.wav,3,19,thunderstorm,False,103051,B
 14 | 3-103051-C-19.wav,3,19,thunderstorm,False,103051,C
 15 | 3-103401-A-33.wav,3,33,door_wood_creaks,False,103401,A
 16 | 3-103401-B-33.wav,3,33,door_wood_creaks,False,103401,B
 17 | 3-103401-C-33.wav,3,33,door_wood_creaks,False,103401,C
 18 | 3-103401-D-33.wav,3,33,door_wood_creaks,False,103401,D
 19 | 3-103597-A-25.wav,3,25,footsteps,False,103597,A
 20 | 3-103598-A-25.wav,3,25,footsteps,False,103598,A
 21 | 3-103599-A-25.wav,3,25,footsteps,False,103599,A
 22 | 3-103599-B-25.wav,3,25,footsteps,False,103599,B
 23 | 3-104632-A-12.wav,3,12,crackling_fire,True,104632,A
 24 | 3-104761-A-7.wav,3,7,insects,False,104761,A
 25 | 3-104761-B-7.wav,3,7,insects,False,104761,B
 26 | 3-104958-A-12.wav,3,12,crackling_fire,True,104958,A
 27 | 3-105235-A-7.wav,3,7,insects,False,105235,A
 28 | 3-105236-A-7.wav,3,7,insects,False,105236,A
 29 | 3-107123-A-26.wav,3,26,laughing,False,107123,A
 30 | 3-107219-A-1.wav,3,1,rooster,True,107219,A
 31 | 3-108160-A-23.wav,3,23,breathing,False,108160,A
 32 | 3-108451-A-17.wav,3,17,pouring_water,False,108451,A
 33 | 3-108451-B-17.wav,3,17,pouring_water,False,108451,B
 34 | 3-108677-A-18.wav,3,18,toilet_flush,False,108677,A
 35 | 3-108791-A-18.wav,3,18,toilet_flush,False,108791,A
 36 | 3-110536-A-26.wav,3,26,laughing,False,110536,A
 37 | 3-110913-A-7.wav,3,7,insects,False,110913,A
 38 | 3-110913-B-7.wav,3,7,insects,False,110913,B
 39 | 3-110913-C-7.wav,3,7,insects,False,110913,C
 40 | 3-110913-D-7.wav,3,7,insects,False,110913,D
 41 | 3-111102-A-46.wav,3,46,church_bells,False,111102,A
 42 | 3-111102-B-46.wav,3,46,church_bells,False,111102,B
 43 | 3-112356-A-18.wav,3,18,toilet_flush,False,112356,A
 44 | 3-112397-A-9.wav,3,9,crow,False,112397,A
 45 | 3-112522-A-27.wav,3,27,brushing_teeth,False,112522,A
 46 | 3-112557-A-23.wav,3,23,breathing,False,112557,A
 47 | 3-112557-B-23.wav,3,23,breathing,False,112557,B
 48 | 3-115382-A-44.wav,3,44,engine,False,115382,A
 49 | 3-115387-A-47.wav,3,47,airplane,False,115387,A
 50 | 3-115387-B-47.wav,3,47,airplane,False,115387,B
 51 | 3-115387-C-47.wav,3,47,airplane,False,115387,C
 52 | 3-116135-A-1.wav,3,1,rooster,True,116135,A
 53 | 3-117293-A-9.wav,3,9,crow,False,117293,A
 54 | 3-117504-A-16.wav,3,16,wind,False,117504,A
 55 | 3-117504-B-16.wav,3,16,wind,False,117504,B
 56 | 3-117793-A-37.wav,3,37,clock_alarm,False,117793,A
 57 | 3-117883-A-37.wav,3,37,clock_alarm,False,117883,A
 58 | 3-118059-A-18.wav,3,18,toilet_flush,False,118059,A
 59 | 3-118069-A-27.wav,3,27,brushing_teeth,False,118069,A
 60 | 3-118069-B-27.wav,3,27,brushing_teeth,False,118069,B
 61 | 3-118194-A-33.wav,3,33,door_wood_creaks,False,118194,A
 62 | 3-118487-A-26.wav,3,26,laughing,False,118487,A
 63 | 3-118656-A-41.wav,3,41,chainsaw,True,118656,A
 64 | 3-118657-A-41.wav,3,41,chainsaw,True,118657,A
 65 | 3-118657-B-41.wav,3,41,chainsaw,True,118657,B
 66 | 3-118658-A-41.wav,3,41,chainsaw,True,118658,A
 67 | 3-118658-B-41.wav,3,41,chainsaw,True,118658,B
 68 | 3-118972-A-41.wav,3,41,chainsaw,True,118972,A
 69 | 3-118972-B-41.wav,3,41,chainsaw,True,118972,B
 70 | 3-119120-A-48.wav,3,48,fireworks,False,119120,A
 71 | 3-119120-B-48.wav,3,48,fireworks,False,119120,B
 72 | 3-119120-C-48.wav,3,48,fireworks,False,119120,C
 73 | 3-119120-D-48.wav,3,48,fireworks,False,119120,D
 74 | 3-119120-E-48.wav,3,48,fireworks,False,119120,E
 75 | 3-119455-A-44.wav,3,44,engine,False,119455,A
 76 | 3-119459-A-26.wav,3,26,laughing,False,119459,A
 77 | 3-120526-A-37.wav,3,37,clock_alarm,False,120526,A
 78 | 3-120526-B-37.wav,3,37,clock_alarm,False,120526,B
 79 | 3-120644-A-12.wav,3,12,crackling_fire,True,120644,A
 80 | 3-121348-A-9.wav,3,9,crow,False,121348,A
 81 | 3-122110-A-46.wav,3,46,church_bells,False,122110,A
 82 | 3-123086-A-28.wav,3,28,snoring,False,123086,A
 83 | 3-123224-A-19.wav,3,19,thunderstorm,False,123224,A
 84 | 3-124376-A-3.wav,3,3,cow,False,124376,A
 85 | 3-124376-B-3.wav,3,3,cow,False,124376,B
 86 | 3-124600-A-19.wav,3,19,thunderstorm,False,124600,A
 87 | 3-124795-A-28.wav,3,28,snoring,False,124795,A
 88 | 3-124925-A-9.wav,3,9,crow,False,124925,A
 89 | 3-124958-A-28.wav,3,28,snoring,False,124958,A
 90 | 3-125418-A-24.wav,3,24,coughing,False,125418,A
 91 | 3-125548-A-32.wav,3,32,keyboard_typing,False,125548,A
 92 | 3-126113-A-26.wav,3,26,laughing,False,126113,A
 93 | 3-126358-A-3.wav,3,3,cow,False,126358,A
 94 | 3-126358-B-3.wav,3,3,cow,False,126358,B
 95 | 3-126391-A-27.wav,3,27,brushing_teeth,False,126391,A
 96 | 3-126391-B-27.wav,3,27,brushing_teeth,False,126391,B
 97 | 3-127874-A-17.wav,3,17,pouring_water,False,127874,A
 98 | 3-127890-A-9.wav,3,9,crow,False,127890,A
 99 | 3-127890-B-9.wav,3,9,crow,False,127890,B
100 | 3-127890-C-9.wav,3,9,crow,False,127890,C
101 | 3-128160-A-44.wav,3,44,engine,False,128160,A
102 | 3-128512-A-47.wav,3,47,airplane,False,128512,A
103 | 3-128512-B-47.wav,3,47,airplane,False,128512,B
104 | 3-129264-A-9.wav,3,9,crow,False,129264,A
105 | 3-129338-A-13.wav,3,13,crickets,False,129338,A
106 | 3-129678-A-13.wav,3,13,crickets,False,129678,A
107 | 3-130330-A-22.wav,3,22,clapping,False,130330,A
108 | 3-130998-A-28.wav,3,28,snoring,False,130998,A
109 | 3-130998-B-28.wav,3,28,snoring,False,130998,B
110 | 3-131943-A-37.wav,3,37,clock_alarm,False,131943,A
111 | 3-132340-A-37.wav,3,37,clock_alarm,False,132340,A
112 | 3-132601-A-24.wav,3,24,coughing,False,132601,A
113 | 3-132747-A-26.wav,3,26,laughing,False,132747,A
114 | 3-132830-A-32.wav,3,32,keyboard_typing,False,132830,A
115 | 3-132852-A-10.wav,3,10,rain,True,132852,A
116 | 3-133977-A-29.wav,3,29,drinking_sipping,False,133977,A
117 | 3-134049-A-1.wav,3,1,rooster,True,134049,A
118 | 3-134699-A-16.wav,3,16,wind,False,134699,A
119 | 3-134699-B-16.wav,3,16,wind,False,134699,B
120 | 3-134699-C-16.wav,3,16,wind,False,134699,C
121 | 3-134802-A-13.wav,3,13,crickets,False,134802,A
122 | 3-135469-A-35.wav,3,35,washing_machine,False,135469,A
123 | 3-135650-A-45.wav,3,45,train,False,135650,A
124 | 3-135650-B-45.wav,3,45,train,False,135650,B
125 | 3-136288-A-0.wav,3,0,dog,True,136288,A
126 | 3-136451-A-45.wav,3,45,train,False,136451,A
127 | 3-136608-A-16.wav,3,16,wind,False,136608,A
128 | 3-137152-A-1.wav,3,1,rooster,True,137152,A
129 | 3-138114-A-22.wav,3,22,clapping,False,138114,A
130 | 3-138212-A-45.wav,3,45,train,False,138212,A
131 | 3-139109-A-46.wav,3,46,church_bells,False,139109,A
132 | 3-139331-A-27.wav,3,27,brushing_teeth,False,139331,A
133 | 3-139958-A-37.wav,3,37,clock_alarm,False,139958,A
134 | 3-140199-A-8.wav,3,8,sheep,False,140199,A
135 | 3-140199-B-8.wav,3,8,sheep,False,140199,B
136 | 3-140199-C-8.wav,3,8,sheep,False,140199,C
137 | 3-140199-D-8.wav,3,8,sheep,False,140199,D
138 | 3-140323-A-29.wav,3,29,drinking_sipping,False,140323,A
139 | 3-140774-A-10.wav,3,10,rain,True,140774,A
140 | 3-141240-A-44.wav,3,44,engine,False,141240,A
141 | 3-141240-B-44.wav,3,44,engine,False,141240,B
142 | 3-141559-A-45.wav,3,45,train,False,141559,A
143 | 3-141684-A-21.wav,3,21,sneezing,True,141684,A
144 | 3-142005-A-10.wav,3,10,rain,True,142005,A
145 | 3-142006-A-10.wav,3,10,rain,True,142006,A
146 | 3-142349-A-17.wav,3,17,pouring_water,False,142349,A
147 | 3-142593-A-38.wav,3,38,clock_tick,True,142593,A
148 | 3-142601-A-21.wav,3,21,sneezing,True,142601,A
149 | 3-142604-A-24.wav,3,24,coughing,False,142604,A
150 | 3-142605-A-21.wav,3,21,sneezing,True,142605,A
151 | 3-143119-A-21.wav,3,21,sneezing,True,143119,A
152 | 3-143560-A-47.wav,3,47,airplane,False,143560,A
153 | 3-143562-A-47.wav,3,47,airplane,False,143562,A
154 | 3-143929-A-10.wav,3,10,rain,True,143929,A
155 | 3-143933-A-38.wav,3,38,clock_tick,True,143933,A
156 | 3-144028-A-0.wav,3,0,dog,True,144028,A
157 | 3-144106-A-32.wav,3,32,keyboard_typing,False,144106,A
158 | 3-144120-A-32.wav,3,32,keyboard_typing,False,144120,A
159 | 3-144128-A-23.wav,3,23,breathing,False,144128,A
160 | 3-144128-B-23.wav,3,23,breathing,False,144128,B
161 | 3-144253-A-29.wav,3,29,drinking_sipping,False,144253,A
162 | 3-144259-A-29.wav,3,29,drinking_sipping,False,144259,A
163 | 3-144510-A-30.wav,3,30,door_wood_knock,False,144510,A
164 | 3-144692-A-21.wav,3,21,sneezing,True,144692,A
165 | 3-144827-A-11.wav,3,11,sea_waves,True,144827,A
166 | 3-144827-B-11.wav,3,11,sea_waves,True,144827,B
167 | 3-144891-A-19.wav,3,19,thunderstorm,False,144891,A
168 | 3-144891-B-19.wav,3,19,thunderstorm,False,144891,B
169 | 3-145382-A-1.wav,3,1,rooster,True,145382,A
170 | 3-145387-A-29.wav,3,29,drinking_sipping,False,145387,A
171 | 3-145487-A-24.wav,3,24,coughing,False,145487,A
172 | 3-145577-A-43.wav,3,43,car_horn,False,145577,A
173 | 3-145719-A-17.wav,3,17,pouring_water,False,145719,A
174 | 3-145774-A-12.wav,3,12,crackling_fire,True,145774,A
175 | 3-146033-A-13.wav,3,13,crickets,False,146033,A
176 | 3-146186-A-44.wav,3,44,engine,False,146186,A
177 | 3-146697-A-43.wav,3,43,car_horn,False,146697,A
178 | 3-146873-A-24.wav,3,24,coughing,False,146873,A
179 | 3-146964-A-5.wav,3,5,cat,False,146964,A
180 | 3-146965-A-5.wav,3,5,cat,False,146965,A
181 | 3-146972-A-5.wav,3,5,cat,False,146972,A
182 | 3-147342-A-34.wav,3,34,can_opening,False,147342,A
183 | 3-147343-A-34.wav,3,34,can_opening,False,147343,A
184 | 3-147965-A-12.wav,3,12,crackling_fire,True,147965,A
185 | 3-148297-A-37.wav,3,37,clock_alarm,False,148297,A
186 | 3-148330-A-21.wav,3,21,sneezing,True,148330,A
187 | 3-148932-A-34.wav,3,34,can_opening,False,148932,A
188 | 3-149042-A-24.wav,3,24,coughing,False,149042,A
189 | 3-149189-A-1.wav,3,1,rooster,True,149189,A
190 | 3-149448-A-17.wav,3,17,pouring_water,False,149448,A
191 | 3-149465-A-22.wav,3,22,clapping,False,149465,A
192 | 3-150231-A-21.wav,3,21,sneezing,True,150231,A
193 | 3-150363-A-38.wav,3,38,clock_tick,True,150363,A
194 | 3-150979-A-40.wav,3,40,helicopter,True,150979,A
195 | 3-150979-B-40.wav,3,40,helicopter,True,150979,B
196 | 3-150979-C-40.wav,3,40,helicopter,True,150979,C
197 | 3-151080-A-20.wav,3,20,crying_baby,True,151080,A
198 | 3-151081-A-20.wav,3,20,crying_baby,True,151081,A
199 | 3-151081-B-20.wav,3,20,crying_baby,True,151081,B
200 | 3-151089-A-30.wav,3,30,door_wood_knock,False,151089,A
201 | 3-151206-A-23.wav,3,23,breathing,False,151206,A
202 | 3-151212-A-24.wav,3,24,coughing,False,151212,A
203 | 3-151213-A-24.wav,3,24,coughing,False,151213,A
204 | 3-151255-A-28.wav,3,28,snoring,False,151255,A
205 | 3-151269-A-35.wav,3,35,washing_machine,False,151269,A
206 | 3-151273-A-35.wav,3,35,washing_machine,False,151273,A
207 | 3-151557-A-28.wav,3,28,snoring,False,151557,A
208 | 3-151557-B-28.wav,3,28,snoring,False,151557,B
209 | 3-152007-A-20.wav,3,20,crying_baby,True,152007,A
210 | 3-152007-B-20.wav,3,20,crying_baby,True,152007,B
211 | 3-152007-C-20.wav,3,20,crying_baby,True,152007,C
212 | 3-152007-D-20.wav,3,20,crying_baby,True,152007,D
213 | 3-152007-E-20.wav,3,20,crying_baby,True,152007,E
214 | 3-152020-A-36.wav,3,36,vacuum_cleaner,False,152020,A
215 | 3-152020-B-36.wav,3,36,vacuum_cleaner,False,152020,B
216 | 3-152020-C-36.wav,3,36,vacuum_cleaner,False,152020,C
217 | 3-152039-A-3.wav,3,3,cow,False,152039,A
218 | 3-152039-B-3.wav,3,3,cow,False,152039,B
219 | 3-152594-A-30.wav,3,30,door_wood_knock,False,152594,A
220 | 3-152912-A-26.wav,3,26,laughing,False,152912,A
221 | 3-152997-A-26.wav,3,26,laughing,False,152997,A
222 | 3-153057-A-43.wav,3,43,car_horn,False,153057,A
223 | 3-153444-A-32.wav,3,32,keyboard_typing,False,153444,A
224 | 3-154378-A-30.wav,3,30,door_wood_knock,False,154378,A
225 | 3-154439-A-17.wav,3,17,pouring_water,False,154439,A
226 | 3-154758-A-44.wav,3,44,engine,False,154758,A
227 | 3-154781-A-32.wav,3,32,keyboard_typing,False,154781,A
228 | 3-154926-A-40.wav,3,40,helicopter,True,154926,A
229 | 3-154926-B-40.wav,3,40,helicopter,True,154926,B
230 | 3-154957-A-1.wav,3,1,rooster,True,154957,A
231 | 3-155130-A-43.wav,3,43,car_horn,False,155130,A
232 | 3-155234-A-43.wav,3,43,car_horn,False,155234,A
233 | 3-155312-A-0.wav,3,0,dog,True,155312,A
234 | 3-155556-A-31.wav,3,31,mouse_click,False,155556,A
235 | 3-155568-A-32.wav,3,32,keyboard_typing,False,155568,A
236 | 3-155570-A-32.wav,3,32,keyboard_typing,False,155570,A
237 | 3-155577-A-14.wav,3,14,chirping_birds,False,155577,A
238 | 3-155578-A-14.wav,3,14,chirping_birds,False,155578,A
239 | 3-155579-A-14.wav,3,14,chirping_birds,False,155579,A
240 | 3-155583-A-14.wav,3,14,chirping_birds,False,155583,A
241 | 3-155584-A-14.wav,3,14,chirping_birds,False,155584,A
242 | 3-155642-A-11.wav,3,11,sea_waves,True,155642,A
243 | 3-155642-B-11.wav,3,11,sea_waves,True,155642,B
244 | 3-155659-A-34.wav,3,34,can_opening,False,155659,A
245 | 3-155766-A-13.wav,3,13,crickets,False,155766,A
246 | 3-156391-A-35.wav,3,35,washing_machine,False,156391,A
247 | 3-156393-A-35.wav,3,35,washing_machine,False,156393,A
248 | 3-156558-A-21.wav,3,21,sneezing,True,156558,A
249 | 3-156581-A-14.wav,3,14,chirping_birds,False,156581,A
250 | 3-156581-B-14.wav,3,14,chirping_birds,False,156581,B
251 | 3-156907-A-15.wav,3,15,water_drops,False,156907,A
252 | 3-157149-A-10.wav,3,10,rain,True,157149,A
253 | 3-157187-A-12.wav,3,12,crackling_fire,True,157187,A
254 | 3-157487-A-10.wav,3,10,rain,True,157487,A
255 | 3-157492-A-45.wav,3,45,train,False,157492,A
256 | 3-157615-A-10.wav,3,10,rain,True,157615,A
257 | 3-157695-A-0.wav,3,0,dog,True,157695,A
258 | 3-158056-A-31.wav,3,31,mouse_click,False,158056,A
259 | 3-158056-B-31.wav,3,31,mouse_click,False,158056,B
260 | 3-158476-A-12.wav,3,12,crackling_fire,True,158476,A
261 | 3-159346-A-36.wav,3,36,vacuum_cleaner,False,159346,A
262 | 3-159346-B-36.wav,3,36,vacuum_cleaner,False,159346,B
263 | 3-159347-A-36.wav,3,36,vacuum_cleaner,False,159347,A
264 | 3-159347-B-36.wav,3,36,vacuum_cleaner,False,159347,B
265 | 3-159348-A-36.wav,3,36,vacuum_cleaner,False,159348,A
266 | 3-159445-A-45.wav,3,45,train,False,159445,A
267 | 3-159445-B-45.wav,3,45,train,False,159445,B
268 | 3-160119-A-15.wav,3,15,water_drops,False,160119,A
269 | 3-160993-A-3.wav,3,3,cow,False,160993,A
270 | 3-161010-A-43.wav,3,43,car_horn,False,161010,A
271 | 3-161500-A-17.wav,3,17,pouring_water,False,161500,A
272 | 3-162786-A-13.wav,3,13,crickets,False,162786,A
273 | 3-163288-A-1.wav,3,1,rooster,True,163288,A
274 | 3-163459-A-0.wav,3,0,dog,True,163459,A
275 | 3-163607-A-13.wav,3,13,crickets,False,163607,A
276 | 3-163607-B-13.wav,3,13,crickets,False,163607,B
277 | 3-163727-A-3.wav,3,3,cow,False,163727,A
278 | 3-164120-A-11.wav,3,11,sea_waves,True,164120,A
279 | 3-164216-A-6.wav,3,6,hen,False,164216,A
280 | 3-164216-B-6.wav,3,6,hen,False,164216,B
281 | 3-164216-C-6.wav,3,6,hen,False,164216,C
282 | 3-164592-A-15.wav,3,15,water_drops,False,164592,A
283 | 3-164593-A-15.wav,3,15,water_drops,False,164593,A
284 | 3-164594-A-15.wav,3,15,water_drops,False,164594,A
285 | 3-164595-A-15.wav,3,15,water_drops,False,164595,A
286 | 3-164630-A-11.wav,3,11,sea_waves,True,164630,A
287 | 3-164688-A-38.wav,3,38,clock_tick,True,164688,A
288 | 3-165856-A-41.wav,3,41,chainsaw,True,165856,A
289 | 3-166125-A-23.wav,3,23,breathing,False,166125,A
290 | 3-166125-B-23.wav,3,23,breathing,False,166125,B
291 | 3-166324-A-15.wav,3,15,water_drops,False,166324,A
292 | 3-166326-A-15.wav,3,15,water_drops,False,166326,A
293 | 3-166422-A-11.wav,3,11,sea_waves,True,166422,A
294 | 3-166546-A-34.wav,3,34,can_opening,False,166546,A
295 | 3-166546-B-34.wav,3,34,can_opening,False,166546,B
296 | 3-167096-A-31.wav,3,31,mouse_click,False,167096,A
297 | 3-169907-A-29.wav,3,29,drinking_sipping,False,169907,A
298 | 3-170002-A-34.wav,3,34,can_opening,False,170002,A
299 | 3-170015-A-0.wav,3,0,dog,True,170015,A
300 | 3-170312-A-31.wav,3,31,mouse_click,False,170312,A
301 | 3-170377-A-38.wav,3,38,clock_tick,True,170377,A
302 | 3-170383-A-38.wav,3,38,clock_tick,True,170383,A
303 | 3-170574-A-30.wav,3,30,door_wood_knock,False,170574,A
304 | 3-170851-A-31.wav,3,31,mouse_click,False,170851,A
305 | 3-171012-A-38.wav,3,38,clock_tick,True,171012,A
306 | 3-171041-A-38.wav,3,38,clock_tick,True,171041,A
307 | 3-171281-A-6.wav,3,6,hen,False,171281,A
308 | 3-171937-A-34.wav,3,34,can_opening,False,171937,A
309 | 3-172179-A-31.wav,3,31,mouse_click,False,172179,A
310 | 3-172881-A-48.wav,3,48,fireworks,False,172881,A
311 | 3-172922-A-48.wav,3,48,fireworks,False,172922,A
312 | 3-174840-A-43.wav,3,43,car_horn,False,174840,A
313 | 3-174866-A-29.wav,3,29,drinking_sipping,False,174866,A
314 | 3-177082-A-22.wav,3,22,clapping,False,177082,A
315 | 3-177083-A-22.wav,3,22,clapping,False,177083,A
316 | 3-178096-A-6.wav,3,6,hen,False,178096,A
317 | 3-180147-A-30.wav,3,30,door_wood_knock,False,180147,A
318 | 3-180256-A-0.wav,3,0,dog,True,180256,A
319 | 3-180977-A-0.wav,3,0,dog,True,180977,A
320 | 3-181132-A-14.wav,3,14,chirping_birds,False,181132,A
321 | 3-181278-A-22.wav,3,22,clapping,False,181278,A
322 | 3-182023-A-30.wav,3,30,door_wood_knock,False,182023,A
323 | 3-182025-A-30.wav,3,30,door_wood_knock,False,182025,A
324 | 3-182710-A-35.wav,3,35,washing_machine,False,182710,A
325 | 3-182710-B-35.wav,3,35,washing_machine,False,182710,B
326 | 3-185313-A-31.wav,3,31,mouse_click,False,185313,A
327 | 3-185456-A-29.wav,3,29,drinking_sipping,False,185456,A
328 | 3-187549-A-6.wav,3,6,hen,False,187549,A
329 | 3-187549-B-6.wav,3,6,hen,False,187549,B
330 | 3-187710-A-11.wav,3,11,sea_waves,True,187710,A
331 | 3-188390-A-6.wav,3,6,hen,False,188390,A
332 | 3-188726-A-35.wav,3,35,washing_machine,False,188726,A
333 | 3-193767-A-47.wav,3,47,airplane,False,193767,A
334 | 3-197408-A-8.wav,3,8,sheep,False,197408,A
335 | 3-197408-B-8.wav,3,8,sheep,False,197408,B
336 | 3-197408-C-8.wav,3,8,sheep,False,197408,C
337 | 3-197435-A-22.wav,3,22,clapping,False,197435,A
338 | 3-197435-B-22.wav,3,22,clapping,False,197435,B
339 | 3-203371-A-39.wav,3,39,glass_breaking,False,203371,A
340 | 3-203373-A-39.wav,3,39,glass_breaking,False,203373,A
341 | 3-203374-A-39.wav,3,39,glass_breaking,False,203374,A
342 | 3-203375-A-39.wav,3,39,glass_breaking,False,203375,A
343 | 3-203377-A-39.wav,3,39,glass_breaking,False,203377,A
344 | 3-20861-A-8.wav,3,8,sheep,False,20861,A
345 | 3-208820-A-49.wav,3,49,hand_saw,False,208820,A
346 | 3-216280-A-39.wav,3,39,glass_breaking,False,216280,A
347 | 3-216281-A-39.wav,3,39,glass_breaking,False,216281,A
348 | 3-216284-A-39.wav,3,39,glass_breaking,False,216284,A
349 | 3-233151-A-2.wav,3,2,pig,False,233151,A
350 | 3-243726-A-43.wav,3,43,car_horn,False,243726,A
351 | 3-246513-A-16.wav,3,16,wind,False,246513,A
352 | 3-246513-B-16.wav,3,16,wind,False,246513,B
353 | 3-249913-A-25.wav,3,25,footsteps,False,249913,A
354 | 3-251617-A-48.wav,3,48,fireworks,False,251617,A
355 | 3-253081-A-2.wav,3,2,pig,False,253081,A
356 | 3-253084-A-2.wav,3,2,pig,False,253084,A
357 | 3-253084-B-2.wav,3,2,pig,False,253084,B
358 | 3-253084-C-2.wav,3,2,pig,False,253084,C
359 | 3-253084-D-2.wav,3,2,pig,False,253084,D
360 | 3-253084-E-2.wav,3,2,pig,False,253084,E
361 | 3-257858-A-2.wav,3,2,pig,False,257858,A
362 | 3-259622-A-44.wav,3,44,engine,False,259622,A
363 | 3-51376-A-42.wav,3,42,siren,False,51376,A
364 | 3-51731-A-42.wav,3,42,siren,False,51731,A
365 | 3-51731-B-42.wav,3,42,siren,False,51731,B
366 | 3-51909-A-42.wav,3,42,siren,False,51909,A
367 | 3-51909-B-42.wav,3,42,siren,False,51909,B
368 | 3-58772-A-42.wav,3,42,siren,False,58772,A
369 | 3-62878-A-42.wav,3,42,siren,False,62878,A
370 | 3-62878-B-42.wav,3,42,siren,False,62878,B
371 | 3-65748-A-12.wav,3,12,crackling_fire,True,65748,A
372 | 3-68630-A-40.wav,3,40,helicopter,True,68630,A
373 | 3-68630-B-40.wav,3,40,helicopter,True,68630,B
374 | 3-68630-C-40.wav,3,40,helicopter,True,68630,C
375 | 3-70962-A-4.wav,3,4,frog,False,70962,A
376 | 3-70962-B-4.wav,3,4,frog,False,70962,B
377 | 3-70962-C-4.wav,3,4,frog,False,70962,C
378 | 3-71964-A-4.wav,3,4,frog,False,71964,A
379 | 3-71964-B-4.wav,3,4,frog,False,71964,B
380 | 3-71964-C-4.wav,3,4,frog,False,71964,C
381 | 3-83527-A-4.wav,3,4,frog,False,83527,A
382 | 3-87936-A-46.wav,3,46,church_bells,False,87936,A
383 | 3-87936-B-46.wav,3,46,church_bells,False,87936,B
384 | 3-92637-A-18.wav,3,18,toilet_flush,False,92637,A
385 | 3-93010-A-18.wav,3,18,toilet_flush,False,93010,A
386 | 3-94342-A-25.wav,3,25,footsteps,False,94342,A
387 | 3-94343-A-25.wav,3,25,footsteps,False,94343,A
388 | 3-94344-A-25.wav,3,25,footsteps,False,94344,A
389 | 3-94355-A-33.wav,3,33,door_wood_creaks,False,94355,A
390 | 3-95694-A-5.wav,3,5,cat,False,95694,A
391 | 3-95695-A-5.wav,3,5,cat,False,95695,A
392 | 3-95695-B-5.wav,3,5,cat,False,95695,B
393 | 3-95697-A-5.wav,3,5,cat,False,95697,A
394 | 3-95698-A-5.wav,3,5,cat,False,95698,A
395 | 3-96606-A-49.wav,3,49,hand_saw,False,96606,A
396 | 3-96606-B-49.wav,3,49,hand_saw,False,96606,B
397 | 3-97909-A-49.wav,3,49,hand_saw,False,97909,A
398 | 3-97909-B-49.wav,3,49,hand_saw,False,97909,B
399 | 3-98193-A-46.wav,3,46,church_bells,False,98193,A
400 | 3-98771-A-18.wav,3,18,toilet_flush,False,98771,A
401 | 3-98869-A-46.wav,3,46,church_bells,False,98869,A
402 | 


--------------------------------------------------------------------------------
/util_esc50/fold4_test.csv:
--------------------------------------------------------------------------------
  1 | filename,fold,target,category,esc10,src_file,take
  2 | 4-102844-A-49.wav,4,49,hand_saw,False,102844,A
  3 | 4-102844-B-49.wav,4,49,hand_saw,False,102844,B
  4 | 4-102844-C-49.wav,4,49,hand_saw,False,102844,C
  5 | 4-102871-A-42.wav,4,42,siren,False,102871,A
  6 | 4-107117-A-33.wav,4,33,door_wood_creaks,False,107117,A
  7 | 4-107120-A-33.wav,4,33,door_wood_creaks,False,107120,A
  8 | 4-107122-A-33.wav,4,33,door_wood_creaks,False,107122,A
  9 | 4-108352-A-33.wav,4,33,door_wood_creaks,False,108352,A
 10 | 4-111671-A-42.wav,4,42,siren,False,111671,A
 11 | 4-111671-B-42.wav,4,42,siren,False,111671,B
 12 | 4-117627-A-25.wav,4,25,footsteps,False,117627,A
 13 | 4-117630-A-25.wav,4,25,footsteps,False,117630,A
 14 | 4-119647-A-48.wav,4,48,fireworks,False,119647,A
 15 | 4-119647-B-48.wav,4,48,fireworks,False,119647,B
 16 | 4-119647-C-48.wav,4,48,fireworks,False,119647,C
 17 | 4-119647-D-48.wav,4,48,fireworks,False,119647,D
 18 | 4-119648-A-48.wav,4,48,fireworks,False,119648,A
 19 | 4-119648-B-48.wav,4,48,fireworks,False,119648,B
 20 | 4-119648-C-48.wav,4,48,fireworks,False,119648,C
 21 | 4-119648-D-48.wav,4,48,fireworks,False,119648,D
 22 | 4-119720-A-18.wav,4,18,toilet_flush,False,119720,A
 23 | 4-120160-A-5.wav,4,5,cat,False,120160,A
 24 | 4-121532-A-42.wav,4,42,siren,False,121532,A
 25 | 4-123680-A-18.wav,4,18,toilet_flush,False,123680,A
 26 | 4-125070-A-19.wav,4,19,thunderstorm,False,125070,A
 27 | 4-125071-A-19.wav,4,19,thunderstorm,False,125071,A
 28 | 4-125072-A-19.wav,4,19,thunderstorm,False,125072,A
 29 | 4-125072-B-19.wav,4,19,thunderstorm,False,125072,B
 30 | 4-125825-A-46.wav,4,46,church_bells,False,125825,A
 31 | 4-125825-B-46.wav,4,46,church_bells,False,125825,B
 32 | 4-125929-A-40.wav,4,40,helicopter,True,125929,A
 33 | 4-126046-A-18.wav,4,18,toilet_flush,False,126046,A
 34 | 4-126532-A-18.wav,4,18,toilet_flush,False,126532,A
 35 | 4-128659-A-33.wav,4,33,door_wood_creaks,False,128659,A
 36 | 4-130584-A-4.wav,4,4,frog,False,130584,A
 37 | 4-130891-A-7.wav,4,7,insects,False,130891,A
 38 | 4-132383-A-2.wav,4,2,pig,False,132383,A
 39 | 4-132383-B-2.wav,4,2,pig,False,132383,B
 40 | 4-132803-A-26.wav,4,26,laughing,False,132803,A
 41 | 4-132810-A-26.wav,4,26,laughing,False,132810,A
 42 | 4-132816-A-26.wav,4,26,laughing,False,132816,A
 43 | 4-132839-A-33.wav,4,33,door_wood_creaks,False,132839,A
 44 | 4-133047-A-5.wav,4,5,cat,False,133047,A
 45 | 4-133047-B-5.wav,4,5,cat,False,133047,B
 46 | 4-133047-C-5.wav,4,5,cat,False,133047,C
 47 | 4-133674-A-26.wav,4,26,laughing,False,133674,A
 48 | 4-133895-A-7.wav,4,7,insects,False,133895,A
 49 | 4-135439-A-18.wav,4,18,toilet_flush,False,135439,A
 50 | 4-136381-A-9.wav,4,9,crow,False,136381,A
 51 | 4-138344-A-9.wav,4,9,crow,False,138344,A
 52 | 4-140034-A-46.wav,4,46,church_bells,False,140034,A
 53 | 4-141365-A-18.wav,4,18,toilet_flush,False,141365,A
 54 | 4-143118-A-7.wav,4,7,insects,False,143118,A
 55 | 4-143118-B-7.wav,4,7,insects,False,143118,B
 56 | 4-144083-A-16.wav,4,16,wind,False,144083,A
 57 | 4-144083-B-16.wav,4,16,wind,False,144083,B
 58 | 4-144083-C-16.wav,4,16,wind,False,144083,C
 59 | 4-144085-A-16.wav,4,16,wind,False,144085,A
 60 | 4-144468-A-27.wav,4,27,brushing_teeth,False,144468,A
 61 | 4-144468-B-27.wav,4,27,brushing_teeth,False,144468,B
 62 | 4-145006-A-18.wav,4,18,toilet_flush,False,145006,A
 63 | 4-145081-A-9.wav,4,9,crow,False,145081,A
 64 | 4-146200-A-36.wav,4,36,vacuum_cleaner,False,146200,A
 65 | 4-147240-A-2.wav,4,2,pig,False,147240,A
 66 | 4-147240-B-2.wav,4,2,pig,False,147240,B
 67 | 4-147657-A-46.wav,4,46,church_bells,False,147657,A
 68 | 4-147658-A-46.wav,4,46,church_bells,False,147658,A
 69 | 4-149294-A-41.wav,4,41,chainsaw,True,149294,A
 70 | 4-149294-B-41.wav,4,41,chainsaw,True,149294,B
 71 | 4-149940-A-5.wav,4,5,cat,False,149940,A
 72 | 4-149940-B-5.wav,4,5,cat,False,149940,B
 73 | 4-150364-A-46.wav,4,46,church_bells,False,150364,A
 74 | 4-150364-B-46.wav,4,46,church_bells,False,150364,B
 75 | 4-151242-A-37.wav,4,37,clock_alarm,False,151242,A
 76 | 4-152958-A-18.wav,4,18,toilet_flush,False,152958,A
 77 | 4-152995-A-24.wav,4,24,coughing,False,152995,A
 78 | 4-154405-A-27.wav,4,27,brushing_teeth,False,154405,A
 79 | 4-154405-B-27.wav,4,27,brushing_teeth,False,154405,B
 80 | 4-154443-A-24.wav,4,24,coughing,False,154443,A
 81 | 4-154793-A-4.wav,4,4,frog,False,154793,A
 82 | 4-155650-A-24.wav,4,24,coughing,False,155650,A
 83 | 4-155650-B-24.wav,4,24,coughing,False,155650,B
 84 | 4-155670-A-26.wav,4,26,laughing,False,155670,A
 85 | 4-156227-A-9.wav,4,9,crow,False,156227,A
 86 | 4-156827-A-46.wav,4,46,church_bells,False,156827,A
 87 | 4-156843-A-21.wav,4,21,sneezing,True,156843,A
 88 | 4-156844-A-21.wav,4,21,sneezing,True,156844,A
 89 | 4-156993-A-19.wav,4,19,thunderstorm,False,156993,A
 90 | 4-157296-A-24.wav,4,24,coughing,False,157296,A
 91 | 4-157297-A-21.wav,4,21,sneezing,True,157297,A
 92 | 4-157611-A-41.wav,4,41,chainsaw,True,157611,A
 93 | 4-157611-B-41.wav,4,41,chainsaw,True,157611,B
 94 | 4-158653-A-32.wav,4,32,keyboard_typing,False,158653,A
 95 | 4-159426-A-9.wav,4,9,crow,False,159426,A
 96 | 4-159609-A-14.wav,4,14,chirping_birds,False,159609,A
 97 | 4-159609-B-14.wav,4,14,chirping_birds,False,159609,B
 98 | 4-160036-A-33.wav,4,33,door_wood_creaks,False,160036,A
 99 | 4-160036-B-33.wav,4,33,door_wood_creaks,False,160036,B
100 | 4-160999-A-10.wav,4,10,rain,True,160999,A
101 | 4-161099-A-47.wav,4,47,airplane,False,161099,A
102 | 4-161099-B-47.wav,4,47,airplane,False,161099,B
103 | 4-161100-A-47.wav,4,47,airplane,False,161100,A
104 | 4-161103-A-47.wav,4,47,airplane,False,161103,A
105 | 4-161105-A-47.wav,4,47,airplane,False,161105,A
106 | 4-161105-B-47.wav,4,47,airplane,False,161105,B
107 | 4-161127-A-10.wav,4,10,rain,True,161127,A
108 | 4-161303-A-5.wav,4,5,cat,False,161303,A
109 | 4-161303-B-5.wav,4,5,cat,False,161303,B
110 | 4-161519-A-19.wav,4,19,thunderstorm,False,161519,A
111 | 4-161579-A-40.wav,4,40,helicopter,True,161579,A
112 | 4-161579-B-40.wav,4,40,helicopter,True,161579,B
113 | 4-163264-A-10.wav,4,10,rain,True,163264,A
114 | 4-163606-A-16.wav,4,16,wind,False,163606,A
115 | 4-163608-B-16.wav,4,16,wind,False,163608,B
116 | 4-163609-A-16.wav,4,16,wind,False,163609,A
117 | 4-163609-B-16.wav,4,16,wind,False,163609,B
118 | 4-163697-A-13.wav,4,13,crickets,False,163697,A
119 | 4-164021-A-1.wav,4,1,rooster,True,164021,A
120 | 4-164064-A-1.wav,4,1,rooster,True,164064,A
121 | 4-164064-B-1.wav,4,1,rooster,True,164064,B
122 | 4-164064-C-1.wav,4,1,rooster,True,164064,C
123 | 4-164206-A-10.wav,4,10,rain,True,164206,A
124 | 4-164243-A-26.wav,4,26,laughing,False,164243,A
125 | 4-164661-A-12.wav,4,12,crackling_fire,True,164661,A
126 | 4-164661-B-12.wav,4,12,crackling_fire,True,164661,B
127 | 4-164859-A-1.wav,4,1,rooster,True,164859,A
128 | 4-165606-A-45.wav,4,45,train,False,165606,A
129 | 4-165791-A-45.wav,4,45,train,False,165791,A
130 | 4-165791-B-45.wav,4,45,train,False,165791,B
131 | 4-165813-A-19.wav,4,19,thunderstorm,False,165813,A
132 | 4-165813-B-19.wav,4,19,thunderstorm,False,165813,B
133 | 4-165823-A-41.wav,4,41,chainsaw,True,165823,A
134 | 4-165823-B-41.wav,4,41,chainsaw,True,165823,B
135 | 4-165845-A-45.wav,4,45,train,False,165845,A
136 | 4-165845-B-45.wav,4,45,train,False,165845,B
137 | 4-165933-A-45.wav,4,45,train,False,165933,A
138 | 4-166661-A-10.wav,4,10,rain,True,166661,A
139 | 4-167063-A-11.wav,4,11,sea_waves,True,167063,A
140 | 4-167063-B-11.wav,4,11,sea_waves,True,167063,B
141 | 4-167063-C-11.wav,4,11,sea_waves,True,167063,C
142 | 4-167077-A-20.wav,4,20,crying_baby,True,167077,A
143 | 4-167077-B-20.wav,4,20,crying_baby,True,167077,B
144 | 4-167077-C-20.wav,4,20,crying_baby,True,167077,C
145 | 4-167155-A-32.wav,4,32,keyboard_typing,False,167155,A
146 | 4-167571-A-26.wav,4,26,laughing,False,167571,A
147 | 4-167642-A-21.wav,4,21,sneezing,True,167642,A
148 | 4-168155-A-15.wav,4,15,water_drops,False,168155,A
149 | 4-168446-A-45.wav,4,45,train,False,168446,A
150 | 4-168868-A-17.wav,4,17,pouring_water,False,168868,A
151 | 4-169127-A-41.wav,4,41,chainsaw,True,169127,A
152 | 4-169127-B-41.wav,4,41,chainsaw,True,169127,B
153 | 4-169508-A-37.wav,4,37,clock_alarm,False,169508,A
154 | 4-169726-A-24.wav,4,24,coughing,False,169726,A
155 | 4-170078-A-1.wav,4,1,rooster,True,170078,A
156 | 4-170247-A-12.wav,4,12,crackling_fire,True,170247,A
157 | 4-170247-B-12.wav,4,12,crackling_fire,True,170247,B
158 | 4-171207-A-12.wav,4,12,crackling_fire,True,171207,A
159 | 4-171396-A-24.wav,4,24,coughing,False,171396,A
160 | 4-171519-A-21.wav,4,21,sneezing,True,171519,A
161 | 4-171652-A-44.wav,4,44,engine,False,171652,A
162 | 4-171706-A-23.wav,4,23,breathing,False,171706,A
163 | 4-171823-A-13.wav,4,13,crickets,False,171823,A
164 | 4-172143-A-13.wav,4,13,crickets,False,172143,A
165 | 4-172180-A-32.wav,4,32,keyboard_typing,False,172180,A
166 | 4-172366-A-37.wav,4,37,clock_alarm,False,172366,A
167 | 4-172377-A-17.wav,4,17,pouring_water,False,172377,A
168 | 4-172500-A-27.wav,4,27,brushing_teeth,False,172500,A
169 | 4-172500-B-27.wav,4,27,brushing_teeth,False,172500,B
170 | 4-172500-C-27.wav,4,27,brushing_teeth,False,172500,C
171 | 4-172500-D-27.wav,4,27,brushing_teeth,False,172500,D
172 | 4-172732-A-36.wav,4,36,vacuum_cleaner,False,172732,A
173 | 4-172733-A-36.wav,4,36,vacuum_cleaner,False,172733,A
174 | 4-172734-A-36.wav,4,36,vacuum_cleaner,False,172734,A
175 | 4-172736-A-36.wav,4,36,vacuum_cleaner,False,172736,A
176 | 4-172736-B-36.wav,4,36,vacuum_cleaner,False,172736,B
177 | 4-172742-A-32.wav,4,32,keyboard_typing,False,172742,A
178 | 4-173865-A-9.wav,4,9,crow,False,173865,A
179 | 4-173865-B-9.wav,4,9,crow,False,173865,B
180 | 4-174797-A-15.wav,4,15,water_drops,False,174797,A
181 | 4-174860-A-3.wav,4,3,cow,False,174860,A
182 | 4-174860-B-3.wav,4,3,cow,False,174860,B
183 | 4-175000-A-40.wav,4,40,helicopter,True,175000,A
184 | 4-175000-B-40.wav,4,40,helicopter,True,175000,B
185 | 4-175000-C-40.wav,4,40,helicopter,True,175000,C
186 | 4-175025-A-34.wav,4,34,can_opening,False,175025,A
187 | 4-175845-A-43.wav,4,43,car_horn,False,175845,A
188 | 4-175846-A-43.wav,4,43,car_horn,False,175846,A
189 | 4-175855-A-43.wav,4,43,car_horn,False,175855,A
190 | 4-175856-A-43.wav,4,43,car_horn,False,175856,A
191 | 4-175945-A-38.wav,4,38,clock_tick,True,175945,A
192 | 4-176631-A-43.wav,4,43,car_horn,False,176631,A
193 | 4-176638-A-43.wav,4,43,car_horn,False,176638,A
194 | 4-176914-A-23.wav,4,23,breathing,False,176914,A
195 | 4-177243-A-32.wav,4,32,keyboard_typing,False,177243,A
196 | 4-177250-A-10.wav,4,10,rain,True,177250,A
197 | 4-177835-A-24.wav,4,24,coughing,False,177835,A
198 | 4-178402-A-43.wav,4,43,car_horn,False,178402,A
199 | 4-178881-A-45.wav,4,45,train,False,178881,A
200 | 4-179836-A-34.wav,4,34,can_opening,False,179836,A
201 | 4-179984-A-38.wav,4,38,clock_tick,True,179984,A
202 | 4-180337-A-28.wav,4,28,snoring,False,180337,A
203 | 4-180380-A-10.wav,4,10,rain,True,180380,A
204 | 4-180453-A-17.wav,4,17,pouring_water,False,180453,A
205 | 4-181035-A-38.wav,4,38,clock_tick,True,181035,A
206 | 4-181286-A-10.wav,4,10,rain,True,181286,A
207 | 4-181362-A-13.wav,4,13,crickets,False,181362,A
208 | 4-181563-A-12.wav,4,12,crackling_fire,True,181563,A
209 | 4-181599-A-26.wav,4,26,laughing,False,181599,A
210 | 4-181628-A-17.wav,4,17,pouring_water,False,181628,A
211 | 4-181707-A-32.wav,4,32,keyboard_typing,False,181707,A
212 | 4-181708-A-32.wav,4,32,keyboard_typing,False,181708,A
213 | 4-181865-A-38.wav,4,38,clock_tick,True,181865,A
214 | 4-181955-A-3.wav,4,3,cow,False,181955,A
215 | 4-181955-B-3.wav,4,3,cow,False,181955,B
216 | 4-181955-C-3.wav,4,3,cow,False,181955,C
217 | 4-181999-A-36.wav,4,36,vacuum_cleaner,False,181999,A
218 | 4-182034-A-30.wav,4,30,door_wood_knock,False,182034,A
219 | 4-182039-A-30.wav,4,30,door_wood_knock,False,182039,A
220 | 4-182041-A-30.wav,4,30,door_wood_knock,False,182041,A
221 | 4-182314-A-7.wav,4,7,insects,False,182314,A
222 | 4-182314-B-7.wav,4,7,insects,False,182314,B
223 | 4-182368-A-12.wav,4,12,crackling_fire,True,182368,A
224 | 4-182369-A-12.wav,4,12,crackling_fire,True,182369,A
225 | 4-182395-A-0.wav,4,0,dog,True,182395,A
226 | 4-182613-A-11.wav,4,11,sea_waves,True,182613,A
227 | 4-182613-B-11.wav,4,11,sea_waves,True,182613,B
228 | 4-182795-A-14.wav,4,14,chirping_birds,False,182795,A
229 | 4-182839-A-17.wav,4,17,pouring_water,False,182839,A
230 | 4-183487-A-1.wav,4,1,rooster,True,183487,A
231 | 4-183882-A-28.wav,4,28,snoring,False,183882,A
232 | 4-183882-B-28.wav,4,28,snoring,False,183882,B
233 | 4-183992-A-0.wav,4,0,dog,True,183992,A
234 | 4-184235-A-28.wav,4,28,snoring,False,184235,A
235 | 4-184237-A-28.wav,4,28,snoring,False,184237,A
236 | 4-184434-A-21.wav,4,21,sneezing,True,184434,A
237 | 4-184575-A-0.wav,4,0,dog,True,184575,A
238 | 4-185415-A-21.wav,4,21,sneezing,True,185415,A
239 | 4-185575-A-20.wav,4,20,crying_baby,True,185575,A
240 | 4-185575-B-20.wav,4,20,crying_baby,True,185575,B
241 | 4-185575-C-20.wav,4,20,crying_baby,True,185575,C
242 | 4-185613-A-32.wav,4,32,keyboard_typing,False,185613,A
243 | 4-185619-A-21.wav,4,21,sneezing,True,185619,A
244 | 4-186518-A-30.wav,4,30,door_wood_knock,False,186518,A
245 | 4-186693-A-17.wav,4,17,pouring_water,False,186693,A
246 | 4-186935-A-44.wav,4,44,engine,False,186935,A
247 | 4-186936-A-44.wav,4,44,engine,False,186936,A
248 | 4-186938-A-44.wav,4,44,engine,False,186938,A
249 | 4-186940-A-44.wav,4,44,engine,False,186940,A
250 | 4-186962-A-44.wav,4,44,engine,False,186962,A
251 | 4-187284-A-30.wav,4,30,door_wood_knock,False,187284,A
252 | 4-187384-A-34.wav,4,34,can_opening,False,187384,A
253 | 4-187504-A-17.wav,4,17,pouring_water,False,187504,A
254 | 4-187504-B-17.wav,4,17,pouring_water,False,187504,B
255 | 4-187769-A-14.wav,4,14,chirping_birds,False,187769,A
256 | 4-187769-B-14.wav,4,14,chirping_birds,False,187769,B
257 | 4-188003-A-34.wav,4,34,can_opening,False,188003,A
258 | 4-188033-A-38.wav,4,38,clock_tick,True,188033,A
259 | 4-188191-A-29.wav,4,29,drinking_sipping,False,188191,A
260 | 4-188191-B-29.wav,4,29,drinking_sipping,False,188191,B
261 | 4-188191-C-29.wav,4,29,drinking_sipping,False,188191,C
262 | 4-188287-A-9.wav,4,9,crow,False,188287,A
263 | 4-188293-A-15.wav,4,15,water_drops,False,188293,A
264 | 4-188293-B-15.wav,4,15,water_drops,False,188293,B
265 | 4-188595-A-29.wav,4,29,drinking_sipping,False,188595,A
266 | 4-188703-A-8.wav,4,8,sheep,False,188703,A
267 | 4-188703-B-8.wav,4,8,sheep,False,188703,B
268 | 4-188703-C-8.wav,4,8,sheep,False,188703,C
269 | 4-188703-D-8.wav,4,8,sheep,False,188703,D
270 | 4-188878-A-30.wav,4,30,door_wood_knock,False,188878,A
271 | 4-189332-A-37.wav,4,37,clock_alarm,False,189332,A
272 | 4-189828-A-22.wav,4,22,clapping,False,189828,A
273 | 4-189830-A-22.wav,4,22,clapping,False,189830,A
274 | 4-189832-A-22.wav,4,22,clapping,False,189832,A
275 | 4-189832-B-22.wav,4,22,clapping,False,189832,B
276 | 4-189833-A-22.wav,4,22,clapping,False,189833,A
277 | 4-189836-A-22.wav,4,22,clapping,False,189836,A
278 | 4-189838-A-22.wav,4,22,clapping,False,189838,A
279 | 4-191015-A-43.wav,4,43,car_horn,False,191015,A
280 | 4-191246-A-7.wav,4,7,insects,False,191246,A
281 | 4-191246-B-7.wav,4,7,insects,False,191246,B
282 | 4-191297-A-28.wav,4,28,snoring,False,191297,A
283 | 4-191327-A-38.wav,4,38,clock_tick,True,191327,A
284 | 4-191687-A-0.wav,4,0,dog,True,191687,A
285 | 4-192068-A-31.wav,4,31,mouse_click,False,192068,A
286 | 4-192236-A-0.wav,4,0,dog,True,192236,A
287 | 4-193480-A-40.wav,4,40,helicopter,True,193480,A
288 | 4-193480-B-40.wav,4,40,helicopter,True,193480,B
289 | 4-194246-A-13.wav,4,13,crickets,False,194246,A
290 | 4-194680-A-36.wav,4,36,vacuum_cleaner,False,194680,A
291 | 4-194711-A-38.wav,4,38,clock_tick,True,194711,A
292 | 4-194754-A-0.wav,4,0,dog,True,194754,A
293 | 4-194808-A-29.wav,4,29,drinking_sipping,False,194808,A
294 | 4-194979-A-25.wav,4,25,footsteps,False,194979,A
295 | 4-194981-A-25.wav,4,25,footsteps,False,194981,A
296 | 4-195305-A-31.wav,4,31,mouse_click,False,195305,A
297 | 4-195451-A-44.wav,4,44,engine,False,195451,A
298 | 4-195497-A-11.wav,4,11,sea_waves,True,195497,A
299 | 4-195497-B-11.wav,4,11,sea_waves,True,195497,B
300 | 4-195707-A-13.wav,4,13,crickets,False,195707,A
301 | 4-195805-A-13.wav,4,13,crickets,False,195805,A
302 | 4-196671-A-8.wav,4,8,sheep,False,196671,A
303 | 4-196671-B-8.wav,4,8,sheep,False,196671,B
304 | 4-196672-A-8.wav,4,8,sheep,False,196672,A
305 | 4-197103-A-6.wav,4,6,hen,False,197103,A
306 | 4-197454-A-28.wav,4,28,snoring,False,197454,A
307 | 4-197454-B-28.wav,4,28,snoring,False,197454,B
308 | 4-197871-A-15.wav,4,15,water_drops,False,197871,A
309 | 4-198025-A-23.wav,4,23,breathing,False,198025,A
310 | 4-198360-A-49.wav,4,49,hand_saw,False,198360,A
311 | 4-198360-B-49.wav,4,49,hand_saw,False,198360,B
312 | 4-198360-C-49.wav,4,49,hand_saw,False,198360,C
313 | 4-198841-A-37.wav,4,37,clock_alarm,False,198841,A
314 | 4-198962-A-25.wav,4,25,footsteps,False,198962,A
315 | 4-198962-B-25.wav,4,25,footsteps,False,198962,B
316 | 4-198965-A-38.wav,4,38,clock_tick,True,198965,A
317 | 4-199261-A-0.wav,4,0,dog,True,199261,A
318 | 4-200330-A-6.wav,4,6,hen,False,200330,A
319 | 4-200330-B-6.wav,4,6,hen,False,200330,B
320 | 4-201300-A-31.wav,4,31,mouse_click,False,201300,A
321 | 4-201800-A-31.wav,4,31,mouse_click,False,201800,A
322 | 4-201988-A-44.wav,4,44,engine,False,201988,A
323 | 4-202298-A-31.wav,4,31,mouse_click,False,202298,A
324 | 4-202749-A-13.wav,4,13,crickets,False,202749,A
325 | 4-204115-A-39.wav,4,39,glass_breaking,False,204115,A
326 | 4-204119-A-39.wav,4,39,glass_breaking,False,204119,A
327 | 4-204121-A-39.wav,4,39,glass_breaking,False,204121,A
328 | 4-204123-A-39.wav,4,39,glass_breaking,False,204123,A
329 | 4-204202-A-29.wav,4,29,drinking_sipping,False,204202,A
330 | 4-204612-A-31.wav,4,31,mouse_click,False,204612,A
331 | 4-204618-A-11.wav,4,11,sea_waves,True,204618,A
332 | 4-204683-A-31.wav,4,31,mouse_click,False,204683,A
333 | 4-204684-A-37.wav,4,37,clock_alarm,False,204684,A
334 | 4-204777-A-39.wav,4,39,glass_breaking,False,204777,A
335 | 4-204777-B-39.wav,4,39,glass_breaking,False,204777,B
336 | 4-204777-C-39.wav,4,39,glass_breaking,False,204777,C
337 | 4-204830-A-6.wav,4,6,hen,False,204830,A
338 | 4-205526-A-23.wav,4,23,breathing,False,205526,A
339 | 4-205526-B-23.wav,4,23,breathing,False,205526,B
340 | 4-205738-A-22.wav,4,22,clapping,False,205738,A
341 | 4-207116-A-23.wav,4,23,breathing,False,207116,A
342 | 4-207124-A-0.wav,4,0,dog,True,207124,A
343 | 4-208021-A-1.wav,4,1,rooster,True,208021,A
344 | 4-209536-A-37.wav,4,37,clock_alarm,False,209536,A
345 | 4-209698-A-37.wav,4,37,clock_alarm,False,209698,A
346 | 4-210000-A-23.wav,4,23,breathing,False,210000,A
347 | 4-210000-B-23.wav,4,23,breathing,False,210000,B
348 | 4-210309-A-31.wav,4,31,mouse_click,False,210309,A
349 | 4-210593-A-29.wav,4,29,drinking_sipping,False,210593,A
350 | 4-211502-A-30.wav,4,30,door_wood_knock,False,211502,A
351 | 4-212604-A-15.wav,4,15,water_drops,False,212604,A
352 | 4-212604-B-15.wav,4,15,water_drops,False,212604,B
353 | 4-212604-C-15.wav,4,15,water_drops,False,212604,C
354 | 4-212698-A-39.wav,4,39,glass_breaking,False,212698,A
355 | 4-212728-A-34.wav,4,34,can_opening,False,212728,A
356 | 4-213193-A-29.wav,4,29,drinking_sipping,False,213193,A
357 | 4-213915-A-3.wav,4,3,cow,False,213915,A
358 | 4-213915-B-3.wav,4,3,cow,False,213915,B
359 | 4-213915-C-3.wav,4,3,cow,False,213915,C
360 | 4-215635-A-34.wav,4,34,can_opening,False,215635,A
361 | 4-216211-A-14.wav,4,14,chirping_birds,False,216211,A
362 | 4-216349-A-34.wav,4,34,can_opening,False,216349,A
363 | 4-218199-A-35.wav,4,35,washing_machine,False,218199,A
364 | 4-218199-B-35.wav,4,35,washing_machine,False,218199,B
365 | 4-218199-C-35.wav,4,35,washing_machine,False,218199,C
366 | 4-218199-D-35.wav,4,35,washing_machine,False,218199,D
367 | 4-218199-E-35.wav,4,35,washing_machine,False,218199,E
368 | 4-218199-F-35.wav,4,35,washing_machine,False,218199,F
369 | 4-218199-G-35.wav,4,35,washing_machine,False,218199,G
370 | 4-218199-H-35.wav,4,35,washing_machine,False,218199,H
371 | 4-218304-A-25.wav,4,25,footsteps,False,218304,A
372 | 4-218304-B-25.wav,4,25,footsteps,False,218304,B
373 | 4-223125-A-14.wav,4,14,chirping_birds,False,223125,A
374 | 4-223127-A-14.wav,4,14,chirping_birds,False,223127,A
375 | 4-232495-A-6.wav,4,6,hen,False,232495,A
376 | 4-234644-A-2.wav,4,2,pig,False,234644,A
377 | 4-234879-A-6.wav,4,6,hen,False,234879,A
378 | 4-244318-A-6.wav,4,6,hen,False,244318,A
379 | 4-250864-A-8.wav,4,8,sheep,False,250864,A
380 | 4-250869-A-2.wav,4,2,pig,False,250869,A
381 | 4-250869-B-2.wav,4,2,pig,False,250869,B
382 | 4-250869-C-2.wav,4,2,pig,False,250869,C
383 | 4-251645-A-49.wav,4,49,hand_saw,False,251645,A
384 | 4-251645-B-49.wav,4,49,hand_saw,False,251645,B
385 | 4-251959-A-47.wav,4,47,airplane,False,251959,A
386 | 4-253649-A-6.wav,4,6,hen,False,253649,A
387 | 4-255371-A-47.wav,4,47,airplane,False,255371,A
388 | 4-261068-A-30.wav,4,30,door_wood_knock,False,261068,A
389 | 4-264453-A-34.wav,4,34,can_opening,False,264453,A
390 | 4-59579-A-20.wav,4,20,crying_baby,True,59579,A
391 | 4-59579-B-20.wav,4,20,crying_baby,True,59579,B
392 | 4-67358-A-42.wav,4,42,siren,False,67358,A
393 | 4-80761-A-42.wav,4,42,siren,False,80761,A
394 | 4-90014-A-42.wav,4,42,siren,False,90014,A
395 | 4-90014-B-42.wav,4,42,siren,False,90014,B
396 | 4-99193-A-4.wav,4,4,frog,False,99193,A
397 | 4-99193-B-4.wav,4,4,frog,False,99193,B
398 | 4-99644-A-4.wav,4,4,frog,False,99644,A
399 | 4-99644-B-4.wav,4,4,frog,False,99644,B
400 | 4-99644-C-4.wav,4,4,frog,False,99644,C
401 | 4-99644-D-4.wav,4,4,frog,False,99644,D
402 | 


--------------------------------------------------------------------------------
/util_esc50/fold5_test.csv:
--------------------------------------------------------------------------------
  1 | filename,fold,target,category,esc10,src_file,take
  2 | 5-103415-A-2.wav,5,2,pig,False,103415,A
  3 | 5-103416-A-2.wav,5,2,pig,False,103416,A
  4 | 5-103418-A-2.wav,5,2,pig,False,103418,A
  5 | 5-103420-A-2.wav,5,2,pig,False,103420,A
  6 | 5-103421-A-2.wav,5,2,pig,False,103421,A
  7 | 5-103422-A-2.wav,5,2,pig,False,103422,A
  8 | 5-117118-A-42.wav,5,42,siren,False,117118,A
  9 | 5-117120-A-42.wav,5,42,siren,False,117120,A
 10 | 5-117122-A-42.wav,5,42,siren,False,117122,A
 11 | 5-117250-A-2.wav,5,2,pig,False,117250,A
 12 | 5-117773-A-16.wav,5,16,wind,False,117773,A
 13 | 5-127990-A-2.wav,5,2,pig,False,127990,A
 14 | 5-133989-A-42.wav,5,42,siren,False,133989,A
 15 | 5-133989-B-42.wav,5,42,siren,False,133989,B
 16 | 5-141683-A-35.wav,5,35,washing_machine,False,141683,A
 17 | 5-147297-A-27.wav,5,27,brushing_teeth,False,147297,A
 18 | 5-150409-A-42.wav,5,42,siren,False,150409,A
 19 | 5-151085-A-20.wav,5,20,crying_baby,True,151085,A
 20 | 5-156026-A-4.wav,5,4,frog,False,156026,A
 21 | 5-156026-B-4.wav,5,4,frog,False,156026,B
 22 | 5-156026-C-4.wav,5,4,frog,False,156026,C
 23 | 5-156026-D-4.wav,5,4,frog,False,156026,D
 24 | 5-156698-A-18.wav,5,18,toilet_flush,False,156698,A
 25 | 5-156999-A-19.wav,5,19,thunderstorm,False,156999,A
 26 | 5-156999-B-19.wav,5,19,thunderstorm,False,156999,B
 27 | 5-156999-C-19.wav,5,19,thunderstorm,False,156999,C
 28 | 5-156999-D-19.wav,5,19,thunderstorm,False,156999,D
 29 | 5-156999-E-19.wav,5,19,thunderstorm,False,156999,E
 30 | 5-157204-A-16.wav,5,16,wind,False,157204,A
 31 | 5-157204-B-16.wav,5,16,wind,False,157204,B
 32 | 5-160551-A-42.wav,5,42,siren,False,160551,A
 33 | 5-160614-A-48.wav,5,48,fireworks,False,160614,A
 34 | 5-160614-B-48.wav,5,48,fireworks,False,160614,B
 35 | 5-160614-C-48.wav,5,48,fireworks,False,160614,C
 36 | 5-160614-D-48.wav,5,48,fireworks,False,160614,D
 37 | 5-160614-E-48.wav,5,48,fireworks,False,160614,E
 38 | 5-160614-F-48.wav,5,48,fireworks,False,160614,F
 39 | 5-160614-G-48.wav,5,48,fireworks,False,160614,G
 40 | 5-160614-H-48.wav,5,48,fireworks,False,160614,H
 41 | 5-161270-A-33.wav,5,33,door_wood_creaks,False,161270,A
 42 | 5-161270-B-33.wav,5,33,door_wood_creaks,False,161270,B
 43 | 5-169983-A-5.wav,5,5,cat,False,169983,A
 44 | 5-170338-A-41.wav,5,41,chainsaw,True,170338,A
 45 | 5-170338-B-41.wav,5,41,chainsaw,True,170338,B
 46 | 5-171118-A-26.wav,5,26,laughing,False,171118,A
 47 | 5-171653-A-41.wav,5,41,chainsaw,True,171653,A
 48 | 5-172299-A-5.wav,5,5,cat,False,172299,A
 49 | 5-172639-A-5.wav,5,5,cat,False,172639,A
 50 | 5-173568-A-33.wav,5,33,door_wood_creaks,False,173568,A
 51 | 5-177034-A-18.wav,5,18,toilet_flush,False,177034,A
 52 | 5-177614-A-5.wav,5,5,cat,False,177614,A
 53 | 5-177779-A-33.wav,5,33,door_wood_creaks,False,177779,A
 54 | 5-177957-A-40.wav,5,40,helicopter,True,177957,A
 55 | 5-177957-B-40.wav,5,40,helicopter,True,177957,B
 56 | 5-177957-C-40.wav,5,40,helicopter,True,177957,C
 57 | 5-177957-D-40.wav,5,40,helicopter,True,177957,D
 58 | 5-177957-E-40.wav,5,40,helicopter,True,177957,E
 59 | 5-178997-A-24.wav,5,24,coughing,False,178997,A
 60 | 5-179294-A-46.wav,5,46,church_bells,False,179294,A
 61 | 5-179496-A-16.wav,5,16,wind,False,179496,A
 62 | 5-179496-B-16.wav,5,16,wind,False,179496,B
 63 | 5-179860-A-43.wav,5,43,car_horn,False,179860,A
 64 | 5-179863-A-43.wav,5,43,car_horn,False,179863,A
 65 | 5-179865-A-43.wav,5,43,car_horn,False,179865,A
 66 | 5-179866-A-43.wav,5,43,car_horn,False,179866,A
 67 | 5-179868-A-43.wav,5,43,car_horn,False,179868,A
 68 | 5-180156-A-43.wav,5,43,car_horn,False,180156,A
 69 | 5-180156-B-43.wav,5,43,car_horn,False,180156,B
 70 | 5-180156-C-43.wav,5,43,car_horn,False,180156,C
 71 | 5-180229-A-27.wav,5,27,brushing_teeth,False,180229,A
 72 | 5-181458-A-33.wav,5,33,door_wood_creaks,False,181458,A
 73 | 5-181766-A-10.wav,5,10,rain,True,181766,A
 74 | 5-181977-A-35.wav,5,35,washing_machine,False,181977,A
 75 | 5-182007-A-36.wav,5,36,vacuum_cleaner,False,182007,A
 76 | 5-182010-A-36.wav,5,36,vacuum_cleaner,False,182010,A
 77 | 5-182012-A-36.wav,5,36,vacuum_cleaner,False,182012,A
 78 | 5-182404-A-18.wav,5,18,toilet_flush,False,182404,A
 79 | 5-184323-A-42.wav,5,42,siren,False,184323,A
 80 | 5-184871-A-24.wav,5,24,coughing,False,184871,A
 81 | 5-185516-A-27.wav,5,27,brushing_teeth,False,185516,A
 82 | 5-185579-A-41.wav,5,41,chainsaw,True,185579,A
 83 | 5-185579-B-41.wav,5,41,chainsaw,True,185579,B
 84 | 5-185908-A-18.wav,5,18,toilet_flush,False,185908,A
 85 | 5-186924-A-12.wav,5,12,crackling_fire,True,186924,A
 86 | 5-187201-A-4.wav,5,4,frog,False,187201,A
 87 | 5-187201-B-4.wav,5,4,frog,False,187201,B
 88 | 5-187444-A-33.wav,5,33,door_wood_creaks,False,187444,A
 89 | 5-187979-A-21.wav,5,21,sneezing,True,187979,A
 90 | 5-188365-A-36.wav,5,36,vacuum_cleaner,False,188365,A
 91 | 5-188495-A-19.wav,5,19,thunderstorm,False,188495,A
 92 | 5-188606-A-33.wav,5,33,door_wood_creaks,False,188606,A
 93 | 5-188655-A-10.wav,5,10,rain,True,188655,A
 94 | 5-188716-A-46.wav,5,46,church_bells,False,188716,A
 95 | 5-188796-A-45.wav,5,45,train,False,188796,A
 96 | 5-188945-A-45.wav,5,45,train,False,188945,A
 97 | 5-189212-A-12.wav,5,12,crackling_fire,True,189212,A
 98 | 5-189237-A-12.wav,5,12,crackling_fire,True,189237,A
 99 | 5-189795-A-4.wav,5,4,frog,False,189795,A
100 | 5-191131-A-40.wav,5,40,helicopter,True,191131,A
101 | 5-191497-A-33.wav,5,33,door_wood_creaks,False,191497,A
102 | 5-192191-A-19.wav,5,19,thunderstorm,False,192191,A
103 | 5-193339-A-10.wav,5,10,rain,True,193339,A
104 | 5-193473-A-12.wav,5,12,crackling_fire,True,193473,A
105 | 5-193473-B-12.wav,5,12,crackling_fire,True,193473,B
106 | 5-194533-A-21.wav,5,21,sneezing,True,194533,A
107 | 5-194892-A-10.wav,5,10,rain,True,194892,A
108 | 5-194899-A-3.wav,5,3,cow,False,194899,A
109 | 5-194899-B-3.wav,5,3,cow,False,194899,B
110 | 5-194899-C-3.wav,5,3,cow,False,194899,C
111 | 5-194899-D-3.wav,5,3,cow,False,194899,D
112 | 5-194930-A-1.wav,5,1,rooster,True,194930,A
113 | 5-194930-B-1.wav,5,1,rooster,True,194930,B
114 | 5-194932-A-7.wav,5,7,insects,False,194932,A
115 | 5-195517-A-7.wav,5,7,insects,False,195517,A
116 | 5-195518-A-7.wav,5,7,insects,False,195518,A
117 | 5-195557-A-19.wav,5,19,thunderstorm,False,195557,A
118 | 5-195710-A-10.wav,5,10,rain,True,195710,A
119 | 5-197118-A-45.wav,5,45,train,False,197118,A
120 | 5-197121-A-45.wav,5,45,train,False,197121,A
121 | 5-197121-B-45.wav,5,45,train,False,197121,B
122 | 5-197446-A-7.wav,5,7,insects,False,197446,A
123 | 5-197913-A-18.wav,5,18,toilet_flush,False,197913,A
124 | 5-197988-A-46.wav,5,46,church_bells,False,197988,A
125 | 5-198278-A-7.wav,5,7,insects,False,198278,A
126 | 5-198278-B-7.wav,5,7,insects,False,198278,B
127 | 5-198278-C-7.wav,5,7,insects,False,198278,C
128 | 5-198321-A-10.wav,5,10,rain,True,198321,A
129 | 5-198373-A-46.wav,5,46,church_bells,False,198373,A
130 | 5-198411-A-20.wav,5,20,crying_baby,True,198411,A
131 | 5-198411-B-20.wav,5,20,crying_baby,True,198411,B
132 | 5-198411-C-20.wav,5,20,crying_baby,True,198411,C
133 | 5-198411-D-20.wav,5,20,crying_baby,True,198411,D
134 | 5-198411-E-20.wav,5,20,crying_baby,True,198411,E
135 | 5-198411-F-20.wav,5,20,crying_baby,True,198411,F
136 | 5-198411-G-20.wav,5,20,crying_baby,True,198411,G
137 | 5-198600-A-45.wav,5,45,train,False,198600,A
138 | 5-198891-A-8.wav,5,8,sheep,False,198891,A
139 | 5-198891-B-8.wav,5,8,sheep,False,198891,B
140 | 5-198891-C-8.wav,5,8,sheep,False,198891,C
141 | 5-198891-D-8.wav,5,8,sheep,False,198891,D
142 | 5-199284-A-45.wav,5,45,train,False,199284,A
143 | 5-199284-B-45.wav,5,45,train,False,199284,B
144 | 5-200329-A-8.wav,5,8,sheep,False,200329,A
145 | 5-200329-B-8.wav,5,8,sheep,False,200329,B
146 | 5-200329-C-8.wav,5,8,sheep,False,200329,C
147 | 5-200334-A-1.wav,5,1,rooster,True,200334,A
148 | 5-200334-B-1.wav,5,1,rooster,True,200334,B
149 | 5-200339-A-1.wav,5,1,rooster,True,200339,A
150 | 5-200461-A-11.wav,5,11,sea_waves,True,200461,A
151 | 5-200461-B-11.wav,5,11,sea_waves,True,200461,B
152 | 5-201170-A-46.wav,5,46,church_bells,False,201170,A
153 | 5-201172-A-46.wav,5,46,church_bells,False,201172,A
154 | 5-201194-A-38.wav,5,38,clock_tick,True,201194,A
155 | 5-201274-A-21.wav,5,21,sneezing,True,201274,A
156 | 5-201664-A-18.wav,5,18,toilet_flush,False,201664,A
157 | 5-202020-A-18.wav,5,18,toilet_flush,False,202020,A
158 | 5-202220-A-21.wav,5,21,sneezing,True,202220,A
159 | 5-202540-A-18.wav,5,18,toilet_flush,False,202540,A
160 | 5-202795-A-3.wav,5,3,cow,False,202795,A
161 | 5-202898-A-10.wav,5,10,rain,True,202898,A
162 | 5-203128-A-0.wav,5,0,dog,True,203128,A
163 | 5-203128-B-0.wav,5,0,dog,True,203128,B
164 | 5-203739-A-10.wav,5,10,rain,True,203739,A
165 | 5-204114-A-29.wav,5,29,drinking_sipping,False,204114,A
166 | 5-204352-A-13.wav,5,13,crickets,False,204352,A
167 | 5-204352-B-13.wav,5,13,crickets,False,204352,B
168 | 5-204604-A-24.wav,5,24,coughing,False,204604,A
169 | 5-204741-A-46.wav,5,46,church_bells,False,204741,A
170 | 5-205090-A-32.wav,5,32,keyboard_typing,False,205090,A
171 | 5-205589-A-17.wav,5,17,pouring_water,False,205589,A
172 | 5-205589-B-17.wav,5,17,pouring_water,False,205589,B
173 | 5-205898-A-40.wav,5,40,helicopter,True,205898,A
174 | 5-207681-A-24.wav,5,24,coughing,False,207681,A
175 | 5-207781-A-17.wav,5,17,pouring_water,False,207781,A
176 | 5-207811-A-35.wav,5,35,washing_machine,False,207811,A
177 | 5-207811-B-35.wav,5,35,washing_machine,False,207811,B
178 | 5-207836-A-29.wav,5,29,drinking_sipping,False,207836,A
179 | 5-207836-B-29.wav,5,29,drinking_sipping,False,207836,B
180 | 5-207836-C-29.wav,5,29,drinking_sipping,False,207836,C
181 | 5-207836-D-29.wav,5,29,drinking_sipping,False,207836,D
182 | 5-208030-A-0.wav,5,0,dog,True,208030,A
183 | 5-208624-A-38.wav,5,38,clock_tick,True,208624,A
184 | 5-208761-A-24.wav,5,24,coughing,False,208761,A
185 | 5-208810-A-11.wav,5,11,sea_waves,True,208810,A
186 | 5-208810-B-11.wav,5,11,sea_waves,True,208810,B
187 | 5-209698-A-38.wav,5,38,clock_tick,True,209698,A
188 | 5-209719-A-24.wav,5,24,coughing,False,209719,A
189 | 5-209833-A-38.wav,5,38,clock_tick,True,209833,A
190 | 5-209989-A-22.wav,5,22,clapping,False,209989,A
191 | 5-209992-A-44.wav,5,44,engine,False,209992,A
192 | 5-209992-B-44.wav,5,44,engine,False,209992,B
193 | 5-210540-A-13.wav,5,13,crickets,False,210540,A
194 | 5-210571-A-38.wav,5,38,clock_tick,True,210571,A
195 | 5-210612-A-37.wav,5,37,clock_alarm,False,210612,A
196 | 5-211197-A-24.wav,5,24,coughing,False,211197,A
197 | 5-211408-A-37.wav,5,37,clock_alarm,False,211408,A
198 | 5-212050-A-27.wav,5,27,brushing_teeth,False,212050,A
199 | 5-212054-A-36.wav,5,36,vacuum_cleaner,False,212054,A
200 | 5-212059-A-36.wav,5,36,vacuum_cleaner,False,212059,A
201 | 5-212181-A-38.wav,5,38,clock_tick,True,212181,A
202 | 5-212454-A-0.wav,5,0,dog,True,212454,A
203 | 5-212730-A-17.wav,5,17,pouring_water,False,212730,A
204 | 5-212734-A-17.wav,5,17,pouring_water,False,212734,A
205 | 5-212736-A-17.wav,5,17,pouring_water,False,212736,A
206 | 5-213077-A-11.wav,5,11,sea_waves,True,213077,A
207 | 5-213293-A-23.wav,5,23,breathing,False,213293,A
208 | 5-213802-A-12.wav,5,12,crackling_fire,True,213802,A
209 | 5-213836-A-9.wav,5,9,crow,False,213836,A
210 | 5-213836-B-9.wav,5,9,crow,False,213836,B
211 | 5-213836-C-9.wav,5,9,crow,False,213836,C
212 | 5-213836-D-9.wav,5,9,crow,False,213836,D
213 | 5-213855-A-0.wav,5,0,dog,True,213855,A
214 | 5-214362-A-17.wav,5,17,pouring_water,False,214362,A
215 | 5-214759-A-5.wav,5,5,cat,False,214759,A
216 | 5-214759-B-5.wav,5,5,cat,False,214759,B
217 | 5-214869-A-9.wav,5,9,crow,False,214869,A
218 | 5-215005-A-32.wav,5,32,keyboard_typing,False,215005,A
219 | 5-215172-A-13.wav,5,13,crickets,False,215172,A
220 | 5-215179-A-13.wav,5,13,crickets,False,215179,A
221 | 5-215445-A-47.wav,5,47,airplane,False,215445,A
222 | 5-215447-A-47.wav,5,47,airplane,False,215447,A
223 | 5-215449-A-47.wav,5,47,airplane,False,215449,A
224 | 5-215634-A-17.wav,5,17,pouring_water,False,215634,A
225 | 5-215658-A-12.wav,5,12,crackling_fire,True,215658,A
226 | 5-215658-B-12.wav,5,12,crackling_fire,True,215658,B
227 | 5-216131-A-32.wav,5,32,keyboard_typing,False,216131,A
228 | 5-216213-A-13.wav,5,13,crickets,False,216213,A
229 | 5-216214-A-13.wav,5,13,crickets,False,216214,A
230 | 5-216216-A-13.wav,5,13,crickets,False,216216,A
231 | 5-216368-A-28.wav,5,28,snoring,False,216368,A
232 | 5-216370-A-41.wav,5,41,chainsaw,True,216370,A
233 | 5-216370-B-41.wav,5,41,chainsaw,True,216370,B
234 | 5-217158-A-0.wav,5,0,dog,True,217158,A
235 | 5-217186-A-16.wav,5,16,wind,False,217186,A
236 | 5-217186-B-16.wav,5,16,wind,False,217186,B
237 | 5-217186-C-16.wav,5,16,wind,False,217186,C
238 | 5-218196-A-27.wav,5,27,brushing_teeth,False,218196,A
239 | 5-218196-B-27.wav,5,27,brushing_teeth,False,218196,B
240 | 5-218494-A-22.wav,5,22,clapping,False,218494,A
241 | 5-218980-A-30.wav,5,30,door_wood_knock,False,218980,A
242 | 5-218981-A-30.wav,5,30,door_wood_knock,False,218981,A
243 | 5-219044-A-46.wav,5,46,church_bells,False,219044,A
244 | 5-219242-A-37.wav,5,37,clock_alarm,False,219242,A
245 | 5-219242-B-37.wav,5,37,clock_alarm,False,219242,B
246 | 5-219318-A-31.wav,5,31,mouse_click,False,219318,A
247 | 5-219342-A-38.wav,5,38,clock_tick,True,219342,A
248 | 5-219379-A-11.wav,5,11,sea_waves,True,219379,A
249 | 5-219379-B-11.wav,5,11,sea_waves,True,219379,B
250 | 5-219379-C-11.wav,5,11,sea_waves,True,219379,C
251 | 5-220026-A-21.wav,5,21,sneezing,True,220026,A
252 | 5-220027-A-21.wav,5,21,sneezing,True,220027,A
253 | 5-220939-A-27.wav,5,27,brushing_teeth,False,220939,A
254 | 5-220955-A-40.wav,5,40,helicopter,True,220955,A
255 | 5-221518-A-21.wav,5,21,sneezing,True,221518,A
256 | 5-221528-A-39.wav,5,39,glass_breaking,False,221528,A
257 | 5-221529-A-39.wav,5,39,glass_breaking,False,221529,A
258 | 5-221567-A-22.wav,5,22,clapping,False,221567,A
259 | 5-221568-A-22.wav,5,22,clapping,False,221568,A
260 | 5-221593-A-21.wav,5,21,sneezing,True,221593,A
261 | 5-221878-A-34.wav,5,34,can_opening,False,221878,A
262 | 5-221950-A-22.wav,5,22,clapping,False,221950,A
263 | 5-222041-A-9.wav,5,9,crow,False,222041,A
264 | 5-222524-A-41.wav,5,41,chainsaw,True,222524,A
265 | 5-222894-A-32.wav,5,32,keyboard_typing,False,222894,A
266 | 5-223099-A-32.wav,5,32,keyboard_typing,False,223099,A
267 | 5-223099-B-32.wav,5,32,keyboard_typing,False,223099,B
268 | 5-223103-A-31.wav,5,31,mouse_click,False,223103,A
269 | 5-223176-A-37.wav,5,37,clock_alarm,False,223176,A
270 | 5-223317-A-31.wav,5,31,mouse_click,False,223317,A
271 | 5-223810-A-35.wav,5,35,washing_machine,False,223810,A
272 | 5-231551-A-36.wav,5,36,vacuum_cleaner,False,231551,A
273 | 5-231762-A-0.wav,5,0,dog,True,231762,A
274 | 5-232272-A-44.wav,5,44,engine,False,232272,A
275 | 5-232802-A-31.wav,5,31,mouse_click,False,232802,A
276 | 5-232816-A-23.wav,5,23,breathing,False,232816,A
277 | 5-233019-A-31.wav,5,31,mouse_click,False,233019,A
278 | 5-233160-A-1.wav,5,1,rooster,True,233160,A
279 | 5-233260-A-23.wav,5,23,breathing,False,233260,A
280 | 5-233312-A-28.wav,5,28,snoring,False,233312,A
281 | 5-233605-A-39.wav,5,39,glass_breaking,False,233605,A
282 | 5-233607-A-39.wav,5,39,glass_breaking,False,233607,A
283 | 5-233645-A-37.wav,5,37,clock_alarm,False,233645,A
284 | 5-233787-A-7.wav,5,7,insects,False,233787,A
285 | 5-234145-A-28.wav,5,28,snoring,False,234145,A
286 | 5-234247-A-37.wav,5,37,clock_alarm,False,234247,A
287 | 5-234263-A-25.wav,5,25,footsteps,False,234263,A
288 | 5-234335-A-23.wav,5,23,breathing,False,234335,A
289 | 5-234855-A-25.wav,5,25,footsteps,False,234855,A
290 | 5-234879-A-1.wav,5,1,rooster,True,234879,A
291 | 5-234879-B-1.wav,5,1,rooster,True,234879,B
292 | 5-234923-A-32.wav,5,32,keyboard_typing,False,234923,A
293 | 5-235507-A-44.wav,5,44,engine,False,235507,A
294 | 5-235593-A-23.wav,5,23,breathing,False,235593,A
295 | 5-235644-A-30.wav,5,30,door_wood_knock,False,235644,A
296 | 5-235671-A-38.wav,5,38,clock_tick,True,235671,A
297 | 5-235874-A-28.wav,5,28,snoring,False,235874,A
298 | 5-235893-A-28.wav,5,28,snoring,False,235893,A
299 | 5-235956-A-47.wav,5,47,airplane,False,235956,A
300 | 5-236288-A-28.wav,5,28,snoring,False,236288,A
301 | 5-236299-A-34.wav,5,34,can_opening,False,236299,A
302 | 5-237315-A-31.wav,5,31,mouse_click,False,237315,A
303 | 5-237499-A-4.wav,5,4,frog,False,237499,A
304 | 5-237795-A-34.wav,5,34,can_opening,False,237795,A
305 | 5-238021-A-35.wav,5,35,washing_machine,False,238021,A
306 | 5-238492-A-23.wav,5,23,breathing,False,238492,A
307 | 5-238926-A-31.wav,5,31,mouse_click,False,238926,A
308 | 5-238938-A-27.wav,5,27,brushing_teeth,False,238938,A
309 | 5-240671-A-44.wav,5,44,engine,False,240671,A
310 | 5-241846-A-15.wav,5,15,water_drops,False,241846,A
311 | 5-242490-A-14.wav,5,14,chirping_birds,False,242490,A
312 | 5-242491-A-14.wav,5,14,chirping_birds,False,242491,A
313 | 5-242492-A-3.wav,5,3,cow,False,242492,A
314 | 5-242711-A-9.wav,5,9,crow,False,242711,A
315 | 5-242932-A-26.wav,5,26,laughing,False,242932,A
316 | 5-242932-B-26.wav,5,26,laughing,False,242932,B
317 | 5-243025-A-25.wav,5,25,footsteps,False,243025,A
318 | 5-243036-A-29.wav,5,29,drinking_sipping,False,243036,A
319 | 5-243448-A-14.wav,5,14,chirping_birds,False,243448,A
320 | 5-243449-A-14.wav,5,14,chirping_birds,False,243449,A
321 | 5-243450-A-14.wav,5,14,chirping_birds,False,243450,A
322 | 5-243459-A-14.wav,5,14,chirping_birds,False,243459,A
323 | 5-243459-B-14.wav,5,14,chirping_birds,False,243459,B
324 | 5-243635-A-29.wav,5,29,drinking_sipping,False,243635,A
325 | 5-243773-A-44.wav,5,44,engine,False,243773,A
326 | 5-243773-B-44.wav,5,44,engine,False,243773,B
327 | 5-243783-A-44.wav,5,44,engine,False,243783,A
328 | 5-244178-A-32.wav,5,32,keyboard_typing,False,244178,A
329 | 5-244310-A-25.wav,5,25,footsteps,False,244310,A
330 | 5-244315-A-6.wav,5,6,hen,False,244315,A
331 | 5-244315-B-6.wav,5,6,hen,False,244315,B
332 | 5-244315-C-6.wav,5,6,hen,False,244315,C
333 | 5-244327-A-34.wav,5,34,can_opening,False,244327,A
334 | 5-244459-A-28.wav,5,28,snoring,False,244459,A
335 | 5-244526-A-26.wav,5,26,laughing,False,244526,A
336 | 5-244651-A-31.wav,5,31,mouse_click,False,244651,A
337 | 5-244933-A-34.wav,5,34,can_opening,False,244933,A
338 | 5-245040-A-35.wav,5,35,washing_machine,False,245040,A
339 | 5-248341-A-6.wav,5,6,hen,False,248341,A
340 | 5-248341-B-6.wav,5,6,hen,False,248341,B
341 | 5-248341-C-6.wav,5,6,hen,False,248341,C
342 | 5-249748-A-28.wav,5,28,snoring,False,249748,A
343 | 5-249937-A-22.wav,5,22,clapping,False,249937,A
344 | 5-250026-A-30.wav,5,30,door_wood_knock,False,250026,A
345 | 5-250026-B-30.wav,5,30,door_wood_knock,False,250026,B
346 | 5-250258-A-49.wav,5,49,hand_saw,False,250258,A
347 | 5-250629-A-37.wav,5,37,clock_alarm,False,250629,A
348 | 5-250753-A-34.wav,5,34,can_opening,False,250753,A
349 | 5-251426-A-30.wav,5,30,door_wood_knock,False,251426,A
350 | 5-251426-B-30.wav,5,30,door_wood_knock,False,251426,B
351 | 5-251489-A-24.wav,5,24,coughing,False,251489,A
352 | 5-251957-A-47.wav,5,47,airplane,False,251957,A
353 | 5-251962-A-47.wav,5,47,airplane,False,251962,A
354 | 5-251963-A-47.wav,5,47,airplane,False,251963,A
355 | 5-251971-A-47.wav,5,47,airplane,False,251971,A
356 | 5-252248-A-34.wav,5,34,can_opening,False,252248,A
357 | 5-253085-A-3.wav,5,3,cow,False,253085,A
358 | 5-253085-B-3.wav,5,3,cow,False,253085,B
359 | 5-253094-A-49.wav,5,49,hand_saw,False,253094,A
360 | 5-253094-B-49.wav,5,49,hand_saw,False,253094,B
361 | 5-253094-C-49.wav,5,49,hand_saw,False,253094,C
362 | 5-253094-D-49.wav,5,49,hand_saw,False,253094,D
363 | 5-253101-A-49.wav,5,49,hand_saw,False,253101,A
364 | 5-253101-B-49.wav,5,49,hand_saw,False,253101,B
365 | 5-253101-C-49.wav,5,49,hand_saw,False,253101,C
366 | 5-253534-A-26.wav,5,26,laughing,False,253534,A
367 | 5-254160-A-22.wav,5,22,clapping,False,254160,A
368 | 5-254832-A-15.wav,5,15,water_drops,False,254832,A
369 | 5-254832-B-15.wav,5,15,water_drops,False,254832,B
370 | 5-256452-A-5.wav,5,5,cat,False,256452,A
371 | 5-256512-A-30.wav,5,30,door_wood_knock,False,256512,A
372 | 5-257349-A-15.wav,5,15,water_drops,False,257349,A
373 | 5-257642-A-39.wav,5,39,glass_breaking,False,257642,A
374 | 5-257839-A-14.wav,5,14,chirping_birds,False,257839,A
375 | 5-259169-A-5.wav,5,5,cat,False,259169,A
376 | 5-259180-A-15.wav,5,15,water_drops,False,259180,A
377 | 5-259514-A-26.wav,5,26,laughing,False,259514,A
378 | 5-259640-A-29.wav,5,29,drinking_sipping,False,259640,A
379 | 5-260011-A-34.wav,5,34,can_opening,False,260011,A
380 | 5-260164-A-23.wav,5,23,breathing,False,260164,A
381 | 5-260432-A-39.wav,5,39,glass_breaking,False,260432,A
382 | 5-260433-A-39.wav,5,39,glass_breaking,False,260433,A
383 | 5-260434-A-39.wav,5,39,glass_breaking,False,260434,A
384 | 5-260875-A-35.wav,5,35,washing_machine,False,260875,A
385 | 5-261325-A-9.wav,5,9,crow,False,261325,A
386 | 5-261433-A-15.wav,5,15,water_drops,False,261433,A
387 | 5-261439-A-15.wav,5,15,water_drops,False,261439,A
388 | 5-261464-A-23.wav,5,23,breathing,False,261464,A
389 | 5-262641-A-15.wav,5,15,water_drops,False,262641,A
390 | 5-262957-A-22.wav,5,22,clapping,False,262957,A
391 | 5-263490-A-25.wav,5,25,footsteps,False,263490,A
392 | 5-263491-A-25.wav,5,25,footsteps,False,263491,A
393 | 5-263501-A-25.wav,5,25,footsteps,False,263501,A
394 | 5-263775-A-26.wav,5,26,laughing,False,263775,A
395 | 5-263775-B-26.wav,5,26,laughing,False,263775,B
396 | 5-263831-A-6.wav,5,6,hen,False,263831,A
397 | 5-263831-B-6.wav,5,6,hen,False,263831,B
398 | 5-263902-A-36.wav,5,36,vacuum_cleaner,False,263902,A
399 | 5-51149-A-25.wav,5,25,footsteps,False,51149,A
400 | 5-61635-A-8.wav,5,8,sheep,False,61635,A
401 | 5-9032-A-0.wav,5,0,dog,True,9032,A
402 | 


--------------------------------------------------------------------------------