├── fig ├── readme.md └── x1.png ├── util_esc50 ├── readme.md ├── SpecAugment │ ├── __init__.py │ ├── readme.md │ ├── spec_augment_pytorch.py │ ├── spec_augment_tensorflow.py │ └── sparse_image_warp_pytorch.py ├── __pycache__ │ ├── config.cpython-37.pyc │ └── utils.cpython-37.pyc ├── losses.py ├── cross_folds.py ├── config.py ├── pytorch_utils.py ├── evaluate.py ├── utils.py ├── data_generator.py ├── test.py ├── feature.py ├── main.py ├── audio.py ├── net.py ├── fold1_test.csv ├── fold2_test.csv ├── fold3_test.csv ├── fold4_test.csv └── fold5_test.csv ├── workspace └── readme.md ├── wrong_list └── readme.md ├── runme.sh └── README.md /fig/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /util_esc50/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /workspace/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /wrong_list/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /util_esc50/SpecAugment/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /util_esc50/SpecAugment/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /fig/x1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Hadryan/TFNet-for-Environmental-Sound-Classification/HEAD/fig/x1.png -------------------------------------------------------------------------------- /util_esc50/__pycache__/config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hadryan/TFNet-for-Environmental-Sound-Classification/HEAD/util_esc50/__pycache__/config.cpython-37.pyc -------------------------------------------------------------------------------- /util_esc50/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Hadryan/TFNet-for-Environmental-Sound-Classification/HEAD/util_esc50/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /util_esc50/losses.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn.functional as F 3 | 4 | 5 | def nll_loss(output, target): 6 | '''Negative likelihood loss. The output should be obtained using F.log_softmax(x). 
#!/bin/bash
# End-to-end recipe: extract features, then train and test TFNet on one fold.
# Abort on the first failing command so training never starts on missing or
# partially written features.
set -e

# You need to modify this path to your downloaded dataset directory
# Download ESC-10 and ESC-50 : https://github.com/karolpiczak/ESC-50
# Download UrbanSound8k : https://urbansounddataset.weebly.com/urbansound8k.html
DATASET_DIR='/.../ESC-50'
#DATASET_DIR='/.../ESC-10'
#DATASET_DIR='/.../UrbanSound8k'

# You need to modify this path to your workspace to store features and models
WORKSPACE='/workspace'

# Hyper-parameters
GPU_ID=0
MODEL_TYPE='TFNet'
BATCH_SIZE=32

############ Train and test on ESC50 dataset ############
# Calculate feature
# Variables are quoted so that paths containing spaces survive word splitting.
python util_esc50/feature.py calculate_feature_for_all_audio_files --dataset_dir="$DATASET_DIR" --workspace="$WORKSPACE"

# Train and test
# The training entry point lives in util_esc50/ (there is no util/ directory).
CUDA_VISIBLE_DEVICES="$GPU_ID" python util_esc50/main.py train --dataset_dir="$DATASET_DIR" --workspace="$WORKSPACE" --holdout_fold=1 --model_type="$MODEL_TYPE" --batch_size="$BATCH_SIZE" --cuda
def make_folds(df, test_fold, train_fold):
    '''Write the test and train CSV lists for one cross-validation split.

    Two files are created in the current working directory:
    'fold<test_fold>_test.csv' and 'fold<test_fold>_train.csv'. Both are
    named after the held-out fold so that the pair belongs together.

    Args:
      df: pandas DataFrame with a 'fold' column
      test_fold: value of 'fold' selecting the held-out rows
      train_fold: list of 'fold' values selecting the training rows
    '''
    test_path = 'fold' + str(test_fold) + '_test.csv'
    train_path = 'fold' + str(test_fold) + '_train.csv'

    data_test = df[df['fold'].isin([test_fold])]
    data_train = df[df['fold'].isin(train_fold)]

    # Context managers guarantee the files are flushed and closed; the
    # previous version left both handles open.
    with open(test_path, 'w') as fp_test:
        fp_test.write(data_test.to_csv(header=True, index=False))

    with open(train_path, 'w') as fp_train:
        fp_train.write(data_train.to_csv(header=True, index=False))
Convolutional neural networks (CNNs) are one of the best-performing neural network architectures for environmental sound classification (ESC). Recently, attention mechanisms have been used in CNNs to capture the useful information from the audio signal for sound classification, especially for weakly labelled data where the timing information about the acoustic events is not available in the training data, apart from the availability of sound class labels. In these methods, however, the inherent time-frequency characteristics and variations are not explicitly exploited when obtaining the deep features. In this paper, we propose a new method, called time-frequency enhancement block (TFBlock), in which temporal attention and frequency attention are employed to enhance the features from relevant frames and frequency bands.
def move_data_to_gpu(x, cuda):
    '''Convert an array to a torch tensor and optionally move it to the GPU.

    Args:
      x: array exposing a `dtype` attribute (e.g. a numpy array). Dtypes whose
        name contains 'float' are converted with `torch.Tensor`, dtypes whose
        name contains 'int' with `torch.LongTensor`.
      cuda: bool, call `.cuda()` on the resulting tensor when True

    Returns:
      torch tensor built from `x`

    Raises:
      TypeError: if the dtype of `x` is neither a float nor an int type
    '''
    dtype_name = str(x.dtype)

    if 'float' in dtype_name:
        x = torch.Tensor(x)
    elif 'int' in dtype_name:
        x = torch.LongTensor(x)
    else:
        # TypeError is a subclass of Exception, so callers that caught the
        # former generic Exception keep working, but now get a useful message.
        raise TypeError(
            'Unsupported dtype {!r}: expected a float or int array.'.format(
                dtype_name))

    if cuda:
        x = x.cuda()

    return x
class Evaluator(object):
    def __init__(self, model, data_generator, cuda=True):
        '''Evaluator to evaluate prediction performance.

        Args:
          model: object, handed to `forward` to compute the predictions
          data_generator: object, must provide
            `generate_validate(data_type=..., max_iteration=...)`
          cuda: bool, forwarded to `forward`
        '''

        self.model = model
        self.data_generator = data_generator
        self.cuda = cuda

        self.frames_per_second = config.frames_per_second
        self.labels = config.labels
        # NOTE(review): this attribute is one less than the number of labels.
        # It is kept unchanged for backward compatibility but is no longer
        # used for scoring, because it dropped the last class from the
        # confusion matrix.
        self.in_domain_classes_num = len(config.labels) - 1
        self.all_classes_num = len(config.labels)
        self.idx_to_lb = config.idx_to_lb
        self.lb_to_idx = config.lb_to_idx

    def evaluate(self, data_type, iteration, max_iteration=None, verbose=False):
        '''Evaluate the performance.

        When `data_type` is 'validate', every misclassified file is appended
        to 'wrong_list/wrong_classification_<iteration>' as one tab-separated
        line: filename, true index, true label, predicted index, predicted
        label.

        Args:
          data_type: 'train' | 'validate'
          iteration: value used (via `str`) in the name of the wrong-list file
          max_iteration: None | int, maximum iteration to run to speed up evaluation
          verbose: bool, also log the per-class accuracy and confusion matrix

        Returns:
          statistics: dict with keys 'accuracy' (class-wise accuracy array)
            and 'confusion_matrix'
        '''

        generate_func = self.data_generator.generate_validate(
            data_type=data_type,
            max_iteration=max_iteration)

        # Forward
        output_dict = forward(
            model=self.model,
            generate_func=generate_func,
            cuda=self.cuda,
            return_target=True)

        wrong_list_path = 'wrong_list/' + 'wrong_classification_' + str(iteration)
        output = output_dict['output']      # presumably (audios_num, classes_num) - TODO confirm
        target = output_dict['target']      # presumably (audios_num, classes_num) - TODO confirm
        filename = output_dict['filename']

        # The exponential suggests the model emits log-probabilities; argmax
        # is unaffected either way.
        prob = np.exp(output)

        # Evaluate
        y_true = np.argmax(target, axis=-1)
        y_pred = np.argmax(prob, axis=-1)

        if data_type == 'validate':
            wrong_indexes = [
                i for i in range(len(y_true)) if y_true[i] != y_pred[i]]

            # Open the file once instead of once per misclassified sample.
            # As before, the file is only touched when there is something
            # to append.
            if wrong_indexes:
                with open(wrong_list_path, 'a') as f:
                    for i in wrong_indexes:
                        audioname = filename[i]
                        true_idx = str(y_true[i])
                        pred_idx = str(y_pred[i])
                        true_label = self.idx_to_lb[y_true[i]]
                        pred_label = self.idx_to_lb[y_pred[i]]
                        f.write(audioname + '\t' + true_idx + '\t' + true_label
                                + '\t' + pred_idx + '\t' + pred_label + '\n')

        # Score over ALL classes. Using `in_domain_classes_num` here silently
        # excluded the last label from the matrix and from the mean accuracy.
        confusion_matrix = metrics.confusion_matrix(
            y_true, y_pred, labels=np.arange(self.all_classes_num))

        # NOTE(review): a class without any true sample yields a 0/0 = nan
        # entry, which also makes the mean below nan.
        classwise_accuracy = np.diag(confusion_matrix) \
            / np.sum(confusion_matrix, axis=-1)

        logging.info('Data type: {}'.format(data_type))

        logging.info('    Average ccuracy: {:.3f}'.format(np.mean(classwise_accuracy)))

        if verbose:
            classes_num = len(classwise_accuracy)
            for n in range(classes_num):
                logging.info('{:<20}{:.3f}'.format(self.labels[n],
                    classwise_accuracy[n]))

            logging.info(confusion_matrix)

        statistics = {
            'accuracy': classwise_accuracy,
            'confusion_matrix': confusion_matrix}

        return statistics
123 | 124 | Args: 125 | iteration: int 126 | statistics: dict of statistics 127 | ''' 128 | statistics['iteration'] = iteration 129 | self.statistics_dict['data'].append(statistics) 130 | 131 | cPickle.dump(self.statistics_dict, open(self.statistics_path, 'wb')) 132 | cPickle.dump(self.statistics_dict, open(self.backup_statistics_path, 'wb')) 133 | logging.info(' Dump statistics to {}'.format(self.statistics_path)) -------------------------------------------------------------------------------- /util_esc50/SpecAugment/spec_augment_pytorch.py: -------------------------------------------------------------------------------- 1 | # # Reference : https://github.com/DemisEom/SpecAugment 2 | # Copyright 2019 RnD at Spoon Radio 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | """SpecAugment Implementation for Tensorflow. 17 | Related paper : https://arxiv.org/pdf/1904.08779.pdf 18 | 19 | In this paper, show summarized parameters by each open datasets in Tabel 1. 
def spec_augment(mel_spectrogram, using_time_warping=False, using_frequency_masking=False, using_time_masking=False,
                 frequency_masking_para=4, time_masking_para=4,
                 frequency_mask_num=2, time_mask_num=2):
    """Apply SpecAugment-style augmentation to a batch of mel spectrograms.

    Up to three steps are applied, each enabled by its own flag: a random
    time shift per (frame, bin) cell, frequency masking, and time masking.
    The input is modified IN PLACE and the same object is returned.

    # Arguments:
      mel_spectrogram(array): 4-D batch indexed as
        [sample, :, time_step, freq_bin]. The meaning of the second axis is
        not visible here (presumably channels - TODO confirm).
      using_time_warping(bool): enable step 1. Default = False.
      using_frequency_masking(bool): enable step 2. Default = False.
      using_time_masking(bool): enable step 3. Default = False.
      frequency_masking_para(float): upper bound (exclusive) of the width of
        one frequency mask, "frequency mask parameter F". Default = 4.
      time_masking_para(float): upper bound (exclusive) of the width of one
        time mask, "time mask parameter T". Default = 4.
      frequency_mask_num(int): number of frequency masks per sample, "m_F".
        Default = 2.
      time_mask_num(int): number of time masks per sample, "m_T".
        Default = 2.

    # Returns
      mel_spectrogram(array): the augmented input (same object).
    """
    # v : freq_bins
    v = mel_spectrogram.shape[3]
    # tau : times_steps
    tau = mel_spectrogram.shape[2]
    num = mel_spectrogram.shape[0]

    # Alias, not a copy: all steps below write into the caller's array.
    warped_mel_spectrogram = mel_spectrogram

    # Step 1 : Time warping (TO DO...)
    if using_time_warping:
        for n in range(num):
            for i in range(tau):
                for j in range(v):
                    # The upper bound is clamped to 0: random.randint(0, -1)
                    # raised ValueError for the first frame, which made this
                    # step unusable.
                    offset_x = random.randint(0, max(i - 1, 0))
                    warped_mel_spectrogram[n, :, i, j] = mel_spectrogram[n, :, (i + offset_x) % tau, j]

    # Step 2 : Frequency masking
    if using_frequency_masking:
        for n in range(num):
            for i in range(frequency_mask_num):
                f = int(np.random.uniform(low=0.0, high=frequency_masking_para))
                # A mask cannot be wider than the axis; without the clamp
                # random.randint received a negative upper bound.
                f = min(f, v)
                f0 = random.randint(0, v - f)
                warped_mel_spectrogram[n, :, :, f0:f0 + f] = 0

    # Step 3 : Time masking
    if using_time_masking:
        for n in range(num):
            for i in range(time_mask_num):
                t = int(np.random.uniform(low=0.0, high=time_masking_para))
                t = min(t, tau)
                t0 = random.randint(0, tau - t)
                warped_mel_spectrogram[n, :, t0:t0 + t, :] = 0

    return warped_mel_spectrogram
122 | plt.figure(figsize=(10, 4)) 123 | librosa.display.specshow(librosa.power_to_db(mel_spectrogram, ref=np.max), y_axis='mel', fmax=14000, x_axis='time') 124 | # plt.colorbar(format='%+2.0f dB') 125 | plt.title(title) 126 | plt.tight_layout() 127 | plt.show() 128 | 129 | -------------------------------------------------------------------------------- /util_esc50/SpecAugment/spec_augment_tensorflow.py: -------------------------------------------------------------------------------- 1 | # Reference : https://github.com/DemisEom/SpecAugment 2 | # Copyright 2019 RnD at Spoon Radio 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # ============================================================================== 16 | """SpecAugment Implementation for Tensorflow. 17 | Related paper : https://arxiv.org/pdf/1904.08779.pdf 18 | 19 | In this paper, show summarized parameters by each open datasets in Tabel 1. 
def spec_augment(mel_spectrogram, time_warping_para=80, frequency_masking_para=27,
                 time_masking_para=100, frequency_mask_num=1, time_mask_num=1):
    """Spec augmentation Calculation Function.

    'SpecAugment' have 3 steps for audio data augmentation.
    first step is time warping using Tensorflow's image_sparse_warp function.
    Second step is frequency masking, last step is time masking.

    # Arguments:
      mel_spectrogram(numpy array): 2-D mel spectrogram. Axis 0 is treated as
        the frequency axis and axis 1 as the time axis (see the masking steps).
      time_warping_para(float): Augmentation parameter, "time warp parameter W".
        Default = 80.
      frequency_masking_para(float): Augmentation parameter, "frequency mask parameter F"
        Default = 27.
      time_masking_para(float): Augmentation parameter, "time mask parameter T"
        Default = 100.
      frequency_mask_num(float): number of frequency masking lines, "m_F".
        Default = 1.
      time_mask_num(float): number of time masking lines, "m_T".
        Default = 1.

    # Returns
      mel_spectrogram(numpy array): warped and masked mel spectrogram.
    """
    # v : size of axis 0 (masked in the frequency step)
    v = mel_spectrogram.shape[0]
    # tau : size of axis 1 (masked in the time step)
    tau = mel_spectrogram.shape[1]

    # Step 1 : Time warping
    # Image warping control point setting.
    # NOTE(review): tf.placeholder / tf.Session / tf.contrib are TensorFlow 1.x
    # APIs; new placeholders are created on every call.
    mel_spectrogram_holder = tf.placeholder(tf.float32, shape=[1, v, tau, 1])
    location_holder = tf.placeholder(tf.float32, shape=[1, 1, 2])
    destination_holder = tf.placeholder(tf.float32, shape=[1, 1, 2])

    center_position = v/2
    # NOTE(review): numpy's randint needs low < high, so this raises
    # ValueError when tau <= 2 * time_warping_para.
    random_point = np.random.randint(low=time_warping_para, high=tau - time_warping_para)
    # Warping distance, drawn uniformly from [0, time_warping_para).
    w = np.random.uniform(low=0, high=time_warping_para)

    # Single control point on the centre row, moved by w along axis 1.
    control_point_locations = [[center_position, random_point]]
    control_point_locations = np.float32(np.expand_dims(control_point_locations, 0))

    control_point_destination = [[center_position, random_point + w]]
    control_point_destination = np.float32(np.expand_dims(control_point_destination, 0))

    # Reshape to [1, v, tau, 1] float32 to match mel_spectrogram_holder.
    mel_spectrogram = mel_spectrogram.reshape([1, mel_spectrogram.shape[0], mel_spectrogram.shape[1], 1])
    mel_spectrogram = np.float32(mel_spectrogram)

    warped_mel_spectrogram_op, _ = sparse_image_warp(mel_spectrogram_holder,
                                                     source_control_point_locations=location_holder,
                                                     dest_control_point_locations=destination_holder,
                                                     interpolation_order=2,
                                                     regularization_weight=0,
                                                     num_boundary_points=1
                                                     )

    # Change warp result's data type to numpy array for masking step.
    feed_dict = {mel_spectrogram_holder:mel_spectrogram,
                 location_holder:control_point_locations,
                 destination_holder:control_point_destination}

    with tf.Session() as sess:
        warped_mel_spectrogram = sess.run(warped_mel_spectrogram_op, feed_dict=feed_dict)

    # Drop the leading and trailing singleton axes again -> (v, tau).
    warped_mel_spectrogram = warped_mel_spectrogram.reshape([warped_mel_spectrogram.shape[1],
                                                             warped_mel_spectrogram.shape[2]])

    # Step 2 : Frequency masking
    # NOTE(review): random.randint raises ValueError if the drawn width f
    # exceeds v (possible when frequency_masking_para > v + 1).
    for i in range(frequency_mask_num):
        f = np.random.uniform(low=0.0, high=frequency_masking_para)
        f = int(f)
        f0 = random.randint(0, v - f)
        warped_mel_spectrogram[f0:f0 + f, :] = 0

    # Step 3 : Time masking
    # NOTE(review): same constraint as above for t and tau.
    for i in range(time_mask_num):
        t = np.random.uniform(low=0.0, high=time_masking_para)
        t = int(t)
        t0 = random.randint(0, tau - t)
        warped_mel_spectrogram[:, t0:t0 + t] = 0

    return warped_mel_spectrogram
def mixup_data(x, y, alpha=0.2):
    '''Compute the mixup data. Return mixed inputs, pairs of targets, and lambda.

    Args:
      x: torch tensor with at least 2 dimensions, batch on axis 0
      y: targets aligned with `x` on axis 0, indexable by an index tensor
      alpha: float, parameter of the Beta(alpha, alpha) distribution that
        lambda is drawn from. Values <= 0 disable mixing (lambda = 1).

    Returns:
      mixed_x: lam * x + (1 - lam) * x[index], where `index` is a random
        permutation of the batch
      y_a: the original targets `y`
      y_b: the targets permuted with the same `index`
      lam: the mixing coefficient
    '''
    if alpha > 0.:
        # Rejection sampling: only accept lambdas outside [0.35, 0.65], so one
        # of the two mixed samples always dominates.
        # NOTE(review): for large alpha almost all draws fall inside the
        # rejected band and this loop can spin for a long time.
        while True:
            lam = np.random.beta(alpha, alpha)
            if lam > 0.65 or lam < 0.35:
                break
    else:
        lam = 1.

    batch_size = x.size()[0]
    # Follow the device of `x` instead of calling .cuda() unconditionally,
    # which crashed on CPU-only runs. The permutation is still drawn on the
    # CPU first, so the random stream is unchanged for GPU inputs.
    index = torch.randperm(batch_size).to(x.device)
    mixed_x = lam * x + (1 - lam) * x[index, :]
    y_a, y_b = y, y[index]
    return mixed_x, y_a, y_b, lam
def read_left_audio(audio_path, target_fs=None):
    '''Read an audio file and keep only its first (left) channel.

    Args:
      audio_path: string, path of the audio file
      target_fs: None | int, resample to this rate when it differs from the
        rate of the file

    Returns:
      audio: 1-D array of samples (mono files are returned unchanged)
      fs: int, sample rate of the returned audio
    '''
    (audio, fs) = soundfile.read(audio_path)

    if audio.ndim > 1:
        # Multi-channel data is laid out as (frames, channels), so a channel
        # is a column. `audio[0]` selected the first sample frame instead.
        audio = audio[:, 0]

    if target_fs is not None and fs != target_fs:
        audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
        fs = target_fs

    return audio, fs


def read_side_audio(audio_path, target_fs=None):
    '''Read an audio file and return its side signal (left minus right).

    Args:
      audio_path: string, path of the audio file
      target_fs: None | int, resample to this rate when it differs from the
        rate of the file

    Returns:
      audio: 1-D array of samples (mono files are returned unchanged)
      fs: int, sample rate of the returned audio
    '''
    (audio, fs) = soundfile.read(audio_path)

    if audio.ndim > 1:
        # Column difference = per-sample difference between the channels.
        # `audio[0] - audio[1]` subtracted two sample frames instead.
        audio = audio[:, 0] - audio[:, 1]

    if target_fs is not None and fs != target_fs:
        audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
        fs = target_fs

    return audio, fs


def read_right_audio(audio_path, target_fs=None):
    '''Read an audio file and keep only its second (right) channel.

    Args:
      audio_path: string, path of the audio file
      target_fs: None | int, resample to this rate when it differs from the
        rate of the file

    Returns:
      audio: 1-D array of samples (mono files are returned unchanged)
      fs: int, sample rate of the returned audio
    '''
    (audio, fs) = soundfile.read(audio_path)

    if audio.ndim > 1:
        # Second column, not the second sample frame (`audio[1]`).
        audio = audio[:, 1]

    if target_fs is not None and fs != target_fs:
        audio = librosa.resample(audio, orig_sr=fs, target_sr=target_fs)
        fs = target_fs

    return audio, fs
175 | 176 | Returns: 177 | meta_dict: dict of meta data, e.g.: 178 | {'filename': np.array(['1-100032-A-0.wav', '1-100038-A-14.wav', ...]), 179 | 'fold': np.array([1, 1, ...]), 180 | 'target': np.array([0, 14, ...]), 181 | 'category': np.array(['dog', 'chirping_birds', ...]), 182 | 'esc10': np.array(['True', 'False', ...]), 183 | 'src_file': np.array(['100032', '100038', ...]), 184 | 'take': np.array(['A', 'A', ...]) 185 | } 186 | ''' 187 | 188 | df = pd.read_csv(metadata_path, sep=',') 189 | meta_dict = {} 190 | meta_dict['filename'] = np.array( 191 | [name for name in df['filename'].tolist()]) 192 | 193 | if 'fold' in df.keys(): 194 | meta_dict['fold'] = np.array(df['fold']) 195 | if 'target' in df.keys(): 196 | meta_dict['target'] = np.array(df['target']) 197 | if 'category' in df.keys(): 198 | meta_dict['category'] = np.array(df['category']) 199 | if 'esc10' in df.keys(): 200 | meta_dict['esc10'] = np.array(df['esc10']) 201 | if 'src_file' in df.keys(): 202 | meta_dict['src_file'] = np.array(df['src_file']) 203 | if 'take' in df.keys(): 204 | meta_dict['take'] = np.array(df['take']) 205 | 206 | return meta_dict 207 | 208 | 209 | def sparse_to_categorical(x, n_out): 210 | x = x.astype(int) 211 | shape = x.shape 212 | x = x.flatten() 213 | N = len(x) 214 | x_categ = np.zeros((N,n_out)) 215 | x_categ[np.arange(N), x] = 1 216 | return x_categ.reshape((shape)+(n_out,)) 217 | 218 | 219 | -------------------------------------------------------------------------------- /util_esc50/data_generator.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import h5py 3 | import csv 4 | import time 5 | import logging 6 | import os 7 | import glob 8 | import matplotlib.pyplot as plt 9 | import logging 10 | 11 | from utils import read_metadata, sparse_to_categorical 12 | import config 13 | 14 | 15 | class Base(object): 16 | 17 | def __init__(self): 18 | '''Base class for data generator 19 | ''' 20 | pass 21 | 22 | def 
class Base(object):

    def __init__(self):
        '''Base class for data generator
        '''
        pass

    def load_hdf5(self, hdf5_path):
        '''Load hdf5 file.

        Reads self.lb_to_idx, which the subclass must set before calling.

        Returns:
          data_dict: dict of data, e.g.:
            {'audio_name': np.array(['a.wav', 'b.wav', ...]),
             'feature': (audios_num, frames_num, mel_bins)
             'target': (audios_num,),
             ...}
          'target' and 'category' are only filled when the file has a
          'category' dataset; 'fold' only when it has a 'fold' dataset.
        '''
        data_dict = {}

        with h5py.File(hdf5_path, 'r') as hf:
            data_dict['filename'] = np.array(
                [filename.decode() for filename in hf['filename'][:]])

            data_dict['feature'] = hf['feature'][:].astype(np.float32)

            if 'category' in hf.keys():
                # Decode once and derive both the label names and their indexes
                categories = [category.decode() for category in hf['category'][:]]
                data_dict['target'] = np.array(
                    [self.lb_to_idx[category] for category in categories])
                data_dict['category'] = np.array(categories)

            if 'fold' in hf.keys():
                data_dict['fold'] = np.array(hf['fold'][:])

        return data_dict


class DataGenerator(Base):

    def __init__(self, feature_hdf5_path, train_csv, validate_csv, holdout_fold, batch_size, seed=1234):
        '''Data generator for training and validation.

        Args:
          feature_hdf5_path: string, path of hdf5 feature file
          train_csv: string, path of train csv file
          validate_csv: string, path of validate csv file
          holdout_fold: set 1 for development and none for training
              on all data without validation
          batch_size: int
          seed: int, random seed
        '''

        self.batch_size = batch_size
        self.random_state = np.random.RandomState(seed)

        self.in_domain_classes_num = len(config.labels)
        self.all_classes_num = len(config.labels)
        self.lb_to_idx = config.lb_to_idx
        self.idx_to_lb = config.idx_to_lb

        # Load training data
        load_time = time.time()

        self.data_dict = self.load_hdf5(feature_hdf5_path)

        train_meta = read_metadata(train_csv)
        validate_meta = read_metadata(validate_csv)

        self.train_audio_indexes = self.get_audio_indexes(
            train_meta, self.data_dict, holdout_fold, 'train')

        self.validate_audio_indexes = self.get_audio_indexes(
            validate_meta, self.data_dict, holdout_fold, 'validate')

        if holdout_fold == 'none':
            # Train on everything; nothing is left for validation
            self.train_audio_indexes = np.concatenate(
                (self.train_audio_indexes, self.validate_audio_indexes), axis=0)

            # Integer dtype so the empty array is still a valid index array
            self.validate_audio_indexes = np.array([], dtype=np.int64)

        logging.info('Load data time: {:.3f} s'.format(time.time() - load_time))
        logging.info('Training audio num: {}'.format(len(self.train_audio_indexes)))
        logging.info('Validation audio num: {}'.format(len(self.validate_audio_indexes)))

        self.random_state.shuffle(self.train_audio_indexes)
        self.pointer = 0

    def get_audio_indexes(self, meta_data, data_dict, holdout_fold, data_type):
        '''Get train or validate indexes.

        Maps every filename of meta_data to the position of its first
        occurrence in data_dict['filename']; names that are not in the
        feature file are skipped.

        Args:
          meta_data: dict with a 'filename' array (see read_metadata)
          data_dict: dict with a 'filename' array (see load_hdf5)
          holdout_fold: kept for interface compatibility; the selection does
              not depend on it
          data_type: kept for interface compatibility; the selection does
              not depend on it

        Returns:
          1-D int64 array of indexes, in the order of meta_data['filename']
        '''
        # One pass over the stored names instead of one full scan per lookup.
        # setdefault keeps the first position when a name is stored twice.
        first_position = {}
        for position, stored_name in enumerate(data_dict['filename']):
            first_position.setdefault(stored_name, position)

        audio_indexes = [
            first_position[name] for name in meta_data['filename']
            if name in first_position]

        # Explicit dtype: an empty list would otherwise become a float array,
        # which cannot be used (or concatenated and then used) as an index.
        return np.array(audio_indexes, dtype=np.int64)

    def generate_train(self):
        '''Generate mini-batch data for training.

        Loops forever; the training indexes are reshuffled each time the
        pointer runs past the end, so the last batch of a pass may be smaller
        than batch_size.

        Returns:
          batch_data_dict: dict containing audio_name, feature and target
        '''

        while True:
            # Reset pointer
            if self.pointer >= len(self.train_audio_indexes):
                self.pointer = 0
                self.random_state.shuffle(self.train_audio_indexes)

            # Get batch audio_indexes
            batch_audio_indexes = self.train_audio_indexes[
                self.pointer: self.pointer + self.batch_size]

            self.pointer += self.batch_size

            batch_data_dict = {}

            batch_data_dict['filename'] = \
                self.data_dict['filename'][batch_audio_indexes]

            batch_data_dict['feature'] = \
                self.data_dict['feature'][batch_audio_indexes]

            sparse_target = self.data_dict['target'][batch_audio_indexes]
            batch_data_dict['target'] = sparse_to_categorical(
                sparse_target, self.in_domain_classes_num)

            yield batch_data_dict

    def generate_validate(self, data_type, max_iteration=None):
        '''Generate mini-batch data for validation.

        Makes a single ordered pass over the selected split.

        Args:
          data_type: 'train' | 'validate'
          max_iteration: int, maximum iteration to validate to speed up validation

        Returns:
          batch_data_dict: dict containing audio_name, feature and target
        '''

        batch_size = self.batch_size

        if data_type == 'train':
            audio_indexes = np.array(self.train_audio_indexes)
        elif data_type == 'validate':
            audio_indexes = np.array(self.validate_audio_indexes)
        else:
            raise Exception('Incorrect argument!')

        iteration = 0
        pointer = 0

        while True:
            if iteration == max_iteration:
                break

            # All data has been served
            if pointer >= len(audio_indexes):
                break

            # Get batch audio_indexes
            batch_audio_indexes = audio_indexes[pointer: pointer + batch_size]
            pointer += batch_size
            iteration += 1

            batch_data_dict = {}

            batch_data_dict['filename'] = \
                self.data_dict['filename'][batch_audio_indexes]

            batch_data_dict['feature'] = \
                self.data_dict['feature'][batch_audio_indexes]

            sparse_target = self.data_dict['target'][batch_audio_indexes]
            batch_data_dict['target'] = sparse_to_categorical(
                sparse_target, self.all_classes_num)

            yield batch_data_dict
def _get_boundary_locations(image_height, image_width, num_points_per_edge):
    """Compute evenly-spaced indices along edge of image."""
    y_range = np.linspace(0, image_height - 1, num_points_per_edge + 2)
    x_range = np.linspace(0, image_width - 1, num_points_per_edge + 2)
    ys, xs = np.meshgrid(y_range, x_range, indexing='ij')
    is_boundary = np.logical_or(
        np.logical_or(xs == 0, xs == image_width - 1),
        np.logical_or(ys == 0, ys == image_height - 1))
    return np.stack([ys[is_boundary], xs[is_boundary]], axis=-1)


def _add_zero_flow_controls_at_boundary(control_point_locations,
                                        control_point_flows, image_height,
                                        image_width, boundary_points_per_edge):
    """Add control points for zero-flow boundary conditions.
    Augment the set of control points with extra points on the
    boundary of the image that have zero flow.
    Args:
      control_point_locations: input control points, `[batch, n, 2]`
      control_point_flows: their flows, `[batch, n, 2]`
      image_height: image height
      image_width: image width
      boundary_points_per_edge: number of points to add in the middle of each
        edge (not including the corners).
        The total number of points added is
        4 + 4*(boundary_points_per_edge).
    Returns:
      merged_control_point_locations: augmented set of control point locations
      merged_control_point_flows: augmented set of control point flows
    """
    control_point_locations = np.asarray(control_point_locations)
    control_point_flows = np.asarray(control_point_flows)

    batch_size = control_point_locations.shape[0]

    boundary_point_locations = _get_boundary_locations(
        image_height, image_width, boundary_points_per_edge)
    boundary_point_flows = np.zeros([boundary_point_locations.shape[0], 2])

    # The added points follow the dtype of the caller's control points
    type_to_use = control_point_locations.dtype
    boundary_point_locations = _expand_to_minibatch(
        boundary_point_locations, batch_size).astype(type_to_use)
    boundary_point_flows = _expand_to_minibatch(
        boundary_point_flows, batch_size).astype(type_to_use)

    merged_control_point_locations = np.concatenate(
        [control_point_locations, boundary_point_locations], 1)
    merged_control_point_flows = np.concatenate(
        [control_point_flows, boundary_point_flows], 1)

    return merged_control_point_locations, merged_control_point_flows


def _get_grid_locations(image_height, image_width):
    """Wrapper for np.meshgrid: `[height, width, 2]` array of (y, x) pairs."""
    y_range = np.linspace(0, image_height - 1, image_height)
    x_range = np.linspace(0, image_width - 1, image_width)
    y_grid, x_grid = np.meshgrid(y_range, x_range, indexing='ij')
    return np.stack((y_grid, x_grid), -1)


def _expand_to_minibatch(np_array, batch_size):
    """Tile arbitrarily-sized np_array to include new batch dimension."""
    tiles = [batch_size] + [1] * np_array.ndim
    return np.tile(np.expand_dims(np_array, 0), tiles)


def _cross_squared_distance(x, y):
    """Pairwise squared distances: `[b, n, d]`, `[b, m, d]` -> `[b, n, m]`."""
    diff = x[:, :, None, :] - y[:, None, :, :]
    return np.sum(diff * diff, axis=-1)


def _phi(r, order):
    """Polyharmonic radial basis function, evaluated on squared distances."""
    epsilon = 1e-10  # keeps log/sqrt finite at zero distance
    if order == 1:
        return np.sqrt(np.maximum(r, epsilon))
    if order == 2:
        return 0.5 * r * np.log(np.maximum(r, epsilon))
    if order == 4:
        return 0.5 * np.square(r) * np.log(np.maximum(r, epsilon))
    r = np.maximum(r, epsilon)
    if order % 2 == 0:
        return 0.5 * np.power(r, 0.5 * order) * np.log(r)
    return np.power(r, 0.5 * order)


def _interpolate_spline(train_points, train_values, query_points, order,
                        regularization_weight=0.0):
    """Fit a polyharmonic spline to the training pairs and evaluate it.
    Args:
      train_points: `[b, n, d]` locations of the known values
      train_values: `[b, n, k]` known values
      query_points: `[b, m, d]` locations to evaluate
      order: order of the polyharmonic kernel
      regularization_weight: added to the kernel diagonal when > 0
    Returns:
      `[b, m, k]` interpolated values
    Raises:
      numpy.linalg.LinAlgError: when the control points do not determine the
        spline (e.g. duplicated or too few points).
    """
    batch, num_points, dims = train_points.shape
    num_values = train_values.shape[-1]

    # Solve [[A, B], [B^T, 0]] [w; v] = [f; 0] for the kernel weights w and
    # the affine coefficients v.
    matrix_a = _phi(_cross_squared_distance(train_points, train_points), order)
    if regularization_weight > 0:
        matrix_a = matrix_a + regularization_weight * np.eye(num_points)[None]
    matrix_b = np.concatenate(
        [train_points, np.ones((batch, num_points, 1))], axis=2)

    left_block = np.concatenate(
        [matrix_a, np.transpose(matrix_b, (0, 2, 1))], axis=1)
    right_block = np.concatenate(
        [matrix_b, np.zeros((batch, dims + 1, dims + 1))], axis=1)
    lhs = np.concatenate([left_block, right_block], axis=2)
    rhs = np.concatenate(
        [train_values, np.zeros((batch, dims + 1, num_values))], axis=1)

    w_v = np.linalg.solve(lhs, rhs)
    w = w_v[:, :num_points, :]
    v = w_v[:, num_points:, :]

    rbf_term = np.matmul(
        _phi(_cross_squared_distance(query_points, train_points), order), w)
    query_points_pad = np.concatenate(
        [query_points, np.ones((batch, query_points.shape[1], 1))], axis=2)
    linear_term = np.matmul(query_points_pad, v)
    return rbf_term + linear_term


def _interpolate_bilinear(grid, query_points):
    """Sample `grid` (`[b, h, w, c]`) at fractional (y, x) `query_points`
    (`[b, n, 2]`); coordinates outside the image are clamped to the border.
    Returns `[b, n, c]`.
    """
    batch, height, width, _ = grid.shape
    if height < 2 or width < 2:
        raise ValueError('Grid must be at least batch_size x 2 x 2 in size.')

    def _split(coords, size):
        # Integer cell index (kept inside the image) and in-cell fraction
        floor = np.clip(np.floor(coords), 0, size - 2)
        alpha = np.clip(coords - floor, 0.0, 1.0)
        return floor.astype(np.int64), alpha[..., None]

    y0, alpha_y = _split(query_points[..., 0], height)
    x0, alpha_x = _split(query_points[..., 1], width)

    batch_index = np.arange(batch)[:, None]
    top_left = grid[batch_index, y0, x0]
    top_right = grid[batch_index, y0, x0 + 1]
    bottom_left = grid[batch_index, y0 + 1, x0]
    bottom_right = grid[batch_index, y0 + 1, x0 + 1]

    interp_top = alpha_x * (top_right - top_left) + top_left
    interp_bottom = alpha_x * (bottom_right - bottom_left) + bottom_left
    return alpha_y * (interp_bottom - interp_top) + interp_top


def _dense_image_warp(image, flow):
    """Warp `image` with a dense flow field:
    output[b, y, x] = image[b, y - flow[b, y, x, 0], x - flow[b, y, x, 1]].
    """
    batch, height, width, channels = image.shape
    query_points = _get_grid_locations(height, width)[None] - flow
    interpolated = _interpolate_bilinear(
        image, np.reshape(query_points, [batch, height * width, 2]))
    return np.reshape(interpolated, [batch, height, width, channels])


def sparse_image_warp(image,
                      source_control_point_locations,
                      dest_control_point_locations,
                      interpolation_order=2,
                      regularization_weight=0.0,
                      num_boundary_points=0):
    """Image warping using correspondences between sparse control points.
    Apply a non-linear warp to the image, where the warp is specified by
    the source and destination locations of a (potentially small) number of
    control points. First, a polyharmonic spline interpolates the
    displacements between the corresponding control points to a dense flow
    field. Then the image is warped with this dense flow field using
    bilinear sampling.

    This is a NumPy implementation: the previous body referenced TensorFlow
    helpers (`ops`, `constant_op`, `array_ops`, `interpolate_spline`,
    `dense_image_warp`) that this module never imports, so it could not run.
    NOTE(review): the results are NumPy arrays - confirm that the callers
    convert them when they need framework tensors.

    Let t index our control points. For regularization_weight=0, we have:
    warped_image[b, dest_control_point_locations[b, t, 0],
                    dest_control_point_locations[b, t, 1], :] =
    image[b, source_control_point_locations[b, t, 0],
             source_control_point_locations[b, t, 1], :].
    For regularization_weight > 0, this condition is met approximately, since
    regularized interpolation trades off smoothness of the interpolant vs.
    reconstruction of the interpolant at the control points.
    Args:
      image: `[batch, height, width, channels]` float array (anything
        `np.asarray` accepts); height and width must be at least 2
      source_control_point_locations: `[batch, num_control_points, 2]` float
        array of (y, x) positions
      dest_control_point_locations: `[batch, num_control_points, 2]` float
        array of (y, x) positions
      interpolation_order: polynomial order used by the spline interpolation
      regularization_weight: weight on smoothness regularizer in interpolation
      num_boundary_points: How many zero-flow boundary points to include at
        each image edge. Usage:
          num_boundary_points=0: don't add zero-flow points
          num_boundary_points=1: 4 corners of the image
          num_boundary_points=2: 4 corners and one in the middle of each edge
            (8 points total)
          num_boundary_points=n: 4 corners and n-1 along each edge
    Returns:
      warped_image: `[batch, height, width, channels]` float array with same
        type as input image (float64 when the input is not a float array).
      flow_field: `[batch, height, width, 2]` float64 array containing the
        dense flow field produced by the interpolation.
    Raises:
      ValueError: when image is not 4-D or is smaller than 2x2.
      numpy.linalg.LinAlgError: when the control points do not determine the
        spline.
    """
    image = np.asarray(image)
    if not np.issubdtype(image.dtype, np.floating):
        image = image.astype(np.float64)
    source_control_point_locations = np.asarray(
        source_control_point_locations, dtype=np.float64)
    dest_control_point_locations = np.asarray(
        dest_control_point_locations, dtype=np.float64)

    control_point_flows = (
        dest_control_point_locations - source_control_point_locations)

    clamp_boundaries = num_boundary_points > 0
    boundary_points_per_edge = num_boundary_points - 1

    batch_size, image_height, image_width, _ = image.shape

    # This generates the dense locations where the interpolant
    # will be evaluated.
    grid_locations = _get_grid_locations(image_height, image_width)
    flattened_grid_locations = _expand_to_minibatch(
        np.reshape(grid_locations, [image_height * image_width, 2]),
        batch_size)

    if clamp_boundaries:
        (dest_control_point_locations,
         control_point_flows) = _add_zero_flow_controls_at_boundary(
             dest_control_point_locations, control_point_flows, image_height,
             image_width, boundary_points_per_edge)

    flattened_flows = _interpolate_spline(
        dest_control_point_locations, control_point_flows,
        flattened_grid_locations, interpolation_order, regularization_weight)

    dense_flows = np.reshape(flattened_flows,
                             [batch_size, image_height, image_width, 2])

    warped_image = _dense_image_warp(
        image.astype(np.float64), dense_flows).astype(image.dtype)

    return warped_image, dense_flows
def data_pre(audio, audio_length, fs, audio_skip):
    '''Squeeze quiet stretches out of a clip and refill it by repetition.

    The clip is scanned in windows of two half-skip chunks; a window whose
    peak amplitude is below 0.005 is cut out and the rest is shifted left.
    The freed tail is then refilled with copies of the retained part; when
    less than half of the clip was retained the result is processed again.

    Args:
      audio: array of samples, at least audio_length * fs long; not modified
      audio_length: clip length in seconds
      fs: sample rate
      audio_skip: window length in seconds

    Returns:
      a new array with the same shape as audio
    '''
    stride = int(audio_skip * fs / 2)
    total_chunks = (audio_length * fs) // stride
    loop = int(total_chunks - 1)
    i = 0
    # Work on a copy so the caller's samples are left untouched
    out = np.array(audio, copy=True)

    while i < loop:
        win_data = out[i*stride: (i+2)*stride]
        if np.max(np.abs(win_data)) < 0.005:
            # Quiet window: drop it and pull the remaining chunks forward
            loop = loop - 2
            out[i*stride: (loop+1)*stride] = out[(i+2)*stride: (loop+3)*stride]
        else:
            i = i + 1

    length = total_chunks - loop - 1
    if length == 0:
        return out

    # Nothing audible was retained (e.g. a silent clip). Repeating it cannot
    # produce content, and recursing on it would never terminate.
    kept = out[:max(loop + 1, 0) * stride]
    if kept.size == 0 or np.max(np.abs(kept)) < 0.005:
        return out

    # Refill the freed tail from the start of the clip
    out[(loop + 1) * stride: total_chunks * stride] = out[0:length * stride]
    if length < total_chunks / 2:
        return out

    # More than half was removed: double the retained part and clean again
    out[(loop + 1) * stride:(loop + 1)*2 * stride] = out[0:(loop + 1) * stride]
    return data_pre(out, audio_length, fs, audio_skip)


def add_noise(audio, percent=0.2):
    '''Return audio plus Gaussian noise scaled by percent.

    Prints the mean of the noise and the absolute mean of the audio.
    '''
    random_values = np.random.randn(len(audio))
    print(np.mean(random_values))
    print(np.abs(np.mean(audio)))
    out = audio + percent * random_values
    return out


def fre_noise(feature, percent=0.2):
    '''Return a copy of feature with Gaussian noise added to rows 50..62.

    The input array is left unchanged; the copy keeps its dtype.
    '''
    out = np.array(feature, copy=True)
    for i in range(13):
        random_values = np.random.randn(feature.shape[1])
        out[50+i, :] = out[50+i, :] + percent*random_values
    return out


def time_noise(feature, percent=0.2):
    '''Return a copy of feature with Gaussian noise added to columns 25..29.

    The input array is left unchanged; the copy keeps its dtype.
    '''
    out = np.array(feature, copy=True)
    for i in range(5):
        random_values = np.random.randn(feature.shape[0])
        out[:, 25+i] = out[:, 25+i] + percent*random_values
    return out


def _save_heatmap(matrix, output_path=None):
    '''Show a 2-D array as a borderless jet heat map; save it when a path is given.'''
    plt.imshow(matrix, cmap = plt.cm.jet)
    plt.axis('off')
    fig = plt.gcf()
    height, width = matrix.shape
    fig.set_size_inches(width/5./8.,height/5./8.)
    plt.gca().xaxis.set_major_locator(plt.NullLocator())
    plt.gca().yaxis.set_major_locator(plt.NullLocator())
    plt.subplots_adjust(top = 1, bottom = 0, right = 1, left = 0, hspace = 0, wspace = 0)
    plt.margins(0,0)
    if output_path is not None:
        plt.savefig(output_path, dpi=500, pad_inches = 0)
    plt.show()


def _load_logmel(audio_path, audio_noise=None):
    '''Load 5 s of 44.1 kHz audio and return its first 250 log mel frames.

    When audio_noise is given, add_noise(audio, percent=audio_noise) is
    applied to the waveform before the feature extraction.
    '''
    (audio, _) = read_audio(audio_path=audio_path, target_fs=44100)
    audio = pad_truncate_sequence(audio, 44100*5)
    if audio_noise is not None:
        audio = add_noise(audio, percent=audio_noise)

    feature_extractor = LogMelExtractor(
        sample_rate=44100,
        window_size=1764,
        hop_size=882,
        mel_bins=40,
        fmin=50,
        fmax=11025)
    feature = feature_extractor.transform(audio)
    return feature[0 : 250]


def _visualize(model, feature, input_png, conv_png):
    '''Plot the input feature and the `out4` map that model.show returns for it.'''
    _save_heatmap(np.transpose(feature, (1, 0)), input_png)

    batch = torch.from_numpy(feature[None, :, :])
    # model.show returns eleven tensors; only out4 is visualized
    _, _, _, _, _, _, _, out4, _, _, _ = model.show(batch)

    conv_map = torch.squeeze(out4)
    _save_heatmap(np.transpose(conv_map.detach().numpy(), (1, 0)), conv_png)


if __name__ == '__main__':
    model = Cnns(50, activation='logsoftmax')
    checkpoint_path = '3400_iterations.pth'
    checkpoint = torch.load(checkpoint_path)
    model.load_state_dict(checkpoint['model'])

    audio_path = '1-85362-A-0.wav'

    # Noise on feature columns 25..29
    _visualize(model, time_noise(_load_logmel(audio_path), percent=3),
        "ori_conv_noise2.png", "conv_noise2.png")

    # Clean reference
    _visualize(model, _load_logmel(audio_path),
        "ori_conv.png", "conv.png")

    # Noise on feature rows 50..62
    _visualize(model, fre_noise(_load_logmel(audio_path), percent=3),
        "ori_conv_noise1.png", "conv_noise1.png")

    # Noise on the waveform; the input figure is shown but not saved
    _visualize(model, _load_logmel(audio_path, audio_noise=0.08),
        None, "conv_noise3.png")
class LogMelExtractor(object):
    def __init__(self, sample_rate, window_size, hop_size, mel_bins, fmin, fmax):
        '''Log mel feature extractor.

        Args:
          sample_rate: int
          window_size: int
          hop_size: int
          mel_bins: int
          fmin: int, minimum frequency of mel filter banks
          fmax: int, maximum frequency of mel filter banks
        '''

        self.window_size = window_size
        self.hop_size = hop_size
        self.window_func = np.hanning(window_size)

        self.melW = librosa.filters.mel(
            sr=sample_rate,
            n_fft=window_size,
            n_mels=mel_bins,
            fmin=fmin,
            fmax=fmax).T
        '''(n_fft // 2 + 1, mel_bins)'''

    def transform(self, audio):
        '''Extract feature of a singlechannel audio file.

        Args:
          audio: (samples,)

        Returns:
          feature: (frames_num, freq_bins)
        '''

        window_size = self.window_size
        hop_size = self.hop_size
        window_func = self.window_func

        # Compute short-time Fourier transform
        stft_matrix = librosa.core.stft(
            y=audio,
            n_fft=window_size,
            hop_length=hop_size,
            window=window_func,
            center=True,
            dtype=np.complex64,
            pad_mode='reflect').T
        '''(N, n_fft // 2 + 1)'''

        # Mel spectrogram
        mel_spectrogram = np.dot(np.abs(stft_matrix) ** 2, self.melW)

        # Log mel spectrogram
        logmel_spectrogram = librosa.core.power_to_db(
            mel_spectrogram, ref=1.0, amin=1e-10,
            top_db=None)

        logmel_spectrogram = logmel_spectrogram.astype(np.float32)

        return logmel_spectrogram


def deltas(X_in):
    '''Five-point delta of a 2-D array, taken along axis 1.

    out[:, t] = ((X[:, t+1] - X[:, t-1]) + 2 * (X[:, t+2] - X[:, t-2])) / 10
    for the interior columns; the first and last two columns are zero.

    Args:
      X_in: 2-D array with at least 5 columns

    Returns:
      float64 array with the shape of X_in
    '''
    X_out = (X_in[:,2:]-X_in[:,:-2])/10.0
    X_out = X_out[:,1:-1]+(X_in[:,4:]-X_in[:,:-4])/5.0
    out = np.zeros((X_in.shape[0], X_in.shape[1]))
    out[:,2:-2] = X_out
    return out


def MaxMinNormalization(x):
    '''Rescale x linearly so that its minimum maps to 0 and its maximum to 1.

    A constant input has no range to rescale; it maps to all zeros instead
    of the NaNs a division by zero would produce.
    '''
    low = np.min(x)
    span = np.max(x) - low
    shifted = x - low
    if span == 0:
        # Every shifted value is already zero; "* 1.0" only promotes to float
        return shifted * 1.0
    return shifted / span


def calculate_feature_for_all_audio_files(args):
    '''Calculate feature of audio files and write out features to a hdf5 file.

    For every file the normalized log mel spectrogram and its first and
    second deltas are cut into config.audio_num clips and stored as one
    (audio_num, 3, frames_num, mel_bins) entry of the 'feature' dataset. The
    metadata columns found in meta/esc50.csv are stored next to it.

    Args:
      dataset_dir: string
      workspace: string
      mini_data: bool, set True for debugging on a small part of data
    '''

    # Arguments & parameters
    dataset_dir = args.dataset_dir
    workspace = args.workspace
    mini_data = args.mini_data

    sample_rate = config.sample_rate
    window_size = config.window_size
    hop_size = config.hop_size
    mel_bins = config.mel_bins
    fmin = config.fmin
    fmax = config.fmax
    frames_per_second = config.frames_per_second
    frames_num = config.frames_num
    total_samples = config.total_samples
    audio_duration_clip = config.audio_duration_clip
    audio_stride_clip = config.audio_stride_clip
    audio_num = config.audio_num
    total_frames = config.total_frames

    # Paths
    prefix = 'minidata_' if mini_data else ''

    audios_dir = os.path.join(dataset_dir, 'audio')
    metadata_path = os.path.join(dataset_dir, 'meta', 'esc50.csv')
    feature_path = os.path.join(workspace, 'features',
        '{}logmel_{}frames_{}melbins.h5'.format(prefix, frames_per_second, mel_bins))
    create_folder(os.path.dirname(feature_path))

    # Feature extractor
    feature_extractor = LogMelExtractor(
        sample_rate=sample_rate,
        window_size=window_size,
        hop_size=hop_size,
        mel_bins=mel_bins,
        fmin=fmin,
        fmax=fmax)

    # Read metadata
    meta_dict = read_metadata(metadata_path)

    # Keep a fixed random subset when debugging
    if mini_data:
        mini_num = 10
        total_num = len(meta_dict['filename'])
        random_state = np.random.RandomState(1234)
        indexes = random_state.choice(total_num, size=mini_num, replace=False)
        for key in meta_dict.keys():
            meta_dict[key] = meta_dict[key][indexes]

    print('Extracting features of all audio files ...')
    extract_time = time.time()

    # Hdf5 file for storing features and targets. The context manager closes
    # the file even when reading or transforming one of the audio files fails.
    with h5py.File(feature_path, 'w') as hf:

        hf.create_dataset(
            name='filename',
            data=[filename.encode() for filename in meta_dict['filename']],
            dtype='S80')

        if 'fold' in meta_dict.keys():
            hf.create_dataset(
                name='fold',
                data=[fold for fold in meta_dict['fold']],
                dtype=np.int64)

        if 'target' in meta_dict.keys():
            hf.create_dataset(
                name='target',
                data=[target for target in meta_dict['target']],
                dtype=np.int64)

        if 'category' in meta_dict.keys():
            hf.create_dataset(
                name='category',
                data=[category.encode() for category in meta_dict['category']],
                dtype='S80')

        if 'esc10' in meta_dict.keys():
            # np.bool_ rather than the np.bool alias, which newer NumPy
            # releases no longer provide
            hf.create_dataset(
                name='esc10',
                data=[esc10 for esc10 in meta_dict['esc10']],
                dtype=np.bool_)

        if 'src_file' in meta_dict.keys():
            hf.create_dataset(
                name='src_file',
                data=[src_file for src_file in meta_dict['src_file']],
                dtype=np.int64)

        if 'take' in meta_dict.keys():
            hf.create_dataset(
                name='take',
                data=[take.encode() for take in meta_dict['take']],
                dtype='S24')

        hf.create_dataset(
            name='feature',
            shape=(0, audio_num, 3, frames_num, mel_bins),
            maxshape=(None, audio_num, 3, frames_num, mel_bins),
            dtype=np.float32)

        # Number of frames between the starts of two consecutive clips
        frames_per_stride = frames_per_second * audio_stride_clip

        for (n, filename) in enumerate(meta_dict['filename']):
            audio_path = os.path.join(audios_dir, filename)
            print(n, audio_path)

            # Read audio
            (audio, _) = read_audio(
                audio_path=audio_path,
                target_fs=sample_rate)

            # Pad or truncate audio recording to the same length
            audio = pad_truncate_sequence(audio, total_samples)

            # Extract feature
            feature = feature_extractor.transform(audio)

            # Remove the extra log mel spectrogram frames caused by padding zero
            feature = feature[0 : total_frames]
            feature = MaxMinNormalization(feature)
            feature_delta = MaxMinNormalization(deltas(feature))
            feature_delta2 = MaxMinNormalization(deltas(feature_delta))

            # Cut the three maps into overlapping clips along the frame axis
            fea_list = []
            for i in range(audio_num):
                start = i * frames_per_stride
                stop = (i + audio_duration_clip) * frames_per_stride
                f = np.concatenate((
                    feature[start: stop][None, :, :],
                    feature_delta[start: stop][None, :, :],
                    feature_delta2[start: stop][None, :, :]), 0)
                fea_list.append(f)

            hf['feature'].resize((n + 1, audio_num, 3, frames_num, mel_bins))
            hf['feature'][n] = fea_list

    print('Write hdf5 file to {} using {:.3f} s'.format(
        feature_path, time.time() - extract_time))
# Augmentation applied on iteration k is _AUGMENTATION_CYCLE[k % 3].
_AUGMENTATION_CYCLE = ('mixup', 'none', 'specaugment')


def _train_on_clip(model, optimizer, loss_func, features, target, augmentation):
    '''Run a single optimizer step on one clip position of a mini-batch.

    Args:
      model: network being optimized (switched to train mode here)
      optimizer: optimizer bound to the model parameters
      loss_func: callable(output, target) returning a scalar loss
      features: features of one clip position for the whole batch
      target: targets of the batch
      augmentation: 'mixup' | 'none' | 'specaugment'
    '''
    model.train()

    if augmentation == 'mixup':
        data, target_a, target_b, lam = mixup_data(x=features, y=target, alpha=0.2)
        batch_output = model(data)
        loss = mixup_criterion(loss_func, batch_output, target_a, target_b, lam)
    elif augmentation == 'specaugment':
        data = spec_augment_pytorch.spec_augment(
            features, using_frequency_masking=True, using_time_masking=True)
        loss = loss_func(model(data), target)
    else:
        loss = loss_func(model(features), target)

    # Backward
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()


def train(args, i):
    '''Train a model on one cross-validation fold.

    Checkpoints are written every 100 iterations; evaluation starts at
    iteration 1000 and also runs every 100 iterations. Training stops once
    iteration 4500 has been processed.

    Args:
      args: parsed command line namespace providing
        workspace: string, directory of workspace
        holdout_fold: '1' | 'none', 'none' disables validation
        model_type: string, name of a model class in scope, e.g. 'TFNet'
        batch_size: int
        cuda: bool
        mini_data: bool, set True for debugging on a small part of data
        filename: string, used as a sub-directory of all outputs
        mode: string, used as a sub-directory of the logs
      i: int, fold index; selects fold<i>_train.csv / fold<i>_test.csv
        located next to the script (sys.path[0])
    '''

    # Arguments & parameters
    workspace = args.workspace
    holdout_fold = args.holdout_fold
    model_type = args.model_type
    batch_size = args.batch_size
    cuda = args.cuda and torch.cuda.is_available()
    mini_data = args.mini_data
    filename = args.filename
    audio_num = config.audio_num
    mel_bins = config.mel_bins
    frames_per_second = config.frames_per_second
    reduce_lr = True
    in_domain_classes_num = len(config.labels)

    # Paths
    prefix = 'minidata_' if mini_data else ''
    feature_name = '{}logmel_{}frames_{}melbins'.format(
        prefix, frames_per_second, mel_bins)
    fold_tag = 'holdout_fold={}'.format(holdout_fold)

    train_csv = os.path.join(sys.path[0], 'fold' + str(i) + '_train.csv')
    validate_csv = os.path.join(sys.path[0], 'fold' + str(i) + '_test.csv')

    feature_hdf5_path = os.path.join(workspace, 'features', feature_name + '.h5')

    # NOTE(review): none of the output directories below contain the fold
    # index `i`, so successive folds write to the same checkpoint/statistics/
    # log locations. Paths are kept unchanged because other scripts may read
    # them; confirm whether per-fold directories are wanted.
    checkpoints_dir = os.path.join(workspace, 'checkpoints', filename,
        feature_name + '.h5', fold_tag, model_type)
    create_folder(checkpoints_dir)

    validate_statistics_path = os.path.join(workspace, 'statistics', filename,
        feature_name, fold_tag, model_type, 'validate_statistics.pickle')
    create_folder(os.path.dirname(validate_statistics_path))

    logs_dir = os.path.join(workspace, 'logs', filename, args.mode,
        feature_name, fold_tag, model_type)
    create_logging(logs_dir, 'w')
    logging.info(args)

    if cuda:
        logging.info('Using GPU.')
    else:
        logging.info('Using CPU. Set --cuda flag to use GPU.')

    # Model
    # NOTE(review): eval() executes an arbitrary expression taken from the
    # command line; acceptable for a local research script only. Consider a
    # name -> class lookup instead.
    Model = eval(model_type)

    model = Model(in_domain_classes_num, activation='logsoftmax')
    loss_func = nll_loss

    if cuda:
        model.cuda()

    # Optimizer
    optimizer = optim.Adam(model.parameters(), lr=1e-4, betas=(0.9, 0.999),
        eps=1e-08, weight_decay=0., amsgrad=True)

    # Data generator
    data_generator = DataGenerator(
        feature_hdf5_path=feature_hdf5_path,
        train_csv=train_csv,
        validate_csv=validate_csv,
        holdout_fold=holdout_fold,
        batch_size=batch_size)

    # Evaluator
    evaluator = Evaluator(
        model=model,
        data_generator=data_generator,
        cuda=cuda)

    # Statistics
    validate_statistics_container = StatisticsContainer(validate_statistics_path)

    train_bgn_time = time.time()
    iteration = 0

    # Train on mini batches
    for batch_data_dict in data_generator.generate_train():

        # Evaluate
        if iteration % 100 == 0 and iteration >= 1000:
            logging.info('------------------------------------')
            logging.info('Iteration: {}'.format(iteration))

            train_fin_time = time.time()

            evaluator.evaluate(data_type='train', iteration=iteration,
                max_iteration=None, verbose=False)

            if holdout_fold != 'none':
                validate_statistics = evaluator.evaluate(data_type='validate',
                    iteration=iteration, max_iteration=None, verbose=False)
                validate_statistics_container.append_and_dump(
                    iteration, validate_statistics)

            train_time = train_fin_time - train_bgn_time
            validate_time = time.time() - train_fin_time

            logging.info(
                'Train time: {:.3f} s, validate time: {:.3f} s'
                ''.format(train_time, validate_time))

            train_bgn_time = time.time()

        # Save model
        if iteration % 100 == 0 and iteration > 0:
            checkpoint = {
                'iteration': iteration,
                'model': model.state_dict(),
                'optimizer': optimizer.state_dict()}

            checkpoint_path = os.path.join(
                checkpoints_dir, '{}_iterations.pth'.format(iteration))

            torch.save(checkpoint, checkpoint_path)
            logging.info('Model saved to {}'.format(checkpoint_path))

        # Reduce learning rate
        if reduce_lr and iteration % 100 == 0 and iteration > 0:
            for param_group in optimizer.param_groups:
                param_group['lr'] *= 0.9

        # Move data to GPU
        for key in ('feature', 'target'):
            if key in batch_data_dict:
                batch_data_dict[key] = move_data_to_gpu(batch_data_dict[key], cuda)

        # Train: the augmentation rotates every iteration and every clip
        # position of the batch gets its own optimizer step.
        # NOTE(review): the flattened source does not preserve indentation;
        # one step per clip is assumed here - confirm against the original.
        augmentation = _AUGMENTATION_CYCLE[iteration % 3]
        for clip in range(audio_num):
            _train_on_clip(
                model, optimizer, loss_func,
                features=batch_data_dict['feature'][:, clip, :, :, :],
                target=batch_data_dict['target'],
                augmentation=augmentation)

        # Stop learning
        if iteration == 4500:
            break

        iteration += 1


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Example of parser. ')
    subparsers = parser.add_subparsers(dest='mode')

    # Train
    # NOTE: --audio_num is accepted here but train() reads config.audio_num.
    parser_train = subparsers.add_parser('train')
    parser_train.add_argument('--dataset_dir', type=str, required=True, help='Directory of dataset.')
    parser_train.add_argument('--workspace', type=str, required=True, help='Directory of your workspace.')
    parser_train.add_argument('--holdout_fold', type=str, choices=['1', 'none'], required=True, help='Set 1 for development and none for training on all data without validation.')
    parser_train.add_argument('--model_type', type=str, required=True, help='E.g., Cnn_9layers_AvgPooling.')
    parser_train.add_argument('--batch_size', type=int, required=True)
    parser_train.add_argument('--cuda', action='store_true', default=False)
    parser_train.add_argument('--audio_num', type=int, default=4)
    parser_train.add_argument('--mini_data', action='store_true', default=False, help='Set True for debugging on a small part of data.')

    # Inference validation data
    # NOTE: this sub-command is declared but not handled below; selecting it
    # ends in the 'Error argument!' exception.
    parser_inference_validation = subparsers.add_parser('inference_validation')
    parser_inference_validation.add_argument('--dataset_dir', type=str, required=True, help='Directory of dataset.')
    parser_inference_validation.add_argument('--workspace', type=str, required=True, help='Directory of your workspace.')
    parser_inference_validation.add_argument('--holdout_fold', type=str, choices=['1'], required=True)
    parser_inference_validation.add_argument('--model_type', type=str, required=True, help='E.g., Cnn_9layers_AvgPooling.')
    parser_inference_validation.add_argument('--iteration', type=int, required=True, help='Load model of this iteration.')
    parser_inference_validation.add_argument('--batch_size', type=int, required=True)
    parser_inference_validation.add_argument('--cuda', action='store_true', default=False)
    parser_inference_validation.add_argument('--audio_num', type=int, default=4)
    parser_inference_validation.add_argument('--visualize', action='store_true', default=False, help='Visualize log mel spectrogram of different sound classes.')
    parser_inference_validation.add_argument('--mini_data', action='store_true', default=False, help='Set True for debugging on a small part of data.')

    # Parse arguments
    args = parser.parse_args()
    args.filename = get_filename(__file__)

    if args.mode == 'train':
        # Five-fold cross validation: folds are numbered 1..5.
        for fold in range(1, 6):
            train(args, fold)

    else:
        raise Exception('Error argument!')
def torch_angle(t):
    """Return the phase angle of complex values stored as (real, imag) pairs.

    t -> (..., 2) real tensor, last dimension holds (real, imag)
    returns -> (...) angles in radians
    """
    return torch.atan2(t[..., 1], t[..., 0])


def spectrogram(sig, n_fft=2048, hop=None, window=None, **kwargs):
    """
    Short-time Fourier transform returned in the real (..., 2) layout.

    sig -> (batch, channel, time) or (channel, time)
    stft -> (batch, channel, freq, hop, complex) or (channel, freq, hop, complex)

    hop defaults to n_fft // 4 and window to a Hann window of length n_fft.
    Remaining keyword arguments are forwarded to torch.stft. The output shape
    assumes a one-sided transform (n_fft // 2 + 1 frequency bins).

    Raises ValueError when sig is not 2-D or 3-D.
    """
    if hop is None:
        hop = n_fft // 4
    if window is None:
        # Build the window next to the signal so GPU inputs work too.
        window = torch.hann_window(n_fft, device=sig.device)

    if sig.dim() == 3:
        batch, channel, time = sig.size()
        out_shape = [batch, channel, n_fft // 2 + 1, -1, 2]
    elif sig.dim() == 2:
        channel, time = sig.size()
        out_shape = [channel, n_fft // 2 + 1, -1, 2]
    else:
        raise ValueError('Input tensor dim() must be either 2 or 3.')

    sig = sig.reshape(-1, time)

    # This function always returns the real (..., 2) layout, so the caller's
    # return_complex preference (if any) is not forwarded.
    kwargs.pop('return_complex', None)
    try:
        # Current PyTorch requires return_complex to be given for real input.
        stft = torch.stft(sig, n_fft, hop, window=window,
                          return_complex=True, **kwargs)
    except TypeError:
        # Older PyTorch has no return_complex and already yields (..., 2).
        stft = torch.stft(sig, n_fft, hop, window=window, **kwargs)
    else:
        stft = torch.view_as_real(stft)

    return stft.reshape(out_shape)
32 | 33 | stft = torch.stft(sig, n_fft, hop, window=window, **kwargs) 34 | 35 | stft = stft.reshape(out_shape) 36 | return stft 37 | 38 | 39 | def _hertz_to_mel(f): 40 | ''' 41 | Converting frequency into mel values using HTK formula 42 | ''' 43 | return 2595. * torch.log10(torch.tensor(1.) + (f / 700.)) 44 | 45 | 46 | def _mel_to_hertz(mel): 47 | ''' 48 | Converting mel values into frequency using HTK formula 49 | ''' 50 | return 700. * (10**(mel / 2595.) - 1.) 51 | 52 | 53 | def create_mel_filter(n_stft, sr, n_mels=128, f_min=0.0, f_max=None): 54 | ''' 55 | Creates filter matrix to transform fft frequency bins into mel frequency bins. 56 | Equivalent to librosa.filters.mel(sr, n_fft, htk=True, norm=None). 57 | ''' 58 | # Convert to find mel lower/upper bounds 59 | f_max = f_max if f_max else sr // 2 60 | m_min = 0. if f_min == 0 else _hertz_to_mel(f_min) 61 | m_max = _hertz_to_mel(f_max) 62 | 63 | # Compute stft frequency values 64 | stft_freqs = torch.linspace(f_min, f_max, n_stft) 65 | 66 | # Find mel values, and convert them to frequency units 67 | m_pts = torch.linspace(m_min, m_max, n_mels + 2) 68 | f_pts = _mel_to_hertz(m_pts) 69 | 70 | f_diff = f_pts[1:] - f_pts[:-1] # (n_mels + 1) 71 | slopes = f_pts.unsqueeze(0) - stft_freqs.unsqueeze(1) # (n_stft, n_mels + 2) 72 | 73 | down_slopes = (-1. * slopes[:, :-2]) / f_diff[:-1] # (n_stft, n_mels) 74 | up_slopes = slopes[:, 2:] / f_diff[1:] # (n_stft, n_mels) 75 | fb = torch.clamp(torch.min(down_slopes, up_slopes), min=0.) 
def amplitude_to_db(spec, ref=1.0, amin=1e-10, top_db=80):
    """
    Amplitude spectrogram to the db scale

    The amplitudes are squared and handed to power_to_db with the same
    ref / amin / top_db.
    """
    power = spec**2
    return power_to_db(power, ref, amin, top_db)


def power_to_db(spec, ref=1.0, amin=1e-10, top_db=80.0):
    """
    Power spectrogram to the db scale
    spec -> (*, freq, time)

    ref: number, or callable applied to spec, giving the 0 dB reference
    amin: lower clamp applied to spec and ref before the logarithm
    top_db: if not None, values are clamped to at least (max - top_db)

    Raises ValueError if amin <= 0 or top_db < 0.
    """
    if amin <= 0:
        raise ValueError('amin must be strictly positive')

    if callable(ref):
        ref_value = ref(spec)
    else:
        # Match the spectrogram's floating dtype so that an integer `ref`
        # does not end up as an integer tensor inside log10.
        dtype = spec.dtype if spec.is_floating_point() else torch.get_default_dtype()
        ref_value = torch.as_tensor(ref, dtype=dtype, device=spec.device)

    log_spec = 10 * torch.log10(torch.clamp(spec, min=amin))
    log_spec = log_spec - 10 * torch.log10(torch.clamp(ref_value, min=amin))

    if top_db is not None:
        if top_db < 0:
            raise ValueError('top_db must be non-negative')

        log_spec = torch.clamp(log_spec, min=(log_spec.max() - top_db))

    return log_spec


def spec_whiten(spec, eps=1):
    """
    Log-compress a batch of spectrograms and standardize each sample.

    spec -> 4-D tensor; mean and std are taken over everything but dim 0.
    A sample whose log-spectrogram is constant has zero std and yields
    non-finite values.
    """
    lspec = torch.log10(spec + eps)

    flat = lspec.view(lspec.size(0), -1)
    mean = flat.mean(dim=-1).view(-1, 1, 1, 1)
    std = flat.std(dim=-1).view(-1, 1, 1, 1)

    return (lspec - mean) / std
class Spectrogram(nn.Module):
    """
    Module that outputs the magnitude spectrogram of an audio signal.
    Implemented as a layer so that the computation can run on the model's
    device and no transformed data needs to be stored. More information:
        - https://github.com/keunwoochoi/kapre
        - https://arxiv.org/pdf/1706.05781.pdf

    Args:
     * hop: int > 0
       - Hop length between frames in sample, should be <= n_fft.
       - Default: None (in which case n_fft // 4 is used)
     * n_fft: int > 0
       - Size of the fft.
       - Default: 2048
     * pad: int >= 0
       - Amount of two sided zero padding to apply.
       - Default: 0
     * window: torch.Tensor,
       - Windowing used in the stft.
       - Default: None (in which case torch.hann_window(n_fft) is used)
     * sr: int > 0
       - Sampling rate of the audio signal.
       - Default: 44100
     * stretch_param: None or (probability, max_deviation)
       - When given, a training-time forward pass applies a phase-vocoder
         time stretch with that probability, at a rate drawn uniformly
         from [1 - max_deviation, 1 + max_deviation].
     * spec_kwargs:
       - Any named arguments to be passed to the stft
    """

    def __init__(self, hop=None, n_fft=2048, pad=0, window=None, sr=44100, stretch_param=None, **spec_kwargs):

        super(Spectrogram, self).__init__()

        # n_fft must be known before the window is built (default window).
        self.n_fft = n_fft
        self.window = self._build_window(window)
        self.hop = n_fft // 4 if hop is None else hop
        self.pad = pad

        # Not all samples will have the same sr
        self.sr = sr
        self.spec_kwargs = spec_kwargs

        self.stretch_param = stretch_param
        self.prob = 0
        if self.stretch_param is not None:
            self._build_pv()

    def _build_pv(self):
        """Create the phase-vocoder state from stretch_param."""
        fft_size = self.n_fft // 2 + 1
        # Expected phase advance per hop for every frequency bin, (freq, 1).
        self.phi_advance = nn.Parameter(
            torch.linspace(0, math.pi * self.hop, fft_size)[..., None],
            requires_grad=False)

        self.prob = self.stretch_param[0]
        self.dist = Uniform(-self.stretch_param[1], self.stretch_param[1])

    def _get_rate(self):
        """Sample a stretch rate around 1."""
        return 1 - self.dist.sample()

    def phase_vocoder(self, D):
        """Time-stretch a complex spectrogram by a randomly sampled rate.

        D -> (batch, channel, freq, old_time, 2)
        returns -> (stretched spectrogram with a new time axis, rate)
        """
        rate = self._get_rate()
        time_steps = torch.arange(0, D.size(3), rate, device=D.device)  # (new_time)

        # Fractional position between two neighbouring source frames.
        alphas = (time_steps % 1)  # (new_time)

        phase_0 = torch_angle(D[:, :, :, :1])

        # Pad two frames at the end of the time axis so that index + 1 below
        # stays in range.
        pad_shape = [0, 0] + [0, 2] + [0] * 6
        D = F.pad(D, pad_shape)

        D0 = D[:, :, :, time_steps.long()]
        D1 = D[:, :, :, (time_steps + 1).long()]

        D0_angle = torch_angle(D0)
        D1_angle = torch_angle(D1)

        D0_norm = torch.norm(D0, dim=-1)
        D1_norm = torch.norm(D1, dim=-1)

        # Phase deviation from the expected advance, wrapped to [-pi, pi].
        Dphase = D1_angle - D0_angle - self.phi_advance
        Dphase = Dphase - 2 * math.pi * torch.round(Dphase / (2 * math.pi))

        # Compute Phase Accum
        phase = Dphase + self.phi_advance

        phase = torch.cat([phase_0, phase[:, :, :, :-1]], dim=-1)

        phase_acc = torch.cumsum(phase, -1)

        # Linear interpolation of the magnitudes.
        mag = alphas * D1_norm + (1 - alphas) * D0_norm

        Dstretch_real = mag * torch.cos(phase_acc)
        Dstretch_imag = mag * torch.sin(phase_acc)

        Dstretch = torch.stack([Dstretch_real, Dstretch_imag], dim=-1)

        return Dstretch, rate

    def _build_window(self, window):
        """Wrap the stft window in a frozen nn.Parameter.

        None selects a Hann window of length self.n_fft. Raises TypeError if
        window is not a torch.Tensor.
        """
        if window is None:
            window = torch.hann_window(self.n_fft)
        if not isinstance(window, torch.Tensor):
            raise TypeError('window must be a of type torch.Tensor')
        # In order for the window to be added as one of the Module's
        # parameters it has to be a nn.Parameter
        return nn.Parameter(window, requires_grad=False)

    def __dim_stft_mod(self, arr):
        # Sample lengths -> number of stft frames.
        if arr is None:
            return None
        return arr // self.hop + 1

    def __dim_pv_mod(self, arr, rate):
        # Frame counts -> frame counts after stretching by `rate`.
        if arr is None:
            return None
        return (arr.float() / rate).long() + 1

    def _out_dims(self, arr, rate=None):
        """Map input lengths (samples) to output lengths (frames)."""
        if arr is None:
            return None
        new_arr = self.__dim_stft_mod(arr)
        if rate is None:
            return new_arr
        return self.__dim_pv_mod(new_arr, rate)

    def _norm(self, stft):
        """Magnitude of (..., 2) real/imag pairs."""
        return torch.norm(stft, dim=-1, p=2)

    def forward(self, x, lengths=None):
        """
        If x is a padded tensor then lengths should have the
        corresponding sequence length for every element in the batch.
        Input: (batch, channel, signal)
        Output:(batch, channel, frequency_bins, time_hop)
        Returns (spec, lengths) when lengths is given, else spec.
        """
        # NOTE(review): the flattened source does not show how far the outer
        # no_grad block extends; it is assumed to cover the whole pass.
        with torch.no_grad():
            assert x.dim() == 3

            if self.pad > 0:
                x = F.pad(x, (self.pad, self.pad), "constant")

            spec = spectrogram(x,
                               n_fft=self.n_fft,
                               hop=self.hop,
                               window=self.window,
                               **self.spec_kwargs)

            rate = None

            # The random draw is kept unconditional so the RNG stream is
            # unchanged. The stretch_param guard prevents entering the
            # vocoder without its state when the draw is exactly 0 and
            # prob is 0.
            draw = torch.rand(1)[0]
            if self.stretch_param is not None and draw <= self.prob and self.training:
                spec, rate = self.phase_vocoder(spec)

            lengths = self._out_dims(lengths, rate)

            spec = self._norm(spec)

            if lengths is not None:
                assert spec.size(0) == lengths.size(0)
                return spec, lengths

            return spec


class Melspectrogram(Spectrogram):
    """Spectrogram followed by a mel filter bank and optional normalization.

    Args (in addition to Spectrogram's):
     * n_mels: int, number of mel bins
     * norm: 'whiten' | 'db' | None; any other value disables normalization
    """

    def __init__(self, hop=None, n_mels=128, n_fft=2048, pad=0, window=None, sr=44100, norm=None, **spec_kwargs):

        super(Melspectrogram, self).__init__(hop, n_fft, pad, window, sr, **spec_kwargs)

        self.n_fft = n_fft
        self.n_mels = n_mels
        self.mel_fb, self.mel_freq_vals = self._build_filter()
        self.norm = {
            'whiten': spec_whiten,
            'db': amplitude_to_db
        }.get(norm, None)

    def _build_filter(self):
        """Return (filter bank as frozen Parameter, mel frequency values)."""
        # Get the mel filter matrix and the mel frequency values
        mel_fb, mel_f = create_mel_filter(
            self.n_fft // 2 + 1,
            self.sr,
            n_mels=self.n_mels)
        # Cast filter matrix as nn.Parameter so it's loaded on model's device
        return nn.Parameter(mel_fb, requires_grad=False), mel_f

    def forward(self, x, lengths=None):
        """Same contract as Spectrogram.forward with mel bins on dim 2."""
        spec = super(Melspectrogram, self).forward(x, lengths)
        if isinstance(spec, tuple):
            spec, lengths = spec

        # Project the frequency axis (dim 2) onto the mel bins.
        spec = torch.matmul(spec.transpose(2, 3), self.mel_fb).transpose(2, 3)

        if self.norm is not None:
            spec = self.norm(spec)

        if lengths is not None:
            return spec, lengths

        return spec
class ConvBlock(nn.Module):
    """Two 3x3 conv + batch-norm + ReLU layers followed by 2-D pooling."""

    def __init__(self, in_channels, out_channels):

        super(ConvBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=(3, 3), stride=(1, 1),
                               padding=(1, 1), bias=False)

        self.conv2 = nn.Conv2d(in_channels=out_channels,
                               out_channels=out_channels,
                               kernel_size=(3, 3), stride=(1, 1),
                               padding=(1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)

    def _features(self, input):
        # Shared conv stack used by both show() and forward().
        x = F.relu_(self.bn1(self.conv1(input)))
        x = F.relu_(self.bn2(self.conv2(x)))
        return x

    def show(self, input, pool_size=(2, 2), pool_type='avg'):
        """Return the feature maps before pooling (for visualization).

        pool_size and pool_type are accepted for signature parity with
        forward() and are not used.
        """
        return self._features(input)

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        """Apply the conv stack and pool the result.

        Args:
          input: (batch_size, in_channels, height, width)
          pool_size: kernel size of the pooling
          pool_type: 'avg' | 'max'

        Raises:
          ValueError: if pool_type is neither 'avg' nor 'max'.
        """
        x = self._features(input)
        if pool_type == 'max':
            return F.max_pool2d(x, kernel_size=pool_size)
        if pool_type == 'avg':
            return F.avg_pool2d(x, kernel_size=pool_size)
        raise ValueError('Incorrect argument!')


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


def conv1x1(in_planes, out_planes, stride=1):
    """1x1 convolution"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
class TFBlock(nn.Module):
    """Conv block that fuses plain, frequency-gated and time-gated features.

    After two 3x3 conv layers, two 1x1 convs produce single-channel maps.
    One map is averaged over the time axis to gate each frequency bin, the
    other is averaged over the frequency axis to gate each time step. The
    plain and the two gated feature maps are mixed with softmax-normalized
    learnable weights (alpha) and pooled.

    The averaging layers have fixed kernel sizes, so every supported
    out_channels value expects one specific (time_steps, freq_bins) input
    size, listed in _GEOMETRY.
    """

    # out_channels -> (time_steps, freq_bins, max-pool extent)
    _GEOMETRY = {
        64: (250, 40, 64),
        128: (125, 20, 128),
        256: (62, 10, 128),
        512: (31, 5, 128),
    }

    def __init__(self, in_channels, out_channels):

        super(TFBlock, self).__init__()
        if out_channels not in self._GEOMETRY:
            raise ValueError(
                'out_channels must be one of {}, got {}'.format(
                    sorted(self._GEOMETRY), out_channels))
        time_steps, freq_bins, max_extent = self._GEOMETRY[out_channels]

        self.conv1 = nn.Conv2d(in_channels=in_channels,
                               out_channels=out_channels,
                               kernel_size=(3, 3), stride=(1, 1),
                               padding=(1, 1), bias=False)

        self.conv2 = nn.Conv2d(in_channels=out_channels,
                               out_channels=out_channels,
                               kernel_size=(3, 3), stride=(1, 1),
                               padding=(1, 1), bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # Created on the CPU like every other parameter; model.cuda() moves
        # it together with the rest of the module.
        self.alpha = nn.Parameter(torch.tensor([.1, .1, .1]))
        self.bnx = nn.BatchNorm2d(1)
        self.bny = nn.BatchNorm2d(1)
        self.bnz = nn.BatchNorm2d(out_channels)
        self.bna = nn.BatchNorm2d(out_channels)
        self.bnb = nn.BatchNorm2d(out_channels)
        self.conv3 = nn.Conv2d(in_channels=out_channels,
                               out_channels=1,
                               kernel_size=(1, 1), stride=(1, 1),
                               padding=(0, 0), bias=False)
        self.conv4 = nn.Conv2d(in_channels=out_channels,
                               out_channels=1,
                               kernel_size=(1, 1), stride=(1, 1),
                               padding=(0, 0), bias=False)

        self.globalAvgPool2 = nn.AvgPool2d((time_steps, 1), stride=1)
        self.globalAvgPool3 = nn.AvgPool2d((1, freq_bins), stride=1)
        # The layers below (max pools, fc1/fc2, bnz, sigmoid2, downsample,
        # bn) are not used by forward()/show(); they are kept so parameter
        # names and initialization order stay compatible with existing
        # checkpoints.
        self.globalMaxPool2 = nn.MaxPool2d((1, max_extent), stride=1)
        self.globalMaxPool3 = nn.MaxPool2d((max_extent, 1), stride=1)
        self.fc1 = nn.Linear(in_features=freq_bins, out_features=freq_bins)
        self.fc2 = nn.Linear(in_features=time_steps, out_features=time_steps)

        self.sigmoid = nn.Sigmoid()
        self.sigmoid2 = nn.Sigmoid()
        self.downsample = nn.Conv2d(in_channels, out_channels, kernel_size=1,
                                    stride=1, bias=False)
        self.bn = nn.BatchNorm2d(out_channels)
        self.relu = nn.LeakyReLU(0.2)

    def _branches(self, input):
        """Compute all intermediate tensors shared by forward() and show().

        Returns:
          base: conv features, (batch, channels, time, freq)
          freq_map, time_map: single-channel maps, (batch, 1, time, freq)
          y: frequency-gated features after batch norm
          z: time-gated features after batch norm
          fused: activated weighted sum of base, y and z
        """
        x = self.bn1(self.relu(self.conv1(input)))
        base = self.bn2(self.relu(self.conv2(x)))

        freq_map = self.bnx(self.relu(self.conv3(base)))
        time_map = self.bny(self.relu(self.conv4(base)))

        # Frequency gate: move freq to dim 1, average over time.
        y = self.globalAvgPool2(freq_map.transpose(1, 3))
        y = y.view(y.size(0), -1)
        y = self.sigmoid(y)
        y = y.view(y.size(0), y.size(1), 1, 1)
        y = y * base.transpose(1, 3)
        y = self.bna(y.transpose(1, 3))

        # Time gate: move time to dim 1, average over frequency.
        z = self.globalAvgPool3(time_map.transpose(1, 2))
        z = z.view(z.size(0), -1)
        z = self.sigmoid(z)
        z = z.view(z.size(0), z.size(1), 1, 1)
        z = z * base.transpose(1, 2)
        z = self.bnb(z.transpose(1, 2))

        so_alpha = F.softmax(self.alpha, dim=0)
        fused = so_alpha[0] * base + so_alpha[1] * y + so_alpha[2] * z
        fused = self.relu(fused)
        return base, freq_map, time_map, y, z, fused

    @staticmethod
    def _pool(x, pool_size, pool_type):
        # Raises ValueError for anything but 'max' / 'avg'.
        if pool_type == 'max':
            return F.max_pool2d(x, kernel_size=pool_size)
        if pool_type == 'avg':
            return F.avg_pool2d(x, kernel_size=pool_size)
        raise ValueError('Incorrect argument!')

    def show(self, input, pool_size=(2, 2), pool_type='avg'):
        """Return intermediate maps for visualization.

        Returns a 7-tuple: channel-averaged conv features, frequency-gated
        features, time-gated features, fused features, pooled output, then
        the raw single-channel frequency and time maps.
        """
        base, freq_map, time_map, y, z, fused = self._branches(input)
        pooled = self._pool(fused, pool_size, pool_type)
        out1 = torch.mean(base, dim=1)
        out2 = torch.mean(y, dim=1)
        out3 = torch.mean(z, dim=1)
        out4 = torch.mean(fused, dim=1)
        out5 = torch.mean(pooled, dim=1)
        return out1, out2, out3, out4, out5, freq_map, time_map

    def forward(self, input, pool_size=(2, 2), pool_type='avg'):
        """Apply the block.

        Args:
          input: (batch_size, in_channels, time_steps, freq_bins) with the
            (time_steps, freq_bins) size listed in _GEOMETRY
          pool_size: kernel size of the final pooling
          pool_type: 'avg' | 'max'

        Raises:
          ValueError: if pool_type is neither 'avg' nor 'max'.
        """
        fused = self._branches(input)[-1]
        return self._pool(fused, pool_size, pool_type)
TFNet(nn.Module): 222 | 223 | def __init__(self, classes_num=10, activation='logsoftmax'): 224 | super(TFNet, self).__init__() 225 | 226 | self.activation = activation 227 | 228 | self.tfblock1 = TFBlock(in_channels=1, out_channels=64) 229 | self.tfblock2 = TFBlock(in_channels=64, out_channels=128) 230 | self.tfblock3 = TFBlock(in_channels=128, out_channels=256) 231 | self.tfblock4 = TFBlock(in_channels=256, out_channels=512) 232 | self.fc = nn.Linear(512, classes_num, bias=True) 233 | 234 | 235 | def show(self, input): 236 | 237 | x = input[:,None,:,:] 238 | '''(batch_size, 1, times_steps, freq_bins)''' 239 | out1, out2, out3, out4, out5, out6, out7 = self.conv_block1.show(x) 240 | x = self.tfblock1(x, pool_size=(2, 2), pool_type='avg') 241 | x1 = torch.mean(x, dim=1) 242 | x = self.tfblock2(x, pool_size=(2, 2), pool_type='avg') 243 | x2 = torch.mean(x, dim=1) 244 | x = self.tfblock3(x, pool_size=(2, 2), pool_type='avg') 245 | x3 = torch.mean(x, dim=1) 246 | x = self.tfblock4(x, pool_size=(2, 2), pool_type='avg') 247 | x4 = torch.mean(x, dim=1) 248 | return x1, x2, x3, x4, out1, out2, out3, out4, out5, out6, out7 249 | 250 | def forward(self, input): 251 | ''' 252 | Input: (batch_size, seq_number, times_steps, freq_bins)''' 253 | 254 | x = input[:, 0 , : , :] 255 | x = x[:, None, :, :] 256 | '''(batch_size, 1, times_steps, freq_bins)''' 257 | 258 | x = self.tfblock1(x, pool_size=(2, 2), pool_type='avg') 259 | x = self.tfblock2(x, pool_size=(2, 2), pool_type='avg') 260 | x = self.tfblock3(x, pool_size=(2, 2), pool_type='avg') 261 | x = self.tfblock4(x, pool_size=(2, 2), pool_type='avg') 262 | '''(batch_size, feature_maps, time_steps, freq_bins)''' 263 | x = torch.mean(x, dim=3) # (batch_size, feature_maps, time_stpes) 264 | (x, _) = torch.max(x, dim=2) # (batch_size, feature_maps) 265 | x = self.fc(x) 266 | 267 | if self.activation == 'logsoftmax': 268 | output = F.log_softmax(x, dim=-1) 269 | 270 | elif self.activation == 'sigmoid': 271 | output = torch.sigmoid(x) 
class Cnn(nn.Module):
    """Four-stage ConvBlock classifier operating on one spectrogram channel.

    The ConvBlocks widen the channel dimension 1 -> 64 -> 128 -> 256 -> 512.
    ``forward`` then averages over the last axis, takes the maximum over the
    remaining time axis and maps the 512 features to ``classes_num`` scores.

    Args:
        classes_num: Number of output classes.
        activation: Name of the output non-linearity, ``'logsoftmax'`` or
            ``'sigmoid'``. It is checked when ``forward`` runs, not here.
    """

    def __init__(self, classes_num=50, activation='logsoftmax'):
        super(Cnn, self).__init__()

        self.activation = activation
        self.conv_block1 = ConvBlock(in_channels=1, out_channels=64)
        self.conv_block2 = ConvBlock(in_channels=64, out_channels=128)
        self.conv_block3 = ConvBlock(in_channels=128, out_channels=256)
        self.conv_block4 = ConvBlock(in_channels=256, out_channels=512)
        # NOTE: ``fc2`` and ``dropout`` are registered but never applied in
        # ``forward``. They are kept because removing ``fc2`` would change the
        # set of parameters this module exposes (e.g. to saved checkpoints).
        self.fc2 = nn.Linear(512, 512, bias=True)
        self.dropout = nn.Dropout(p=0.5)
        self.fc = nn.Linear(512, classes_num, bias=True)

    def _output_activation(self):
        """Return the callable selected by ``self.activation``.

        Raises:
            ValueError: If ``self.activation`` is not ``'logsoftmax'`` or
                ``'sigmoid'``.
        """
        if self.activation == 'logsoftmax':
            return lambda logits: F.log_softmax(logits, dim=-1)
        if self.activation == 'sigmoid':
            return torch.sigmoid
        raise ValueError(
            "Unsupported activation {!r}; expected 'logsoftmax' or "
            "'sigmoid'.".format(self.activation))

    def forward(self, input):
        """Classify a batch of inputs.

        Args:
            input: (batch_size, seq_number, time_steps, freq_bins). Only
                sequence index 0 is used.

        Returns:
            Tensor of shape (batch_size, classes_num) after the configured
            output activation.

        Raises:
            ValueError: If ``self.activation`` is not supported.
        """
        # Resolve the activation first so a misconfigured model fails with a
        # clear error before any computation, instead of an UnboundLocalError
        # on ``output`` at the very end.
        activate = self._output_activation()

        x = input[:, 0, :, :]
        x = x[:, None, :, :]  # (batch_size, 1, time_steps, freq_bins)

        for block in (self.conv_block1, self.conv_block2,
                      self.conv_block3, self.conv_block4):
            x = block(x, pool_size=(2, 2), pool_type='avg')
        # (batch_size, feature_maps, time_steps, freq_bins)

        x = torch.mean(x, dim=3)      # (batch_size, feature_maps, time_steps)
        (x, _) = torch.max(x, dim=2)  # (batch_size, feature_maps)
        x = self.fc(x)

        return activate(x)
6 | 1-101296-A-19.wav,1,19,thunderstorm,False,101296,A 7 | 1-101296-B-19.wav,1,19,thunderstorm,False,101296,B 8 | 1-101336-A-30.wav,1,30,door_wood_knock,False,101336,A 9 | 1-101404-A-34.wav,1,34,can_opening,False,101404,A 10 | 1-103298-A-9.wav,1,9,crow,False,103298,A 11 | 1-103995-A-30.wav,1,30,door_wood_knock,False,103995,A 12 | 1-103999-A-30.wav,1,30,door_wood_knock,False,103999,A 13 | 1-104089-A-22.wav,1,22,clapping,False,104089,A 14 | 1-104089-B-22.wav,1,22,clapping,False,104089,B 15 | 1-105224-A-22.wav,1,22,clapping,False,105224,A 16 | 1-110389-A-0.wav,1,0,dog,True,110389,A 17 | 1-110537-A-22.wav,1,22,clapping,False,110537,A 18 | 1-115521-A-19.wav,1,19,thunderstorm,False,115521,A 19 | 1-115545-A-48.wav,1,48,fireworks,False,115545,A 20 | 1-115545-B-48.wav,1,48,fireworks,False,115545,B 21 | 1-115545-C-48.wav,1,48,fireworks,False,115545,C 22 | 1-115546-A-48.wav,1,48,fireworks,False,115546,A 23 | 1-115920-A-22.wav,1,22,clapping,False,115920,A 24 | 1-115920-B-22.wav,1,22,clapping,False,115920,B 25 | 1-115921-A-22.wav,1,22,clapping,False,115921,A 26 | 1-116765-A-41.wav,1,41,chainsaw,True,116765,A 27 | 1-11687-A-47.wav,1,47,airplane,False,11687,A 28 | 1-118206-A-31.wav,1,31,mouse_click,False,118206,A 29 | 1-118559-A-17.wav,1,17,pouring_water,False,118559,A 30 | 1-119125-A-45.wav,1,45,train,False,119125,A 31 | 1-121951-A-8.wav,1,8,sheep,False,121951,A 32 | 1-12653-A-15.wav,1,15,water_drops,False,12653,A 33 | 1-12654-A-15.wav,1,15,water_drops,False,12654,A 34 | 1-12654-B-15.wav,1,15,water_drops,False,12654,B 35 | 1-13571-A-46.wav,1,46,church_bells,False,13571,A 36 | 1-13572-A-46.wav,1,46,church_bells,False,13572,A 37 | 1-13613-A-37.wav,1,37,clock_alarm,False,13613,A 38 | 1-137-A-32.wav,1,32,keyboard_typing,False,137,A 39 | 1-137296-A-16.wav,1,16,wind,False,137296,A 40 | 1-14262-A-37.wav,1,37,clock_alarm,False,14262,A 41 | 1-155858-A-25.wav,1,25,footsteps,False,155858,A 42 | 1-155858-B-25.wav,1,25,footsteps,False,155858,B 43 | 
1-155858-C-25.wav,1,25,footsteps,False,155858,C 44 | 1-155858-D-25.wav,1,25,footsteps,False,155858,D 45 | 1-155858-E-25.wav,1,25,footsteps,False,155858,E 46 | 1-155858-F-25.wav,1,25,footsteps,False,155858,F 47 | 1-15689-A-4.wav,1,4,frog,False,15689,A 48 | 1-15689-B-4.wav,1,4,frog,False,15689,B 49 | 1-160563-A-48.wav,1,48,fireworks,False,160563,A 50 | 1-160563-B-48.wav,1,48,fireworks,False,160563,B 51 | 1-16568-A-3.wav,1,3,cow,False,16568,A 52 | 1-16746-A-15.wav,1,15,water_drops,False,16746,A 53 | 1-17092-A-27.wav,1,27,brushing_teeth,False,17092,A 54 | 1-17092-B-27.wav,1,27,brushing_teeth,False,17092,B 55 | 1-17124-A-43.wav,1,43,car_horn,False,17124,A 56 | 1-17150-A-12.wav,1,12,crackling_fire,True,17150,A 57 | 1-172649-A-40.wav,1,40,helicopter,True,172649,A 58 | 1-172649-B-40.wav,1,40,helicopter,True,172649,B 59 | 1-172649-C-40.wav,1,40,helicopter,True,172649,C 60 | 1-172649-D-40.wav,1,40,helicopter,True,172649,D 61 | 1-172649-E-40.wav,1,40,helicopter,True,172649,E 62 | 1-172649-F-40.wav,1,40,helicopter,True,172649,F 63 | 1-17295-A-29.wav,1,29,drinking_sipping,False,17295,A 64 | 1-17367-A-10.wav,1,10,rain,True,17367,A 65 | 1-17565-A-12.wav,1,12,crackling_fire,True,17565,A 66 | 1-17585-A-7.wav,1,7,insects,False,17585,A 67 | 1-17742-A-12.wav,1,12,crackling_fire,True,17742,A 68 | 1-17808-A-12.wav,1,12,crackling_fire,True,17808,A 69 | 1-17808-B-12.wav,1,12,crackling_fire,True,17808,B 70 | 1-1791-A-26.wav,1,26,laughing,False,1791,A 71 | 1-17970-A-4.wav,1,4,frog,False,17970,A 72 | 1-18074-A-6.wav,1,6,hen,False,18074,A 73 | 1-18074-B-6.wav,1,6,hen,False,18074,B 74 | 1-181071-A-40.wav,1,40,helicopter,True,181071,A 75 | 1-181071-B-40.wav,1,40,helicopter,True,181071,B 76 | 1-18527-A-44.wav,1,44,engine,False,18527,A 77 | 1-18527-B-44.wav,1,44,engine,False,18527,B 78 | 1-18631-A-23.wav,1,23,breathing,False,18631,A 79 | 1-18655-A-31.wav,1,31,mouse_click,False,18655,A 80 | 1-187207-A-20.wav,1,20,crying_baby,True,187207,A 81 | 1-18755-A-4.wav,1,4,frog,False,18755,A 82 | 
1-18755-B-4.wav,1,4,frog,False,18755,B 83 | 1-18757-A-4.wav,1,4,frog,False,18757,A 84 | 1-18810-A-49.wav,1,49,hand_saw,False,18810,A 85 | 1-19026-A-43.wav,1,43,car_horn,False,19026,A 86 | 1-19111-A-24.wav,1,24,coughing,False,19111,A 87 | 1-19118-A-24.wav,1,24,coughing,False,19118,A 88 | 1-19501-A-7.wav,1,7,insects,False,19501,A 89 | 1-196660-A-8.wav,1,8,sheep,False,196660,A 90 | 1-196660-B-8.wav,1,8,sheep,False,196660,B 91 | 1-19840-A-36.wav,1,36,vacuum_cleaner,False,19840,A 92 | 1-19872-A-36.wav,1,36,vacuum_cleaner,False,19872,A 93 | 1-19872-B-36.wav,1,36,vacuum_cleaner,False,19872,B 94 | 1-19898-A-41.wav,1,41,chainsaw,True,19898,A 95 | 1-19898-B-41.wav,1,41,chainsaw,True,19898,B 96 | 1-19898-C-41.wav,1,41,chainsaw,True,19898,C 97 | 1-20133-A-39.wav,1,39,glass_breaking,False,20133,A 98 | 1-202111-A-3.wav,1,3,cow,False,202111,A 99 | 1-20545-A-28.wav,1,28,snoring,False,20545,A 100 | 1-20736-A-18.wav,1,18,toilet_flush,False,20736,A 101 | 1-208757-A-2.wav,1,2,pig,False,208757,A 102 | 1-208757-B-2.wav,1,2,pig,False,208757,B 103 | 1-208757-C-2.wav,1,2,pig,False,208757,C 104 | 1-208757-D-2.wav,1,2,pig,False,208757,D 105 | 1-208757-E-2.wav,1,2,pig,False,208757,E 106 | 1-211527-A-20.wav,1,20,crying_baby,True,211527,A 107 | 1-211527-B-20.wav,1,20,crying_baby,True,211527,B 108 | 1-211527-C-20.wav,1,20,crying_baby,True,211527,C 109 | 1-21189-A-10.wav,1,10,rain,True,21189,A 110 | 1-21421-A-46.wav,1,46,church_bells,False,21421,A 111 | 1-21896-A-35.wav,1,35,washing_machine,False,21896,A 112 | 1-21934-A-38.wav,1,38,clock_tick,True,21934,A 113 | 1-21935-A-38.wav,1,38,clock_tick,True,21935,A 114 | 1-223162-A-25.wav,1,25,footsteps,False,223162,A 115 | 1-22694-A-20.wav,1,20,crying_baby,True,22694,A 116 | 1-22694-B-20.wav,1,20,crying_baby,True,22694,B 117 | 1-22804-A-46.wav,1,46,church_bells,False,22804,A 118 | 1-22882-A-44.wav,1,44,engine,False,22882,A 119 | 1-23094-A-15.wav,1,15,water_drops,False,23094,A 120 | 1-23094-B-15.wav,1,15,water_drops,False,23094,B 121 | 
1-23222-A-19.wav,1,19,thunderstorm,False,23222,A 122 | 1-23222-B-19.wav,1,19,thunderstorm,False,23222,B 123 | 1-23706-A-49.wav,1,49,hand_saw,False,23706,A 124 | 1-23996-A-35.wav,1,35,washing_machine,False,23996,A 125 | 1-23996-B-35.wav,1,35,washing_machine,False,23996,B 126 | 1-24074-A-43.wav,1,43,car_horn,False,24074,A 127 | 1-24076-A-43.wav,1,43,car_horn,False,24076,A 128 | 1-24524-A-19.wav,1,19,thunderstorm,False,24524,A 129 | 1-24524-B-19.wav,1,19,thunderstorm,False,24524,B 130 | 1-24524-C-19.wav,1,19,thunderstorm,False,24524,C 131 | 1-24796-A-47.wav,1,47,airplane,False,24796,A 132 | 1-254507-A-43.wav,1,43,car_horn,False,254507,A 133 | 1-25777-A-48.wav,1,48,fireworks,False,25777,A 134 | 1-25781-A-48.wav,1,48,fireworks,False,25781,A 135 | 1-260640-A-2.wav,1,2,pig,False,260640,A 136 | 1-260640-B-2.wav,1,2,pig,False,260640,B 137 | 1-260640-C-2.wav,1,2,pig,False,260640,C 138 | 1-26143-A-21.wav,1,21,sneezing,True,26143,A 139 | 1-26176-A-43.wav,1,43,car_horn,False,26176,A 140 | 1-26177-A-43.wav,1,43,car_horn,False,26177,A 141 | 1-26188-A-30.wav,1,30,door_wood_knock,False,26188,A 142 | 1-26222-A-10.wav,1,10,rain,True,26222,A 143 | 1-26806-A-1.wav,1,1,rooster,True,26806,A 144 | 1-27165-A-35.wav,1,35,washing_machine,False,27165,A 145 | 1-27166-A-35.wav,1,35,washing_machine,False,27166,A 146 | 1-27403-A-28.wav,1,28,snoring,False,27403,A 147 | 1-27405-A-28.wav,1,28,snoring,False,27405,A 148 | 1-27724-A-1.wav,1,1,rooster,True,27724,A 149 | 1-28005-A-18.wav,1,18,toilet_flush,False,28005,A 150 | 1-28135-A-11.wav,1,11,sea_waves,True,28135,A 151 | 1-28135-B-11.wav,1,11,sea_waves,True,28135,B 152 | 1-28808-A-43.wav,1,43,car_horn,False,28808,A 153 | 1-29532-A-16.wav,1,16,wind,False,29532,A 154 | 1-29561-A-10.wav,1,10,rain,True,29561,A 155 | 1-29680-A-21.wav,1,21,sneezing,True,29680,A 156 | 1-30039-A-26.wav,1,26,laughing,False,30039,A 157 | 1-30043-A-26.wav,1,26,laughing,False,30043,A 158 | 1-30214-A-18.wav,1,18,toilet_flush,False,30214,A 159 | 
1-30226-A-0.wav,1,0,dog,True,30226,A 160 | 1-30344-A-0.wav,1,0,dog,True,30344,A 161 | 1-30709-A-23.wav,1,23,breathing,False,30709,A 162 | 1-30709-B-23.wav,1,23,breathing,False,30709,B 163 | 1-30709-C-23.wav,1,23,breathing,False,30709,C 164 | 1-30830-A-24.wav,1,24,coughing,False,30830,A 165 | 1-31251-A-6.wav,1,6,hen,False,31251,A 166 | 1-31251-B-6.wav,1,6,hen,False,31251,B 167 | 1-31482-A-42.wav,1,42,siren,False,31482,A 168 | 1-31482-B-42.wav,1,42,siren,False,31482,B 169 | 1-31748-A-21.wav,1,21,sneezing,True,31748,A 170 | 1-31836-A-4.wav,1,4,frog,False,31836,A 171 | 1-31836-B-4.wav,1,4,frog,False,31836,B 172 | 1-32318-A-0.wav,1,0,dog,True,32318,A 173 | 1-32373-A-35.wav,1,35,washing_machine,False,32373,A 174 | 1-32373-B-35.wav,1,35,washing_machine,False,32373,B 175 | 1-32579-A-29.wav,1,29,drinking_sipping,False,32579,A 176 | 1-33658-A-26.wav,1,26,laughing,False,33658,A 177 | 1-34094-A-5.wav,1,5,cat,False,34094,A 178 | 1-34094-B-5.wav,1,5,cat,False,34094,B 179 | 1-34119-A-1.wav,1,1,rooster,True,34119,A 180 | 1-34119-B-1.wav,1,1,rooster,True,34119,B 181 | 1-34495-A-14.wav,1,14,chirping_birds,False,34495,A 182 | 1-34497-A-14.wav,1,14,chirping_birds,False,34497,A 183 | 1-34853-A-37.wav,1,37,clock_alarm,False,34853,A 184 | 1-35687-A-38.wav,1,38,clock_tick,True,35687,A 185 | 1-36164-A-26.wav,1,26,laughing,False,36164,A 186 | 1-36164-B-26.wav,1,26,laughing,False,36164,B 187 | 1-36393-A-23.wav,1,23,breathing,False,36393,A 188 | 1-36397-A-23.wav,1,23,breathing,False,36397,A 189 | 1-36400-A-23.wav,1,23,breathing,False,36400,A 190 | 1-36402-A-23.wav,1,23,breathing,False,36402,A 191 | 1-36929-A-47.wav,1,47,airplane,False,36929,A 192 | 1-37226-A-29.wav,1,29,drinking_sipping,False,37226,A 193 | 1-38559-A-14.wav,1,14,chirping_birds,False,38559,A 194 | 1-38560-A-14.wav,1,14,chirping_birds,False,38560,A 195 | 1-39835-A-9.wav,1,9,crow,False,39835,A 196 | 1-39835-B-9.wav,1,9,crow,False,39835,B 197 | 1-39901-A-11.wav,1,11,sea_waves,True,39901,A 198 | 
1-39901-B-11.wav,1,11,sea_waves,True,39901,B 199 | 1-39923-A-1.wav,1,1,rooster,True,39923,A 200 | 1-39937-A-28.wav,1,28,snoring,False,39937,A 201 | 1-40154-A-46.wav,1,46,church_bells,False,40154,A 202 | 1-40621-A-28.wav,1,28,snoring,False,40621,A 203 | 1-40730-A-1.wav,1,1,rooster,True,40730,A 204 | 1-40967-A-28.wav,1,28,snoring,False,40967,A 205 | 1-41615-A-34.wav,1,34,can_opening,False,41615,A 206 | 1-4211-A-12.wav,1,12,crackling_fire,True,4211,A 207 | 1-42139-A-38.wav,1,38,clock_tick,True,42139,A 208 | 1-43382-A-1.wav,1,1,rooster,True,43382,A 209 | 1-43760-A-11.wav,1,11,sea_waves,True,43760,A 210 | 1-43764-A-34.wav,1,34,can_opening,False,43764,A 211 | 1-43807-A-47.wav,1,47,airplane,False,43807,A 212 | 1-43807-B-47.wav,1,47,airplane,False,43807,B 213 | 1-43807-C-47.wav,1,47,airplane,False,43807,C 214 | 1-43807-D-47.wav,1,47,airplane,False,43807,D 215 | 1-44831-A-1.wav,1,1,rooster,True,44831,A 216 | 1-45641-A-27.wav,1,27,brushing_teeth,False,45641,A 217 | 1-45645-A-31.wav,1,31,mouse_click,False,45645,A 218 | 1-46040-A-14.wav,1,14,chirping_birds,False,46040,A 219 | 1-46272-A-12.wav,1,12,crackling_fire,True,46272,A 220 | 1-46274-A-18.wav,1,18,toilet_flush,False,46274,A 221 | 1-46353-A-49.wav,1,49,hand_saw,False,46353,A 222 | 1-46744-A-36.wav,1,36,vacuum_cleaner,False,46744,A 223 | 1-46938-A-7.wav,1,7,insects,False,46938,A 224 | 1-46938-B-7.wav,1,7,insects,False,46938,B 225 | 1-47250-A-41.wav,1,41,chainsaw,True,47250,A 226 | 1-47250-B-41.wav,1,41,chainsaw,True,47250,B 227 | 1-47273-A-21.wav,1,21,sneezing,True,47273,A 228 | 1-47274-A-21.wav,1,21,sneezing,True,47274,A 229 | 1-47709-A-16.wav,1,16,wind,False,47709,A 230 | 1-47714-A-16.wav,1,16,wind,False,47714,A 231 | 1-47819-A-5.wav,1,5,cat,False,47819,A 232 | 1-47819-B-5.wav,1,5,cat,False,47819,B 233 | 1-47819-C-5.wav,1,5,cat,False,47819,C 234 | 1-47923-A-28.wav,1,28,snoring,False,47923,A 235 | 1-48298-A-46.wav,1,46,church_bells,False,48298,A 236 | 1-48413-A-38.wav,1,38,clock_tick,True,48413,A 237 | 
1-49098-A-35.wav,1,35,washing_machine,False,49098,A 238 | 1-49409-A-8.wav,1,8,sheep,False,49409,A 239 | 1-49409-B-8.wav,1,8,sheep,False,49409,B 240 | 1-50060-A-10.wav,1,10,rain,True,50060,A 241 | 1-50454-A-44.wav,1,44,engine,False,50454,A 242 | 1-50455-A-44.wav,1,44,engine,False,50455,A 243 | 1-50623-A-15.wav,1,15,water_drops,False,50623,A 244 | 1-50625-A-17.wav,1,17,pouring_water,False,50625,A 245 | 1-50661-A-44.wav,1,44,engine,False,50661,A 246 | 1-50688-A-17.wav,1,17,pouring_water,False,50688,A 247 | 1-51035-A-16.wav,1,16,wind,False,51035,A 248 | 1-51036-A-16.wav,1,16,wind,False,51036,A 249 | 1-51037-A-16.wav,1,16,wind,False,51037,A 250 | 1-51147-A-25.wav,1,25,footsteps,False,51147,A 251 | 1-51170-A-18.wav,1,18,toilet_flush,False,51170,A 252 | 1-51433-A-17.wav,1,17,pouring_water,False,51433,A 253 | 1-51436-A-17.wav,1,17,pouring_water,False,51436,A 254 | 1-51805-A-33.wav,1,33,door_wood_creaks,False,51805,A 255 | 1-51805-B-33.wav,1,33,door_wood_creaks,False,51805,B 256 | 1-51805-C-33.wav,1,33,door_wood_creaks,False,51805,C 257 | 1-51805-D-33.wav,1,33,door_wood_creaks,False,51805,D 258 | 1-51805-E-33.wav,1,33,door_wood_creaks,False,51805,E 259 | 1-51805-F-33.wav,1,33,door_wood_creaks,False,51805,F 260 | 1-51805-G-33.wav,1,33,door_wood_creaks,False,51805,G 261 | 1-51805-H-33.wav,1,33,door_wood_creaks,False,51805,H 262 | 1-52266-A-24.wav,1,24,coughing,False,52266,A 263 | 1-52290-A-30.wav,1,30,door_wood_knock,False,52290,A 264 | 1-52323-A-24.wav,1,24,coughing,False,52323,A 265 | 1-53444-A-28.wav,1,28,snoring,False,53444,A 266 | 1-53467-A-47.wav,1,47,airplane,False,53467,A 267 | 1-53501-A-32.wav,1,32,keyboard_typing,False,53501,A 268 | 1-53663-A-24.wav,1,24,coughing,False,53663,A 269 | 1-53670-A-18.wav,1,18,toilet_flush,False,53670,A 270 | 1-54065-A-45.wav,1,45,train,False,54065,A 271 | 1-54065-B-45.wav,1,45,train,False,54065,B 272 | 1-54084-A-42.wav,1,42,siren,False,54084,A 273 | 1-54505-A-21.wav,1,21,sneezing,True,54505,A 274 | 
1-54747-A-46.wav,1,46,church_bells,False,54747,A 275 | 1-54752-A-18.wav,1,18,toilet_flush,False,54752,A 276 | 1-54918-A-14.wav,1,14,chirping_birds,False,54918,A 277 | 1-54918-B-14.wav,1,14,chirping_birds,False,54918,B 278 | 1-54958-A-10.wav,1,10,rain,True,54958,A 279 | 1-56233-A-9.wav,1,9,crow,False,56233,A 280 | 1-56234-A-9.wav,1,9,crow,False,56234,A 281 | 1-56269-A-18.wav,1,18,toilet_flush,False,56269,A 282 | 1-56270-A-29.wav,1,29,drinking_sipping,False,56270,A 283 | 1-56311-A-10.wav,1,10,rain,True,56311,A 284 | 1-56380-A-5.wav,1,5,cat,False,56380,A 285 | 1-56380-B-5.wav,1,5,cat,False,56380,B 286 | 1-56907-A-46.wav,1,46,church_bells,False,56907,A 287 | 1-57163-A-38.wav,1,38,clock_tick,True,57163,A 288 | 1-57316-A-13.wav,1,13,crickets,False,57316,A 289 | 1-57318-A-13.wav,1,13,crickets,False,57318,A 290 | 1-57795-A-8.wav,1,8,sheep,False,57795,A 291 | 1-58277-A-3.wav,1,3,cow,False,58277,A 292 | 1-58792-A-24.wav,1,24,coughing,False,58792,A 293 | 1-58846-A-34.wav,1,34,can_opening,False,58846,A 294 | 1-58923-A-27.wav,1,27,brushing_teeth,False,58923,A 295 | 1-58923-B-27.wav,1,27,brushing_teeth,False,58923,B 296 | 1-59102-A-13.wav,1,13,crickets,False,59102,A 297 | 1-59324-A-21.wav,1,21,sneezing,True,59324,A 298 | 1-59513-A-0.wav,1,0,dog,True,59513,A 299 | 1-5996-A-6.wav,1,6,hen,False,5996,A 300 | 1-60460-A-36.wav,1,36,vacuum_cleaner,False,60460,A 301 | 1-60676-A-34.wav,1,34,can_opening,False,60676,A 302 | 1-60997-A-20.wav,1,20,crying_baby,True,60997,A 303 | 1-60997-B-20.wav,1,20,crying_baby,True,60997,B 304 | 1-61212-A-32.wav,1,32,keyboard_typing,False,61212,A 305 | 1-61221-A-17.wav,1,17,pouring_water,False,61221,A 306 | 1-61252-A-11.wav,1,11,sea_waves,True,61252,A 307 | 1-61261-A-44.wav,1,44,engine,False,61261,A 308 | 1-61534-A-27.wav,1,27,brushing_teeth,False,61534,A 309 | 1-62509-A-45.wav,1,45,train,False,62509,A 310 | 1-62565-A-44.wav,1,44,engine,False,62565,A 311 | 1-62594-A-32.wav,1,32,keyboard_typing,False,62594,A 312 | 
1-62849-A-38.wav,1,38,clock_tick,True,62849,A 313 | 1-62850-A-38.wav,1,38,clock_tick,True,62850,A 314 | 1-63679-A-24.wav,1,24,coughing,False,63679,A 315 | 1-63871-A-10.wav,1,10,rain,True,63871,A 316 | 1-64398-A-41.wav,1,41,chainsaw,True,64398,A 317 | 1-64398-B-41.wav,1,41,chainsaw,True,64398,B 318 | 1-64473-A-45.wav,1,45,train,False,64473,A 319 | 1-65483-A-13.wav,1,13,crickets,False,65483,A 320 | 1-67033-A-37.wav,1,37,clock_alarm,False,67033,A 321 | 1-67152-A-17.wav,1,17,pouring_water,False,67152,A 322 | 1-67230-A-29.wav,1,29,drinking_sipping,False,67230,A 323 | 1-67432-A-27.wav,1,27,brushing_teeth,False,67432,A 324 | 1-68628-A-27.wav,1,27,brushing_teeth,False,68628,A 325 | 1-68670-A-34.wav,1,34,can_opening,False,68670,A 326 | 1-68734-A-34.wav,1,34,can_opening,False,68734,A 327 | 1-69165-A-34.wav,1,34,can_opening,False,69165,A 328 | 1-69422-A-3.wav,1,3,cow,False,69422,A 329 | 1-69641-A-3.wav,1,3,cow,False,69641,A 330 | 1-69760-A-16.wav,1,16,wind,False,69760,A 331 | 1-70300-A-45.wav,1,45,train,False,70300,A 332 | 1-7057-A-12.wav,1,12,crackling_fire,True,7057,A 333 | 1-71030-A-6.wav,1,6,hen,False,71030,A 334 | 1-72195-A-37.wav,1,37,clock_alarm,False,72195,A 335 | 1-72195-B-37.wav,1,37,clock_alarm,False,72195,B 336 | 1-72229-A-6.wav,1,6,hen,False,72229,A 337 | 1-72229-B-6.wav,1,6,hen,False,72229,B 338 | 1-72695-A-26.wav,1,26,laughing,False,72695,A 339 | 1-73123-A-26.wav,1,26,laughing,False,73123,A 340 | 1-73585-A-7.wav,1,7,insects,False,73585,A 341 | 1-74517-A-37.wav,1,37,clock_alarm,False,74517,A 342 | 1-7456-A-13.wav,1,13,crickets,False,7456,A 343 | 1-75162-A-9.wav,1,9,crow,False,75162,A 344 | 1-75189-A-7.wav,1,7,insects,False,75189,A 345 | 1-75190-A-8.wav,1,8,sheep,False,75190,A 346 | 1-76831-A-42.wav,1,42,siren,False,76831,A 347 | 1-76831-B-42.wav,1,42,siren,False,76831,B 348 | 1-76831-C-42.wav,1,42,siren,False,76831,C 349 | 1-76831-D-42.wav,1,42,siren,False,76831,D 350 | 1-76831-E-42.wav,1,42,siren,False,76831,E 351 | 1-77160-A-3.wav,1,3,cow,False,77160,A 352 | 
1-77241-A-3.wav,1,3,cow,False,77241,A 353 | 1-79113-A-5.wav,1,5,cat,False,79113,A 354 | 1-79146-A-29.wav,1,29,drinking_sipping,False,79146,A 355 | 1-79220-A-17.wav,1,17,pouring_water,False,79220,A 356 | 1-79236-A-36.wav,1,36,vacuum_cleaner,False,79236,A 357 | 1-79711-A-32.wav,1,32,keyboard_typing,False,79711,A 358 | 1-7973-A-7.wav,1,7,insects,False,7973,A 359 | 1-7974-A-49.wav,1,49,hand_saw,False,7974,A 360 | 1-7974-B-49.wav,1,49,hand_saw,False,7974,B 361 | 1-80785-A-7.wav,1,7,insects,False,80785,A 362 | 1-80840-A-13.wav,1,13,crickets,False,80840,A 363 | 1-80841-A-13.wav,1,13,crickets,False,80841,A 364 | 1-81001-A-30.wav,1,30,door_wood_knock,False,81001,A 365 | 1-81001-B-30.wav,1,30,door_wood_knock,False,81001,B 366 | 1-81269-A-3.wav,1,3,cow,False,81269,A 367 | 1-81851-A-31.wav,1,31,mouse_click,False,81851,A 368 | 1-81883-A-21.wav,1,21,sneezing,True,81883,A 369 | 1-82817-A-30.wav,1,30,door_wood_knock,False,82817,A 370 | 1-84393-A-32.wav,1,32,keyboard_typing,False,84393,A 371 | 1-84536-A-39.wav,1,39,glass_breaking,False,84536,A 372 | 1-84704-A-39.wav,1,39,glass_breaking,False,84704,A 373 | 1-84705-A-39.wav,1,39,glass_breaking,False,84705,A 374 | 1-85123-A-31.wav,1,31,mouse_click,False,85123,A 375 | 1-85168-A-39.wav,1,39,glass_breaking,False,85168,A 376 | 1-85184-A-39.wav,1,39,glass_breaking,False,85184,A 377 | 1-85362-A-0.wav,1,0,dog,True,85362,A 378 | 1-85909-A-29.wav,1,29,drinking_sipping,False,85909,A 379 | 1-87565-A-29.wav,1,29,drinking_sipping,False,87565,A 380 | 1-88409-A-45.wav,1,45,train,False,88409,A 381 | 1-88409-B-45.wav,1,45,train,False,88409,B 382 | 1-88574-A-8.wav,1,8,sheep,False,88574,A 383 | 1-88807-A-39.wav,1,39,glass_breaking,False,88807,A 384 | 1-90797-A-15.wav,1,15,water_drops,False,90797,A 385 | 1-91359-A-11.wav,1,11,sea_waves,True,91359,A 386 | 1-91359-B-11.wav,1,11,sea_waves,True,91359,B 387 | 1-94036-A-22.wav,1,22,clapping,False,94036,A 388 | 1-94231-A-32.wav,1,32,keyboard_typing,False,94231,A 389 | 
1-94231-B-32.wav,1,32,keyboard_typing,False,94231,B 390 | 1-95563-A-31.wav,1,31,mouse_click,False,95563,A 391 | 1-96890-A-37.wav,1,37,clock_alarm,False,96890,A 392 | 1-96950-A-9.wav,1,9,crow,False,96950,A 393 | 1-96950-B-9.wav,1,9,crow,False,96950,B 394 | 1-97392-A-0.wav,1,0,dog,True,97392,A 395 | 1-977-A-39.wav,1,39,glass_breaking,False,977,A 396 | 1-97793-A-31.wav,1,31,mouse_click,False,97793,A 397 | 1-9841-A-13.wav,1,13,crickets,False,9841,A 398 | 1-9886-A-49.wav,1,49,hand_saw,False,9886,A 399 | 1-9887-A-49.wav,1,49,hand_saw,False,9887,A 400 | 1-9887-B-49.wav,1,49,hand_saw,False,9887,B 401 | 1-99958-A-31.wav,1,31,mouse_click,False,99958,A 402 | -------------------------------------------------------------------------------- /util_esc50/fold2_test.csv: -------------------------------------------------------------------------------- 1 | filename,fold,target,category,esc10,src_file,take 2 | 2-100648-A-43.wav,2,43,car_horn,False,100648,A 3 | 2-100786-A-1.wav,2,1,rooster,True,100786,A 4 | 2-101676-A-10.wav,2,10,rain,True,101676,A 5 | 2-102414-A-17.wav,2,17,pouring_water,False,102414,A 6 | 2-102414-B-17.wav,2,17,pouring_water,False,102414,B 7 | 2-102414-C-17.wav,2,17,pouring_water,False,102414,C 8 | 2-102414-D-17.wav,2,17,pouring_water,False,102414,D 9 | 2-102414-E-17.wav,2,17,pouring_water,False,102414,E 10 | 2-102414-F-17.wav,2,17,pouring_water,False,102414,F 11 | 2-102414-G-17.wav,2,17,pouring_water,False,102414,G 12 | 2-102435-A-37.wav,2,37,clock_alarm,False,102435,A 13 | 2-102567-A-35.wav,2,35,washing_machine,False,102567,A 14 | 2-102567-B-35.wav,2,35,washing_machine,False,102567,B 15 | 2-102567-C-35.wav,2,35,washing_machine,False,102567,C 16 | 2-102567-D-35.wav,2,35,washing_machine,False,102567,D 17 | 2-102568-A-35.wav,2,35,washing_machine,False,102568,A 18 | 2-102581-A-29.wav,2,29,drinking_sipping,False,102581,A 19 | 2-102581-B-29.wav,2,29,drinking_sipping,False,102581,B 20 | 2-102852-A-11.wav,2,11,sea_waves,True,102852,A 21 | 
2-103423-A-3.wav,2,3,cow,False,103423,A 22 | 2-103424-A-3.wav,2,3,cow,False,103424,A 23 | 2-103426-A-3.wav,2,3,cow,False,103426,A 24 | 2-103427-A-3.wav,2,3,cow,False,103427,A 25 | 2-103428-A-3.wav,2,3,cow,False,103428,A 26 | 2-104105-A-19.wav,2,19,thunderstorm,False,104105,A 27 | 2-104105-B-19.wav,2,19,thunderstorm,False,104105,B 28 | 2-104168-A-32.wav,2,32,keyboard_typing,False,104168,A 29 | 2-104475-A-37.wav,2,37,clock_alarm,False,104475,A 30 | 2-104476-A-37.wav,2,37,clock_alarm,False,104476,A 31 | 2-104877-A-3.wav,2,3,cow,False,104877,A 32 | 2-104952-A-16.wav,2,16,wind,False,104952,A 33 | 2-104952-B-16.wav,2,16,wind,False,104952,B 34 | 2-105270-A-47.wav,2,47,airplane,False,105270,A 35 | 2-106014-A-44.wav,2,44,engine,False,106014,A 36 | 2-106015-A-44.wav,2,44,engine,False,106015,A 37 | 2-106015-B-44.wav,2,44,engine,False,106015,B 38 | 2-106019-A-13.wav,2,13,crickets,False,106019,A 39 | 2-106072-A-36.wav,2,36,vacuum_cleaner,False,106072,A 40 | 2-106073-A-36.wav,2,36,vacuum_cleaner,False,106073,A 41 | 2-106486-A-44.wav,2,44,engine,False,106486,A 42 | 2-106487-A-44.wav,2,44,engine,False,106487,A 43 | 2-106849-A-47.wav,2,47,airplane,False,106849,A 44 | 2-106881-A-39.wav,2,39,glass_breaking,False,106881,A 45 | 2-107228-A-44.wav,2,44,engine,False,107228,A 46 | 2-107228-B-44.wav,2,44,engine,False,107228,B 47 | 2-107351-A-20.wav,2,20,crying_baby,True,107351,A 48 | 2-107351-B-20.wav,2,20,crying_baby,True,107351,B 49 | 2-108017-A-24.wav,2,24,coughing,False,108017,A 50 | 2-108760-A-14.wav,2,14,chirping_birds,False,108760,A 51 | 2-108760-B-14.wav,2,14,chirping_birds,False,108760,B 52 | 2-108761-A-14.wav,2,14,chirping_birds,False,108761,A 53 | 2-108763-A-9.wav,2,9,crow,False,108763,A 54 | 2-108766-A-9.wav,2,9,crow,False,108766,A 55 | 2-108767-A-9.wav,2,9,crow,False,108767,A 56 | 2-108767-B-9.wav,2,9,crow,False,108767,B 57 | 2-108767-C-9.wav,2,9,crow,False,108767,C 58 | 2-109231-A-9.wav,2,9,crow,False,109231,A 59 | 2-109231-B-9.wav,2,9,crow,False,109231,B 60 | 
2-109231-C-9.wav,2,9,crow,False,109231,C 61 | 2-109316-A-32.wav,2,32,keyboard_typing,False,109316,A 62 | 2-109371-A-16.wav,2,16,wind,False,109371,A 63 | 2-109371-B-16.wav,2,16,wind,False,109371,B 64 | 2-109371-C-16.wav,2,16,wind,False,109371,C 65 | 2-109371-D-16.wav,2,16,wind,False,109371,D 66 | 2-109374-A-16.wav,2,16,wind,False,109374,A 67 | 2-109505-A-21.wav,2,21,sneezing,True,109505,A 68 | 2-109759-A-26.wav,2,26,laughing,False,109759,A 69 | 2-109759-B-26.wav,2,26,laughing,False,109759,B 70 | 2-110010-A-5.wav,2,5,cat,False,110010,A 71 | 2-110011-A-5.wav,2,5,cat,False,110011,A 72 | 2-110417-A-28.wav,2,28,snoring,False,110417,A 73 | 2-110417-B-28.wav,2,28,snoring,False,110417,B 74 | 2-110613-A-13.wav,2,13,crickets,False,110613,A 75 | 2-110614-A-8.wav,2,8,sheep,False,110614,A 76 | 2-110614-B-8.wav,2,8,sheep,False,110614,B 77 | 2-112213-A-39.wav,2,39,glass_breaking,False,112213,A 78 | 2-112213-B-39.wav,2,39,glass_breaking,False,112213,B 79 | 2-114254-A-30.wav,2,30,door_wood_knock,False,114254,A 80 | 2-114280-A-0.wav,2,0,dog,True,114280,A 81 | 2-114587-A-0.wav,2,0,dog,True,114587,A 82 | 2-114609-A-28.wav,2,28,snoring,False,114609,A 83 | 2-114609-B-28.wav,2,28,snoring,False,114609,B 84 | 2-116400-A-0.wav,2,0,dog,True,116400,A 85 | 2-117116-A-37.wav,2,37,clock_alarm,False,117116,A 86 | 2-117271-A-0.wav,2,0,dog,True,117271,A 87 | 2-117330-A-28.wav,2,28,snoring,False,117330,A 88 | 2-117615-A-48.wav,2,48,fireworks,False,117615,A 89 | 2-117615-B-48.wav,2,48,fireworks,False,117615,B 90 | 2-117615-C-48.wav,2,48,fireworks,False,117615,C 91 | 2-117615-D-48.wav,2,48,fireworks,False,117615,D 92 | 2-117615-E-48.wav,2,48,fireworks,False,117615,E 93 | 2-117616-A-48.wav,2,48,fireworks,False,117616,A 94 | 2-117617-A-48.wav,2,48,fireworks,False,117617,A 95 | 2-117625-A-10.wav,2,10,rain,True,117625,A 96 | 2-117795-A-3.wav,2,3,cow,False,117795,A 97 | 2-117795-B-3.wav,2,3,cow,False,117795,B 98 | 2-118072-A-0.wav,2,0,dog,True,118072,A 99 | 2-118104-A-21.wav,2,21,sneezing,True,118104,A 100 
| 2-118459-A-32.wav,2,32,keyboard_typing,False,118459,A 101 | 2-118459-B-32.wav,2,32,keyboard_typing,False,118459,B 102 | 2-118624-A-30.wav,2,30,door_wood_knock,False,118624,A 103 | 2-118625-A-30.wav,2,30,door_wood_knock,False,118625,A 104 | 2-118817-A-32.wav,2,32,keyboard_typing,False,118817,A 105 | 2-118964-A-0.wav,2,0,dog,True,118964,A 106 | 2-119102-A-21.wav,2,21,sneezing,True,119102,A 107 | 2-119139-A-31.wav,2,31,mouse_click,False,119139,A 108 | 2-119161-A-8.wav,2,8,sheep,False,119161,A 109 | 2-119161-B-8.wav,2,8,sheep,False,119161,B 110 | 2-119161-C-8.wav,2,8,sheep,False,119161,C 111 | 2-119748-A-38.wav,2,38,clock_tick,True,119748,A 112 | 2-120218-A-30.wav,2,30,door_wood_knock,False,120218,A 113 | 2-120218-B-30.wav,2,30,door_wood_knock,False,120218,B 114 | 2-120333-A-32.wav,2,32,keyboard_typing,False,120333,A 115 | 2-120586-A-6.wav,2,6,hen,False,120586,A 116 | 2-120587-A-6.wav,2,6,hen,False,120587,A 117 | 2-121909-A-35.wav,2,35,washing_machine,False,121909,A 118 | 2-121978-A-29.wav,2,29,drinking_sipping,False,121978,A 119 | 2-122066-A-45.wav,2,45,train,False,122066,A 120 | 2-122066-B-45.wav,2,45,train,False,122066,B 121 | 2-122067-A-45.wav,2,45,train,False,122067,A 122 | 2-122067-B-45.wav,2,45,train,False,122067,B 123 | 2-122104-A-0.wav,2,0,dog,True,122104,A 124 | 2-122104-B-0.wav,2,0,dog,True,122104,B 125 | 2-122616-A-14.wav,2,14,chirping_birds,False,122616,A 126 | 2-122763-A-29.wav,2,29,drinking_sipping,False,122763,A 127 | 2-122763-B-29.wav,2,29,drinking_sipping,False,122763,B 128 | 2-122820-A-36.wav,2,36,vacuum_cleaner,False,122820,A 129 | 2-122820-B-36.wav,2,36,vacuum_cleaner,False,122820,B 130 | 2-123712-A-33.wav,2,33,door_wood_creaks,False,123712,A 131 | 2-123896-A-24.wav,2,24,coughing,False,123896,A 132 | 2-124564-A-15.wav,2,15,water_drops,False,124564,A 133 | 2-124662-A-11.wav,2,11,sea_waves,True,124662,A 134 | 2-125520-A-43.wav,2,43,car_horn,False,125520,A 135 | 2-125821-A-13.wav,2,13,crickets,False,125821,A 136 | 
2-125875-A-13.wav,2,13,crickets,False,125875,A 137 | 2-125966-A-11.wav,2,11,sea_waves,True,125966,A 138 | 2-126433-A-17.wav,2,17,pouring_water,False,126433,A 139 | 2-126756-A-29.wav,2,29,drinking_sipping,False,126756,A 140 | 2-127108-A-38.wav,2,38,clock_tick,True,127108,A 141 | 2-127109-A-6.wav,2,6,hen,False,127109,A 142 | 2-128465-A-43.wav,2,43,car_horn,False,128465,A 143 | 2-128465-B-43.wav,2,43,car_horn,False,128465,B 144 | 2-128631-A-21.wav,2,21,sneezing,True,128631,A 145 | 2-130245-A-34.wav,2,34,can_opening,False,130245,A 146 | 2-130978-A-21.wav,2,21,sneezing,True,130978,A 147 | 2-130979-A-21.wav,2,21,sneezing,True,130979,A 148 | 2-131943-A-38.wav,2,38,clock_tick,True,131943,A 149 | 2-132157-A-11.wav,2,11,sea_waves,True,132157,A 150 | 2-132157-B-11.wav,2,11,sea_waves,True,132157,B 151 | 2-133863-A-11.wav,2,11,sea_waves,True,133863,A 152 | 2-133889-A-30.wav,2,30,door_wood_knock,False,133889,A 153 | 2-134049-A-6.wav,2,6,hen,False,134049,A 154 | 2-134700-A-38.wav,2,38,clock_tick,True,134700,A 155 | 2-134915-A-30.wav,2,30,door_wood_knock,False,134915,A 156 | 2-135649-A-45.wav,2,45,train,False,135649,A 157 | 2-135649-B-45.wav,2,45,train,False,135649,B 158 | 2-135649-C-45.wav,2,45,train,False,135649,C 159 | 2-135728-A-38.wav,2,38,clock_tick,True,135728,A 160 | 2-135860-A-49.wav,2,49,hand_saw,False,135860,A 161 | 2-137162-A-11.wav,2,11,sea_waves,True,137162,A 162 | 2-138257-A-31.wav,2,31,mouse_click,False,138257,A 163 | 2-138465-A-43.wav,2,43,car_horn,False,138465,A 164 | 2-139748-A-15.wav,2,15,water_drops,False,139748,A 165 | 2-139748-B-15.wav,2,15,water_drops,False,139748,B 166 | 2-139749-A-15.wav,2,15,water_drops,False,139749,A 167 | 2-140147-A-38.wav,2,38,clock_tick,True,140147,A 168 | 2-140841-A-30.wav,2,30,door_wood_knock,False,140841,A 169 | 2-141563-A-39.wav,2,39,glass_breaking,False,141563,A 170 | 2-141584-A-38.wav,2,38,clock_tick,True,141584,A 171 | 2-141681-A-36.wav,2,36,vacuum_cleaner,False,141681,A 172 | 
2-141681-B-36.wav,2,36,vacuum_cleaner,False,141681,B 173 | 2-141682-A-36.wav,2,36,vacuum_cleaner,False,141682,A 174 | 2-141682-B-36.wav,2,36,vacuum_cleaner,False,141682,B 175 | 2-144031-A-34.wav,2,34,can_opening,False,144031,A 176 | 2-144137-A-43.wav,2,43,car_horn,False,144137,A 177 | 2-146877-A-31.wav,2,31,mouse_click,False,146877,A 178 | 2-146877-B-31.wav,2,31,mouse_click,False,146877,B 179 | 2-151079-A-20.wav,2,20,crying_baby,True,151079,A 180 | 2-152895-A-31.wav,2,31,mouse_click,False,152895,A 181 | 2-152964-A-31.wav,2,31,mouse_click,False,152964,A 182 | 2-153388-A-31.wav,2,31,mouse_click,False,153388,A 183 | 2-154688-A-31.wav,2,31,mouse_click,False,154688,A 184 | 2-155801-A-11.wav,2,11,sea_waves,True,155801,A 185 | 2-157488-A-6.wav,2,6,hen,False,157488,A 186 | 2-158746-A-2.wav,2,2,pig,False,158746,A 187 | 2-158746-B-2.wav,2,2,pig,False,158746,B 188 | 2-158746-C-2.wav,2,2,pig,False,158746,C 189 | 2-158746-D-2.wav,2,2,pig,False,158746,D 190 | 2-160128-A-7.wav,2,7,insects,False,160128,A 191 | 2-160888-A-47.wav,2,47,airplane,False,160888,A 192 | 2-165801-A-48.wav,2,48,fireworks,False,165801,A 193 | 2-166644-A-2.wav,2,2,pig,False,166644,A 194 | 2-166644-B-2.wav,2,2,pig,False,166644,B 195 | 2-166644-C-2.wav,2,2,pig,False,166644,C 196 | 2-173559-A-39.wav,2,39,glass_breaking,False,173559,A 197 | 2-173607-A-39.wav,2,39,glass_breaking,False,173607,A 198 | 2-173618-A-39.wav,2,39,glass_breaking,False,173618,A 199 | 2-182508-A-8.wav,2,8,sheep,False,182508,A 200 | 2-182508-B-8.wav,2,8,sheep,False,182508,B 201 | 2-184077-A-49.wav,2,49,hand_saw,False,184077,A 202 | 2-18766-A-12.wav,2,12,crackling_fire,True,18766,A 203 | 2-18766-B-12.wav,2,12,crackling_fire,True,18766,B 204 | 2-188822-A-40.wav,2,40,helicopter,True,188822,A 205 | 2-188822-B-40.wav,2,40,helicopter,True,188822,B 206 | 2-188822-C-40.wav,2,40,helicopter,True,188822,C 207 | 2-188822-D-40.wav,2,40,helicopter,True,188822,D 208 | 2-196688-A-8.wav,2,8,sheep,False,196688,A 209 | 2-205966-A-16.wav,2,16,wind,False,205966,A 
210 | 2-209471-A-25.wav,2,25,footsteps,False,209471,A 211 | 2-209472-A-25.wav,2,25,footsteps,False,209472,A 212 | 2-209473-A-25.wav,2,25,footsteps,False,209473,A 213 | 2-209474-A-25.wav,2,25,footsteps,False,209474,A 214 | 2-209475-A-25.wav,2,25,footsteps,False,209475,A 215 | 2-209476-A-25.wav,2,25,footsteps,False,209476,A 216 | 2-209477-A-25.wav,2,25,footsteps,False,209477,A 217 | 2-209478-A-25.wav,2,25,footsteps,False,209478,A 218 | 2-250710-A-39.wav,2,39,glass_breaking,False,250710,A 219 | 2-25292-A-22.wav,2,22,clapping,False,25292,A 220 | 2-25293-A-22.wav,2,22,clapping,False,25293,A 221 | 2-262579-A-45.wav,2,45,train,False,262579,A 222 | 2-28314-A-12.wav,2,12,crackling_fire,True,28314,A 223 | 2-28314-B-12.wav,2,12,crackling_fire,True,28314,B 224 | 2-30322-A-12.wav,2,12,crackling_fire,True,30322,A 225 | 2-30322-B-12.wav,2,12,crackling_fire,True,30322,B 226 | 2-32515-A-4.wav,2,4,frog,False,32515,A 227 | 2-32515-B-4.wav,2,4,frog,False,32515,B 228 | 2-32515-C-4.wav,2,4,frog,False,32515,C 229 | 2-32515-D-4.wav,2,4,frog,False,32515,D 230 | 2-32834-A-4.wav,2,4,frog,False,32834,A 231 | 2-37806-A-40.wav,2,40,helicopter,True,37806,A 232 | 2-37806-B-40.wav,2,40,helicopter,True,37806,B 233 | 2-37806-C-40.wav,2,40,helicopter,True,37806,C 234 | 2-37806-D-40.wav,2,40,helicopter,True,37806,D 235 | 2-37870-A-2.wav,2,2,pig,False,37870,A 236 | 2-39441-A-19.wav,2,19,thunderstorm,False,39441,A 237 | 2-39443-A-19.wav,2,19,thunderstorm,False,39443,A 238 | 2-39443-B-19.wav,2,19,thunderstorm,False,39443,B 239 | 2-39945-A-19.wav,2,19,thunderstorm,False,39945,A 240 | 2-39945-B-19.wav,2,19,thunderstorm,False,39945,B 241 | 2-39945-C-19.wav,2,19,thunderstorm,False,39945,C 242 | 2-42101-A-43.wav,2,43,car_horn,False,42101,A 243 | 2-43802-A-42.wav,2,42,siren,False,43802,A 244 | 2-43806-A-42.wav,2,42,siren,False,43806,A 245 | 2-50665-A-20.wav,2,20,crying_baby,True,50665,A 246 | 2-50666-A-20.wav,2,20,crying_baby,True,50666,A 247 | 2-50667-A-41.wav,2,41,chainsaw,True,50667,A 248 | 
2-50667-B-41.wav,2,41,chainsaw,True,50667,B 249 | 2-50668-A-41.wav,2,41,chainsaw,True,50668,A 250 | 2-50668-B-41.wav,2,41,chainsaw,True,50668,B 251 | 2-50774-A-23.wav,2,23,breathing,False,50774,A 252 | 2-51173-A-35.wav,2,35,washing_machine,False,51173,A 253 | 2-51630-A-49.wav,2,49,hand_saw,False,51630,A 254 | 2-51630-B-49.wav,2,49,hand_saw,False,51630,B 255 | 2-52001-A-28.wav,2,28,snoring,False,52001,A 256 | 2-52001-B-28.wav,2,28,snoring,False,52001,B 257 | 2-52085-A-4.wav,2,4,frog,False,52085,A 258 | 2-52085-B-4.wav,2,4,frog,False,52085,B 259 | 2-52789-A-4.wav,2,4,frog,False,52789,A 260 | 2-54086-A-43.wav,2,43,car_horn,False,54086,A 261 | 2-54961-A-23.wav,2,23,breathing,False,54961,A 262 | 2-54962-A-23.wav,2,23,breathing,False,54962,A 263 | 2-56926-A-46.wav,2,46,church_bells,False,56926,A 264 | 2-57191-A-46.wav,2,46,church_bells,False,57191,A 265 | 2-57733-A-22.wav,2,22,clapping,False,57733,A 266 | 2-59241-A-35.wav,2,35,washing_machine,False,59241,A 267 | 2-59321-A-49.wav,2,49,hand_saw,False,59321,A 268 | 2-59565-A-46.wav,2,46,church_bells,False,59565,A 269 | 2-59566-A-46.wav,2,46,church_bells,False,59566,A 270 | 2-60180-A-49.wav,2,49,hand_saw,False,60180,A 271 | 2-60180-B-49.wav,2,49,hand_saw,False,60180,B 272 | 2-60791-A-26.wav,2,26,laughing,False,60791,A 273 | 2-60794-A-26.wav,2,26,laughing,False,60794,A 274 | 2-60795-A-26.wav,2,26,laughing,False,60795,A 275 | 2-60900-A-22.wav,2,22,clapping,False,60900,A 276 | 2-61311-A-12.wav,2,12,crackling_fire,True,61311,A 277 | 2-61618-A-46.wav,2,46,church_bells,False,61618,A 278 | 2-62226-A-26.wav,2,26,laughing,False,62226,A 279 | 2-64332-A-18.wav,2,18,toilet_flush,False,64332,A 280 | 2-64962-A-15.wav,2,15,water_drops,False,64962,A 281 | 2-64963-A-15.wav,2,15,water_drops,False,64963,A 282 | 2-65484-A-18.wav,2,18,toilet_flush,False,65484,A 283 | 2-65747-A-12.wav,2,12,crackling_fire,True,65747,A 284 | 2-65750-A-1.wav,2,1,rooster,True,65750,A 285 | 2-66205-A-23.wav,2,23,breathing,False,66205,A 286 | 
2-66637-A-20.wav,2,20,crying_baby,True,66637,A 287 | 2-66637-B-20.wav,2,20,crying_baby,True,66637,B 288 | 2-67422-A-18.wav,2,18,toilet_flush,False,67422,A 289 | 2-68391-A-41.wav,2,41,chainsaw,True,68391,A 290 | 2-68391-B-41.wav,2,41,chainsaw,True,68391,B 291 | 2-68595-A-15.wav,2,15,water_drops,False,68595,A 292 | 2-68595-B-15.wav,2,15,water_drops,False,68595,B 293 | 2-69131-A-5.wav,2,5,cat,False,69131,A 294 | 2-69131-B-5.wav,2,5,cat,False,69131,B 295 | 2-70052-A-42.wav,2,42,siren,False,70052,A 296 | 2-70052-B-42.wav,2,42,siren,False,70052,B 297 | 2-70280-A-18.wav,2,18,toilet_flush,False,70280,A 298 | 2-70344-A-33.wav,2,33,door_wood_creaks,False,70344,A 299 | 2-70366-A-33.wav,2,33,door_wood_creaks,False,70366,A 300 | 2-70367-A-33.wav,2,33,door_wood_creaks,False,70367,A 301 | 2-70936-A-42.wav,2,42,siren,False,70936,A 302 | 2-70938-A-42.wav,2,42,siren,False,70938,A 303 | 2-70939-A-42.wav,2,42,siren,False,70939,A 304 | 2-71162-A-1.wav,2,1,rooster,True,71162,A 305 | 2-72268-A-6.wav,2,6,hen,False,72268,A 306 | 2-72547-A-14.wav,2,14,chirping_birds,False,72547,A 307 | 2-72547-B-14.wav,2,14,chirping_birds,False,72547,B 308 | 2-72547-C-14.wav,2,14,chirping_birds,False,72547,C 309 | 2-72547-D-14.wav,2,14,chirping_birds,False,72547,D 310 | 2-72677-A-18.wav,2,18,toilet_flush,False,72677,A 311 | 2-72688-A-42.wav,2,42,siren,False,72688,A 312 | 2-72970-A-10.wav,2,10,rain,True,72970,A 313 | 2-73027-A-10.wav,2,10,rain,True,73027,A 314 | 2-7321-A-33.wav,2,33,door_wood_creaks,False,7321,A 315 | 2-73260-A-10.wav,2,10,rain,True,73260,A 316 | 2-73544-A-27.wav,2,27,brushing_teeth,False,73544,A 317 | 2-74361-A-47.wav,2,47,airplane,False,74361,A 318 | 2-74977-A-18.wav,2,18,toilet_flush,False,74977,A 319 | 2-75726-A-6.wav,2,6,hen,False,75726,A 320 | 2-76408-A-22.wav,2,22,clapping,False,76408,A 321 | 2-76408-B-22.wav,2,22,clapping,False,76408,B 322 | 2-76408-C-22.wav,2,22,clapping,False,76408,C 323 | 2-76408-D-22.wav,2,22,clapping,False,76408,D 324 | 2-76868-A-6.wav,2,6,hen,False,76868,A 325 
| 2-77346-A-46.wav,2,46,church_bells,False,77346,A 326 | 2-77347-A-46.wav,2,46,church_bells,False,77347,A 327 | 2-77945-A-41.wav,2,41,chainsaw,True,77945,A 328 | 2-77945-B-41.wav,2,41,chainsaw,True,77945,B 329 | 2-78381-A-46.wav,2,46,church_bells,False,78381,A 330 | 2-78562-A-37.wav,2,37,clock_alarm,False,78562,A 331 | 2-78562-B-37.wav,2,37,clock_alarm,False,78562,B 332 | 2-78651-A-44.wav,2,44,engine,False,78651,A 333 | 2-78781-A-47.wav,2,47,airplane,False,78781,A 334 | 2-78799-A-47.wav,2,47,airplane,False,78799,A 335 | 2-79769-A-26.wav,2,26,laughing,False,79769,A 336 | 2-79775-A-26.wav,2,26,laughing,False,79775,A 337 | 2-80313-A-28.wav,2,28,snoring,False,80313,A 338 | 2-80482-A-20.wav,2,20,crying_baby,True,80482,A 339 | 2-80844-A-13.wav,2,13,crickets,False,80844,A 340 | 2-81112-A-34.wav,2,34,can_opening,False,81112,A 341 | 2-81190-A-34.wav,2,34,can_opening,False,81190,A 342 | 2-81270-A-1.wav,2,1,rooster,True,81270,A 343 | 2-81731-A-10.wav,2,10,rain,True,81731,A 344 | 2-81970-A-7.wav,2,7,insects,False,81970,A 345 | 2-81970-B-7.wav,2,7,insects,False,81970,B 346 | 2-81970-C-7.wav,2,7,insects,False,81970,C 347 | 2-82071-A-27.wav,2,27,brushing_teeth,False,82071,A 348 | 2-82077-A-7.wav,2,7,insects,False,82077,A 349 | 2-82274-A-5.wav,2,5,cat,False,82274,A 350 | 2-82274-B-5.wav,2,5,cat,False,82274,B 351 | 2-82367-A-10.wav,2,10,rain,True,82367,A 352 | 2-82455-A-23.wav,2,23,breathing,False,82455,A 353 | 2-82538-A-21.wav,2,21,sneezing,True,82538,A 354 | 2-83270-A-13.wav,2,13,crickets,False,83270,A 355 | 2-83536-A-27.wav,2,27,brushing_teeth,False,83536,A 356 | 2-83667-A-34.wav,2,34,can_opening,False,83667,A 357 | 2-83688-A-34.wav,2,34,can_opening,False,83688,A 358 | 2-83934-A-5.wav,2,5,cat,False,83934,A 359 | 2-83934-B-5.wav,2,5,cat,False,83934,B 360 | 2-84693-A-49.wav,2,49,hand_saw,False,84693,A 361 | 2-84943-A-18.wav,2,18,toilet_flush,False,84943,A 362 | 2-84965-A-23.wav,2,23,breathing,False,84965,A 363 | 2-85139-A-13.wav,2,13,crickets,False,85139,A 364 | 
2-85292-A-24.wav,2,24,coughing,False,85292,A 365 | 2-85434-A-27.wav,2,27,brushing_teeth,False,85434,A 366 | 2-85471-A-34.wav,2,34,can_opening,False,85471,A 367 | 2-85945-A-18.wav,2,18,toilet_flush,False,85945,A 368 | 2-86160-A-27.wav,2,27,brushing_teeth,False,86160,A 369 | 2-87282-A-34.wav,2,34,can_opening,False,87282,A 370 | 2-87412-A-24.wav,2,24,coughing,False,87412,A 371 | 2-87780-A-33.wav,2,33,door_wood_creaks,False,87780,A 372 | 2-87781-A-10.wav,2,10,rain,True,87781,A 373 | 2-87794-A-24.wav,2,24,coughing,False,87794,A 374 | 2-87795-A-24.wav,2,24,coughing,False,87795,A 375 | 2-87799-A-24.wav,2,24,coughing,False,87799,A 376 | 2-88724-A-38.wav,2,38,clock_tick,True,88724,A 377 | 2-89516-A-37.wav,2,37,clock_alarm,False,89516,A 378 | 2-91912-A-33.wav,2,33,door_wood_creaks,False,91912,A 379 | 2-91912-B-33.wav,2,33,door_wood_creaks,False,91912,B 380 | 2-92627-A-27.wav,2,27,brushing_teeth,False,92627,A 381 | 2-92978-A-29.wav,2,29,drinking_sipping,False,92978,A 382 | 2-93030-A-21.wav,2,21,sneezing,True,93030,A 383 | 2-94230-A-27.wav,2,27,brushing_teeth,False,94230,A 384 | 2-94807-A-29.wav,2,29,drinking_sipping,False,94807,A 385 | 2-95035-A-1.wav,2,1,rooster,True,95035,A 386 | 2-95258-A-1.wav,2,1,rooster,True,95258,A 387 | 2-95258-B-1.wav,2,1,rooster,True,95258,B 388 | 2-95567-A-23.wav,2,23,breathing,False,95567,A 389 | 2-96033-A-13.wav,2,13,crickets,False,96033,A 390 | 2-96063-A-37.wav,2,37,clock_alarm,False,96063,A 391 | 2-96460-A-1.wav,2,1,rooster,True,96460,A 392 | 2-96654-A-47.wav,2,47,airplane,False,96654,A 393 | 2-96904-A-27.wav,2,27,brushing_teeth,False,96904,A 394 | 2-98392-A-23.wav,2,23,breathing,False,98392,A 395 | 2-98676-A-24.wav,2,24,coughing,False,98676,A 396 | 2-98866-A-47.wav,2,47,airplane,False,98866,A 397 | 2-99795-A-32.wav,2,32,keyboard_typing,False,99795,A 398 | 2-99796-A-32.wav,2,32,keyboard_typing,False,99796,A 399 | 2-99955-A-7.wav,2,7,insects,False,99955,A 400 | 2-99955-B-7.wav,2,7,insects,False,99955,B 401 | 
2-99955-C-7.wav,2,7,insects,False,99955,C 402 | -------------------------------------------------------------------------------- /util_esc50/fold3_test.csv: -------------------------------------------------------------------------------- 1 | filename,fold,target,category,esc10,src_file,take 2 | 3-100018-A-18.wav,3,18,toilet_flush,False,100018,A 3 | 3-100024-A-27.wav,3,27,brushing_teeth,False,100024,A 4 | 3-100024-B-27.wav,3,27,brushing_teeth,False,100024,B 5 | 3-101381-A-33.wav,3,33,door_wood_creaks,False,101381,A 6 | 3-101381-B-33.wav,3,33,door_wood_creaks,False,101381,B 7 | 3-102583-A-49.wav,3,49,hand_saw,False,102583,A 8 | 3-102583-B-49.wav,3,49,hand_saw,False,102583,B 9 | 3-102583-C-49.wav,3,49,hand_saw,False,102583,C 10 | 3-102908-A-4.wav,3,4,frog,False,102908,A 11 | 3-103050-A-19.wav,3,19,thunderstorm,False,103050,A 12 | 3-103051-A-19.wav,3,19,thunderstorm,False,103051,A 13 | 3-103051-B-19.wav,3,19,thunderstorm,False,103051,B 14 | 3-103051-C-19.wav,3,19,thunderstorm,False,103051,C 15 | 3-103401-A-33.wav,3,33,door_wood_creaks,False,103401,A 16 | 3-103401-B-33.wav,3,33,door_wood_creaks,False,103401,B 17 | 3-103401-C-33.wav,3,33,door_wood_creaks,False,103401,C 18 | 3-103401-D-33.wav,3,33,door_wood_creaks,False,103401,D 19 | 3-103597-A-25.wav,3,25,footsteps,False,103597,A 20 | 3-103598-A-25.wav,3,25,footsteps,False,103598,A 21 | 3-103599-A-25.wav,3,25,footsteps,False,103599,A 22 | 3-103599-B-25.wav,3,25,footsteps,False,103599,B 23 | 3-104632-A-12.wav,3,12,crackling_fire,True,104632,A 24 | 3-104761-A-7.wav,3,7,insects,False,104761,A 25 | 3-104761-B-7.wav,3,7,insects,False,104761,B 26 | 3-104958-A-12.wav,3,12,crackling_fire,True,104958,A 27 | 3-105235-A-7.wav,3,7,insects,False,105235,A 28 | 3-105236-A-7.wav,3,7,insects,False,105236,A 29 | 3-107123-A-26.wav,3,26,laughing,False,107123,A 30 | 3-107219-A-1.wav,3,1,rooster,True,107219,A 31 | 3-108160-A-23.wav,3,23,breathing,False,108160,A 32 | 3-108451-A-17.wav,3,17,pouring_water,False,108451,A 33 | 
3-108451-B-17.wav,3,17,pouring_water,False,108451,B 34 | 3-108677-A-18.wav,3,18,toilet_flush,False,108677,A 35 | 3-108791-A-18.wav,3,18,toilet_flush,False,108791,A 36 | 3-110536-A-26.wav,3,26,laughing,False,110536,A 37 | 3-110913-A-7.wav,3,7,insects,False,110913,A 38 | 3-110913-B-7.wav,3,7,insects,False,110913,B 39 | 3-110913-C-7.wav,3,7,insects,False,110913,C 40 | 3-110913-D-7.wav,3,7,insects,False,110913,D 41 | 3-111102-A-46.wav,3,46,church_bells,False,111102,A 42 | 3-111102-B-46.wav,3,46,church_bells,False,111102,B 43 | 3-112356-A-18.wav,3,18,toilet_flush,False,112356,A 44 | 3-112397-A-9.wav,3,9,crow,False,112397,A 45 | 3-112522-A-27.wav,3,27,brushing_teeth,False,112522,A 46 | 3-112557-A-23.wav,3,23,breathing,False,112557,A 47 | 3-112557-B-23.wav,3,23,breathing,False,112557,B 48 | 3-115382-A-44.wav,3,44,engine,False,115382,A 49 | 3-115387-A-47.wav,3,47,airplane,False,115387,A 50 | 3-115387-B-47.wav,3,47,airplane,False,115387,B 51 | 3-115387-C-47.wav,3,47,airplane,False,115387,C 52 | 3-116135-A-1.wav,3,1,rooster,True,116135,A 53 | 3-117293-A-9.wav,3,9,crow,False,117293,A 54 | 3-117504-A-16.wav,3,16,wind,False,117504,A 55 | 3-117504-B-16.wav,3,16,wind,False,117504,B 56 | 3-117793-A-37.wav,3,37,clock_alarm,False,117793,A 57 | 3-117883-A-37.wav,3,37,clock_alarm,False,117883,A 58 | 3-118059-A-18.wav,3,18,toilet_flush,False,118059,A 59 | 3-118069-A-27.wav,3,27,brushing_teeth,False,118069,A 60 | 3-118069-B-27.wav,3,27,brushing_teeth,False,118069,B 61 | 3-118194-A-33.wav,3,33,door_wood_creaks,False,118194,A 62 | 3-118487-A-26.wav,3,26,laughing,False,118487,A 63 | 3-118656-A-41.wav,3,41,chainsaw,True,118656,A 64 | 3-118657-A-41.wav,3,41,chainsaw,True,118657,A 65 | 3-118657-B-41.wav,3,41,chainsaw,True,118657,B 66 | 3-118658-A-41.wav,3,41,chainsaw,True,118658,A 67 | 3-118658-B-41.wav,3,41,chainsaw,True,118658,B 68 | 3-118972-A-41.wav,3,41,chainsaw,True,118972,A 69 | 3-118972-B-41.wav,3,41,chainsaw,True,118972,B 70 | 3-119120-A-48.wav,3,48,fireworks,False,119120,A 71 | 
3-119120-B-48.wav,3,48,fireworks,False,119120,B 72 | 3-119120-C-48.wav,3,48,fireworks,False,119120,C 73 | 3-119120-D-48.wav,3,48,fireworks,False,119120,D 74 | 3-119120-E-48.wav,3,48,fireworks,False,119120,E 75 | 3-119455-A-44.wav,3,44,engine,False,119455,A 76 | 3-119459-A-26.wav,3,26,laughing,False,119459,A 77 | 3-120526-A-37.wav,3,37,clock_alarm,False,120526,A 78 | 3-120526-B-37.wav,3,37,clock_alarm,False,120526,B 79 | 3-120644-A-12.wav,3,12,crackling_fire,True,120644,A 80 | 3-121348-A-9.wav,3,9,crow,False,121348,A 81 | 3-122110-A-46.wav,3,46,church_bells,False,122110,A 82 | 3-123086-A-28.wav,3,28,snoring,False,123086,A 83 | 3-123224-A-19.wav,3,19,thunderstorm,False,123224,A 84 | 3-124376-A-3.wav,3,3,cow,False,124376,A 85 | 3-124376-B-3.wav,3,3,cow,False,124376,B 86 | 3-124600-A-19.wav,3,19,thunderstorm,False,124600,A 87 | 3-124795-A-28.wav,3,28,snoring,False,124795,A 88 | 3-124925-A-9.wav,3,9,crow,False,124925,A 89 | 3-124958-A-28.wav,3,28,snoring,False,124958,A 90 | 3-125418-A-24.wav,3,24,coughing,False,125418,A 91 | 3-125548-A-32.wav,3,32,keyboard_typing,False,125548,A 92 | 3-126113-A-26.wav,3,26,laughing,False,126113,A 93 | 3-126358-A-3.wav,3,3,cow,False,126358,A 94 | 3-126358-B-3.wav,3,3,cow,False,126358,B 95 | 3-126391-A-27.wav,3,27,brushing_teeth,False,126391,A 96 | 3-126391-B-27.wav,3,27,brushing_teeth,False,126391,B 97 | 3-127874-A-17.wav,3,17,pouring_water,False,127874,A 98 | 3-127890-A-9.wav,3,9,crow,False,127890,A 99 | 3-127890-B-9.wav,3,9,crow,False,127890,B 100 | 3-127890-C-9.wav,3,9,crow,False,127890,C 101 | 3-128160-A-44.wav,3,44,engine,False,128160,A 102 | 3-128512-A-47.wav,3,47,airplane,False,128512,A 103 | 3-128512-B-47.wav,3,47,airplane,False,128512,B 104 | 3-129264-A-9.wav,3,9,crow,False,129264,A 105 | 3-129338-A-13.wav,3,13,crickets,False,129338,A 106 | 3-129678-A-13.wav,3,13,crickets,False,129678,A 107 | 3-130330-A-22.wav,3,22,clapping,False,130330,A 108 | 3-130998-A-28.wav,3,28,snoring,False,130998,A 109 | 
3-130998-B-28.wav,3,28,snoring,False,130998,B 110 | 3-131943-A-37.wav,3,37,clock_alarm,False,131943,A 111 | 3-132340-A-37.wav,3,37,clock_alarm,False,132340,A 112 | 3-132601-A-24.wav,3,24,coughing,False,132601,A 113 | 3-132747-A-26.wav,3,26,laughing,False,132747,A 114 | 3-132830-A-32.wav,3,32,keyboard_typing,False,132830,A 115 | 3-132852-A-10.wav,3,10,rain,True,132852,A 116 | 3-133977-A-29.wav,3,29,drinking_sipping,False,133977,A 117 | 3-134049-A-1.wav,3,1,rooster,True,134049,A 118 | 3-134699-A-16.wav,3,16,wind,False,134699,A 119 | 3-134699-B-16.wav,3,16,wind,False,134699,B 120 | 3-134699-C-16.wav,3,16,wind,False,134699,C 121 | 3-134802-A-13.wav,3,13,crickets,False,134802,A 122 | 3-135469-A-35.wav,3,35,washing_machine,False,135469,A 123 | 3-135650-A-45.wav,3,45,train,False,135650,A 124 | 3-135650-B-45.wav,3,45,train,False,135650,B 125 | 3-136288-A-0.wav,3,0,dog,True,136288,A 126 | 3-136451-A-45.wav,3,45,train,False,136451,A 127 | 3-136608-A-16.wav,3,16,wind,False,136608,A 128 | 3-137152-A-1.wav,3,1,rooster,True,137152,A 129 | 3-138114-A-22.wav,3,22,clapping,False,138114,A 130 | 3-138212-A-45.wav,3,45,train,False,138212,A 131 | 3-139109-A-46.wav,3,46,church_bells,False,139109,A 132 | 3-139331-A-27.wav,3,27,brushing_teeth,False,139331,A 133 | 3-139958-A-37.wav,3,37,clock_alarm,False,139958,A 134 | 3-140199-A-8.wav,3,8,sheep,False,140199,A 135 | 3-140199-B-8.wav,3,8,sheep,False,140199,B 136 | 3-140199-C-8.wav,3,8,sheep,False,140199,C 137 | 3-140199-D-8.wav,3,8,sheep,False,140199,D 138 | 3-140323-A-29.wav,3,29,drinking_sipping,False,140323,A 139 | 3-140774-A-10.wav,3,10,rain,True,140774,A 140 | 3-141240-A-44.wav,3,44,engine,False,141240,A 141 | 3-141240-B-44.wav,3,44,engine,False,141240,B 142 | 3-141559-A-45.wav,3,45,train,False,141559,A 143 | 3-141684-A-21.wav,3,21,sneezing,True,141684,A 144 | 3-142005-A-10.wav,3,10,rain,True,142005,A 145 | 3-142006-A-10.wav,3,10,rain,True,142006,A 146 | 3-142349-A-17.wav,3,17,pouring_water,False,142349,A 147 | 
3-142593-A-38.wav,3,38,clock_tick,True,142593,A 148 | 3-142601-A-21.wav,3,21,sneezing,True,142601,A 149 | 3-142604-A-24.wav,3,24,coughing,False,142604,A 150 | 3-142605-A-21.wav,3,21,sneezing,True,142605,A 151 | 3-143119-A-21.wav,3,21,sneezing,True,143119,A 152 | 3-143560-A-47.wav,3,47,airplane,False,143560,A 153 | 3-143562-A-47.wav,3,47,airplane,False,143562,A 154 | 3-143929-A-10.wav,3,10,rain,True,143929,A 155 | 3-143933-A-38.wav,3,38,clock_tick,True,143933,A 156 | 3-144028-A-0.wav,3,0,dog,True,144028,A 157 | 3-144106-A-32.wav,3,32,keyboard_typing,False,144106,A 158 | 3-144120-A-32.wav,3,32,keyboard_typing,False,144120,A 159 | 3-144128-A-23.wav,3,23,breathing,False,144128,A 160 | 3-144128-B-23.wav,3,23,breathing,False,144128,B 161 | 3-144253-A-29.wav,3,29,drinking_sipping,False,144253,A 162 | 3-144259-A-29.wav,3,29,drinking_sipping,False,144259,A 163 | 3-144510-A-30.wav,3,30,door_wood_knock,False,144510,A 164 | 3-144692-A-21.wav,3,21,sneezing,True,144692,A 165 | 3-144827-A-11.wav,3,11,sea_waves,True,144827,A 166 | 3-144827-B-11.wav,3,11,sea_waves,True,144827,B 167 | 3-144891-A-19.wav,3,19,thunderstorm,False,144891,A 168 | 3-144891-B-19.wav,3,19,thunderstorm,False,144891,B 169 | 3-145382-A-1.wav,3,1,rooster,True,145382,A 170 | 3-145387-A-29.wav,3,29,drinking_sipping,False,145387,A 171 | 3-145487-A-24.wav,3,24,coughing,False,145487,A 172 | 3-145577-A-43.wav,3,43,car_horn,False,145577,A 173 | 3-145719-A-17.wav,3,17,pouring_water,False,145719,A 174 | 3-145774-A-12.wav,3,12,crackling_fire,True,145774,A 175 | 3-146033-A-13.wav,3,13,crickets,False,146033,A 176 | 3-146186-A-44.wav,3,44,engine,False,146186,A 177 | 3-146697-A-43.wav,3,43,car_horn,False,146697,A 178 | 3-146873-A-24.wav,3,24,coughing,False,146873,A 179 | 3-146964-A-5.wav,3,5,cat,False,146964,A 180 | 3-146965-A-5.wav,3,5,cat,False,146965,A 181 | 3-146972-A-5.wav,3,5,cat,False,146972,A 182 | 3-147342-A-34.wav,3,34,can_opening,False,147342,A 183 | 3-147343-A-34.wav,3,34,can_opening,False,147343,A 184 | 
3-147965-A-12.wav,3,12,crackling_fire,True,147965,A 185 | 3-148297-A-37.wav,3,37,clock_alarm,False,148297,A 186 | 3-148330-A-21.wav,3,21,sneezing,True,148330,A 187 | 3-148932-A-34.wav,3,34,can_opening,False,148932,A 188 | 3-149042-A-24.wav,3,24,coughing,False,149042,A 189 | 3-149189-A-1.wav,3,1,rooster,True,149189,A 190 | 3-149448-A-17.wav,3,17,pouring_water,False,149448,A 191 | 3-149465-A-22.wav,3,22,clapping,False,149465,A 192 | 3-150231-A-21.wav,3,21,sneezing,True,150231,A 193 | 3-150363-A-38.wav,3,38,clock_tick,True,150363,A 194 | 3-150979-A-40.wav,3,40,helicopter,True,150979,A 195 | 3-150979-B-40.wav,3,40,helicopter,True,150979,B 196 | 3-150979-C-40.wav,3,40,helicopter,True,150979,C 197 | 3-151080-A-20.wav,3,20,crying_baby,True,151080,A 198 | 3-151081-A-20.wav,3,20,crying_baby,True,151081,A 199 | 3-151081-B-20.wav,3,20,crying_baby,True,151081,B 200 | 3-151089-A-30.wav,3,30,door_wood_knock,False,151089,A 201 | 3-151206-A-23.wav,3,23,breathing,False,151206,A 202 | 3-151212-A-24.wav,3,24,coughing,False,151212,A 203 | 3-151213-A-24.wav,3,24,coughing,False,151213,A 204 | 3-151255-A-28.wav,3,28,snoring,False,151255,A 205 | 3-151269-A-35.wav,3,35,washing_machine,False,151269,A 206 | 3-151273-A-35.wav,3,35,washing_machine,False,151273,A 207 | 3-151557-A-28.wav,3,28,snoring,False,151557,A 208 | 3-151557-B-28.wav,3,28,snoring,False,151557,B 209 | 3-152007-A-20.wav,3,20,crying_baby,True,152007,A 210 | 3-152007-B-20.wav,3,20,crying_baby,True,152007,B 211 | 3-152007-C-20.wav,3,20,crying_baby,True,152007,C 212 | 3-152007-D-20.wav,3,20,crying_baby,True,152007,D 213 | 3-152007-E-20.wav,3,20,crying_baby,True,152007,E 214 | 3-152020-A-36.wav,3,36,vacuum_cleaner,False,152020,A 215 | 3-152020-B-36.wav,3,36,vacuum_cleaner,False,152020,B 216 | 3-152020-C-36.wav,3,36,vacuum_cleaner,False,152020,C 217 | 3-152039-A-3.wav,3,3,cow,False,152039,A 218 | 3-152039-B-3.wav,3,3,cow,False,152039,B 219 | 3-152594-A-30.wav,3,30,door_wood_knock,False,152594,A 220 | 
3-152912-A-26.wav,3,26,laughing,False,152912,A 221 | 3-152997-A-26.wav,3,26,laughing,False,152997,A 222 | 3-153057-A-43.wav,3,43,car_horn,False,153057,A 223 | 3-153444-A-32.wav,3,32,keyboard_typing,False,153444,A 224 | 3-154378-A-30.wav,3,30,door_wood_knock,False,154378,A 225 | 3-154439-A-17.wav,3,17,pouring_water,False,154439,A 226 | 3-154758-A-44.wav,3,44,engine,False,154758,A 227 | 3-154781-A-32.wav,3,32,keyboard_typing,False,154781,A 228 | 3-154926-A-40.wav,3,40,helicopter,True,154926,A 229 | 3-154926-B-40.wav,3,40,helicopter,True,154926,B 230 | 3-154957-A-1.wav,3,1,rooster,True,154957,A 231 | 3-155130-A-43.wav,3,43,car_horn,False,155130,A 232 | 3-155234-A-43.wav,3,43,car_horn,False,155234,A 233 | 3-155312-A-0.wav,3,0,dog,True,155312,A 234 | 3-155556-A-31.wav,3,31,mouse_click,False,155556,A 235 | 3-155568-A-32.wav,3,32,keyboard_typing,False,155568,A 236 | 3-155570-A-32.wav,3,32,keyboard_typing,False,155570,A 237 | 3-155577-A-14.wav,3,14,chirping_birds,False,155577,A 238 | 3-155578-A-14.wav,3,14,chirping_birds,False,155578,A 239 | 3-155579-A-14.wav,3,14,chirping_birds,False,155579,A 240 | 3-155583-A-14.wav,3,14,chirping_birds,False,155583,A 241 | 3-155584-A-14.wav,3,14,chirping_birds,False,155584,A 242 | 3-155642-A-11.wav,3,11,sea_waves,True,155642,A 243 | 3-155642-B-11.wav,3,11,sea_waves,True,155642,B 244 | 3-155659-A-34.wav,3,34,can_opening,False,155659,A 245 | 3-155766-A-13.wav,3,13,crickets,False,155766,A 246 | 3-156391-A-35.wav,3,35,washing_machine,False,156391,A 247 | 3-156393-A-35.wav,3,35,washing_machine,False,156393,A 248 | 3-156558-A-21.wav,3,21,sneezing,True,156558,A 249 | 3-156581-A-14.wav,3,14,chirping_birds,False,156581,A 250 | 3-156581-B-14.wav,3,14,chirping_birds,False,156581,B 251 | 3-156907-A-15.wav,3,15,water_drops,False,156907,A 252 | 3-157149-A-10.wav,3,10,rain,True,157149,A 253 | 3-157187-A-12.wav,3,12,crackling_fire,True,157187,A 254 | 3-157487-A-10.wav,3,10,rain,True,157487,A 255 | 3-157492-A-45.wav,3,45,train,False,157492,A 256 | 
3-157615-A-10.wav,3,10,rain,True,157615,A 257 | 3-157695-A-0.wav,3,0,dog,True,157695,A 258 | 3-158056-A-31.wav,3,31,mouse_click,False,158056,A 259 | 3-158056-B-31.wav,3,31,mouse_click,False,158056,B 260 | 3-158476-A-12.wav,3,12,crackling_fire,True,158476,A 261 | 3-159346-A-36.wav,3,36,vacuum_cleaner,False,159346,A 262 | 3-159346-B-36.wav,3,36,vacuum_cleaner,False,159346,B 263 | 3-159347-A-36.wav,3,36,vacuum_cleaner,False,159347,A 264 | 3-159347-B-36.wav,3,36,vacuum_cleaner,False,159347,B 265 | 3-159348-A-36.wav,3,36,vacuum_cleaner,False,159348,A 266 | 3-159445-A-45.wav,3,45,train,False,159445,A 267 | 3-159445-B-45.wav,3,45,train,False,159445,B 268 | 3-160119-A-15.wav,3,15,water_drops,False,160119,A 269 | 3-160993-A-3.wav,3,3,cow,False,160993,A 270 | 3-161010-A-43.wav,3,43,car_horn,False,161010,A 271 | 3-161500-A-17.wav,3,17,pouring_water,False,161500,A 272 | 3-162786-A-13.wav,3,13,crickets,False,162786,A 273 | 3-163288-A-1.wav,3,1,rooster,True,163288,A 274 | 3-163459-A-0.wav,3,0,dog,True,163459,A 275 | 3-163607-A-13.wav,3,13,crickets,False,163607,A 276 | 3-163607-B-13.wav,3,13,crickets,False,163607,B 277 | 3-163727-A-3.wav,3,3,cow,False,163727,A 278 | 3-164120-A-11.wav,3,11,sea_waves,True,164120,A 279 | 3-164216-A-6.wav,3,6,hen,False,164216,A 280 | 3-164216-B-6.wav,3,6,hen,False,164216,B 281 | 3-164216-C-6.wav,3,6,hen,False,164216,C 282 | 3-164592-A-15.wav,3,15,water_drops,False,164592,A 283 | 3-164593-A-15.wav,3,15,water_drops,False,164593,A 284 | 3-164594-A-15.wav,3,15,water_drops,False,164594,A 285 | 3-164595-A-15.wav,3,15,water_drops,False,164595,A 286 | 3-164630-A-11.wav,3,11,sea_waves,True,164630,A 287 | 3-164688-A-38.wav,3,38,clock_tick,True,164688,A 288 | 3-165856-A-41.wav,3,41,chainsaw,True,165856,A 289 | 3-166125-A-23.wav,3,23,breathing,False,166125,A 290 | 3-166125-B-23.wav,3,23,breathing,False,166125,B 291 | 3-166324-A-15.wav,3,15,water_drops,False,166324,A 292 | 3-166326-A-15.wav,3,15,water_drops,False,166326,A 293 | 
3-166422-A-11.wav,3,11,sea_waves,True,166422,A 294 | 3-166546-A-34.wav,3,34,can_opening,False,166546,A 295 | 3-166546-B-34.wav,3,34,can_opening,False,166546,B 296 | 3-167096-A-31.wav,3,31,mouse_click,False,167096,A 297 | 3-169907-A-29.wav,3,29,drinking_sipping,False,169907,A 298 | 3-170002-A-34.wav,3,34,can_opening,False,170002,A 299 | 3-170015-A-0.wav,3,0,dog,True,170015,A 300 | 3-170312-A-31.wav,3,31,mouse_click,False,170312,A 301 | 3-170377-A-38.wav,3,38,clock_tick,True,170377,A 302 | 3-170383-A-38.wav,3,38,clock_tick,True,170383,A 303 | 3-170574-A-30.wav,3,30,door_wood_knock,False,170574,A 304 | 3-170851-A-31.wav,3,31,mouse_click,False,170851,A 305 | 3-171012-A-38.wav,3,38,clock_tick,True,171012,A 306 | 3-171041-A-38.wav,3,38,clock_tick,True,171041,A 307 | 3-171281-A-6.wav,3,6,hen,False,171281,A 308 | 3-171937-A-34.wav,3,34,can_opening,False,171937,A 309 | 3-172179-A-31.wav,3,31,mouse_click,False,172179,A 310 | 3-172881-A-48.wav,3,48,fireworks,False,172881,A 311 | 3-172922-A-48.wav,3,48,fireworks,False,172922,A 312 | 3-174840-A-43.wav,3,43,car_horn,False,174840,A 313 | 3-174866-A-29.wav,3,29,drinking_sipping,False,174866,A 314 | 3-177082-A-22.wav,3,22,clapping,False,177082,A 315 | 3-177083-A-22.wav,3,22,clapping,False,177083,A 316 | 3-178096-A-6.wav,3,6,hen,False,178096,A 317 | 3-180147-A-30.wav,3,30,door_wood_knock,False,180147,A 318 | 3-180256-A-0.wav,3,0,dog,True,180256,A 319 | 3-180977-A-0.wav,3,0,dog,True,180977,A 320 | 3-181132-A-14.wav,3,14,chirping_birds,False,181132,A 321 | 3-181278-A-22.wav,3,22,clapping,False,181278,A 322 | 3-182023-A-30.wav,3,30,door_wood_knock,False,182023,A 323 | 3-182025-A-30.wav,3,30,door_wood_knock,False,182025,A 324 | 3-182710-A-35.wav,3,35,washing_machine,False,182710,A 325 | 3-182710-B-35.wav,3,35,washing_machine,False,182710,B 326 | 3-185313-A-31.wav,3,31,mouse_click,False,185313,A 327 | 3-185456-A-29.wav,3,29,drinking_sipping,False,185456,A 328 | 3-187549-A-6.wav,3,6,hen,False,187549,A 329 | 
3-187549-B-6.wav,3,6,hen,False,187549,B 330 | 3-187710-A-11.wav,3,11,sea_waves,True,187710,A 331 | 3-188390-A-6.wav,3,6,hen,False,188390,A 332 | 3-188726-A-35.wav,3,35,washing_machine,False,188726,A 333 | 3-193767-A-47.wav,3,47,airplane,False,193767,A 334 | 3-197408-A-8.wav,3,8,sheep,False,197408,A 335 | 3-197408-B-8.wav,3,8,sheep,False,197408,B 336 | 3-197408-C-8.wav,3,8,sheep,False,197408,C 337 | 3-197435-A-22.wav,3,22,clapping,False,197435,A 338 | 3-197435-B-22.wav,3,22,clapping,False,197435,B 339 | 3-203371-A-39.wav,3,39,glass_breaking,False,203371,A 340 | 3-203373-A-39.wav,3,39,glass_breaking,False,203373,A 341 | 3-203374-A-39.wav,3,39,glass_breaking,False,203374,A 342 | 3-203375-A-39.wav,3,39,glass_breaking,False,203375,A 343 | 3-203377-A-39.wav,3,39,glass_breaking,False,203377,A 344 | 3-20861-A-8.wav,3,8,sheep,False,20861,A 345 | 3-208820-A-49.wav,3,49,hand_saw,False,208820,A 346 | 3-216280-A-39.wav,3,39,glass_breaking,False,216280,A 347 | 3-216281-A-39.wav,3,39,glass_breaking,False,216281,A 348 | 3-216284-A-39.wav,3,39,glass_breaking,False,216284,A 349 | 3-233151-A-2.wav,3,2,pig,False,233151,A 350 | 3-243726-A-43.wav,3,43,car_horn,False,243726,A 351 | 3-246513-A-16.wav,3,16,wind,False,246513,A 352 | 3-246513-B-16.wav,3,16,wind,False,246513,B 353 | 3-249913-A-25.wav,3,25,footsteps,False,249913,A 354 | 3-251617-A-48.wav,3,48,fireworks,False,251617,A 355 | 3-253081-A-2.wav,3,2,pig,False,253081,A 356 | 3-253084-A-2.wav,3,2,pig,False,253084,A 357 | 3-253084-B-2.wav,3,2,pig,False,253084,B 358 | 3-253084-C-2.wav,3,2,pig,False,253084,C 359 | 3-253084-D-2.wav,3,2,pig,False,253084,D 360 | 3-253084-E-2.wav,3,2,pig,False,253084,E 361 | 3-257858-A-2.wav,3,2,pig,False,257858,A 362 | 3-259622-A-44.wav,3,44,engine,False,259622,A 363 | 3-51376-A-42.wav,3,42,siren,False,51376,A 364 | 3-51731-A-42.wav,3,42,siren,False,51731,A 365 | 3-51731-B-42.wav,3,42,siren,False,51731,B 366 | 3-51909-A-42.wav,3,42,siren,False,51909,A 367 | 3-51909-B-42.wav,3,42,siren,False,51909,B 368 | 
3-58772-A-42.wav,3,42,siren,False,58772,A 369 | 3-62878-A-42.wav,3,42,siren,False,62878,A 370 | 3-62878-B-42.wav,3,42,siren,False,62878,B 371 | 3-65748-A-12.wav,3,12,crackling_fire,True,65748,A 372 | 3-68630-A-40.wav,3,40,helicopter,True,68630,A 373 | 3-68630-B-40.wav,3,40,helicopter,True,68630,B 374 | 3-68630-C-40.wav,3,40,helicopter,True,68630,C 375 | 3-70962-A-4.wav,3,4,frog,False,70962,A 376 | 3-70962-B-4.wav,3,4,frog,False,70962,B 377 | 3-70962-C-4.wav,3,4,frog,False,70962,C 378 | 3-71964-A-4.wav,3,4,frog,False,71964,A 379 | 3-71964-B-4.wav,3,4,frog,False,71964,B 380 | 3-71964-C-4.wav,3,4,frog,False,71964,C 381 | 3-83527-A-4.wav,3,4,frog,False,83527,A 382 | 3-87936-A-46.wav,3,46,church_bells,False,87936,A 383 | 3-87936-B-46.wav,3,46,church_bells,False,87936,B 384 | 3-92637-A-18.wav,3,18,toilet_flush,False,92637,A 385 | 3-93010-A-18.wav,3,18,toilet_flush,False,93010,A 386 | 3-94342-A-25.wav,3,25,footsteps,False,94342,A 387 | 3-94343-A-25.wav,3,25,footsteps,False,94343,A 388 | 3-94344-A-25.wav,3,25,footsteps,False,94344,A 389 | 3-94355-A-33.wav,3,33,door_wood_creaks,False,94355,A 390 | 3-95694-A-5.wav,3,5,cat,False,95694,A 391 | 3-95695-A-5.wav,3,5,cat,False,95695,A 392 | 3-95695-B-5.wav,3,5,cat,False,95695,B 393 | 3-95697-A-5.wav,3,5,cat,False,95697,A 394 | 3-95698-A-5.wav,3,5,cat,False,95698,A 395 | 3-96606-A-49.wav,3,49,hand_saw,False,96606,A 396 | 3-96606-B-49.wav,3,49,hand_saw,False,96606,B 397 | 3-97909-A-49.wav,3,49,hand_saw,False,97909,A 398 | 3-97909-B-49.wav,3,49,hand_saw,False,97909,B 399 | 3-98193-A-46.wav,3,46,church_bells,False,98193,A 400 | 3-98771-A-18.wav,3,18,toilet_flush,False,98771,A 401 | 3-98869-A-46.wav,3,46,church_bells,False,98869,A 402 | -------------------------------------------------------------------------------- /util_esc50/fold4_test.csv: -------------------------------------------------------------------------------- 1 | filename,fold,target,category,esc10,src_file,take 2 | 4-102844-A-49.wav,4,49,hand_saw,False,102844,A 3 | 
4-102844-B-49.wav,4,49,hand_saw,False,102844,B 4 | 4-102844-C-49.wav,4,49,hand_saw,False,102844,C 5 | 4-102871-A-42.wav,4,42,siren,False,102871,A 6 | 4-107117-A-33.wav,4,33,door_wood_creaks,False,107117,A 7 | 4-107120-A-33.wav,4,33,door_wood_creaks,False,107120,A 8 | 4-107122-A-33.wav,4,33,door_wood_creaks,False,107122,A 9 | 4-108352-A-33.wav,4,33,door_wood_creaks,False,108352,A 10 | 4-111671-A-42.wav,4,42,siren,False,111671,A 11 | 4-111671-B-42.wav,4,42,siren,False,111671,B 12 | 4-117627-A-25.wav,4,25,footsteps,False,117627,A 13 | 4-117630-A-25.wav,4,25,footsteps,False,117630,A 14 | 4-119647-A-48.wav,4,48,fireworks,False,119647,A 15 | 4-119647-B-48.wav,4,48,fireworks,False,119647,B 16 | 4-119647-C-48.wav,4,48,fireworks,False,119647,C 17 | 4-119647-D-48.wav,4,48,fireworks,False,119647,D 18 | 4-119648-A-48.wav,4,48,fireworks,False,119648,A 19 | 4-119648-B-48.wav,4,48,fireworks,False,119648,B 20 | 4-119648-C-48.wav,4,48,fireworks,False,119648,C 21 | 4-119648-D-48.wav,4,48,fireworks,False,119648,D 22 | 4-119720-A-18.wav,4,18,toilet_flush,False,119720,A 23 | 4-120160-A-5.wav,4,5,cat,False,120160,A 24 | 4-121532-A-42.wav,4,42,siren,False,121532,A 25 | 4-123680-A-18.wav,4,18,toilet_flush,False,123680,A 26 | 4-125070-A-19.wav,4,19,thunderstorm,False,125070,A 27 | 4-125071-A-19.wav,4,19,thunderstorm,False,125071,A 28 | 4-125072-A-19.wav,4,19,thunderstorm,False,125072,A 29 | 4-125072-B-19.wav,4,19,thunderstorm,False,125072,B 30 | 4-125825-A-46.wav,4,46,church_bells,False,125825,A 31 | 4-125825-B-46.wav,4,46,church_bells,False,125825,B 32 | 4-125929-A-40.wav,4,40,helicopter,True,125929,A 33 | 4-126046-A-18.wav,4,18,toilet_flush,False,126046,A 34 | 4-126532-A-18.wav,4,18,toilet_flush,False,126532,A 35 | 4-128659-A-33.wav,4,33,door_wood_creaks,False,128659,A 36 | 4-130584-A-4.wav,4,4,frog,False,130584,A 37 | 4-130891-A-7.wav,4,7,insects,False,130891,A 38 | 4-132383-A-2.wav,4,2,pig,False,132383,A 39 | 4-132383-B-2.wav,4,2,pig,False,132383,B 40 | 
4-132803-A-26.wav,4,26,laughing,False,132803,A 41 | 4-132810-A-26.wav,4,26,laughing,False,132810,A 42 | 4-132816-A-26.wav,4,26,laughing,False,132816,A 43 | 4-132839-A-33.wav,4,33,door_wood_creaks,False,132839,A 44 | 4-133047-A-5.wav,4,5,cat,False,133047,A 45 | 4-133047-B-5.wav,4,5,cat,False,133047,B 46 | 4-133047-C-5.wav,4,5,cat,False,133047,C 47 | 4-133674-A-26.wav,4,26,laughing,False,133674,A 48 | 4-133895-A-7.wav,4,7,insects,False,133895,A 49 | 4-135439-A-18.wav,4,18,toilet_flush,False,135439,A 50 | 4-136381-A-9.wav,4,9,crow,False,136381,A 51 | 4-138344-A-9.wav,4,9,crow,False,138344,A 52 | 4-140034-A-46.wav,4,46,church_bells,False,140034,A 53 | 4-141365-A-18.wav,4,18,toilet_flush,False,141365,A 54 | 4-143118-A-7.wav,4,7,insects,False,143118,A 55 | 4-143118-B-7.wav,4,7,insects,False,143118,B 56 | 4-144083-A-16.wav,4,16,wind,False,144083,A 57 | 4-144083-B-16.wav,4,16,wind,False,144083,B 58 | 4-144083-C-16.wav,4,16,wind,False,144083,C 59 | 4-144085-A-16.wav,4,16,wind,False,144085,A 60 | 4-144468-A-27.wav,4,27,brushing_teeth,False,144468,A 61 | 4-144468-B-27.wav,4,27,brushing_teeth,False,144468,B 62 | 4-145006-A-18.wav,4,18,toilet_flush,False,145006,A 63 | 4-145081-A-9.wav,4,9,crow,False,145081,A 64 | 4-146200-A-36.wav,4,36,vacuum_cleaner,False,146200,A 65 | 4-147240-A-2.wav,4,2,pig,False,147240,A 66 | 4-147240-B-2.wav,4,2,pig,False,147240,B 67 | 4-147657-A-46.wav,4,46,church_bells,False,147657,A 68 | 4-147658-A-46.wav,4,46,church_bells,False,147658,A 69 | 4-149294-A-41.wav,4,41,chainsaw,True,149294,A 70 | 4-149294-B-41.wav,4,41,chainsaw,True,149294,B 71 | 4-149940-A-5.wav,4,5,cat,False,149940,A 72 | 4-149940-B-5.wav,4,5,cat,False,149940,B 73 | 4-150364-A-46.wav,4,46,church_bells,False,150364,A 74 | 4-150364-B-46.wav,4,46,church_bells,False,150364,B 75 | 4-151242-A-37.wav,4,37,clock_alarm,False,151242,A 76 | 4-152958-A-18.wav,4,18,toilet_flush,False,152958,A 77 | 4-152995-A-24.wav,4,24,coughing,False,152995,A 78 | 4-154405-A-27.wav,4,27,brushing_teeth,False,154405,A 
79 | 4-154405-B-27.wav,4,27,brushing_teeth,False,154405,B 80 | 4-154443-A-24.wav,4,24,coughing,False,154443,A 81 | 4-154793-A-4.wav,4,4,frog,False,154793,A 82 | 4-155650-A-24.wav,4,24,coughing,False,155650,A 83 | 4-155650-B-24.wav,4,24,coughing,False,155650,B 84 | 4-155670-A-26.wav,4,26,laughing,False,155670,A 85 | 4-156227-A-9.wav,4,9,crow,False,156227,A 86 | 4-156827-A-46.wav,4,46,church_bells,False,156827,A 87 | 4-156843-A-21.wav,4,21,sneezing,True,156843,A 88 | 4-156844-A-21.wav,4,21,sneezing,True,156844,A 89 | 4-156993-A-19.wav,4,19,thunderstorm,False,156993,A 90 | 4-157296-A-24.wav,4,24,coughing,False,157296,A 91 | 4-157297-A-21.wav,4,21,sneezing,True,157297,A 92 | 4-157611-A-41.wav,4,41,chainsaw,True,157611,A 93 | 4-157611-B-41.wav,4,41,chainsaw,True,157611,B 94 | 4-158653-A-32.wav,4,32,keyboard_typing,False,158653,A 95 | 4-159426-A-9.wav,4,9,crow,False,159426,A 96 | 4-159609-A-14.wav,4,14,chirping_birds,False,159609,A 97 | 4-159609-B-14.wav,4,14,chirping_birds,False,159609,B 98 | 4-160036-A-33.wav,4,33,door_wood_creaks,False,160036,A 99 | 4-160036-B-33.wav,4,33,door_wood_creaks,False,160036,B 100 | 4-160999-A-10.wav,4,10,rain,True,160999,A 101 | 4-161099-A-47.wav,4,47,airplane,False,161099,A 102 | 4-161099-B-47.wav,4,47,airplane,False,161099,B 103 | 4-161100-A-47.wav,4,47,airplane,False,161100,A 104 | 4-161103-A-47.wav,4,47,airplane,False,161103,A 105 | 4-161105-A-47.wav,4,47,airplane,False,161105,A 106 | 4-161105-B-47.wav,4,47,airplane,False,161105,B 107 | 4-161127-A-10.wav,4,10,rain,True,161127,A 108 | 4-161303-A-5.wav,4,5,cat,False,161303,A 109 | 4-161303-B-5.wav,4,5,cat,False,161303,B 110 | 4-161519-A-19.wav,4,19,thunderstorm,False,161519,A 111 | 4-161579-A-40.wav,4,40,helicopter,True,161579,A 112 | 4-161579-B-40.wav,4,40,helicopter,True,161579,B 113 | 4-163264-A-10.wav,4,10,rain,True,163264,A 114 | 4-163606-A-16.wav,4,16,wind,False,163606,A 115 | 4-163608-B-16.wav,4,16,wind,False,163608,B 116 | 4-163609-A-16.wav,4,16,wind,False,163609,A 117 | 
4-163609-B-16.wav,4,16,wind,False,163609,B 118 | 4-163697-A-13.wav,4,13,crickets,False,163697,A 119 | 4-164021-A-1.wav,4,1,rooster,True,164021,A 120 | 4-164064-A-1.wav,4,1,rooster,True,164064,A 121 | 4-164064-B-1.wav,4,1,rooster,True,164064,B 122 | 4-164064-C-1.wav,4,1,rooster,True,164064,C 123 | 4-164206-A-10.wav,4,10,rain,True,164206,A 124 | 4-164243-A-26.wav,4,26,laughing,False,164243,A 125 | 4-164661-A-12.wav,4,12,crackling_fire,True,164661,A 126 | 4-164661-B-12.wav,4,12,crackling_fire,True,164661,B 127 | 4-164859-A-1.wav,4,1,rooster,True,164859,A 128 | 4-165606-A-45.wav,4,45,train,False,165606,A 129 | 4-165791-A-45.wav,4,45,train,False,165791,A 130 | 4-165791-B-45.wav,4,45,train,False,165791,B 131 | 4-165813-A-19.wav,4,19,thunderstorm,False,165813,A 132 | 4-165813-B-19.wav,4,19,thunderstorm,False,165813,B 133 | 4-165823-A-41.wav,4,41,chainsaw,True,165823,A 134 | 4-165823-B-41.wav,4,41,chainsaw,True,165823,B 135 | 4-165845-A-45.wav,4,45,train,False,165845,A 136 | 4-165845-B-45.wav,4,45,train,False,165845,B 137 | 4-165933-A-45.wav,4,45,train,False,165933,A 138 | 4-166661-A-10.wav,4,10,rain,True,166661,A 139 | 4-167063-A-11.wav,4,11,sea_waves,True,167063,A 140 | 4-167063-B-11.wav,4,11,sea_waves,True,167063,B 141 | 4-167063-C-11.wav,4,11,sea_waves,True,167063,C 142 | 4-167077-A-20.wav,4,20,crying_baby,True,167077,A 143 | 4-167077-B-20.wav,4,20,crying_baby,True,167077,B 144 | 4-167077-C-20.wav,4,20,crying_baby,True,167077,C 145 | 4-167155-A-32.wav,4,32,keyboard_typing,False,167155,A 146 | 4-167571-A-26.wav,4,26,laughing,False,167571,A 147 | 4-167642-A-21.wav,4,21,sneezing,True,167642,A 148 | 4-168155-A-15.wav,4,15,water_drops,False,168155,A 149 | 4-168446-A-45.wav,4,45,train,False,168446,A 150 | 4-168868-A-17.wav,4,17,pouring_water,False,168868,A 151 | 4-169127-A-41.wav,4,41,chainsaw,True,169127,A 152 | 4-169127-B-41.wav,4,41,chainsaw,True,169127,B 153 | 4-169508-A-37.wav,4,37,clock_alarm,False,169508,A 154 | 4-169726-A-24.wav,4,24,coughing,False,169726,A 155 | 
4-170078-A-1.wav,4,1,rooster,True,170078,A 156 | 4-170247-A-12.wav,4,12,crackling_fire,True,170247,A 157 | 4-170247-B-12.wav,4,12,crackling_fire,True,170247,B 158 | 4-171207-A-12.wav,4,12,crackling_fire,True,171207,A 159 | 4-171396-A-24.wav,4,24,coughing,False,171396,A 160 | 4-171519-A-21.wav,4,21,sneezing,True,171519,A 161 | 4-171652-A-44.wav,4,44,engine,False,171652,A 162 | 4-171706-A-23.wav,4,23,breathing,False,171706,A 163 | 4-171823-A-13.wav,4,13,crickets,False,171823,A 164 | 4-172143-A-13.wav,4,13,crickets,False,172143,A 165 | 4-172180-A-32.wav,4,32,keyboard_typing,False,172180,A 166 | 4-172366-A-37.wav,4,37,clock_alarm,False,172366,A 167 | 4-172377-A-17.wav,4,17,pouring_water,False,172377,A 168 | 4-172500-A-27.wav,4,27,brushing_teeth,False,172500,A 169 | 4-172500-B-27.wav,4,27,brushing_teeth,False,172500,B 170 | 4-172500-C-27.wav,4,27,brushing_teeth,False,172500,C 171 | 4-172500-D-27.wav,4,27,brushing_teeth,False,172500,D 172 | 4-172732-A-36.wav,4,36,vacuum_cleaner,False,172732,A 173 | 4-172733-A-36.wav,4,36,vacuum_cleaner,False,172733,A 174 | 4-172734-A-36.wav,4,36,vacuum_cleaner,False,172734,A 175 | 4-172736-A-36.wav,4,36,vacuum_cleaner,False,172736,A 176 | 4-172736-B-36.wav,4,36,vacuum_cleaner,False,172736,B 177 | 4-172742-A-32.wav,4,32,keyboard_typing,False,172742,A 178 | 4-173865-A-9.wav,4,9,crow,False,173865,A 179 | 4-173865-B-9.wav,4,9,crow,False,173865,B 180 | 4-174797-A-15.wav,4,15,water_drops,False,174797,A 181 | 4-174860-A-3.wav,4,3,cow,False,174860,A 182 | 4-174860-B-3.wav,4,3,cow,False,174860,B 183 | 4-175000-A-40.wav,4,40,helicopter,True,175000,A 184 | 4-175000-B-40.wav,4,40,helicopter,True,175000,B 185 | 4-175000-C-40.wav,4,40,helicopter,True,175000,C 186 | 4-175025-A-34.wav,4,34,can_opening,False,175025,A 187 | 4-175845-A-43.wav,4,43,car_horn,False,175845,A 188 | 4-175846-A-43.wav,4,43,car_horn,False,175846,A 189 | 4-175855-A-43.wav,4,43,car_horn,False,175855,A 190 | 4-175856-A-43.wav,4,43,car_horn,False,175856,A 191 | 
4-175945-A-38.wav,4,38,clock_tick,True,175945,A 192 | 4-176631-A-43.wav,4,43,car_horn,False,176631,A 193 | 4-176638-A-43.wav,4,43,car_horn,False,176638,A 194 | 4-176914-A-23.wav,4,23,breathing,False,176914,A 195 | 4-177243-A-32.wav,4,32,keyboard_typing,False,177243,A 196 | 4-177250-A-10.wav,4,10,rain,True,177250,A 197 | 4-177835-A-24.wav,4,24,coughing,False,177835,A 198 | 4-178402-A-43.wav,4,43,car_horn,False,178402,A 199 | 4-178881-A-45.wav,4,45,train,False,178881,A 200 | 4-179836-A-34.wav,4,34,can_opening,False,179836,A 201 | 4-179984-A-38.wav,4,38,clock_tick,True,179984,A 202 | 4-180337-A-28.wav,4,28,snoring,False,180337,A 203 | 4-180380-A-10.wav,4,10,rain,True,180380,A 204 | 4-180453-A-17.wav,4,17,pouring_water,False,180453,A 205 | 4-181035-A-38.wav,4,38,clock_tick,True,181035,A 206 | 4-181286-A-10.wav,4,10,rain,True,181286,A 207 | 4-181362-A-13.wav,4,13,crickets,False,181362,A 208 | 4-181563-A-12.wav,4,12,crackling_fire,True,181563,A 209 | 4-181599-A-26.wav,4,26,laughing,False,181599,A 210 | 4-181628-A-17.wav,4,17,pouring_water,False,181628,A 211 | 4-181707-A-32.wav,4,32,keyboard_typing,False,181707,A 212 | 4-181708-A-32.wav,4,32,keyboard_typing,False,181708,A 213 | 4-181865-A-38.wav,4,38,clock_tick,True,181865,A 214 | 4-181955-A-3.wav,4,3,cow,False,181955,A 215 | 4-181955-B-3.wav,4,3,cow,False,181955,B 216 | 4-181955-C-3.wav,4,3,cow,False,181955,C 217 | 4-181999-A-36.wav,4,36,vacuum_cleaner,False,181999,A 218 | 4-182034-A-30.wav,4,30,door_wood_knock,False,182034,A 219 | 4-182039-A-30.wav,4,30,door_wood_knock,False,182039,A 220 | 4-182041-A-30.wav,4,30,door_wood_knock,False,182041,A 221 | 4-182314-A-7.wav,4,7,insects,False,182314,A 222 | 4-182314-B-7.wav,4,7,insects,False,182314,B 223 | 4-182368-A-12.wav,4,12,crackling_fire,True,182368,A 224 | 4-182369-A-12.wav,4,12,crackling_fire,True,182369,A 225 | 4-182395-A-0.wav,4,0,dog,True,182395,A 226 | 4-182613-A-11.wav,4,11,sea_waves,True,182613,A 227 | 4-182613-B-11.wav,4,11,sea_waves,True,182613,B 228 | 
4-182795-A-14.wav,4,14,chirping_birds,False,182795,A 229 | 4-182839-A-17.wav,4,17,pouring_water,False,182839,A 230 | 4-183487-A-1.wav,4,1,rooster,True,183487,A 231 | 4-183882-A-28.wav,4,28,snoring,False,183882,A 232 | 4-183882-B-28.wav,4,28,snoring,False,183882,B 233 | 4-183992-A-0.wav,4,0,dog,True,183992,A 234 | 4-184235-A-28.wav,4,28,snoring,False,184235,A 235 | 4-184237-A-28.wav,4,28,snoring,False,184237,A 236 | 4-184434-A-21.wav,4,21,sneezing,True,184434,A 237 | 4-184575-A-0.wav,4,0,dog,True,184575,A 238 | 4-185415-A-21.wav,4,21,sneezing,True,185415,A 239 | 4-185575-A-20.wav,4,20,crying_baby,True,185575,A 240 | 4-185575-B-20.wav,4,20,crying_baby,True,185575,B 241 | 4-185575-C-20.wav,4,20,crying_baby,True,185575,C 242 | 4-185613-A-32.wav,4,32,keyboard_typing,False,185613,A 243 | 4-185619-A-21.wav,4,21,sneezing,True,185619,A 244 | 4-186518-A-30.wav,4,30,door_wood_knock,False,186518,A 245 | 4-186693-A-17.wav,4,17,pouring_water,False,186693,A 246 | 4-186935-A-44.wav,4,44,engine,False,186935,A 247 | 4-186936-A-44.wav,4,44,engine,False,186936,A 248 | 4-186938-A-44.wav,4,44,engine,False,186938,A 249 | 4-186940-A-44.wav,4,44,engine,False,186940,A 250 | 4-186962-A-44.wav,4,44,engine,False,186962,A 251 | 4-187284-A-30.wav,4,30,door_wood_knock,False,187284,A 252 | 4-187384-A-34.wav,4,34,can_opening,False,187384,A 253 | 4-187504-A-17.wav,4,17,pouring_water,False,187504,A 254 | 4-187504-B-17.wav,4,17,pouring_water,False,187504,B 255 | 4-187769-A-14.wav,4,14,chirping_birds,False,187769,A 256 | 4-187769-B-14.wav,4,14,chirping_birds,False,187769,B 257 | 4-188003-A-34.wav,4,34,can_opening,False,188003,A 258 | 4-188033-A-38.wav,4,38,clock_tick,True,188033,A 259 | 4-188191-A-29.wav,4,29,drinking_sipping,False,188191,A 260 | 4-188191-B-29.wav,4,29,drinking_sipping,False,188191,B 261 | 4-188191-C-29.wav,4,29,drinking_sipping,False,188191,C 262 | 4-188287-A-9.wav,4,9,crow,False,188287,A 263 | 4-188293-A-15.wav,4,15,water_drops,False,188293,A 264 | 
4-188293-B-15.wav,4,15,water_drops,False,188293,B 265 | 4-188595-A-29.wav,4,29,drinking_sipping,False,188595,A 266 | 4-188703-A-8.wav,4,8,sheep,False,188703,A 267 | 4-188703-B-8.wav,4,8,sheep,False,188703,B 268 | 4-188703-C-8.wav,4,8,sheep,False,188703,C 269 | 4-188703-D-8.wav,4,8,sheep,False,188703,D 270 | 4-188878-A-30.wav,4,30,door_wood_knock,False,188878,A 271 | 4-189332-A-37.wav,4,37,clock_alarm,False,189332,A 272 | 4-189828-A-22.wav,4,22,clapping,False,189828,A 273 | 4-189830-A-22.wav,4,22,clapping,False,189830,A 274 | 4-189832-A-22.wav,4,22,clapping,False,189832,A 275 | 4-189832-B-22.wav,4,22,clapping,False,189832,B 276 | 4-189833-A-22.wav,4,22,clapping,False,189833,A 277 | 4-189836-A-22.wav,4,22,clapping,False,189836,A 278 | 4-189838-A-22.wav,4,22,clapping,False,189838,A 279 | 4-191015-A-43.wav,4,43,car_horn,False,191015,A 280 | 4-191246-A-7.wav,4,7,insects,False,191246,A 281 | 4-191246-B-7.wav,4,7,insects,False,191246,B 282 | 4-191297-A-28.wav,4,28,snoring,False,191297,A 283 | 4-191327-A-38.wav,4,38,clock_tick,True,191327,A 284 | 4-191687-A-0.wav,4,0,dog,True,191687,A 285 | 4-192068-A-31.wav,4,31,mouse_click,False,192068,A 286 | 4-192236-A-0.wav,4,0,dog,True,192236,A 287 | 4-193480-A-40.wav,4,40,helicopter,True,193480,A 288 | 4-193480-B-40.wav,4,40,helicopter,True,193480,B 289 | 4-194246-A-13.wav,4,13,crickets,False,194246,A 290 | 4-194680-A-36.wav,4,36,vacuum_cleaner,False,194680,A 291 | 4-194711-A-38.wav,4,38,clock_tick,True,194711,A 292 | 4-194754-A-0.wav,4,0,dog,True,194754,A 293 | 4-194808-A-29.wav,4,29,drinking_sipping,False,194808,A 294 | 4-194979-A-25.wav,4,25,footsteps,False,194979,A 295 | 4-194981-A-25.wav,4,25,footsteps,False,194981,A 296 | 4-195305-A-31.wav,4,31,mouse_click,False,195305,A 297 | 4-195451-A-44.wav,4,44,engine,False,195451,A 298 | 4-195497-A-11.wav,4,11,sea_waves,True,195497,A 299 | 4-195497-B-11.wav,4,11,sea_waves,True,195497,B 300 | 4-195707-A-13.wav,4,13,crickets,False,195707,A 301 | 
4-195805-A-13.wav,4,13,crickets,False,195805,A 302 | 4-196671-A-8.wav,4,8,sheep,False,196671,A 303 | 4-196671-B-8.wav,4,8,sheep,False,196671,B 304 | 4-196672-A-8.wav,4,8,sheep,False,196672,A 305 | 4-197103-A-6.wav,4,6,hen,False,197103,A 306 | 4-197454-A-28.wav,4,28,snoring,False,197454,A 307 | 4-197454-B-28.wav,4,28,snoring,False,197454,B 308 | 4-197871-A-15.wav,4,15,water_drops,False,197871,A 309 | 4-198025-A-23.wav,4,23,breathing,False,198025,A 310 | 4-198360-A-49.wav,4,49,hand_saw,False,198360,A 311 | 4-198360-B-49.wav,4,49,hand_saw,False,198360,B 312 | 4-198360-C-49.wav,4,49,hand_saw,False,198360,C 313 | 4-198841-A-37.wav,4,37,clock_alarm,False,198841,A 314 | 4-198962-A-25.wav,4,25,footsteps,False,198962,A 315 | 4-198962-B-25.wav,4,25,footsteps,False,198962,B 316 | 4-198965-A-38.wav,4,38,clock_tick,True,198965,A 317 | 4-199261-A-0.wav,4,0,dog,True,199261,A 318 | 4-200330-A-6.wav,4,6,hen,False,200330,A 319 | 4-200330-B-6.wav,4,6,hen,False,200330,B 320 | 4-201300-A-31.wav,4,31,mouse_click,False,201300,A 321 | 4-201800-A-31.wav,4,31,mouse_click,False,201800,A 322 | 4-201988-A-44.wav,4,44,engine,False,201988,A 323 | 4-202298-A-31.wav,4,31,mouse_click,False,202298,A 324 | 4-202749-A-13.wav,4,13,crickets,False,202749,A 325 | 4-204115-A-39.wav,4,39,glass_breaking,False,204115,A 326 | 4-204119-A-39.wav,4,39,glass_breaking,False,204119,A 327 | 4-204121-A-39.wav,4,39,glass_breaking,False,204121,A 328 | 4-204123-A-39.wav,4,39,glass_breaking,False,204123,A 329 | 4-204202-A-29.wav,4,29,drinking_sipping,False,204202,A 330 | 4-204612-A-31.wav,4,31,mouse_click,False,204612,A 331 | 4-204618-A-11.wav,4,11,sea_waves,True,204618,A 332 | 4-204683-A-31.wav,4,31,mouse_click,False,204683,A 333 | 4-204684-A-37.wav,4,37,clock_alarm,False,204684,A 334 | 4-204777-A-39.wav,4,39,glass_breaking,False,204777,A 335 | 4-204777-B-39.wav,4,39,glass_breaking,False,204777,B 336 | 4-204777-C-39.wav,4,39,glass_breaking,False,204777,C 337 | 4-204830-A-6.wav,4,6,hen,False,204830,A 338 | 
4-205526-A-23.wav,4,23,breathing,False,205526,A 339 | 4-205526-B-23.wav,4,23,breathing,False,205526,B 340 | 4-205738-A-22.wav,4,22,clapping,False,205738,A 341 | 4-207116-A-23.wav,4,23,breathing,False,207116,A 342 | 4-207124-A-0.wav,4,0,dog,True,207124,A 343 | 4-208021-A-1.wav,4,1,rooster,True,208021,A 344 | 4-209536-A-37.wav,4,37,clock_alarm,False,209536,A 345 | 4-209698-A-37.wav,4,37,clock_alarm,False,209698,A 346 | 4-210000-A-23.wav,4,23,breathing,False,210000,A 347 | 4-210000-B-23.wav,4,23,breathing,False,210000,B 348 | 4-210309-A-31.wav,4,31,mouse_click,False,210309,A 349 | 4-210593-A-29.wav,4,29,drinking_sipping,False,210593,A 350 | 4-211502-A-30.wav,4,30,door_wood_knock,False,211502,A 351 | 4-212604-A-15.wav,4,15,water_drops,False,212604,A 352 | 4-212604-B-15.wav,4,15,water_drops,False,212604,B 353 | 4-212604-C-15.wav,4,15,water_drops,False,212604,C 354 | 4-212698-A-39.wav,4,39,glass_breaking,False,212698,A 355 | 4-212728-A-34.wav,4,34,can_opening,False,212728,A 356 | 4-213193-A-29.wav,4,29,drinking_sipping,False,213193,A 357 | 4-213915-A-3.wav,4,3,cow,False,213915,A 358 | 4-213915-B-3.wav,4,3,cow,False,213915,B 359 | 4-213915-C-3.wav,4,3,cow,False,213915,C 360 | 4-215635-A-34.wav,4,34,can_opening,False,215635,A 361 | 4-216211-A-14.wav,4,14,chirping_birds,False,216211,A 362 | 4-216349-A-34.wav,4,34,can_opening,False,216349,A 363 | 4-218199-A-35.wav,4,35,washing_machine,False,218199,A 364 | 4-218199-B-35.wav,4,35,washing_machine,False,218199,B 365 | 4-218199-C-35.wav,4,35,washing_machine,False,218199,C 366 | 4-218199-D-35.wav,4,35,washing_machine,False,218199,D 367 | 4-218199-E-35.wav,4,35,washing_machine,False,218199,E 368 | 4-218199-F-35.wav,4,35,washing_machine,False,218199,F 369 | 4-218199-G-35.wav,4,35,washing_machine,False,218199,G 370 | 4-218199-H-35.wav,4,35,washing_machine,False,218199,H 371 | 4-218304-A-25.wav,4,25,footsteps,False,218304,A 372 | 4-218304-B-25.wav,4,25,footsteps,False,218304,B 373 | 4-223125-A-14.wav,4,14,chirping_birds,False,223125,A 
374 | 4-223127-A-14.wav,4,14,chirping_birds,False,223127,A 375 | 4-232495-A-6.wav,4,6,hen,False,232495,A 376 | 4-234644-A-2.wav,4,2,pig,False,234644,A 377 | 4-234879-A-6.wav,4,6,hen,False,234879,A 378 | 4-244318-A-6.wav,4,6,hen,False,244318,A 379 | 4-250864-A-8.wav,4,8,sheep,False,250864,A 380 | 4-250869-A-2.wav,4,2,pig,False,250869,A 381 | 4-250869-B-2.wav,4,2,pig,False,250869,B 382 | 4-250869-C-2.wav,4,2,pig,False,250869,C 383 | 4-251645-A-49.wav,4,49,hand_saw,False,251645,A 384 | 4-251645-B-49.wav,4,49,hand_saw,False,251645,B 385 | 4-251959-A-47.wav,4,47,airplane,False,251959,A 386 | 4-253649-A-6.wav,4,6,hen,False,253649,A 387 | 4-255371-A-47.wav,4,47,airplane,False,255371,A 388 | 4-261068-A-30.wav,4,30,door_wood_knock,False,261068,A 389 | 4-264453-A-34.wav,4,34,can_opening,False,264453,A 390 | 4-59579-A-20.wav,4,20,crying_baby,True,59579,A 391 | 4-59579-B-20.wav,4,20,crying_baby,True,59579,B 392 | 4-67358-A-42.wav,4,42,siren,False,67358,A 393 | 4-80761-A-42.wav,4,42,siren,False,80761,A 394 | 4-90014-A-42.wav,4,42,siren,False,90014,A 395 | 4-90014-B-42.wav,4,42,siren,False,90014,B 396 | 4-99193-A-4.wav,4,4,frog,False,99193,A 397 | 4-99193-B-4.wav,4,4,frog,False,99193,B 398 | 4-99644-A-4.wav,4,4,frog,False,99644,A 399 | 4-99644-B-4.wav,4,4,frog,False,99644,B 400 | 4-99644-C-4.wav,4,4,frog,False,99644,C 401 | 4-99644-D-4.wav,4,4,frog,False,99644,D 402 | -------------------------------------------------------------------------------- /util_esc50/fold5_test.csv: -------------------------------------------------------------------------------- 1 | filename,fold,target,category,esc10,src_file,take 2 | 5-103415-A-2.wav,5,2,pig,False,103415,A 3 | 5-103416-A-2.wav,5,2,pig,False,103416,A 4 | 5-103418-A-2.wav,5,2,pig,False,103418,A 5 | 5-103420-A-2.wav,5,2,pig,False,103420,A 6 | 5-103421-A-2.wav,5,2,pig,False,103421,A 7 | 5-103422-A-2.wav,5,2,pig,False,103422,A 8 | 5-117118-A-42.wav,5,42,siren,False,117118,A 9 | 5-117120-A-42.wav,5,42,siren,False,117120,A 10 | 
5-117122-A-42.wav,5,42,siren,False,117122,A 11 | 5-117250-A-2.wav,5,2,pig,False,117250,A 12 | 5-117773-A-16.wav,5,16,wind,False,117773,A 13 | 5-127990-A-2.wav,5,2,pig,False,127990,A 14 | 5-133989-A-42.wav,5,42,siren,False,133989,A 15 | 5-133989-B-42.wav,5,42,siren,False,133989,B 16 | 5-141683-A-35.wav,5,35,washing_machine,False,141683,A 17 | 5-147297-A-27.wav,5,27,brushing_teeth,False,147297,A 18 | 5-150409-A-42.wav,5,42,siren,False,150409,A 19 | 5-151085-A-20.wav,5,20,crying_baby,True,151085,A 20 | 5-156026-A-4.wav,5,4,frog,False,156026,A 21 | 5-156026-B-4.wav,5,4,frog,False,156026,B 22 | 5-156026-C-4.wav,5,4,frog,False,156026,C 23 | 5-156026-D-4.wav,5,4,frog,False,156026,D 24 | 5-156698-A-18.wav,5,18,toilet_flush,False,156698,A 25 | 5-156999-A-19.wav,5,19,thunderstorm,False,156999,A 26 | 5-156999-B-19.wav,5,19,thunderstorm,False,156999,B 27 | 5-156999-C-19.wav,5,19,thunderstorm,False,156999,C 28 | 5-156999-D-19.wav,5,19,thunderstorm,False,156999,D 29 | 5-156999-E-19.wav,5,19,thunderstorm,False,156999,E 30 | 5-157204-A-16.wav,5,16,wind,False,157204,A 31 | 5-157204-B-16.wav,5,16,wind,False,157204,B 32 | 5-160551-A-42.wav,5,42,siren,False,160551,A 33 | 5-160614-A-48.wav,5,48,fireworks,False,160614,A 34 | 5-160614-B-48.wav,5,48,fireworks,False,160614,B 35 | 5-160614-C-48.wav,5,48,fireworks,False,160614,C 36 | 5-160614-D-48.wav,5,48,fireworks,False,160614,D 37 | 5-160614-E-48.wav,5,48,fireworks,False,160614,E 38 | 5-160614-F-48.wav,5,48,fireworks,False,160614,F 39 | 5-160614-G-48.wav,5,48,fireworks,False,160614,G 40 | 5-160614-H-48.wav,5,48,fireworks,False,160614,H 41 | 5-161270-A-33.wav,5,33,door_wood_creaks,False,161270,A 42 | 5-161270-B-33.wav,5,33,door_wood_creaks,False,161270,B 43 | 5-169983-A-5.wav,5,5,cat,False,169983,A 44 | 5-170338-A-41.wav,5,41,chainsaw,True,170338,A 45 | 5-170338-B-41.wav,5,41,chainsaw,True,170338,B 46 | 5-171118-A-26.wav,5,26,laughing,False,171118,A 47 | 5-171653-A-41.wav,5,41,chainsaw,True,171653,A 48 | 
5-172299-A-5.wav,5,5,cat,False,172299,A 49 | 5-172639-A-5.wav,5,5,cat,False,172639,A 50 | 5-173568-A-33.wav,5,33,door_wood_creaks,False,173568,A 51 | 5-177034-A-18.wav,5,18,toilet_flush,False,177034,A 52 | 5-177614-A-5.wav,5,5,cat,False,177614,A 53 | 5-177779-A-33.wav,5,33,door_wood_creaks,False,177779,A 54 | 5-177957-A-40.wav,5,40,helicopter,True,177957,A 55 | 5-177957-B-40.wav,5,40,helicopter,True,177957,B 56 | 5-177957-C-40.wav,5,40,helicopter,True,177957,C 57 | 5-177957-D-40.wav,5,40,helicopter,True,177957,D 58 | 5-177957-E-40.wav,5,40,helicopter,True,177957,E 59 | 5-178997-A-24.wav,5,24,coughing,False,178997,A 60 | 5-179294-A-46.wav,5,46,church_bells,False,179294,A 61 | 5-179496-A-16.wav,5,16,wind,False,179496,A 62 | 5-179496-B-16.wav,5,16,wind,False,179496,B 63 | 5-179860-A-43.wav,5,43,car_horn,False,179860,A 64 | 5-179863-A-43.wav,5,43,car_horn,False,179863,A 65 | 5-179865-A-43.wav,5,43,car_horn,False,179865,A 66 | 5-179866-A-43.wav,5,43,car_horn,False,179866,A 67 | 5-179868-A-43.wav,5,43,car_horn,False,179868,A 68 | 5-180156-A-43.wav,5,43,car_horn,False,180156,A 69 | 5-180156-B-43.wav,5,43,car_horn,False,180156,B 70 | 5-180156-C-43.wav,5,43,car_horn,False,180156,C 71 | 5-180229-A-27.wav,5,27,brushing_teeth,False,180229,A 72 | 5-181458-A-33.wav,5,33,door_wood_creaks,False,181458,A 73 | 5-181766-A-10.wav,5,10,rain,True,181766,A 74 | 5-181977-A-35.wav,5,35,washing_machine,False,181977,A 75 | 5-182007-A-36.wav,5,36,vacuum_cleaner,False,182007,A 76 | 5-182010-A-36.wav,5,36,vacuum_cleaner,False,182010,A 77 | 5-182012-A-36.wav,5,36,vacuum_cleaner,False,182012,A 78 | 5-182404-A-18.wav,5,18,toilet_flush,False,182404,A 79 | 5-184323-A-42.wav,5,42,siren,False,184323,A 80 | 5-184871-A-24.wav,5,24,coughing,False,184871,A 81 | 5-185516-A-27.wav,5,27,brushing_teeth,False,185516,A 82 | 5-185579-A-41.wav,5,41,chainsaw,True,185579,A 83 | 5-185579-B-41.wav,5,41,chainsaw,True,185579,B 84 | 5-185908-A-18.wav,5,18,toilet_flush,False,185908,A 85 | 
5-186924-A-12.wav,5,12,crackling_fire,True,186924,A 86 | 5-187201-A-4.wav,5,4,frog,False,187201,A 87 | 5-187201-B-4.wav,5,4,frog,False,187201,B 88 | 5-187444-A-33.wav,5,33,door_wood_creaks,False,187444,A 89 | 5-187979-A-21.wav,5,21,sneezing,True,187979,A 90 | 5-188365-A-36.wav,5,36,vacuum_cleaner,False,188365,A 91 | 5-188495-A-19.wav,5,19,thunderstorm,False,188495,A 92 | 5-188606-A-33.wav,5,33,door_wood_creaks,False,188606,A 93 | 5-188655-A-10.wav,5,10,rain,True,188655,A 94 | 5-188716-A-46.wav,5,46,church_bells,False,188716,A 95 | 5-188796-A-45.wav,5,45,train,False,188796,A 96 | 5-188945-A-45.wav,5,45,train,False,188945,A 97 | 5-189212-A-12.wav,5,12,crackling_fire,True,189212,A 98 | 5-189237-A-12.wav,5,12,crackling_fire,True,189237,A 99 | 5-189795-A-4.wav,5,4,frog,False,189795,A 100 | 5-191131-A-40.wav,5,40,helicopter,True,191131,A 101 | 5-191497-A-33.wav,5,33,door_wood_creaks,False,191497,A 102 | 5-192191-A-19.wav,5,19,thunderstorm,False,192191,A 103 | 5-193339-A-10.wav,5,10,rain,True,193339,A 104 | 5-193473-A-12.wav,5,12,crackling_fire,True,193473,A 105 | 5-193473-B-12.wav,5,12,crackling_fire,True,193473,B 106 | 5-194533-A-21.wav,5,21,sneezing,True,194533,A 107 | 5-194892-A-10.wav,5,10,rain,True,194892,A 108 | 5-194899-A-3.wav,5,3,cow,False,194899,A 109 | 5-194899-B-3.wav,5,3,cow,False,194899,B 110 | 5-194899-C-3.wav,5,3,cow,False,194899,C 111 | 5-194899-D-3.wav,5,3,cow,False,194899,D 112 | 5-194930-A-1.wav,5,1,rooster,True,194930,A 113 | 5-194930-B-1.wav,5,1,rooster,True,194930,B 114 | 5-194932-A-7.wav,5,7,insects,False,194932,A 115 | 5-195517-A-7.wav,5,7,insects,False,195517,A 116 | 5-195518-A-7.wav,5,7,insects,False,195518,A 117 | 5-195557-A-19.wav,5,19,thunderstorm,False,195557,A 118 | 5-195710-A-10.wav,5,10,rain,True,195710,A 119 | 5-197118-A-45.wav,5,45,train,False,197118,A 120 | 5-197121-A-45.wav,5,45,train,False,197121,A 121 | 5-197121-B-45.wav,5,45,train,False,197121,B 122 | 5-197446-A-7.wav,5,7,insects,False,197446,A 123 | 
5-197913-A-18.wav,5,18,toilet_flush,False,197913,A 124 | 5-197988-A-46.wav,5,46,church_bells,False,197988,A 125 | 5-198278-A-7.wav,5,7,insects,False,198278,A 126 | 5-198278-B-7.wav,5,7,insects,False,198278,B 127 | 5-198278-C-7.wav,5,7,insects,False,198278,C 128 | 5-198321-A-10.wav,5,10,rain,True,198321,A 129 | 5-198373-A-46.wav,5,46,church_bells,False,198373,A 130 | 5-198411-A-20.wav,5,20,crying_baby,True,198411,A 131 | 5-198411-B-20.wav,5,20,crying_baby,True,198411,B 132 | 5-198411-C-20.wav,5,20,crying_baby,True,198411,C 133 | 5-198411-D-20.wav,5,20,crying_baby,True,198411,D 134 | 5-198411-E-20.wav,5,20,crying_baby,True,198411,E 135 | 5-198411-F-20.wav,5,20,crying_baby,True,198411,F 136 | 5-198411-G-20.wav,5,20,crying_baby,True,198411,G 137 | 5-198600-A-45.wav,5,45,train,False,198600,A 138 | 5-198891-A-8.wav,5,8,sheep,False,198891,A 139 | 5-198891-B-8.wav,5,8,sheep,False,198891,B 140 | 5-198891-C-8.wav,5,8,sheep,False,198891,C 141 | 5-198891-D-8.wav,5,8,sheep,False,198891,D 142 | 5-199284-A-45.wav,5,45,train,False,199284,A 143 | 5-199284-B-45.wav,5,45,train,False,199284,B 144 | 5-200329-A-8.wav,5,8,sheep,False,200329,A 145 | 5-200329-B-8.wav,5,8,sheep,False,200329,B 146 | 5-200329-C-8.wav,5,8,sheep,False,200329,C 147 | 5-200334-A-1.wav,5,1,rooster,True,200334,A 148 | 5-200334-B-1.wav,5,1,rooster,True,200334,B 149 | 5-200339-A-1.wav,5,1,rooster,True,200339,A 150 | 5-200461-A-11.wav,5,11,sea_waves,True,200461,A 151 | 5-200461-B-11.wav,5,11,sea_waves,True,200461,B 152 | 5-201170-A-46.wav,5,46,church_bells,False,201170,A 153 | 5-201172-A-46.wav,5,46,church_bells,False,201172,A 154 | 5-201194-A-38.wav,5,38,clock_tick,True,201194,A 155 | 5-201274-A-21.wav,5,21,sneezing,True,201274,A 156 | 5-201664-A-18.wav,5,18,toilet_flush,False,201664,A 157 | 5-202020-A-18.wav,5,18,toilet_flush,False,202020,A 158 | 5-202220-A-21.wav,5,21,sneezing,True,202220,A 159 | 5-202540-A-18.wav,5,18,toilet_flush,False,202540,A 160 | 5-202795-A-3.wav,5,3,cow,False,202795,A 161 | 
5-202898-A-10.wav,5,10,rain,True,202898,A 162 | 5-203128-A-0.wav,5,0,dog,True,203128,A 163 | 5-203128-B-0.wav,5,0,dog,True,203128,B 164 | 5-203739-A-10.wav,5,10,rain,True,203739,A 165 | 5-204114-A-29.wav,5,29,drinking_sipping,False,204114,A 166 | 5-204352-A-13.wav,5,13,crickets,False,204352,A 167 | 5-204352-B-13.wav,5,13,crickets,False,204352,B 168 | 5-204604-A-24.wav,5,24,coughing,False,204604,A 169 | 5-204741-A-46.wav,5,46,church_bells,False,204741,A 170 | 5-205090-A-32.wav,5,32,keyboard_typing,False,205090,A 171 | 5-205589-A-17.wav,5,17,pouring_water,False,205589,A 172 | 5-205589-B-17.wav,5,17,pouring_water,False,205589,B 173 | 5-205898-A-40.wav,5,40,helicopter,True,205898,A 174 | 5-207681-A-24.wav,5,24,coughing,False,207681,A 175 | 5-207781-A-17.wav,5,17,pouring_water,False,207781,A 176 | 5-207811-A-35.wav,5,35,washing_machine,False,207811,A 177 | 5-207811-B-35.wav,5,35,washing_machine,False,207811,B 178 | 5-207836-A-29.wav,5,29,drinking_sipping,False,207836,A 179 | 5-207836-B-29.wav,5,29,drinking_sipping,False,207836,B 180 | 5-207836-C-29.wav,5,29,drinking_sipping,False,207836,C 181 | 5-207836-D-29.wav,5,29,drinking_sipping,False,207836,D 182 | 5-208030-A-0.wav,5,0,dog,True,208030,A 183 | 5-208624-A-38.wav,5,38,clock_tick,True,208624,A 184 | 5-208761-A-24.wav,5,24,coughing,False,208761,A 185 | 5-208810-A-11.wav,5,11,sea_waves,True,208810,A 186 | 5-208810-B-11.wav,5,11,sea_waves,True,208810,B 187 | 5-209698-A-38.wav,5,38,clock_tick,True,209698,A 188 | 5-209719-A-24.wav,5,24,coughing,False,209719,A 189 | 5-209833-A-38.wav,5,38,clock_tick,True,209833,A 190 | 5-209989-A-22.wav,5,22,clapping,False,209989,A 191 | 5-209992-A-44.wav,5,44,engine,False,209992,A 192 | 5-209992-B-44.wav,5,44,engine,False,209992,B 193 | 5-210540-A-13.wav,5,13,crickets,False,210540,A 194 | 5-210571-A-38.wav,5,38,clock_tick,True,210571,A 195 | 5-210612-A-37.wav,5,37,clock_alarm,False,210612,A 196 | 5-211197-A-24.wav,5,24,coughing,False,211197,A 197 | 
5-211408-A-37.wav,5,37,clock_alarm,False,211408,A 198 | 5-212050-A-27.wav,5,27,brushing_teeth,False,212050,A 199 | 5-212054-A-36.wav,5,36,vacuum_cleaner,False,212054,A 200 | 5-212059-A-36.wav,5,36,vacuum_cleaner,False,212059,A 201 | 5-212181-A-38.wav,5,38,clock_tick,True,212181,A 202 | 5-212454-A-0.wav,5,0,dog,True,212454,A 203 | 5-212730-A-17.wav,5,17,pouring_water,False,212730,A 204 | 5-212734-A-17.wav,5,17,pouring_water,False,212734,A 205 | 5-212736-A-17.wav,5,17,pouring_water,False,212736,A 206 | 5-213077-A-11.wav,5,11,sea_waves,True,213077,A 207 | 5-213293-A-23.wav,5,23,breathing,False,213293,A 208 | 5-213802-A-12.wav,5,12,crackling_fire,True,213802,A 209 | 5-213836-A-9.wav,5,9,crow,False,213836,A 210 | 5-213836-B-9.wav,5,9,crow,False,213836,B 211 | 5-213836-C-9.wav,5,9,crow,False,213836,C 212 | 5-213836-D-9.wav,5,9,crow,False,213836,D 213 | 5-213855-A-0.wav,5,0,dog,True,213855,A 214 | 5-214362-A-17.wav,5,17,pouring_water,False,214362,A 215 | 5-214759-A-5.wav,5,5,cat,False,214759,A 216 | 5-214759-B-5.wav,5,5,cat,False,214759,B 217 | 5-214869-A-9.wav,5,9,crow,False,214869,A 218 | 5-215005-A-32.wav,5,32,keyboard_typing,False,215005,A 219 | 5-215172-A-13.wav,5,13,crickets,False,215172,A 220 | 5-215179-A-13.wav,5,13,crickets,False,215179,A 221 | 5-215445-A-47.wav,5,47,airplane,False,215445,A 222 | 5-215447-A-47.wav,5,47,airplane,False,215447,A 223 | 5-215449-A-47.wav,5,47,airplane,False,215449,A 224 | 5-215634-A-17.wav,5,17,pouring_water,False,215634,A 225 | 5-215658-A-12.wav,5,12,crackling_fire,True,215658,A 226 | 5-215658-B-12.wav,5,12,crackling_fire,True,215658,B 227 | 5-216131-A-32.wav,5,32,keyboard_typing,False,216131,A 228 | 5-216213-A-13.wav,5,13,crickets,False,216213,A 229 | 5-216214-A-13.wav,5,13,crickets,False,216214,A 230 | 5-216216-A-13.wav,5,13,crickets,False,216216,A 231 | 5-216368-A-28.wav,5,28,snoring,False,216368,A 232 | 5-216370-A-41.wav,5,41,chainsaw,True,216370,A 233 | 5-216370-B-41.wav,5,41,chainsaw,True,216370,B 234 | 
5-217158-A-0.wav,5,0,dog,True,217158,A 235 | 5-217186-A-16.wav,5,16,wind,False,217186,A 236 | 5-217186-B-16.wav,5,16,wind,False,217186,B 237 | 5-217186-C-16.wav,5,16,wind,False,217186,C 238 | 5-218196-A-27.wav,5,27,brushing_teeth,False,218196,A 239 | 5-218196-B-27.wav,5,27,brushing_teeth,False,218196,B 240 | 5-218494-A-22.wav,5,22,clapping,False,218494,A 241 | 5-218980-A-30.wav,5,30,door_wood_knock,False,218980,A 242 | 5-218981-A-30.wav,5,30,door_wood_knock,False,218981,A 243 | 5-219044-A-46.wav,5,46,church_bells,False,219044,A 244 | 5-219242-A-37.wav,5,37,clock_alarm,False,219242,A 245 | 5-219242-B-37.wav,5,37,clock_alarm,False,219242,B 246 | 5-219318-A-31.wav,5,31,mouse_click,False,219318,A 247 | 5-219342-A-38.wav,5,38,clock_tick,True,219342,A 248 | 5-219379-A-11.wav,5,11,sea_waves,True,219379,A 249 | 5-219379-B-11.wav,5,11,sea_waves,True,219379,B 250 | 5-219379-C-11.wav,5,11,sea_waves,True,219379,C 251 | 5-220026-A-21.wav,5,21,sneezing,True,220026,A 252 | 5-220027-A-21.wav,5,21,sneezing,True,220027,A 253 | 5-220939-A-27.wav,5,27,brushing_teeth,False,220939,A 254 | 5-220955-A-40.wav,5,40,helicopter,True,220955,A 255 | 5-221518-A-21.wav,5,21,sneezing,True,221518,A 256 | 5-221528-A-39.wav,5,39,glass_breaking,False,221528,A 257 | 5-221529-A-39.wav,5,39,glass_breaking,False,221529,A 258 | 5-221567-A-22.wav,5,22,clapping,False,221567,A 259 | 5-221568-A-22.wav,5,22,clapping,False,221568,A 260 | 5-221593-A-21.wav,5,21,sneezing,True,221593,A 261 | 5-221878-A-34.wav,5,34,can_opening,False,221878,A 262 | 5-221950-A-22.wav,5,22,clapping,False,221950,A 263 | 5-222041-A-9.wav,5,9,crow,False,222041,A 264 | 5-222524-A-41.wav,5,41,chainsaw,True,222524,A 265 | 5-222894-A-32.wav,5,32,keyboard_typing,False,222894,A 266 | 5-223099-A-32.wav,5,32,keyboard_typing,False,223099,A 267 | 5-223099-B-32.wav,5,32,keyboard_typing,False,223099,B 268 | 5-223103-A-31.wav,5,31,mouse_click,False,223103,A 269 | 5-223176-A-37.wav,5,37,clock_alarm,False,223176,A 270 | 
5-223317-A-31.wav,5,31,mouse_click,False,223317,A 271 | 5-223810-A-35.wav,5,35,washing_machine,False,223810,A 272 | 5-231551-A-36.wav,5,36,vacuum_cleaner,False,231551,A 273 | 5-231762-A-0.wav,5,0,dog,True,231762,A 274 | 5-232272-A-44.wav,5,44,engine,False,232272,A 275 | 5-232802-A-31.wav,5,31,mouse_click,False,232802,A 276 | 5-232816-A-23.wav,5,23,breathing,False,232816,A 277 | 5-233019-A-31.wav,5,31,mouse_click,False,233019,A 278 | 5-233160-A-1.wav,5,1,rooster,True,233160,A 279 | 5-233260-A-23.wav,5,23,breathing,False,233260,A 280 | 5-233312-A-28.wav,5,28,snoring,False,233312,A 281 | 5-233605-A-39.wav,5,39,glass_breaking,False,233605,A 282 | 5-233607-A-39.wav,5,39,glass_breaking,False,233607,A 283 | 5-233645-A-37.wav,5,37,clock_alarm,False,233645,A 284 | 5-233787-A-7.wav,5,7,insects,False,233787,A 285 | 5-234145-A-28.wav,5,28,snoring,False,234145,A 286 | 5-234247-A-37.wav,5,37,clock_alarm,False,234247,A 287 | 5-234263-A-25.wav,5,25,footsteps,False,234263,A 288 | 5-234335-A-23.wav,5,23,breathing,False,234335,A 289 | 5-234855-A-25.wav,5,25,footsteps,False,234855,A 290 | 5-234879-A-1.wav,5,1,rooster,True,234879,A 291 | 5-234879-B-1.wav,5,1,rooster,True,234879,B 292 | 5-234923-A-32.wav,5,32,keyboard_typing,False,234923,A 293 | 5-235507-A-44.wav,5,44,engine,False,235507,A 294 | 5-235593-A-23.wav,5,23,breathing,False,235593,A 295 | 5-235644-A-30.wav,5,30,door_wood_knock,False,235644,A 296 | 5-235671-A-38.wav,5,38,clock_tick,True,235671,A 297 | 5-235874-A-28.wav,5,28,snoring,False,235874,A 298 | 5-235893-A-28.wav,5,28,snoring,False,235893,A 299 | 5-235956-A-47.wav,5,47,airplane,False,235956,A 300 | 5-236288-A-28.wav,5,28,snoring,False,236288,A 301 | 5-236299-A-34.wav,5,34,can_opening,False,236299,A 302 | 5-237315-A-31.wav,5,31,mouse_click,False,237315,A 303 | 5-237499-A-4.wav,5,4,frog,False,237499,A 304 | 5-237795-A-34.wav,5,34,can_opening,False,237795,A 305 | 5-238021-A-35.wav,5,35,washing_machine,False,238021,A 306 | 5-238492-A-23.wav,5,23,breathing,False,238492,A 307 
| 5-238926-A-31.wav,5,31,mouse_click,False,238926,A 308 | 5-238938-A-27.wav,5,27,brushing_teeth,False,238938,A 309 | 5-240671-A-44.wav,5,44,engine,False,240671,A 310 | 5-241846-A-15.wav,5,15,water_drops,False,241846,A 311 | 5-242490-A-14.wav,5,14,chirping_birds,False,242490,A 312 | 5-242491-A-14.wav,5,14,chirping_birds,False,242491,A 313 | 5-242492-A-3.wav,5,3,cow,False,242492,A 314 | 5-242711-A-9.wav,5,9,crow,False,242711,A 315 | 5-242932-A-26.wav,5,26,laughing,False,242932,A 316 | 5-242932-B-26.wav,5,26,laughing,False,242932,B 317 | 5-243025-A-25.wav,5,25,footsteps,False,243025,A 318 | 5-243036-A-29.wav,5,29,drinking_sipping,False,243036,A 319 | 5-243448-A-14.wav,5,14,chirping_birds,False,243448,A 320 | 5-243449-A-14.wav,5,14,chirping_birds,False,243449,A 321 | 5-243450-A-14.wav,5,14,chirping_birds,False,243450,A 322 | 5-243459-A-14.wav,5,14,chirping_birds,False,243459,A 323 | 5-243459-B-14.wav,5,14,chirping_birds,False,243459,B 324 | 5-243635-A-29.wav,5,29,drinking_sipping,False,243635,A 325 | 5-243773-A-44.wav,5,44,engine,False,243773,A 326 | 5-243773-B-44.wav,5,44,engine,False,243773,B 327 | 5-243783-A-44.wav,5,44,engine,False,243783,A 328 | 5-244178-A-32.wav,5,32,keyboard_typing,False,244178,A 329 | 5-244310-A-25.wav,5,25,footsteps,False,244310,A 330 | 5-244315-A-6.wav,5,6,hen,False,244315,A 331 | 5-244315-B-6.wav,5,6,hen,False,244315,B 332 | 5-244315-C-6.wav,5,6,hen,False,244315,C 333 | 5-244327-A-34.wav,5,34,can_opening,False,244327,A 334 | 5-244459-A-28.wav,5,28,snoring,False,244459,A 335 | 5-244526-A-26.wav,5,26,laughing,False,244526,A 336 | 5-244651-A-31.wav,5,31,mouse_click,False,244651,A 337 | 5-244933-A-34.wav,5,34,can_opening,False,244933,A 338 | 5-245040-A-35.wav,5,35,washing_machine,False,245040,A 339 | 5-248341-A-6.wav,5,6,hen,False,248341,A 340 | 5-248341-B-6.wav,5,6,hen,False,248341,B 341 | 5-248341-C-6.wav,5,6,hen,False,248341,C 342 | 5-249748-A-28.wav,5,28,snoring,False,249748,A 343 | 5-249937-A-22.wav,5,22,clapping,False,249937,A 344 | 
5-250026-A-30.wav,5,30,door_wood_knock,False,250026,A 345 | 5-250026-B-30.wav,5,30,door_wood_knock,False,250026,B 346 | 5-250258-A-49.wav,5,49,hand_saw,False,250258,A 347 | 5-250629-A-37.wav,5,37,clock_alarm,False,250629,A 348 | 5-250753-A-34.wav,5,34,can_opening,False,250753,A 349 | 5-251426-A-30.wav,5,30,door_wood_knock,False,251426,A 350 | 5-251426-B-30.wav,5,30,door_wood_knock,False,251426,B 351 | 5-251489-A-24.wav,5,24,coughing,False,251489,A 352 | 5-251957-A-47.wav,5,47,airplane,False,251957,A 353 | 5-251962-A-47.wav,5,47,airplane,False,251962,A 354 | 5-251963-A-47.wav,5,47,airplane,False,251963,A 355 | 5-251971-A-47.wav,5,47,airplane,False,251971,A 356 | 5-252248-A-34.wav,5,34,can_opening,False,252248,A 357 | 5-253085-A-3.wav,5,3,cow,False,253085,A 358 | 5-253085-B-3.wav,5,3,cow,False,253085,B 359 | 5-253094-A-49.wav,5,49,hand_saw,False,253094,A 360 | 5-253094-B-49.wav,5,49,hand_saw,False,253094,B 361 | 5-253094-C-49.wav,5,49,hand_saw,False,253094,C 362 | 5-253094-D-49.wav,5,49,hand_saw,False,253094,D 363 | 5-253101-A-49.wav,5,49,hand_saw,False,253101,A 364 | 5-253101-B-49.wav,5,49,hand_saw,False,253101,B 365 | 5-253101-C-49.wav,5,49,hand_saw,False,253101,C 366 | 5-253534-A-26.wav,5,26,laughing,False,253534,A 367 | 5-254160-A-22.wav,5,22,clapping,False,254160,A 368 | 5-254832-A-15.wav,5,15,water_drops,False,254832,A 369 | 5-254832-B-15.wav,5,15,water_drops,False,254832,B 370 | 5-256452-A-5.wav,5,5,cat,False,256452,A 371 | 5-256512-A-30.wav,5,30,door_wood_knock,False,256512,A 372 | 5-257349-A-15.wav,5,15,water_drops,False,257349,A 373 | 5-257642-A-39.wav,5,39,glass_breaking,False,257642,A 374 | 5-257839-A-14.wav,5,14,chirping_birds,False,257839,A 375 | 5-259169-A-5.wav,5,5,cat,False,259169,A 376 | 5-259180-A-15.wav,5,15,water_drops,False,259180,A 377 | 5-259514-A-26.wav,5,26,laughing,False,259514,A 378 | 5-259640-A-29.wav,5,29,drinking_sipping,False,259640,A 379 | 5-260011-A-34.wav,5,34,can_opening,False,260011,A 380 | 
5-260164-A-23.wav,5,23,breathing,False,260164,A 381 | 5-260432-A-39.wav,5,39,glass_breaking,False,260432,A 382 | 5-260433-A-39.wav,5,39,glass_breaking,False,260433,A 383 | 5-260434-A-39.wav,5,39,glass_breaking,False,260434,A 384 | 5-260875-A-35.wav,5,35,washing_machine,False,260875,A 385 | 5-261325-A-9.wav,5,9,crow,False,261325,A 386 | 5-261433-A-15.wav,5,15,water_drops,False,261433,A 387 | 5-261439-A-15.wav,5,15,water_drops,False,261439,A 388 | 5-261464-A-23.wav,5,23,breathing,False,261464,A 389 | 5-262641-A-15.wav,5,15,water_drops,False,262641,A 390 | 5-262957-A-22.wav,5,22,clapping,False,262957,A 391 | 5-263490-A-25.wav,5,25,footsteps,False,263490,A 392 | 5-263491-A-25.wav,5,25,footsteps,False,263491,A 393 | 5-263501-A-25.wav,5,25,footsteps,False,263501,A 394 | 5-263775-A-26.wav,5,26,laughing,False,263775,A 395 | 5-263775-B-26.wav,5,26,laughing,False,263775,B 396 | 5-263831-A-6.wav,5,6,hen,False,263831,A 397 | 5-263831-B-6.wav,5,6,hen,False,263831,B 398 | 5-263902-A-36.wav,5,36,vacuum_cleaner,False,263902,A 399 | 5-51149-A-25.wav,5,25,footsteps,False,51149,A 400 | 5-61635-A-8.wav,5,8,sheep,False,61635,A 401 | 5-9032-A-0.wav,5,0,dog,True,9032,A 402 | --------------------------------------------------------------------------------