├── README.md ├── config_2ch_raw_mbk_ipld.py ├── config_2ch_raw_spec_ipld.py ├── config_2ch_raw_wav_ipld.py ├── eer.py ├── main_cnnlstm_2ch_Spec_ipld_conv1d_keras_noILD_easy_fold4.py ├── main_cnnlstm_2ch_MBK_ipld_conv1d_keras_noIPD_easy_fold4.py ├── main_cnnlstm_2ch_wav_ipld_conv1d_keras_noILD_easy_noDP_fold4.py ├── main_cnnlstm_2ch_Spec_ipld_conv1d_keras_indIPD_easy_fold4.py ├── main_cnnlstm_2ch_MBK_ipld_conv1d_keras_indIPD_easy_fold4.py ├── main_cnnlstm_2ch_wav_ipld_conv1d_keras_indIPD_easy_fold4.py ├── recognize_cnnlstm_2ch_spec_keras_indIPD_easy.py ├── recognize_cnnlstm_2ch_mbk_keras_noILD_easy.py ├── recognize_cnnlstm_2ch_mbk_keras_indIPD_easy.py ├── recognize_cnnlstm_2ch_spec_keras_noIPD_easy.py ├── recognize_cnnlstm_2ch_raw_keras_noILD_easy.py ├── recognize_cnnlstm_2ch_raw_keras_indIPD_easy.py └── prepare_data_2ch_raw_ipd_ild_easy_Spec.py /README.md: -------------------------------------------------------------------------------- 1 | # cnn_rnn_spatial_audio_tagging 2 | This source code is for the paper "convolutional gated recurrent neural network incorporating spatial features for audio tagging" 3 | 4 | It is also an implementation of a CLDNN (CNN-LSTM-DNN), recurrent convolutional neural network, or convolutional recurrent neural network. It can be used for both regression and classification. You can easily adapt it to your task. 
5 | 6 | Accepted by IJCNN 2017 7 | Paper download: "convolutional gated recurrent neural network incorporating spatial features for audio tagging" 8 | http://epubs.surrey.ac.uk/813631/ 9 | -------------------------------------------------------------------------------- /config_2ch_raw_mbk_ipld.py: -------------------------------------------------------------------------------- 1 | 2 | # development 3 | dev_root = '../chime_home' 4 | dev_wav_fd = dev_root + '/chunks_16k_2ch' 5 | 6 | # temporary data folder 7 | scrap_fd = "../chime_home/DCASE2016_task4_scrap_2ch_mbk_ipd_ild_overlap" 8 | dev_fe_mel_fd_left = scrap_fd + '/Fe/Mel_l' 9 | dev_fe_mel_fd_right = scrap_fd + '/Fe/Mel_r' 10 | dev_fe_mel_fd_mean = scrap_fd + '/Fe/Mel_m' 11 | dev_fe_mel_fd_diff = scrap_fd + '/Fe/Mel_d' 12 | dev_fe_mel_fd_ipd = scrap_fd + '/Fe/Mel_ipd' 13 | dev_fe_mel_fd_ild = scrap_fd + '/Fe/Mel_ild' 14 | #dev_cv_csv_path = dev_root + '/development_chunks_refined_crossval_dcase2016.csv' 15 | dev_cv_csv_path = dev_root + '/development_chunks_raw_crossval_dcase2016.csv' 16 | 17 | # evaluation 18 | ''' 19 | eva_csv_path = root + '/evaluation_chunks_refined.csv' 20 | fe_mel_eva_fd = 'Fe_eva/Mel' 21 | ''' 22 | 23 | labels = [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ] 24 | lb_to_id = { lb:id for id, lb in enumerate(labels) } 25 | id_to_lb = { id:lb for id, lb in enumerate(labels) } 26 | 27 | fs = 16000. 28 | win = 512. 
# -----------------------------------------------------------------------------
# /config_2ch_raw_spec_ipld.py
# -----------------------------------------------------------------------------
"""Paths, label tables and framing constants for the 2-channel "spec" setup.

Every name defined here is a plain module-level constant; importing the
module performs no I/O.
"""

# development
dev_root = '../chime_home'
dev_wav_fd = dev_root + '/chunks_16k_2ch'

# temporary data folder: all feature sub-folders below are rooted here
scrap_fd = "../chime_home/DCASE2016_task4_scrap_2ch_spec_ipd_ild_overlap"
dev_fe_mel_fd_left = scrap_fd + '/Fe/Mel_l'
dev_fe_mel_fd_right = scrap_fd + '/Fe/Mel_r'
dev_fe_mel_fd_mean = scrap_fd + '/Fe/Mel_m'
dev_fe_mel_fd_diff = scrap_fd + '/Fe/Mel_d'
dev_fe_mel_fd_ipd = scrap_fd + '/Fe/Mel_ipd'
dev_fe_mel_fd_ild = scrap_fd + '/Fe/Mel_ild'
# Alternative ("refined") cross-validation list, currently disabled:
# dev_cv_csv_path = dev_root + '/development_chunks_refined_crossval_dcase2016.csv'
dev_cv_csv_path = dev_root + '/development_chunks_raw_crossval_dcase2016.csv'

# evaluation (disabled)
# These two assignments used to sit inside a bare triple-quoted string, i.e.
# they were never executed. They are kept as comments for reference.
# NOTE(review): `root` is not defined in this file -- presumably `dev_root`
# was meant; confirm before re-enabling.
# eva_csv_path = root + '/evaluation_chunks_refined.csv'
# fe_mel_eva_fd = 'Fe_eva/Mel'

# Tag labels and the two lookup tables derived from their list position.
labels = ['c', 'm', 'f', 'v', 'p', 'b', 'o', 'S']
lb_to_id = {lb: idx for idx, lb in enumerate(labels)}  # label -> index
id_to_lb = dict(enumerate(labels))                     # index -> label

# NOTE(review): presumably the sampling rate in Hz and the analysis window
# length in samples -- confirm against the feature-extraction script.
fs = 16000.
win = 512.
# -----------------------------------------------------------------------------
# /config_2ch_raw_wav_ipld.py
# -----------------------------------------------------------------------------
"""Paths, label tables and framing constants for the 2-channel "wav" setup.

Every name defined here is a plain module-level constant; importing the
module performs no I/O.
"""

# development
dev_root = '../chime_home'
dev_wav_fd = dev_root + '/chunks_16k_2ch'

# temporary data folder: all feature sub-folders below are rooted here
scrap_fd = "../chime_home/DCASE2016_task4_scrap_2ch_wav20ms_ipd_ild_overlap"
dev_fe_mel_fd_left = scrap_fd + '/Fe/Mel_l'
dev_fe_mel_fd_right = scrap_fd + '/Fe/Mel_r'
dev_fe_mel_fd_mean = scrap_fd + '/Fe/Mel_m'
dev_fe_mel_fd_diff = scrap_fd + '/Fe/Mel_d'
dev_fe_mel_fd_ipd = scrap_fd + '/Fe/Mel_ipd'
dev_fe_mel_fd_ild = scrap_fd + '/Fe/Mel_ild'
# Alternative ("refined") cross-validation list, currently disabled:
# dev_cv_csv_path = dev_root + '/development_chunks_refined_crossval_dcase2016.csv'
dev_cv_csv_path = dev_root + '/development_chunks_raw_crossval_dcase2016.csv'

# evaluation (disabled)
# These two assignments used to sit inside a bare triple-quoted string, i.e.
# they were never executed. They are kept as comments for reference.
# NOTE(review): `root` is not defined in this file -- presumably `dev_root`
# was meant; confirm before re-enabling.
# eva_csv_path = root + '/evaluation_chunks_refined.csv'
# fe_mel_eva_fd = 'Fe_eva/Mel'

# Tag labels and the two lookup tables derived from their list position.
labels = ['c', 'm', 'f', 'v', 'p', 'b', 'o', 'S']
lb_to_id = {lb: idx for idx, lb in enumerate(labels)}  # label -> index
id_to_lb = dict(enumerate(labels))                     # index -> label

# NOTE(review): presumably the sampling rate in Hz and the analysis window
# length in samples -- confirm against the feature-extraction script.
fs = 16000.
win = 320.

# -----------------------------------------------------------------------------
# /eer.py  (start of the next file in this dump; its header begins here)
# -----------------------------------------------------------------------------
# DCASE 2016::Domestic Audio Tagging / Baseline System
# Copyright (C) 2016 Peter Foster (p.a.foster@qmul.ac.uk) / QMUL
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
8 | # 9 | # This program is distributed in the hope that it will be useful, 10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of 11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 | # GNU General Public License for more details. 13 | # 14 | # You should have received a copy of the GNU General Public License 15 | # along with this program. If not, see <http://www.gnu.org/licenses/>. 16 | 17 | from sklearn import metrics 18 | import numpy 19 | import csv 20 | 21 | def compute_eer(result_filename, label, label_assignments): 22 | """Compute the equal error rate (EER) from the plot of the false negative rate 23 | versus the false positive rate. 24 | 25 | Keyword arguments: 26 | result_filename -- The CSV file from which to read results. 27 | Each row in the file is of the form 28 | 29 | ,