├── README.md
├── config_2ch_raw_mbk_ipld.py
├── config_2ch_raw_spec_ipld.py
├── config_2ch_raw_wav_ipld.py
├── eer.py
├── main_cnnlstm_2ch_Spec_ipld_conv1d_keras_noILD_easy_fold4.py
├── main_cnnlstm_2ch_MBK_ipld_conv1d_keras_noIPD_easy_fold4.py
├── main_cnnlstm_2ch_wav_ipld_conv1d_keras_noILD_easy_noDP_fold4.py
├── main_cnnlstm_2ch_Spec_ipld_conv1d_keras_indIPD_easy_fold4.py
├── main_cnnlstm_2ch_MBK_ipld_conv1d_keras_indIPD_easy_fold4.py
├── main_cnnlstm_2ch_wav_ipld_conv1d_keras_indIPD_easy_fold4.py
├── recognize_cnnlstm_2ch_spec_keras_indIPD_easy.py
├── recognize_cnnlstm_2ch_mbk_keras_noILD_easy.py
├── recognize_cnnlstm_2ch_mbk_keras_indIPD_easy.py
├── recognize_cnnlstm_2ch_spec_keras_noIPD_easy.py
├── recognize_cnnlstm_2ch_raw_keras_noILD_easy.py
├── recognize_cnnlstm_2ch_raw_keras_indIPD_easy.py
└── prepare_data_2ch_raw_ipd_ild_easy_Spec.py


/README.md:
--------------------------------------------------------------------------------
1 | # cnn_rnn_spatial_audio_tagging
2 | This source code accompanies the paper "Convolutional gated recurrent neural network incorporating spatial features for audio tagging".
3 | 
4 | It is also an implementation of CLDNN (CNN-LSTM-DNN), also known as a recurrent convolutional or convolutional recurrent neural network. It can be used for both regression and classification, and it is easy to adapt to your own task.
5 | 
6 | Accepted by IJCNN 2017.
7 | Paper download: "Convolutional gated recurrent neural network incorporating spatial features for audio tagging"
8 | http://epubs.surrey.ac.uk/813631/
9 | 


--------------------------------------------------------------------------------
/config_2ch_raw_mbk_ipld.py:
--------------------------------------------------------------------------------
 1 | 
 2 | # development
 3 | dev_root = '../chime_home'
 4 | dev_wav_fd = dev_root + '/chunks_16k_2ch'
 5 | 
 6 | # temporary data folder
 7 | scrap_fd = "../chime_home/DCASE2016_task4_scrap_2ch_mbk_ipd_ild_overlap"
 8 | dev_fe_mel_fd_left = scrap_fd + '/Fe/Mel_l'
 9 | dev_fe_mel_fd_right = scrap_fd + '/Fe/Mel_r'
10 | dev_fe_mel_fd_mean = scrap_fd + '/Fe/Mel_m'
11 | dev_fe_mel_fd_diff = scrap_fd + '/Fe/Mel_d'
12 | dev_fe_mel_fd_ipd = scrap_fd + '/Fe/Mel_ipd'
13 | dev_fe_mel_fd_ild = scrap_fd + '/Fe/Mel_ild'
14 | #dev_cv_csv_path = dev_root + '/development_chunks_refined_crossval_dcase2016.csv'
15 | dev_cv_csv_path = dev_root + '/development_chunks_raw_crossval_dcase2016.csv'
16 | 
17 | # evaluation
18 | '''
19 | eva_csv_path = root + '/evaluation_chunks_refined.csv'
20 | fe_mel_eva_fd = 'Fe_eva/Mel'
21 | '''
22 | 
23 | labels = [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
24 | lb_to_id = { lb:id for id, lb in enumerate(labels) }
25 | id_to_lb = { id:lb for id, lb in enumerate(labels) }
26 | 
27 | fs = 16000.
28 | win = 512.
29 | 


--------------------------------------------------------------------------------
/config_2ch_raw_spec_ipld.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # development
 4 | dev_root = '../chime_home'
 5 | dev_wav_fd = dev_root + '/chunks_16k_2ch'
 6 | 
 7 | # temporary data folder
 8 | scrap_fd = "../chime_home/DCASE2016_task4_scrap_2ch_spec_ipd_ild_overlap"
 9 | dev_fe_mel_fd_left = scrap_fd + '/Fe/Mel_l'
10 | dev_fe_mel_fd_right = scrap_fd + '/Fe/Mel_r'
11 | dev_fe_mel_fd_mean = scrap_fd + '/Fe/Mel_m'
12 | dev_fe_mel_fd_diff = scrap_fd + '/Fe/Mel_d'
13 | dev_fe_mel_fd_ipd = scrap_fd + '/Fe/Mel_ipd'
14 | dev_fe_mel_fd_ild = scrap_fd + '/Fe/Mel_ild'
15 | #dev_cv_csv_path = dev_root + '/development_chunks_refined_crossval_dcase2016.csv'
16 | dev_cv_csv_path = dev_root + '/development_chunks_raw_crossval_dcase2016.csv'
17 | 
18 | # evaluation
19 | '''
20 | eva_csv_path = root + '/evaluation_chunks_refined.csv'
21 | fe_mel_eva_fd = 'Fe_eva/Mel'
22 | '''
23 | 
24 | labels = [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
25 | lb_to_id = { lb:id for id, lb in enumerate(labels) }
26 | id_to_lb = { id:lb for id, lb in enumerate(labels) }
27 | 
28 | fs = 16000.
29 | win = 512.
30 | 


--------------------------------------------------------------------------------
/config_2ch_raw_wav_ipld.py:
--------------------------------------------------------------------------------
 1 | 
 2 | 
 3 | # development
 4 | dev_root = '../chime_home'
 5 | dev_wav_fd = dev_root + '/chunks_16k_2ch'
 6 | 
 7 | # temporary data folder
 8 | scrap_fd = "../chime_home/DCASE2016_task4_scrap_2ch_wav20ms_ipd_ild_overlap"
 9 | dev_fe_mel_fd_left = scrap_fd + '/Fe/Mel_l'
10 | dev_fe_mel_fd_right = scrap_fd + '/Fe/Mel_r'
11 | dev_fe_mel_fd_mean = scrap_fd + '/Fe/Mel_m'
12 | dev_fe_mel_fd_diff = scrap_fd + '/Fe/Mel_d'
13 | dev_fe_mel_fd_ipd = scrap_fd + '/Fe/Mel_ipd'
14 | dev_fe_mel_fd_ild = scrap_fd + '/Fe/Mel_ild'
15 | #dev_cv_csv_path = dev_root + '/development_chunks_refined_crossval_dcase2016.csv'
16 | dev_cv_csv_path = dev_root + '/development_chunks_raw_crossval_dcase2016.csv'
17 | 
18 | # evaluation
19 | '''
20 | eva_csv_path = root + '/evaluation_chunks_refined.csv'
21 | fe_mel_eva_fd = 'Fe_eva/Mel'
22 | '''
23 | 
24 | labels = [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
25 | lb_to_id = { lb:id for id, lb in enumerate(labels) }
26 | id_to_lb = { id:lb for id, lb in enumerate(labels) }
27 | 
28 | fs = 16000.
29 | win = 320.
30 | 


--------------------------------------------------------------------------------
/eer.py:
--------------------------------------------------------------------------------
 1 | # DCASE 2016::Domestic Audio Tagging / Baseline System
 2 | # Copyright (C) 2016 Peter Foster (p.a.foster@qmul.ac.uk) / QMUL
 3 | #
 4 | # This program is free software; you can redistribute it and/or modify
 5 | # it under the terms of the GNU General Public License as published by
 6 | # the Free Software Foundation; either version 3 of the License, or
 7 | # (at your option) any later version.
 8 | #
 9 | # This program is distributed in the hope that it will be useful,
10 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 | # GNU General Public License for more details.
13 | #
14 | # You should have received a copy of the GNU General Public License
15 | # along with this program.  If not, see <http://www.gnu.org/licenses/>.
16 | 
17 | from sklearn import metrics
18 | import numpy
19 | import csv
20 | 
def _interpolate_eer(fpr, tpr, eps=1E-6):
    """Locate the equal error rate on a ROC curve.

    Walks the curve, starting from an added (0, 0) point, until the false
    positive rate reaches the false negative rate (1 - tpr) within `eps`,
    then linearly interpolates between that point and the one before it.

    Keyword arguments:
        fpr -- Sequence of false positive rates.
        tpr -- Sequence of true positive rates, parallel to fpr.
        eps -- Tolerance used for the crossing test and for detecting a
        vertical segment.
    """
    # list(...) keeps the concatenation valid where zip() returns an
    # iterator (Python 3) as well as where it returns a list (Python 2).
    points = [(0, 0)] + list(zip(fpr, tpr))
    for i, point in enumerate(points):
        if point[0] + eps >= 1 - point[1]:
            break
    p1 = points[i - 1]
    p2 = points[i]

    # Interpolate between p1 and p2
    if abs(p2[0] - p1[0]) < eps:
        # Vertical segment: the false positive rate is the same at both ends.
        return p1[0]
    m = (p2[1] - p1[1]) / (p2[0] - p1[0])
    o = p1[1] - m * p1[0]
    # Intersection of tpr = m * fpr + o with tpr = 1 - fpr.
    return (1 - o) / (1 + m)


def compute_eer(result_filename, label, label_assignments):
    """Compute the equal error rate (EER) from the plot of the false negative rate
        versus the false positive rate.

    Keyword arguments:
        result_filename -- The CSV file from which to read results.
            Each row in the file is of the form

                <filename>,<label>,<score>
            where <filename> is an audio file name, <label> is a label identifier
            and where score is a classification score about the presence of
            <label> in <filename>. Blank lines are ignored.
        label -- The label identifier (as specified in result_filename) for which
        to compute the EER.
        label_assignments -- A dictionary whose keys are file names as contained
        in result_filename and whose values are ground truth assignments about
        the presence of the specified label.

    Returns:
        The equal error rate for `label`.

    Raises:
        ValueError -- if a label identifier is not a single alphabetic
        character, if a file has more than one score for `label`, or if the
        scored files and the keys of label_assignments differ.
    """
    results = []
    with open(result_filename, 'rt') as f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                # csv.reader yields [] for a blank line (e.g. a trailing
                # newline); there is nothing to validate or score.
                continue
            if len(row[1]) != 1 or not row[1].isalpha():
                raise ValueError('The label identfier "' + row[1] + '" in row ' + str(row) + ' is not valid.')
            if row[1] == label:
                results.append((row[0], float(row[2])))

    filenames = [name for name, _ in results]
    if len(set(filenames)) != len(filenames):
        raise ValueError('File ' + result_filename + ' contains duplicate score assignments.')
    if set(filenames).symmetric_difference(set(label_assignments.keys())):
        raise ValueError('One-to-one mapping between files listed in ' + result_filename + ' and ground truth assignments for label ' + label + ' not satisfied.')

    y_true = numpy.array([label_assignments[name] for name in filenames])
    y_score = numpy.array([score for _, score in results])

    fpr, tpr, _ = metrics.roc_curve(y_true, y_score, drop_intermediate=True)
    return _interpolate_eer(fpr, tpr)
73 | 


--------------------------------------------------------------------------------
/main_cnnlstm_2ch_Spec_ipld_conv1d_keras_noILD_easy_fold4.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  3 | import pickle
  4 | import numpy as np
  5 | np.random.seed(1515)
  6 | import os
  7 | import config_2ch_raw_spec_ipld as cfg
  8 | from Hat.preprocessing import reshape_3d_to_4d
  9 | import prepare_data_2ch_raw_ipd_ild_easy_Spec as pp_data
 10 | #from prepare_data import load_data
 11 | 
 12 | 
 13 | import keras
 14 | 
 15 | from keras.datasets import mnist, cifar10
 16 | from keras.models import Sequential, Graph
 17 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
 18 | from keras.layers.convolutional import ZeroPadding2D, AveragePooling2D, Convolution2D,MaxPooling2D, Convolution1D,MaxPooling1D
 19 | from keras.utils import np_utils
 20 | from keras.layers import Merge, Input
 21 | from keras.regularizers import l1, l2, l1l2, activity_l2
 22 | from keras.constraints import nonneg
 23 | from keras.callbacks import ModelCheckpoint
 24 | from keras.layers.normalization import BatchNormalization
 25 | from keras.layers import LSTM, SimpleRNN, GRU, TimeDistributed, Bidirectional
 26 | import h5py
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | def reshapeX1( X ):
 34 |     N = len(X)
 35 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 36 | 
 37 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 38 | def reshapeX2( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, feadim) )
 41 | 
 42 | feadim=257
 43 | t_delay=33
 44 | 
 45 | # hyper-params
 46 | fe_fd_left = cfg.dev_fe_mel_fd_left
 47 | fe_fd_right = cfg.dev_fe_mel_fd_right
 48 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 49 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 50 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 51 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 52 | 
 53 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 54 | agg_num = 33        # concatenate frames
 55 | hop = 10            # step_len
 56 | n_hid = 1000
 57 | n_out = len( cfg.labels )
 58 | print n_out
 59 | fold = 4       # can be 0, 1, 2, 3, 4
 60 | 
 61 | # prepare data
 62 | tr_X1, tr_X2, tr_y, te_X1, te_X2, te_y = pp_data.GetAllData_separate( fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, agg_num, hop, fold )
 63 |     
 64 | tr_X1=reshapeX1(tr_X1)
 65 | te_X1=reshapeX1(te_X1)
 66 | 
 67 | print tr_X1.shape, tr_X2.shape, tr_y.shape
 68 | print te_X1.shape, te_X2.shape, te_y.shape
 69 | 
 70 | ###build model by keras
 71 | kernel_size=(200,1)
 72 | pool_size=(257-200+1,1)
 73 | #pool_size=(2,1)
 74 | 
 75 | model1 = Sequential()
 76 | model1.add(TimeDistributed(Convolution2D(128, 200, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,feadim, 1)))
 77 | model1.add(Activation('relu'))
 78 | #model1.add(Dropout(0.1))
 79 | model1.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
 80 | 
 81 | #model1.add(TimeDistributed(Convolution2D(24, 3, 1)))
 82 | #model1.add(Activation('relu'))
 83 | #model1.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
 84 | 
 85 | model1.add(TimeDistributed(Flatten()))
 86 | #model1.add(Reshape((t_delay, feadim),input_shape=(t_delay,feadim)))
 87 | print model1.output_shape
 88 | 
 89 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0)))
 90 | #print model.output_shape
 91 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0))) # GRU is better than LSTM on refined set, not verified on raw set
 92 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=False,dropout_W=0.0, dropout_U=0.0)))
 93 | 
 94 | model1.add(Dense(500))
 95 | model1.add(Activation('relu'))
 96 | model1.add(Dropout(0.2))
 97 | 
 98 | model1.add(Dense(n_out))
 99 | model1.add(Activation('sigmoid'))
100 | 
101 | model1.summary()
102 | 
103 | #adam=keras.optimizers.Adam(lr=1e-4)
104 | model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
105 | 
106 | dump_fd=cfg.scrap_fd+'/Md/cnn_keras_overlap50_noILD_1CNN128onSpec_fold4_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
107 | 
108 | eachmodel=ModelCheckpoint(dump_fd,monitor='val_loss',verbose=0,save_best_only=False,save_weights_only=False,mode='auto')    
109 | 
110 | #model1.fit([tr_X1,tr_X2], tr_y, batch_size=100, nb_epoch=101,
111 | #              verbose=1, validation_data=([te_X1,te_X2], te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
112 | 
113 | model1.fit(tr_X1, tr_y, batch_size=100, nb_epoch=21,
114 |               verbose=1, validation_data=(te_X1, te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
115 | 
116 | #score = model1.evaluate(te_X, te_y, show_accuracy=True, verbose=0)
117 | #print('Test score:', score[0])
118 | #print('Test accuracy:', score[1])
119 | 
120 | 


--------------------------------------------------------------------------------
/main_cnnlstm_2ch_MBK_ipld_conv1d_keras_noIPD_easy_fold4.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('../Hat')
  3 | import pickle
  4 | import numpy as np
  5 | np.random.seed(1515)
  6 | import os
  7 | import config_2ch_raw_mbk_ipld as cfg
  8 | from Hat.preprocessing import reshape_3d_to_4d
  9 | import prepare_data_2ch_raw_ipd_ild_easy as pp_data
 10 | #from prepare_data import load_data
 11 | 
 12 | 
 13 | import keras
 14 | 
 15 | from keras.datasets import mnist, cifar10
 16 | from keras.models import Sequential, Graph
 17 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
 18 | from keras.layers.convolutional import ZeroPadding2D, AveragePooling2D, Convolution2D,MaxPooling2D, Convolution1D,MaxPooling1D
 19 | from keras.utils import np_utils
 20 | from keras.layers import Merge, Input
 21 | from keras.regularizers import l1, l2, l1l2, activity_l2
 22 | from keras.constraints import nonneg
 23 | from keras.callbacks import ModelCheckpoint
 24 | from keras.layers.normalization import BatchNormalization
 25 | from keras.layers import LSTM, SimpleRNN, GRU, TimeDistributed, Bidirectional
 26 | import h5py
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | def reshapeX1( X ):
 34 |     N = len(X)
 35 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 36 | 
 37 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 38 | def reshapeX2( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, feadim) )
 41 | 
 42 | feadim=40
 43 | t_delay=33
 44 | 
 45 | # hyper-params
 46 | fe_fd_left = cfg.dev_fe_mel_fd_left
 47 | fe_fd_right = cfg.dev_fe_mel_fd_right
 48 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 49 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 50 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 51 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 52 | 
 53 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 54 | agg_num = 33        # concatenate frames
 55 | hop = 10            # step_len
 56 | n_hid = 1000
 57 | n_out = len( cfg.labels )
 58 | print n_out
 59 | fold =4        # can be 0, 1, 2, 3, 4
 60 | 
 61 | # prepare data
 62 | tr_X1, tr_X2, tr_y, te_X1, te_X2, te_y = pp_data.GetAllData_separate( fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, agg_num, hop, fold )
 63 | #[batch_num, inmap, n_time, n_freq] = tr_X1.shape
 64 | print tr_X1.shape
 65 | print tr_X2.shape
 66 | #sys.exit()
 67 | tr_X1, te_X1 = reshapeX1(tr_X1), reshapeX1(te_X1)
 68 | ##tr_X2, te_X2 = reshapeX1(tr_X2), reshapeX1(te_X2)
 69 |     
 70 | print tr_X1.shape, tr_X2.shape, tr_y.shape
 71 | print te_X1.shape, te_X2.shape, te_y.shape
 72 | 
 73 | ###build model by keras
 74 | kernel_size=(30,1)
 75 | pool_size=(40-30+1,1)
 76 | #pool_size=(3,1)
 77 | 
 78 | model1 = Sequential()
 79 | model1.add(TimeDistributed(Convolution2D(128, 30, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,feadim, 1)))
 80 | model1.add(Activation('relu'))
 81 | #model1.add(Dropout(0.1))
 82 | model1.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
 83 | 
 84 | 
 85 | 
 86 | model1.add(TimeDistributed(Flatten()))
 87 | #model1.add(Reshape((t_delay, feadim),input_shape=(t_delay,feadim)))
 88 | print model1.output_shape
 89 | 
 90 | #model1.add(Reshape((t_delay, 1, 257, 1), input_shape=(t_delay, 257)))
 91 | #print model1.output_shape
 92 | 
 93 | #model1.add(TimeDistributed(Convolution2D(128, 200, 1)))
 94 | #model1.add(Activation('relu'))
 95 | #model1.add(TimeDistributed(MaxPooling2D(pool_size=(257-200+1,1))))
 96 | #print model1.output_shape
 97 | 
 98 | 
 99 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0)))
100 | #print model.output_shape
101 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0))) # GRU is better than LSTM on refined set, not verified on raw set
102 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=False,dropout_W=0.0, dropout_U=0.0)))
103 | 
104 | model1.add(Dense(500))
105 | model1.add(Activation('relu'))
106 | model1.add(Dropout(0.2))
107 | 
108 | model1.add(Dense(n_out))
109 | model1.add(Activation('sigmoid'))
110 | 
111 | model1.summary()
112 | 
113 | #adam=keras.optimizers.Adam(lr=1e-4)
114 | model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
115 | 
116 | dump_fd=cfg.scrap_fd+'/Md/cnn_keras_overlap50_fold4_1CNN128onMBK40_noILD_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
117 | 
118 | eachmodel=ModelCheckpoint(dump_fd,monitor='val_loss',verbose=0,save_best_only=False,save_weights_only=False,mode='auto')    
119 | 
120 | #model1.fit([tr_X1,tr_X2], tr_y, batch_size=100, nb_epoch=101,
121 | #              verbose=1, validation_data=([te_X1,te_X2], te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
122 | 
123 | model1.fit(tr_X1, tr_y, batch_size=100, nb_epoch=21,
124 |               verbose=1, validation_data=(te_X1, te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
125 | 
126 | #score = model1.evaluate(te_X, te_y, show_accuracy=True, verbose=0)
127 | #print('Test score:', score[0])
128 | #print('Test accuracy:', score[1])
129 | 
130 | 


--------------------------------------------------------------------------------
/main_cnnlstm_2ch_wav_ipld_conv1d_keras_noILD_easy_noDP_fold4.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  3 | import pickle
  4 | import numpy as np
  5 | np.random.seed(1515)
  6 | import os
  7 | import config_2ch_raw_wav32ms_ipld as cfg
  8 | from Hat.preprocessing import reshape_3d_to_4d
  9 | import prepare_data_2ch_raw_ipd_ild_easy as pp_data
 10 | #from prepare_data import load_data
 11 | 
 12 | 
 13 | import keras
 14 | 
 15 | from keras.datasets import mnist, cifar10
 16 | from keras.models import Sequential, Graph
 17 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
 18 | from keras.layers.convolutional import ZeroPadding2D, AveragePooling2D, Convolution2D,MaxPooling2D, Convolution1D
 19 | from keras.layers.pooling import MaxPooling1D
 20 | from keras.utils import np_utils
 21 | from keras.layers import Merge
 22 | from keras.regularizers import l1, l2, l1l2, activity_l2
 23 | from keras.constraints import nonneg
 24 | from keras.callbacks import ModelCheckpoint
 25 | from keras.layers.normalization import BatchNormalization
 26 | from keras.layers import LSTM, SimpleRNN, GRU, TimeDistributed, Bidirectional
 27 | import h5py
 28 | 
 29 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 30 | def reshapeX( X ):
 31 |     N = len(X)
 32 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 33 | 
 34 | def reshapeX1( X ):
 35 |     N = len(X)
 36 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 37 | 
 38 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 39 | def reshapeX2( X ):
 40 |     N = len(X)
 41 |     return X.reshape( (N, t_delay, feadim) )
 42 | 
 43 | feadim=512
 44 | t_delay=33
 45 | 
 46 | # hyper-params
 47 | fe_fd_left = cfg.dev_fe_mel_fd_left
 48 | fe_fd_right = cfg.dev_fe_mel_fd_right
 49 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 50 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 51 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 52 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 53 | 
 54 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 55 | agg_num = 33        # concatenate frames
 56 | hop = 10            # step_len
 57 | n_hid = 1000
 58 | n_out = len( cfg.labels )
 59 | print n_out
 60 | fold =4       # can be 0, 1, 2, 3, 4
 61 | 
 62 | # prepare data
 63 | tr_X1, tr_X2, tr_y, te_X1, te_X2, te_y = pp_data.GetAllData_separate( fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, agg_num, hop, fold )
 64 | #[batch_num, inmap, n_time, n_freq] = tr_X1.shape
 65 | print tr_X1.shape
 66 | print tr_X2.shape
 67 | #sys.exit()
 68 | tr_X1, te_X1 = reshapeX1(tr_X1), reshapeX1(te_X1)
 69 | #tr_X2, te_X2 = reshapeX1(tr_X2), reshapeX1(te_X2)
 70 |     
 71 | print tr_X1.shape, tr_X2.shape, tr_y.shape
 72 | print te_X1.shape, te_X2.shape, te_y.shape
 73 | 
 74 | ###build model by keras
 75 | kernel_size=(400,1)
 76 | pool_size=(512-400+1,1)
 77 | #tpool_size=(8,1)
 78 | #fpool_size=(2,1)
 79 | 
 80 | model1 = Sequential()
 81 | model1.add(TimeDistributed(Convolution2D(128, 400, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,feadim, 1)))
 82 | #model1.add(TimeDistributed(BatchNormalization(mode=0)))
 83 | model1.add(Activation('relu'))
 84 | #model1.add(Dropout(0.1))
 85 | model1.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
 86 | 
 87 | #model1.add(TimeDistributed(Convolution2D(128, 3, 1, subsample=(2, 1))))
 88 | #model1.add(Activation('relu'))
 89 | #model1.add(TimeDistributed(MaxPooling2D(pool_size=fpool_size)))
 90 | 
 91 | model1.add(TimeDistributed(Flatten()))
 92 | print model1.output_shape
 93 | 
 94 | #model1.add(Reshape((t_delay, 1, 257, 1), input_shape=(t_delay, 257)))
 95 | #print model1.output_shape
 96 | 
 97 | #model1.add(TimeDistributed(Convolution2D(128, 200, 1)))
 98 | #model1.add(Activation('relu'))
 99 | #model1.add(TimeDistributed(MaxPooling2D(pool_size=(257-200+1,1))))
100 | #print model1.output_shape
101 | 
102 | #model1.add(TimeDistributed(Flatten()))
103 | #print model1.output_shape
104 | 
105 | 
106 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0)))
107 | #print model.output_shape
108 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0))) # GRU is better than LSTM on refined set, not verified on raw set
109 | model1.add(Bidirectional(GRU(output_dim=128,return_sequences=False,dropout_W=0.0, dropout_U=0.0)))
110 | 
111 | model1.add(Dense(500))
112 | model1.add(Activation('relu'))
113 | model1.add(Dropout(0.2))
114 | 
115 | model1.add(Dense(n_out))
116 | model1.add(Activation('sigmoid'))
117 | 
118 | model1.summary()
119 | 
120 | #adam=keras.optimizers.Adam(lr=1e-4)
121 | model1.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
122 | 
123 | dump_fd=cfg.scrap_fd+'/Md/tcnn64fcnn128_keras_overlap50_32ms_noDP_noILD_fold4_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
124 | 
125 | eachmodel=ModelCheckpoint(dump_fd,monitor='val_loss',verbose=0,save_best_only=False,save_weights_only=False,mode='auto')    
126 | 
127 | #model1.fit([tr_X1,tr_X2], tr_y, batch_size=100, nb_epoch=101,
128 | #              verbose=1, validation_data=([te_X1,te_X2], te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
129 | 
130 | model1.fit(tr_X1, tr_y, batch_size=100, nb_epoch=31,
131 |               verbose=1, validation_data=(te_X1, te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
132 | 
133 | #score = model1.evaluate(te_X, te_y, show_accuracy=True, verbose=0)
134 | #print('Test score:', score[0])
135 | #print('Test accuracy:', score[1])
136 | 
137 | 


--------------------------------------------------------------------------------
/main_cnnlstm_2ch_Spec_ipld_conv1d_keras_indIPD_easy_fold4.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  3 | import pickle
  4 | import numpy as np
  5 | np.random.seed(1515)
  6 | import os
  7 | import config_2ch_raw_spec_ipld as cfg
  8 | from Hat.preprocessing import reshape_3d_to_4d
  9 | import prepare_data_2ch_raw_ipd_ild_easy_Spec as pp_data
 10 | #from prepare_data import load_data
 11 | 
 12 | 
 13 | import keras
 14 | 
 15 | from keras.datasets import mnist, cifar10
 16 | from keras.models import Sequential, Graph
 17 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
 18 | from keras.layers.convolutional import ZeroPadding2D, AveragePooling2D, Convolution2D,MaxPooling2D, Convolution1D,MaxPooling1D
 19 | from keras.utils import np_utils
 20 | from keras.layers import Merge, Input
 21 | from keras.regularizers import l1, l2, l1l2, activity_l2
 22 | from keras.constraints import nonneg
 23 | from keras.callbacks import ModelCheckpoint
 24 | from keras.layers.normalization import BatchNormalization
 25 | from keras.layers import LSTM, SimpleRNN, GRU, TimeDistributed, Bidirectional
 26 | import h5py
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | def reshapeX1( X ):
 34 |     N = len(X)
 35 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 36 | 
 37 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 38 | def reshapeX2( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, feadim) )
 41 | 
 42 | feadim=257
 43 | t_delay=33
 44 | 
 45 | # hyper-params
 46 | fe_fd_left = cfg.dev_fe_mel_fd_left
 47 | fe_fd_right = cfg.dev_fe_mel_fd_right
 48 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 49 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 50 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 51 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 52 | 
 53 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 54 | agg_num = 33        # concatenate frames
 55 | hop = 10            # step_len
 56 | n_hid = 1000
 57 | n_out = len( cfg.labels )
 58 | print n_out
 59 | fold =4         # can be 0, 1, 2, 3, 4
 60 | 
 61 | # prepare data
 62 | tr_X1, tr_X2, tr_y, te_X1, te_X2, te_y = pp_data.GetAllData_separate( fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, agg_num, hop, fold )
 63 | #[batch_num, inmap, n_time, n_freq] = tr_X1.shape
 64 | print tr_X1.shape
 65 | print tr_X2.shape
 66 | #sys.exit()
 67 | tr_X1, te_X1 = reshapeX1(tr_X1), reshapeX1(te_X1)
 68 | tr_X2, te_X2 = reshapeX1(tr_X2), reshapeX1(te_X2)
 69 |     
 70 | print tr_X1.shape, tr_X2.shape, tr_y.shape
 71 | print te_X1.shape, te_X2.shape, te_y.shape
 72 | 
 73 | ###build model by keras
 74 | kernel_size=(200,1)
 75 | pool_size=(257-200+1,1)
 76 | #pool_size=(3,1)
 77 | 
 78 | model1 = Sequential()
 79 | model1.add(TimeDistributed(Convolution2D(128, 200, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,feadim, 1)))
 80 | model1.add(Activation('relu'))
 81 | #model1.add(Dropout(0.1))
 82 | model1.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
 83 | 
 84 | 
 85 | 
 86 | model1.add(TimeDistributed(Flatten()))
 87 | #model1.add(Reshape((t_delay, feadim),input_shape=(t_delay,feadim)))
 88 | print model1.output_shape
 89 | 
 90 | #model1.add(Reshape((t_delay, 1, 257, 1), input_shape=(t_delay, 257)))
 91 | #print model1.output_shape
 92 | 
 93 | #model1.add(TimeDistributed(Convolution2D(128, 200, 1)))
 94 | #model1.add(Activation('relu'))
 95 | #model1.add(TimeDistributed(MaxPooling2D(pool_size=(257-200+1,1))))
 96 | #print model1.output_shape
 97 | 
 98 | #model1.add(TimeDistributed(Flatten()))
 99 | #print model1.output_shape
100 | 
101 | model2= Sequential()
102 | model2.add(TimeDistributed(Convolution2D(128, 200, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,feadim, 1)))
103 | model2.add(Activation('relu'))
104 | model2.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
105 | model2.add(TimeDistributed(Flatten()))
106 | #model2.add(TimeDistributed(Dense(40),input_shape=(t_delay,40)))
107 | merged = Merge([model1, model2], mode='concat')
108 | model12=Sequential()
109 | model12.add(merged)
110 | 
111 | 
112 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0)))
113 | #print model.output_shape
114 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0))) # GRU is better than LSTM on refined set, not verified on raw set
115 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=False,dropout_W=0.0, dropout_U=0.0)))
116 | 
117 | model12.add(Dense(500))
118 | model12.add(Activation('relu'))
119 | model12.add(Dropout(0.2))
120 | 
121 | model12.add(Dense(n_out))
122 | model12.add(Activation('sigmoid'))
123 | 
124 | model12.summary()
125 | 
126 | #adam=keras.optimizers.Adam(lr=1e-4)
127 | model12.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
128 | 
129 | dump_fd=cfg.scrap_fd+'/Md/cnn_keras_overlap50_CNN128onSpec_fold4_CNN128onILD257_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
130 | 
131 | eachmodel=ModelCheckpoint(dump_fd,monitor='val_loss',verbose=0,save_best_only=False,save_weights_only=False,mode='auto')    
132 | 
133 | model12.fit([tr_X1,tr_X2], tr_y, batch_size=100, nb_epoch=21,
134 |               verbose=1, validation_data=([te_X1,te_X2], te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
135 | 
136 | #model1.fit(tr_X1, tr_y, batch_size=100, nb_epoch=101,
137 | #              verbose=1, validation_data=(te_X1, te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
138 | 
139 | #score = model12.evaluate(te_X, te_y, show_accuracy=True, verbose=0)
140 | #print('Test score:', score[0])
141 | #print('Test accuracy:', score[1])
142 | 
143 | 


--------------------------------------------------------------------------------
/main_cnnlstm_2ch_MBK_ipld_conv1d_keras_indIPD_easy_fold4.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('../Hat')
  3 | import pickle
  4 | import numpy as np
  5 | np.random.seed(1515)
  6 | import os
  7 | import config_2ch_raw_mbk_ipld as cfg
  8 | from Hat.preprocessing import reshape_3d_to_4d
  9 | import prepare_data_2ch_raw_ipd_ild_easy as pp_data
 10 | #from prepare_data import load_data
 11 | 
 12 | 
 13 | import keras
 14 | 
 15 | from keras.datasets import mnist, cifar10
 16 | from keras.models import Sequential, Graph
 17 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
 18 | from keras.layers.convolutional import ZeroPadding2D, AveragePooling2D, Convolution2D,MaxPooling2D, Convolution1D,MaxPooling1D
 19 | from keras.utils import np_utils
 20 | from keras.layers import Merge, Input
 21 | from keras.regularizers import l1, l2, l1l2, activity_l2
 22 | from keras.constraints import nonneg
 23 | from keras.callbacks import ModelCheckpoint
 24 | from keras.layers.normalization import BatchNormalization
 25 | from keras.layers import LSTM, SimpleRNN, GRU, TimeDistributed, Bidirectional
 26 | import h5py
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | def reshapeX1( X ):
 34 |     N = len(X)
 35 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 36 | 
 37 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 38 | def reshapeX2( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, feadim) )
 41 | 
 42 | def reshapeX3( X ):
 43 |     N = len(X)
 44 |     return X.reshape( (N, t_delay, 1, 257, 1) )
 45 | 
 46 | feadim=40           # feature-axis length used by reshapeX / reshapeX1 / reshapeX2
 47 | t_delay=33          # time-axis length used by all reshape helpers; same value as agg_num
 48 | 
 49 | # hyper-params: feature folders come from the config module
 50 | fe_fd_left = cfg.dev_fe_mel_fd_left
 51 | fe_fd_right = cfg.dev_fe_mel_fd_right
 52 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 53 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 54 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 55 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 56 | 
 57 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 58 | agg_num = 33        # concatenate frames
 59 | hop = 10            # step_len
 60 | n_hid = 1000        # NOTE(review): not referenced anywhere else in this script
 61 | n_out = len( cfg.labels )   # one output unit per label (size of the final Dense layer)
 62 | print n_out
 63 | fold = 4        # can be 0, 1, 2, 3, 4
 64 | 
 65 | # prepare data: two inputs (X1, X2) plus targets, for training and for validation; presumably `fold` is the held-out split - confirm in pp_data
 66 | tr_X1, tr_X2, tr_y, te_X1, te_X2, te_y = pp_data.GetAllData_separate( fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, agg_num, hop, fold )
 67 | #[batch_num, inmap, n_time, n_freq] = tr_X1.shape
 68 | print tr_X1.shape
 69 | print tr_X2.shape
 70 | #sys.exit()
 71 | tr_X1, te_X1 = reshapeX1(tr_X1), reshapeX1(te_X1)   # -> (N, t_delay, 1, feadim, 1), the input_shape of model1
 72 | tr_X2, te_X2 = reshapeX3(tr_X2), reshapeX3(te_X2)   # -> (N, t_delay, 1, 257, 1), the input_shape of model2
 73 | # shapes after reshaping
 74 | print tr_X1.shape, tr_X2.shape, tr_y.shape
 75 | print te_X1.shape, te_X2.shape, te_y.shape
 76 | 
 77 | ###build model by keras: two per-frame CNN branches, concatenated, then 3 bidirectional GRUs and 2 dense layers
 78 | kernel_size=(30,1)      # NOTE(review): never referenced; the conv layer below repeats 30, 1 literally
 79 | pool_size=(40-30+1,1)   # (11,1); NOTE(review): presumably the whole 'valid' conv output (feadim-30+1), leaving one value per filter - confirm dim ordering
 80 | #pool_size=(3,1)
 81 | # branch 1: fed with tr_X1/te_X1, input_shape (t_delay, 1, feadim, 1)
 82 | model1 = Sequential()
 83 | model1.add(TimeDistributed(Convolution2D(128, 30, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,feadim, 1)))
 84 | model1.add(Activation('relu'))
 85 | #model1.add(Dropout(0.1))
 86 | model1.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
 87 | 
 88 | model1.add(TimeDistributed(Flatten()))   # flatten each time step separately, keeping the time axis
 89 | #model1.add(Reshape((t_delay, feadim),input_shape=(t_delay,feadim)))
 90 | print model1.output_shape
 91 | 
 92 | #model1.add(Reshape((t_delay, 1, 257, 1), input_shape=(t_delay, 257)))
 93 | #print model1.output_shape
 94 | 
 95 | #model1.add(TimeDistributed(Convolution2D(128, 200, 1)))
 96 | #model1.add(Activation('relu'))
 97 | #model1.add(TimeDistributed(MaxPooling2D(pool_size=(257-200+1,1))))
 98 | #print model1.output_shape
 99 | 
100 | #model1.add(TimeDistributed(Flatten()))
101 | #print model1.output_shape
102 | # branch 2: fed with tr_X2/te_X2, input_shape (t_delay, 1, 257, 1)
103 | model2= Sequential()
104 | model2= Sequential()   # NOTE(review): duplicate of the previous line; the first instance is simply discarded
105 | model2.add(TimeDistributed(Convolution2D(128, 200, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,257, 1)))
106 | model2.add(Activation('relu'))
107 | model2.add(TimeDistributed(MaxPooling2D(pool_size=(257-200+1,1))))
108 | model2.add(TimeDistributed(Flatten()))
109 | #model2.add(TimeDistributed(Dense(40),input_shape=(t_delay,40)))
110 | merged = Merge([model1, model2], mode='concat')   # concatenate the outputs of the two branches
111 | model12=Sequential()
112 | model12.add(merged)
113 | 
114 | # recurrent stack: the first two GRUs return full sequences, the third returns only its last output
115 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0)))
116 | #print model.output_shape
117 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0))) # GRU is better than LSTM on refined set, not verified on raw set
118 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=False,dropout_W=0.0, dropout_U=0.0)))
119 | 
120 | model12.add(Dense(500))
121 | model12.add(Activation('relu'))
122 | model12.add(Dropout(0.2))
123 | # output layer: one sigmoid unit per label, paired with binary_crossentropy at compile time
124 | model12.add(Dense(n_out))
125 | model12.add(Activation('sigmoid'))
126 | 
127 | model12.summary()
128 | 
129 | #adam=keras.optimizers.Adam(lr=1e-4)
130 | model12.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
131 | # checkpoint path template; {epoch} and {val_loss} are filled in by ModelCheckpoint
132 | dump_fd=cfg.scrap_fd+'/Md/cnn_keras_overlap50_cnn128onMBK_fold4_CNN128onILD257_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
133 | # save_best_only=False: a full model file (not just weights) is written after every epoch
134 | eachmodel=ModelCheckpoint(dump_fd,monitor='val_loss',verbose=0,save_best_only=False,save_weights_only=False,mode='auto')    
135 | # train on both inputs; the te_* arrays serve as validation data
136 | model12.fit([tr_X1,tr_X2], tr_y, batch_size=100, nb_epoch=21,
137 |               verbose=1, validation_data=([te_X1,te_X2], te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
138 | 
139 | #model1.fit(tr_X1, tr_y, batch_size=100, nb_epoch=101,
140 | #              verbose=1, validation_data=(te_X1, te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
141 | 
142 | #score = model12.evaluate(te_X, te_y, show_accuracy=True, verbose=0)
143 | #print('Test score:', score[0])
144 | #print('Test accuracy:', score[1])
145 | 
146 | 


--------------------------------------------------------------------------------
/main_cnnlstm_2ch_wav_ipld_conv1d_keras_indIPD_easy_fold4.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  3 | import pickle
  4 | import numpy as np
  5 | np.random.seed(1515)
  6 | import os
  7 | import config_2ch_raw_wav32ms_ipld as cfg
  8 | from Hat.preprocessing import reshape_3d_to_4d
  9 | import prepare_data_2ch_raw_ipd_ild_easy as pp_data
 10 | #from prepare_data import load_data
 11 | 
 12 | 
 13 | import keras
 14 | 
 15 | from keras.datasets import mnist, cifar10
 16 | from keras.models import Sequential, Graph
 17 | from keras.layers.core import Dense, Dropout, Activation, Flatten, Reshape
 18 | from keras.layers.convolutional import ZeroPadding2D, AveragePooling2D, Convolution2D,MaxPooling2D, Convolution1D,MaxPooling1D
 19 | from keras.utils import np_utils
 20 | from keras.layers import Merge
 21 | from keras.regularizers import l1, l2, l1l2, activity_l2
 22 | from keras.constraints import nonneg
 23 | from keras.callbacks import ModelCheckpoint
 24 | from keras.layers.normalization import BatchNormalization
 25 | from keras.layers import LSTM, SimpleRNN, GRU, TimeDistributed, Bidirectional
 26 | import h5py
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | def reshapeX1( X ):
 34 |     N = len(X)
 35 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 36 | 
 37 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 38 | def reshapeX2( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, feadim) )
 41 | 
 42 | def reshapeX3( X ):
 43 |     N = len(X)
 44 |     return X.reshape( (N, t_delay, 1, 257, 1) )
 45 | 
 46 | feadim=512          # feature-axis length used by reshapeX / reshapeX1 / reshapeX2
 47 | t_delay=33          # time-axis length used by all reshape helpers; same value as agg_num
 48 | 
 49 | # hyper-params: feature folders come from the config module
 50 | fe_fd_left = cfg.dev_fe_mel_fd_left
 51 | fe_fd_right = cfg.dev_fe_mel_fd_right
 52 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 53 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 54 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 55 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 56 | 
 57 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 58 | agg_num = 33        # concatenate frames
 59 | hop = 10            # step_len
 60 | n_hid = 1000        # NOTE(review): not referenced anywhere else in this script
 61 | n_out = len( cfg.labels )   # one output unit per label (size of the final Dense layer)
 62 | print n_out
 63 | fold = 4      # can be 0, 1, 2, 3, 4
 64 | 
 65 | # prepare data: two inputs (X1, X2) plus targets, for training and for validation; presumably `fold` is the held-out split - confirm in pp_data
 66 | tr_X1, tr_X2, tr_y, te_X1, te_X2, te_y = pp_data.GetAllData_separate( fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, agg_num, hop, fold )
 67 | #[batch_num, inmap, n_time, n_freq] = tr_X1.shape
 68 | print tr_X1.shape
 69 | print tr_X2.shape
 70 | #sys.exit()
 71 | tr_X1, te_X1 = reshapeX1(tr_X1), reshapeX1(te_X1)   # -> (N, t_delay, 1, feadim, 1), the input_shape of model1
 72 | tr_X2, te_X2 = reshapeX3(tr_X2), reshapeX3(te_X2)   # -> (N, t_delay, 1, 257, 1), the input_shape of model2
 73 | # shapes after reshaping
 74 | print tr_X1.shape, tr_X2.shape, tr_y.shape
 75 | print te_X1.shape, te_X2.shape, te_y.shape
 76 | 
 77 | ###build model by keras: two per-frame CNN branches, concatenated, then 3 bidirectional GRUs and 2 dense layers
 78 | kernel_size=(400,1)      # NOTE(review): never referenced; the conv layer below repeats 400, 1 literally
 79 | pool_size=(512-400+1,1)  # (113,1); NOTE(review): presumably the whole 'valid' conv output (feadim-400+1), leaving one value per filter - confirm dim ordering
 80 | #pool_size=(3,1)
 81 | # branch 1: fed with tr_X1/te_X1, input_shape (t_delay, 1, feadim, 1)
 82 | model1 = Sequential()
 83 | model1.add(TimeDistributed(Convolution2D(128, 400, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,feadim, 1)))
 84 | model1.add(Activation('relu'))
 85 | #model1.add(Dropout(0.1))
 86 | model1.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
 87 | 
 88 | #model1.add(TimeDistributed(Convolution2D(128, 4, 1)))
 89 | #model1.add(Activation('relu'))
 90 | #model1.add(TimeDistributed(MaxPooling2D(pool_size=pool_size)))
 91 | 
 92 | 
 93 | model1.add(TimeDistributed(Flatten()))   # flatten each time step separately, keeping the time axis
 94 | print model1.output_shape
 95 | 
 96 | #model1.add(Reshape((t_delay, 1, 257, 1), input_shape=(t_delay, 257)))
 97 | #print model1.output_shape
 98 | 
 99 | #model1.add(TimeDistributed(Convolution2D(128, 200, 1)))
100 | #model1.add(Activation('relu'))
101 | #model1.add(TimeDistributed(MaxPooling2D(pool_size=(257-200+1,1))))
102 | #print model1.output_shape
103 | 
104 | #model1.add(TimeDistributed(Flatten()))
105 | #print model1.output_shape
106 | # branch 2: fed with tr_X2/te_X2, input_shape (t_delay, 1, 257, 1)
107 | model2= Sequential()
108 | model2= Sequential()   # NOTE(review): duplicate of the previous line; the first instance is simply discarded
109 | model2.add(TimeDistributed(Convolution2D(128, 200, 1, border_mode='valid', bias=True, W_regularizer=l1l2(l1=0,l2=0)),input_shape=(t_delay,1,257, 1)))
110 | model2.add(Activation('relu'))
111 | model2.add(TimeDistributed(MaxPooling2D(pool_size=(257-200+1,1))))
112 | model2.add(TimeDistributed(Flatten()))
113 | #model2.add(TimeDistributed(Dense(40),input_shape=(t_delay,40)))
114 | merged = Merge([model1, model2], mode='concat')   # concatenate the outputs of the two branches
115 | model12=Sequential()
116 | model12.add(merged)
117 | 
118 | # recurrent stack: the first two GRUs return full sequences, the third returns only its last output
119 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0)))
120 | #print model.output_shape
121 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=True,dropout_W=0.0, dropout_U=0.0))) # GRU is better than LSTM on refined set, not verified on raw set
122 | model12.add(Bidirectional(GRU(output_dim=128,return_sequences=False,dropout_W=0.0, dropout_U=0.0)))
123 | 
124 | model12.add(Dense(500))
125 | model12.add(Activation('relu'))
126 | model12.add(Dropout(0.2))
127 | # output layer: one sigmoid unit per label, paired with binary_crossentropy at compile time
128 | model12.add(Dense(n_out))
129 | model12.add(Activation('sigmoid'))
130 | 
131 | model12.summary()
132 | 
133 | #adam=keras.optimizers.Adam(lr=1e-4)
134 | model12.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
135 | # checkpoint path template; NOTE(review): the name contains both "fold4" and "fold1" while fold = 4 - confirm which is meant
136 | dump_fd=cfg.scrap_fd+'/Md/cnn_keras_overlap50_cnn128onRAW_fold4_CNN128onILD257_fold1_weights.{epoch:02d}-{val_loss:.2f}.hdf5'
137 | # save_best_only=False: a full model file (not just weights) is written after every epoch
138 | eachmodel=ModelCheckpoint(dump_fd,monitor='val_loss',verbose=0,save_best_only=False,save_weights_only=False,mode='auto')    
139 | # train on both inputs; the te_* arrays serve as validation data
140 | model12.fit([tr_X1,tr_X2], tr_y, batch_size=100, nb_epoch=21,
141 |               verbose=1, validation_data=([te_X1,te_X2], te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
142 | 
143 | #model1.fit(tr_X1, tr_y, batch_size=100, nb_epoch=101,
144 | #              verbose=1, validation_data=(te_X1, te_y), callbacks=[eachmodel]) #, callbacks=[best_model])
145 | 
146 | #score = model12.evaluate(te_X, te_y, show_accuracy=True, verbose=0)
147 | #print('Test score:', score[0])
148 | #print('Test accuracy:', score[1])
149 | 
150 | 


--------------------------------------------------------------------------------
/recognize_cnnlstm_2ch_spec_keras_indIPD_easy.py:
--------------------------------------------------------------------------------
  1 |  
  2 | import sys
  3 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  4 | import pickle
  5 | import numpy as np
  6 | np.random.seed(1515)
  7 | import scipy.stats
  8 | 
  9 | import keras
 10 | from keras.models import load_model
 11 | 
 12 | from keras import backend as K
 13 | 
 14 | import config_2ch_raw_spec_ipld_eva as cfg
 15 | import prepare_data_2ch_raw_ipd_ild_easy_Spec as pp_data
 16 | import csv
 17 | from Hat.preprocessing import reshape_3d_to_4d
 18 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d
 19 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d, reshape_3d_to_4d, mat_concate_multiinmaps6in
 20 | from Hat.metrics import prec_recall_fvalue
 21 | import cPickle
 22 | import eer
 23 | import matplotlib.pyplot as plt
 24 | #from main_cnn import fe_fd, agg_num, hop, n_hid, fold
 25 | np.set_printoptions(threshold=np.nan, linewidth=1000, precision=2, suppress=True)
 26 | 
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 34 | def reshapeX2( X ):
 35 |     N = len(X)
 36 |     return X.reshape( (N, t_delay, feadim) )
 37 | 
 38 | def reshapeX1( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 41 | 
 42 | feadim=257   # feature-axis length used by the reshape helpers
 43 | t_delay=33   # time-axis length used by the reshape helpers; same value as agg_num
 44 | 
 45 | 
 46 | debug=0      # non-zero makes recognize() plot an intermediate layer output for each clip
 47 | # hyper-params
 48 | n_labels = len( cfg.labels )   # recognize() recomputes its own local n_labels
 49 | fe_fd_left = cfg.dev_fe_mel_fd_left
 50 | fe_fd_right = cfg.dev_fe_mel_fd_right
 51 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 52 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 53 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 54 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 55 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 56 | agg_num = 33        # concatenate frames
 57 | hop = 1          # step_len
 58 | n_hid = 1000     # NOTE(review): not referenced anywhere else in this script
 59 | fold = 9      # only csv rows with this fold value are scored; NOTE(review): old note said 0-3, 9 presumably marks the evaluation split - confirm
 60 | 
 61 | # load model
 62 | # NOTE(review): absolute, machine-specific path; cfg.scrap_fd is not used here
 63 | #md = serializations.load( cfg.scrap_fd + '/Md/md20.p' )
 64 | md=load_model('/vol/vssp/msos/yx/chime_home/DCASE2016_task4_scrap_2ch_spec_ipd_ild_overlap/Md/cnn_keras_overlap50_CNN128onSpec_eva816_CNN128onILD257_weights.13-0.49.hdf5')
 65 | #md.summary()
 66 | 
 67 | def recognize():
 68 |     ## prepare data
 69 |     #_, _, te_X, te_y = pp_data.GetAllData(fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, agg_num, hop, fold )
 70 |     ##te_X = reshapeX(te_X)
 71 |     #print te_X.shape
 72 |     
 73 |     # do recognize and evaluation
 74 |     thres = 0.4     # thres, tune to prec=recall, if smaller, make prec smaller
 75 |     n_labels = len( cfg.labels )
 76 |     
 77 |     gt_roll = []
 78 |     pred_roll = []
 79 |     result_roll = []
 80 |     y_true_binary_c = []
 81 |     y_true_file_c = []
 82 |     y_true_binary_m = []
 83 |     y_true_file_m = []
 84 |     y_true_binary_f = []
 85 |     y_true_file_f = []
 86 |     y_true_binary_v = []
 87 |     y_true_file_v = []
 88 |     y_true_binary_p = []
 89 |     y_true_file_p = []
 90 |     y_true_binary_b = []
 91 |     y_true_file_b = []
 92 |     y_true_binary_o = []
 93 |     y_true_file_o = []
 94 |     with open( cfg.dev_cv_csv_path, 'rb') as f:
 95 |         reader = csv.reader(f)
 96 |         lis = list(reader)
 97 |     
 98 |         # read one line
 99 |         for li in lis:
100 |             na = li[1]
101 |             curr_fold = int(li[2])
102 |             
103 |             if fold==curr_fold:
104 |                 # get features, tags
105 |                 fe_path_left = fe_fd_left + '/' + na + '.f'
106 |                 fe_path_right = fe_fd_right + '/' + na + '.f'
107 |                 fe_path_mean = fe_fd_mean + '/' + na + '.f'
108 |                 fe_path_diff = fe_fd_diff + '/' + na + '.f'
109 |                 fe_path_ipd = fe_fd_ipd + '/' + na + '.f'
110 |                 fe_path_ild = fe_fd_ild + '/' + na + '.f'
111 |                 #fe_path_ori = fe_fd_ori + '/' + na + '.f'
112 |                 info_path = cfg.dev_wav_fd + '/' + na + '.csv'
113 |                 #print na
114 |                 tags = pp_data.GetTags( info_path )
115 |                 #print tags
116 |                 y = pp_data.TagsToCategory( tags )
117 |                 #print y
118 |                 #sys.exit()
119 |                 #X_l = cPickle.load( open( fe_path_left, 'rb' ) )
120 |                 #X_r = cPickle.load( open( fe_path_right, 'rb' ) )
121 |                 X_m = cPickle.load( open( fe_path_mean, 'rb' ) )
122 |                 #X_d = cPickle.load( open( fe_path_diff, 'rb' ) )
123 |                 #X_ipd = cPickle.load( open( fe_path_ipd, 'rb' ) )
124 |                 X_ild = cPickle.load( open( fe_path_ild, 'rb' ) )
125 |                 #X_o = cPickle.load( open( fe_path_ori, 'rb' ) )
126 | 
127 |                 # aggregate data
128 |                 #X3d_l = mat_2d_to_3d( X_l, agg_num, hop )
129 |                 #X3d_r = mat_2d_to_3d( X_r, agg_num, hop )
130 |                 X3d_m = mat_2d_to_3d( X_m, agg_num, hop )
131 |    		#X3d_d = mat_2d_to_3d( X_d, agg_num, hop )
132 |                 #X3d_ipd = mat_2d_to_3d( X_ipd, agg_num, hop )
133 |    		X3d_ild = mat_2d_to_3d( X_ild, agg_num, hop )
134 |    		#X3d_o = mat_2d_to_3d( X_o, agg_num, hop )
135 |      	        ## reshape 3d to 4d
136 |        	        #X4d_l = reshape_3d_to_4d( X3d_l)
137 |                 #X4d_r = reshape_3d_to_4d( X3d_r)
138 |                 #X4d_m = reshape_3d_to_4d( X3d_m)
139 |                 #X4d_d = reshape_3d_to_4d( X3d_d)
140 |                 # concatenate
141 |                 #X4d=mat_concate_multiinmaps6in(X3d_l, X3d_r, X3d_m, X3d_d, X3d_ipd, X3d_ild)
142 |                 #X3d_m=reshapeX1(X3d_m)
143 |                 #X4d=np.swapaxes(X4d,1,2) # or np.transpose(x,(1,0,2))  1,0,2 is axis
144 |                 te_X1=X3d_m
145 |                 te_X2=X3d_ild
146 |                 te_X1 = reshapeX1(te_X1)
147 |                 te_X2 = reshapeX1(te_X2)
148 |                 
149 |                 if debug:
150 |                     # with a Sequential model
151 |                     #md.summary()
152 |                     print na
153 |                     get_3rd_layer_output = K.function([md.layers[0].input, K.learning_phase()], [md.layers[4].output])
154 |                     layer_output = get_3rd_layer_output([te_X1, 0])[0]
155 |                     print layer_output.shape
156 |                     #layer_output1=layer_output[5,:,:]
157 |                     layer_output1=layer_output[:,16,:]
158 |                     imgplot=plt.matshow((layer_output1.T))
159 |                     #imgplot.set_cmap('spectral')
160 |                     plt.colorbar()
161 |                     plt.show()
162 |                     sys.pause()
163 |                 
164 |                 p_y_pred = md.predict( [te_X1,te_X2] )
165 |                 #p_y_pred = md.predict( te_X1 )
166 |                 p_y_pred = np.mean( p_y_pred, axis=0 )     # shape:(n_label)
167 |                 pred = np.zeros(n_labels)
168 |                 pred[ np.where(p_y_pred>thres) ] = 1
169 |                 ind=0
170 |                 for la in cfg.labels:
171 |                     if la=='S':
172 |                         break
173 |                     elif la=='c':
174 |                         y_true_file_c.append(na)
175 |                         y_true_binary_c.append(y[ind])
176 |                     elif la=='m':
177 |                         y_true_file_m.append(na)
178 |                         y_true_binary_m.append(y[ind])
179 |                     elif la=='f':
180 |                         y_true_file_f.append(na)
181 |                         y_true_binary_f.append(y[ind])
182 |                     elif la=='v':
183 |                         y_true_file_v.append(na)
184 |                         y_true_binary_v.append(y[ind])
185 |                     elif la=='p':
186 |                         y_true_file_p.append(na)
187 |                         y_true_binary_p.append(y[ind])
188 |                     elif la=='b':
189 |                         y_true_file_b.append(na)
190 |                         y_true_binary_b.append(y[ind])
191 |                     elif la=='o':
192 |                         y_true_file_o.append(na)
193 |                         y_true_binary_o.append(y[ind])
194 |                     result=[na,la,p_y_pred[ind]]
195 |                     result_roll.append(result)
196 |                     ind=ind+1
197 |                 
198 |                 
199 |                 pred_roll.append( pred )
200 |                 gt_roll.append( y )
201 |     
202 |     pred_roll = np.array( pred_roll )
203 |     gt_roll = np.array( gt_roll )
204 |     #write csv for EER computation
205 |     csvfile=file('result.csv','wb')
206 |     writer=csv.writer(csvfile)
207 |     #writer.writerow(['fn','label','score'])
208 |     writer.writerows(result_roll)
209 |     csvfile.close()
210 |     
211 |     # calculate prec, recall, fvalue
212 |     prec, recall, fvalue = prec_recall_fvalue( pred_roll, gt_roll, thres )
213 |     # EER for each tag : [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
214 |     EER_c=eer.compute_eer('result.csv', 'c', dict(zip(y_true_file_c, y_true_binary_c)))
215 |     EER_m=eer.compute_eer('result.csv', 'm', dict(zip(y_true_file_m, y_true_binary_m)))
216 |     EER_f=eer.compute_eer('result.csv', 'f', dict(zip(y_true_file_f, y_true_binary_f)))
217 |     EER_v=eer.compute_eer('result.csv', 'v', dict(zip(y_true_file_v, y_true_binary_v)))
218 |     EER_p=eer.compute_eer('result.csv', 'p', dict(zip(y_true_file_p, y_true_binary_p)))
219 |     EER_b=eer.compute_eer('result.csv', 'b', dict(zip(y_true_file_b, y_true_binary_b)))
220 |     EER_o=eer.compute_eer('result.csv', 'o', dict(zip(y_true_file_o, y_true_binary_o)))
221 |     EER=(EER_c+EER_m+EER_v+EER_p+EER_f+EER_b+EER_o)/7.0
222 |     print prec, recall, fvalue
223 |     print EER_c,EER_m,EER_f,EER_v,EER_p,EER_b,EER_o
224 |     print EER
225 | 
226 | if __name__ == '__main__':
227 |     recognize()
228 | 


--------------------------------------------------------------------------------
/recognize_cnnlstm_2ch_mbk_keras_noILD_easy.py:
--------------------------------------------------------------------------------
  1 |  
  2 | import sys
  3 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  4 | import pickle
  5 | import numpy as np
  6 | np.random.seed(1515)
  7 | import scipy.stats
  8 | 
  9 | import keras
 10 | from keras.models import load_model
 11 | 
 12 | from keras import backend as K
 13 | 
 14 | import config_2ch_raw_mbk_ipld_eva as cfg
 15 | import prepare_data_2ch_raw_ipd_ild_easy as pp_data
 16 | import csv
 17 | from Hat.preprocessing import reshape_3d_to_4d
 18 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d
 19 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d, reshape_3d_to_4d, mat_concate_multiinmaps6in
 20 | from Hat.metrics import prec_recall_fvalue
 21 | import cPickle
 22 | import eer
 23 | import matplotlib.pyplot as plt
 24 | #from main_cnn import fe_fd, agg_num, hop, n_hid, fold
 25 | np.set_printoptions(threshold=np.nan, linewidth=1000, precision=2, suppress=True)
 26 | 
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 34 | def reshapeX2( X ):
 35 |     N = len(X)
 36 |     return X.reshape( (N, t_delay, feadim) )
 37 | 
 38 | def reshapeX1( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 41 | 
 42 | 
 43 | def reshapeX3( X ):
 44 |     N = len(X)
 45 |     return X.reshape( (N, t_delay*feadim) )
 46 | 
 47 | feadim=40    # feature-axis length used by the reshape helpers
 48 | t_delay=33   # time-axis length used by the reshape helpers; same value as agg_num
 49 | 
 50 | 
 51 | debug=0      # non-zero makes recognize() plot an intermediate layer output for each clip
 52 | # hyper-params
 53 | n_labels = len( cfg.labels )   # recognize() recomputes its own local n_labels
 54 | fe_fd_left = cfg.dev_fe_mel_fd_left
 55 | fe_fd_right = cfg.dev_fe_mel_fd_right
 56 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 57 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 58 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 59 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 60 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 61 | agg_num = 33        # concatenate frames
 62 | hop = 1          # step_len
 63 | n_hid = 1000     # NOTE(review): not referenced anywhere else in this script
 64 | fold = 9      # only csv rows with this fold value are scored; NOTE(review): old note said 0-3, 9 presumably marks the evaluation split - confirm
 65 | 
 66 | # load model
 67 | # NOTE(review): absolute, machine-specific path; cfg.scrap_fd is not used here
 68 | #md = serializations.load( cfg.scrap_fd + '/Md/md20.p' )
 69 | md=load_model('/vol/vssp/msos/yx/chime_home/DCASE2016_task4_scrap_2ch_mbk_ipd_ild_overlap/Md/cnn_keras_overlap50_eva816_1CNN128onMBK40_noILD_weights.19-0.56.hdf5')
 70 | #md.summary()
 71 | 
 72 | def recognize():
 73 |     ## prepare data
 74 |     #_, _, te_X, te_y = pp_data.GetAllData(fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, agg_num, hop, fold )
 75 |     ##te_X = reshapeX(te_X)
 76 |     #print te_X.shape
 77 |     
 78 |     # do recognize and evaluation
 79 |     thres = 0.4     # thres, tune to prec=recall, if smaller, make prec smaller
 80 |     n_labels = len( cfg.labels )
 81 |     
 82 |     gt_roll = []
 83 |     pred_roll = []
 84 |     result_roll = []
 85 |     y_true_binary_c = []
 86 |     y_true_file_c = []
 87 |     y_true_binary_m = []
 88 |     y_true_file_m = []
 89 |     y_true_binary_f = []
 90 |     y_true_file_f = []
 91 |     y_true_binary_v = []
 92 |     y_true_file_v = []
 93 |     y_true_binary_p = []
 94 |     y_true_file_p = []
 95 |     y_true_binary_b = []
 96 |     y_true_file_b = []
 97 |     y_true_binary_o = []
 98 |     y_true_file_o = []
 99 |     with open( cfg.dev_cv_csv_path, 'rb') as f:
100 |         reader = csv.reader(f)
101 |         lis = list(reader)
102 |     
103 |         # read one line
104 |         for li in lis:
105 |             na = li[1]
106 |             curr_fold = int(li[2])
107 |             
108 |             if fold==curr_fold:
109 |                 # get features, tags
110 |                 fe_path_left = fe_fd_left + '/' + na + '.f'
111 |                 fe_path_right = fe_fd_right + '/' + na + '.f'
112 |                 fe_path_mean = fe_fd_mean + '/' + na + '.f'
113 |                 fe_path_diff = fe_fd_diff + '/' + na + '.f'
114 |                 fe_path_ipd = fe_fd_ipd + '/' + na + '.f'
115 |                 fe_path_ild = fe_fd_ild + '/' + na + '.f'
116 |                 #fe_path_ori = fe_fd_ori + '/' + na + '.f'
117 |                 info_path = cfg.dev_wav_fd + '/' + na + '.csv'
118 |                 #print na
119 |                 tags = pp_data.GetTags( info_path )
120 |                 #print tags
121 |                 y = pp_data.TagsToCategory( tags )
122 |                 #print y
123 |                 #sys.exit()
124 |                 #X_l = cPickle.load( open( fe_path_left, 'rb' ) )
125 |                 #X_r = cPickle.load( open( fe_path_right, 'rb' ) )
126 |                 X_m = cPickle.load( open( fe_path_mean, 'rb' ) )
127 |                 #X_d = cPickle.load( open( fe_path_diff, 'rb' ) )
128 |                 #X_ipd = cPickle.load( open( fe_path_ipd, 'rb' ) )
129 |                 #X_ild = cPickle.load( open( fe_path_ild, 'rb' ) )
130 |                 #X_o = cPickle.load( open( fe_path_ori, 'rb' ) )
131 | 
132 |                 # aggregate data
133 |                 #X3d_l = mat_2d_to_3d( X_l, agg_num, hop )
134 |                 #X3d_r = mat_2d_to_3d( X_r, agg_num, hop )
135 |                 X3d_m = mat_2d_to_3d( X_m, agg_num, hop )
136 |    		#X3d_d = mat_2d_to_3d( X_d, agg_num, hop )
137 |                 #X3d_ipd = mat_2d_to_3d( X_ipd, agg_num, hop )
138 |    		#X3d_ild = mat_2d_to_3d( X_ild, agg_num, hop )
139 |    		#X3d_o = mat_2d_to_3d( X_o, agg_num, hop )
140 |      	        ## reshape 3d to 4d
141 |        	        #X4d_l = reshape_3d_to_4d( X3d_l)
142 |                 #X4d_r = reshape_3d_to_4d( X3d_r)
143 |                 #X4d_m = reshape_3d_to_4d( X3d_m)
144 |                 #X4d_d = reshape_3d_to_4d( X3d_d)
145 |                 # concatenate
146 |                 #X4d=mat_concate_multiinmaps6in(X3d_l, X3d_r, X3d_m, X3d_d, X3d_ipd, X3d_ild)
147 |                 X3d_m=reshapeX1(X3d_m)
148 |                 #X4d=np.swapaxes(X4d,1,2) # or np.transpose(x,(1,0,2))  1,0,2 is axis
149 |                 te_X1=X3d_m
150 |                 #te_X2=X3d_ild
151 |                 #te_X1 = reshapeX1(te_X1)
152 |                 #te_X2 = reshapeX2(te_X2)
153 |                 
154 |                 if debug:
155 |                     # with a Sequential model
156 |                     #md.summary()
157 |                     print na
158 |                     get_3rd_layer_output = K.function([md.layers[0].input, K.learning_phase()], [md.layers[4].output])
159 |                     layer_output = get_3rd_layer_output([te_X1, 0])[0]
160 |                     print layer_output.shape
161 |                     #layer_output1=layer_output[5,:,:]
162 |                     layer_output1=layer_output[:,16,:]
163 |                     imgplot=plt.matshow((layer_output1.T))
164 |                     #imgplot.set_cmap('spectral')
165 |                     plt.colorbar()
166 |                     plt.show()
167 |                     sys.pause()
168 |                 
169 |                 #p_y_pred = md.predict( [te_X1,te_X2] )
170 |                 p_y_pred = md.predict( te_X1 )
171 |                 p_y_pred = np.mean( p_y_pred, axis=0 )     # shape:(n_label)
172 |                 pred = np.zeros(n_labels)
173 |                 pred[ np.where(p_y_pred>thres) ] = 1
174 |                 ind=0
175 |                 for la in cfg.labels:
176 |                     if la=='S':
177 |                         break
178 |                     elif la=='c':
179 |                         y_true_file_c.append(na)
180 |                         y_true_binary_c.append(y[ind])
181 |                     elif la=='m':
182 |                         y_true_file_m.append(na)
183 |                         y_true_binary_m.append(y[ind])
184 |                     elif la=='f':
185 |                         y_true_file_f.append(na)
186 |                         y_true_binary_f.append(y[ind])
187 |                     elif la=='v':
188 |                         y_true_file_v.append(na)
189 |                         y_true_binary_v.append(y[ind])
190 |                     elif la=='p':
191 |                         y_true_file_p.append(na)
192 |                         y_true_binary_p.append(y[ind])
193 |                     elif la=='b':
194 |                         y_true_file_b.append(na)
195 |                         y_true_binary_b.append(y[ind])
196 |                     elif la=='o':
197 |                         y_true_file_o.append(na)
198 |                         y_true_binary_o.append(y[ind])
199 |                     result=[na,la,p_y_pred[ind]]
200 |                     result_roll.append(result)
201 |                     ind=ind+1
202 |                 
203 |                 
204 |                 pred_roll.append( pred )
205 |                 gt_roll.append( y )
206 |     
207 |     pred_roll = np.array( pred_roll )
208 |     gt_roll = np.array( gt_roll )
209 |     #write csv for EER computation
210 |     csvfile=file('result.csv','wb')
211 |     writer=csv.writer(csvfile)
212 |     #writer.writerow(['fn','label','score'])
213 |     writer.writerows(result_roll)
214 |     csvfile.close()
215 |     
216 |     # calculate prec, recall, fvalue
217 |     prec, recall, fvalue = prec_recall_fvalue( pred_roll, gt_roll, thres )
218 |     # EER for each tag : [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
219 |     EER_c=eer.compute_eer('result.csv', 'c', dict(zip(y_true_file_c, y_true_binary_c)))
220 |     EER_m=eer.compute_eer('result.csv', 'm', dict(zip(y_true_file_m, y_true_binary_m)))
221 |     EER_f=eer.compute_eer('result.csv', 'f', dict(zip(y_true_file_f, y_true_binary_f)))
222 |     EER_v=eer.compute_eer('result.csv', 'v', dict(zip(y_true_file_v, y_true_binary_v)))
223 |     EER_p=eer.compute_eer('result.csv', 'p', dict(zip(y_true_file_p, y_true_binary_p)))
224 |     EER_b=eer.compute_eer('result.csv', 'b', dict(zip(y_true_file_b, y_true_binary_b)))
225 |     EER_o=eer.compute_eer('result.csv', 'o', dict(zip(y_true_file_o, y_true_binary_o)))
226 |     EER=(EER_c+EER_m+EER_v+EER_p+EER_f+EER_b+EER_o)/7.0
227 |     print prec, recall, fvalue
228 |     print EER_c,EER_m,EER_f,EER_v,EER_p,EER_b,EER_o
229 |     print EER
230 | 
231 | if __name__ == '__main__':   # run the evaluation only when executed as a script
232 |     recognize()
233 | 


--------------------------------------------------------------------------------
/recognize_cnnlstm_2ch_mbk_keras_indIPD_easy.py:
--------------------------------------------------------------------------------
  1 |  
  2 | import sys
  3 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  4 | import pickle
  5 | import numpy as np
  6 | np.random.seed(1515)
  7 | import scipy.stats
  8 | 
  9 | import keras
 10 | from keras.models import load_model
 11 | 
 12 | from keras import backend as K
 13 | 
 14 | import config_2ch_raw_mbk_ipld_eva as cfg
 15 | import prepare_data_2ch_raw_ipd_ild_easy as pp_data
 16 | import csv
 17 | from Hat.preprocessing import reshape_3d_to_4d
 18 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d
 19 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d, reshape_3d_to_4d, mat_concate_multiinmaps6in
 20 | from Hat.metrics import prec_recall_fvalue
 21 | import cPickle
 22 | import eer
 23 | import matplotlib.pyplot as plt
 24 | #from main_cnn import fe_fd, agg_num, hop, n_hid, fold
 25 | np.set_printoptions(threshold=np.nan, linewidth=1000, precision=2, suppress=True)
 26 | 
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 34 | def reshapeX2( X ):
 35 |     N = len(X)
 36 |     return X.reshape( (N, t_delay, feadim) )
 37 | 
 38 | def reshapeX1( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 41 | 
 42 | def reshapeX3( X ):
 43 |     N = len(X)
 44 |     return X.reshape( (N, t_delay, 1, 257, 1) )
 45 | 
 46 | feadim=40     # size of the feature axis used by reshapeX, reshapeX1 and reshapeX2
 47 | t_delay=33    # size of the time axis used by the reshape helpers (same value as agg_num)
 48 | 
 49 | # a true value makes recognize() plot the output of md.layers[4] for every clip
 50 | debug=0
 51 | # hyper-params: label count and feature folders from the config (recognize() only loads from fe_fd_mean and fe_fd_ild)
 52 | n_labels = len( cfg.labels )
 53 | fe_fd_left = cfg.dev_fe_mel_fd_left
 54 | fe_fd_right = cfg.dev_fe_mel_fd_right
 55 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 56 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 57 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 58 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 59 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 60 | agg_num = 33        # number of frames passed to mat_2d_to_3d (concatenated frames)
 61 | hop = 1          # step length passed to mat_2d_to_3d
 62 | n_hid = 1000     # not referenced anywhere else in this script
 63 | fold = 9       # compared with column 2 of cfg.dev_cv_csv_path; NOTE(review): old comment listed 0-3, 9 presumably selects the evaluation split - confirm
 64 | 
 65 | # load the trained Keras model once, at import time
 66 | # NOTE(review): the checkpoint path is absolute and machine-specific
 67 | #md = serializations.load( cfg.scrap_fd + '/Md/md20.p' )
 68 | md=load_model('/vol/vssp/msos/yx/chime_home/DCASE2016_task4_scrap_2ch_mbk_ipd_ild_overlap/Md/cnn_keras_overlap50_cnn128onMBK_eva816_CNN128onILD257_weights.05-0.29.hdf5')
 69 | #md.summary()
 70 | 
 71 | def recognize():
 72 |     ## prepare data
 73 |     #_, _, te_X, te_y = pp_data.GetAllData(fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, agg_num, hop, fold )
 74 |     ##te_X = reshapeX(te_X)
 75 |     #print te_X.shape
 76 |     
 77 |     # do recognize and evaluation
 78 |     thres = 0.4     # thres, tune to prec=recall, if smaller, make prec smaller
 79 |     n_labels = len( cfg.labels )
 80 |     
 81 |     gt_roll = []
 82 |     pred_roll = []
 83 |     result_roll = []
 84 |     y_true_binary_c = []
 85 |     y_true_file_c = []
 86 |     y_true_binary_m = []
 87 |     y_true_file_m = []
 88 |     y_true_binary_f = []
 89 |     y_true_file_f = []
 90 |     y_true_binary_v = []
 91 |     y_true_file_v = []
 92 |     y_true_binary_p = []
 93 |     y_true_file_p = []
 94 |     y_true_binary_b = []
 95 |     y_true_file_b = []
 96 |     y_true_binary_o = []
 97 |     y_true_file_o = []
 98 |     with open( cfg.dev_cv_csv_path, 'rb') as f:
 99 |         reader = csv.reader(f)
100 |         lis = list(reader)
101 |     
102 |         # read one line
103 |         for li in lis:
104 |             na = li[1]
105 |             curr_fold = int(li[2])
106 |             
107 |             if fold==curr_fold:
108 |                 # get features, tags
109 |                 fe_path_left = fe_fd_left + '/' + na + '.f'
110 |                 fe_path_right = fe_fd_right + '/' + na + '.f'
111 |                 fe_path_mean = fe_fd_mean + '/' + na + '.f'
112 |                 fe_path_diff = fe_fd_diff + '/' + na + '.f'
113 |                 fe_path_ipd = fe_fd_ipd + '/' + na + '.f'
114 |                 fe_path_ild = fe_fd_ild + '/' + na + '.f'
115 |                 #fe_path_ori = fe_fd_ori + '/' + na + '.f'
116 |                 info_path = cfg.dev_wav_fd + '/' + na + '.csv'
117 |                 #print na
118 |                 tags = pp_data.GetTags( info_path )
119 |                 #print tags
120 |                 y = pp_data.TagsToCategory( tags )
121 |                 #print y
122 |                 #sys.exit()
123 |                 #X_l = cPickle.load( open( fe_path_left, 'rb' ) )
124 |                 #X_r = cPickle.load( open( fe_path_right, 'rb' ) )
125 |                 X_m = cPickle.load( open( fe_path_mean, 'rb' ) )
126 |                 #X_d = cPickle.load( open( fe_path_diff, 'rb' ) )
127 |                 #X_ipd = cPickle.load( open( fe_path_ipd, 'rb' ) )
128 |                 X_ild = cPickle.load( open( fe_path_ild, 'rb' ) )
129 |                 #X_o = cPickle.load( open( fe_path_ori, 'rb' ) )
130 | 
131 |                 # aggregate data
132 |                 #X3d_l = mat_2d_to_3d( X_l, agg_num, hop )
133 |                 #X3d_r = mat_2d_to_3d( X_r, agg_num, hop )
134 |                 X3d_m = mat_2d_to_3d( X_m, agg_num, hop )
135 |    		#X3d_d = mat_2d_to_3d( X_d, agg_num, hop )
136 |                 #X3d_ipd = mat_2d_to_3d( X_ipd, agg_num, hop )
137 |    		X3d_ild = mat_2d_to_3d( X_ild, agg_num, hop )
138 |    		#X3d_o = mat_2d_to_3d( X_o, agg_num, hop )
139 |      	        ## reshape 3d to 4d
140 |        	        #X4d_l = reshape_3d_to_4d( X3d_l)
141 |                 #X4d_r = reshape_3d_to_4d( X3d_r)
142 |                 #X4d_m = reshape_3d_to_4d( X3d_m)
143 |                 #X4d_d = reshape_3d_to_4d( X3d_d)
144 |                 # concatenate
145 |                 #X4d=mat_concate_multiinmaps6in(X3d_l, X3d_r, X3d_m, X3d_d, X3d_ipd, X3d_ild)
146 |                 #X3d_m=reshapeX1(X3d_m)
147 |                 #X4d=np.swapaxes(X4d,1,2) # or np.transpose(x,(1,0,2))  1,0,2 is axis
148 |                 te_X1=X3d_m
149 |                 te_X2=X3d_ild
150 |                 te_X1 = reshapeX1(te_X1)
151 |                 te_X2 = reshapeX3(te_X2)
152 |                 
153 |                 if debug:
154 |                     # with a Sequential model
155 |                     #md.summary()
156 |                     print na
157 |                     get_3rd_layer_output = K.function([md.layers[0].input, K.learning_phase()], [md.layers[4].output])
158 |                     layer_output = get_3rd_layer_output([te_X1, 0])[0]
159 |                     print layer_output.shape
160 |                     #layer_output1=layer_output[5,:,:]
161 |                     layer_output1=layer_output[:,16,:]
162 |                     imgplot=plt.matshow((layer_output1.T))
163 |                     #imgplot.set_cmap('spectral')
164 |                     plt.colorbar()
165 |                     plt.show()
166 |                     sys.pause()
167 |                 
168 |                 p_y_pred = md.predict( [te_X1,te_X2] )
169 |                 #p_y_pred = md.predict( te_X1 )
170 |                 p_y_pred = np.mean( p_y_pred, axis=0 )     # shape:(n_label)
171 |                 pred = np.zeros(n_labels)
172 |                 pred[ np.where(p_y_pred>thres) ] = 1
173 |                 ind=0
174 |                 for la in cfg.labels:
175 |                     if la=='S':
176 |                         break
177 |                     elif la=='c':
178 |                         y_true_file_c.append(na)
179 |                         y_true_binary_c.append(y[ind])
180 |                     elif la=='m':
181 |                         y_true_file_m.append(na)
182 |                         y_true_binary_m.append(y[ind])
183 |                     elif la=='f':
184 |                         y_true_file_f.append(na)
185 |                         y_true_binary_f.append(y[ind])
186 |                     elif la=='v':
187 |                         y_true_file_v.append(na)
188 |                         y_true_binary_v.append(y[ind])
189 |                     elif la=='p':
190 |                         y_true_file_p.append(na)
191 |                         y_true_binary_p.append(y[ind])
192 |                     elif la=='b':
193 |                         y_true_file_b.append(na)
194 |                         y_true_binary_b.append(y[ind])
195 |                     elif la=='o':
196 |                         y_true_file_o.append(na)
197 |                         y_true_binary_o.append(y[ind])
198 |                     result=[na,la,p_y_pred[ind]]
199 |                     result_roll.append(result)
200 |                     ind=ind+1
201 |                 
202 |                 
203 |                 pred_roll.append( pred )
204 |                 gt_roll.append( y )
205 |     
206 |     pred_roll = np.array( pred_roll )
207 |     gt_roll = np.array( gt_roll )
208 |     #write csv for EER computation
209 |     csvfile=file('result.csv','wb')
210 |     writer=csv.writer(csvfile)
211 |     #writer.writerow(['fn','label','score'])
212 |     writer.writerows(result_roll)
213 |     csvfile.close()
214 |     
215 |     # calculate prec, recall, fvalue
216 |     prec, recall, fvalue = prec_recall_fvalue( pred_roll, gt_roll, thres )
217 |     # EER for each tag : [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
218 |     EER_c=eer.compute_eer('result.csv', 'c', dict(zip(y_true_file_c, y_true_binary_c)))
219 |     EER_m=eer.compute_eer('result.csv', 'm', dict(zip(y_true_file_m, y_true_binary_m)))
220 |     EER_f=eer.compute_eer('result.csv', 'f', dict(zip(y_true_file_f, y_true_binary_f)))
221 |     EER_v=eer.compute_eer('result.csv', 'v', dict(zip(y_true_file_v, y_true_binary_v)))
222 |     EER_p=eer.compute_eer('result.csv', 'p', dict(zip(y_true_file_p, y_true_binary_p)))
223 |     EER_b=eer.compute_eer('result.csv', 'b', dict(zip(y_true_file_b, y_true_binary_b)))
224 |     EER_o=eer.compute_eer('result.csv', 'o', dict(zip(y_true_file_o, y_true_binary_o)))
225 |     EER=(EER_c+EER_m+EER_v+EER_p+EER_f+EER_b+EER_o)/7.0
226 |     print prec, recall, fvalue
227 |     print EER_c,EER_m,EER_f,EER_v,EER_p,EER_b,EER_o
228 |     print EER
229 | 
230 | if __name__ == '__main__':   # run the evaluation only when executed as a script
231 |     recognize()
232 | 


--------------------------------------------------------------------------------
/recognize_cnnlstm_2ch_spec_keras_noIPD_easy.py:
--------------------------------------------------------------------------------
  1 |  
  2 | import sys
  3 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  4 | import pickle
  5 | import numpy as np
  6 | np.random.seed(1515)
  7 | import scipy.stats
  8 | 
  9 | import keras
 10 | from keras.models import load_model
 11 | 
 12 | from keras import backend as K
 13 | 
 14 | import config_2ch_raw_spec_ipld_eva as cfg
 15 | import prepare_data_2ch_raw_ipd_ild_easy_Spec as pp_data
 16 | import csv
 17 | from Hat.preprocessing import reshape_3d_to_4d
 18 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d
 19 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d, reshape_3d_to_4d, mat_concate_multiinmaps6in
 20 | from Hat.metrics import prec_recall_fvalue
 21 | import cPickle
 22 | import eer
 23 | import matplotlib.pyplot as plt
 24 | #from main_cnn import fe_fd, agg_num, hop, n_hid, fold
 25 | np.set_printoptions(threshold=np.nan, linewidth=1000, precision=2, suppress=True)
 26 | 
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 34 | def reshapeX2( X ):
 35 |     N = len(X)
 36 |     return X.reshape( (N, t_delay, feadim) )
 37 | 
 38 | def reshapeX1( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 41 | 
 42 | def reshapeX3( X ):
 43 |     N = len(X)
 44 |     return X.reshape( (N, t_delay*feadim) )
 45 | 
 46 | feadim=257    # size of the feature axis used by the reshape helpers
 47 | t_delay=33    # size of the time axis used by the reshape helpers (same value as agg_num)
 48 | 
 49 | # a true value makes recognize() plot the output of md.layers[4] for every clip
 50 | debug=0
 51 | # hyper-params: label count and feature folders from the config (recognize() only loads from fe_fd_mean)
 52 | n_labels = len( cfg.labels )
 53 | fe_fd_left = cfg.dev_fe_mel_fd_left
 54 | fe_fd_right = cfg.dev_fe_mel_fd_right
 55 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 56 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 57 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 58 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 59 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 60 | agg_num = 33        # number of frames passed to mat_2d_to_3d (concatenated frames)
 61 | hop = 1          # step length passed to mat_2d_to_3d
 62 | n_hid = 1000     # not referenced anywhere else in this script
 63 | fold = 9        # compared with column 2 of cfg.dev_cv_csv_path; NOTE(review): old comment listed 0-3, 9 presumably selects the evaluation split - confirm
 64 | 
 65 | # load the trained Keras model once, at import time
 66 | # NOTE(review): the checkpoint path is absolute and machine-specific
 67 | #md = serializations.load( cfg.scrap_fd + '/Md/md20.p' )
 68 | md=load_model('/vol/vssp/msos/yx/chime_home/DCASE2016_task4_scrap_2ch_spec_ipd_ild_overlap/Md/cnn_keras_overlap50_noILD_1CNN128onSpec_eva816_weights.12-0.41.hdf5')
 69 | #md.summary()
 70 | 
 71 | def recognize():
 72 |     ## prepare data
 73 |     #_, _, te_X, te_y = pp_data.GetAllData(fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, agg_num, hop, fold )
 74 |     ##te_X = reshapeX(te_X)
 75 |     #print te_X.shape
 76 |     
 77 |     # do recognize and evaluation
 78 |     thres = 0.4     # thres, tune to prec=recall, if smaller, make prec smaller
 79 |     n_labels = len( cfg.labels )
 80 |     
 81 |     gt_roll = []
 82 |     pred_roll = []
 83 |     result_roll = []
 84 |     y_true_binary_c = []
 85 |     y_true_file_c = []
 86 |     y_true_binary_m = []
 87 |     y_true_file_m = []
 88 |     y_true_binary_f = []
 89 |     y_true_file_f = []
 90 |     y_true_binary_v = []
 91 |     y_true_file_v = []
 92 |     y_true_binary_p = []
 93 |     y_true_file_p = []
 94 |     y_true_binary_b = []
 95 |     y_true_file_b = []
 96 |     y_true_binary_o = []
 97 |     y_true_file_o = []
 98 |     with open( cfg.dev_cv_csv_path, 'rb') as f:
 99 |         reader = csv.reader(f)
100 |         lis = list(reader)
101 |     
102 |         # read one line
103 |         xyln=0
104 |         for li in lis:
105 |             na = li[1]
106 |             curr_fold = int(li[2])
107 |             
108 |             if fold==curr_fold:
109 |                 xyln=xyln+1
110 |                 #print xyln
111 |                 # get features, tags
112 |                 fe_path_left = fe_fd_left + '/' + na + '.f'
113 |                 fe_path_right = fe_fd_right + '/' + na + '.f'
114 |                 fe_path_mean = fe_fd_mean + '/' + na + '.f'
115 |                 fe_path_diff = fe_fd_diff + '/' + na + '.f'
116 |                 fe_path_ipd = fe_fd_ipd + '/' + na + '.f'
117 |                 fe_path_ild = fe_fd_ild + '/' + na + '.f'
118 |                 #fe_path_ori = fe_fd_ori + '/' + na + '.f'
119 |                 info_path = cfg.dev_wav_fd + '/' + na + '.csv'
120 |                 tags = pp_data.GetTags( info_path )
121 |                 #print tags
122 |                 y = pp_data.TagsToCategory( tags )
123 |                 #sys.exit()
124 |                 #X_l = cPickle.load( open( fe_path_left, 'rb' ) )
125 |                 #X_r = cPickle.load( open( fe_path_right, 'rb' ) )
126 |                 X_m = cPickle.load( open( fe_path_mean, 'rb' ) )
127 |                 #X_d = cPickle.load( open( fe_path_diff, 'rb' ) )
128 |                 #X_ipd = cPickle.load( open( fe_path_ipd, 'rb' ) )
129 |                 #X_ild = cPickle.load( open( fe_path_ild, 'rb' ) )
130 |                 #X_o = cPickle.load( open( fe_path_ori, 'rb' ) )
131 | 
132 |                 # aggregate data
133 |                 #X3d_l = mat_2d_to_3d( X_l, agg_num, hop )
134 |                 #X3d_r = mat_2d_to_3d( X_r, agg_num, hop )
135 |                 X3d_m = mat_2d_to_3d( X_m, agg_num, hop )
136 |    		#X3d_d = mat_2d_to_3d( X_d, agg_num, hop )
137 |                 #X3d_ipd = mat_2d_to_3d( X_ipd, agg_num, hop )
138 |    		#X3d_ild = mat_2d_to_3d( X_ild, agg_num, hop )
139 |    		#X3d_o = mat_2d_to_3d( X_o, agg_num, hop )
140 |      	        ## reshape 3d to 4d
141 |        	        #X4d_l = reshape_3d_to_4d( X3d_l)
142 |                 #X4d_r = reshape_3d_to_4d( X3d_r)
143 |                 #X4d_m = reshape_3d_to_4d( X3d_m)
144 |                 #X4d_d = reshape_3d_to_4d( X3d_d)
145 |                 # concatenate
146 |                 #X4d=mat_concate_multiinmaps6in(X3d_l, X3d_r, X3d_m, X3d_d, X3d_ipd, X3d_ild)
147 |                 #X3d_m=reshapeX1(X3d_m)
148 |                 #X4d=np.swapaxes(X4d,1,2) # or np.transpose(x,(1,0,2))  1,0,2 is axis
149 |                 te_X1=X3d_m
150 |                 #te_X2=X3d_ild
151 |                 te_X1 = reshapeX1(te_X1)
152 |                 #te_X2 = reshapeX2(te_X2)
153 |                 
154 |                 if debug:
155 |                     # with a Sequential model
156 |                     #md.summary()
157 |                     print na
158 |                     get_3rd_layer_output = K.function([md.layers[0].input, K.learning_phase()], [md.layers[4].output])
159 |                     layer_output = get_3rd_layer_output([te_X1, 0])[0]
160 |                     print layer_output.shape
161 |                     #layer_output1=layer_output[5,:,:]
162 |                     layer_output1=layer_output[:,16,:]
163 |                     imgplot=plt.matshow((layer_output1.T))
164 |                     #imgplot.set_cmap('spectral')
165 |                     plt.colorbar()
166 |                     plt.show()
167 |                     sys.pause()
168 |                 
169 |                 #p_y_pred = md.predict( [te_X1,te_X2] )
170 |                 p_y_pred = md.predict( te_X1 )
171 |                 p_y_pred = np.mean( p_y_pred, axis=0 )     # shape:(n_label)
172 |                 pred = np.zeros(n_labels)
173 |                 pred[ np.where(p_y_pred>thres) ] = 1
174 |                 ind=0
175 |                 for la in cfg.labels:
176 |                     if la=='S':
177 |                         break
178 |                     elif la=='c':
179 |                         y_true_file_c.append(na)
180 |                         y_true_binary_c.append(y[ind])
181 |                     elif la=='m':
182 |                         y_true_file_m.append(na)
183 |                         y_true_binary_m.append(y[ind])
184 |                     elif la=='f':
185 |                         y_true_file_f.append(na)
186 |                         y_true_binary_f.append(y[ind])
187 |                     elif la=='v':
188 |                         y_true_file_v.append(na)
189 |                         y_true_binary_v.append(y[ind])
190 |                     elif la=='p':
191 |                         y_true_file_p.append(na)
192 |                         y_true_binary_p.append(y[ind])
193 |                     elif la=='b':
194 |                         y_true_file_b.append(na)
195 |                         y_true_binary_b.append(y[ind])
196 |                     elif la=='o':
197 |                         y_true_file_o.append(na)
198 |                         y_true_binary_o.append(y[ind])
199 |                     result=[na,la,p_y_pred[ind]]
200 |                     result_roll.append(result)
201 |                     ind=ind+1
202 |                 
203 |                 
204 |                 pred_roll.append( pred )
205 |                 gt_roll.append( y )
206 |     
207 |     pred_roll = np.array( pred_roll )
208 |     gt_roll = np.array( gt_roll )
209 |     #write csv for EER computation
210 |     csvfile=file('result.csv','wb')
211 |     writer=csv.writer(csvfile)
212 |     #writer.writerow(['fn','label','score'])
213 |     writer.writerows(result_roll)
214 |     csvfile.close()
215 |     
216 |     # calculate prec, recall, fvalue
217 |     prec, recall, fvalue = prec_recall_fvalue( pred_roll, gt_roll, thres )
218 |     # EER for each tag : [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
219 |     EER_c=eer.compute_eer('result.csv', 'c', dict(zip(y_true_file_c, y_true_binary_c)))
220 |     EER_m=eer.compute_eer('result.csv', 'm', dict(zip(y_true_file_m, y_true_binary_m)))
221 |     EER_f=eer.compute_eer('result.csv', 'f', dict(zip(y_true_file_f, y_true_binary_f)))
222 |     EER_v=eer.compute_eer('result.csv', 'v', dict(zip(y_true_file_v, y_true_binary_v)))
223 |     EER_p=eer.compute_eer('result.csv', 'p', dict(zip(y_true_file_p, y_true_binary_p)))
224 |     EER_b=eer.compute_eer('result.csv', 'b', dict(zip(y_true_file_b, y_true_binary_b)))
225 |     EER_o=eer.compute_eer('result.csv', 'o', dict(zip(y_true_file_o, y_true_binary_o)))
226 |     EER=(EER_c+EER_m+EER_v+EER_p+EER_f+EER_b+EER_o)/7.0
227 |     print prec, recall, fvalue
228 |     print EER_c,EER_m,EER_f,EER_v,EER_p,EER_b,EER_o
229 |     print EER
230 | 
231 | if __name__ == '__main__':   # run the evaluation only when executed as a script
232 |     recognize()
233 | 


--------------------------------------------------------------------------------
/recognize_cnnlstm_2ch_raw_keras_noILD_easy.py:
--------------------------------------------------------------------------------
  1 |  
  2 | import sys
  3 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  4 | import pickle
  5 | import numpy as np
  6 | np.random.seed(1515)
  7 | import scipy.stats
  8 | 
  9 | import keras
 10 | from keras.models import load_model
 11 | 
 12 | from keras import backend as K
 13 | 
 14 | import config_2ch_raw_wav32ms_ipld_eva as cfg
 15 | import prepare_data_2ch_raw_ipd_ild_easy as pp_data
 16 | import csv
 17 | from Hat.preprocessing import reshape_3d_to_4d
 18 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d
 19 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d, reshape_3d_to_4d, mat_concate_multiinmaps6in
 20 | from Hat.metrics import prec_recall_fvalue
 21 | import cPickle
 22 | import eer
 23 | import matplotlib.pyplot as plt
 24 | #from main_cnn import fe_fd, agg_num, hop, n_hid, fold
 25 | np.set_printoptions(threshold=np.nan, linewidth=1000, precision=2, suppress=True)
 26 | 
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 34 | def reshapeX2( X ):
 35 |     N = len(X)
 36 |     return X.reshape( (N, t_delay, feadim) )
 37 | 
 38 | def reshapeX1( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 41 | 
 42 | def reshapeX3( X ):
 43 |     N = len(X)
 44 |     return X.reshape( (N, t_delay, 1, 257, 1) )
 45 | 
 46 | feadim=512    # size of the feature axis used by reshapeX, reshapeX1 and reshapeX2
 47 | t_delay=33    # size of the time axis used by the reshape helpers (same value as agg_num)
 48 | 
 49 | # a true value makes recognize() plot the output of md.layers[4] for every clip
 50 | debug=0
 51 | # hyper-params: label count and feature folders from the config (recognize() only loads from fe_fd_mean)
 52 | n_labels = len( cfg.labels )
 53 | fe_fd_left = cfg.dev_fe_mel_fd_left
 54 | fe_fd_right = cfg.dev_fe_mel_fd_right
 55 | fe_fd_mean = cfg.dev_fe_mel_fd_mean
 56 | fe_fd_diff = cfg.dev_fe_mel_fd_diff
 57 | fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
 58 | fe_fd_ild = cfg.dev_fe_mel_fd_ild
 59 | #fe_fd_ori = cfg.dev_fe_mel_fd_ori
 60 | agg_num = 33        # number of frames passed to mat_2d_to_3d (concatenated frames)
 61 | hop = 1          # step length passed to mat_2d_to_3d
 62 | n_hid = 1000     # not referenced anywhere else in this script
 63 | fold = 9        # compared with column 2 of cfg.dev_cv_csv_path; NOTE(review): old comment listed 0-3, 9 presumably selects the evaluation split - confirm
 64 | 
 65 | # load the trained Keras model once, at import time (absolute, machine-specific path)
 66 | # NOTE(review): the checkpoint name mentions MBK40 while this script uses feadim=512 and the wav32ms config - confirm it is the intended model
 67 | #md = serializations.load( cfg.scrap_fd + '/Md/md20.p' )
 68 | md=load_model('/vol/vssp/msos/yx/chime_home/DCASE2016_task4_scrap_2ch_mbk_ipd_ild_overlap/Md/cnn_keras_overlap50_eva816_1CNN128onMBK40_noILD_weights.07-0.31.hdf5')
 69 | #md.summary()
 70 | 
 71 | def recognize():
 72 |     ## prepare data
 73 |     #_, _, te_X, te_y = pp_data.GetAllData(fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, agg_num, hop, fold )
 74 |     ##te_X = reshapeX(te_X)
 75 |     #print te_X.shape
 76 |     
 77 |     # do recognize and evaluation
 78 |     thres = 0.4     # thres, tune to prec=recall, if smaller, make prec smaller
 79 |     n_labels = len( cfg.labels )
 80 |     
 81 |     gt_roll = []
 82 |     pred_roll = []
 83 |     result_roll = []
 84 |     y_true_binary_c = []
 85 |     y_true_file_c = []
 86 |     y_true_binary_m = []
 87 |     y_true_file_m = []
 88 |     y_true_binary_f = []
 89 |     y_true_file_f = []
 90 |     y_true_binary_v = []
 91 |     y_true_file_v = []
 92 |     y_true_binary_p = []
 93 |     y_true_file_p = []
 94 |     y_true_binary_b = []
 95 |     y_true_file_b = []
 96 |     y_true_binary_o = []
 97 |     y_true_file_o = []
 98 |     with open( cfg.dev_cv_csv_path, 'rb') as f:
 99 |         reader = csv.reader(f)
100 |         lis = list(reader)
101 |     
102 |         # read one line
103 |         for li in lis:
104 |             na = li[1]
105 |             curr_fold = int(li[2])
106 |             
107 |             if fold==curr_fold:
108 |                 # get features, tags
109 |                 fe_path_left = fe_fd_left + '/' + na + '.f'
110 |                 fe_path_right = fe_fd_right + '/' + na + '.f'
111 |                 fe_path_mean = fe_fd_mean + '/' + na + '.f'
112 |                 fe_path_diff = fe_fd_diff + '/' + na + '.f'
113 |                 fe_path_ipd = fe_fd_ipd + '/' + na + '.f'
114 |                 fe_path_ild = fe_fd_ild + '/' + na + '.f'
115 |                 #fe_path_ori = fe_fd_ori + '/' + na + '.f'
116 |                 info_path = cfg.dev_wav_fd + '/' + na + '.csv'
117 |                 #print na
118 |                 tags = pp_data.GetTags( info_path )
119 |                 #print tags
120 |                 y = pp_data.TagsToCategory( tags )
121 |                 #print y
122 |                 #sys.exit()
123 |                 #X_l = cPickle.load( open( fe_path_left, 'rb' ) )
124 |                 #X_r = cPickle.load( open( fe_path_right, 'rb' ) )
125 |                 X_m = cPickle.load( open( fe_path_mean, 'rb' ) )
126 |                 #X_d = cPickle.load( open( fe_path_diff, 'rb' ) )
127 |                 #X_ipd = cPickle.load( open( fe_path_ipd, 'rb' ) )
128 |                 #X_ild = cPickle.load( open( fe_path_ild, 'rb' ) )
129 |                 #X_o = cPickle.load( open( fe_path_ori, 'rb' ) )
130 | 
131 |                 # aggregate data
132 |                 #X3d_l = mat_2d_to_3d( X_l, agg_num, hop )
133 |                 #X3d_r = mat_2d_to_3d( X_r, agg_num, hop )
134 |                 X3d_m = mat_2d_to_3d( X_m, agg_num, hop )
135 |    		#X3d_d = mat_2d_to_3d( X_d, agg_num, hop )
136 |                 #X3d_ipd = mat_2d_to_3d( X_ipd, agg_num, hop )
137 |    		#X3d_ild = mat_2d_to_3d( X_ild, agg_num, hop )
138 |    		#X3d_o = mat_2d_to_3d( X_o, agg_num, hop )
139 |      	        ## reshape 3d to 4d
140 |        	        #X4d_l = reshape_3d_to_4d( X3d_l)
141 |                 #X4d_r = reshape_3d_to_4d( X3d_r)
142 |                 #X4d_m = reshape_3d_to_4d( X3d_m)
143 |                 #X4d_d = reshape_3d_to_4d( X3d_d)
144 |                 # concatenate
145 |                 #X4d=mat_concate_multiinmaps6in(X3d_l, X3d_r, X3d_m, X3d_d, X3d_ipd, X3d_ild)
146 |                 #print X3d_m.shape
147 |                 X3d_m=reshapeX1(X3d_m)
148 |                 #X4d=np.swapaxes(X4d,1,2) # or np.transpose(x,(1,0,2))  1,0,2 is axis
149 |                 te_X1=X3d_m
150 |                 #te_X2=X3d_ild
151 |                 #te_X1 = reshapeX1(te_X1)
152 |                 #te_X2 = reshapeX3(te_X2)
153 |                 
154 |                 if debug:
155 |                     # with a Sequential model
156 |                     #md.summary()
157 |                     print na
158 |                     get_3rd_layer_output = K.function([md.layers[0].input, K.learning_phase()], [md.layers[4].output])
159 |                     layer_output = get_3rd_layer_output([te_X1, 0])[0]
160 |                     print layer_output.shape
161 |                     #layer_output1=layer_output[5,:,:]
162 |                     layer_output1=layer_output[:,16,:]
163 |                     imgplot=plt.matshow((layer_output1.T))
164 |                     #imgplot.set_cmap('spectral')
165 |                     plt.colorbar()
166 |                     plt.show()
167 |                     sys.pause()
168 |                 
169 |                 #p_y_pred = md.predict( [te_X1,te_X2] )
170 |                 p_y_pred = md.predict( te_X1 )
171 |                 p_y_pred = np.mean( p_y_pred, axis=0 )     # shape:(n_label)
172 |                 pred = np.zeros(n_labels)
173 |                 pred[ np.where(p_y_pred>thres) ] = 1
174 |                 ind=0
175 |                 for la in cfg.labels:
176 |                     if la=='S':
177 |                         break
178 |                     elif la=='c':
179 |                         y_true_file_c.append(na)
180 |                         y_true_binary_c.append(y[ind])
181 |                     elif la=='m':
182 |                         y_true_file_m.append(na)
183 |                         y_true_binary_m.append(y[ind])
184 |                     elif la=='f':
185 |                         y_true_file_f.append(na)
186 |                         y_true_binary_f.append(y[ind])
187 |                     elif la=='v':
188 |                         y_true_file_v.append(na)
189 |                         y_true_binary_v.append(y[ind])
190 |                     elif la=='p':
191 |                         y_true_file_p.append(na)
192 |                         y_true_binary_p.append(y[ind])
193 |                     elif la=='b':
194 |                         y_true_file_b.append(na)
195 |                         y_true_binary_b.append(y[ind])
196 |                     elif la=='o':
197 |                         y_true_file_o.append(na)
198 |                         y_true_binary_o.append(y[ind])
199 |                     result=[na,la,p_y_pred[ind]]
200 |                     result_roll.append(result)
201 |                     ind=ind+1
202 |                 
203 |                 
204 |                 pred_roll.append( pred )
205 |                 gt_roll.append( y )
206 |     
207 |     pred_roll = np.array( pred_roll )
208 |     gt_roll = np.array( gt_roll )
209 |     #write csv for EER computation
210 |     csvfile=file('result.csv','wb')
211 |     writer=csv.writer(csvfile)
212 |     #writer.writerow(['fn','label','score'])
213 |     writer.writerows(result_roll)
214 |     csvfile.close()
215 |     
216 |     # calculate prec, recall, fvalue
217 |     prec, recall, fvalue = prec_recall_fvalue( pred_roll, gt_roll, thres )
218 |     # EER for each tag : [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
219 |     EER_c=eer.compute_eer('result.csv', 'c', dict(zip(y_true_file_c, y_true_binary_c)))
220 |     EER_m=eer.compute_eer('result.csv', 'm', dict(zip(y_true_file_m, y_true_binary_m)))
221 |     EER_f=eer.compute_eer('result.csv', 'f', dict(zip(y_true_file_f, y_true_binary_f)))
222 |     EER_v=eer.compute_eer('result.csv', 'v', dict(zip(y_true_file_v, y_true_binary_v)))
223 |     EER_p=eer.compute_eer('result.csv', 'p', dict(zip(y_true_file_p, y_true_binary_p)))
224 |     EER_b=eer.compute_eer('result.csv', 'b', dict(zip(y_true_file_b, y_true_binary_b)))
225 |     EER_o=eer.compute_eer('result.csv', 'o', dict(zip(y_true_file_o, y_true_binary_o)))
226 |     EER=(EER_c+EER_m+EER_v+EER_p+EER_f+EER_b+EER_o)/7.0
227 |     print prec, recall, fvalue
228 |     print EER_c,EER_m,EER_f,EER_v,EER_p,EER_b,EER_o
229 |     print EER
230 | 
# Run the evaluation only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    recognize()
233 | 


--------------------------------------------------------------------------------
/recognize_cnnlstm_2ch_raw_keras_indIPD_easy.py:
--------------------------------------------------------------------------------
  1 |  
  2 | import sys
  3 | sys.path.append('/user/HS103/yx0001/Downloads/Hat')
  4 | import pickle
  5 | import numpy as np
  6 | np.random.seed(1515)
  7 | import scipy.stats
  8 | 
  9 | import keras
 10 | from keras.models import load_model
 11 | 
 12 | from keras import backend as K
 13 | 
 14 | import config_2ch_raw_wav32ms_ipld_eva as cfg
 15 | import prepare_data_2ch_raw_ipd_ild_easy as pp_data
 16 | import csv
 17 | from Hat.preprocessing import reshape_3d_to_4d
 18 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d
 19 | from Hat.preprocessing import sparse_to_categorical, mat_2d_to_3d, reshape_3d_to_4d, mat_concate_multiinmaps6in
 20 | from Hat.metrics import prec_recall_fvalue
 21 | import cPickle
 22 | import eer
 23 | import matplotlib.pyplot as plt
 24 | #from main_cnn import fe_fd, agg_num, hop, n_hid, fold
 25 | np.set_printoptions(threshold=np.nan, linewidth=1000, precision=2, suppress=True)
 26 | 
 27 | 
 28 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 29 | def reshapeX( X ):
 30 |     N = len(X)
 31 |     return X.reshape( (N, 6, t_delay, feadim, 1) )
 32 | 
 33 | # resize data for fit into CNN. size: (batch_num*color_maps*height*weight)
 34 | def reshapeX2( X ):
 35 |     N = len(X)
 36 |     return X.reshape( (N, t_delay, feadim) )
 37 | 
 38 | def reshapeX1( X ):
 39 |     N = len(X)
 40 |     return X.reshape( (N, t_delay, 1, feadim, 1) )
 41 | 
 42 | def reshapeX3( X ):
 43 |     N = len(X)
 44 |     return X.reshape( (N, t_delay, 1, 257, 1) )
 45 | 
# Geometry used by the reshapeX* helpers above: feature width and number of
# context frames per sample.
feadim=512
t_delay=33


# When truthy, recognize() plots an intermediate layer output and stops.
debug=0
# hyper-params
n_labels = len( cfg.labels )
# Feature folders, all taken from the imported config module.
fe_fd_left = cfg.dev_fe_mel_fd_left
fe_fd_right = cfg.dev_fe_mel_fd_right
fe_fd_mean = cfg.dev_fe_mel_fd_mean
fe_fd_diff = cfg.dev_fe_mel_fd_diff
fe_fd_ipd = cfg.dev_fe_mel_fd_ipd
fe_fd_ild = cfg.dev_fe_mel_fd_ild
#fe_fd_ori = cfg.dev_fe_mel_fd_ori
agg_num = 33        # frames per context window (passed to mat_2d_to_3d)
hop = 1          # step between consecutive context windows
n_hid = 1000     # not referenced in the visible part of this script
# Rows of cfg.dev_cv_csv_path whose fold column equals this value are evaluated.
# NOTE(review): the original comment said "can be 0, 1, 2, 3"; 9 presumably
# marks the evaluation split -- confirm against the CSV.
fold = 9

# load model
# The model is loaded at import time from a hard-coded absolute path.
#md = serializations.load( cfg.scrap_fd + '/Md/md20.p' )
md=load_model('/vol/vssp/msos/yx/chime_home/DCASE2016_task4_scrap_2ch_wav_ipd_ild_overlap/Md/cnn_keras_overlap50_cnn128onRAW_eva816_CNN128onILD257_weights.18-0.62.hdf5')
#md.summary()
 69 | #md.summary()
 70 | 
 71 | def recognize():
 72 |     ## prepare data
 73 |     #_, _, te_X, te_y = pp_data.GetAllData(fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, agg_num, hop, fold )
 74 |     ##te_X = reshapeX(te_X)
 75 |     #print te_X.shape
 76 |     
 77 |     # do recognize and evaluation
 78 |     thres = 0.4     # thres, tune to prec=recall, if smaller, make prec smaller
 79 |     n_labels = len( cfg.labels )
 80 |     
 81 |     gt_roll = []
 82 |     pred_roll = []
 83 |     result_roll = []
 84 |     y_true_binary_c = []
 85 |     y_true_file_c = []
 86 |     y_true_binary_m = []
 87 |     y_true_file_m = []
 88 |     y_true_binary_f = []
 89 |     y_true_file_f = []
 90 |     y_true_binary_v = []
 91 |     y_true_file_v = []
 92 |     y_true_binary_p = []
 93 |     y_true_file_p = []
 94 |     y_true_binary_b = []
 95 |     y_true_file_b = []
 96 |     y_true_binary_o = []
 97 |     y_true_file_o = []
 98 |     with open( cfg.dev_cv_csv_path, 'rb') as f:
 99 |         reader = csv.reader(f)
100 |         lis = list(reader)
101 |     
102 |         # read one line
103 |         for li in lis:
104 |             na = li[1]
105 |             curr_fold = int(li[2])
106 |             
107 |             if fold==curr_fold:
108 |                 # get features, tags
109 |                 fe_path_left = fe_fd_left + '/' + na + '.f'
110 |                 fe_path_right = fe_fd_right + '/' + na + '.f'
111 |                 fe_path_mean = fe_fd_mean + '/' + na + '.f'
112 |                 fe_path_diff = fe_fd_diff + '/' + na + '.f'
113 |                 fe_path_ipd = fe_fd_ipd + '/' + na + '.f'
114 |                 fe_path_ild = fe_fd_ild + '/' + na + '.f'
115 |                 #fe_path_ori = fe_fd_ori + '/' + na + '.f'
116 |                 info_path = cfg.dev_wav_fd + '/' + na + '.csv'
117 |                 #print na
118 |                 tags = pp_data.GetTags( info_path )
119 |                 #print tags
120 |                 y = pp_data.TagsToCategory( tags )
121 |                 #print y
122 |                 #sys.exit()
123 |                 #X_l = cPickle.load( open( fe_path_left, 'rb' ) )
124 |                 #X_r = cPickle.load( open( fe_path_right, 'rb' ) )
125 |                 X_m = cPickle.load( open( fe_path_mean, 'rb' ) )
126 |                 #X_d = cPickle.load( open( fe_path_diff, 'rb' ) )
127 |                 #X_ipd = cPickle.load( open( fe_path_ipd, 'rb' ) )
128 |                 X_ild = cPickle.load( open( fe_path_ild, 'rb' ) )
129 |                 #X_o = cPickle.load( open( fe_path_ori, 'rb' ) )
130 | 
131 |                 # aggregate data
132 |                 #X3d_l = mat_2d_to_3d( X_l, agg_num, hop )
133 |                 #X3d_r = mat_2d_to_3d( X_r, agg_num, hop )
134 |                 X3d_m = mat_2d_to_3d( X_m, agg_num, hop )
135 |    		#X3d_d = mat_2d_to_3d( X_d, agg_num, hop )
136 |                 #X3d_ipd = mat_2d_to_3d( X_ipd, agg_num, hop )
137 |    		X3d_ild = mat_2d_to_3d( X_ild, agg_num, hop )
138 |    		#X3d_o = mat_2d_to_3d( X_o, agg_num, hop )
139 |      	        ## reshape 3d to 4d
140 |        	        #X4d_l = reshape_3d_to_4d( X3d_l)
141 |                 #X4d_r = reshape_3d_to_4d( X3d_r)
142 |                 #X4d_m = reshape_3d_to_4d( X3d_m)
143 |                 #X4d_d = reshape_3d_to_4d( X3d_d)
144 |                 # concatenate
145 |                 #X4d=mat_concate_multiinmaps6in(X3d_l, X3d_r, X3d_m, X3d_d, X3d_ipd, X3d_ild)
146 |                 #print X3d_m.shape
147 |                 #X3d_m=reshapeX1(X3d_m)
148 |                 #X4d=np.swapaxes(X4d,1,2) # or np.transpose(x,(1,0,2))  1,0,2 is axis
149 |                 te_X1=X3d_m
150 |                 te_X2=X3d_ild
151 |                 te_X1 = reshapeX1(te_X1)
152 |                 te_X2 = reshapeX3(te_X2)
153 |                 
154 |                 if debug:
155 |                     # with a Sequential model
156 |                     #md.summary()
157 |                     print na
158 |                     get_3rd_layer_output = K.function([md.layers[0].input, K.learning_phase()], [md.layers[4].output])
159 |                     layer_output = get_3rd_layer_output([te_X1, 0])[0]
160 |                     print layer_output.shape
161 |                     #layer_output1=layer_output[5,:,:]
162 |                     layer_output1=layer_output[:,16,:]
163 |                     imgplot=plt.matshow((layer_output1.T))
164 |                     #imgplot.set_cmap('spectral')
165 |                     plt.colorbar()
166 |                     plt.show()
167 |                     sys.pause()
168 |                 
169 |                 p_y_pred = md.predict( [te_X1,te_X2] )
170 |                 #p_y_pred = md.predict( te_X1 )
171 |                 p_y_pred = np.mean( p_y_pred, axis=0 )     # shape:(n_label)
172 |                 pred = np.zeros(n_labels)
173 |                 pred[ np.where(p_y_pred>thres) ] = 1
174 |                 ind=0
175 |                 for la in cfg.labels:
176 |                     if la=='S':
177 |                         break
178 |                     elif la=='c':
179 |                         y_true_file_c.append(na)
180 |                         y_true_binary_c.append(y[ind])
181 |                     elif la=='m':
182 |                         y_true_file_m.append(na)
183 |                         y_true_binary_m.append(y[ind])
184 |                     elif la=='f':
185 |                         y_true_file_f.append(na)
186 |                         y_true_binary_f.append(y[ind])
187 |                     elif la=='v':
188 |                         y_true_file_v.append(na)
189 |                         y_true_binary_v.append(y[ind])
190 |                     elif la=='p':
191 |                         y_true_file_p.append(na)
192 |                         y_true_binary_p.append(y[ind])
193 |                     elif la=='b':
194 |                         y_true_file_b.append(na)
195 |                         y_true_binary_b.append(y[ind])
196 |                     elif la=='o':
197 |                         y_true_file_o.append(na)
198 |                         y_true_binary_o.append(y[ind])
199 |                     result=[na,la,p_y_pred[ind]]
200 |                     result_roll.append(result)
201 |                     ind=ind+1
202 |                 
203 |                 
204 |                 pred_roll.append( pred )
205 |                 gt_roll.append( y )
206 |     
207 |     pred_roll = np.array( pred_roll )
208 |     gt_roll = np.array( gt_roll )
209 |     #write csv for EER computation
210 |     csvfile=file('result.csv','wb')
211 |     writer=csv.writer(csvfile)
212 |     #writer.writerow(['fn','label','score'])
213 |     writer.writerows(result_roll)
214 |     csvfile.close()
215 |     
216 |     # calculate prec, recall, fvalue
217 |     prec, recall, fvalue = prec_recall_fvalue( pred_roll, gt_roll, thres )
218 |     # EER for each tag : [ 'c', 'm', 'f', 'v', 'p', 'b', 'o', 'S' ]
219 |     EER_c=eer.compute_eer('result.csv', 'c', dict(zip(y_true_file_c, y_true_binary_c)))
220 |     EER_m=eer.compute_eer('result.csv', 'm', dict(zip(y_true_file_m, y_true_binary_m)))
221 |     EER_f=eer.compute_eer('result.csv', 'f', dict(zip(y_true_file_f, y_true_binary_f)))
222 |     EER_v=eer.compute_eer('result.csv', 'v', dict(zip(y_true_file_v, y_true_binary_v)))
223 |     EER_p=eer.compute_eer('result.csv', 'p', dict(zip(y_true_file_p, y_true_binary_p)))
224 |     EER_b=eer.compute_eer('result.csv', 'b', dict(zip(y_true_file_b, y_true_binary_b)))
225 |     EER_o=eer.compute_eer('result.csv', 'o', dict(zip(y_true_file_o, y_true_binary_o)))
226 |     EER=(EER_c+EER_m+EER_v+EER_p+EER_f+EER_b+EER_o)/7.0
227 |     print prec, recall, fvalue
228 |     print EER_c,EER_m,EER_f,EER_v,EER_p,EER_b,EER_o
229 |     print EER
230 | 
# Run the evaluation only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    recognize()
233 | 


--------------------------------------------------------------------------------
/prepare_data_2ch_raw_ipd_ild_easy_Spec.py:
--------------------------------------------------------------------------------
  1 |  
  2 | import sys
  3 | sys.path.append('../Hat')
  4 | from Hat.preprocessing import mat_2d_to_3d, reshape_3d_to_4d, mat_concate_multiinmaps6in
  5 | import numpy as np
  6 | from scipy import signal
  7 | import cPickle
  8 | import os
  9 | import sys
 10 | import matplotlib.pyplot as plt
 11 | from scipy import signal
 12 | import wavio
 13 | import librosa
 14 | import config_2ch_raw_spec_ipld as cfg
 15 | import csv
 16 | import scipy.stats
 17 | from sklearn import preprocessing
 18 | 
 19 | 
 20 | ### readwav
 21 | def readwav( path ):
 22 |     Struct = wavio.read( path )
 23 |     wav = Struct.data.astype(float) / np.power(2, Struct.sampwidth*8-1)
 24 |     fs = Struct.rate
 25 |     return wav, fs
 26 | 
 27 | ### def segment raw date
 28 | def segraw(x, nperseg, noverlap):
 29 |     step = nperseg - noverlap
 30 |     shape = x.shape[:-1]+((x.shape[-1]-noverlap)//step, nperseg)
 31 |     strides = x.strides[:-1]+(step*x.strides[-1], x.strides[-1])
 32 |     result = np.lib.stride_tricks.as_strided(x, shape=shape,strides=strides)
 33 |     return np.array(result)
 34 | 
 35 | # calculate mel feature
 36 | def GetMel( wav_fd, fe_fd_left, fe_fd_right, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, n_delete ):
 37 |     names = [ na for na in os.listdir(wav_fd) if na.endswith('.16kHz2ch.wav') ]
 38 |     extlen=len('16kHz2ch.wav')+1
 39 |     #print extlen
 40 |     #sys.exit()
 41 |     names = sorted(names)
 42 |     for na in names:
 43 |         print na
 44 |         path = wav_fd + '/' + na
 45 |         wav, fs = readwav( path )
 46 |         #print wav.shape
 47 |         #print fs
 48 |         #sys.exit()
 49 |         if ( wav.ndim==2 ): 
 50 |             wav_m = np.mean( wav, axis=-1 ) # mean
 51 |             wav_l = wav[:,0]                # left
 52 |             wav_r = wav[:,1]                # right
 53 |             wav_d = wav_l-wav_r             # difference
 54 |             #wavio.write('wav_m.wav',wav_m,16000,sampwidth=2)
 55 |             #wavio.write('wav_l.wav',wav_l,16000,sampwidth=2)
 56 |             #wavio.write('wav_r.wav',wav_r,16000,sampwidth=2)
 57 |             #wavio.write('wav_d.wav',wav_d,16000,sampwidth=2)
 58 |         #print wav_m.shape, wav_d.shape, wav_l.shape, wav_r.shape
 59 |         #sys.exit()
 60 |         assert fs==cfg.fs
 61 |         ham_win = np.hamming(cfg.win)
 62 |         
 63 | 
 64 |         #X_m=segraw(wav_m, cfg.win, cfg.win/2)#overlap bigger, more training samples
 65 |         #X_r=segraw(wav_r, cfg.win, cfg.win/2)
 66 |         #X_d=segraw(wav_d, cfg.win, cfg.win/2)
 67 |         #X_l=segraw(wav_l, cfg.win, cfg.win/2)
 68 |         X_m=segraw(wav_m, cfg.win, 0)#overlap bigger, more training samples
 69 |         X_r=segraw(wav_r, cfg.win, 0)
 70 |         X_d=segraw(wav_d, cfg.win, 0)
 71 |         X_l=segraw(wav_l, cfg.win, 0)
 72 | 
 73 | 
 74 |         #[f_l, t_l, X_l] = signal.spectral.spectrogram( wav_l, window=ham_win, nperseg=cfg.win, noverlap=cfg.win/2, detrend=False, return_onesided=False, mode='magnitude' )
 75 |         [f_l, t_l, X_l] = signal.spectral.spectrogram( wav_l, window=ham_win, nperseg=cfg.win, noverlap=0, detrend=False, return_onesided=False, mode='magnitude' )
 76 |         X_l = X_l.T
 77 |         [f_r, t_r, X_r] = signal.spectral.spectrogram( wav_r, window=ham_win, nperseg=cfg.win, noverlap=0, detrend=False, return_onesided=False, mode='magnitude' )
 78 |         X_r = X_r.T
 79 |         
 80 |         #X_ild=20*np.log10(abs(np.divide(X_l,X_r)))
 81 |         X_ipd=np.angle(np.divide(X_l,X_r),deg=0)
 82 |         X_ild=X_l-X_r
 83 |         print X_ild.shape
 84 |         
 85 |         #print np.max(X_ild),np.min(X_ild)
 86 |         #system.exit()
 87 |         # DEBUG. print mel-spectrogram
 88 |         #plt.matshow(X_ild, origin='lower', aspect='auto')
 89 |         #plt.show()
 90 |         #pause
 91 |         
 92 |         out_path_left = fe_fd_left + '/' + na[0:-extlen] + '.f'
 93 |         out_path_right = fe_fd_right + '/' + na[0:-extlen] + '.f'
 94 |         out_path_mean = fe_fd_mean + '/' + na[0:-extlen] + '.f'
 95 |         out_path_ipd = fe_fd_ipd + '/' + na[0:-extlen] + '.f'
 96 |         out_path_ild = fe_fd_ild + '/' + na[0:-extlen] + '.f'
 97 |         out_path_diff = fe_fd_diff + '/' + na[0:-extlen] + '.f'
 98 |         cPickle.dump( X_l, open(out_path_left, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL )
 99 |         cPickle.dump( X_r, open(out_path_right, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL )
100 |         cPickle.dump( X_m, open(out_path_mean, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL )
101 |         cPickle.dump( X_d, open(out_path_diff, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL )
102 |         cPickle.dump( X_ipd, open(out_path_ipd, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL )
103 |         cPickle.dump( X_ild, open(out_path_ild, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL )
104 | 
# calculate mel filterbank feature of the mean channel plus magnitude difference
def GetMBK( wav_fd, fe_fd_left, fe_fd_right, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, n_delete ):
    """Extract and pickle mel-band and ILD features for every ``*.16kHz2ch.wav`` in ``wav_fd``.

    For each clip, written as ``<clip>.f``:
      * ``fe_fd_mean``: one-sided magnitude spectrogram of the channel mean
        (Hamming window, 50% overlap) projected onto 40 mel filters spanning
        0-8000 Hz, each filter scaled to a peak of 1.
      * ``fe_fd_ild``: left minus right two-sided magnitude spectrogram.

    ``fe_fd_left``, ``fe_fd_right``, ``fe_fd_diff``, ``fe_fd_ipd`` and
    ``n_delete`` are accepted for interface compatibility and are not used.
    The mel matrix is cached in the module-level ``melW`` after the first clip.

    Raises:
        ValueError: if a file does not contain at least two channels.
        AssertionError: if a file's sample rate differs from ``cfg.fs``.
    """
    global melW
    suffix = '.16kHz2ch.wav'
    names = sorted(na for na in os.listdir(wav_fd) if na.endswith(suffix))
    ham_win = np.hamming(cfg.win)

    def _magnitude(x, onesided):
        # magnitude spectrogram with 50% overlap, returned as (frames, bins)
        _, _, spec = signal.spectrogram( x, window=ham_win, nperseg=cfg.win, noverlap=cfg.win // 2, detrend=False, return_onesided=onesided, mode='magnitude' )
        return spec.T

    def _dump(obj, out_fd, stem):
        with open(out_fd + '/' + stem + '.f', 'wb') as out_file:
            cPickle.dump( obj, out_file, protocol=cPickle.HIGHEST_PROTOCOL )

    for na in names:
        print(na)
        path = wav_fd + '/' + na
        wav, fs = readwav( path )
        # Without this guard a non-stereo file would silently reuse the
        # previous clip's channels (or fail with an unrelated error).
        if wav.ndim != 2 or wav.shape[1] < 2:
            raise ValueError('expected a 2-channel recording, got shape %s: %s' % (wav.shape, path))
        wav_m = np.mean( wav, axis=-1 ) # mean
        wav_l = wav[:,0]                # left
        wav_r = wav[:,1]                # right
        assert fs==cfg.fs

        X_m = _magnitude(wav_m, True)
        X_l = _magnitude(wav_l, False)
        X_r = _magnitude(wav_r, False)

        X_ild = X_l-X_r
        print(X_ild.shape)

        # define global melW, avoid init melW every time, to speed up.
        if globals().get('melW') is None:
            # sr is passed by keyword: recent librosa releases no longer
            # accept it positionally, older ones accept the keyword too.
            melW = librosa.filters.mel( sr=fs, n_fft=cfg.win, n_mels=40, fmin=0., fmax=8000 )
            melW /= np.max(melW, axis=-1)[:,None]

        X_m = np.dot( X_m, melW.T )
        print(X_m.shape)

        stem = na[:-len(suffix)]
        _dump( X_m, fe_fd_mean, stem )
        _dump( X_ild, fe_fd_ild, stem )
173 | 
# debug visualisation of spectrogram-domain spatial features (writes no feature files)
def GetSpec( wav_fd, fe_fd_left, fe_fd_right, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, n_delete ):
    """Compute spectrogram-domain features per clip and plot them for the third clip.

    For every ``*.16kHz2ch.wav`` in ``wav_fd`` (sorted by name), one-sided
    spectrograms (Hamming window, 50% overlap) are computed for the channel
    mean, the channel difference, and the left and right channels. The
    magnitude difference ``X_l - X_r`` is printed for each clip. On the third
    clip a four-panel figure is shown and the program exits.

    Feature saving is disabled in this function, so the ``fe_fd_*`` folders
    and ``n_delete`` are accepted for interface compatibility and are not used.

    Raises:
        ValueError: if a file does not contain at least two channels.
        AssertionError: if a file's sample rate differs from ``cfg.fs``.
        SystemExit: after the figure for the third clip is closed.
    """
    suffix = '.16kHz2ch.wav'
    names = sorted(na for na in os.listdir(wav_fd) if na.endswith(suffix))
    ham_win = np.hamming(cfg.win)

    def _stft(x, mode):
        # one-sided spectrogram with 50% overlap, returned as (frames, bins)
        _, _, spec = signal.spectrogram( x, window=ham_win, nperseg=cfg.win, noverlap=cfg.win // 2, detrend=False, return_onesided=True, mode=mode )
        return spec.T

    va = 1      # symmetric colour range of the plots
    for num, na in enumerate(names, 1):
        print(na)
        path = wav_fd + '/' + na
        wav, fs = readwav( path )
        # Without this guard a non-stereo file would silently reuse the
        # previous clip's channels (or fail with an unrelated error).
        if wav.ndim != 2 or wav.shape[1] < 2:
            raise ValueError('expected a 2-channel recording, got shape %s: %s' % (wav.shape, path))
        wav_m = np.mean( wav, axis=-1 ) # mean
        wav_l = wav[:,0]                # left
        wav_r = wav[:,1]                # right
        wav_d = wav_l-wav_r             # difference
        assert fs==cfg.fs

        X_m = _stft(wav_m, 'magnitude')
        X_d = _stft(wav_d, 'magnitude')
        X_l = _stft(wav_l, 'magnitude')
        X_r = _stft(wav_r, 'magnitude')
        X_l1 = _stft(wav_l, 'complex')
        X_r1 = _stft(wav_r, 'complex')

        # level difference in dB and phase difference from the complex spectra
        X_ild1 = 20*np.log10(abs(np.divide((X_l1),(X_r1))))
        # NOTE(review): X_ipd1 is computed but never plotted or saved.
        X_ipd1 = np.angle(np.divide(X_l1,X_r1),deg=0)
        X_ild = X_l-X_r
        print(X_ild.shape)

        # DEBUG. print the magnitude difference and plot the third clip
        print(X_ild)
        if num == 3:
            # NOTE(review): the titles do not obviously match the plotted data
            # ('ILD' shows the difference-signal spectrogram, 'IPD' shows the
            # dB level difference X_ild1). Strings kept unchanged; confirm intent.
            panels = [ (X_m, 'Spectrogram'), (X_ild, 'IMD'), (X_d, 'ILD'), (X_ild1, 'IPD') ]
            fig = plt.figure()
            for row, (feat, title) in enumerate(panels, 1):
                ax = fig.add_subplot(4,1,row)
                ax.matshow(feat.T, origin='lower', aspect='auto', vmin=-va,vmax=va)
                plt.xlabel('Frames')
                plt.ylabel('Frequency bins')
                plt.title(title,fontweight='bold',loc='right')
            plt.show()
            # The original used a bare `pause` name here, which stopped the run
            # with a NameError; stop explicitly instead.
            sys.exit()
279 |           
280 | ### format label
# get tags
def GetTags( info_path ):
    """Return the tag string stored in the annotation CSV at ``info_path``.

    The value is the second column of the second-to-last row of the file.

    Raises:
        IndexError: if the file has fewer than two rows or that row has fewer
            than two columns.
    """
    # The csv module wants a binary file on Python 2 and a text file opened
    # with newline='' on Python 3.
    if sys.version_info[0] < 3:
        f = open( info_path, 'rb' )
    else:
        f = open( info_path, 'r', newline='' )
    with f:
        lis = list(csv.reader(f))
    return lis[-2][1]
288 |             
# tags to multi-hot vector, shape: (n_labels)
def TagsToCategory( tags ):
    """Return a float vector of length ``len(cfg.labels)`` with a 1 for each tag.

    Each character of ``tags`` is mapped to its index through ``cfg.lb_to_id``;
    a character missing from that mapping raises ``KeyError``.
    """
    n_labels = len(cfg.labels)
    onehot = np.zeros( n_labels )
    for tag_char in tags:
        label_id = cfg.lb_to_id[tag_char]
        onehot[ label_id ] = 1
    return onehot
295 | 
# get chunk data, one sample per context window
def GetAllData( fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, agg_num, hop, fold ):
    """Load all six feature streams for every clip and split them by fold.

    For each row of ``cfg.dev_cv_csv_path`` the six pickled feature matrices
    of the clip are cut into context windows with
    ``mat_2d_to_3d(X, agg_num, hop)`` and combined by
    ``mat_concate_multiinmaps6in`` in the order left, right, mean, diff, ipd,
    ild. Clips whose fold equals ``fold`` go to the test set, all others to
    the training set; the clip label is repeated once per window.

    Note that the first two parameters are ``fe_fd_right`` then ``fe_fd_left``.

    Returns:
        ``(tr_X, tr_y, te_X, te_y)`` with the X arrays concatenated along
        axis 0. ``np.concatenate`` raises ``ValueError`` if either split is empty.
    """
    with open( cfg.dev_cv_csv_path, 'rb') as f:
        lis = list(csv.reader(f))

    def _windows(fe_fd, na):
        # load one pickled feature matrix and cut it into context windows
        with open( fe_fd + '/' + na + '.f', 'rb' ) as fe_file:
            X = cPickle.load( fe_file )
        return mat_2d_to_3d( X, agg_num, hop )

    # order in which the streams are handed to mat_concate_multiinmaps6in
    fe_fds = ( fe_fd_left, fe_fd_right, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild )

    tr_Xlist, tr_ylist = [], []
    te_Xlist, te_ylist = [], []

    # read one line
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        # tags of this clip
        info_path = cfg.dev_wav_fd + '/' + na + '.csv'
        y = TagsToCategory( GetTags( info_path ) )

        # features of this clip, combined into one multi-map array
        X4d = mat_concate_multiinmaps6in( *[ _windows(fe_fd, na) for fe_fd in fe_fds ] )

        if curr_fold==fold:
            te_Xlist.append( X4d )
            te_ylist += [ y ] * len( X4d )
        else:
            tr_Xlist.append( X4d )
            tr_ylist += [ y ] * len( X4d )

    return np.concatenate( tr_Xlist, axis=0 ), np.array( tr_ylist ),\
           np.concatenate( te_Xlist, axis=0 ), np.array( te_ylist )
359 | 
360 | 
# get chunk data as two separate inputs (mean and ILD), one sample per context window
def GetAllData_separate( fe_fd_right, fe_fd_left, fe_fd_mean, fe_fd_diff, fe_fd_ipd, fe_fd_ild, agg_num, hop, fold ):
    """Load the mean and ILD feature streams for every clip and split them by fold.

    For each row of ``cfg.dev_cv_csv_path`` the clip's pickled features from
    ``fe_fd_mean`` and ``fe_fd_ild`` are cut into context windows with
    ``mat_2d_to_3d(X, agg_num, hop)``. Clips whose fold equals ``fold`` go to
    the test set, all others to the training set; the clip label is repeated
    once per mean-stream window.

    ``fe_fd_right``, ``fe_fd_left``, ``fe_fd_diff`` and ``fe_fd_ipd`` are
    accepted for interface compatibility and are not used.

    Returns:
        ``(tr_X1, tr_X2, tr_y, te_X1, te_X2, te_y)`` where X1 is the mean
        stream and X2 the ILD stream, each concatenated along axis 0.
        ``np.concatenate`` raises ``ValueError`` if either split is empty.
    """
    with open( cfg.dev_cv_csv_path, 'rb') as f:
        lis = list(csv.reader(f))

    def _windows(fe_fd, na):
        # load one pickled feature matrix and cut it into context windows
        with open( fe_fd + '/' + na + '.f', 'rb' ) as fe_file:
            X = cPickle.load( fe_file )
        return mat_2d_to_3d( X, agg_num, hop )

    tr_Xlist1, tr_Xlist2, tr_ylist = [], [], []
    te_Xlist1, te_Xlist2, te_ylist = [], [], []

    # read one line
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        # tags of this clip
        info_path = cfg.dev_wav_fd + '/' + na + '.csv'
        y = TagsToCategory( GetTags( info_path ) )

        X3d_m = _windows( fe_fd_mean, na )
        X3d_ild = _windows( fe_fd_ild, na )

        if curr_fold==fold:
            te_Xlist1.append( X3d_m )
            te_Xlist2.append( X3d_ild )
            te_ylist += [ y ] * len( X3d_m )
        else:
            tr_Xlist1.append( X3d_m )
            tr_Xlist2.append( X3d_ild )
            tr_ylist += [ y ] * len( X3d_m )

    return np.concatenate( tr_Xlist1, axis=0 ), np.concatenate( tr_Xlist2, axis=0 ), np.array( tr_ylist ),\
           np.concatenate( te_Xlist1, axis=0 ), np.concatenate( te_Xlist2, axis=0 ), np.array( te_ylist )
426 |     
427 | # size: n_songs*n_chunks*agg_num*n_in
def GetSegData( fe_fd, agg_num, hop, fold ):
    """Load per-recording segmented features and split them by fold.

    Every row of the cross-validation csv at cfg.dev_cv_csv_path supplies a
    recording name (column 1) and a fold index (column 2). For each row the
    pickled feature matrix '<fe_fd>/<name>.f' is segmented with
    mat_2d_to_3d and the tags read from '<cfg.dev_wav_fd>/<name>.csv' are
    converted with TagsToCategory.

    Args:
        fe_fd: folder containing the pickled '<name>.f' feature files.
        agg_num: forwarded to mat_2d_to_3d (presumably frames per segment
            -- confirm against mat_2d_to_3d).
        hop: forwarded to mat_2d_to_3d (presumably hop between segments
            -- confirm against mat_2d_to_3d).
        fold: rows whose fold index equals this value form the test set,
            all other rows form the training set.

    Returns:
        (tr_X, tr_y, te_X, te_y) as numpy arrays. Each recording contributes
        one segmented feature block and exactly one label.
    """
    with open( cfg.dev_cv_csv_path, 'rb') as f:
        lis = list( csv.reader(f) )

    tr_Xlist, tr_ylist = [], []
    te_Xlist, te_ylist = [], []

    # one csv row per recording
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        # get features, tags
        fe_path = fe_fd + '/' + na + '.f'
        info_path = cfg.dev_wav_fd + '/' + na + '.csv'
        tags = GetTags( info_path )
        y = TagsToCategory( tags )
        # close the feature file deterministically instead of leaking the handle
        with open( fe_path, 'rb' ) as fe_file:
            X = cPickle.load( fe_file )

        # aggregate data
        X3d = mat_2d_to_3d( X, agg_num, hop )

        if curr_fold==fold:
            te_Xlist.append( X3d )
            te_ylist.append( y )
        else:
            tr_Xlist.append( X3d )
            tr_ylist.append( y )

    return np.array( tr_Xlist ), np.array( tr_ylist ), \
           np.array( te_Xlist ), np.array( te_ylist )
460 |            
def GetScaler( fe_fd, fold ):
    """Fit a mean/variance scaler on the training-fold features.

    Reads the cross-validation csv at cfg.dev_cv_csv_path (column 1 is the
    recording name, column 2 the fold index), stacks the pickled feature
    matrices '<fe_fd>/<name>.f' of every recording that is NOT in `fold`
    along axis 0 and fits a preprocessing.StandardScaler on the result.

    Args:
        fe_fd: folder containing the pickled '<name>.f' feature files.
        fold: fold index held out for testing; its recordings are excluded
            from the statistics.

    Returns:
        The fitted StandardScaler (with_mean=True, with_std=True).

    Raises:
        ValueError: from np.concatenate when no recording lies outside
            `fold`, i.e. there is nothing to fit on.
    """
    with open( cfg.dev_cv_csv_path, 'rb') as f:
        lis = list( csv.reader(f) )

    tr_Xlist = []

    # one csv row per recording
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        # Held-out recordings never contribute to the scaler, so skip them
        # before touching the disk instead of unpickling and discarding them.
        if curr_fold==fold:
            continue

        fe_path = fe_fd + '/' + na + '.f'
        # close the feature file deterministically instead of leaking the handle
        with open( fe_path, 'rb' ) as fe_file:
            tr_Xlist.append( cPickle.load( fe_file ) )

    Xall = np.concatenate( tr_Xlist, axis=0 )
    scaler = preprocessing.StandardScaler( with_mean=True, with_std=True ).fit( Xall )

    return scaler
483 |            
def GetScalerSegData( fe_fd, agg_num, hop, fold, scaler ):
    """Load per-recording segmented features, optionally scaled, split by fold.

    Every row of the cross-validation csv at cfg.dev_cv_csv_path supplies a
    recording name (column 1) and a fold index (column 2). For each row the
    pickled feature matrix '<fe_fd>/<name>.f' is loaded, passed through
    scaler.transform when a scaler is given, and segmented with
    mat_2d_to_3d. Labels come from '<cfg.dev_wav_fd>/<name>.csv' via
    GetTags and TagsToCategory.

    Args:
        fe_fd: folder containing the pickled '<name>.f' feature files.
        agg_num: forwarded to mat_2d_to_3d (presumably frames per segment
            -- confirm against mat_2d_to_3d).
        hop: forwarded to mat_2d_to_3d (presumably hop between segments
            -- confirm against mat_2d_to_3d).
        fold: rows whose fold index equals this value form the test set,
            all other rows form the training set.
        scaler: object with a transform() method applied to each feature
            matrix before segmentation, or None to leave features as loaded.

    Returns:
        (tr_X, tr_y, te_X, te_y) as numpy arrays. Each recording contributes
        one segmented feature block and exactly one label.
    """
    with open( cfg.dev_cv_csv_path, 'rb') as f:
        lis = list( csv.reader(f) )

    tr_Xlist, tr_ylist = [], []
    te_Xlist, te_ylist = [], []

    # one csv row per recording
    for li in lis:
        na = li[1]
        curr_fold = int(li[2])

        # get features, tags
        fe_path = fe_fd + '/' + na + '.f'
        info_path = cfg.dev_wav_fd + '/' + na + '.csv'
        tags = GetTags( info_path )
        y = TagsToCategory( tags )
        # close the feature file deterministically instead of leaking the handle
        with open( fe_path, 'rb' ) as fe_file:
            X = cPickle.load( fe_file )
        if scaler is not None:
            X = scaler.transform( X )

        # aggregate data
        X3d = mat_2d_to_3d( X, agg_num, hop )

        if curr_fold==fold:
            te_Xlist.append( X3d )
            te_ylist.append( y )
        else:
            tr_Xlist.append( X3d )
            tr_ylist.append( y )

    return np.array( tr_Xlist ), np.array( tr_ylist ), \
           np.array( te_Xlist ), np.array( te_ylist )
519 |     
520 | ###
521 | # create an empty folder
def CreateFolder( fd ):
    """Create folder `fd`, including missing parents, unless it already exists.

    The creation is attempted directly instead of checking for existence
    first, so a folder that appears between a check and os.makedirs (for
    example when two runs start at once) does not make this call fail.

    Args:
        fd: path of the folder to create.

    Raises:
        OSError: if the folder could not be created and `fd` still does not
            exist afterwards.
    """
    try:
        os.makedirs(fd)
    except OSError:
        # An already-present path is the expected "nothing to do" case;
        # any other failure leaves the path missing and is re-raised.
        if not os.path.exists(fd):
            raise
525 |             
if __name__ == "__main__":
    # Lay out the scratch directory tree (feature folders, results, models)
    # under cfg.scrap_fd, parents first.
    for _sub_fd in ( '/Fe',
                     '/Fe/Mel_l',
                     '/Fe/Mel_r',
                     '/Fe/Mel_m',
                     '/Fe/Mel_d',
                     '/Fe/Mel_ild',
                     '/Fe/Mel_ipd',
                     '/Results',
                     '/Md' ):
        CreateFolder( cfg.scrap_fd + _sub_fd )

    # Run feature extraction from the development wavs into the
    # left/right/mean/diff/ipd/ild feature folders named in the config.
    GetSpec( cfg.dev_wav_fd,
             cfg.dev_fe_mel_fd_left,
             cfg.dev_fe_mel_fd_right,
             cfg.dev_fe_mel_fd_mean,
             cfg.dev_fe_mel_fd_diff,
             cfg.dev_fe_mel_fd_ipd,
             cfg.dev_fe_mel_fd_ild,
             n_delete=0 )
537 | 


--------------------------------------------------------------------------------