├── .coveragerc.yml ├── .travis.yml ├── DNN ├── run_DNN.py └── utils.py ├── Data ├── 48_39.map ├── 48_idx_chr.map ├── RNN_testprob.npy ├── RNN_trainprob.npy ├── test.data ├── train.data ├── train.label ├── ytest_prob.npy └── ytrain_prob.npy ├── HMM_topRNN ├── HMM_utils.py └── run_HMM.py ├── README.md ├── RNN_LSTM ├── LSTM_utils.py ├── RNN_utils.py ├── activation.py ├── optimize.py ├── run_LSTM.py ├── run_RNN.py └── shortcuts.py ├── requirements.txt └── tests ├── __init__.py └── test_run.py /.coveragerc.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/.coveragerc.yml -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | cache: pip 3 | sudo: required 4 | 5 | 6 | python: 7 | - "3.5" 8 | 9 | 10 | before_install: 11 | - pip install -U pip 12 | - pip install wheel 13 | - pip install coveralls 14 | - sudo apt-get update 15 | 16 | 17 | env: 18 | global: 19 | - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels 20 | - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels 21 | - THEANO_FLAGS='gcc.cxxflags="-march=core2",floatX=float32' 22 | 23 | 24 | install: 25 | - pip wheel -r requirements.txt 26 | - pip install -r requirements.txt 27 | 28 | 29 | script: 30 | - py.test . --cov=./ 31 | - flake8 ./ 32 | 33 | 34 | after_success: 35 | - coveralls 36 | -------------------------------------------------------------------------------- /DNN/run_DNN.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-10-11 18:46:54 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 23:04:16 6 | # flag: THEANO_FLAGS='floatX=float32' 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import theano as th 11 | import theano.tensor as T 12 | import gc 13 | import os 14 | import sys 15 | 16 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa 17 | 18 | from datetime import datetime 19 | from utils import load_data, load_label, initialize_NNet, maxout, \ 20 | softmax, update, gen_y_hat, accuracy 21 | 22 | 23 | def construct_DNN(n_input, n_output, n_hid_layers=2, archi=128, 24 | lr=1e-3, batchsize=40, dropout_rate=0.2, moment=0.95): 25 | """ 26 | Initialize and construct the deep neural network with dropout 27 | update the DNN using momentum and minibatch 28 | archi: number of neurons of each hidden layer 29 | """ 30 | # decide dropout or not: stop_dropout > 1.05 means no dropout 31 | x = T.fmatrix() 32 | y_hat = T.fmatrix() 33 | stop_dropout = T.scalar() 34 | 35 | # initialize parameters 36 | Ws, bs, cache_Ws, cache_bs = initialize_NNet(n_input, n_output, 37 | archi, n_hid_layers) 38 | 39 | # ############ construct the neural network ############### 40 | Zs = [] 41 | As = [] 42 | 43 | # input layer 44 | Zs.append(T.dot(x, Ws[0]) + bs[0].dimshuffle('x', 0)) 45 | As.append(maxout(Zs[0], stop_dropout, archi, dropout_rate) / stop_dropout) 46 | 47 | # hidden layers 48 | for i in range(n_hid_layers): 49 | Zs.append(T.dot(As[i], Ws[i + 1]) + bs[i + 1].dimshuffle('x', 0)) 50 | act_out = maxout(Zs[i + 1], stop_dropout, archi, dropout_rate) 51 | As.append(act_out / stop_dropout) 52 | 
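    # NOTE: maxout pairs up the archi pre-activation units and takes
    # elementwise maxima, so each activation in As has only archi/2 columns;
    # that is why the hidden/output weights in utils.py are (archi // 2, ...)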
53 | # output layer 54 | z_out = T.dot(As[n_hid_layers], Ws[n_hid_layers + 1]) 55 | Zs.append(z_out + bs[n_hid_layers + 1].dimshuffle('x', 0)) 56 | y = softmax(Zs[-1] / stop_dropout) 57 | 58 | # ############ end of network construction ############### 59 | 60 | forward = th.function([x, stop_dropout], y) 61 | parameters = Ws + bs 62 | moment_cache = cache_Ws + cache_bs 63 | 64 | # objective is the categorical cross-entropy 65 | Cost = ((-T.log((y * y_hat).sum(axis=1))).sum()) / batchsize 66 | 67 | # calculate gradients 68 | grads = T.grad(Cost, parameters, disconnected_inputs='ignore') 69 | 70 | # update parameters using momentum 71 | update_func = update(parameters, grads, moment_cache, lr, moment) 72 | gradient_update = th.function(inputs=[x, y_hat, stop_dropout], 73 | updates=update_func, outputs=Cost) 74 | 75 | return gradient_update, forward 76 | 77 | 78 | def train_model(N, epoch, batchsize, gradient_update, feed_forward, 79 | data, label_data, n_output, dropout_rate): 80 | """train the deep neural network""" 81 | train_start = datetime.now() 82 | obj_history = [] 83 | valid_accu = [] 84 | cache = {} 85 | 86 | for j in range(epoch): 87 | indexes = np.random.permutation(N - 8) 88 | objective = 0 89 | 90 | # train the model 91 | for i in range(int(N / batchsize)): 92 | if i % 1000 == 0: 93 | gc.collect() 94 | 95 | # make the minibatch data 96 | use_inds = indexes[i * batchsize:(i + 1) * batchsize] + 4 97 | batch_X = [] 98 | 99 | for ind in use_inds: 100 | if ind < 4: 101 | sils = np.zeros((4 - ind) * data.shape[1]) 102 | dat = data.iloc[:(ind + 5)].values.ravel() 103 | batch_X.append(np.concatenate((sils, dat))) 104 | 105 | elif ind > (N - 5): 106 | dat = data.iloc[(ind - 4):].values.ravel() 107 | sils = np.zeros((5 - N + ind) * data.shape[1]) 108 | batch_X.append(np.concatenate((dat, sils))) 109 | 110 | else: 111 | dat = data.iloc[(ind - 4):(ind + 5)].values.ravel() 112 | batch_X.append(dat) 113 | 114 | batch_Y = [gen_y_hat(ind, n_output, data, label_data, cache) 115 | for ind in use_inds] 116 | # update the model 117 | objective += gradient_update(batch_X, batch_Y, 1)  # stop_dropout=1 keeps dropout on 118 | 119 | obj_history.append(objective / int(N / batchsize)) 120 | print('\tepoch: %d; obj: %.4f' % (j + 1, obj_history[-1])) 121 | 122 | # validation set 123 | valid_accu.append(accuracy(N, data.shape[0], data, feed_forward, 124 | n_output, label_data, cache, dropout_rate)) 125 | 126 | print("\tCost: %.4f; valid accu: %.2f %%, %.4f seconds used.\n" % 127 | (obj_history[-1], 100 * valid_accu[-1], 128 | (datetime.now() - train_start).total_seconds())) 129 | # early stop 130 | if (valid_accu[0] != valid_accu[-1]): 131 | if valid_accu[-2] * 0.98 > valid_accu[-1]: 132 | print("Validation accuracy starts decreasing, stop training") 133 | break 134 | 135 | return obj_history, valid_accu, cache 136 | 137 | 
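# NOTE: sketch of the assumed input layout — each training example is a
# window of 9 consecutive frames (the centre frame plus 4 on each side),
# flattened into one vector and zero-padded near utterance boundaries; e.g.
# with a hypothetical 69 features per frame, each row has 9 * 69 values.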
138 | def test_predict(test_file, label_map, forward, base_dir, dropout_rate, 139 | save_prob=False, filename='test_predict.csv'): 140 | """predict on test set and output the file""" 141 | print("Start predicting...") 142 | 143 | test_data = load_data(test_file) 144 | test_X = [] 145 | test_N = len(test_data) 146 | # generate test input data 147 | for i in range(test_N): 148 | if i < 4: 149 | sils = np.zeros((4 - i) * test_data.shape[1]) 150 | dat = test_data.iloc[:(i + 5)].values.ravel() 151 | test_X.append(np.concatenate((sils, dat))) 152 | 153 | elif i > (test_N - 5): 154 | dat = test_data.iloc[(i - 4):].values.ravel() 155 | sils = np.zeros((5 - test_N + i) * test_data.shape[1]) 156 | test_X.append(np.concatenate((dat, sils))) 157 | 158 | else: 159 | test_X.append(test_data.iloc[(i - 4):(i + 5)].values.ravel()) 160 | 161 | y_test_pred = forward(test_X, np.float32(1 / (1 - dropout_rate))) 162 | 163 | if save_prob: 164 | np.save('ytest_prob', y_test_pred) 165 | 166 | # find the mapping from int to phoneme 167 | phoneme_map = {} 168 | pmap = pd.read_csv(base_dir + '48_39.map', sep='\t', header=None) 169 | for p1, p2 in pmap.values: 170 | phoneme_map[p1] = p2 171 | 172 | int_phoneme_map = {} 173 | for key, val in label_map.items(): 174 | int_phoneme_map[val] = phoneme_map[key] 175 | 176 | test_phon = [int_phoneme_map[np.argmax(y_vec)] for y_vec in y_test_pred] 177 | data = {'Prediction': test_phon, 'Id': test_data.index.values} 178 | test_df = pd.DataFrame(data=data) 179 | test_df.to_csv(filename, index=None) 180 | 181 | 182 | def run_model(train_file, train_labfile, test_file=None, valid_ratio=0.1, 183 | batchsize=240, epoch=10, neurons=36, n_hiddenlayer=2, lr=1e-2, 184 | base_dir='../Data/', save_prob=False, dropout_rate=0.2): 185 | """Run the deep neural network with dropout""" 186 | print("Start") 187 | st = datetime.now() 188 | 189 | data = load_data(base_dir + train_file) 190 | label_data, label_map = load_label(base_dir + train_labfile) 191 | 192 | # window size = 9, output = 48 phonemes 193 | n_input = data.shape[1] * 9 194 | n_output = 48 195 | N = int(data.shape[0] * (1 - valid_ratio)) 196 | 197 | print("Done loading data. Start constructing the model...") 198 | functions = construct_DNN(n_input, n_output, archi=neurons, 199 | n_hid_layers=n_hiddenlayer, lr=lr, 200 | dropout_rate=dropout_rate) 201 | gradient_update, feed_forward = functions 202 | 203 | print("Finished constructing the model. Start training...") 204 | result = train_model(N, epoch, batchsize, gradient_update, 205 | feed_forward, data, label_data, n_output, 206 | dropout_rate) 207 | obj_history, valid_accu, cache = result 208 | 209 | # train accuracy 210 | train_accu = accuracy(0, N, data, feed_forward, n_output, 211 | label_data, cache, dropout_rate) 212 | print("Training Accuracy: %.4f %%" % (100 * train_accu)) 213 | 214 | # validation 215 | valid_accu = accuracy(N, data.shape[0], data, feed_forward, 216 | n_output, label_data, cache, dropout_rate) 217 | print("Validation Accuracy: %.4f %%" % (100 * valid_accu)) 218 | 219 | if save_prob: 220 | accuracy(0, data.shape[0], data, feed_forward, n_output, 221 | label_data, cache, dropout_rate, 222 | save_pred=True, save_name='ytrain_prob') 223 | 224 | if test_file: 225 | test_predict(base_dir + test_file, label_map, feed_forward, 226 | base_dir, dropout_rate, save_prob=save_prob) 227 | 228 | print("Done, Using %s." % str(datetime.now() - st))
229 | 230 | 231 | def main(): 232 | run_model('train.data', 'train.label', 'test.data', 233 | neurons=256, n_hiddenlayer=2, save_prob=True) 234 | 235 | 236 | if __name__ == '__main__': 237 | main() 238 | -------------------------------------------------------------------------------- /DNN/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-10-12 16:25:45 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 18:39:14 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import theano as th 10 | import theano.tensor as T 11 | import gc 12 | 13 | from theano.ifelse import ifelse 14 | from theano.tensor.shared_randomstreams import RandomStreams 15 | 16 | 17 | def load_data(filename, nrows=None, normalize=True): 18 | """load data from file, first column as index, dtype=float32""" 19 | ind = pd.read_csv(filename, sep=' ', header=None, index_col=0, nrows=5) 20 | dtype_dict = {c: np.float32 for c in ind.columns} 21 | data = pd.read_csv(filename, sep=' ', header=None, index_col=0, 22 | dtype=dtype_dict, nrows=nrows) 23 | # normalize 24 | if normalize: 25 | data = (data - data.mean()) / data.std() 26 | gc.collect() 27 | 28 | return data 29 | 30 | 31 | def load_label(filename): 32 | label_data = pd.read_csv(filename, header=None, index_col=0) 33 | label_map = {} 34 | for ind, lab in enumerate(np.unique(label_data.values)): 35 | label_map[lab] = ind 36 | 37 | label_data = label_data.applymap(lambda x: label_map[x]) 38 | gc.collect() 39 | 40 | return label_data, label_map 41 | 42 | 43 | def random_number(shape, scale=1): 44 | return (scale * np.random.randn(*shape)).astype('float32') 45 | 46 | 47 | def zero_number(shape): 48 | return np.zeros(shape).astype('float32') 49 | 50 | 51 | def initialize_NNet(n_input, n_output, archi=128, 52 | n_hid_layers=3, scale=0.033): 53 | """initialize the NNet parameters, archi: hidden layer neurons""" 54 | Ws = [] 55 | bs = [] 56 | cache_Ws = [] 57 | cache_bs = [] 58 | 59 | # input layer 60 | Ws.append(th.shared(random_number([n_input, archi], scale=scale))) 61 | cache_Ws.append(th.shared(zero_number((n_input, archi)))) 62 | 63 | bs.append(th.shared(random_number([archi], scale=scale))) 64 | cache_bs.append(th.shared(zero_number(archi))) 65 | 66 | # hidden layers (maxout halves the width, hence archi // 2 input units) 67 | for i in range(n_hid_layers): 68 | Ws.append(th.shared(random_number([archi // 2, archi], scale=scale))) 69 | cache_Ws.append(th.shared(zero_number((archi // 2, archi)))) 70 | 71 | bs.append(th.shared(random_number([archi], scale=scale))) 72 | cache_bs.append(th.shared(zero_number(archi))) 73 | 74 | # output layer 75 | Ws.append(th.shared(random_number([archi // 2, n_output], scale=scale))) 76 | cache_Ws.append(th.shared(zero_number((archi // 2, n_output)))) 77 | 78 | bs.append(th.shared(random_number([n_output], scale=scale))) 79 | cache_bs.append(th.shared(zero_number(n_output))) 80 | 81 | return Ws, bs, cache_Ws, cache_bs 82 | 83 | 84 | def maxout(Z, stop_dropout, archi, dropout_rate, seed=5432): 85 | th.config.floatX = 'float32' 86 | Z_out = T.maximum(Z[:, :int(archi / 2)], Z[:, int(archi / 2):]) 87 | prob = (1 - dropout_rate) 88 | srng = RandomStreams(seed=seed) 89 | 90 | return ifelse(T.lt(stop_dropout, 1.05), 91 | Z_out * srng.binomial(size=T.shape(Z_out), 92 | p=prob).astype('float32'), 93 | Z_out) 94 | 95 | 96 | def softmax(z): 97 | Z = T.exp(z) 98 | results, _ = th.scan(lambda x: x / T.sum(x), sequences=Z) 99 | return results 100 | 101 | 102 | def 
update(para, grad, moment_cache, lr, moment): 103 | """theano update auxiliary function: use SGD plus momentum""" 104 | param_update = [] 105 | cache_update = [] 106 | 107 | for ix in range(len(grad)): 108 | change = moment * moment_cache[ix] - lr * grad[ix] 109 | param_update.append((para[ix], para[ix] + change)) 110 | cache_update.append((moment_cache[ix], change)) 111 | 112 | return param_update + cache_update 113 | 114 | 115 | def gen_y_hat(i, n_output, data, label_data, cache): 116 | """give the np array of y_hat""" 117 | try: 118 | return cache[i] 119 | 120 | except KeyError: 121 | y_h = np.zeros(n_output, dtype=np.float32) 122 | y_h[label_data[1].loc[data.index[i]]] = 1 123 | cache[i] = y_h 124 | 125 | return cache[i] 126 | 127 | 128 | def accuracy(from_ind, to_ind, data, forward, n_output, label_data, 129 | cache, dropout_rate, save_pred=False, save_name='pred_prob'): 130 | """compute the accuracy of the model""" 131 | X = [] 132 | y = [] 133 | 134 | for ind in range(from_ind, to_ind): 135 | if ind < from_ind + 4: 136 | sils = np.zeros((from_ind + 4 - ind) * data.shape[1]) 137 | dat = data.iloc[from_ind:(ind + 5)].values.ravel() 138 | X.append(np.concatenate((sils, dat))) 139 | 140 | elif ind > (to_ind - 5): 141 | dat = data.iloc[(ind - 4):to_ind].values.ravel() 142 | sils = np.zeros((5 - to_ind + ind) * data.shape[1]) 143 | X.append(np.concatenate((dat, sils))) 144 | 145 | else: 146 | X.append(data.iloc[(ind - 4):(ind + 5)].values.ravel()) 147 | 148 | y.append(gen_y_hat(ind, n_output, data, label_data, cache)) 149 | 150 | # stop_dropout > 1.05 the model won't do dropout 151 | y_pred = forward(X, 1 / (1 - dropout_rate)) 152 | if save_pred: 153 | np.save(save_name, y_pred) 154 | 155 | match = 0 156 | for i, ind in enumerate(range(from_ind, to_ind)): 157 | if np.argmax(y_pred[i]) == label_data[1].iloc[ind]: 158 | match += 1 159 | 160 | return match / len(y_pred) 161 | -------------------------------------------------------------------------------- /Data/48_39.map: -------------------------------------------------------------------------------- 1 | aa aa 2 | ae ae 3 | ah ah 4 | ao aa 5 | aw aw 6 | ax ah 7 | ay ay 8 | b b 9 | ch ch 10 | cl sil 11 | d d 12 | dh dh 13 | dx dx 14 | eh eh 15 | el l 16 | en n 17 | epi sil 18 | er er 19 | ey ey 20 | f f 21 | g g 22 | hh hh 23 | ih ih 24 | ix ih 25 | iy iy 26 | jh jh 27 | k k 28 | l l 29 | m m 30 | ng ng 31 | n n 32 | ow ow 33 | oy oy 34 | p p 35 | r r 36 | sh sh 37 | sil sil 38 | s s 39 | th th 40 | t t 41 | uh uh 42 | uw uw 43 | vcl sil 44 | v v 45 | w w 46 | y y 47 | zh sh 48 | z z 49 | -------------------------------------------------------------------------------- /Data/48_idx_chr.map: -------------------------------------------------------------------------------- 1 | aa 0 a 2 | ae 1 b 3 | ah 2 c 4 | ao 3 d 5 | aw 4 e 6 | ax 5 f 7 | ay 6 g 8 | b 7 h 9 | ch 8 i 10 | cl 9 j 11 | d 10 k 12 | dh 11 l 13 | dx 12 m 14 | eh 13 n 15 | el 14 o 16 | en 15 p 17 | epi 16 q 18 | er 17 r 19 | ey 18 s 20 | f 19 t 21 | g 20 u 22 | hh 21 v 23 | ih 22 w 24 | ix 23 x 25 | iy 24 y 26 | jh 25 z 27 | k 26 A 28 | l 27 B 29 | m 28 C 30 | n 29 D 31 | ng 30 E 32 | ow 31 F 33 | oy 32 G 34 | p 33 H 35 | r 34 I 36 | s 35 J 37 | sh 36 K 38 | sil 37 L 39 | t 38 M 40 | th 39 N 41 | uh 40 O 42 | uw 41 P 43 | v 42 Q 44 | vcl 43 R 45 | w 44 S 46 | y 45 T 47 | z 46 U 48 | zh 47 V 49 | -------------------------------------------------------------------------------- /Data/RNN_testprob.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/Data/RNN_testprob.npy -------------------------------------------------------------------------------- /Data/RNN_trainprob.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/Data/RNN_trainprob.npy -------------------------------------------------------------------------------- /Data/ytest_prob.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/Data/ytest_prob.npy -------------------------------------------------------------------------------- /Data/ytrain_prob.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/Data/ytrain_prob.npy -------------------------------------------------------------------------------- /HMM_topRNN/HMM_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-09 16:02:20 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 19:06:57 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import gc 10 | 11 | 12 | def load_label(filename): 13 | """load label data""" 14 | label_data = pd.read_csv(filename, header=None, index_col=0) 15 | label_map = {} 16 | for ind, lab in enumerate(np.unique(label_data.values)): 17 | label_map[lab] = ind 18 | 19 | label_data = label_data.applymap(lambda x: label_map[x]) 20 | gc.collect() 21 | 22 | return label_data, label_map 23 | 24 | 25 | def load_str_map(label_map, base_dir='../Data/'): 26 | """find the mapping from int to phoneme""" 27 | phoneme_map = {} 28 | phone_str_map = {} 29 | pmap = pd.read_csv(base_dir + '48_39.map', sep='\t', header=None) 30 | str_map = pd.read_csv(base_dir + '48_idx_chr.map', 31 | header=None, delim_whitespace=True) 32 | 33 | for p1, p2 in pmap.values: 34 | phoneme_map[p1] = p2 35 | 36 | for s1, s2, s3 in str_map.values: 37 | phone_str_map[s1] = s3 38 | 39 | int_str_map = {} 40 | for key, val in label_map.items(): 41 | int_str_map[val] = phone_str_map[phoneme_map[key]] 42 | 43 | return int_str_map 44 | 45 | 46 | def edit_dist(seq1, seq2): 47 | """edit distance""" 48 | seq1 = seq1.split() 49 | seq2 = seq2.split() 50 | 51 | d = np.zeros((len(seq1) + 1) * (len(seq2) + 1), dtype=np.uint8) 52 | d = d.reshape((len(seq1) + 1, len(seq2) + 1)) 53 | 54 | for i in range(len(seq1) + 1): 55 | for j in range(len(seq2) + 1): 56 | if i == 0: 57 | d[0][j] = j 58 | elif j == 0: 59 | d[i][0] = i 60 | 61 | for i in range(1, len(seq1) + 1): 62 | for j in range(1, len(seq2) + 1): 63 | if seq1[i - 1] == seq2[j - 1]: 64 | d[i][j] = d[i - 1][j - 1] 65 | else: 66 | substitution = d[i - 1][j - 1] + 1 67 | insertion = d[i][j - 1] + 1 68 | deletion = d[i - 1][j] + 1 69 | d[i][j] = min(substitution, insertion, deletion) 70 | 71 | return d[len(seq1)][len(seq2)] 72 | 73 | 74 | def sanity_check(seq, sep=' '): 75 | """Sanity Check function to correct unreasonable predictions""" 76 | seq = seq.split() 77 | 78 | for i in range(1, len(seq) - 1): 79 | # front == behind != me 80 | 
if seq[i - 1] == seq[i + 1] and seq[i] != seq[i - 1]: 81 | seq[i] = seq[i - 1] 82 | # me, front, behind are different 83 | elif seq[i] != seq[i + 1] and seq[i] != seq[i - 1]: 84 | seq[i] = seq[i - 1] 85 | 86 | return sep.join(seq) 87 | -------------------------------------------------------------------------------- /HMM_topRNN/run_HMM.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-09 15:54:45 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 22:35:03 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import os 10 | import sys 11 | 12 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa 13 | 14 | from HMM_utils import load_label, load_str_map, sanity_check, edit_dist 15 | from collections import defaultdict, Counter 16 | from datetime import datetime 17 | 18 | 19 | def make_transMat(labels, speakers, n_phoneme): 20 | """computing the transition matrix using label sequence at hand""" 21 | transition_prob = np.zeros((n_phoneme, n_phoneme)) 22 | 23 | for speaker in speakers: 24 | previous = labels[speaker][0][0] 25 | for phoneme in labels[speaker][1:]: 26 | transition_prob[phoneme[0], previous] += 1 27 | previous = phoneme[0] 28 | 29 | transition_prob = np.log(transition_prob + 2) 30 | transition_prob /= transition_prob.sum(axis=0) 31 | 32 | return transition_prob 33 | 34 | 35 | def HMM_predict(seq_probs, labels, speakers, n_phoneme, int_str_map, 36 | test_probs=None, duration=3, blending=False, n_bag=1): 37 | """generate a bag of prediction sequences for each speaker""" 38 | if not blending: 39 | n_bag = 1 40 | 41 | predict_bags = [] 42 | test_bags = [] 43 | for num in range(n_bag): 44 | predictions = [] 45 | test_predicts = [] 46 | 47 | # calculate transition prob 48 | if blending: 49 | bagspeakers = np.random.choice(speakers, len(speakers)) 50 | transition_prob = make_transMat(labels, bagspeakers, n_phoneme) 51 | else: 52 | transition_prob = make_transMat(labels, speakers, n_phoneme) 53 | 54 | for seq in seq_probs: 55 | prob_score = np.ones((n_phoneme,)) / n_phoneme 56 | predict_seq = defaultdict(list) 57 | 58 | for vec in seq: 59 | prob_matrix = prob_score * (vec**duration) * transition_prob 60 | prob_score = np.max(prob_matrix, axis=1) 61 | pred_inds = np.argmax(prob_matrix, axis=1) 62 | # normalize 63 | prob_score /= prob_score.sum() 64 | 65 | # compute the predicted phoneme with starting phoneme i 66 | for i in range(n_phoneme): 67 | predict_seq[i].append(int_str_map[pred_inds[i]]) 68 | 69 | # choose the sequence with the highest score 70 | predictions.append(predict_seq[np.argmax(prob_score)]) 71 | 72 | predict_bags.append(predictions) 73 | 74 | # test set 75 | if test_probs is None: 76 | continue 77 | 78 | for test_seq in test_probs: 79 | test_score = np.ones((n_phoneme,)) / n_phoneme 80 | testpred_seq = defaultdict(list) 81 | 82 | for test_vec in test_seq: 83 | test_matrix = transition_prob * (test_vec**duration) 84 | test_matrix *= test_score 85 | test_score = np.max(test_matrix, axis=1) 86 | test_inds = np.argmax(test_matrix, axis=1) 87 | # normalize 88 | test_score /= test_score.sum() 89 | 90 | # compute the predicted phoneme with starting phoneme i 91 | for i in range(n_phoneme): 92 | testpred_seq[i].append(int_str_map[test_inds[i]]) 93 | 94 | # choose the sequence with the highest score 95 | test_predicts.append(testpred_seq[np.argmax(test_score)]) 96 | 97 | test_bags.append(test_predicts) 98 | 99 | return predict_bags, 
test_bags 100 | 101 | 102 | def voting(predict_bags): 103 | """voting of a bag of sequences to make the final sequence""" 104 | result = [] 105 | for i in range(len(predict_bags[0])): 106 | bag_seqs = np.array([pred[i] for pred in predict_bags]) 107 | seq = [Counter(l).most_common()[0][0] for l in bag_seqs.T] 108 | result.append(seq) 109 | 110 | return result 111 | 112 | 113 | def output_seq(pred_seq, sep=''): 114 | pred_seq = sanity_check(' '.join(pred_seq)) 115 | 116 | phoneme_seq = '' 117 | now = '' 118 | for p in pred_seq.split(): 119 | if p != now: 120 | phoneme_seq += (p + sep) 121 | now = p 122 | 123 | return phoneme_seq.strip() 124 | 125 | 126 | def make_label_seq(labels, speakers, int_str_map): 127 | """transform the labels to str sequence""" 128 | label_result = [] 129 | 130 | for speaker in speakers: 131 | seq = ' '.join([int_str_map[ind[0]] for ind in labels[speaker]]) 132 | label_result.append(output_seq(seq, sep=' ')) 133 | 134 | return label_result 135 | 136 | 137 | def run_HMM(train_probfile, train_labfile, test_probfile=None, n_phoneme=48, 138 | duration=3, blending=False, n_bag=10, valid_ratio=0.1, 139 | base_dir='../Data/'): 140 | print("Start") 141 | st = datetime.now() 142 | 143 | # loading data 144 | label_data, label_map = load_label(base_dir + train_labfile) 145 | train_probs, train_speakers = np.load(base_dir + train_probfile) 146 | int_str_map = load_str_map(label_map, base_dir) 147 | 148 | if test_probfile: 149 | test_probs, test_speakers = np.load(base_dir + test_probfile) 150 | else: 151 | test_probs = None 152 | 153 | print('Done loading data, using %s.\n' % str(datetime.now() - st)) 154 | 155 | print('Start using HMM for predictions...') 156 | # computing label sequence for each speaker 157 | labels = {} 158 | for speaker in train_speakers: 159 | speaker_indexes = label_data.index.str.startswith(speaker) 160 | labels[speaker] = label_data.iloc[speaker_indexes].values 161 | 162 | # split into training and validation set 163 | n_speaker = len(train_speakers) 164 | rand_inds = np.random.permutation(n_speaker) 165 | valid_inds = rand_inds[:int(n_speaker * valid_ratio)] 166 | train_inds = rand_inds[int(n_speaker * valid_ratio):] 167 | 168 | # predict sequences using HMM with blending 169 | bags = HMM_predict(train_probs, labels, train_speakers[train_inds], 170 | n_phoneme, int_str_map, test_probs, duration, blending, 171 | n_bag) 172 | predict_bags, test_bags = bags 173 | predict_result = voting(predict_bags) 174 | 175 | if len(test_bags) > 0: 176 | test_predict = voting(test_bags) 177 | 178 | # transform to alphabet sequences and compute the edit distances 179 | predict_result = [output_seq(pred_seq, sep=' ') 180 | for pred_seq in predict_result] 181 | label_result = make_label_seq(labels, train_speakers, int_str_map) 182 | print('Done predicting, using %s.' 
% str(datetime.now() - st)) 183 | 184 | # evaluate training set 185 | train_predict = np.array(predict_result)[train_inds] 186 | train_lab = np.array(label_result)[train_inds] 187 | train_scores = [edit_dist(train_lab[i], train_predict[i]) 188 | for i in range(len(train_predict))] 189 | 190 | # evaluate validation set 191 | valid_predict = np.array(predict_result)[valid_inds] 192 | valid_lab = np.array(label_result)[valid_inds] 193 | valid_scores = [edit_dist(valid_lab[i], valid_predict[i]) 194 | for i in range(len(valid_predict))] 195 | 196 | print("\nEdit distance (train): %.4f" % np.mean(train_scores)) 197 | print("Edit distance (valid): %.4f\n" % np.mean(valid_scores)) 198 | 199 | # output the prediction file 200 | if test_probfile: 201 | test_predict_seqs = [output_seq(test_seq, sep='') 202 | for test_seq in test_predict] 203 | test_pred = {'id': test_speakers, 'phone_sequence': test_predict_seqs} 204 | test_df = pd.DataFrame(data=test_pred) 205 | test_df.to_csv('HMM_testpredict.csv', index=None) 206 | 207 | print("Done, Using %s." % str(datetime.now() - st)) 208 | 209 | 210 | def main(): 211 | run_HMM('RNN_trainprob.npy', 'train.label', 'RNN_testprob.npy', 212 | duration=3, blending=True, n_bag=100, valid_ratio=0.2, 213 | base_dir='../Data/') 214 | 215 | 216 | if __name__ == '__main__': 217 | main() 218 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Machine Learning and having it deep and structured 2 | ======== 3 | 4 | [![Build Status](https://travis-ci.org/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured.svg?branch=master)](https://travis-ci.org/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured) 5 | [![Coverage Status](https://coveralls.io/repos/github/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/badge.svg?branch=master)](https://coveralls.io/github/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured?branch=master) 6 | 7 | About 8 | -------- 9 | 10 | Implementations of the homework assignments from the course [**Machine Learning and having it deep and structured**](http://nol.ntu.edu.tw/nol/coursesearch/print_table.php?course_id=942%20U0590&class=&dpt_code=9210&ser_no=51785&semester=104-1&lang=EN) at National Taiwan University (offered by [**Hung-yi Lee**](http://speech.ee.ntu.edu.tw/~tlkagk/index.html)): 11 | 12 | - Constructed and trained variants of neural networks with [**Theano**](http://deeplearning.net/software/theano/) 13 | - Attempted to solve the sequence labeling problem in speech recognition (phoneme labeling) 14 | - Deep Neural Network (DNN) with dropout, maxout and momentum optimization 15 | - Bidirectional Recurrent Neural Network (RNN) with dropout and RMSProp optimization 16 | - Bidirectional Long Short-Term Memory (LSTM) with peephole connections and NAG optimization 17 | - Hidden Markov Model (HMM) on top of the RNN to improve the performance 18 | 19 | [**Course page**](http://speech.ee.ntu.edu.tw/~tlkagk/courses_MLSD15_2.html) 20 | 21 | Syllabus 22 | -------- 23 | 24 | Neural Networks and Training: 25 | - What is Machine Learning, Deep Learning and Structured Learning? 26 | - Neural Network Basics | Backpropagation | Theano: DNN 27 | - Tips for Training Deep Neural Network 28 | - Neural Network with Memory | Theano: RNN 29 | - Training Recurrent Neural Network 30 | - Convolutional Neural Network (by Prof. 
Winston) 31 | 32 | Structured Learning and Graphical Models: 33 | - Introduction of Structured Learning | Structured Linear Model | Structured SVM 34 | - Sequence Labeling Problem | Learning with Hidden Information 35 | - Graphical Model, Gibbs Sampling 36 | 37 | Extensions, New Applications and Trends: 38 | - Markov Logic Network 39 | - Deep Learning for Human Language Processing, Language Modeling 40 | - Caffe | Deep Reinforcement Learning | Visual Question Answering 41 | - Unsupervised Learning 42 | - Attention-based Model 43 | 44 | Content 45 | -------- 46 | 47 | Deep Neural Network (DNN)[[kaggle](https://inclass.kaggle.com/c/mlds-hw14)]: 48 | - Construct and train a deep neural network to classify pronunciation units (phonemes) in each time frame of speech. 49 | - Inputs: MFCC features 50 | - Activation function: **Maxout** (generalization of ReLU, "learnable" activation function) 51 | - Output layer: Softmax 52 | - Cost function: cross entropy 53 | - Optimization: Momentum 54 | - With **Dropout** technique 55 | 56 | Bidirectional Recurrent Neural Network (RNN)[[kaggle](https://inclass.kaggle.com/c/104-1-mlds-hw2)]: 57 | - Construct and train a bidirectional deep recurrent neural network to classify pronunciation units (phonemes) in each time frame of speech. 58 | - Inputs: prediction probabilities of each class from the previous DNN 59 | - Activation function: ReLU 60 | - Output layer: Softmax 61 | - Cost function: Mean Squared Error 62 | - Optimization: Root Mean Square Propagation (RMSProp) 63 | - With **Dropout** technique 64 | 65 | Bidirectional Long Short-Term Memory (LSTM)[[kaggle](https://inclass.kaggle.com/c/104-1-mlds-hw2)]: 66 | - Construct and train a bidirectional deep Long Short-Term Memory network to classify pronunciation units (phonemes) in each time frame of speech. 67 | - Inputs: prediction probabilities of each class from the previous DNN 68 | - Optimization: Nesterov Accelerated Gradient (NAG) 69 | - With **Peephole** connections 70 | - Using grad_clip in theano to prevent **exploding gradients** 71 | 72 | Structured Learning (output phone label sequence)[[kaggle](https://inclass.kaggle.com/c/104-1-mlds-hw3)]: 73 | - On top of the RNN / LSTM results, applies a Hidden Markov Model (HMM) to model the phone transition probabilities and further improve their performance on this sequence labeling problem. 74 | - Input: the whole utterance as one training example 75 | - Output: phone label sequence 76 | 77 | The performance is measured by Levenshtein distance (a.k.a. Edit distance). 
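A minimal, hypothetical example of how one prediction can be scored against its label sequence with the `edit_dist` helper in `HMM_topRNN/HMM_utils.py` (both arguments are space-separated phone-character strings; run from inside `HMM_topRNN/`):

    from HMM_utils import edit_dist

    # 'c' -> 'd' is a single substitution, so the distance is 1
    print(edit_dist('a b L c', 'a b L d'))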
78 | 79 | Usage 80 | -------- 81 | Clone the repo and use the [virtualenv](http://www.virtualenv.org/): 82 | 83 | git clone https://github.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured 84 | 85 | cd Machine_Learning_and_Having_It_Deep_and_Structured 86 | 87 | virtualenv venv 88 | 89 | source venv/bin/activate 90 | 91 | Install all dependencies and run the model: 92 | 93 | pip install -r requirements.txt 94 | 95 | cd RNN_LSTM 96 | 97 | python run_RNN.py 98 | -------------------------------------------------------------------------------- /RNN_LSTM/LSTM_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-07 02:10:33 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 00:27:23 6 | 7 | 8 | import numpy as np 9 | import theano as th 10 | 11 | 12 | def initialize_LSTM(n_input, n_output, archi=48, n_hid_layers=2, 13 | scale=0.01, scale_b=0.001, clip_thres=0.3): 14 | """initialize the LSTM parameters, archi: hidden layer neurons""" 15 | W_in_out = [] 16 | W_gate_forward = [] 17 | W_gate_backward = [] 18 | W_cell = [] 19 | W_peephole = [] 20 | 21 | b_in_out = [] 22 | b_gate_forward = [] 23 | b_gate_backward = [] 24 | 25 | # initial cell and output h 26 | a_0 = th.shared(random_number([archi], 0)) 27 | h_0 = th.shared(random_number([archi], 0)) 28 | 29 | # hidden layers 30 | for i in range(n_hid_layers): 31 | # initialize peephole parameters 32 | U = th.shared(random_number([archi, archi], scale)) 33 | Ui = th.shared(random_number([archi, archi], scale)) 34 | Uf = th.shared(identity_mat(archi, scale)) 35 | Uo = th.shared(random_number([archi, archi], scale)) 36 | W_peephole.append([U, Ui, Uf, Uo]) 37 | 38 | # initialize memory cell parameters 39 | Vi = th.shared(random_number([archi, archi], scale)) 40 | Vf = th.shared(identity_mat(archi, scale)) 41 | Vo = th.shared(random_number([archi, archi], scale)) 42 | W_cell.append([Vi, Vf, Vo]) 43 | 44 | # input layer 45 | if i == 0: 46 | Ws, bs = init_gate_params([n_input, archi], [archi], 47 | scale, scale_b) 48 | 49 | W_output = th.shared(random_number([2 * archi, n_output], scale)) 50 | W_in_out.append(Ws + [W_output]) 51 | b_in_out.append(bs) 52 | 53 | else: 54 | Ws_forw, bs_forw = init_gate_params([2 * archi, archi], [archi], 55 | scale, scale_b) 56 | W_gate_forward.append(Ws_forw) 57 | b_gate_forward.append(bs_forw) 58 | 59 | Ws_back, bs_back = init_gate_params([2 * archi, archi], [archi], 60 | scale, scale_b) 61 | W_gate_backward.append(Ws_back) 62 | b_gate_backward.append(bs_back) 63 | 64 | param_Ws = [W_in_out, W_gate_forward, W_gate_backward, W_peephole, W_cell] 65 | param_bs = [b_in_out, b_gate_forward, b_gate_backward] 66 | 67 | parameters = [w for Ws in param_Ws for W in Ws for w in W] 68 | parameters += [b for bs in param_bs for bb in bs for b in bb] 69 | 70 | # help to do advanced optimization (ex. 
NAG, RMSProp) 71 | auxis = [th.shared(zero_number(p.get_value().shape)) for p in parameters] 72 | 73 | # help to do mini-batch update (to store gradients) 74 | caches = [th.shared(zero_number(p.get_value().shape)) for p in parameters] 75 | 76 | # set the restricted numerical range for gradient values 77 | for i in range(len(param_Ws)): 78 | for j in range(len(param_Ws[i])): 79 | for k in range(len(param_Ws[i][j])): 80 | param_Ws[i][j][k] = th.gradient.grad_clip(param_Ws[i][j][k], 81 | -clip_thres, 82 | clip_thres) 83 | 84 | for i in range(len(param_bs)): 85 | for j in range(len(param_bs[i])): 86 | for k in range(len(param_bs[i][j])): 87 | param_bs[i][j][k] = th.gradient.grad_clip(param_bs[i][j][k], 88 | -clip_thres, 89 | clip_thres) 90 | 91 | return param_Ws, param_bs, auxis, caches, a_0, h_0, parameters 92 | 93 | 94 | def init_gate_params(W_shape, b_shape, scale, scale_b): 95 | W = th.shared(random_number(W_shape, scale)) 96 | Wi = th.shared(random_number(W_shape, scale)) 97 | Wf = th.shared(random_number(W_shape, scale) + np.float32(scale / 2)) 98 | Wo = th.shared(random_number(W_shape, scale)) 99 | 100 | b = th.shared(random_number(b_shape, scale_b)) 101 | bi = th.shared(random_number(b_shape, scale_b)) 102 | bf = th.shared(random_number(b_shape, scale_b)) 103 | bo = th.shared(random_number(b_shape, scale_b)) 104 | 105 | return [W, Wi, Wf, Wo], [b, bi, bf, bo] 106 | 107 | 108 | def random_number(shape, scale=1): 109 | return (scale * np.random.randn(*shape)).astype('float32') 110 | 111 | 112 | def zero_number(shape): 113 | return np.zeros(shape).astype('float32') 114 | 115 | 116 | def identity_mat(N, scale): 117 | return (scale * np.identity(N)).astype('float32') 118 | -------------------------------------------------------------------------------- /RNN_LSTM/RNN_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-06 20:50:39 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 22:26:54 6 | 7 | 8 | import numpy as np 9 | import theano as th 10 | 11 | 12 | def initialize_RNN(n_input, n_output, archi=128, n_hid_layers=2, 13 | scale=0.033, scale_b=0.001, clip_thres=3.0): 14 | """initialize the RNN parameters, archi: hidden layer neurons""" 15 | W_in_out = [] 16 | W_out_forward = [] 17 | W_out_backward = [] 18 | W_memory = [] 19 | 20 | b_in_out = [] 21 | b_out_forward = [] 22 | b_out_backward = [] 23 | b_memory = [] 24 | 25 | # initial memory 26 | a_0 = th.shared(random_number([archi], 0)) 27 | 28 | # input layer 29 | W_in_out.append(th.shared(random_number([n_input, archi], scale))) 30 | b_in_out.append(th.shared(random_number([archi], scale_b))) 31 | 32 | # hidden layers 33 | for i in range(n_hid_layers): 34 | # initialize memory weights as identity matrix 35 | W_memory.append(th.shared(identity_mat(archi, scale))) 36 | b_memory.append(th.shared(random_number([archi], scale_b))) 37 | 38 | if i == (n_hid_layers - 1): 39 | continue 40 | 41 | W_out_forward.append(th.shared(random_number([2*archi, archi], scale))) 42 | rand_w = random_number([2*archi, archi], scale) 43 | W_out_backward.append(th.shared(rand_w)) 44 | b_out_forward.append(th.shared(random_number([archi], scale_b))) 45 | b_out_backward.append(th.shared(random_number([archi], scale_b))) 46 | 47 | # output layer 48 | W_in_out.append(th.shared(random_number([2 * archi, n_output], scale))) 49 | b_in_out.append(th.shared(random_number([n_output], scale_b))) 50 | 51 | param_Ws = [W_in_out, W_out_forward, 
W_out_backward, W_memory] 52 | param_bs = [b_in_out, b_out_forward, b_out_backward, b_memory] 53 | 54 | # help to do advanced optimization (ex. NAG, RMSProp) 55 | aux_Ws = [] 56 | aux_bs = [] 57 | 58 | # help to do mini-batch update (to store gradients) 59 | cache_Ws = [] 60 | cache_bs = [] 61 | 62 | parameters = [] 63 | for i in range(4): 64 | aux_W = [] 65 | aux_b = [] 66 | cache_W = [] 67 | cache_b = [] 68 | 69 | parameters += param_Ws[i] 70 | parameters += param_bs[i] 71 | 72 | for j in range(len(param_Ws[i])): 73 | W_shape = param_Ws[i][j].get_value().shape 74 | b_shape = param_bs[i][j].get_value().shape 75 | 76 | aux_W.append(th.shared(zero_number(W_shape))) 77 | aux_b.append(th.shared(zero_number(b_shape))) 78 | 79 | cache_W.append(th.shared(zero_number(W_shape))) 80 | cache_b.append(th.shared(zero_number(b_shape))) 81 | 82 | # set the restricted numerical range for gradient values 83 | param_Ws[i][j] = th.gradient.grad_clip(param_Ws[i][j], 84 | -clip_thres, clip_thres) 85 | 86 | param_bs[i][j] = th.gradient.grad_clip(param_bs[i][j], 87 | -clip_thres, clip_thres) 88 | 89 | aux_Ws.append(aux_W) 90 | aux_bs.append(aux_b) 91 | 92 | cache_Ws.append(cache_W) 93 | cache_bs.append(cache_b) 94 | 95 | # concatenate all auxiliary and cache parameters 96 | auxis = [] 97 | caches = [] 98 | for i in range(4): 99 | auxis += aux_Ws[i] 100 | auxis += aux_bs[i] 101 | 102 | caches += cache_Ws[i] 103 | caches += cache_bs[i] 104 | 105 | return param_Ws, param_bs, auxis, caches, a_0, parameters 106 | 107 | 108 | def random_number(shape, scale=1): 109 | return (scale * np.random.randn(*shape)).astype('float32') 110 | 111 | 112 | def zero_number(shape): 113 | return np.zeros(shape).astype('float32') 114 | 115 | 116 | def identity_mat(N, scale): 117 | return (scale * np.identity(N)).astype('float32') 118 | -------------------------------------------------------------------------------- /RNN_LSTM/activation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-06 21:06:57 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 21:10:01 6 | 7 | import theano.tensor as T 8 | import theano as th 9 | 10 | 11 | def tanh(Z): 12 | exp_m2z = T.exp(-2 * Z) 13 | return (1 - exp_m2z) / (1 + exp_m2z) 14 | 15 | 16 | def sigmoid(Z): 17 | return 1 / (1 + T.exp(-Z)) 18 | 19 | 20 | def ReLU(Z): 21 | return T.switch(Z < 0, 0, Z) 22 | 23 | 24 | def softmax(z): 25 | Z = T.exp(z) 26 | results, _ = th.scan(lambda x: x / T.sum(x), sequences=Z) 27 | return results 28 | -------------------------------------------------------------------------------- /RNN_LSTM/optimize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-06 21:04:19 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 21:24:52 6 | 7 | import theano.tensor as T 8 | from theano.ifelse import ifelse 9 | 10 | 11 | def sgd(parameters, grads, lr, minibatch, batchsize, auxis, caches): 12 | updates = [] 13 | update_batch = ifelse(T.lt(minibatch, batchsize - 1), 0, 1) 14 | 15 | for ix in range(len(grads)): 16 | # update parameters if reaching batchsize 17 | move = -(lr / batchsize) * (caches[ix] + grads[ix]) 18 | updates.append((parameters[ix], parameters[ix] + move * update_batch)) 19 | new_cache = (caches[ix] + grads[ix]) * (1 - update_batch) 20 | updates.append((caches[ix], new_cache)) 21 | 22 | return updates 23 | 24 | 
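# NOTE: all four optimizers here share one minibatch trick — update_batch
# equals 1 only on the final call of a minibatch (when the `minibatch`
# counter reaches batchsize - 1); until then, gradients are only accumulated
# into `caches` and the parameters themselves stay untouched.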
25 | def momentum(parameters, grads, lr, minibatch, batchsize, 26 | momentum, caches, moment=0.95): 27 | """theano update, optimized by Momentum""" 28 | updates = [] 29 | update_batch = ifelse(T.lt(minibatch, batchsize - 1), 0, 1) 30 | 31 | for ix in range(len(grads)): 32 | move = - (lr / batchsize) * (grads[ix] + caches[ix]) 33 | direction = moment * momentum[ix] + move 34 | 35 | # update parameters if reaching batchsize 36 | new_param = parameters[ix] + direction * update_batch 37 | updates.append((parameters[ix], new_param)) 38 | 39 | # remember the move if updating parameters 40 | new_mom = momentum[ix] * (1 - update_batch) + direction * update_batch 41 | updates.append((momentum[ix], new_mom)) 42 | 43 | # accumulate gradients if not reaching batchsize 44 | new_cache = (caches[ix] + grads[ix]) * (1 - update_batch) 45 | updates.append((caches[ix], new_cache)) 46 | 47 | return updates 48 | 49 | 50 | def NAG(parameters, grads, lr, minibatch, batchsize, 51 | real_pos, caches, moment=0.95): 52 | """theano update, optimized by NAG""" 53 | updates = [] 54 | update_batch = ifelse(T.lt(minibatch, batchsize - 1), 0, 1) 55 | 56 | for ix in range(len(grads)): 57 | move = -(lr / batchsize) * (caches[ix] + grads[ix]) 58 | real = parameters[ix] + move 59 | spy = real + moment * (real - real_pos[ix]) 60 | 61 | # update parameters to spy position if reaching batchsize 62 | new_param = spy * update_batch + parameters[ix] * (1 - update_batch) 63 | updates.append((parameters[ix], new_param)) 64 | 65 | # remember the real position if moved parameters 66 | new_realpos = real * update_batch + real_pos[ix] * (1 - update_batch) 67 | updates.append((real_pos[ix], new_realpos)) 68 | 69 | # accumulate gradients if not reaching batchsize 70 | new_cache = (caches[ix] + grads[ix]) * (1 - update_batch) 71 | updates.append((caches[ix], new_cache)) 72 | 73 | return updates 74 | 75 | 76 | def RMSProp(parameters, grads, lr, minibatch, batchsize, 77 | sigma_square, caches, alpha=0.9, const=1e-2): 78 | """theano update, optimized by RMSProp""" 79 | updates = [] 80 | update_batch = ifelse(T.lt(minibatch, batchsize - 1), 0, 1) 81 | 82 | for ix in range(len(grads)): 83 | move = (grads[ix] + caches[ix]) / batchsize 84 | factor = sigma_square[ix] * alpha + (1 - alpha) * (move**2) 85 | step = -lr * move / (T.sqrt(factor) + const) 86 | 87 | # update parameters if reaching batchsize 88 | new_param = (parameters[ix] + step) * update_batch 89 | new_param += parameters[ix] * (1 - update_batch) 90 | updates.append((parameters[ix], new_param)) 91 | 92 | # remember the scaling factors if reaching batchsize 93 | new_sig = factor * update_batch + sigma_square[ix] * (1 - update_batch) 94 | updates.append((sigma_square[ix], new_sig)) 95 | 96 | # accumulate gradients if not reaching batchsize 97 | new_cache = (caches[ix] + grads[ix]) * (1 - update_batch) 98 | updates.append((caches[ix], new_cache)) 99 | 100 | return updates 101 | -------------------------------------------------------------------------------- /RNN_LSTM/run_LSTM.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-06 23:56:38 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 15:11:55 6 | 7 | import numpy as np 8 | import theano as th 9 | import theano.tensor as T 10 | import os 11 | import sys 12 | 13 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa 14 | 15 | from datetime import datetime 16 | from shortcuts import 
load_data, load_label, make_data, make_y, load_str_map,\ 17 | validate, validate_editdist, test_predict 18 | from activation import tanh, sigmoid, softmax 19 | from optimize import sgd, momentum, NAG, RMSProp 20 | from LSTM_utils import initialize_LSTM 21 | 22 | 23 | def set_step(W_peephole, W_cell): 24 | U, Ui, Uf, Uo = W_peephole 25 | Vi, Vf, Vo = W_cell 26 | 27 | def step(z_t, zi_t, zf_t, zo_t, c_tm1, h_tm1): 28 | # new information 29 | Z_t = tanh(z_t + T.dot(h_tm1, U)) 30 | 31 | # input gate 32 | Zi_t = sigmoid(zi_t + T.dot(h_tm1, Ui) + T.dot(c_tm1, Vi)) 33 | 34 | # forget gate 35 | Zf_t = sigmoid(zf_t + T.dot(h_tm1, Uf) + T.dot(c_tm1, Vf)) 36 | 37 | # new plus old/unforgotten memory 38 | c_t = Z_t * Zi_t + c_tm1 * Zf_t 39 | 40 | # output gate 41 | Zo_t = sigmoid(zo_t + T.dot(h_tm1, Uo) + T.dot(c_t, Vo)) 42 | 43 | # output information 44 | h_t = tanh(c_t) * Zo_t 45 | 46 | return c_t, h_t 47 | 48 | return step 49 | 50 | 51 | def construct_LSTM(n_input, n_output, n_hid_layers=2, archi=36, lr=1e-3, 52 | update_by='NAG', batchsize=1, scale=0.01, 53 | scale_b=0.001, clip_thres=1.0): 54 | """ 55 | Initialize and construct the bidirectional Long Short-Term Memory (LSTM) 56 | Update the LSTM using minibatch and the chosen optimizer (default: NAG) 57 | archi: number of neurons of each hidden layer 58 | """ 59 | x_seq = T.fmatrix() 60 | y_hat = T.fmatrix() 61 | minibatch = T.scalar() 62 | 63 | # choose the optimization function 64 | optimiz_func = { 65 | 'sgd': sgd, 66 | 'momentum': momentum, 67 | 'NAG': NAG, 68 | 'RMSProp': RMSProp, 69 | } 70 | update_func = optimiz_func[update_by] 71 | 72 | # initialize the LSTM 73 | print('Start initializing LSTM...') 74 | init = initialize_LSTM(n_input, n_output, archi, n_hid_layers, 75 | scale, scale_b, clip_thres) 76 | param_Ws, param_bs, auxis, caches, a_0, h_0, parameters = init 77 | 78 | # ############ bidirectional Long Short-Term Memory ############### 79 | 80 | # #### Hidden layers ###### 81 | for l in range(n_hid_layers): 82 | # computing gates 83 | if l == 0: 84 | a_seq = x_seq 85 | W, Wi, Wf, Wo = param_Ws[0][l][:-1] 86 | b, bi, bf, bo = param_bs[0][l] 87 | z_seq = T.dot(a_seq, W) + b.dimshuffle('x', 0) 88 | zi_seq = T.dot(a_seq, Wi) + bi.dimshuffle('x', 0) 89 | zf_seq = T.dot(a_seq, Wf) + bf.dimshuffle('x', 0) 90 | zo_seq = T.dot(a_seq, Wo) + bo.dimshuffle('x', 0) 91 | 92 | zf_seq, zif_seq, zff_seq, zof_seq = z_seq, zi_seq, zf_seq, zo_seq 93 | zb_seq, zib_seq, zfb_seq, zob_seq = zf_seq, zif_seq, zff_seq, zof_seq  # copy the renamed gates: line 92 rebinds zf_seq 94 | else: 95 | # forward gates 96 | W_f, Wi_f, Wf_f, Wo_f = param_Ws[1][l - 1] 97 | b_f, bi_f, bf_f, bo_f = param_bs[1][l - 1] 98 | zf_seq = T.dot(a_seq, W_f) + b_f.dimshuffle('x', 0) 99 | zif_seq = T.dot(a_seq, Wi_f) + bi_f.dimshuffle('x', 0) 100 | zff_seq = T.dot(a_seq, Wf_f) + bf_f.dimshuffle('x', 0) 101 | zof_seq = T.dot(a_seq, Wo_f) + bo_f.dimshuffle('x', 0) 102 | 103 | # backward gates 104 | W_b, Wi_b, Wf_b, Wo_b = param_Ws[2][l - 1] 105 | b_b, bi_b, bf_b, bo_b = param_bs[2][l - 1] 106 | zb_seq = T.dot(a_seq, W_b) + b_b.dimshuffle('x', 0) 107 | zib_seq = T.dot(a_seq, Wi_b) + bi_b.dimshuffle('x', 0) 108 | zfb_seq = T.dot(a_seq, Wf_b) + bf_b.dimshuffle('x', 0) 109 | zob_seq = T.dot(a_seq, Wo_b) + bo_b.dimshuffle('x', 0) 110 | 111 | # computing cells 112 | step = set_step(param_Ws[3][l], param_Ws[4][l]) 113 | 114 | # Forward direction 115 | seqs = [zf_seq, zif_seq, zff_seq, zof_seq] 116 | [cf_seq, hf_seq], _ = th.scan(step, sequences=seqs, 117 | outputs_info=[a_0, h_0], 118 | truncate_gradient=-1) 119 | 
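        # NOTE: the backward direction reuses the same step function on the
        # time-reversed gate sequences, so its outputs come out reversed and
        # are flipped back (hb_seq[::-1]) before concatenation with hf_seq.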
120 | # Backward direction 121 | seqs = [zb_seq[::-1], zib_seq[::-1], zfb_seq[::-1], zob_seq[::-1]] 122 | [cb_seq, hb_seq], _ = th.scan(step, sequences=seqs, 123 | outputs_info=[a_0, h_0], 124 | truncate_gradient=-1) 125 | 126 | a_seq = T.concatenate([hf_seq, hb_seq[::-1]], axis=1) 127 | 128 | # #### End of Hidden layers ###### 129 | y_seq = softmax(T.dot(a_seq, param_Ws[0][0][-1])) 130 | forward = th.function(inputs=[x_seq], outputs=y_seq) 131 | 132 | cost = T.sum((y_seq - y_hat)**2) + minibatch * 0  # "* 0" keeps minibatch in the graph as an input 133 | valid = th.function(inputs=[x_seq, y_hat, minibatch], outputs=cost) 134 | grads = T.grad(cost, parameters, disconnected_inputs='ignore') 135 | forward_grad = th.function([x_seq, y_hat, minibatch], grads) 136 | 137 | # ############ end of construction ############### 138 | 139 | updates = update_func(parameters, grads, lr, minibatch, 140 | batchsize, auxis, caches) 141 | lstm_train = th.function(inputs=[x_seq, y_hat, minibatch], 142 | outputs=cost, updates=updates) 143 | 144 | return forward, valid, lstm_train, forward_grad 145 | 146 | 147 | def train_LSTM(trainX, train_label, forward, valid, lstm_train, forward_grad, 148 | n_output, int_str_map, batchsize, epoch=10, valid_ratio=0.2, 149 | print_every=20): 150 | """train the deep LSTM neural network""" 151 | speakers = sorted(trainX.keys()) 152 | 153 | # making training y sequence 154 | trainY = {} 155 | for speaker in speakers: 156 | y = [make_y(lab, n_output) for lab in train_label[speaker].ravel()] 157 | trainY[speaker] = np.array(y).astype('float32') 158 | 159 | # split the validation set 160 | valid_n = round(len(speakers) * valid_ratio) 161 | rand_speakers = np.random.permutation(speakers) 162 | valid_speakers = rand_speakers[:valid_n] 163 | train_speakers = rand_speakers[valid_n:] 164 | 165 | valid_dists = [] 166 | train_cost = [] 167 | valid_cost = [] 168 | 169 | # training process 170 | for j in range(epoch): 171 | costs = 0 172 | n_instance = 0 173 | minibat_ind = 0 174 | 175 | # randomly shuffle the training order 176 | indexes = np.random.permutation(len(train_speakers)) 177 | for ind, num in enumerate(indexes): 178 | X_seq = trainX[train_speakers[num]]  # index into train_speakers, not the full sorted list 179 | costs += lstm_train(X_seq, trainY[train_speakers[num]], minibat_ind) 180 | n_instance += X_seq.shape[0] 181 | train_cost.append(costs / n_instance) 182 | 183 | # validation set 184 | if ind % print_every == (print_every - 1): 185 | v_cost = validate(trainX, trainY, valid_speakers, valid, None) 186 | valid_cost.append(v_cost) 187 | 188 | print('\tNow: %d; costs (train): %.4f ; costs (valid): %.4f' % 189 | (j + 1, train_cost[-1], valid_cost[-1])) 190 | 191 | val_dist = validate_editdist(trainX, trainY, valid_speakers, 192 | forward, None, int_str_map) 193 | valid_dists.append(val_dist) 194 | print("\tEdit distance (valid): %.4f\n" % val_dist) 195 | 196 | # advance the minibatch indicator 197 | minibat_ind = (minibat_ind + 1) % batchsize 198 | 199 | return train_cost, valid_cost, valid_dists 200 | 201 | 202 | def run_LSTM_model(train_file, train_labfile, train_probfile, test_file=None, 203 | test_probfile=None, neurons=36, n_hiddenlayer=2, lr=1e-3, 204 | update_by='NAG', batchsize=1, epoch=10, valid_ratio=0.1, 205 | n_input=48, n_output=48, save_prob=False, 206 | base_dir='../Data/'): 207 | """Run the bidirectional deep Long Short-Term Memory network""" 208 | 209 | print("Start") 210 | st = datetime.now() 211 | 212 | data = load_data(base_dir + train_file) 213 | label_data, label_map = load_label(base_dir + train_labfile) 214 | int_str_map = load_str_map(label_map, base_dir) 215 | trainX, train_label = make_data(data, base_dir+train_probfile, label_data)
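    # NOTE: assumed layout — trainX maps each speaker/utterance id to a
    # float32 matrix of shape (n_frames, 48) of per-frame DNN class
    # probabilities, and train_label maps the same ids to integer phone
    # labels, which make_y one-hot encodes during training.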
216 | print('Done loading data, using %s.' % str(datetime.now() - st)) 217 | 218 | lstm = construct_LSTM(n_input, n_output, n_hiddenlayer, neurons, lr, 219 | update_by, batchsize) 220 | forward, valid, lstm_train, forward_grad = lstm 221 | print('Done constructing the LSTM network.') 222 | print('Using %s.\n' % str(datetime.now() - st)) 223 | 224 | print('Start training LSTM...') 225 | train_LSTM(trainX, train_label, forward, valid, lstm_train, forward_grad, 226 | n_output, int_str_map, batchsize, epoch, valid_ratio) 227 | print('Done training, using %s.' % str(datetime.now() - st)) 228 | 229 | if test_file and test_probfile: 230 | print('\nPredicting on test set...') 231 | test_predict(test_file, test_probfile, int_str_map, forward, 232 | None, base_dir=base_dir, save_prob=save_prob, 233 | prob_filename='LSTM_testprob') 234 | 235 | if save_prob: 236 | speakers = sorted(trainX.keys()) 237 | probs = [forward(trainX[speaker]) for speaker in speakers] 238 | np.save('LSTM_trainprob', [probs, speakers]) 239 | 240 | print("Done, Using %s." % str(datetime.now() - st)) 241 | 242 | 243 | def main(): 244 | run_LSTM_model('train.data', 'train.label', 'ytrain_prob.npy', 'test.data', 245 | 'ytest_prob.npy', neurons=36, n_hiddenlayer=2, lr=1e-4, 246 | update_by='NAG', batchsize=1, epoch=40, save_prob=True) 247 | 248 | 249 | if __name__ == '__main__': 250 | main() 251 | -------------------------------------------------------------------------------- /RNN_LSTM/run_RNN.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-03 11:40:23 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 15:28:40 6 | 7 | import numpy as np 8 | import theano as th 9 | import theano.tensor as T 10 | import os 11 | import sys 12 | 13 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa 14 | 15 | from datetime import datetime 16 | from shortcuts import load_data, load_label, make_data, make_y, load_str_map,\ 17 | validate, validate_editdist, test_predict 18 | from activation import tanh, sigmoid, ReLU, softmax 19 | from optimize import sgd, momentum, NAG, RMSProp 20 | from RNN_utils import initialize_RNN 21 | 22 | 23 | from theano.ifelse import ifelse 24 | from theano.tensor.shared_randomstreams import RandomStreams 25 | 26 | 27 | def set_step(W_memory, b_memory, lay_j, acti_func='ReLU'): 28 | functions = { 29 | 'ReLU': ReLU, 30 | 'sigmoid': sigmoid, 31 | 'tanh': tanh, 32 | } 33 | activ = functions[acti_func] 34 | 35 | def step(zf_t, zb_t, af_tm1, ab_tm1): 36 | af_t = activ(zf_t + T.dot(af_tm1, W_memory[lay_j]) + b_memory[lay_j]) 37 | ab_t = activ(zb_t + T.dot(ab_tm1, W_memory[lay_j]) + b_memory[lay_j]) 38 | return af_t, ab_t 39 | 40 | return step 41 | 42 | 43 | def construct_RNN(n_input, n_output, n_hid_layers=2, archi=128, lr=1e-3, 44 | acti_func='ReLU', update_by='RMSProp', dropout_rate=0.2, 45 | batchsize=1, scale=0.033, scale_b=0.001, clip_thres=10.0, 46 | seed=42): 47 | """ 48 | Initialize and construct the bidirectional deep RNN with dropout 49 | Update the RNN using minibatch and RMSProp 50 | archi: number of neurons of each hidden layer 51 | """ 52 | x_seq = T.fmatrix() 53 | y_hat = T.fmatrix() 54 | minibatch = T.scalar() 55 | stop_dropout = T.scalar() 56 | 57 | # choose the optimization function 58 | optimiz_func = { 59 | 'sgd': sgd, 60 | 'momentum': momentum, 61 | 'NAG': NAG, 62 | 'RMSProp': RMSProp, 63 | } 64 | update_func = optimiz_func[update_by] 65 | 66 | # 
initialize the RNN 67 | print('Start initializing RNN...') 68 | init = initialize_RNN(n_input, n_output, archi, n_hid_layers, 69 | scale, scale_b, clip_thres) 70 | param_Ws, param_bs, auxis, caches, a_0, parameters = init 71 | 72 | # ############ bidirectional recurrent neural network ############### 73 | srng = RandomStreams(seed=seed) 74 | 75 | # #### Hidden layers ###### 76 | for l in range(n_hid_layers): 77 | if l == 0: 78 | a_seq = x_seq 79 | z_seq = T.dot(a_seq, param_Ws[0][l]) 80 | z_seq += param_bs[0][l].dimshuffle('x', 0) 81 | zf_seq = z_seq 82 | zb_seq = z_seq 83 | else: 84 | zf_seq = T.dot(a_seq, param_Ws[1][l - 1]) 85 | zf_seq += param_bs[1][l - 1].dimshuffle('x', 0) 86 | zb_seq = T.dot(a_seq, param_Ws[2][l - 1]) 87 | zb_seq += param_bs[2][l - 1].dimshuffle('x', 0) 88 | 89 | step = set_step(param_Ws[3], param_bs[3], l, acti_func) 90 | [af_seq, ab_seq], _ = th.scan(step, sequences=[zf_seq, zb_seq[::-1]], 91 | outputs_info=[a_0, a_0], 92 | truncate_gradient=-1) 93 | 94 | a_out = T.concatenate([af_seq, ab_seq[::-1]], axis=1) 95 | dropping = srng.binomial(size=T.shape(a_out), 96 | p=(1 - dropout_rate)) 97 | a_seq = ifelse(T.lt(stop_dropout, 1.05), 98 | (a_out * dropping).astype('float32'), a_out) 99 | a_seq /= stop_dropout 100 | 101 | # #### End of Hidden layers ###### 102 | 103 | y_pre = T.dot(a_seq, param_Ws[0][1]) + param_bs[0][1].dimshuffle('x', 0) 104 | y_seq = softmax(y_pre) 105 | forward = th.function(inputs=[x_seq, stop_dropout], outputs=y_seq) 106 | 107 | cost = T.sum((y_seq - y_hat)**2) + minibatch * 0 108 | valid = th.function(inputs=[x_seq, y_hat, minibatch, stop_dropout], 109 | outputs=cost) 110 | grads = T.grad(cost, parameters, disconnected_inputs='ignore') 111 | 112 | # ############ end of construction ############### 113 | 114 | updates = update_func(parameters, grads, lr, minibatch, 115 | batchsize, auxis, caches) 116 | rnn_train = th.function(inputs=[x_seq, y_hat, minibatch, stop_dropout], 117 | outputs=cost, updates=updates) 118 | 119 | return forward, valid, rnn_train 120 | 121 | 122 | def train_RNN(trainX, train_label, forward, valid, rnn_train, n_output, 123 | int_str_map, dropout_rate, batchsize, epoch=10, valid_ratio=0.2, 124 | print_every=20): 125 | """train the deep recurrent neural network""" 126 | speakers = sorted(trainX.keys()) 127 | 128 | # making training y sequence 129 | trainY = {} 130 | for speaker in speakers: 131 | y = [make_y(lab, n_output) for lab in train_label[speaker].ravel()] 132 | trainY[speaker] = np.array(y).astype('float32') 133 | 134 | # split the validation set 135 | valid_n = round(len(speakers) * valid_ratio) 136 | rand_speakers = np.random.permutation(speakers) 137 | valid_speakers = rand_speakers[:valid_n] 138 | train_speakers = rand_speakers[valid_n:] 139 | 140 | valid_dists = [] 141 | train_cost = [] 142 | valid_cost = [] 143 | 144 | # training process 145 | for j in range(epoch): 146 | costs = 0 147 | n_instance = 0 148 | minibat_ind = 0 149 | 150 | # random shuffle the order 151 | indexes = np.random.permutation(len(train_speakers)) 152 | for ind, num in enumerate(indexes): 153 | X_seq = trainX[speakers[num]] 154 | costs += rnn_train(X_seq, trainY[speakers[num]], minibat_ind, 1) 155 | n_instance += X_seq.shape[0] 156 | train_cost.append(costs / n_instance) 157 | 158 | # validation set 159 | if ind % print_every == (print_every - 1): 160 | v_cost = validate(trainX, trainY, valid_speakers, 161 | valid, dropout_rate) 162 | valid_cost.append(v_cost) 163 | 164 | print('\tNow: %d; costs (train): %.4f ; costs (valid): %.4f' % 165 | 
(j + 1, train_cost[-1], valid_cost[-1])) 166 | 167 | val_dist = validate_editdist(trainX, trainY, valid_speakers, 168 | forward, dropout_rate, 169 | int_str_map) 170 | valid_dists.append(val_dist) 171 | print("\tEdit distance (valid): %.4f\n" % val_dist) 172 | 173 | # minibatch indicator plus 1 174 | minibat_ind = (minibat_ind + 1) % batchsize 175 | 176 | return train_cost, valid_cost, valid_dists 177 | 178 | 179 | def run_RNN_model(train_file, train_labfile, train_probfile, test_file=None, 180 | test_probfile=None, neurons=36, n_hiddenlayer=2, lr=1e-3, 181 | acti_func='ReLU', update_by='RMSProp', dropout_rate=0.2, 182 | batchsize=1, epoch=10, valid_ratio=0.1, n_input=48, 183 | n_output=48, base_dir='../Data/', save_prob=False): 184 | """Run the bidirectional deep recurrent neural network with dropout""" 185 | 186 | print("Start") 187 | st = datetime.now() 188 | 189 | data = load_data(base_dir + train_file) 190 | label_data, label_map = load_label(base_dir + train_labfile) 191 | int_str_map = load_str_map(label_map, base_dir) 192 | trainX, train_label = make_data(data, base_dir+train_probfile, label_data) 193 | print('Done loading data, using %s.' % str(datetime.now() - st)) 194 | 195 | rnn = construct_RNN(n_input, n_output, n_hiddenlayer, neurons, lr, 196 | acti_func, update_by, dropout_rate, batchsize) 197 | forward, valid, rnn_train = rnn 198 | print('Done constructing the recurrent neural network.\n') 199 | 200 | print('Start training RNN...') 201 | train_RNN(trainX, train_label, forward, valid, rnn_train, n_output, 202 | int_str_map, dropout_rate, batchsize, epoch, valid_ratio) 203 | print('Done training, using %s.' % str(datetime.now() - st)) 204 | 205 | if test_file and test_probfile: 206 | print('\nPredicting on test set...') 207 | test_predict(test_file, test_probfile, int_str_map, forward, 208 | dropout_rate, base_dir=base_dir, save_prob=save_prob, 209 | prob_filename='RNN_testprob') 210 | 211 | if save_prob: 212 | speakers = sorted(trainX.keys()) 213 | stop = 1 / (1 - dropout_rate) 214 | probs = [forward(trainX[speaker], stop) for speaker in speakers] 215 | np.save('RNN_trainprob', [probs, speakers]) 216 | 217 | print("Done, Using %s." % str(datetime.now() - st))
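# Why forward() is called with stop = 1 / (1 - dropout_rate) above: in
# construct_RNN the random dropout mask is disabled whenever stop_dropout
# exceeds 1.05, and the division of activations by stop_dropout then rescales
# them to the expected magnitude seen under training-time dropout (classic
# test-time dropout scaling).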
218 | 219 | 220 | def main(): 221 | run_RNN_model('train.data', 'train.label', 'ytrain_prob.npy', 'test.data', 222 | 'ytest_prob.npy', neurons=128, n_hiddenlayer=2, lr=1e-3, 223 | acti_func='ReLU', update_by='RMSProp', dropout_rate=0.2, 224 | batchsize=1, epoch=100, save_prob=True) 225 | 226 | 227 | if __name__ == '__main__': 228 | main() 229 | -------------------------------------------------------------------------------- /RNN_LSTM/shortcuts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-10-12 16:25:45 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 19:07:16 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import gc 10 | 11 | 12 | def load_data(filename, nrows=None, normalize=True): 13 | """load data from file, first column as index, dtype=float32; a few rows are read first only to discover the columns""" 14 | ind = pd.read_csv(filename, sep=' ', header=None, index_col=0, nrows=5) 15 | dtype_dict = {c: np.float32 for c in ind.columns} 16 | data = pd.read_csv(filename, sep=' ', header=None, index_col=0, 17 | dtype=dtype_dict, nrows=nrows) 18 | # normalize 19 | if normalize: 20 | data = (data - data.mean()) / data.std() 21 | gc.collect() 22 | 23 | return data 24 | 25 | 26 | def load_label(filename): 27 | """load label data""" 28 | label_data = pd.read_csv(filename, header=None, index_col=0) 29 | label_map = {} 30 | for ind, lab in enumerate(np.unique(label_data.values)): 31 | label_map[lab] = ind 32 | 33 | label_data = label_data.applymap(lambda x: label_map[x]) 34 | gc.collect() 35 | 36 | return label_data, label_map 37 | 38 | 39 | def make_data(data, prob_file, label_data=None): 40 | """transform data into one sequence for each speaker""" 41 | prob_data = np.load(prob_file) 42 | df = pd.DataFrame(data=prob_data, index=data.index) 43 | speakers = list(set(['_'.join(name.split('_')[:2]) for name in df.index])) 44 | 45 | X = {} 46 | labels = {} 47 | for speaker in speakers: 48 | speaker_indexes = df.index.str.startswith(speaker) 49 | X[speaker] = (df.iloc[speaker_indexes].values).astype('float32') 50 | if label_data is not None: 51 | labels[speaker] = label_data.iloc[speaker_indexes].values 52 | 53 | return X, labels 54 | 55 | 56 | def make_y(lab, n_output): 57 | """make y vector""" 58 | y = np.zeros(n_output) 59 | y[lab] = 1 60 | return y 61 | 62 | 63 | def validate(trainX, trainY, valid_speakers, valid, dropout_rate): 64 | """Calculate the cost value on validation set""" 65 | objective = 0 66 | n_instance = 0 67 | 68 | if dropout_rate is None: 69 | stop = None 70 | else: 71 | stop = 1.0 / (1 - dropout_rate) 72 | 73 | for speaker in valid_speakers: 74 | if stop is None: 75 | objective += valid(trainX[speaker], trainY[speaker], 0) 76 | else: 77 | objective += valid(trainX[speaker], trainY[speaker], 0, stop) 78 | n_instance += trainX[speaker].shape[0] 79 | 80 | return objective / n_instance 81 | 82 | 83 | def load_str_map(label_map, base_dir='../Data/'): 84 | """find the mapping from int to phoneme""" 85 | phoneme_map = {} 86 | phone_str_map = {} 87 | pmap = pd.read_csv(base_dir + '48_39.map', sep='\t', header=None) 88 | str_map = pd.read_csv(base_dir + '48_idx_chr.map', 89 | header=None, delim_whitespace=True) 90 | 91 | for p1, p2 in pmap.values: 92 | phoneme_map[p1] = p2 93 | 94 | for s1, s2, s3 in str_map.values: 95 | phone_str_map[s1] = s3 96 | 97 | int_str_map = {} 98 | for key, val in label_map.items(): 99 | int_str_map[val] = phone_str_map[phoneme_map[key]] 100 | 101 | return int_str_map
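# Hand-checked examples of the two helpers defined below:
#   sanity_check('a a b a a a c c')  ->  'a a a a a a c c'  (isolated 'b' fixed)
#   edit_dist('a c', 'a b c')        ->  1                  (one insertion)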
102 | 103 | 104 | def sanity_check(seq, sep=' '): 105 | """smooth predictions by correcting frames that disagree with their neighbors""" 106 | seq = seq.split() 107 | 108 | for i in range(1, len(seq) - 1): 109 | # both neighbors agree but the current frame differs: adopt their label 110 | if seq[i - 1] == seq[i + 1] and seq[i] != seq[i - 1]: 111 | seq[i] = seq[i - 1] 112 | # the current frame differs from both neighbors: fall back to previous 113 | elif seq[i] != seq[i + 1] and seq[i] != seq[i - 1]: 114 | seq[i] = seq[i - 1] 115 | 116 | return sep.join(seq) 117 | 118 | 119 | def edit_dist(seq1, seq2): 120 | """Levenshtein (edit) distance between two space-separated sequences""" 121 | seq1 = seq1.split() 122 | seq2 = seq2.split() 123 | 124 | d = np.zeros((len(seq1) + 1) * (len(seq2) + 1), dtype=np.uint16)  # uint8 would overflow past 255 tokens 125 | d = d.reshape((len(seq1) + 1, len(seq2) + 1)) 126 | 127 | for i in range(len(seq1) + 1): 128 | for j in range(len(seq2) + 1): 129 | if i == 0: 130 | d[0][j] = j 131 | elif j == 0: 132 | d[i][0] = i 133 | 134 | for i in range(1, len(seq1) + 1): 135 | for j in range(1, len(seq2) + 1): 136 | if seq1[i - 1] == seq2[j - 1]: 137 | d[i][j] = d[i - 1][j - 1] 138 | else: 139 | substitution = d[i - 1][j - 1] + 1 140 | insertion = d[i][j - 1] + 1 141 | deletion = d[i - 1][j] + 1 142 | d[i][j] = min(substitution, insertion, deletion) 143 | 144 | return d[len(seq1)][len(seq2)] 145 | 146 | 147 | def validate_editdist(trainX, trainY, valid_speakers, forward, 148 | dropout_rate, int_str_map): 149 | """Calculate the average edit distance on validation set""" 150 | if dropout_rate is None: 151 | stop = None 152 | else: 153 | stop = 1.0 / (1 - dropout_rate) 154 | 155 | valid_seq = [] 156 | valid_y_seq = [] 157 | for speaker in valid_speakers: 158 | 159 | if stop is None: 160 | ypred = forward(trainX[speaker]) 161 | else: 162 | ypred = forward(trainX[speaker], stop) 163 | 164 | pred_seq = ' '.join([int_str_map[np.argmax(pred)] for pred in ypred]) 165 | pred_seq = sanity_check(pred_seq) 166 | 167 | phoneme_seq = '' 168 | now = '' 169 | for p in pred_seq.split(): 170 | if p != now: 171 | phoneme_seq += (p + ' ') 172 | now = p 173 | 174 | yhat_seq = [int_str_map[np.argmax(l)] for l in trainY[speaker]] 175 | yhat = [] 176 | y_now = '' 177 | 178 | for y in yhat_seq: 179 | if y != y_now: 180 | yhat.append(y) 181 | y_now = y 182 | 183 | yhat = ' '.join(yhat) 184 | 185 | valid_seq.append(phoneme_seq.strip()) 186 | valid_y_seq.append(yhat) 187 | 188 | leng = len(valid_seq) 189 | dists = [edit_dist(valid_seq[i], valid_y_seq[i]) for i in range(leng)] 190 | valid_dist = np.mean(dists) 191 | 192 | return valid_dist 193 | 194 | 195 | def test_predict(testfile, testprob_file, int_str_map, forward, dropout_rate, 196 | filename='test.csv', base_dir='../Data/', save_prob=False, 197 | prob_filename='test_probs'): 198 | """predict on test set and output the file""" 199 | test_data = load_data(base_dir + testfile) 200 | testX, _ = make_data(test_data, base_dir + testprob_file) 201 | # the speaker list is rebuilt below from the frame index to keep file order 202 | 203 | if dropout_rate is None: 204 | stop = None 205 | else: 206 | stop = 1.0 / (1 - dropout_rate) 207 | 208 | test_speakers = [] 209 | now_speak = '' 210 | for s in test_data.index: 211 | speaker = '_'.join(s.split('_')[:2]) 212 | if speaker != now_speak: 213 | test_speakers.append(speaker) 214 | now_speak = speaker 215 | 216 | test_seq = [] 217 | for speaker in test_speakers: 218 | 219 | if stop is None: 220 | pred_seq = forward(testX[speaker]) 221 | else: 222 | pred_seq = forward(testX[speaker], stop) 223 | 224 | pred_seq = [int_str_map[np.argmax(pred)] for pred in pred_seq] 225 | pred_seq = ' '.join(pred_seq) 226 | pred_seq = sanity_check(pred_seq)
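# The loop below collapses runs of identical frame predictions into single
# phonemes; the mapped characters (from 48_idx_chr.map) are joined without
# separators to form the submission string.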
227 | 228 | seq = '' 229 | now = '' 230 | for p in pred_seq.split(): 231 | if p != now: 232 | seq += p 233 | now = p 234 | 235 | test_seq.append(seq) 236 | 237 | if save_prob: 238 | probs = [] 239 | for speaker in test_speakers: 240 | if stop is None: 241 | pred_seq = forward(testX[speaker]) 242 | else: 243 | pred_seq = forward(testX[speaker], stop) 244 | 245 | probs.append(pred_seq) 246 | np.save(prob_filename, [probs, test_speakers]) 247 | 248 | test_pred = {'id': test_speakers, 'phone_sequence': test_seq} 249 | test_df = pd.DataFrame(data=test_pred) 250 | test_df.to_csv(filename, index=None) 251 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Code coverage measurement for Python 2 | # https://pypi.python.org/pypi/coverage/4.0.3 3 | coverage==4.0.3 4 | 5 | # pytest: simple powerful testing with Python 6 | # https://pypi.python.org/pypi/pytest/2.9.2 7 | pytest==2.9.2 8 | 9 | # Pytest plugin for measuring coverage. 10 | # https://pypi.python.org/pypi/pytest-cov/2.2.0 11 | pytest-cov==2.2.0 12 | 13 | # the modular source code checker: pep8, pyflakes and co 14 | # https://pypi.python.org/pypi/flake8/2.5.4 15 | flake8==2.5.4 16 | 17 | # NumPy: array processing for numbers, strings, records, and objects. 18 | # https://pypi.python.org/pypi/numpy 19 | numpy==1.11.1 20 | 21 | # Powerful data structures for data analysis, time series, and statistics 22 | # https://pypi.python.org/pypi/pandas/0.18.1 23 | pandas==0.18.1 24 | 25 | # SciPy: an ecosystem of open-source software for mathematics, science, and engineering. 26 | # https://pypi.python.org/pypi/scipy/0.18.0rc2 27 | scipy==0.18.0 28 | 29 | # nose extends unittest to make testing easier 30 | # https://pypi.python.org/pypi/nose/1.3.7 31 | nose==1.3.7 32 | 33 | # Theano: define, optimize, and efficiently evaluate multi-dimensional arrays 34 | # https://pypi.python.org/pypi/Theano 35 | theano==0.8.2 36 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_run.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-10-15 01:00:07 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 22:29:45 6 | 7 | from unittest import TestCase 8 | from DNN.run_DNN import run_model 9 | from RNN_LSTM.run_RNN import run_RNN_model 10 | from RNN_LSTM.run_LSTM import run_LSTM_model 11 | from HMM_topRNN.run_HMM import run_HMM 12 | 13 | 14 | class Test_running(TestCase): 15 | 16 | def test_DNN(self): 17 | run_model('train.data', 'train.label', 'test.data', 18 | base_dir='./Data/', save_prob=True, epoch=3) 19 | 20 | def test_RNN(self): 21 | run_RNN_model('train.data', 'train.label', 'ytrain_prob.npy', 22 | 'test.data', 'ytest_prob.npy', base_dir='./Data/', 23 | epoch=3) 24 | 25 | run_RNN_model('train.data', 'train.label', 'ytrain_prob.npy', 26 | 'test.data', 'ytest_prob.npy', base_dir='./Data/', 27 | acti_func='tanh', update_by='NAG', epoch=3) 28 | 29 | run_RNN_model('train.data', 'train.label', 'ytrain_prob.npy', 30 | 
'test.data', 'ytest_prob.npy', base_dir='./Data/', 31 | acti_func='sigmoid', update_by='momentum', epoch=3) 32 | 33 | def test_LSTM(self): 34 | run_LSTM_model('train.data', 'train.label', 'ytrain_prob.npy', 35 | 'test.data', 'ytest_prob.npy', base_dir='./Data/', 36 | epoch=3, lr=1e-5) 37 | 38 | def test_HMM(self): 39 | run_HMM('RNN_trainprob.npy', 'train.label', 'RNN_testprob.npy', 40 | duration=3, blending=True, n_bag=10, valid_ratio=0.1, 41 | base_dir='./Data/') 42 | --------------------------------------------------------------------------------
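The post-processing helpers in RNN_LSTM/shortcuts.py run without Theano, so they can be exercised on their own. A minimal sketch, assuming the repository root is the working directory and on sys.path; the frame-level predictions are made up for illustration, and the expected outputs are hand-checked against the implementations above:

import sys
sys.path.append('.')  # repository root, so RNN_LSTM resolves as a package

from RNN_LSTM.shortcuts import sanity_check, edit_dist

# hypothetical frame-level phone predictions, one symbol per frame
frames = 'a a b a a a c c'
smoothed = sanity_check(frames)   # the isolated 'b' is corrected to 'a'

# collapse consecutive duplicates, as validate_editdist/test_predict do
collapsed = []
for p in smoothed.split():
    if not collapsed or collapsed[-1] != p:
        collapsed.append(p)
pred = ' '.join(collapsed)        # -> 'a c'

print(pred, edit_dist(pred, 'a b c'))   # -> a c 1 (one insertion away)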