├── .coveragerc.yml ├── .travis.yml ├── DNN ├── run_DNN.py └── utils.py ├── Data ├── 48_39.map ├── 48_idx_chr.map ├── RNN_testprob.npy ├── RNN_trainprob.npy ├── test.data ├── train.data ├── train.label ├── ytest_prob.npy └── ytrain_prob.npy ├── HMM_topRNN ├── HMM_utils.py └── run_HMM.py ├── README.md ├── RNN_LSTM ├── LSTM_utils.py ├── RNN_utils.py ├── activation.py ├── optimize.py ├── run_LSTM.py ├── run_RNN.py └── shortcuts.py ├── requirements.txt └── tests ├── __init__.py └── test_run.py /.coveragerc.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/.coveragerc.yml -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | cache: pip 3 | sudo: required 4 | 5 | 6 | python: 7 | - "3.5" 8 | 9 | 10 | before_install: 11 | - pip install -U pip 12 | - pip install wheel 13 | - pip install coveralls 14 | - sudo apt-get update 15 | 16 | 17 | env: 18 | global: 19 | - PIP_WHEEL_DIR=$HOME/.cache/pip/wheels 20 | - PIP_FIND_LINKS=file://$HOME/.cache/pip/wheels 21 | - THEANO_FLAGS='gcc.cxxflags="-march=core2",floatX=float32' 22 | 23 | 24 | install: 25 | - pip wheel -r requirements.txt 26 | - pip install -r requirements.txt 27 | 28 | 29 | script: 30 | - py.test . --cov=./ 31 | - flake8 ./ 32 | 33 | 34 | after_success: 35 | - coveralls 36 | -------------------------------------------------------------------------------- /DNN/run_DNN.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-10-11 18:46:54 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 23:04:16 6 | # flag: THEANO_FLAGS='floatX=float32' 7 | 8 | import numpy as np 9 | import pandas as pd 10 | import theano as th 11 | import theano.tensor as T 12 | import gc 13 | import os 14 | import sys 15 | 16 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa 17 | 18 | from datetime import datetime 19 | from utils import load_data, load_label, initialize_NNet, maxout, \ 20 | softmax, update, gen_y_hat, accuracy 21 | 22 | 23 | def construct_DNN(n_input, n_output, n_hid_layers=2, archi=128, 24 | lr=1e-3, batchsize=40, dropout_rate=0.2, moment=0.95): 25 | """ 26 | Initialize and construct the deep neural network with dropout 27 | update the DNN using momentum and minibatch 28 | archi: number of neurons of each hidden layer 29 | """ 30 | # decide dropout or not: stop_dropout > 1.05 means no dropout 31 | x = T.fmatrix() 32 | y_hat = T.fmatrix() 33 | stop_dropout = T.scalar() 34 | 35 | # initialize parameters 36 | Ws, bs, cache_Ws, cache_bs = initialize_NNet(n_input, n_output, 37 | archi, n_hid_layers) 38 | 39 | # ############ construct the neural network ############### 40 | Zs = [] 41 | As = [] 42 | 43 | # input layer 44 | Zs.append(T.dot(x, Ws[0]) + bs[0].dimshuffle('x', 0)) 45 | As.append(maxout(Zs[0], stop_dropout, archi, dropout_rate) / stop_dropout) 46 | 47 | # hidden layers 48 | for i in range(n_hid_layers): 49 | Zs.append(T.dot(As[i], Ws[i + 1]) + bs[i + 1].dimshuffle('x', 0)) 50 | act_out = maxout(Zs[i + 1], stop_dropout, archi, dropout_rate) 51 | As.append(act_out / stop_dropout) 52 | 
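    # NOTE: maxout pairs up the archi pre-activation units and takes
    # elementwise maxima, so each activation in As has only archi/2 columns;
    # that is why the hidden/output weights in utils.py are (archi // 2, ...)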
53 | # output layer 54 | z_out = T.dot(As[n_hid_layers], Ws[n_hid_layers + 1]) 55 | Zs.append(z_out + bs[n_hid_layers + 1].dimshuffle('x', 0)) 56 | y = softmax(Zs[-1] / stop_dropout) 57 | 58 | # ############ end of network construction ############### 59 | 60 | forward = th.function([x, stop_dropout], y) 61 | parameters = Ws + bs 62 | moment_cache = cache_Ws + cache_bs 63 | 64 | # objective is the categorical cross-entropy 65 | Cost = ((-T.log((y * y_hat).sum(axis=1))).sum()) / batchsize 66 | 67 | # calculate gradients 68 | grads = T.grad(Cost, parameters, disconnected_inputs='ignore') 69 | 70 | # update parameters using momentum 71 | update_func = update(parameters, grads, moment_cache, lr, moment) 72 | gradient_update = th.function(inputs=[x, y_hat, stop_dropout], 73 | updates=update_func, outputs=Cost) 74 | 75 | return gradient_update, forward 76 | 77 | 78 | def train_model(N, epoch, batchsize, gradient_update, feed_forward, 79 | data, label_data, n_output, dropout_rate): 80 | """train the deep neural network""" 81 | train_start = datetime.now() 82 | obj_history = [] 83 | valid_accu = [] 84 | cache = {} 85 | 86 | for j in range(epoch): 87 | indexes = np.random.permutation(N - 8) 88 | objective = 0 89 | 90 | # train the model 91 | for i in range(int(N / batchsize)): 92 | if i % 1000 == 0: 93 | gc.collect() 94 | 95 | # make the minibatch data 96 | use_inds = indexes[i * batchsize:(i + 1) * batchsize] + 4 97 | batch_X = [] 98 | 99 | for ind in use_inds: 100 | if ind < 4: 101 | sils = np.zeros((4 - ind) * data.shape[1]) 102 | dat = data.iloc[:(ind + 5)].values.ravel() 103 | batch_X.append(np.concatenate((sils, dat))) 104 | 105 | elif ind > (N - 5): 106 | dat = data.iloc[(ind - 4):].values.ravel() 107 | sils = np.zeros((5 - N + ind) * data.shape[1]) 108 | batch_X.append(np.concatenate((dat, sils))) 109 | 110 | else: 111 | dat = data.iloc[(ind - 4):(ind + 5)].values.ravel() 112 | batch_X.append(dat) 113 | 114 | batch_Y = [gen_y_hat(ind, n_output, data, label_data, cache) 115 | for ind in use_inds] 116 | # update the model 117 | objective += gradient_update(batch_X, batch_Y, 1)  # stop_dropout=1 keeps dropout on 118 | 119 | obj_history.append(objective / int(N / batchsize)) 120 | print('\tepoch: %d; obj: %.4f' % (j + 1, obj_history[-1])) 121 | 122 | # validation set 123 | valid_accu.append(accuracy(N, data.shape[0], data, feed_forward, 124 | n_output, label_data, cache, dropout_rate)) 125 | 126 | print("\tCost: %.4f; valid accu: %.2f %%, %.4f seconds used.\n" % 127 | (obj_history[-1], 100 * valid_accu[-1], 128 | (datetime.now() - train_start).total_seconds())) 129 | # early stop 130 | if (valid_accu[0] != valid_accu[-1]): 131 | if valid_accu[-2] * 0.98 > valid_accu[-1]: 132 | print("Validation accuracy starts decreasing, stop training") 133 | break 134 | 135 | return obj_history, valid_accu, cache 136 | 137 | 
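# NOTE: sketch of the assumed input layout — each training example is a
# window of 9 consecutive frames (the centre frame plus 4 on each side),
# flattened into one vector and zero-padded near utterance boundaries; e.g.
# with a hypothetical 69 features per frame, each row has 9 * 69 values.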
138 | def test_predict(test_file, label_map, forward, base_dir, dropout_rate, 139 | save_prob=False, filename='test_predict.csv'): 140 | """predict on test set and output the file""" 141 | print("Start predicting...") 142 | 143 | test_data = load_data(test_file) 144 | test_X = [] 145 | test_N = len(test_data) 146 | # generate test input data 147 | for i in range(test_N): 148 | if i < 4: 149 | sils = np.zeros((4 - i) * test_data.shape[1]) 150 | dat = test_data.iloc[:(i + 5)].values.ravel() 151 | test_X.append(np.concatenate((sils, dat))) 152 | 153 | elif i > (test_N - 5): 154 | dat = test_data.iloc[(i - 4):].values.ravel() 155 | sils = np.zeros((5 - test_N + i) * test_data.shape[1]) 156 | test_X.append(np.concatenate((dat, sils))) 157 | 158 | else: 159 | test_X.append(test_data.iloc[(i - 4):(i + 5)].values.ravel()) 160 | 161 | y_test_pred = forward(test_X, np.float32(1 / (1 - dropout_rate))) 162 | 163 | if save_prob: 164 | np.save('ytest_prob', y_test_pred) 165 | 166 | # find the mapping from int to phoneme 167 | phoneme_map = {} 168 | pmap = pd.read_csv(base_dir + '48_39.map', sep='\t', header=None) 169 | for p1, p2 in pmap.values: 170 | phoneme_map[p1] = p2 171 | 172 | int_phoneme_map = {} 173 | for key, val in label_map.items(): 174 | int_phoneme_map[val] = phoneme_map[key] 175 | 176 | test_phon = [int_phoneme_map[np.argmax(y_vec)] for y_vec in y_test_pred] 177 | data = {'Prediction': test_phon, 'Id': test_data.index.values} 178 | test_df = pd.DataFrame(data=data) 179 | test_df.to_csv(filename, index=None) 180 | 181 | 182 | def run_model(train_file, train_labfile, test_file=None, valid_ratio=0.1, 183 | batchsize=240, epoch=10, neurons=36, n_hiddenlayer=2, lr=1e-2, 184 | base_dir='../Data/', save_prob=False, dropout_rate=0.2): 185 | """Run the deep neural network with dropout""" 186 | print("Start") 187 | st = datetime.now() 188 | 189 | data = load_data(base_dir + train_file) 190 | label_data, label_map = load_label(base_dir + train_labfile) 191 | 192 | # window size = 9, output = 48 phonemes 193 | n_input = data.shape[1] * 9 194 | n_output = 48 195 | N = int(data.shape[0] * (1 - valid_ratio)) 196 | 197 | print("Done loading data. Start constructing the model...") 198 | functions = construct_DNN(n_input, n_output, archi=neurons, 199 | n_hid_layers=n_hiddenlayer, lr=lr, 200 | dropout_rate=dropout_rate) 201 | gradient_update, feed_forward = functions 202 | 203 | print("Finished constructing the model. Start training...") 204 | result = train_model(N, epoch, batchsize, gradient_update, 205 | feed_forward, data, label_data, n_output, 206 | dropout_rate) 207 | obj_history, valid_accu, cache = result 208 | 209 | # train accuracy 210 | train_accu = accuracy(0, N, data, feed_forward, n_output, 211 | label_data, cache, dropout_rate) 212 | print("Training Accuracy: %.4f %%" % (100 * train_accu)) 213 | 214 | # validation 215 | valid_accu = accuracy(N, data.shape[0], data, feed_forward, 216 | n_output, label_data, cache, dropout_rate) 217 | print("Validation Accuracy: %.4f %%" % (100 * valid_accu)) 218 | 219 | if save_prob: 220 | accuracy(0, data.shape[0], data, feed_forward, n_output, 221 | label_data, cache, dropout_rate, 222 | save_pred=True, save_name='ytrain_prob') 223 | 224 | if test_file: 225 | test_predict(base_dir + test_file, label_map, feed_forward, 226 | base_dir, dropout_rate, save_prob=save_prob) 227 | 228 | print("Done, Using %s." % str(datetime.now() - st))
229 | 230 | 231 | def main(): 232 | run_model('train.data', 'train.label', 'test.data', 233 | neurons=256, n_hiddenlayer=2, save_prob=True) 234 | 235 | 236 | if __name__ == '__main__': 237 | main() 238 | -------------------------------------------------------------------------------- /DNN/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-10-12 16:25:45 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 18:39:14 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import theano as th 10 | import theano.tensor as T 11 | import gc 12 | 13 | from theano.ifelse import ifelse 14 | from theano.tensor.shared_randomstreams import RandomStreams 15 | 16 | 17 | def load_data(filename, nrows=None, normalize=True): 18 | """load data from file, first column as index, dtype=float32""" 19 | ind = pd.read_csv(filename, sep=' ', header=None, index_col=0, nrows=5) 20 | dtype_dict = {c: np.float32 for c in ind.columns} 21 | data = pd.read_csv(filename, sep=' ', header=None, index_col=0, 22 | dtype=dtype_dict, nrows=nrows) 23 | # normalize 24 | if normalize: 25 | data = (data - data.mean()) / data.std() 26 | gc.collect() 27 | 28 | return data 29 | 30 | 31 | def load_label(filename): 32 | label_data = pd.read_csv(filename, header=None, index_col=0) 33 | label_map = {} 34 | for ind, lab in enumerate(np.unique(label_data.values)): 35 | label_map[lab] = ind 36 | 37 | label_data = label_data.applymap(lambda x: label_map[x]) 38 | gc.collect() 39 | 40 | return label_data, label_map 41 | 42 | 43 | def random_number(shape, scale=1): 44 | return (scale * np.random.randn(*shape)).astype('float32') 45 | 46 | 47 | def zero_number(shape): 48 | return np.zeros(shape).astype('float32') 49 | 50 | 51 | def initialize_NNet(n_input, n_output, archi=128, 52 | n_hid_layers=3, scale=0.033): 53 | """initialize the NNet parameters, archi: hidden layer neurons""" 54 | Ws = [] 55 | bs = [] 56 | cache_Ws = [] 57 | cache_bs = [] 58 | 59 | # input layer 60 | Ws.append(th.shared(random_number([n_input, archi], scale=scale))) 61 | cache_Ws.append(th.shared(zero_number((n_input, archi)))) 62 | 63 | bs.append(th.shared(random_number([archi], scale=scale))) 64 | cache_bs.append(th.shared(zero_number(archi))) 65 | 66 | # hidden layers (maxout halves the width, hence archi // 2 input units) 67 | for i in range(n_hid_layers): 68 | Ws.append(th.shared(random_number([archi // 2, archi], scale=scale))) 69 | cache_Ws.append(th.shared(zero_number((archi // 2, archi)))) 70 | 71 | bs.append(th.shared(random_number([archi], scale=scale))) 72 | cache_bs.append(th.shared(zero_number(archi))) 73 | 74 | # output layer 75 | Ws.append(th.shared(random_number([archi // 2, n_output], scale=scale))) 76 | cache_Ws.append(th.shared(zero_number((archi // 2, n_output)))) 77 | 78 | bs.append(th.shared(random_number([n_output], scale=scale))) 79 | cache_bs.append(th.shared(zero_number(n_output))) 80 | 81 | return Ws, bs, cache_Ws, cache_bs 82 | 83 | 84 | def maxout(Z, stop_dropout, archi, dropout_rate, seed=5432): 85 | th.config.floatX = 'float32' 86 | Z_out = T.maximum(Z[:, :int(archi / 2)], Z[:, int(archi / 2):]) 87 | prob = (1 - dropout_rate) 88 | srng = RandomStreams(seed=seed) 89 | 90 | return ifelse(T.lt(stop_dropout, 1.05), 91 | Z_out * srng.binomial(size=T.shape(Z_out), 92 | p=prob).astype('float32'), 93 | Z_out) 94 | 95 | 96 | def softmax(z): 97 | Z = T.exp(z) 98 | results, _ = th.scan(lambda x: x / T.sum(x), sequences=Z) 99 | return results 100 | 101 | 102 | def 
update(para, grad, moment_cache, lr, moment): 103 | """theano update auxiliary function: use SGD plus momentum""" 104 | param_update = [] 105 | cache_update = [] 106 | 107 | for ix in range(len(grad)): 108 | change = moment * moment_cache[ix] - lr * grad[ix] 109 | param_update.append((para[ix], para[ix] + change)) 110 | cache_update.append((moment_cache[ix], change)) 111 | 112 | return param_update + cache_update 113 | 114 | 115 | def gen_y_hat(i, n_output, data, label_data, cache): 116 | """give the np array of y_hat""" 117 | try: 118 | return cache[i] 119 | 120 | except KeyError: 121 | y_h = np.zeros(n_output, dtype=np.float32) 122 | y_h[label_data[1].loc[data.index[i]]] = 1 123 | cache[i] = y_h 124 | 125 | return cache[i] 126 | 127 | 128 | def accuracy(from_ind, to_ind, data, forward, n_output, label_data, 129 | cache, dropout_rate, save_pred=False, save_name='pred_prob'): 130 | """compute the accuracy of the model""" 131 | X = [] 132 | y = [] 133 | 134 | for ind in range(from_ind, to_ind): 135 | if ind < from_ind + 4: 136 | sils = np.zeros((from_ind + 4 - ind) * data.shape[1]) 137 | dat = data.iloc[from_ind:(ind + 5)].values.ravel() 138 | X.append(np.concatenate((sils, dat))) 139 | 140 | elif ind > (to_ind - 5): 141 | dat = data.iloc[(ind - 4):to_ind].values.ravel() 142 | sils = np.zeros((5 - to_ind + ind) * data.shape[1]) 143 | X.append(np.concatenate((dat, sils))) 144 | 145 | else: 146 | X.append(data.iloc[(ind - 4):(ind + 5)].values.ravel()) 147 | 148 | y.append(gen_y_hat(ind, n_output, data, label_data, cache)) 149 | 150 | # stop_dropout > 1.05 the model won't do dropout 151 | y_pred = forward(X, 1 / (1 - dropout_rate)) 152 | if save_pred: 153 | np.save(save_name, y_pred) 154 | 155 | match = 0 156 | for i, ind in enumerate(range(from_ind, to_ind)): 157 | if np.argmax(y_pred[i]) == label_data[1].iloc[ind]: 158 | match += 1 159 | 160 | return match / len(y_pred) 161 | -------------------------------------------------------------------------------- /Data/48_39.map: -------------------------------------------------------------------------------- 1 | aa aa 2 | ae ae 3 | ah ah 4 | ao aa 5 | aw aw 6 | ax ah 7 | ay ay 8 | b b 9 | ch ch 10 | cl sil 11 | d d 12 | dh dh 13 | dx dx 14 | eh eh 15 | el l 16 | en n 17 | epi sil 18 | er er 19 | ey ey 20 | f f 21 | g g 22 | hh hh 23 | ih ih 24 | ix ih 25 | iy iy 26 | jh jh 27 | k k 28 | l l 29 | m m 30 | ng ng 31 | n n 32 | ow ow 33 | oy oy 34 | p p 35 | r r 36 | sh sh 37 | sil sil 38 | s s 39 | th th 40 | t t 41 | uh uh 42 | uw uw 43 | vcl sil 44 | v v 45 | w w 46 | y y 47 | zh sh 48 | z z 49 | -------------------------------------------------------------------------------- /Data/48_idx_chr.map: -------------------------------------------------------------------------------- 1 | aa 0 a 2 | ae 1 b 3 | ah 2 c 4 | ao 3 d 5 | aw 4 e 6 | ax 5 f 7 | ay 6 g 8 | b 7 h 9 | ch 8 i 10 | cl 9 j 11 | d 10 k 12 | dh 11 l 13 | dx 12 m 14 | eh 13 n 15 | el 14 o 16 | en 15 p 17 | epi 16 q 18 | er 17 r 19 | ey 18 s 20 | f 19 t 21 | g 20 u 22 | hh 21 v 23 | ih 22 w 24 | ix 23 x 25 | iy 24 y 26 | jh 25 z 27 | k 26 A 28 | l 27 B 29 | m 28 C 30 | n 29 D 31 | ng 30 E 32 | ow 31 F 33 | oy 32 G 34 | p 33 H 35 | r 34 I 36 | s 35 J 37 | sh 36 K 38 | sil 37 L 39 | t 38 M 40 | th 39 N 41 | uh 40 O 42 | uw 41 P 43 | v 42 Q 44 | vcl 43 R 45 | w 44 S 46 | y 45 T 47 | z 46 U 48 | zh 47 V 49 | -------------------------------------------------------------------------------- /Data/RNN_testprob.npy: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/Data/RNN_testprob.npy -------------------------------------------------------------------------------- /Data/RNN_trainprob.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/Data/RNN_trainprob.npy -------------------------------------------------------------------------------- /Data/ytest_prob.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/Data/ytest_prob.npy -------------------------------------------------------------------------------- /Data/ytrain_prob.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/Data/ytrain_prob.npy -------------------------------------------------------------------------------- /HMM_topRNN/HMM_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-09 16:02:20 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 19:06:57 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import gc 10 | 11 | 12 | def load_label(filename): 13 | """load label data""" 14 | label_data = pd.read_csv(filename, header=None, index_col=0) 15 | label_map = {} 16 | for ind, lab in enumerate(np.unique(label_data.values)): 17 | label_map[lab] = ind 18 | 19 | label_data = label_data.applymap(lambda x: label_map[x]) 20 | gc.collect() 21 | 22 | return label_data, label_map 23 | 24 | 25 | def load_str_map(label_map, base_dir='../Data/'): 26 | """find the mapping from int to phoneme""" 27 | phoneme_map = {} 28 | phone_str_map = {} 29 | pmap = pd.read_csv(base_dir + '48_39.map', sep='\t', header=None) 30 | str_map = pd.read_csv(base_dir + '48_idx_chr.map', 31 | header=None, delim_whitespace=True) 32 | 33 | for p1, p2 in pmap.values: 34 | phoneme_map[p1] = p2 35 | 36 | for s1, s2, s3 in str_map.values: 37 | phone_str_map[s1] = s3 38 | 39 | int_str_map = {} 40 | for key, val in label_map.items(): 41 | int_str_map[val] = phone_str_map[phoneme_map[key]] 42 | 43 | return int_str_map 44 | 45 | 46 | def edit_dist(seq1, seq2): 47 | """edit distance""" 48 | seq1 = seq1.split() 49 | seq2 = seq2.split() 50 | 51 | d = np.zeros((len(seq1) + 1) * (len(seq2) + 1), dtype=np.uint8) 52 | d = d.reshape((len(seq1) + 1, len(seq2) + 1)) 53 | 54 | for i in range(len(seq1) + 1): 55 | for j in range(len(seq2) + 1): 56 | if i == 0: 57 | d[0][j] = j 58 | elif j == 0: 59 | d[i][0] = i 60 | 61 | for i in range(1, len(seq1) + 1): 62 | for j in range(1, len(seq2) + 1): 63 | if seq1[i - 1] == seq2[j - 1]: 64 | d[i][j] = d[i - 1][j - 1] 65 | else: 66 | substitution = d[i - 1][j - 1] + 1 67 | insertion = d[i][j - 1] + 1 68 | deletion = d[i - 1][j] + 1 69 | d[i][j] = min(substitution, insertion, deletion) 70 | 71 | return d[len(seq1)][len(seq2)] 72 | 73 | 74 | def sanity_check(seq, sep=' '): 75 | """Sanity Check function to correct unreasonable predictions""" 76 | seq = seq.split() 77 | 78 | for i in range(1, len(seq) - 1): 79 | # front == behind != me 80 | 
if seq[i - 1] == seq[i + 1] and seq[i] != seq[i - 1]: 81 | seq[i] = seq[i - 1] 82 | # me, front, behind are different 83 | elif seq[i] != seq[i + 1] and seq[i] != seq[i - 1]: 84 | seq[i] = seq[i - 1] 85 | 86 | return sep.join(seq) 87 | -------------------------------------------------------------------------------- /HMM_topRNN/run_HMM.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-09 15:54:45 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 22:35:03 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import os 10 | import sys 11 | 12 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa 13 | 14 | from HMM_utils import load_label, load_str_map, sanity_check, edit_dist 15 | from collections import defaultdict, Counter 16 | from datetime import datetime 17 | 18 | 19 | def make_transMat(labels, speakers, n_phoneme): 20 | """computing the transition matrix using label sequence at hand""" 21 | transition_prob = np.zeros((n_phoneme, n_phoneme)) 22 | 23 | for speaker in speakers: 24 | previous = labels[speaker][0][0] 25 | for phoneme in labels[speaker][1:]: 26 | transition_prob[phoneme[0], previous] += 1 27 | previous = phoneme[0] 28 | 29 | transition_prob = np.log(transition_prob + 2) 30 | transition_prob /= transition_prob.sum(axis=0) 31 | 32 | return transition_prob 33 | 34 | 35 | def HMM_predict(seq_probs, labels, speakers, n_phoneme, int_str_map, 36 | test_probs=None, duration=3, blending=False, n_bag=1): 37 | """generate a bag of prediction sequences for each speaker""" 38 | if not blending: 39 | n_bag = 1 40 | 41 | predict_bags = [] 42 | test_bags = [] 43 | for num in range(n_bag): 44 | predictions = [] 45 | test_predicts = [] 46 | 47 | # calculate transition prob 48 | if blending: 49 | bagspeakers = np.random.choice(speakers, len(speakers)) 50 | transition_prob = make_transMat(labels, bagspeakers, n_phoneme) 51 | else: 52 | transition_prob = make_transMat(labels, speakers, n_phoneme) 53 | 54 | for seq in seq_probs: 55 | prob_score = np.ones((n_phoneme,)) / n_phoneme 56 | predict_seq = defaultdict(list) 57 | 58 | for vec in seq: 59 | prob_matrix = prob_score * (vec**duration) * transition_prob 60 | prob_score = np.max(prob_matrix, axis=1) 61 | pred_inds = np.argmax(prob_matrix, axis=1) 62 | # normalize 63 | prob_score /= prob_score.sum() 64 | 65 | # compute the predicted phoneme with starting phoneme i 66 | for i in range(n_phoneme): 67 | predict_seq[i].append(int_str_map[pred_inds[i]]) 68 | 69 | # choose the sequence with the highest score 70 | predictions.append(predict_seq[np.argmax(prob_score)]) 71 | 72 | predict_bags.append(predictions) 73 | 74 | # test set 75 | if test_probs is None: 76 | continue 77 | 78 | for test_seq in test_probs: 79 | test_score = np.ones((n_phoneme,)) / n_phoneme 80 | testpred_seq = defaultdict(list) 81 | 82 | for test_vec in test_seq: 83 | test_matrix = transition_prob * (test_vec**duration) 84 | test_matrix *= test_score 85 | test_score = np.max(test_matrix, axis=1) 86 | test_inds = np.argmax(test_matrix, axis=1) 87 | # normalize 88 | test_score /= test_score.sum() 89 | 90 | # compute the predicted phoneme with starting phoneme i 91 | for i in range(n_phoneme): 92 | testpred_seq[i].append(int_str_map[test_inds[i]]) 93 | 94 | # choose the sequence with the highest score 95 | test_predicts.append(testpred_seq[np.argmax(test_score)]) 96 | 97 | test_bags.append(test_predicts) 98 | 99 | return predict_bags, 
test_bags 100 | 101 | 102 | def voting(predict_bags): 103 | """voting of a bag of sequences to make the final sequence""" 104 | result = [] 105 | for i in range(len(predict_bags[0])): 106 | bag_seqs = np.array([pred[i] for pred in predict_bags]) 107 | seq = [Counter(l).most_common()[0][0] for l in bag_seqs.T] 108 | result.append(seq) 109 | 110 | return result 111 | 112 | 113 | def output_seq(pred_seq, sep=''): 114 | pred_seq = sanity_check(' '.join(pred_seq)) 115 | 116 | phoneme_seq = '' 117 | now = '' 118 | for p in pred_seq.split(): 119 | if p != now: 120 | phoneme_seq += (p + sep) 121 | now = p 122 | 123 | return phoneme_seq.strip() 124 | 125 | 126 | def make_label_seq(labels, speakers, int_str_map): 127 | """transform the labels to str sequence""" 128 | label_result = [] 129 | 130 | for speaker in speakers: 131 | seq = ' '.join([int_str_map[ind[0]] for ind in labels[speaker]]) 132 | label_result.append(output_seq(seq, sep=' ')) 133 | 134 | return label_result 135 | 136 | 137 | def run_HMM(train_probfile, train_labfile, test_probfile=None, n_phoneme=48, 138 | duration=3, blending=False, n_bag=10, valid_ratio=0.1, 139 | base_dir='../Data/'): 140 | print("Start") 141 | st = datetime.now() 142 | 143 | # loading data 144 | label_data, label_map = load_label(base_dir + train_labfile) 145 | train_probs, train_speakers = np.load(base_dir + train_probfile) 146 | int_str_map = load_str_map(label_map, base_dir) 147 | 148 | if test_probfile: 149 | test_probs, test_speakers = np.load(base_dir + test_probfile) 150 | else: 151 | test_probs = None 152 | 153 | print('Done loading data, using %s.\n' % str(datetime.now() - st)) 154 | 155 | print('Start using HMM for predictions...') 156 | # computing label sequence for each speaker 157 | labels = {} 158 | for speaker in train_speakers: 159 | speaker_indexes = label_data.index.str.startswith(speaker) 160 | labels[speaker] = label_data.iloc[speaker_indexes].values 161 | 162 | # split into training and validation set 163 | n_speaker = len(train_speakers) 164 | rand_inds = np.random.permutation(n_speaker) 165 | valid_inds = rand_inds[:int(n_speaker * valid_ratio)] 166 | train_inds = rand_inds[int(n_speaker * valid_ratio):] 167 | 168 | # predict sequences using HMM with blending 169 | bags = HMM_predict(train_probs, labels, train_speakers[train_inds], 170 | n_phoneme, int_str_map, test_probs, duration, blending, 171 | n_bag) 172 | predict_bags, test_bags = bags 173 | predict_result = voting(predict_bags) 174 | 175 | if len(test_bags) > 0: 176 | test_predict = voting(test_bags) 177 | 178 | # transform to alphabet sequences and compute the edit distances 179 | predict_result = [output_seq(pred_seq, sep=' ') 180 | for pred_seq in predict_result] 181 | label_result = make_label_seq(labels, train_speakers, int_str_map) 182 | print('Done predicting, using %s.' 
% str(datetime.now() - st)) 183 | 184 | # evaluate training set 185 | train_predict = np.array(predict_result)[train_inds] 186 | train_lab = np.array(label_result)[train_inds] 187 | train_scores = [edit_dist(train_lab[i], train_predict[i]) 188 | for i in range(len(train_predict))] 189 | 190 | # evaluate validation set 191 | valid_predict = np.array(predict_result)[valid_inds] 192 | valid_lab = np.array(label_result)[valid_inds] 193 | valid_scores = [edit_dist(valid_lab[i], valid_predict[i]) 194 | for i in range(len(valid_predict))] 195 | 196 | print("\nEdit distance (train): %.4f" % np.mean(train_scores)) 197 | print("Edit distance (valid): %.4f\n" % np.mean(valid_scores)) 198 | 199 | # output the prediction file 200 | if test_probfile: 201 | test_predict_seqs = [output_seq(test_seq, sep='') 202 | for test_seq in test_predict] 203 | test_pred = {'id': test_speakers, 'phone_sequence': test_predict_seqs} 204 | test_df = pd.DataFrame(data=test_pred) 205 | test_df.to_csv('HMM_testpredict.csv', index=None) 206 | 207 | print("Done, Using %s." % str(datetime.now() - st)) 208 | 209 | 210 | def main(): 211 | run_HMM('RNN_trainprob.npy', 'train.label', 'RNN_testprob.npy', 212 | duration=3, blending=True, n_bag=100, valid_ratio=0.2, 213 | base_dir='../Data/') 214 | 215 | 216 | if __name__ == '__main__': 217 | main() 218 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Machine Learning and having it deep and structured 2 | ======== 3 | 4 | [![Build Status](https://travis-ci.org/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured.svg?branch=master)](https://travis-ci.org/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured) 5 | [![Coverage Status](https://coveralls.io/repos/github/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/badge.svg?branch=master)](https://coveralls.io/github/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured?branch=master) 6 | 7 | About 8 | -------- 9 | 10 | Implementations of the homework assignments from the course [**Machine Learning and having it deep and structured**](http://nol.ntu.edu.tw/nol/coursesearch/print_table.php?course_id=942%20U0590&class=&dpt_code=9210&ser_no=51785&semester=104-1&lang=EN) at National Taiwan University (offered by [**Hung-yi Lee**](http://speech.ee.ntu.edu.tw/~tlkagk/index.html)): 11 | 12 | - Constructed and trained variants of neural networks with [**Theano**](http://deeplearning.net/software/theano/) 13 | - Attempted to solve the sequence labeling problem in speech recognition (phoneme labeling) 14 | - Deep Neural Network (DNN) with dropout, maxout and momentum optimization 15 | - Bidirectional Recurrent Neural Network (RNN) with dropout and RMSProp optimization 16 | - Bidirectional Long Short-Term Memory (LSTM) with peephole connections and NAG optimization 17 | - Hidden Markov Model (HMM) on top of the RNN to improve the performance 18 | 19 | [**Course page**](http://speech.ee.ntu.edu.tw/~tlkagk/courses_MLSD15_2.html) 20 | 21 | Syllabus 22 | -------- 23 | 24 | Neural Networks and Training: 25 | - What is Machine Learning, Deep Learning and Structured Learning? 26 | - Neural Network Basics | Backpropagation | Theano: DNN 27 | - Tips for Training Deep Neural Network 28 | - Neural Network with Memory | Theano: RNN 29 | - Training Recurrent Neural Network 30 | - Convolutional Neural Network (by Prof. 
Winston) 31 | 32 | Structured Learning and Graphical Models: 33 | - Introduction of Structured Learning | Structured Linear Model | Structured SVM 34 | - Sequence Labeling Problem | Learning with Hidden Information 35 | - Graphical Model, Gibbs Sampling 36 | 37 | Extensions, New Applications and Trends: 38 | - Markov Logic Network 39 | - Deep Learning for Human Language Processing, Language Modeling 40 | - Caffe | Deep Reinforcement Learning | Visual Question Answering 41 | - Unsupervised Learning 42 | - Attention-based Model 43 | 44 | Content 45 | -------- 46 | 47 | Deep Neural Network (DNN)[[kaggle](https://inclass.kaggle.com/c/mlds-hw14)]: 48 | - Construct and train a deep neural network to classify pronunciation units (phonemes) in each time frame of speech. 49 | - Inputs: MFCC features 50 | - Activation function: **Maxout** (generalization of ReLU, "learnable" activation function) 51 | - Output layer: Softmax 52 | - Cost function: cross entropy 53 | - Optimization: Momentum 54 | - With **Dropout** technique 55 | 56 | Bidirectional Recurrent Neural Network (RNN)[[kaggle](https://inclass.kaggle.com/c/104-1-mlds-hw2)]: 57 | - Construct and train a bidirectional deep recurrent neural network to classify pronunciation units (phonemes) in each time frame of speech. 58 | - Inputs: prediction probabilities of each class from the previous DNN 59 | - Activation function: ReLU 60 | - Output layer: Softmax 61 | - Cost function: Mean Squared Error 62 | - Optimization: Root Mean Square Propagation (RMSProp) 63 | - With **Dropout** technique 64 | 65 | Bidirectional Long Short-Term Memory (LSTM)[[kaggle](https://inclass.kaggle.com/c/104-1-mlds-hw2)]: 66 | - Construct and train a bidirectional deep Long Short-Term Memory network to classify pronunciation units (phonemes) in each time frame of speech. 67 | - Inputs: prediction probabilities of each class from the previous DNN 68 | - Optimization: Nesterov Accelerated Gradient (NAG) 69 | - With **Peephole** connections 70 | - Using grad_clip in theano to prevent **exploding gradients** 71 | 72 | Structured Learning (output phone label sequence)[[kaggle](https://inclass.kaggle.com/c/104-1-mlds-hw3)]: 73 | - On top of the RNN / LSTM results, applies a Hidden Markov Model (HMM) to model the phone transition probabilities and further improve their performance on this sequence labeling problem. 74 | - Input: the whole utterance as one training example 75 | - Output: phone label sequence 76 | 77 | The performance is measured by Levenshtein distance (a.k.a. Edit distance). 
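A minimal, hypothetical example of how one prediction can be scored against its label sequence with the `edit_dist` helper in `HMM_topRNN/HMM_utils.py` (both arguments are space-separated phone-character strings; run from inside `HMM_topRNN/`):

    from HMM_utils import edit_dist

    # 'c' -> 'd' is a single substitution, so the distance is 1
    print(edit_dist('a b L c', 'a b L d'))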
78 | 79 | Usage 80 | -------- 81 | Clone the repo and use the [virtualenv](http://www.virtualenv.org/): 82 | 83 | git clone https://github.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured 84 | 85 | cd Machine_Learning_and_Having_It_Deep_and_Structured 86 | 87 | virtualenv venv 88 | 89 | source venv/bin/activate 90 | 91 | Install all dependencies and run the model: 92 | 93 | pip install -r requirements.txt 94 | 95 | cd RNN_LSTM 96 | 97 | python run_RNN.py 98 | -------------------------------------------------------------------------------- /RNN_LSTM/LSTM_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-07 02:10:33 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 00:27:23 6 | 7 | 8 | import numpy as np 9 | import theano as th 10 | 11 | 12 | def initialize_LSTM(n_input, n_output, archi=48, n_hid_layers=2, 13 | scale=0.01, scale_b=0.001, clip_thres=0.3): 14 | """initialize the LSTM parameters, archi: hidden layer neurons""" 15 | W_in_out = [] 16 | W_gate_forward = [] 17 | W_gate_backward = [] 18 | W_cell = [] 19 | W_peephole = [] 20 | 21 | b_in_out = [] 22 | b_gate_forward = [] 23 | b_gate_backward = [] 24 | 25 | # initial cell and output h 26 | a_0 = th.shared(random_number([archi], 0)) 27 | h_0 = th.shared(random_number([archi], 0)) 28 | 29 | # hidden layers 30 | for i in range(n_hid_layers): 31 | # initialize peephole parameters 32 | U = th.shared(random_number([archi, archi], scale)) 33 | Ui = th.shared(random_number([archi, archi], scale)) 34 | Uf = th.shared(identity_mat(archi, scale)) 35 | Uo = th.shared(random_number([archi, archi], scale)) 36 | W_peephole.append([U, Ui, Uf, Uo]) 37 | 38 | # initialize memory cell parameters 39 | Vi = th.shared(random_number([archi, archi], scale)) 40 | Vf = th.shared(identity_mat(archi, scale)) 41 | Vo = th.shared(random_number([archi, archi], scale)) 42 | W_cell.append([Vi, Vf, Vo]) 43 | 44 | # input layer 45 | if i == 0: 46 | Ws, bs = init_gate_params([n_input, archi], [archi], 47 | scale, scale_b) 48 | 49 | W_output = th.shared(random_number([2 * archi, n_output], scale)) 50 | W_in_out.append(Ws + [W_output]) 51 | b_in_out.append(bs) 52 | 53 | else: 54 | Ws_forw, bs_forw = init_gate_params([2 * archi, archi], [archi], 55 | scale, scale_b) 56 | W_gate_forward.append(Ws_forw) 57 | b_gate_forward.append(bs_forw) 58 | 59 | Ws_back, bs_back = init_gate_params([2 * archi, archi], [archi], 60 | scale, scale_b) 61 | W_gate_backward.append(Ws_back) 62 | b_gate_backward.append(bs_back) 63 | 64 | param_Ws = [W_in_out, W_gate_forward, W_gate_backward, W_peephole, W_cell] 65 | param_bs = [b_in_out, b_gate_forward, b_gate_backward] 66 | 67 | parameters = [w for Ws in param_Ws for W in Ws for w in W] 68 | parameters += [b for bs in param_bs for bb in bs for b in bb] 69 | 70 | # help to do advanced optimization (ex. 
NAG, RMSProp) 71 | auxis = [th.shared(zero_number(p.get_value().shape)) for p in parameters] 72 | 73 | # help to do mini-batch update (to store gradients) 74 | caches = [th.shared(zero_number(p.get_value().shape)) for p in parameters] 75 | 76 | # set the restricted numerical range for gradient values 77 | for i in range(len(param_Ws)): 78 | for j in range(len(param_Ws[i])): 79 | for k in range(len(param_Ws[i][j])): 80 | param_Ws[i][j][k] = th.gradient.grad_clip(param_Ws[i][j][k], 81 | -clip_thres, 82 | clip_thres) 83 | 84 | for i in range(len(param_bs)): 85 | for j in range(len(param_bs[i])): 86 | for k in range(len(param_bs[i][j])): 87 | param_bs[i][j][k] = th.gradient.grad_clip(param_bs[i][j][k], 88 | -clip_thres, 89 | clip_thres) 90 | 91 | return param_Ws, param_bs, auxis, caches, a_0, h_0, parameters 92 | 93 | 94 | def init_gate_params(W_shape, b_shape, scale, scale_b): 95 | W = th.shared(random_number(W_shape, scale)) 96 | Wi = th.shared(random_number(W_shape, scale)) 97 | Wf = th.shared(random_number(W_shape, scale) + np.float32(scale / 2)) 98 | Wo = th.shared(random_number(W_shape, scale)) 99 | 100 | b = th.shared(random_number(b_shape, scale_b)) 101 | bi = th.shared(random_number(b_shape, scale_b)) 102 | bf = th.shared(random_number(b_shape, scale_b)) 103 | bo = th.shared(random_number(b_shape, scale_b)) 104 | 105 | return [W, Wi, Wf, Wo], [b, bi, bf, bo] 106 | 107 | 108 | def random_number(shape, scale=1): 109 | return (scale * np.random.randn(*shape)).astype('float32') 110 | 111 | 112 | def zero_number(shape): 113 | return np.zeros(shape).astype('float32') 114 | 115 | 116 | def identity_mat(N, scale): 117 | return (scale * np.identity(N)).astype('float32') 118 | -------------------------------------------------------------------------------- /RNN_LSTM/RNN_utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-06 20:50:39 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 22:26:54 6 | 7 | 8 | import numpy as np 9 | import theano as th 10 | 11 | 12 | def initialize_RNN(n_input, n_output, archi=128, n_hid_layers=2, 13 | scale=0.033, scale_b=0.001, clip_thres=3.0): 14 | """initialize the RNN parameters, archi: hidden layer neurons""" 15 | W_in_out = [] 16 | W_out_forward = [] 17 | W_out_backward = [] 18 | W_memory = [] 19 | 20 | b_in_out = [] 21 | b_out_forward = [] 22 | b_out_backward = [] 23 | b_memory = [] 24 | 25 | # initial memory 26 | a_0 = th.shared(random_number([archi], 0)) 27 | 28 | # input layer 29 | W_in_out.append(th.shared(random_number([n_input, archi], scale))) 30 | b_in_out.append(th.shared(random_number([archi], scale_b))) 31 | 32 | # hidden layers 33 | for i in range(n_hid_layers): 34 | # initialize memory weights as identity matrix 35 | W_memory.append(th.shared(identity_mat(archi, scale))) 36 | b_memory.append(th.shared(random_number([archi], scale_b))) 37 | 38 | if i == (n_hid_layers - 1): 39 | continue 40 | 41 | W_out_forward.append(th.shared(random_number([2*archi, archi], scale))) 42 | rand_w = random_number([2*archi, archi], scale) 43 | W_out_backward.append(th.shared(rand_w)) 44 | b_out_forward.append(th.shared(random_number([archi], scale_b))) 45 | b_out_backward.append(th.shared(random_number([archi], scale_b))) 46 | 47 | # output layer 48 | W_in_out.append(th.shared(random_number([2 * archi, n_output], scale))) 49 | b_in_out.append(th.shared(random_number([n_output], scale_b))) 50 | 51 | param_Ws = [W_in_out, W_out_forward, 
W_out_backward, W_memory] 52 | param_bs = [b_in_out, b_out_forward, b_out_backward, b_memory] 53 | 54 | # help to do advanced optimization (ex. NAG, RMSProp) 55 | aux_Ws = [] 56 | aux_bs = [] 57 | 58 | # help to do mini-batch update (to store gradients) 59 | cache_Ws = [] 60 | cache_bs = [] 61 | 62 | parameters = [] 63 | for i in range(4): 64 | aux_W = [] 65 | aux_b = [] 66 | cache_W = [] 67 | cache_b = [] 68 | 69 | parameters += param_Ws[i] 70 | parameters += param_bs[i] 71 | 72 | for j in range(len(param_Ws[i])): 73 | W_shape = param_Ws[i][j].get_value().shape 74 | b_shape = param_bs[i][j].get_value().shape 75 | 76 | aux_W.append(th.shared(zero_number(W_shape))) 77 | aux_b.append(th.shared(zero_number(b_shape))) 78 | 79 | cache_W.append(th.shared(zero_number(W_shape))) 80 | cache_b.append(th.shared(zero_number(b_shape))) 81 | 82 | # set the restricted numerical range for gradient values 83 | param_Ws[i][j] = th.gradient.grad_clip(param_Ws[i][j], 84 | -clip_thres, clip_thres) 85 | 86 | param_bs[i][j] = th.gradient.grad_clip(param_bs[i][j], 87 | -clip_thres, clip_thres) 88 | 89 | aux_Ws.append(aux_W) 90 | aux_bs.append(aux_b) 91 | 92 | cache_Ws.append(cache_W) 93 | cache_bs.append(cache_b) 94 | 95 | # concatenate all auxiliary and cache parameters 96 | auxis = [] 97 | caches = [] 98 | for i in range(4): 99 | auxis += aux_Ws[i] 100 | auxis += aux_bs[i] 101 | 102 | caches += cache_Ws[i] 103 | caches += cache_bs[i] 104 | 105 | return param_Ws, param_bs, auxis, caches, a_0, parameters 106 | 107 | 108 | def random_number(shape, scale=1): 109 | return (scale * np.random.randn(*shape)).astype('float32') 110 | 111 | 112 | def zero_number(shape): 113 | return np.zeros(shape).astype('float32') 114 | 115 | 116 | def identity_mat(N, scale): 117 | return (scale * np.identity(N)).astype('float32') 118 | -------------------------------------------------------------------------------- /RNN_LSTM/activation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-06 21:06:57 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 21:10:01 6 | 7 | import theano.tensor as T 8 | import theano as th 9 | 10 | 11 | def tanh(Z): 12 | exp_m2z = T.exp(-2 * Z) 13 | return (1 - exp_m2z) / (1 + exp_m2z) 14 | 15 | 16 | def sigmoid(Z): 17 | return 1 / (1 + T.exp(-Z)) 18 | 19 | 20 | def ReLU(Z): 21 | return T.switch(Z < 0, 0, Z) 22 | 23 | 24 | def softmax(z): 25 | Z = T.exp(z) 26 | results, _ = th.scan(lambda x: x / T.sum(x), sequences=Z) 27 | return results 28 | -------------------------------------------------------------------------------- /RNN_LSTM/optimize.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-06 21:04:19 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-06 21:24:52 6 | 7 | import theano.tensor as T 8 | from theano.ifelse import ifelse 9 | 10 | 11 | def sgd(parameters, grads, lr, minibatch, batchsize, auxis, caches): 12 | updates = [] 13 | update_batch = ifelse(T.lt(minibatch, batchsize - 1), 0, 1) 14 | 15 | for ix in range(len(grads)): 16 | # update parameters if reaching batchsize 17 | move = -(lr / batchsize) * (caches[ix] + grads[ix]) 18 | updates.append((parameters[ix], parameters[ix] + move * update_batch)) 19 | new_cache = (caches[ix] + grads[ix]) * (1 - update_batch) 20 | updates.append((caches[ix], new_cache)) 21 | 22 | return updates 23 | 24 | 
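# NOTE: all four optimizers here share one minibatch trick — update_batch
# equals 1 only on the final call of a minibatch (when the `minibatch`
# counter reaches batchsize - 1); until then, gradients are only accumulated
# into `caches` and the parameters themselves stay untouched.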
25 | def momentum(parameters, grads, lr, minibatch, batchsize, 26 | momentum, caches, moment=0.95): 27 | """theano update, optimized by Momentum""" 28 | updates = [] 29 | update_batch = ifelse(T.lt(minibatch, batchsize - 1), 0, 1) 30 | 31 | for ix in range(len(grads)): 32 | move = - (lr / batchsize) * (grads[ix] + caches[ix]) 33 | direction = moment * momentum[ix] + move 34 | 35 | # update parameters if reaching batchsize 36 | new_param = parameters[ix] + direction * update_batch 37 | updates.append((parameters[ix], new_param)) 38 | 39 | # remember the move if updating parameters 40 | new_mom = momentum[ix] * (1 - update_batch) + direction * update_batch 41 | updates.append((momentum[ix], new_mom)) 42 | 43 | # accumulate gradients if not reaching batchsize 44 | new_cache = (caches[ix] + grads[ix]) * (1 - update_batch) 45 | updates.append((caches[ix], new_cache)) 46 | 47 | return updates 48 | 49 | 50 | def NAG(parameters, grads, lr, minibatch, batchsize, 51 | real_pos, caches, moment=0.95): 52 | """theano update, optimized by NAG""" 53 | updates = [] 54 | update_batch = ifelse(T.lt(minibatch, batchsize - 1), 0, 1) 55 | 56 | for ix in range(len(grads)): 57 | move = -(lr / batchsize) * (caches[ix] + grads[ix]) 58 | real = parameters[ix] + move 59 | spy = real + moment * (real - real_pos[ix]) 60 | 61 | # update parameters to spy position if reaching batchsize 62 | new_param = spy * update_batch + parameters[ix] * (1 - update_batch) 63 | updates.append((parameters[ix], new_param)) 64 | 65 | # remember the real position if moved parameters 66 | new_realpos = real * update_batch + real_pos[ix] * (1 - update_batch) 67 | updates.append((real_pos[ix], new_realpos)) 68 | 69 | # accumulate gradients if not reaching batchsize 70 | new_cache = (caches[ix] + grads[ix]) * (1 - update_batch) 71 | updates.append((caches[ix], new_cache)) 72 | 73 | return updates 74 | 75 | 76 | def RMSProp(parameters, grads, lr, minibatch, batchsize, 77 | sigma_square, caches, alpha=0.9, const=1e-2): 78 | """theano update, optimized by RMSProp""" 79 | updates = [] 80 | update_batch = ifelse(T.lt(minibatch, batchsize - 1), 0, 1) 81 | 82 | for ix in range(len(grads)): 83 | move = (grads[ix] + caches[ix]) / batchsize 84 | factor = sigma_square[ix] * alpha + (1 - alpha) * (move**2) 85 | step = -lr * move / (T.sqrt(factor) + const) 86 | 87 | # update parameters if reaching batchsize 88 | new_param = (parameters[ix] + step) * update_batch 89 | new_param += parameters[ix] * (1 - update_batch) 90 | updates.append((parameters[ix], new_param)) 91 | 92 | # remember the scaling factors if reaching batchsize 93 | new_sig = factor * update_batch + sigma_square[ix] * (1 - update_batch) 94 | updates.append((sigma_square[ix], new_sig)) 95 | 96 | # accumulate gradients if not reaching batchsize 97 | new_cache = (caches[ix] + grads[ix]) * (1 - update_batch) 98 | updates.append((caches[ix], new_cache)) 99 | 100 | return updates 101 | -------------------------------------------------------------------------------- /RNN_LSTM/run_LSTM.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-06 23:56:38 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 15:11:55 6 | 7 | import numpy as np 8 | import theano as th 9 | import theano.tensor as T 10 | import os 11 | import sys 12 | 13 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa 14 | 15 | from datetime import datetime 16 | from shortcuts import 
load_data, load_label, make_data, make_y, load_str_map,\ 17 | validate, validate_editdist, test_predict 18 | from activation import tanh, sigmoid, softmax 19 | from optimize import sgd, momentum, NAG, RMSProp 20 | from LSTM_utils import initialize_LSTM 21 | 22 | 23 | def set_step(W_peephole, W_cell): 24 | U, Ui, Uf, Uo = W_peephole 25 | Vi, Vf, Vo = W_cell 26 | 27 | def step(z_t, zi_t, zf_t, zo_t, c_tm1, h_tm1): 28 | # new information 29 | Z_t = tanh(z_t + T.dot(h_tm1, U)) 30 | 31 | # input gate 32 | Zi_t = sigmoid(zi_t + T.dot(h_tm1, Ui) + T.dot(c_tm1, Vi)) 33 | 34 | # forget gate 35 | Zf_t = sigmoid(zf_t + T.dot(h_tm1, Uf) + T.dot(c_tm1, Vf)) 36 | 37 | # new plus old/unforgotten memory 38 | c_t = Z_t * Zi_t + c_tm1 * Zf_t 39 | 40 | # output gate 41 | Zo_t = sigmoid(zo_t + T.dot(h_tm1, Uo) + T.dot(c_t, Vo)) 42 | 43 | # output information 44 | h_t = tanh(c_t) * Zo_t 45 | 46 | return c_t, h_t 47 | 48 | return step 49 | 50 | 51 | def construct_LSTM(n_input, n_output, n_hid_layers=2, archi=36, lr=1e-3, 52 | update_by='NAG', batchsize=1, scale=0.01, 53 | scale_b=0.001, clip_thres=1.0): 54 | """ 55 | Initialize and construct the bidirectional Long Short-Term Memory (LSTM) 56 | Update the LSTM using minibatch and the chosen optimizer (default: NAG) 57 | archi: number of neurons of each hidden layer 58 | """ 59 | x_seq = T.fmatrix() 60 | y_hat = T.fmatrix() 61 | minibatch = T.scalar() 62 | 63 | # choose the optimization function 64 | optimiz_func = { 65 | 'sgd': sgd, 66 | 'momentum': momentum, 67 | 'NAG': NAG, 68 | 'RMSProp': RMSProp, 69 | } 70 | update_func = optimiz_func[update_by] 71 | 72 | # initialize the LSTM 73 | print('Start initializing LSTM...') 74 | init = initialize_LSTM(n_input, n_output, archi, n_hid_layers, 75 | scale, scale_b, clip_thres) 76 | param_Ws, param_bs, auxis, caches, a_0, h_0, parameters = init 77 | 78 | # ############ bidirectional Long Short-Term Memory ############### 79 | 80 | # #### Hidden layers ###### 81 | for l in range(n_hid_layers): 82 | # computing gates 83 | if l == 0: 84 | a_seq = x_seq 85 | W, Wi, Wf, Wo = param_Ws[0][l][:-1] 86 | b, bi, bf, bo = param_bs[0][l] 87 | z_seq = T.dot(a_seq, W) + b.dimshuffle('x', 0) 88 | zi_seq = T.dot(a_seq, Wi) + bi.dimshuffle('x', 0) 89 | zf_seq = T.dot(a_seq, Wf) + bf.dimshuffle('x', 0) 90 | zo_seq = T.dot(a_seq, Wo) + bo.dimshuffle('x', 0) 91 | 92 | zf_seq, zif_seq, zff_seq, zof_seq = z_seq, zi_seq, zf_seq, zo_seq 93 | zb_seq, zib_seq, zfb_seq, zob_seq = zf_seq, zif_seq, zff_seq, zof_seq  # copy the renamed gates: line 92 rebinds zf_seq 94 | else: 95 | # forward gates 96 | W_f, Wi_f, Wf_f, Wo_f = param_Ws[1][l - 1] 97 | b_f, bi_f, bf_f, bo_f = param_bs[1][l - 1] 98 | zf_seq = T.dot(a_seq, W_f) + b_f.dimshuffle('x', 0) 99 | zif_seq = T.dot(a_seq, Wi_f) + bi_f.dimshuffle('x', 0) 100 | zff_seq = T.dot(a_seq, Wf_f) + bf_f.dimshuffle('x', 0) 101 | zof_seq = T.dot(a_seq, Wo_f) + bo_f.dimshuffle('x', 0) 102 | 103 | # backward gates 104 | W_b, Wi_b, Wf_b, Wo_b = param_Ws[2][l - 1] 105 | b_b, bi_b, bf_b, bo_b = param_bs[2][l - 1] 106 | zb_seq = T.dot(a_seq, W_b) + b_b.dimshuffle('x', 0) 107 | zib_seq = T.dot(a_seq, Wi_b) + bi_b.dimshuffle('x', 0) 108 | zfb_seq = T.dot(a_seq, Wf_b) + bf_b.dimshuffle('x', 0) 109 | zob_seq = T.dot(a_seq, Wo_b) + bo_b.dimshuffle('x', 0) 110 | 111 | # computing cells 112 | step = set_step(param_Ws[3][l], param_Ws[4][l]) 113 | 114 | # Forward direction 115 | seqs = [zf_seq, zif_seq, zff_seq, zof_seq] 116 | [cf_seq, hf_seq], _ = th.scan(step, sequences=seqs, 117 | outputs_info=[a_0, h_0], 118 | truncate_gradient=-1) 119 | 
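        # NOTE: the backward direction reuses the same step function on the
        # time-reversed gate sequences, so its outputs come out reversed and
        # are flipped back (hb_seq[::-1]) before concatenation with hf_seq.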
120 | # Backward direction 121 | seqs = [zb_seq[::-1], zib_seq[::-1], zfb_seq[::-1], zob_seq[::-1]] 122 | [cb_seq, hb_seq], _ = th.scan(step, sequences=seqs, 123 | outputs_info=[a_0, h_0], 124 | truncate_gradient=-1) 125 | 126 | a_seq = T.concatenate([hf_seq, hb_seq[::-1]], axis=1) 127 | 128 | # #### End of Hidden layers ###### 129 | y_seq = softmax(T.dot(a_seq, param_Ws[0][0][-1])) 130 | forward = th.function(inputs=[x_seq], outputs=y_seq) 131 | 132 | cost = T.sum((y_seq - y_hat)**2) + minibatch * 0  # "* 0" keeps minibatch in the graph as an input 133 | valid = th.function(inputs=[x_seq, y_hat, minibatch], outputs=cost) 134 | grads = T.grad(cost, parameters, disconnected_inputs='ignore') 135 | forward_grad = th.function([x_seq, y_hat, minibatch], grads) 136 | 137 | # ############ end of construction ############### 138 | 139 | updates = update_func(parameters, grads, lr, minibatch, 140 | batchsize, auxis, caches) 141 | lstm_train = th.function(inputs=[x_seq, y_hat, minibatch], 142 | outputs=cost, updates=updates) 143 | 144 | return forward, valid, lstm_train, forward_grad 145 | 146 | 147 | def train_LSTM(trainX, train_label, forward, valid, lstm_train, forward_grad, 148 | n_output, int_str_map, batchsize, epoch=10, valid_ratio=0.2, 149 | print_every=20): 150 | """train the deep LSTM neural network""" 151 | speakers = sorted(trainX.keys()) 152 | 153 | # making training y sequence 154 | trainY = {} 155 | for speaker in speakers: 156 | y = [make_y(lab, n_output) for lab in train_label[speaker].ravel()] 157 | trainY[speaker] = np.array(y).astype('float32') 158 | 159 | # split the validation set 160 | valid_n = round(len(speakers) * valid_ratio) 161 | rand_speakers = np.random.permutation(speakers) 162 | valid_speakers = rand_speakers[:valid_n] 163 | train_speakers = rand_speakers[valid_n:] 164 | 165 | valid_dists = [] 166 | train_cost = [] 167 | valid_cost = [] 168 | 169 | # training process 170 | for j in range(epoch): 171 | costs = 0 172 | n_instance = 0 173 | minibat_ind = 0 174 | 175 | # randomly shuffle the training order 176 | indexes = np.random.permutation(len(train_speakers)) 177 | for ind, num in enumerate(indexes): 178 | X_seq = trainX[train_speakers[num]]  # index into train_speakers, not the full sorted list 179 | costs += lstm_train(X_seq, trainY[train_speakers[num]], minibat_ind) 180 | n_instance += X_seq.shape[0] 181 | train_cost.append(costs / n_instance) 182 | 183 | # validation set 184 | if ind % print_every == (print_every - 1): 185 | v_cost = validate(trainX, trainY, valid_speakers, valid, None) 186 | valid_cost.append(v_cost) 187 | 188 | print('\tNow: %d; costs (train): %.4f ; costs (valid): %.4f' % 189 | (j + 1, train_cost[-1], valid_cost[-1])) 190 | 191 | val_dist = validate_editdist(trainX, trainY, valid_speakers, 192 | forward, None, int_str_map) 193 | valid_dists.append(val_dist) 194 | print("\tEdit distance (valid): %.4f\n" % val_dist) 195 | 196 | # advance the minibatch indicator 197 | minibat_ind = (minibat_ind + 1) % batchsize 198 | 199 | return train_cost, valid_cost, valid_dists 200 | 201 | 202 | def run_LSTM_model(train_file, train_labfile, train_probfile, test_file=None, 203 | test_probfile=None, neurons=36, n_hiddenlayer=2, lr=1e-3, 204 | update_by='NAG', batchsize=1, epoch=10, valid_ratio=0.1, 205 | n_input=48, n_output=48, save_prob=False, 206 | base_dir='../Data/'): 207 | """Run the bidirectional deep Long Short-Term Memory network""" 208 | 209 | print("Start") 210 | st = datetime.now() 211 | 212 | data = load_data(base_dir + train_file) 213 | label_data, label_map = load_label(base_dir + train_labfile) 214 | int_str_map = load_str_map(label_map, base_dir) 215 | trainX, train_label = make_data(data, base_dir+train_probfile, label_data)
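    # NOTE: assumed layout — trainX maps each speaker/utterance id to a
    # float32 matrix of shape (n_frames, 48) of per-frame DNN class
    # probabilities, and train_label maps the same ids to integer phone
    # labels, which make_y one-hot encodes during training.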
216 | print('Done loading data, using %s.' % str(datetime.now() - st)) 217 | 218 | lstm = construct_LSTM(n_input, n_output, n_hiddenlayer, neurons, lr, 219 | update_by, batchsize) 220 | forward, valid, lstm_train, forward_grad = lstm 221 | print('Done constructing the LSTM network.') 222 | print('Using %s.\n' % str(datetime.now() - st)) 223 | 224 | print('Start training LSTM...') 225 | train_LSTM(trainX, train_label, forward, valid, lstm_train, forward_grad, 226 | n_output, int_str_map, batchsize, epoch, valid_ratio) 227 | print('Done training, using %s.' % str(datetime.now() - st)) 228 | 229 | if test_file and test_probfile: 230 | print('\nPredicting on test set...') 231 | test_predict(test_file, test_probfile, int_str_map, forward, 232 | None, base_dir=base_dir, save_prob=save_prob, 233 | prob_filename='LSTM_testprob') 234 | 235 | if save_prob: 236 | speakers = sorted(trainX.keys()) 237 | probs = [forward(trainX[speaker]) for speaker in speakers] 238 | np.save('LSTM_trainprob', [probs, speakers]) 239 | 240 | print("Done, Using %s." % str(datetime.now() - st)) 241 | 242 | 243 | def main(): 244 | run_LSTM_model('train.data', 'train.label', 'ytrain_prob.npy', 'test.data', 245 | 'ytest_prob.npy', neurons=36, n_hiddenlayer=2, lr=1e-4, 246 | update_by='NAG', batchsize=1, epoch=40, save_prob=True) 247 | 248 | 249 | if __name__ == '__main__': 250 | main() 251 | -------------------------------------------------------------------------------- /RNN_LSTM/run_RNN.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-11-03 11:40:23 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 15:28:40 6 | 7 | import numpy as np 8 | import theano as th 9 | import theano.tensor as T 10 | import os 11 | import sys 12 | 13 | sys.path.append(os.path.dirname(os.path.realpath(__file__))) # noqa 14 | 15 | from datetime import datetime 16 | from shortcuts import load_data, load_label, make_data, make_y, load_str_map,\ 17 | validate, validate_editdist, test_predict 18 | from activation import tanh, sigmoid, ReLU, softmax 19 | from optimize import sgd, momentum, NAG, RMSProp 20 | from RNN_utils import initialize_RNN 21 | 22 | 23 | from theano.ifelse import ifelse 24 | from theano.tensor.shared_randomstreams import RandomStreams 25 | 26 | 27 | def set_step(W_memory, b_memory, lay_j, acti_func='ReLU'): 28 | functions = { 29 | 'ReLU': ReLU, 30 | 'sigmoid': sigmoid, 31 | 'tanh': tanh, 32 | } 33 | activ = functions[acti_func] 34 | 35 | def step(zf_t, zb_t, af_tm1, ab_tm1): 36 | af_t = activ(zf_t + T.dot(af_tm1, W_memory[lay_j]) + b_memory[lay_j]) 37 | ab_t = activ(zb_t + T.dot(ab_tm1, W_memory[lay_j]) + b_memory[lay_j]) 38 | return af_t, ab_t 39 | 40 | return step 41 | 42 | 43 | def construct_RNN(n_input, n_output, n_hid_layers=2, archi=128, lr=1e-3, 44 | acti_func='ReLU', update_by='RMSProp', dropout_rate=0.2, 45 | batchsize=1, scale=0.033, scale_b=0.001, clip_thres=10.0, 46 | seed=42): 47 | """ 48 | Initialize and construct the bidirectional deep RNN with dropout 49 | Update the RNN using minibatch and RMSProp 50 | archi: number of neurons of each hidden layer 51 | """ 52 | x_seq = T.fmatrix() 53 | y_hat = T.fmatrix() 54 | minibatch = T.scalar() 55 | stop_dropout = T.scalar() 56 | 57 | # choose the optimization function 58 | optimiz_func = { 59 | 'sgd': sgd, 60 | 'momentum': momentum, 61 | 'NAG': NAG, 62 | 'RMSProp': RMSProp, 63 | } 64 | update_func = optimiz_func[update_by] 65 | 66 | # 
initialize the RNN 67 | print('Start initializing RNN...') 68 | init = initialize_RNN(n_input, n_output, archi, n_hid_layers, 69 | scale, scale_b, clip_thres) 70 | param_Ws, param_bs, auxis, caches, a_0, parameters = init 71 | 72 | # ############ bidirectional recurrent neural network ############### 73 | srng = RandomStreams(seed=seed) 74 | 75 | # #### Hidden layers ###### 76 | for l in range(n_hid_layers): 77 | if l == 0: 78 | a_seq = x_seq 79 | z_seq = T.dot(a_seq, param_Ws[0][l]) 80 | z_seq += param_bs[0][l].dimshuffle('x', 0) 81 | zf_seq = z_seq 82 | zb_seq = z_seq 83 | else: 84 | zf_seq = T.dot(a_seq, param_Ws[1][l - 1]) 85 | zf_seq += param_bs[1][l - 1].dimshuffle('x', 0) 86 | zb_seq = T.dot(a_seq, param_Ws[2][l - 1]) 87 | zb_seq += param_bs[2][l - 1].dimshuffle('x', 0) 88 | 89 | step = set_step(param_Ws[3], param_bs[3], l, acti_func) 90 | [af_seq, ab_seq], _ = th.scan(step, sequences=[zf_seq, zb_seq[::-1]], 91 | outputs_info=[a_0, a_0], 92 | truncate_gradient=-1) 93 | 94 | a_out = T.concatenate([af_seq, ab_seq[::-1]], axis=1) 95 | dropping = srng.binomial(size=T.shape(a_out), 96 | p=(1 - dropout_rate)) 97 | a_seq = ifelse(T.lt(stop_dropout, 1.05), 98 | (a_out * dropping).astype('float32'), a_out) 99 | a_seq /= stop_dropout 100 | 101 | # #### End of Hidden layers ###### 102 | 103 | y_pre = T.dot(a_seq, param_Ws[0][1]) + param_bs[0][1].dimshuffle('x', 0) 104 | y_seq = softmax(y_pre) 105 | forward = th.function(inputs=[x_seq, stop_dropout], outputs=y_seq) 106 | 107 | cost = T.sum((y_seq - y_hat)**2) + minibatch * 0 108 | valid = th.function(inputs=[x_seq, y_hat, minibatch, stop_dropout], 109 | outputs=cost) 110 | grads = T.grad(cost, parameters, disconnected_inputs='ignore') 111 | 112 | # ############ end of construction ############### 113 | 114 | updates = update_func(parameters, grads, lr, minibatch, 115 | batchsize, auxis, caches) 116 | rnn_train = th.function(inputs=[x_seq, y_hat, minibatch, stop_dropout], 117 | outputs=cost, updates=updates) 118 | 119 | return forward, valid, rnn_train 120 | 121 | 122 | def train_RNN(trainX, train_label, forward, valid, rnn_train, n_output, 123 | int_str_map, dropout_rate, batchsize, epoch=10, valid_ratio=0.2, 124 | print_every=20): 125 | """train the deep recurrent neural network""" 126 | speakers = sorted(trainX.keys()) 127 | 128 | # making training y sequence 129 | trainY = {} 130 | for speaker in speakers: 131 | y = [make_y(lab, n_output) for lab in train_label[speaker].ravel()] 132 | trainY[speaker] = np.array(y).astype('float32') 133 | 134 | # split the validation set 135 | valid_n = round(len(speakers) * valid_ratio) 136 | rand_speakers = np.random.permutation(speakers) 137 | valid_speakers = rand_speakers[:valid_n] 138 | train_speakers = rand_speakers[valid_n:] 139 | 140 | valid_dists = [] 141 | train_cost = [] 142 | valid_cost = [] 143 | 144 | # training process 145 | for j in range(epoch): 146 | costs = 0 147 | n_instance = 0 148 | minibat_ind = 0 149 | 150 | # random shuffle the order 151 | indexes = np.random.permutation(len(train_speakers)) 152 | for ind, num in enumerate(indexes): 153 | X_seq = trainX[speakers[num]] 154 | costs += rnn_train(X_seq, trainY[speakers[num]], minibat_ind, 1) 155 | n_instance += X_seq.shape[0] 156 | train_cost.append(costs / n_instance) 157 | 158 | # validation set 159 | if ind % print_every == (print_every - 1): 160 | v_cost = validate(trainX, trainY, valid_speakers, 161 | valid, dropout_rate) 162 | valid_cost.append(v_cost) 163 | 164 | print('\tNow: %d; costs (train): %.4f ; costs (valid): %.4f' % 165 | 
(j + 1, train_cost[-1], valid_cost[-1])) 166 | 167 | val_dist = validate_editdist(trainX, trainY, valid_speakers, 168 | forward, dropout_rate, 169 | int_str_map) 170 | valid_dists.append(val_dist) 171 | print("\tEdit distance (valid): %.4f\n" % val_dist) 172 | 173 | # minibatch indicator plus 1 174 | minibat_ind = (minibat_ind + 1) % batchsize 175 | 176 | return train_cost, valid_cost, valid_dists 177 | 178 | 179 | def run_RNN_model(train_file, train_labfile, train_probfile, test_file=None, 180 | test_probfile=None, neurons=36, n_hiddenlayer=2, lr=1e-3, 181 | acti_func='ReLU', update_by='RMSProp', dropout_rate=0.2, 182 | batchsize=1, epoch=10, valid_ratio=0.1, n_input=48, 183 | n_output=48, base_dir='../Data/', save_prob=False): 184 | """Run the bidirectional deep recurrent neural network with dropout""" 185 | 186 | print("Start") 187 | st = datetime.now() 188 | 189 | data = load_data(base_dir + train_file) 190 | label_data, label_map = load_label(base_dir + train_labfile) 191 | int_str_map = load_str_map(label_map, base_dir) 192 | trainX, train_label = make_data(data, base_dir+train_probfile, label_data) 193 | print('Done loading data, using %s.' % str(datetime.now() - st)) 194 | 195 | rnn = construct_RNN(n_input, n_output, n_hiddenlayer, neurons, lr, 196 | acti_func, update_by, dropout_rate, batchsize) 197 | forward, valid, rnn_train = rnn 198 | print('Done constructing the recurrent neural network.\n') 199 | 200 | print('Start training RNN...') 201 | train_RNN(trainX, train_label, forward, valid, rnn_train, n_output, 202 | int_str_map, dropout_rate, batchsize, epoch, valid_ratio) 203 | print('Done training, using %s.' % str(datetime.now() - st)) 204 | 205 | if test_file and test_probfile: 206 | print('\nPredicting on test set...') 207 | test_predict(test_file, test_probfile, int_str_map, forward, 208 | dropout_rate, base_dir=base_dir, save_prob=save_prob, 209 | prob_filename='RNN_testprob') 210 | 211 | if save_prob: 212 | speakers = sorted(trainX.keys()) 213 | stop = 1 / (1 - dropout_rate) 214 | probs = [forward(trainX[speaker], stop) for speaker in speakers] 215 | np.save('RNN_trainprob', [probs, speakers]) 216 | 217 | print("Done, Using %s." % str(datetime.now() - st))
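# Why forward() is called with stop = 1 / (1 - dropout_rate) above: in
# construct_RNN the random dropout mask is disabled whenever stop_dropout
# exceeds 1.05, and the division of activations by stop_dropout then rescales
# them to the expected magnitude seen under training-time dropout (classic
# test-time dropout scaling).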
218 | 219 | 220 | def main(): 221 | run_RNN_model('train.data', 'train.label', 'ytrain_prob.npy', 'test.data', 222 | 'ytest_prob.npy', neurons=128, n_hiddenlayer=2, lr=1e-3, 223 | acti_func='ReLU', update_by='RMSProp', dropout_rate=0.2, 224 | batchsize=1, epoch=100, save_prob=True) 225 | 226 | 227 | if __name__ == '__main__': 228 | main() 229 | -------------------------------------------------------------------------------- /RNN_LSTM/shortcuts.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-10-12 16:25:45 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 19:07:16 6 | 7 | import numpy as np 8 | import pandas as pd 9 | import gc 10 | 11 | 12 | def load_data(filename, nrows=None, normalize=True): 13 | """load data from file, first column as index, dtype=float32; a few rows are read first only to discover the columns""" 14 | ind = pd.read_csv(filename, sep=' ', header=None, index_col=0, nrows=5) 15 | dtype_dict = {c: np.float32 for c in ind.columns} 16 | data = pd.read_csv(filename, sep=' ', header=None, index_col=0, 17 | dtype=dtype_dict, nrows=nrows) 18 | # normalize 19 | if normalize: 20 | data = (data - data.mean()) / data.std() 21 | gc.collect() 22 | 23 | return data 24 | 25 | 26 | def load_label(filename): 27 | """load label data""" 28 | label_data = pd.read_csv(filename, header=None, index_col=0) 29 | label_map = {} 30 | for ind, lab in enumerate(np.unique(label_data.values)): 31 | label_map[lab] = ind 32 | 33 | label_data = label_data.applymap(lambda x: label_map[x]) 34 | gc.collect() 35 | 36 | return label_data, label_map 37 | 38 | 39 | def make_data(data, prob_file, label_data=None): 40 | """transform data into one sequence for each speaker""" 41 | prob_data = np.load(prob_file) 42 | df = pd.DataFrame(data=prob_data, index=data.index) 43 | speakers = list(set(['_'.join(name.split('_')[:2]) for name in df.index])) 44 | 45 | X = {} 46 | labels = {} 47 | for speaker in speakers: 48 | speaker_indexes = df.index.str.startswith(speaker) 49 | X[speaker] = (df.iloc[speaker_indexes].values).astype('float32') 50 | if label_data is not None: 51 | labels[speaker] = label_data.iloc[speaker_indexes].values 52 | 53 | return X, labels 54 | 55 | 56 | def make_y(lab, n_output): 57 | """make y vector""" 58 | y = np.zeros(n_output) 59 | y[lab] = 1 60 | return y 61 | 62 | 63 | def validate(trainX, trainY, valid_speakers, valid, dropout_rate): 64 | """Calculate the cost value on validation set""" 65 | objective = 0 66 | n_instance = 0 67 | 68 | if dropout_rate is None: 69 | stop = None 70 | else: 71 | stop = 1.0 / (1 - dropout_rate) 72 | 73 | for speaker in valid_speakers: 74 | if stop is None: 75 | objective += valid(trainX[speaker], trainY[speaker], 0) 76 | else: 77 | objective += valid(trainX[speaker], trainY[speaker], 0, stop) 78 | n_instance += trainX[speaker].shape[0] 79 | 80 | return objective / n_instance 81 | 82 | 83 | def load_str_map(label_map, base_dir='../Data/'): 84 | """find the mapping from int to phoneme""" 85 | phoneme_map = {} 86 | phone_str_map = {} 87 | pmap = pd.read_csv(base_dir + '48_39.map', sep='\t', header=None) 88 | str_map = pd.read_csv(base_dir + '48_idx_chr.map', 89 | header=None, delim_whitespace=True) 90 | 91 | for p1, p2 in pmap.values: 92 | phoneme_map[p1] = p2 93 | 94 | for s1, s2, s3 in str_map.values: 95 | phone_str_map[s1] = s3 96 | 97 | int_str_map = {} 98 | for key, val in label_map.items(): 99 | int_str_map[val] = phone_str_map[phoneme_map[key]] 100 | 101 | return int_str_map
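# Hand-checked examples of the two helpers defined below:
#   sanity_check('a a b a a a c c')  ->  'a a a a a a c c'  (isolated 'b' fixed)
#   edit_dist('a c', 'a b c')        ->  1                  (one insertion)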
102 | 103 | 104 | def sanity_check(seq, sep=' '): 105 | """smooth predictions by correcting frames that disagree with their neighbors""" 106 | seq = seq.split() 107 | 108 | for i in range(1, len(seq) - 1): 109 | # both neighbors agree but the current frame differs: adopt their label 110 | if seq[i - 1] == seq[i + 1] and seq[i] != seq[i - 1]: 111 | seq[i] = seq[i - 1] 112 | # the current frame differs from both neighbors: fall back to previous 113 | elif seq[i] != seq[i + 1] and seq[i] != seq[i - 1]: 114 | seq[i] = seq[i - 1] 115 | 116 | return sep.join(seq) 117 | 118 | 119 | def edit_dist(seq1, seq2): 120 | """Levenshtein (edit) distance between two space-separated sequences""" 121 | seq1 = seq1.split() 122 | seq2 = seq2.split() 123 | 124 | d = np.zeros((len(seq1) + 1) * (len(seq2) + 1), dtype=np.uint16)  # uint8 would overflow past 255 tokens 125 | d = d.reshape((len(seq1) + 1, len(seq2) + 1)) 126 | 127 | for i in range(len(seq1) + 1): 128 | for j in range(len(seq2) + 1): 129 | if i == 0: 130 | d[0][j] = j 131 | elif j == 0: 132 | d[i][0] = i 133 | 134 | for i in range(1, len(seq1) + 1): 135 | for j in range(1, len(seq2) + 1): 136 | if seq1[i - 1] == seq2[j - 1]: 137 | d[i][j] = d[i - 1][j - 1] 138 | else: 139 | substitution = d[i - 1][j - 1] + 1 140 | insertion = d[i][j - 1] + 1 141 | deletion = d[i - 1][j] + 1 142 | d[i][j] = min(substitution, insertion, deletion) 143 | 144 | return d[len(seq1)][len(seq2)] 145 | 146 | 147 | def validate_editdist(trainX, trainY, valid_speakers, forward, 148 | dropout_rate, int_str_map): 149 | """Calculate the average edit distance on validation set""" 150 | if dropout_rate is None: 151 | stop = None 152 | else: 153 | stop = 1.0 / (1 - dropout_rate) 154 | 155 | valid_seq = [] 156 | valid_y_seq = [] 157 | for speaker in valid_speakers: 158 | 159 | if stop is None: 160 | ypred = forward(trainX[speaker]) 161 | else: 162 | ypred = forward(trainX[speaker], stop) 163 | 164 | pred_seq = ' '.join([int_str_map[np.argmax(pred)] for pred in ypred]) 165 | pred_seq = sanity_check(pred_seq) 166 | 167 | phoneme_seq = '' 168 | now = '' 169 | for p in pred_seq.split(): 170 | if p != now: 171 | phoneme_seq += (p + ' ') 172 | now = p 173 | 174 | yhat_seq = [int_str_map[np.argmax(l)] for l in trainY[speaker]] 175 | yhat = [] 176 | y_now = '' 177 | 178 | for y in yhat_seq: 179 | if y != y_now: 180 | yhat.append(y) 181 | y_now = y 182 | 183 | yhat = ' '.join(yhat) 184 | 185 | valid_seq.append(phoneme_seq.strip()) 186 | valid_y_seq.append(yhat) 187 | 188 | leng = len(valid_seq) 189 | dists = [edit_dist(valid_seq[i], valid_y_seq[i]) for i in range(leng)] 190 | valid_dist = np.mean(dists) 191 | 192 | return valid_dist 193 | 194 | 195 | def test_predict(testfile, testprob_file, int_str_map, forward, dropout_rate, 196 | filename='test.csv', base_dir='../Data/', save_prob=False, 197 | prob_filename='test_probs'): 198 | """predict on test set and output the file""" 199 | test_data = load_data(base_dir + testfile) 200 | testX, _ = make_data(test_data, base_dir + testprob_file) 201 | # the speaker list is rebuilt below from the frame index to keep file order 202 | 203 | if dropout_rate is None: 204 | stop = None 205 | else: 206 | stop = 1.0 / (1 - dropout_rate) 207 | 208 | test_speakers = [] 209 | now_speak = '' 210 | for s in test_data.index: 211 | speaker = '_'.join(s.split('_')[:2]) 212 | if speaker != now_speak: 213 | test_speakers.append(speaker) 214 | now_speak = speaker 215 | 216 | test_seq = [] 217 | for speaker in test_speakers: 218 | 219 | if stop is None: 220 | pred_seq = forward(testX[speaker]) 221 | else: 222 | pred_seq = forward(testX[speaker], stop) 223 | 224 | pred_seq = [int_str_map[np.argmax(pred)] for pred in pred_seq] 225 | pred_seq = ' '.join(pred_seq) 226 | pred_seq = sanity_check(pred_seq)
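# The loop below collapses runs of identical frame predictions into single
# phonemes; the mapped characters (from 48_idx_chr.map) are joined without
# separators to form the submission string.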
227 | 228 | seq = '' 229 | now = '' 230 | for p in pred_seq.split(): 231 | if p != now: 232 | seq += p 233 | now = p 234 | 235 | test_seq.append(seq) 236 | 237 | if save_prob: 238 | probs = [] 239 | for speaker in test_speakers: 240 | if stop is None: 241 | pred_seq = forward(testX[speaker]) 242 | else: 243 | pred_seq = forward(testX[speaker], stop) 244 | 245 | probs.append(pred_seq) 246 | np.save(prob_filename, [probs, test_speakers]) 247 | 248 | test_pred = {'id': test_speakers, 'phone_sequence': test_seq} 249 | test_df = pd.DataFrame(data=test_pred) 250 | test_df.to_csv(filename, index=None) 251 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Code coverage measurement for Python 2 | # https://pypi.python.org/pypi/coverage/4.0.3 3 | coverage==4.0.3 4 | 5 | # pytest: simple powerful testing with Python 6 | # https://pypi.python.org/pypi/pytest/2.9.2 7 | pytest==2.9.2 8 | 9 | # Pytest plugin for measuring coverage. 10 | # https://pypi.python.org/pypi/pytest-cov/2.2.0 11 | pytest-cov==2.2.0 12 | 13 | # the modular source code checker: pep8, pyflakes and co 14 | # https://pypi.python.org/pypi/flake8/2.5.4 15 | flake8==2.5.4 16 | 17 | # NumPy: array processing for numbers, strings, records, and objects. 18 | # https://pypi.python.org/pypi/numpy 19 | numpy==1.11.1 20 | 21 | # Powerful data structures for data analysis, time series, and statistics 22 | # https://pypi.python.org/pypi/pandas/0.18.1 23 | pandas==0.18.1 24 | 25 | # SciPy: an ecosystem of open-source software for mathematics, science, and engineering. 26 | # https://pypi.python.org/pypi/scipy/0.18.0rc2 27 | scipy==0.18.0 28 | 29 | # nose extends unittest to make testing easier 30 | # https://pypi.python.org/pypi/nose/1.3.7 31 | nose==1.3.7 32 | 33 | # Theano: define, optimize, and efficiently evaluate multi-dimensional arrays 34 | # https://pypi.python.org/pypi/Theano 35 | theano==0.8.2 36 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AaronYALai/Machine_Learning_and_Having_It_Deep_and_Structured/a9cde55cc3a6142eeb00f0faa0413908ffd4a1f3/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_run.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # @Author: aaronlai 3 | # @Date: 2016-10-15 01:00:07 4 | # @Last Modified by: AaronLai 5 | # @Last Modified time: 2016-11-09 22:29:45 6 | 7 | from unittest import TestCase 8 | from DNN.run_DNN import run_model 9 | from RNN_LSTM.run_RNN import run_RNN_model 10 | from RNN_LSTM.run_LSTM import run_LSTM_model 11 | from HMM_topRNN.run_HMM import run_HMM 12 | 13 | 14 | class Test_running(TestCase): 15 | 16 | def test_DNN(self): 17 | run_model('train.data', 'train.label', 'test.data', 18 | base_dir='./Data/', save_prob=True, epoch=3) 19 | 20 | def test_RNN(self): 21 | run_RNN_model('train.data', 'train.label', 'ytrain_prob.npy', 22 | 'test.data', 'ytest_prob.npy', base_dir='./Data/', 23 | epoch=3) 24 | 25 | run_RNN_model('train.data', 'train.label', 'ytrain_prob.npy', 26 | 'test.data', 'ytest_prob.npy', base_dir='./Data/', 27 | acti_func='tanh', update_by='NAG', epoch=3) 28 | 29 | run_RNN_model('train.data', 'train.label', 'ytrain_prob.npy', 30 | 
'test.data', 'ytest_prob.npy', base_dir='./Data/', 31 | acti_func='sigmoid', update_by='momentum', epoch=3) 32 | 33 | def test_LSTM(self): 34 | run_LSTM_model('train.data', 'train.label', 'ytrain_prob.npy', 35 | 'test.data', 'ytest_prob.npy', base_dir='./Data/', 36 | epoch=3, lr=1e-5) 37 | 38 | def test_HMM(self): 39 | run_HMM('RNN_trainprob.npy', 'train.label', 'RNN_testprob.npy', 40 | duration=3, blending=True, n_bag=10, valid_ratio=0.1, 41 | base_dir='./Data/') 42 | --------------------------------------------------------------------------------
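The post-processing helpers in RNN_LSTM/shortcuts.py run without Theano, so they can be exercised on their own. A minimal sketch, assuming the repository root is the working directory and on sys.path; the frame-level predictions are made up for illustration, and the expected outputs are hand-checked against the implementations above:

import sys
sys.path.append('.')  # repository root, so RNN_LSTM resolves as a package

from RNN_LSTM.shortcuts import sanity_check, edit_dist

# hypothetical frame-level phone predictions, one symbol per frame
frames = 'a a b a a a c c'
smoothed = sanity_check(frames)   # the isolated 'b' is corrected to 'a'

# collapse consecutive duplicates, as validate_editdist/test_predict do
collapsed = []
for p in smoothed.split():
    if not collapsed or collapsed[-1] != p:
        collapsed.append(p)
pred = ' '.join(collapsed)        # -> 'a c'

print(pred, edit_dist(pred, 'a b c'))   # -> a c 1 (one insertion away)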