├── CNNModel.py
├── GRUModel.py
├── README.md
├── main.py
├── model.jpg
└── utils.py


/CNNModel.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from utils import NormalInit, OrthogonalInit, add_to_params
 3 | import theano
 4 | import numpy as np
 5 | import theano.tensor as T
 6 | 
 7 | class SentenceEncoder_CNN(): #用CNN学习句子向量表示
 8 |     def init_params(self, word_embedding_param):
 9 |         # Initialzie W_emb to given word embeddings
10 |         assert(word_embedding_param != None)
11 |         self.W_emb = word_embedding_param
12 | 
13 |         """ sent weights """
14 |         self.Filter1 = add_to_params(self.params, theano.shared(value=NormalInit(self.rng, self.rankdim, self.qdim_encoder), name='Filter1'+self.name))
15 |         self.Filter2 = add_to_params(self.params, theano.shared(value=NormalInit(self.rng, 2*self.rankdim, self.qdim_encoder), name='Filter2'+self.name))
16 |         self.Filter3 = add_to_params(self.params, theano.shared(value=NormalInit(self.rng, 3*self.rankdim, self.qdim_encoder), name='Filter3'+self.name))
17 |         
18 |         self.b_1 = add_to_params(self.params, theano.shared(value=np.zeros((self.qdim_encoder,), dtype='float32'), name='cnn_b1'+self.name))
19 |         self.b_2 = add_to_params(self.params, theano.shared(value=np.zeros((self.qdim_encoder,), dtype='float32'), name='cnn_b2'+self.name))
20 |         self.b_3 = add_to_params(self.params, theano.shared(value=np.zeros((self.qdim_encoder,), dtype='float32'), name='cnn_b3'+self.name))
21 | 
22 |     # This function takes as input word indices and extracts their corresponding word embeddings
23 |     def approx_embedder(self, x):
24 |         return self.W_emb[x]
25 |     
26 |     def ConvLayer1(self, q1):
27 |         output = T.dot(q1, self.Filter1) + self.b_1
28 |         return output
29 |     
30 |     def ConvLayer2(self, q1, q2):
31 |         output = T.dot(T.concatenate([q1, q2], axis=1), self.Filter2) + self.b_2
32 |         return output
33 |     
34 |     def ConvLayer3(self, q1, q2, q3):
35 |         output = T.dot(T.concatenate([q1, q2, q3], axis=1), self.Filter3) + self.b_3
36 |         return output
37 |     
38 |     def Convolution(self, x, mask):
39 |         xe = self.approx_embedder(x)
40 |         _mask = self.tmp[mask]
41 |         
42 |         _res1, _ = theano.scan(self.ConvLayer1, sequences=[xe])
43 |         _res2, _ = theano.scan(self.ConvLayer2, sequences=[xe[:-1], xe[1:]])
44 |         _res3, _ = theano.scan(self.ConvLayer3, sequences=[xe[:-2],xe[1:-1],xe[2:]])
45 |         
46 |         hidden1 = T.tanh(T.max(_res1*_mask, axis=0)).dimshuffle('x',0,1)
47 |         hidden2 = T.tanh(T.max(_res2*_mask[:-1], axis=0)).dimshuffle('x',0,1)
48 |         hidden3 = T.tanh(T.max(_res3*_mask[:-2], axis=0)).dimshuffle('x',0,1)
49 |         
50 |         return T.mean(T.concatenate([hidden1, hidden2, hidden3], axis=0), axis=0)
51 |         #return hidden3
52 |         #return (hidden1 + hidden2 + hidden3)/3.0
53 |         #return x[:5]
54 |         #return (hidden1 + hidden2)/2.0
55 |     
56 |     def build_encoder(self, x, mask): #x是一个matrix
57 |         res = self.Convolution(x, mask)
58 |         
59 |         return res
60 |         
61 |     def __init__(self, word_embedding_param, name, config):
62 |         self.name = name
63 |         self.rankdim = config.w_dim
64 |         self.qdim_encoder = config.h_dim
65 |         self.params = []
66 |         self.rng = np.random.RandomState(23333)
67 |         self.init_params(word_embedding_param)
68 |         a = np.zeros((2, self.qdim_encoder))
69 |         a[1] = 1
70 |         self.tmp = theano.shared(value=a)


--------------------------------------------------------------------------------
/GRUModel.py:
--------------------------------------------------------------------------------
 1 | #coding:utf-8
 2 | from utils import NormalInit, OrthogonalInit, add_to_params
 3 | import theano
 4 | import numpy as np
 5 | import theano.tensor as T
 6 | 
 7 | class SentenceEncoder():
 8 |     def init_params(self, word_embedding_param):
 9 |         # Initialzie W_emb to given word embeddings
10 |         assert(word_embedding_param != None)
11 |         self.W_emb = word_embedding_param
12 | 
13 |         """ sent weights """
14 |         self.W_in = add_to_params(self.params, theano.shared(value=NormalInit(self.rng, self.rankdim, self.qdim_encoder), name='W_in'+self.name))
15 |         self.W_hh = add_to_params(self.params, theano.shared(value=OrthogonalInit(self.rng, self.qdim_encoder, self.qdim_encoder), name='W_hh'+self.name))
16 |         self.b_hh = add_to_params(self.params, theano.shared(value=np.zeros((self.qdim_encoder,), dtype='float32'), name='b_hh'+self.name))
17 |         
18 |         self.W_in_r = add_to_params(self.params, theano.shared(value=NormalInit(self.rng, self.rankdim, self.qdim_encoder), name='W_in_r'+self.name))
19 |         self.W_in_z = add_to_params(self.params, theano.shared(value=NormalInit(self.rng, self.rankdim, self.qdim_encoder), name='W_in_z'+self.name))
20 |         self.W_hh_r = add_to_params(self.params, theano.shared(value=OrthogonalInit(self.rng, self.qdim_encoder, self.qdim_encoder), name='W_hh_r'+self.name))
21 |         self.W_hh_z = add_to_params(self.params, theano.shared(value=OrthogonalInit(self.rng, self.qdim_encoder, self.qdim_encoder), name='W_hh_z'+self.name))
22 |         self.b_z = add_to_params(self.params, theano.shared(value=np.zeros((self.qdim_encoder,), dtype='float32'), name='b_z'+self.name))
23 |         self.b_r = add_to_params(self.params, theano.shared(value=np.zeros((self.qdim_encoder,), dtype='float32'), name='b_r'+self.name))
24 | 
25 |     # This function takes as input word indices and extracts their corresponding word embeddings
26 |     
27 |     def approx_embedder(self, x):
28 |         return self.W_emb[x]
29 | 
30 |     def GRU_sent_step(self, x_t, m_t, ph_t):
31 |         hr_tm1 = ph_t
32 | 
33 |         r_t = T.nnet.sigmoid(T.dot(x_t, self.W_in_r) + T.dot(hr_tm1, self.W_hh_r) + self.b_r)
34 |         z_t = T.nnet.sigmoid(T.dot(x_t, self.W_in_z) + T.dot(hr_tm1, self.W_hh_z) + self.b_z)
35 |         h_tilde = T.tanh(T.dot(x_t, self.W_in) + T.dot(r_t * hr_tm1, self.W_hh) + self.b_hh)
36 |         h_t = z_t * hr_tm1 + (np.float32(1.0) - z_t) * h_tilde
37 |         
38 |         m_t = m_t.dimshuffle(0, 'x') #make a column out of a 1d vector (N to Nx1)
39 |         h_t = (m_t) * h_t + (1 - m_t) * ph_t
40 |         
41 |         # return both reset state and non-reset state
42 |         return h_t, r_t, z_t, h_tilde
43 | 
44 |     def build_encoder(self, x, mask, prev_state): #x是一个matrix
45 |         xe = self.approx_embedder(x)
46 |         
47 |         hs_0 = prev_state
48 |         _res, _ = theano.scan(self.GRU_sent_step,
49 |                           sequences=[xe, mask],\
50 |                           outputs_info=[hs_0, None, None, None])#每次循环输入GRU_sent_step是一个矩阵，shape为N*w_dim(N为x的列维度)
51 | 
52 |         # Get the hidden state sequence
53 |         h = _res[0] #返回f_enc函数每次调用的第一个输出值，在RGU中h[i]会作为f_enc第i+1次迭代的输入，得到h[i+1]
54 |         return h, mask
55 | 
56 |     def __init__(self, word_embedding_param, name, config):
57 |         self.name = name
58 |         self.rankdim = config.w_dim
59 |         self.qdim_encoder = config.h_dim
60 |         self.params = []
61 |         self.rng = np.random.RandomState(23333)
62 |         self.init_params(word_embedding_param)
63 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # GRU-CNN
 2 | 本模型利用GRU或CNN对存在某种关系的两个句子进行建模。模型大致结构为利用GRU（[Cho et al., 2014b](http://arxiv.org/abs/1406.1078), RNN中的一种）或CNN学习句子表示，两个句子不共享一套参数。然后再用一层神经网络学习两个句子的联合表示，最后利用一个sigmoid层对两个句子进行打分，输出关系强弱的值，训练方法采用正负例训练。模型结构如下图所示：
 3 | 
 4 | ![model](model.jpg?raw=true "model")
 5 | 
 6 | 该模型可用于：连贯性任务（相当于窗口取2，只看前后两句话）；答案选取任务（针对QA数据集，问-答对正好是具有关联的两个句子）；以及对话质量评估（针对单轮对话，有点类似于一问一答那种形式（也是两个句子），模型评价对话的质量，即评价在聊天机器人系统中生成的对话质量如何）。
 7 | 
 8 | ## 输入文件格式
 9 | 由于模型目前仅对两个句子进行建模，所以输入文件为两个文件，一个文件存储第一句，另一个文件存储下句（对应存储，对于中文需要分词，按空格隔开）。注意：除了修改main.py中的file1（第一句）和file2（第二句）以外，还需要修改ReadDate函数中的数值来确定训练数据和测试数据的规模。
10 | 
11 | ## 模型参数
12 | main.py文件里面有以下参数可以设定：
13 | - margin：正负例得分间隔
14 | - iter：总共迭代次数
15 | - learning_rate：学习率
16 | - test_freq：每迭代多少次进行一次测试
17 | - h_dim：隐层维度，即句子向量的维度
18 | - vocab_size：词表大小，选取最高频的N个词
19 | - w_dim：词向量维度
20 | - neg_sample：负例采样的数目
21 | - up_dim：句子联合表示的向量维度
22 | - CNN_Flag：是否使用CNN模型，为False时不使用（使用GRU模型）
23 | - save_file：保存测试结果的文件名
24 | 
25 | ## 运行说明
26 | 在命令行中输入：
27 | 
28 |     python main.py
29 | 
30 | ## 实验结果
31 | 实验所用的数据为100W个对话对，有点类似于QA语料。实验设置为90W用于训练，10W用于测试，测试数据中5W为正例，5W为负例，使用GRU模型。实验结果如下：
32 | 
33 | **Iter 0:**
34 | 
35 | >cost: 3.025  
36 | >cost time: 195146.85 s  
37 | >Test...  
38 | >Accuracy: 0.75045  
39 | >Test Done  
40 | 
41 | **Iter 1:**
42 | 
43 | >cost: 2.428  
44 | >cost time: 190828.23 s  
45 | >Test...  
46 | >Accuracy: **0.79202**  
47 | >Test Done  
48 | 
49 | **Iter 2:**
50 | 
51 | >cost: 2.255  
52 | >cost time: 187904.05 s  
53 | >Test...  
54 | >Accuracy: 0.76932  
55 | >Test Done  
56 | 
57 | **Iter 3:**
58 | 
59 | >cost: 2.169  
60 | >cost time: 155178.83 s  
61 | >Test...  
62 | >Accuracy: 0.78361  
63 | >Test Done  
64 | 
65 | 使用CNN模型，实验结果如下：
66 | 
67 | **Iter 0:**
68 | 
69 | >cost: 0.998  
70 | >cost time: 137159.67 s  
71 | >Test...  
72 | >Accuracy: 0.68731  
73 | >Test Done  
74 | 
75 | **Iter 1:**
76 | 
77 | >cost: 0.753  
78 | >cost time: 72665.73 s  
79 | >Test...  
80 | >Accuracy: 0.7221  
81 | >Test Done  
82 | 
83 | **Iter 2:**
84 | 
85 | >cost: 0.737  
86 | >cost time: 68464.48 s  
87 | >Test...  
88 | >Accuracy: **0.75117**  
89 | >Test Done  
90 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | #coding:utf-8
  2 | import theano, random
  3 | import numpy as np
  4 | import cPickle,time
  5 | import theano.tensor as T
  6 | from collections import OrderedDict, Counter
  7 | import logging
  8 | from utils import compute_updates, NormalInit, add_to_params
  9 | from GRUModel import SentenceEncoder
 10 | from CNNModel import SentenceEncoder_CNN
 11 | 
# Configure the root logger to emit messages of level DEBUG and above.
logging.basicConfig(level=logging.DEBUG)
 13 | 
 14 | class Configuration(object):
 15 |     margin = 0.6 #正负例得分间隔
 16 |     iter = 6 #迭代次数
 17 |     learning_rate = 0.0003
 18 |     test_freq = 1 #每迭代多少次进行一次测试
 19 |     h_dim = 300 #句子向量维度
 20 |     vocab_size = 60000
 21 |     w_dim = 100 #词向量维度
 22 |     neg_sample = 10
 23 |     up_dim = 500 #句子联合表示向量维度
 24 |     CNN_Flag = True #是否使用CNN，为False时使用GRU
 25 |     save_file = 'test_res' #保存测试结果的文件名
 26 | 
 27 | config = Configuration()
 28 | 
 29 | def ReadDate(file1, file2): #选90W作为训练数据，10W作为测试数据
 30 |     Que = []
 31 |     Ans = []
 32 |     allword = []
 33 |     with open(file1,'r') as fq, open(file2,'r') as fa:
 34 |         for line in fq:
 35 |             tmp = line.split()
 36 |             allword += tmp
 37 |             if config.CNN_Flag:
 38 |                 while len(tmp) < 3 and len(tmp) > 0: #当使用CNN模型时，需要做padding
 39 |                     tmp.append('OOV')
 40 |                 Que.append(tmp)
 41 |             else:
 42 |                 Que.append(tmp)
 43 |         for line in fa:
 44 |             tmp = line.split()
 45 |             allword += tmp
 46 |             if config.CNN_Flag:
 47 |                 while len(tmp) < 3 and len(tmp) > 0:
 48 |                     tmp.append('OOV')
 49 |                 Ans.append(tmp)
 50 |             else:
 51 |                 Ans.append(tmp)
 52 |     
 53 |     assert(len(Que)==len(Ans))
 54 |     traindata = []
 55 |     testdata = []
 56 |     c = Counter(allword)
 57 |     vocab = [i[0] for i in c.most_common(config.vocab_size-1)]
 58 |     for q,a in zip(Que[:900000],Ans[:900000]):
 59 |         traindata.append((q,a))
 60 |     for q,a in zip(Que[900000:950000],Ans[900000:950000]):
 61 |         testdata.append((q,a,1))
 62 |     for q in Que[950000:]:
 63 |         a = Ans[random.randint(0,200000)]
 64 |         testdata.append((q,a,0))
 65 |     
 66 |     return traindata, testdata, vocab
 67 | 
 68 |     
 69 | print 'Loading the data...'
 70 | traindata, testdata, vocab = ReadDate('100w.q', '100w.a')#'100w.q'全是question，'100w.a'是对应的answers，请替换成自己的文件。
 71 | print len(traindata), len(testdata)
 72 | print ' Done'
 73 | str_to_id = dict([(j,i) for i,j in enumerate(vocab)]+[('OOV',config.vocab_size-1)])
 74 | assert(len(str_to_id)==config.vocab_size)
 75 | 
 76 | 
 77 | print 'Build model...'
 78 | rng = np.random.RandomState(23455)
 79 | params = []
 80 | W_emb = add_to_params(params, theano.shared(value=NormalInit(rng, config.vocab_size, config.w_dim), name='W_emb'))
 81 | 
 82 | T_que = T.imatrix('question')
 83 | T_ans = T.imatrix('answer')
 84 | T_neg = T.imatrix('neg_sample')
 85 | M_que = T.imatrix('question')
 86 | M_ans = T.imatrix('answer')
 87 | M_neg = T.imatrix('neg_sample')
 88 | 
 89 | if config.CNN_Flag == False:
 90 |     print 'use GRU model...'
 91 |     Question_Encoder = SentenceEncoder(W_emb, 'Question', config)
 92 |     Answer_Encoder = SentenceEncoder(W_emb, 'Answer', config)
 93 | 
 94 |     que_ph = theano.shared(value=np.zeros((1, config.h_dim), dtype='float32'), name='que_ph')
 95 |     ans_ph = theano.shared(value=np.zeros((1, config.h_dim), dtype='float32'), name='ans_ph')
 96 |     neg_ph = theano.shared(value=np.zeros((config.neg_sample, config.h_dim), dtype='float32'), name='neg_ph')
 97 | 
 98 |     que_h, _ = Question_Encoder.build_encoder(T_que, T.eq(M_que,1), que_ph)
 99 |     ans_h, _ = Answer_Encoder.build_encoder(T_ans, T.eq(M_ans,1), ans_ph)
100 |     neg_h, _test_mask = Answer_Encoder.build_encoder(T_neg, T.eq(M_neg,1), neg_ph)
101 | 
102 |     que_emb = que_h[-1]
103 |     ans_emb = ans_h[-1]
104 |     neg_emb = neg_h[-1]
105 |     
106 | else:
107 |     print 'use CNN model...'
108 |     Question_Encoder = SentenceEncoder_CNN(W_emb, 'Question', config)
109 |     Answer_Encoder = SentenceEncoder_CNN(W_emb, 'Answer', config)
110 |     
111 |     que_emb = Question_Encoder.build_encoder(T_que, T.eq(M_que,1))
112 |     ans_emb = Answer_Encoder.build_encoder(T_ans, T.eq(M_ans,1))
113 |     neg_emb = Answer_Encoder.build_encoder(T_neg, T.eq(M_neg,1))
114 | 
115 | W_up = add_to_params(params, theano.shared(value=NormalInit(rng, 2*config.h_dim, config.up_dim), name='W_up'))
116 | W_up_b = add_to_params(params, theano.shared(value=np.zeros((config.up_dim,), dtype='float32'), name='W_up_b'))
117 | Sen_U = add_to_params(params, theano.shared(value=NormalInit(rng, config.up_dim, 1), name='Sen_U'))
118 | Sen_b = add_to_params(params, theano.shared(value=np.zeros((1,), dtype='float32'), name='Sen_b'))
119 | 
120 | join_emb = T.concatenate([que_emb, ans_emb], axis=1)
121 | join_hidden = T.tanh(T.dot(T.concatenate([que_emb, ans_emb], axis=1), W_up)+W_up_b)
122 | #join_hidden = T.tanh(T.dot(W_up, join_emb.T)+W_up_b)
123 | f_x = T.nnet.sigmoid(T.dot(join_hidden, Sen_U)+Sen_b)
124 | 
125 | neg_join_hidden = T.tanh(T.dot(T.concatenate([T.repeat(que_emb, config.neg_sample, axis=0), neg_emb], axis=1), W_up)+W_up_b)
126 | f_neg = T.nnet.sigmoid(T.dot(neg_join_hidden, Sen_U)+Sen_b)
127 | 
128 | cost = T.maximum(0, config.margin - f_x.sum() + f_neg)
129 | training_cost = cost.sum()
130 | 
131 | updates = compute_updates(training_cost, params+Question_Encoder.params+Answer_Encoder.params, config)
132 | 
133 | train_model = theano.function([T_que, T_ans, T_neg, M_que, M_ans, M_neg],[training_cost],updates=updates, on_unused_input='ignore', name="train_fn")
134 | #train_model = theano.function([T_que, T_ans, T_neg, M_que, M_ans, M_neg],[que_emb, ans_emb, neg_emb], on_unused_input='ignore', name="train_fn")
135 | test_model = theano.function([T_que, T_ans, M_que, M_ans], [f_x], on_unused_input='ignore', name="train_fn")
136 | print 'function build finish!'
137 | 
138 | 
def _encode(words):
    """Map tokens to vocabulary ids; tokens missing from str_to_id get the 'OOV' id."""
    oov_id = str_to_id['OOV']
    return [str_to_id.get(wd, oov_id) for wd in words]


def _as_column(words):
    """Encode one sentence as a (len, 1) int32 id matrix plus an all-ones mask."""
    ids = np.array([[word_id] for word_id in _encode(words)], dtype=np.int32)
    mask = np.ones((len(words), 1), dtype=np.int32)
    return ids, mask


def _sample_negatives(idx):
    """Encode config.neg_sample distinct, non-empty training answers other than answer idx."""
    picked = []
    rows = []
    upper = len(traindata) - 10
    while len(picked) < config.neg_sample:
        cand = random.randint(10, upper)
        if cand == idx or cand in picked:
            continue
        words = traindata[cand][1]
        if not words:
            continue
        rows.append(_encode(words))
        picked.append(cand)
    return rows


def _pad_time_major(rows):
    """Stack id lists as columns of a (max_len, n_rows) int32 matrix.

    Shorter rows are padded at the end with id 0; the returned mask is 1 on
    real tokens and 0 on padding.
    """
    max_len = max(len(row) for row in rows)
    ids = np.zeros((max_len, len(rows)), dtype=np.int32)
    mask = np.zeros((max_len, len(rows)), dtype=np.int32)
    for col, row in enumerate(rows):
        ids[:len(row), col] = row
        mask[:len(row), col] = 1
    return ids, mask


print('Training...')
for step in range(1, config.iter+1):
    print('iter:  %d' % step)
    epoch_cost = 0
    n_updates = 0
    stime = time.time()
    for idx, (que, ans) in enumerate(traindata):
        if idx % 200000 == 0:
            print('training on  %d' % idx)
        if not que or not ans:
            continue

        # The negatives are encoded once while they are sampled; the former
        # second encoding pass only appended rows that were never read.
        neg_matrix, neg_mask = _pad_time_major(_sample_negatives(idx))
        que_matrix, que_mask = _as_column(que)
        ans_matrix, ans_mask = _as_column(ans)

        c = train_model(que_matrix, ans_matrix, neg_matrix, que_mask, ans_mask, neg_mask)[0]

        # Skip examples whose cost is inf or nan so they do not poison the average.
        if not np.isfinite(c):
            continue
        epoch_cost += c
        n_updates += 1
    etime = time.time()
    # Guard against an iteration in which every example was skipped.
    if n_updates:
        print('Cost:  %s' % (epoch_cost/n_updates))
    else:
        print('Cost:  nan')
    print('cost time:  %s s' % (etime-stime))

    if step%config.test_freq == 0:
        print('Test...')
        test_length = 0
        test_right = 0
        # `with` closes the result file even if evaluation raises.
        with open(config.save_file+'_%d.txt'%step, 'w') as fw_valid:
            for que, ans, label in testdata:
                if not que or not ans:
                    continue

                test_length += 1
                que_matrix, que_mask = _as_column(que)
                ans_matrix, ans_mask = _as_column(ans)

                prob = test_model(que_matrix, ans_matrix, que_mask, ans_mask)[0]
                prob = prob[0][0]

                # A pair counts as correct when the score falls on the
                # label's side of 0.5; a score of exactly 0.5 is wrong.
                if (label == 1 and prob > 0.5) or (label == 0 and prob < 0.5):
                    test_right += 1
                fw_valid.write('Prob: ' + str(prob) + ' ' + str(label) + '\r\n')
            # Guard against a test set with no usable pairs.
            accuracy = 1.0 * test_right / test_length if test_length else 0.0
            fw_valid.write('\r\n'+'Accuracy: ' + str(accuracy))
        print('Accuracy:  %s' % accuracy)
        print('Test Done')
301 |         


--------------------------------------------------------------------------------
/model.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hit-computer/GRU-or-CNN/b5c119c027aca51335475339a3fa163be163c654/model.jpg


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | #coding:utf-8
  2 | import theano, random
  3 | import numpy as np
  4 | import theano.tensor as T
  5 | from collections import OrderedDict
  6 | 
  7 | def sharedX(value, name=None, borrow=False, dtype=None):
  8 |     if dtype is None:
  9 |         dtype = theano.config.floatX
 10 |     return theano.shared(theano._asarray(value, dtype=dtype),
 11 |                          name=name,
 12 |                          borrow=borrow)
 13 | 
 14 | def Adam(grads, lr=0.0002, b1=0.1, b2=0.001, e=1e-8):
 15 |     updates = []
 16 |     i = sharedX(0.)
 17 |     i_t = i + 1.
 18 |     fix1 = 1. - (1. - b1)**i_t
 19 |     fix2 = 1. - (1. - b2)**i_t
 20 |     lr_t = lr * (T.sqrt(fix2) / fix1)
 21 |     for p, g in grads.items():
 22 |         m = sharedX(p.get_value() * 0.)
 23 |         v = sharedX(p.get_value() * 0.)
 24 |         m_t = (b1 * g) + ((1. - b1) * m)
 25 |         v_t = (b2 * T.sqr(g)) + ((1. - b2) * v)
 26 |         g_t = m_t / (T.sqrt(v_t) + e)
 27 |         p_t = p - (lr_t * g_t)
 28 |         updates.append((m, m_t))
 29 |         updates.append((v, v_t))
 30 |         updates.append((p, p_t))
 31 |     updates.append((i, i_t))
 32 |     return updates
 33 |     
 34 | def compute_updates(training_cost, params, config):
 35 |     updates = []
 36 |      
 37 |     grads = T.grad(training_cost, params)
 38 |     grads = OrderedDict(zip(params, grads))
 39 | 
 40 |     # Clip stuff
 41 |     c = np.float32(1.)
 42 |     clip_grads = []
 43 |     
 44 |     norm_gs = T.sqrt(sum(T.sum(g ** 2) for p, g in grads.items()))
 45 |     normalization = T.switch(T.ge(norm_gs, c), c / norm_gs, np.float32(1.))
 46 |     notfinite = T.or_(T.isnan(norm_gs), T.isinf(norm_gs))
 47 |      
 48 |     for p, g in grads.items():
 49 |         clip_grads.append((p, T.switch(notfinite, np.float32(.1) * p, g * normalization)))
 50 |     
 51 |     grads = OrderedDict(clip_grads)
 52 | 
 53 |     updates = Adam(grads, config.learning_rate) #使用adam梯度更新策略
 54 | 
 55 |     return updates
 56 |     
 57 | def NormalInit(rng, sizeX, sizeY, scale=0.01, sparsity=-1):
 58 |     """ 
 59 |     Normal Initialization
 60 |     """
 61 |     sizeX = int(sizeX)
 62 |     sizeY = int(sizeY)
 63 |     
 64 |     if sparsity < 0:
 65 |         sparsity = sizeY
 66 |      
 67 |     sparsity = np.minimum(sizeY, sparsity)
 68 |     values = np.zeros((sizeX, sizeY), dtype=theano.config.floatX)
 69 |     for dx in xrange(sizeX):
 70 |         perm = rng.permutation(sizeY)
 71 |         new_vals = rng.normal(loc=0, scale=scale, size=(sparsity,))
 72 |         values[dx, perm[:sparsity]] = new_vals
 73 |         
 74 |     return values.astype(theano.config.floatX)
 75 |     
 76 | def OrthogonalInit(rng, sizeX, sizeY, sparsity=-1, scale=1):
 77 |     """ 
 78 |     Orthogonal Initialization
 79 |     """
 80 | 
 81 |     sizeX = int(sizeX)
 82 |     sizeY = int(sizeY)
 83 | 
 84 |     assert sizeX == sizeY, 'for orthogonal init, sizeX == sizeY'
 85 | 
 86 |     if sparsity < 0:
 87 |         sparsity = sizeY
 88 |     else:
 89 |         sparsity = np.minimum(sizeY, sparsity)
 90 | 
 91 |     values = np.zeros((sizeX, sizeY), dtype=theano.config.floatX)
 92 |     for dx in xrange(sizeX):
 93 |         perm = rng.permutation(sizeY)
 94 |         new_vals = rng.normal(loc=0, scale=scale, size=(sparsity,))
 95 |         values[dx, perm[:sparsity]] = new_vals
 96 | 
 97 |     # Use SciPy:
 98 |     if sizeX*sizeY > 20000000:
 99 |         import scipy
100 |         u,s,v = scipy.linalg.svd(values)
101 |     else:
102 |         u,s,v = np.linalg.svd(values)
103 |     values = u * scale
104 |     return values.astype(theano.config.floatX)
105 |     
def SoftMax(x):
    """Softmax of *x* along its last axis.

    The per-slice maximum is subtracted before exponentiating, which keeps
    the exponentials from overflowing without changing the result.
    """
    last_axis = x.ndim - 1
    shifted = x - T.max(x, axis=last_axis, keepdims=True)
    exps = T.exp(shifted)
    return exps / T.sum(exps, axis=last_axis, keepdims=True)
109 |     
def add_to_params(params, new_param):
    """Append *new_param* to the list *params* and return *new_param*.

    Returning the parameter lets callers register and assign it in a single
    expression.
    """
    params.append(new_param)
    return new_param


--------------------------------------------------------------------------------