├── train_lstm.py
├── train_autoencoder.py
├── README.md
├── .gitignore
├── config.txt
├── layer.py
├── train_dnn_lstm.py
├── data_io_lstm.py
├── sparse_autoencoder.py
├── LICENSE
└── dnn_lstm.py
/train_lstm.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from config_reader import ConfigReader
3 | from train_dnn_lstm import TrainDNN
4 | from train_autoencoder import TrainAutoencoder
5 |
6 | if __name__ == '__main__':
7 |     if len(sys.argv) != 2:
8 |         print 'Usage: python train_lstm.py config_file'
9 |         exit(1)
10 |     config_file = sys.argv[1]
11 |     conf = ConfigReader(config_file)
12 |     conf.ReadConfig()
13 |     #TrainAutoencoder(conf)
14 |     TrainDNN(conf)
15 |
--------------------------------------------------------------------------------
/train_autoencoder.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import numpy as np
3 | #import theano
4 | import data_io_lstm as data_io  # data_io.py is not part of this listing; data_io_lstm provides the same readers
5 | import tensorflow as tf
6 | from sparse_autoencoder import Autoencoder
7 |
8 | def TrainAutoencoder(conf):
9 |     train_data_file = conf.GetKey('query_log_file_path')
10 |     word_lookup_table_file = conf.GetKey('word_lookup_table_file_path')
11 |     sparsity_level = float(conf.GetKey('sparsity_level'))
12 |     sparse_reg = float(conf.GetKey('sparse_reg'))
13 |     n_hidden = int(conf.GetKey('ae_hidden_layer_size'))
14 |     batch_size = int(conf.GetKey('ae_batch_size'))
15 |     learning_rate = float(conf.GetKey('ae_learning_rate'))
16 |     n_epoches = int(conf.GetKey('ae_n_epoches'))
17 |     weights_file = conf.GetKey('ae_param_file_path')
18 |
19 |     print 'Reading lookup table...'
20 |     (word_lookup_table, word_id, id_to_word) = data_io.ReadWordLookupTable(word_lookup_table_file)
21 |     print 'Reading training data...'
22 |     word_lookup_table = np.asarray(word_lookup_table, dtype = np.float32)
23 |     train_data = data_io.ReadUnlabelData(train_data_file, word_id)
24 |     n_in = len(word_lookup_table[0])
25 |     dnn = Autoencoder(n_in, n_hidden, sparsity_level, sparse_reg)
26 |     dnn.Fit(train_data, word_lookup_table, word_id, batch_size, learning_rate, n_epoches, weights_file)
27 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Query_Classfication_LSTM
2 | This repository implements query classification based on a DNN and an LSTM RNN.
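To train the classifier, pass a single configuration file to the entry point shown in train_lstm.py above (the paths in the sample config.txt point at the author's machine and will need to be adjusted, and a config_reader module providing ConfigReader is assumed but not included in this listing):

```
python train_lstm.py config.txt
```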
3 | Some features in this program:
4 | * Uses an LSTM RNN to understand the context of a specific word in a sentence
5 | * Uses skip layers to learn the linear relations between the regular-expression features and the output
6 |
7 |
8 | ## Input Information
9 | * Training query size: 46,000+
10 | * Test query size: 6,000+
11 | ## Parameters
12 | * max query length: 20
13 | * batch size: 10
14 | * dropout: 0.3
15 | ## Structure and Layers
16 | * Input (1*300) -> LSTM RNN layer -> Fully Connected Layer A
17 | * Reg Expression (1*200) -> Fully Connected Layer A
18 | * Input (1*300) -> Skip Layer A
19 | * Reg Expression (1*200) -> Skip Layer B
20 | * Fully Connected Layer A + Skip Layer A + Skip Layer B -> Output Layer
21 |
22 | ## Results
23 | Training accuracy: 98%
24 | Test accuracy: 93.7%
25 |
26 | | Method | Test Accuracy |
27 | |:--------:|:------------:|
28 | | One NN | 90% |
29 | | Two NNs | 91.3% |
30 | | LSTM + NN | 93.5% |
31 | | GRU + NN | 93.7% |
32 |
33 | ## File Descriptions
34 | * data_io_lstm.py: manipulates raw data and generates the training and test data
35 | * train_lstm.py: entry point that initializes the entire program
36 | * train_dnn_lstm.py: loads the data and calls the DNN to fit and train the model
37 | * dnn_lstm.py: the model structure and the training and fitting process
38 |
39 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *.cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 |
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 |
60 | # Scrapy stuff:
61 | .scrapy
62 |
63 | # Sphinx documentation
64 | docs/_build/
65 |
66 | # PyBuilder
67 | target/
68 |
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 |
72 | # pyenv
73 | .python-version
74 |
75 | # celery beat schedule file
76 | celerybeat-schedule
77 |
78 | # dotenv
79 | .env
80 |
81 | # virtualenv
82 | venv/
83 | ENV/
84 |
85 | # Spyder project settings
86 | .spyderproject
87 |
88 | # Rope project settings
89 | .ropeproject
90 |
--------------------------------------------------------------------------------
/config.txt:
--------------------------------------------------------------------------------
1 | #Config for the autoencoder
2 |
3 | #The unlabeled query log for training the autoencoder
4 | query_log_file_path=/Users/yuwang/Documents/nlpdnn_tensorflow/text_logs.txt
5 |
6 | #The path where the parameters of the autoencoder are saved. They will be saved in .npz format
7 | #It is both the output of the autoencoder and the input to the dnn.
8 | #If you don't train an autoencoder, this parameter file will be copied automatically (See run.sh)
9 | #If you do, it will be overwritten.
10 | ae_param_file_path=/Users/yuwang/Documents/nlpdnn_tensorflow/ae_weights 11 | 12 | #Config for the dnn 13 | 14 | #The file that lists all the labels 15 | label_id_file_path=/Users/yuwang/Documents/nlpdnn_tensorflow/label_id.txt 16 | 17 | #The training data 18 | train_feature_file_path=/Users/yuwang/Documents/nlpdnn_tensorflow/train.all.input.txt 19 | #train_feature_file_path=/Users/yuwang/Documents/nlpdnn_tensorflow/train_feature.txt 20 | 21 | #The dev data 22 | dev_feature_file_path=/Users/yuwang/Documents/nlpdnn_tensorflow/dev.input.txt 23 | #dev_feature_file_path=/Users/yuwang/Documents/nlpdnn_tensorflow/dev_feature.txt 24 | 25 | #Word lookup table. You can train it using word2vec 26 | word_lookup_table_file_path=/Users/yuwang/Documents/nlpdnn_tensorflow/word_vectors_300.txt 27 | 28 | #The dir where parameters of dnn (and autoencoder) is saved 29 | dnn_output_dir=./dnn_output/ 30 | 31 | ###### Hyperparameters of autoencoder and dnn ##################################### 32 | ###### Be carefull! Normally, you don't need to change these parameters ############# 33 | ae_hidden_layer_size=200 34 | sparsity_level=0.01 35 | sparse_reg=0.1 36 | ae_batch_size=20 37 | ae_learning_rate=0.2 38 | ae_n_epoches=5 39 | 40 | dnn_hidden_layer_size=200 41 | dnn_L2_reg=0.0001 42 | dnn_batch_size=10 43 | dnn_learning_rate=0.1 44 | dnn_n_epoches=10000 45 | -------------------------------------------------------------------------------- /layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | # modified by tensorflow, Done by Yu Wang --2016/7/1 4 | class Layer(object): 5 | def __init__(self, rng, n_in, n_out, activation_type, learning_rate, batch_grad): 6 | 7 | #n_in=300, n_out=200 for sparse 8 | #n_in=200, n_out=? for DNN 9 | W=tf.Variable(tf.random_uniform([n_in, n_out],minval=-tf.sqrt(tf.cast(6./(n_in+n_out),tf.float32)), maxval=tf.sqrt(tf.cast((6./(n_in+n_out)),tf.float32)),dtype=tf.float32), name= 'W') 10 | b=tf.Variable(tf.zeros((n_out,1), dtype=tf.float32), name='b') 11 | b = tf.Variable(tf.zeros((1,n_out), dtype=tf.float32), name='b1') 12 | 13 | #why activation_type will change the sigmoid weights 14 | if activation_type == 'sigmoid': 15 | W = tf.Variable(tf.random_uniform([n_in, n_out], minval=-4*tf.sqrt(tf.cast(6. / (n_in + n_out), tf.float32)), 16 | maxval=4*tf.sqrt(tf.cast((6. 
/ (n_in + n_out)), tf.float32)), 17 | dtype=tf.float32), name='W') 18 | 19 | self.W = W 20 | self.b = b 21 | #self.b1 = b1 22 | 23 | learning_rate[self.W]=tf.Variable(tf.ones([n_in,n_out],dtype=tf.float32)) 24 | learning_rate[self.b]=tf.Variable(tf.ones((n_out,1),dtype=tf.float32)) 25 | batch_grad[self.W]=tf.Variable(tf.zeros([n_in, n_out], dtype = tf.float32)) 26 | batch_grad[self.b] =tf.Variable(tf.zeros((n_out,1), dtype=tf.float32)) 27 | 28 | self.params = [self.W, self.b] 29 | 30 | 31 | class AELayer(Layer): 32 | def __init__(self, rng, n_in, n_out, activation_type, learning_rate, batch_grad): 33 | Layer.__init__(self, rng, n_in, n_out, activation_type, learning_rate, batch_grad) 34 | 35 | b_prime=tf.Variable(tf.zeros((1,n_in),dtype=tf.float32)) 36 | 37 | self.b_prime = b_prime 38 | 39 | learning_rate[self.b_prime]=tf.Variable(tf.ones((n_in,1),dtype=tf.float32)) 40 | batch_grad[self.b_prime]=tf.Variable(tf.ones((n_in,1), dtype=tf.float32)) 41 | 42 | self.params = [self.W, self.b, self.b_prime] 43 | -------------------------------------------------------------------------------- /train_dnn_lstm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import numpy as np 3 | #import theano 4 | import data_io_lstm 5 | import tensorflow as tf 6 | from dnn_lstm import DNN 7 | from sparse_autoencoder import Autoencoder 8 | 9 | def TrainDNN(conf): 10 | sess = tf.Session() 11 | sess.run(tf.initialize_all_variables()) 12 | print 'Reading label id...' 13 | label_id_file = conf.GetKey("label_id_file_path") 14 | (label_id, id_to_label) = data_io_lstm.ReadLable(label_id_file) 15 | 16 | print 'Reading word lookup table...' 17 | word_lookup_table_file = conf.GetKey("word_lookup_table_file_path") 18 | (word_lookup_table, word_id, id_to_word) = data_io_lstm.ReadWordLookupTable(word_lookup_table_file) 19 | word_lookup_table = np.asarray(word_lookup_table, dtype = np.float32) 20 | 21 | print 'Loading autoencoder...' 22 | embedding_size = len(word_lookup_table[0]) 23 | ae_hidden_layer_size = int(conf.GetKey("ae_hidden_layer_size")) 24 | auto_encoder = Autoencoder(embedding_size, ae_hidden_layer_size) 25 | ae_param_file = conf.GetKey("ae_param_file_path") + ".npz" 26 | auto_encoder.LoadParam(ae_param_file) 27 | #output, feed_dict=auto_encoder.CompileEncodeFun(), Move it to read feature instead 28 | #sess.run(output,feed_dict={feed_dict:}) 29 | 30 | reg_exp_dict = {} 31 | id_to_reg_exp = [] 32 | print 'Reading training data...' 33 | train_feature_file = conf.GetKey("train_feature_file_path") 34 | (train_data, train_ans) = data_io_lstm.ReadFeature(train_feature_file, label_id, word_id, reg_exp_dict, 35 | id_to_reg_exp, ae_hidden_layer_size, word_lookup_table, auto_encoder,sess,1) 36 | # 1 for training 37 | #print 'train data display' 38 | #print train_data # this is the single line of data that is printed out (200,1) ,n*1 39 | 40 | print 'Reading dev data...' 
41 | dev_feature_file = conf.GetKey("dev_feature_file_path") 42 | (dev_data, dev_ans) = data_io_lstm.ReadFeature(dev_feature_file, label_id, word_id, reg_exp_dict, 43 | id_to_reg_exp, ae_hidden_layer_size, word_lookup_table, auto_encoder, sess, 2) 44 | #2 for testing 45 | 46 | hidden_layer_size = int(conf.GetKey("dnn_hidden_layer_size")) 47 | output_layer_size = len(label_id)+1 48 | L2_reg = float(conf.GetKey("dnn_L2_reg")) 49 | batch_size = int(conf.GetKey("dnn_batch_size")) 50 | learning_rate = float(conf.GetKey("dnn_learning_rate")) 51 | n_epoches = int(conf.GetKey("dnn_n_epoches")) 52 | weights_file_dir = conf.GetKey("dnn_output_dir") 53 | 54 | 55 | 56 | 57 | dnn = DNN(hidden_layer_size, output_layer_size, len(reg_exp_dict)+1, ae_hidden_layer_size, 58 | id_to_reg_exp, id_to_word, word_lookup_table, auto_encoder, L2_reg) 59 | 60 | dnn.Fit(train_data, train_ans, dev_data, dev_ans, sess, batch_size, learning_rate, n_epoches, weights_file_dir) 61 | -------------------------------------------------------------------------------- /data_io_lstm.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | import random 4 | def ReadWordLookupTable(lookup_table_file): 5 | #Find the word in word2vec 300 table 6 | lt = [] 7 | word_id = {} 8 | id_to_word = [] 9 | V = 0 10 | f = open(lookup_table_file, 'r') 11 | f.readline() # omit the header 12 | for line in f: 13 | line = line.strip().decode('utf-8') 14 | line = line.split() 15 | tmp = [float(t) for t in line[1:len(line)]] 16 | lt.append(tmp) 17 | word_id[line[0]] = V 18 | id_to_word.append(line[0]) 19 | V += 1 20 | f.close() 21 | return (lt, word_id, id_to_word) 22 | 23 | def ReadLable(label_id_file): 24 | label_id = {} 25 | id_to_label = [] 26 | V = 0 27 | f = open(label_id_file, 'r') 28 | for line in f: 29 | line = line.strip().decode('utf-8') 30 | label_id[line] = V 31 | id_to_label.append(line) 32 | V += 1 33 | return (label_id, id_to_label) 34 | 35 | def ReadFeature(feature_file, label_id, word_id, reg_exp_dict, id_to_reg_exp, ae_hidden_layer_size, word_lookup_table, auto_encoder,sess, mode): 36 | data = [] 37 | ans = [] 38 | f = open(feature_file,'r') 39 | count=0; 40 | ''' 41 | for line in f:# reading in the word segment in batches 42 | line = line.strip().decode('utf-8') 43 | line = line.split('\t') 44 | unigram = [] 45 | senPack=[] 46 | for word in line[1].split(' '): 47 | if word not in word_id: 48 | continue 49 | unigram.append(word_id[word]) 50 | 51 | if len(unigram) == 0: 52 | rep = np.zeros(ae_hidden_layer_size, dtype=np.float32) 53 | else: 54 | sen = word_lookup_table[unigram].mean(axis=0) 55 | sen = np.asmatrix(sen) 56 | senPack.append(sen) 57 | print len(senPack) 58 | ''' 59 | output, feed_dict = auto_encoder.CompileEncodeFun() 60 | sess.run(tf.initialize_all_variables()) 61 | 62 | for line in f: 63 | count=count+1 64 | line = line.strip().decode('utf-8') 65 | line = line.split('\t') 66 | tag = line[0] 67 | # unigram feature 68 | unigram = [] 69 | senPack = [] 70 | for word in line[1].split(' '): 71 | if word not in word_id: 72 | continue 73 | unigram.append(word_id[word]) 74 | 75 | if len(unigram) == 0: 76 | rep = np.zeros(ae_hidden_layer_size, dtype=np.float32) 77 | else: 78 | sen = word_lookup_table[unigram]#using data directly 79 | sen = np.asmatrix(sen) 80 | if sen.shape[0]>=20: 81 | sen=sen[0:20,:] 82 | else: 83 | padded=random.random()*np.ones((20,sen.shape[1])) 84 | padded[0:sen.shape[0], 0:sen.shape[1]] = sen 85 | sen=padded 86 | 87 | # 
reg_exp feature 88 | reg_exp = [] 89 | for reg in line[2].split(' '): 90 | if reg not in reg_exp_dict: 91 | reg_exp_dict[reg] = len(reg_exp_dict)# reg_exp_dict store the length of each expression 92 | id_to_reg_exp.append(reg) 93 | print 'the reg expression is'+ repr(reg) 94 | #print repr(id_to_reg_exp) 95 | reg_exp.append(reg_exp_dict[reg]) 96 | 97 | 98 | rep = sess.run(output, feed_dict={feed_dict: sen}) 99 | rep = np.asarray(rep, dtype=np.float32) 100 | 101 | data.append((rep, reg_exp)) 102 | ans.append(label_id[tag]) # give a number of label_id in the label list here 103 | print 'Round No. is' + repr(count) 104 | 105 | if (mode==1) and (count>100000): #mode 1 is for training 106 | break 107 | if (mode==2) and (count>20000):# mode 2 is for testing 108 | break 109 | f.close() 110 | return (data, ans)# ans is the label of corresponding category 111 | 112 | def ReadUnlabelData(data_file, word_id): 113 | f = open(data_file,'r') 114 | train_data = [] 115 | for line in f: 116 | line = line.strip().decode('utf-8') 117 | line = line.split(' ') 118 | sen = [] 119 | for word in line: 120 | if word in word_id: 121 | sen.append(word_id[word]) 122 | if len(sen) == 0: 123 | continue 124 | train_data.append(sen) 125 | f.close() 126 | return train_data 127 | 128 | 129 | -------------------------------------------------------------------------------- /sparse_autoencoder.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | import random 5 | import math 6 | import heapq 7 | import tensorflow as tf 8 | from collections import OrderedDict 9 | from numpy import linalg as LA 10 | from layer import AELayer 11 | import layer 12 | 13 | class Autoencoder(object): 14 | def __init__(self, n_in, n_hidden, sparsity_level = 0.05, sparse_reg = 0.001): 15 | self.n_in = n_in 16 | self.n_hidden = n_hidden 17 | self.activation = tf.sigmoid 18 | self.sparsity_level= np.repeat([0.05], self.n_hidden).astype(np.float32) 19 | self.sparse_reg = sparse_reg 20 | self.sen_vec=tf.placeholder(tf.float32, [None, 300]) 21 | rng = np.random.RandomState(random.randint(1, 2**30)) 22 | 23 | # Adapting learning rate 24 | self.learning_rate = OrderedDict({}) 25 | self.batch_grad = OrderedDict({}) 26 | 27 | # Hidden Layer 28 | self.hiddenLayer = AELayer(rng, n_in, n_hidden, "sigmoid", self.learning_rate, self.batch_grad) 29 | 30 | self.params = self.hiddenLayer.params 31 | 32 | def LoadParam(self, weights_file): 33 | params = np.load(weights_file) 34 | #print params.shape 35 | print "data is", params['w_hid'].shape 36 | a=self.hiddenLayer.W.assign(tf.cast(params['w_hid'],tf.float32)) 37 | b=self.hiddenLayer.b.assign(tf.cast(tf.reshape(params['b_hid'],[1,200]),tf.float32)) 38 | c=self.hiddenLayer.b_prime.assign(tf.cast(tf.reshape(params['b_vis'],[1,300]),tf.float32)) 39 | 40 | def kl_divergence(self, p, p_hat): 41 | return p * tf.log(p) - p * tf.log(p_hat) + (1 - p) * tf.log(1 - p) - (1 - p) * tf.log(1 - p_hat) 42 | 43 | def Forward(self, x_in): 44 | lin_h = tf.matmul(x_in, self.hiddenLayer.W) + self.hiddenLayer.b 45 | h = self.activation(lin_h) 46 | #notice sparsity_level read from numpy can be dtype64, should using astype to float32 47 | kl_div = self.kl_divergence(self.sparsity_level, h) 48 | 49 | lin_output = tf.matmul(tf.transpose(self.hiddenLayer.W), h) + self.hiddenLayer.b_prime 50 | return lin_output, kl_div 51 | 52 | def Encode(self, x_in): 53 | lin_h = tf.matmul(x_in,self.hiddenLayer.W) + self.hiddenLayer.b 54 | h = self.activation(lin_h) 55 | 
return h 56 | 57 | def TrainNN(self): 58 | #sen_vec = T.vector() 59 | sen_vec= self.sen_vec 60 | 61 | updates = OrderedDict({}) 62 | output1, output2 = self.Forward(sen_vec) 63 | cost = tf.reduce_sum((sen_vec - output1) ** 2)+ self.sparse_reg * output2 64 | 65 | self.gparams=tf.gradients(cost,self.params) 66 | 67 | for param, gparam in zip(self.params, self.gparams): 68 | #print param 69 | updates[self.batch_grad[param]] = self.batch_grad[param] + gparam 70 | feed_dict = sen_vec 71 | return cost, feed_dict 72 | 73 | def CompileEncodeFun(self): 74 | #sen_vec = T.vector() 75 | sen_vec=self.sen_vec 76 | 77 | updates = OrderedDict({}) 78 | output = self.Encode(sen_vec) 79 | #f = theano.function([sen_vec], output, updates = updates) 80 | feed_dict = sen_vec 81 | #self.encode_fun = {output,feed_dict} 82 | 83 | return output, feed_dict 84 | 85 | def Fit(self, train_data, word_lookup_table, word_id, batch_size = 20, alpha = 0.2, n_epoches = 5, weights_file = '/weights'): 86 | print 'Compling training function...' 87 | cost, feed_dict=self.TrainNN() 88 | sess = tf.Session() 89 | sess.run(tf.initialize_all_variables()) 90 | M = len(train_data) 91 | batch_cnt = M/batch_size + 1 92 | 93 | hidden_W = None 94 | hidden_b = None 95 | output_W = None 96 | output_b = None 97 | best = 0.0 98 | 99 | print 'Start training...' 100 | sys.stdout.flush() 101 | for epoch in xrange(n_epoches): 102 | costs = 0.0 103 | error = 0.0 104 | print 'test1' 105 | start_time = time.time() 106 | for batch in xrange(0, batch_cnt + 1): 107 | start = batch * batch_size 108 | end = min((batch + 1) * batch_size, M) 109 | print batch 110 | if start >= M: 111 | break 112 | for index in xrange(start, end): 113 | print 'test3' 114 | #print 'batch %d index %d' % (batch, index) 115 | data = word_lookup_table[train_data[index]].mean(axis = 0) 116 | #tmp_cost = train_nn(data) 117 | #print train_nn 118 | #print data.shape 119 | data = np.reshape(data, (300, 1)) 120 | tmp_cost=sess.run(cost, feed_dict={feed_dict:data}) 121 | costs += tmp_cost 122 | for param in self.params: 123 | print 'test4' 124 | # sess.run(tf.initialize_all_variables()) 125 | 126 | old_param=param 127 | oldParam=sess.run(param) 128 | #grad=tf.div(self.batch_grad[param],(end-start+1)) 129 | 130 | 131 | grad=sess.run(self.batch_grad[param])/(end-start+1) 132 | tmp = sess.run(self.learning_rate[param])+grad * grad 133 | lr = alpha / (np.sqrt(tmp) + 1.0e-6) 134 | new_param = oldParam-lr*grad 135 | 136 | p=param.assign(new_param) 137 | t = self.learning_rate[param].assign(tmp) 138 | 139 | sess.run(p) 140 | sess.run(t) 141 | print new_param 142 | print sess.run(t) 143 | for param in self.params: 144 | print 'test5' 145 | #self.batch_grad[param].set_value(np.zeros_like(self.batch_grad[param].get_value(), dtype=tf.float32)) 146 | paraGrad=self.batch_grad[param].assign(np.zeros_like(sess.run(self.batch_grad[param]), dtype=np.float32)) 147 | sess.run(paraGrad) 148 | end_time = time.time() 149 | minu = int((end_time - start_time)/60) 150 | sec = (end_time - start_time) - 60 * minu 151 | print 'Time: %d min %.2f sec' % (minu, sec) 152 | cur_cost = costs/M 153 | print 'Traning at epoch %d, cost = %f' % (epoch + 1, cur_cost) 154 | sys.stdout.flush() 155 | 156 | w_hid = sess.run(self.hiddenLayer.W) 157 | b_hid = sess.run(self.hiddenLayer.b) 158 | b_vis = sess.run(self.hiddenLayer.b_prime) 159 | 160 | w_hid, b_hid, b_vis = self.params 161 | 162 | np.savez(weights_file, w_hid = w_hid, b_hid = b_hid, b_vis = b_vis) 163 | 
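As a quick illustration of the sparsity term used by Autoencoder above, here is a minimal NumPy sketch of the same Bernoulli KL divergence that kl_divergence computes; the activation values are made up for illustration, and the 0.05 target matches the hard-coded sparsity_level in the constructor:

```python
import numpy as np

def kl_divergence(p, p_hat):
    # Same elementwise formula as Autoencoder.kl_divergence in sparse_autoencoder.py:
    # KL(p || p_hat) for Bernoulli variables with target activation p.
    return (p * np.log(p) - p * np.log(p_hat)
            + (1 - p) * np.log(1 - p) - (1 - p) * np.log(1 - p_hat))

p = 0.05                            # target activation (sparsity_level)
h = np.array([0.04, 0.10, 0.50])    # hypothetical hidden-unit activations
print(kl_divergence(p, h))          # near zero for 0.04, large for 0.50
print(kl_divergence(p, h).sum())    # summed penalty
```

In TrainNN above, this penalty is added to the squared reconstruction error with weight sparse_reg to form the training cost.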
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /dnn_lstm.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import time 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.python.ops import rnn, rnn_cell 6 | import random 7 | import math 8 | import heapq 9 | from collections import OrderedDict 10 | from layer import Layer 11 | 12 | #rewritte by tensorflow 13 | class DNN(object): 14 | def __init__(self, n_hidden, n_out, reg_exp_size, ae_size, id_to_reg_exp, 15 | id_to_word, word_lookup_table, auto_encoder, L2_reg=0.0001): 16 | # sess = tf.Session() 17 | 18 | self.n_hidden = n_hidden 19 | self.n_out = n_out 20 | self.L2_reg = L2_reg 21 | self.activation = tf.tanh #modification 1 22 | self.auto_encoder = auto_encoder 23 | self.word_lookup_table = word_lookup_table 24 | self.id_to_word = id_to_word 25 | self.id_to_reg_exp = id_to_reg_exp 26 | rng = np.random.RandomState(random.randint(1, 2 ** 30)) 27 | 28 | # Adapting learning rate 29 | self.learning_rate = OrderedDict({}) 30 | self.batch_grad = OrderedDict({}) 31 | 32 | # word dict size and ner dict size and reg_exp_dict size 33 | self.ae_size = ae_size 34 | self.reg_V = reg_exp_size 35 | 36 | self.x_in=tf.placeholder(tf.float32, shape=(None, 20, 200))#memory size is 5 37 | self.reg_x=tf.placeholder(tf.int32, shape=(None,)) 38 | self.y=tf.placeholder(tf.int32) 39 | self.i=0 40 | 41 | # Skip Layer for encoder 42 | # The detailed tensorflow structure is used in Layer method 43 | 44 | self.skip_layer_ae = Layer(rng, ae_size, n_out, "tanh", self.learning_rate, self.batch_grad) 45 | # Skip Layer for reg, 46 | self.skip_layer_re = Layer(rng, self.reg_V, n_out, "tanh", self.learning_rate, self.batch_grad) 47 | # Hidden Layer, ae_size=n_hidden=200 48 | self.hiddenLayer = Layer(rng, ae_size, n_hidden, "tanh", self.learning_rate, self.batch_grad) 49 | # Output Layer 50 | self.outputLayer = Layer(rng, n_hidden, n_out, "tanh", self.learning_rate, self.batch_grad) 51 | 52 | # Lookup table for reg 53 | """ 54 | reg_lookup_table_value = rng.uniform(low=-0.01, high=0.01, size=(self.reg_V, n_hidden)) 55 | reg_lookup_table_value = np.asarray(reg_lookup_table_value, dtype=theano.config.floatX) 56 | self.reg_lookup_table = theano.shared(value=reg_lookup_table_value, name='rlt', borrow=True) 57 | 
self.learning_rate[self.reg_lookup_table] = theano.shared(value=np.ones(reg_lookup_table_value.shape, 58 | dtype=theano.config.floatX), 59 | borrow=True) 60 | self.batch_grad[self.reg_lookup_table] = theano.shared(value=np.zeros(reg_lookup_table_value.shape, 61 | dtype=theano.config.floatX), borrow=True) 62 | """ 63 | reg_lookup_table_value = rng.uniform(low=-0.01, high=0.01, size=(self.reg_V, n_hidden)) 64 | self.reg_lookup_table = tf.Variable(np.asarray(reg_lookup_table_value), dtype=tf.float64, name='rlt') 65 | self.learning_rate[self.reg_lookup_table]=tf.Variable(np.ones(reg_lookup_table_value.shape),dtype=tf.float64, name='learnrate') 66 | 67 | print (reg_lookup_table_value.shape) 68 | self.batch_grad[self.reg_lookup_table]=tf.Variable(np.zeros(reg_lookup_table_value.shape),dtype=tf.float64,name='batchgrad') 69 | self.params = self.hiddenLayer.params + self.outputLayer.params + self.skip_layer_ae.params + self.skip_layer_re.params + [ 70 | self.reg_lookup_table] 71 | 72 | #sess.run(tf.initialize_all_variables()) 73 | 74 | def LoadParam(self, weights_file,sess): 75 | params = np.load(weights_file) 76 | #notice that W here is a tensor variable declared in the layer file 77 | sess.run(self.hiddenLayer.W.assign(params['hidden_W'])) 78 | sess.run(self.hiddenLayer.b.assign(params['hidden_b'])) 79 | sess.run(self.outputLayer.W.assign(params['output_W'])) 80 | sess.run(self.outputLayer.b.assign(params['output_b'])) 81 | sess.run(self.skip_layer_ae.W.assign(params['skip_ae_W'])) 82 | sess.run(self.skip_layer_ae.b.assign(params['skip_ae_b'])) 83 | sess.run(self.skip_layer_re.W.assign(params['skip_re_W'])) 84 | sess.run(self.skip_layer_re.b.assign(params['skip_re_b'])) 85 | sess.run(self.reg_lookup_table.assign(params['reg_lookup_table'])) 86 | 87 | def SaveMatrix(self, f, matrix): 88 | r = matrix.shape[0] 89 | c = matrix.shape[1] 90 | 91 | f.write('%d %d\n' % (r, c)) 92 | for line in matrix: 93 | ret = [str(val) for val in line] 94 | ret = ' '.join(ret) 95 | f.write("%s\n" % ret) 96 | 97 | def SaveVector(self, f, vector): 98 | r = vector.shape[0] 99 | 100 | f.write('%d\n' % r) 101 | ret = [str(val) for val in vector] 102 | ret = ' '.join(ret) 103 | f.write("%s\n" % ret) 104 | 105 | def SaveLookupTable(self, f, lt, id_to_name): 106 | r = lt.shape[0] 107 | c = lt.shape[1] 108 | 109 | f.write('%d %d\n' % (r, c)) 110 | for i in xrange(len(lt)): 111 | ret = [str(val) for val in lt[i]] 112 | ret = ' '.join(ret) 113 | f.write("%s %s\n" % (id_to_name[i].encode('utf-8'), ret)) 114 | 115 | def SaveParam(self, weights_file_dir, sess): 116 | word_lookup_table_file = weights_file_dir + 'word_lookup_table.txt' 117 | f = open(word_lookup_table_file, 'w') 118 | self.SaveLookupTable(f, self.word_lookup_table, self.id_to_word) 119 | f.close() 120 | reg_lookup_table_file = weights_file_dir + 'reg_lookup_table.txt' 121 | f = open(reg_lookup_table_file, 'w') 122 | self.SaveLookupTable(f, sess.run(self.reg_lookup_table), self.id_to_reg_exp) 123 | f.close() 124 | dnn_weights_file = weights_file_dir + 'dnn_layer_weights.txt' 125 | f = open(dnn_weights_file, 'w') 126 | # encode layer 127 | self.SaveMatrix(f, sess.run(self.auto_encoder.hiddenLayer.W)) 128 | self.SaveVector(f, sess.run(self.auto_encoder.hiddenLayer.b)) 129 | # hidden layer 130 | self.SaveMatrix(f, sess.run(self.hiddenLayer.W)) 131 | self.SaveVector(f, sess.run(self.hiddenLayer.b)) 132 | # output layer 133 | self.SaveMatrix(f, sess.run(self.outputLayer.W)) 134 | self.SaveVector(f, sess.run(self.outputLayer.b)) 135 | # skip layer for ae 136 | 
self.SaveMatrix(f, sess.run(self.skip_layer_ae.W)) 137 | self.SaveVector(f, sess.run(self.skip_layer_ae.b)) 138 | # skip layer for re 139 | self.SaveMatrix(f, sess.run(self.skip_layer_re.W)) 140 | self.SaveVector(f, sess.run(self.skip_layer_re.b)) 141 | f.close() 142 | 143 | def Forward(self, sess): 144 | lstm= tf.nn.rnn_cell.BasicLSTMCell(200, forget_bias=1.0)#LSTM size 145 | #lstm=tf.nn.rnn_cell.GRUCell(10) 146 | state=tf.zeros([1,200])# batch size, state_num=2*step_size 147 | num_steps=20# we don't need time step actually, the length of sentence is time-step 148 | x_in_batch = tf.transpose(self.x_in, [1, 0, 2])#change to 20*1*200 149 | x_in = tf.reshape(x_in_batch, [-1, 200])#change to 20*200 150 | x_in = tf.split(0, 20, x_in)#this will return a list, i.e. 20 sequences of 1*200 151 | 152 | if self.i == 0: 153 | with tf.variable_scope('output'): 154 | output_lstm, state=rnn.rnn(lstm, x_in, dtype=tf.float32) 155 | #output_lstm, state= lstm(x_in,state)#200*1 156 | else: 157 | with tf.variable_scope('output', reuse=True): 158 | output_lstm, state = rnn.rnn(lstm, x_in, dtype=tf.float32) 159 | #output_lstm, state= lstm(x_in,state) 160 | self.i+=1 161 | 162 | output_lstm=output_lstm[-1]# get the last element of a list 163 | 164 | lin_h=tf.matmul(output_lstm,self.hiddenLayer.W)+self.hiddenLayer.b 165 | #x_in=1*200, W=200*200 166 | 167 | reg_h = tf.reduce_sum(tf.gather(self.reg_lookup_table, self.reg_x), 0)#Num*200 168 | print "reg_h is" 169 | print reg_h 170 | h = self.activation(lin_h + tf.cast(reg_h,tf.float32))#1*200 171 | 172 | lin_output_pre = tf.matmul(h, self.outputLayer.W) + self.outputLayer.b 173 | lin_output = tf.nn.dropout(lin_output_pre, keep_prob=0.6) 174 | 175 | #h=1*200, outputLayer.W=200*63, lin_outupt=1*63 176 | #re.W:19156*63 177 | reg_output = tf.reduce_sum(tf.gather(self.skip_layer_re.W, self.reg_x), 0) + self.skip_layer_re.b 178 | print reg_output 179 | 180 | #x_in=1*200. ae.W=200*63 181 | ae_output = tf.matmul(x_in[-1], self.skip_layer_ae.W) + self.skip_layer_ae.b#use the last element as skip layer input 182 | ae_output = tf.nn.dropout(ae_output, keep_prob=0.5) 183 | 184 | output = tf.nn.softmax(lin_output + ae_output + reg_output)#XXX*63 185 | 186 | return output 187 | 188 | def TrainNN(self,sess): 189 | gradient_step=0.1 190 | output = self.Forward(sess) 191 | 192 | cost=-tf.gather(tf.log(tf.gather(tf.transpose(output),self.y)),0) 193 | train_op = tf.train.GradientDescentOptimizer(gradient_step).minimize(cost) 194 | train_op = tf.train.AdagradOptimizer(gradient_step).minimize(cost) 195 | 196 | return train_op, output, cost 197 | 198 | def GetResult(self,sess): 199 | 200 | output = self.Forward(sess) 201 | 202 | return tf.gather(output,0) 203 | 204 | def GetTopK(self, output, k): 205 | result = [t[0] for t in heapq.nlargest(k, enumerate(output), lambda t: t[1])] 206 | return result 207 | 208 | 209 | def Fit(self, train_data, train_ans, dev_data, dev_ans, sess, batch_size=10, alpha=0.5, n_epoches=25, 210 | weights_file_dir='mlp_output/'): 211 | print 'Compling training function...' 
212 | 213 | train_fetch, output_fetch, cost_fetch= self.TrainNN(sess) 214 | get_output = self.GetResult(sess) 215 | sess.run(tf.initialize_all_variables()) 216 | 217 | M = len(train_data) 218 | N = len(dev_data) 219 | batch_cnt = M / batch_size + 1 220 | 221 | hidden_W = None 222 | hidden_b = None 223 | output_W = None 224 | output_b = None 225 | skip_ae_W = None 226 | skip_ae_b = None 227 | skip_re_W = None 228 | skip_re_b = None 229 | reg_lookup_table = None 230 | best = 0.0 231 | memorySize=5 #LSTM memory size 232 | ########################## 233 | ########################## 234 | 235 | print 'Start training...' 236 | sys.stdout.flush() 237 | ''' 238 | # combining data into 1000 239 | for index in xrange(0, M-memorySize): 240 | trainDataLSTM = np.transpose(np.asarray(train_data[index][0])) 241 | regLSTM = np.transpose(np.asarray(train_data[index][1])) # has to transpose to use concatenate 242 | for idx in xrange(index + 1, index + memorySize): 243 | if np.asarray(train_data[idx][0]).shape == (1, 200): 244 | trainDataLSTM = np.concatenate((trainDataLSTM, np.asarray(np.transpose(train_data[idx][0])))) 245 | else: 246 | trainDataLSTM = np.concatenate((trainDataLSTM, np.asarray(train_data[idx][0]))) 247 | 248 | regLSTM = np.concatenate((regLSTM, np.asarray(np.transpose(train_data[idx][1])))) 249 | trainDataInput = np.concatenate(trainDataInput,np.transpose(trainDataLSTM)) 250 | regLSTMInput = np.concatenate(regLSTMInput,np.transpose(regLSTM)) 251 | ''' 252 | 253 | 254 | for epoch in xrange(n_epoches): 255 | costs = 0.0 256 | error = 0.0 257 | rightTrain=M 258 | totalTrain=M 259 | rightDev=N 260 | totalDev=N 261 | start_time = time.time() 262 | for batch in xrange(0, batch_cnt + 1): 263 | start = batch * batch_size 264 | end = min((batch + 1) * batch_size, M) 265 | if start >= M: 266 | break 267 | for index in xrange(start, end): 268 | print 'Epoch'+repr(epoch)+'Batch' +repr(batch)+','+'index is'+repr(index) 269 | if np.asmatrix(train_data[index][0]).shape[1] == 200: 270 | [tmp_cost, train_op] = sess.run([cost_fetch, train_fetch], feed_dict={self.x_in:np.expand_dims(np.asmatrix(train_data[index][0]),axis=0), self.reg_x:train_data[index][1], self.y:(train_ans[index])}) 271 | else: 272 | [tmp_cost, train_op] = sess.run([cost_fetch, train_fetch], feed_dict={self.x_in:np.expand_dims(np.transpose(np.asmatrix(train_data[index][0])),axis=0),self.reg_x: train_data[index][1],self.y: (train_ans[index])}) 273 | costs += tmp_cost 274 | print 'temp cost is' 275 | print tmp_cost 276 | 277 | print 'Training Cost is'+ repr(costs) 278 | 279 | end_time = time.time() 280 | minu = int((end_time - start_time) / 60) 281 | sec = (end_time - start_time) - 60 * minu 282 | print 'Time: %d min %.2f sec' % (minu, sec) 283 | cur_cost = costs / M 284 | #print 'Traning at epoch=%d, cost = %f' % (epoch + 1, cur_cost) 285 | 286 | for index in xrange(0, M): 287 | if np.asmatrix(train_data[index][0]).shape[1]==200: 288 | tempRight = ((train_ans[index]) == np.argmax(sess.run(get_output,feed_dict={self.x_in:np.expand_dims(np.asmatrix(train_data[index][0]),axis=0), self.reg_x:train_data[index][1]}))) 289 | else: 290 | tempRight = ((train_ans[index]) == np.argmax(sess.run(get_output,feed_dict={self.x_in:np.expand_dims(np.transpose(np.asmatrix(train_data[index][0])),axis=0), self.reg_x:train_data[index][1]})))+1 291 | rightTrain+=tempRight 292 | totalTrain+=M 293 | pre = (1.0 * rightTrain) / totalTrain * 100 294 | print 'Train pre: %f' % pre 295 | 296 | for index in xrange(0, N): 297 | if np.asmatrix(dev_data[index][0]).shape[1] 
== 200: 298 | tempRight = ((dev_ans[index]) == np.argmax(sess.run(get_output,feed_dict={self.x_in:np.expand_dims(np.asmatrix(dev_data[index][0]),axis=0), self.reg_x:(dev_data[index][1])}))) 299 | else: 300 | tempRight = ((dev_ans[index]) == np.argmax(sess.run(get_output, feed_dict={self.x_in: np.expand_dims(np.transpose(np.asmatrix(dev_data[index][0])),axis=0), self.reg_x:(dev_data[index][1])})))+1 301 | rightDev+=tempRight 302 | totalDev+=N 303 | pre = (1.0 * rightDev) / totalDev* 100 304 | print 'Dev pre: %f' % pre 305 | sys.stdout.flush() 306 | if pre > best: 307 | best = pre 308 | hidden_W = sess.run(self.hiddenLayer.W) 309 | hidden_b = sess.run(self.hiddenLayer.b) 310 | output_W = sess.run(self.outputLayer.W) 311 | output_b = sess.run(self.outputLayer.b) 312 | skip_ae_W = sess.run(self.skip_layer_ae.W) 313 | skip_ae_b = sess.run(self.skip_layer_ae.b) 314 | skip_re_W = sess.run(self.skip_layer_re.W) 315 | skip_re_b = sess.run(self.skip_layer_re.b) 316 | reg_lookup_table = sess.run(self.reg_lookup_table) 317 | # np.savez(weights_file, hidden_W = hidden_W, hidden_b = hidden_b, output_W = output_W, output_b = output_b, 318 | # skip_ae_W = skip_ae_W, skip_ae_b = skip_ae_b, skip_re_W = skip_re_W, skip_re_b = skip_re_b, reg_lookup_table = reg_lookup_table) 319 | sess.run(self.hiddenLayer.W.assign(hidden_W)) 320 | sess.run(self.hiddenLayer.b.assign(hidden_b)) 321 | sess.run(self.outputLayer.W.assign(output_W)) 322 | sess.run(self.outputLayer.b.assign(output_b)) 323 | sess.run(self.skip_layer_ae.W.assign(skip_ae_W)) 324 | sess.run(self.skip_layer_ae.b.assign(skip_ae_b)) 325 | sess.run(self.skip_layer_re.W.assign(skip_re_W)) 326 | sess.run(self.skip_layer_re.b.assign(skip_re_b)) 327 | sess.run(self.reg_lookup_table.assign(reg_lookup_table)) 328 | #print 'Saving results...' 329 | #self.SaveParam(weights_file_dir,sess) 330 | --------------------------------------------------------------------------------
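For readers who want the shape bookkeeping of DNN.Forward above in one place, here is a simplified NumPy sketch of how the three branches are combined into the softmax output. The layer sizes follow the comments in dnn_lstm.py (200-unit hidden layer, 63 output classes); the reg-expression vocabulary size, the random weights, and the input vectors are placeholders, and the LSTM and dropout steps are omitted:

```python
import numpy as np

def softmax(z):
    e = np.exp(z - z.max())
    return e / e.sum()

n_hidden, n_out, n_reg = 200, 63, 5        # 63 classes per the code comments; n_reg is a placeholder
rng = np.random.RandomState(0)

lstm_out = rng.rand(1, n_hidden)           # last output of the LSTM over the query (stands in for rnn.rnn)
x_last = rng.rand(1, n_hidden)             # last time step of the encoded input, fed to the AE skip layer
reg_ids = [0, 3]                           # indices of the reg-expression features that matched

W_hidden, b_hidden = rng.rand(n_hidden, n_hidden), np.zeros((1, n_hidden))
W_out, b_out = rng.rand(n_hidden, n_out), np.zeros((1, n_out))
W_skip_ae, b_skip_ae = rng.rand(n_hidden, n_out), np.zeros((1, n_out))
W_skip_re, b_skip_re = rng.rand(n_reg, n_out), np.zeros((1, n_out))
reg_lookup = rng.rand(n_reg, n_hidden)

# Hidden layer: LSTM output plus the summed reg-expression embeddings, then tanh.
h = np.tanh(lstm_out.dot(W_hidden) + b_hidden + reg_lookup[reg_ids].sum(axis=0))

# The three branches are added before the softmax, as in Forward().
lin_output = h.dot(W_out) + b_out
ae_output = x_last.dot(W_skip_ae) + b_skip_ae
reg_output = W_skip_re[reg_ids].sum(axis=0) + b_skip_re
probs = softmax(lin_output + ae_output + reg_output)
print(probs.shape)  # (1, 63): one probability per label
```

The predicted class for a query is the argmax over these probabilities, which is what Fit compares against the gold label when computing the train and dev accuracy.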