├── README.md
├── data
│   └── subj0.pkl
├── data_helper.py
├── rnn_CNN_model.py
└── train_rnn_cnn_classify.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# LSTM-CNN_CLASSIFICATION

--------------------------------------------------------------------------------
/data/subj0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zjrn/LSTM-CNN_CLASSIFICATION/47b5da4c1daf745e1dbee2d782e3ccedf2d65121/data/subj0.pkl

--------------------------------------------------------------------------------
/data_helper.py:
--------------------------------------------------------------------------------
"""
description: this file loads the raw pickle file and generates batches of x, y
author: luchi
date: 22/11/2016
"""
import numpy as np
import pickle as pkl


# file path
dataset_path = 'data/subj0.pkl'

def set_dataset_path(path):
    global dataset_path
    dataset_path = path


def load_data(max_len, batch_size, n_words=20000, valid_portion=0.1, sort_by_len=True):
    f = open(dataset_path, 'rb')
    print('load data from %s' % dataset_path)
    train_set = np.array(pkl.load(f))
    test_set = np.array(pkl.load(f))
    f.close()

    train_set_x, train_set_y = train_set

    # train_set length
    n_samples = len(train_set_x)
    # shuffle and split into train and valid datasets
    sidx = np.random.permutation(n_samples)
    n_train = int(np.round(n_samples * (1. - valid_portion)))
    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
    train_set_x = [train_set_x[s] for s in sidx[:n_train]]
    train_set_y = [train_set_y[s] for s in sidx[:n_train]]

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)

    # map out-of-vocabulary word ids (>= n_words) to index 1, the unknown word
    def remove_unk(x):
        return [[1 if w >= n_words else w for w in sen] for sen in x]

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set

    train_set_x = remove_unk(train_set_x)
    valid_set_x = remove_unk(valid_set_x)
    test_set_x = remove_unk(test_set_x)

    def len_argsort(seq):
        return sorted(range(len(seq)), key=lambda x: len(seq[x]))

    if sort_by_len:
        sorted_index = len_argsort(test_set_x)
        test_set_x = [test_set_x[i] for i in sorted_index]
        test_set_y = [test_set_y[i] for i in sorted_index]

        sorted_index = len_argsort(valid_set_x)
        valid_set_x = [valid_set_x[i] for i in sorted_index]
        valid_set_y = [valid_set_y[i] for i in sorted_index]

        sorted_index = len_argsort(train_set_x)
        train_set_x = [train_set_x[i] for i in sorted_index]
        train_set_y = [train_set_y[i] for i in sorted_index]

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)
    test_set = (test_set_x, test_set_y)

    new_train_set_x = np.zeros([len(train_set[0]), max_len])
    new_train_set_y = np.zeros(len(train_set[0]))

    new_valid_set_x = np.zeros([len(valid_set[0]), max_len])
    new_valid_set_y = np.zeros(len(valid_set[0]))

    new_test_set_x = np.zeros([len(test_set[0]), max_len])
    new_test_set_y = np.zeros(len(test_set[0]))

    mask_train_x = np.zeros([max_len, len(train_set[0])])
    mask_test_x = np.zeros([max_len, len(test_set[0])])
    mask_valid_x = np.zeros([max_len, len(valid_set[0])])
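    # layout note: each new_*_set_x row is one padded sentence of word ids,
    # shape [n_samples, max_len]; the mask_* arrays are transposed to
    # [max_len, n_samples] so they line up with the per-time-step LSTM outputs
    # that rnn_CNN_model.py multiplies by mask_x[:, :, None]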
    def padding_and_generate_mask(x, y, new_x, new_y, new_mask_x):

        for i, (sen_x, sen_y) in enumerate(zip(x, y)):
            # sentences longer than max_len are truncated rather than removed
            if len(sen_x) <= max_len:
                new_x[i, 0:len(sen_x)] = sen_x
                new_mask_x[0:len(sen_x), i] = 1
                new_y[i] = sen_y
            else:
                new_x[i] = sen_x[0:max_len]
                new_mask_x[:, i] = 1
                new_y[i] = sen_y
        new_set = (new_x, new_y, new_mask_x)
        return new_set

    train_set = padding_and_generate_mask(train_set[0], train_set[1], new_train_set_x, new_train_set_y, mask_train_x)
    test_set = padding_and_generate_mask(test_set[0], test_set[1], new_test_set_x, new_test_set_y, mask_test_x)
    valid_set = padding_and_generate_mask(valid_set[0], valid_set[1], new_valid_set_x, new_valid_set_y, mask_valid_x)

    return train_set, valid_set, test_set


# return batch dataset
def batch_iter(data, batch_size):

    # get dataset and label
    x, y, mask_x = data
    x = np.array(x)
    y = np.array(y)
    data_size = len(x)
    num_batches_per_epoch = data_size // batch_size
    for batch_index in range(num_batches_per_epoch):
        start_index = batch_index * batch_size
        end_index = min((batch_index + 1) * batch_size, data_size)
        return_x = x[start_index:end_index]
        return_y = y[start_index:end_index]
        return_mask_x = mask_x[:, start_index:end_index]
        yield (return_x, return_y, return_mask_x)

--------------------------------------------------------------------------------
/rnn_CNN_model.py:
--------------------------------------------------------------------------------
# [truncated in this dump: the imports, the LSTM_CNN_Model class and __init__
#  signature, the input_data/target/mask_x placeholders, the embedding lookup
#  producing `inputs`, and the LSTM cell / initial-state setup are missing; the
#  surviving code resumes at the per-time-step LSTM unroll loop, whose header
#  and enclosing scope name are reconstructed from context.]

        out_put = []
        state = self._initial_state
        with tf.variable_scope("LSTM_layer"):
            for time_step in range(num_step):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                out_put.append(cell_output)
        # zero out the outputs at padded time steps; out_put stacks to
        # [num_step, batch_size, hidden] and mask_x is [num_step, batch_size]
        out_put = out_put * self.mask_x[:, :, None]

        with tf.name_scope("Conv_layer"):
            # [num_step, batch, hidden] -> [batch, hidden, num_step], plus a channel dim
            out_put = tf.transpose(out_put, [1, 2, 0])
            out_put = tf.reshape(out_put, [self.batch_size, hidden_neural_size, num_step, -1])

            print(out_put)
            # the filter spans the full hidden dimension (600 = hidden_neural_size)
            # and a window of 5 time steps -> conv output [batch, 1, num_step - 4, 200]
            W_conv = tf.get_variable(name="conv_w", initializer=tf.truncated_normal(shape=[600, 5, 1, 200], stddev=0.1))
            B_conv = tf.get_variable(name="conv_b", initializer=tf.constant(0.1, shape=[200]))

            conv_output = tf.nn.relu(tf.nn.conv2d(out_put, W_conv, strides=[1, 1, 1, 1], padding='VALID') + B_conv)
            conv_output = tf.reshape(conv_output, [self.batch_size, 36, 200, 1])
            # max-pool over the 36 (= 40 - 5 + 1) convolution positions -> [batch, 200]
            max_pool_out = tf.nn.max_pool(conv_output, ksize=[1, 36, 1, 1], strides=[1, 1, 1, 1], padding='VALID')
            max_pool_out = tf.reshape(max_pool_out, [self.batch_size, 200])

        with tf.name_scope("Softmax_layer_and_output"):
            softmax_w = tf.get_variable("softmax_w", [200, class_num], dtype=tf.float32)
            softmax_b = tf.get_variable("softmax_b", [class_num], dtype=tf.float32)
            self.logits = tf.matmul(max_pool_out, softmax_w) + softmax_b

        with tf.name_scope("loss"):
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits + 1e-10, labels=self.target)
            self.cost = tf.reduce_mean(self.loss)

        with tf.name_scope("accuracy"):
            self.prediction = tf.argmax(self.logits, 1)
            correct_prediction = tf.equal(self.prediction, self.target)
            self.correct_num = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

        # add summary
        loss_summary = tf.summary.scalar("loss", self.cost)
        accuracy_summary = tf.summary.scalar("accuracy_summary", self.accuracy)

        if not is_training:
            return

        self.global_step = tf.Variable(tf.constant(0), dtype=tf.int32, name="global_step", trainable=False)
        self.lr = tf.Variable(tf.constant(0.8), dtype=tf.float32, trainable=False)
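        # training ops: gradients are clipped by global norm (config.max_grad_norm)
        # and applied with plain SGD; the learning rate lives in a non-trainable
        # variable so the training loop can decay it through assign_new_lr()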
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          config.max_grad_norm)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in zip(grads, tvars):
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        self.grad_summaries_merged = tf.summary.merge(grad_summaries)

        self.summary = tf.summary.merge([loss_summary, accuracy_summary, self.grad_summaries_merged])

        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

    def assign_new_lr(self, session, lr_value):
        session.run(self._lr_update, feed_dict={self.new_lr: lr_value})

--------------------------------------------------------------------------------
/train_rnn_cnn_classify.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import os
import time
import datetime
from rnn_CNN_model import LSTM_CNN_Model
import data_helper


flags = tf.app.flags
FLAGS = flags.FLAGS


flags.DEFINE_integer('batch_size', 64, 'the batch_size of the training procedure')
flags.DEFINE_float('lr', 0.05, 'the learning rate')
flags.DEFINE_float('lr_decay', 0.6, 'the learning rate decay')
flags.DEFINE_integer('vocabulary_size', 20000, 'vocabulary_size')
flags.DEFINE_integer('embedding_dim', 128, 'embedding dim')
flags.DEFINE_integer('hidden_neural_size', 600, 'LSTM hidden neural size')
flags.DEFINE_integer('hidden_layer_num', 1, 'LSTM hidden layer num')
flags.DEFINE_string('dataset_path', 'data/subj0.pkl', 'dataset path')
flags.DEFINE_integer('max_len', 40, 'max_len of training sentence')
flags.DEFINE_integer('valid_num', 100, 'epoch num of validation')
flags.DEFINE_integer('checkpoint_num', 1000, 'epoch num of checkpoint')
flags.DEFINE_float('init_scale', 0.1, 'init scale')
flags.DEFINE_integer('class_num', 2, 'class num')
flags.DEFINE_float('keep_prob', 0.5, 'dropout keep probability')
flags.DEFINE_integer('num_epoch', 30, 'num epoch')
flags.DEFINE_integer('max_decay_epoch', 30, 'epoch after which the learning rate starts to decay')
flags.DEFINE_integer('max_grad_norm', 5, 'max_grad_norm')
flags.DEFINE_string('out_dir', os.path.abspath(os.path.join(os.path.curdir, "runs")), 'output directory')
flags.DEFINE_integer('check_point_every', 10, 'save a checkpoint every this many epochs')

class Config(object):

    hidden_neural_size = FLAGS.hidden_neural_size
    vocabulary_size = FLAGS.vocabulary_size
    embed_dim = FLAGS.embedding_dim
    hidden_layer_num = FLAGS.hidden_layer_num
    class_num = FLAGS.class_num
    keep_prob = FLAGS.keep_prob
    lr = FLAGS.lr
    lr_decay = FLAGS.lr_decay
    batch_size = FLAGS.batch_size
    num_step = FLAGS.max_len  # LSTM unroll length = padded sentence length from data_helper.load_data
    max_grad_norm = FLAGS.max_grad_norm
    num_epoch = FLAGS.num_epoch
    max_decay_epoch = FLAGS.max_decay_epoch
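    # evaluation / checkpoint cadence and output location, taken from the flags above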
    valid_num = FLAGS.valid_num
    out_dir = FLAGS.out_dir
    checkpoint_every = FLAGS.check_point_every


def evaluate(model, session, data, global_steps=None, summary_writer=None):

    correct_num = 0
    total_num = len(data[0])
    for step, (x, y, mask_x) in enumerate(data_helper.batch_iter(data, batch_size=FLAGS.batch_size)):

        fetches = model.correct_num
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.target] = y
        feed_dict[model.mask_x] = mask_x
        state = session.run(model._initial_state)
        for i, (c, h) in enumerate(model._initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        count = session.run(fetches, feed_dict)
        correct_num += count

    accuracy = float(correct_num) / total_num
    dev_summary = tf.summary.scalar('dev_accuracy', accuracy)
    dev_summary = session.run(dev_summary)
    if summary_writer:
        summary_writer.add_summary(dev_summary, global_steps)
        summary_writer.flush()
    return accuracy


def run_epoch(model, session, data, global_steps, train_summary_writer, valid_summary_writer=None):
    for step, (x, y, mask_x) in enumerate(data_helper.batch_iter(data, batch_size=FLAGS.batch_size)):

        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.target] = y
        feed_dict[model.mask_x] = mask_x
        fetches = [model.cost, model.accuracy, model.train_op, model.summary]
        state = session.run(model._initial_state)
        for i, (c, h) in enumerate(model._initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        cost, accuracy, _, summary = session.run(fetches, feed_dict)
        train_summary_writer.add_summary(summary, global_steps)
        train_summary_writer.flush()
        if global_steps % 100 == 0:
            print("the %i step, train cost is: %f and the train accuracy is %f " % (global_steps, cost, accuracy))
        global_steps += 1

    return global_steps


def train_step():

    print("loading the dataset...")
    config = Config()
    eval_config = Config()
    eval_config.keep_prob = 1.0

    train_data, valid_data, test_data = data_helper.load_data(FLAGS.max_len, batch_size=config.batch_size)

    print("begin training")

    # gpu_config=tf.ConfigProto()
    # gpu_config.gpu_options.allow_growth=True
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-1 * FLAGS.init_scale, 1 * FLAGS.init_scale)
        # session.run(tf.global_variables_initializer())
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            model = LSTM_CNN_Model(config=config, is_training=True)

        with tf.variable_scope("model", reuse=True, initializer=initializer):
            valid_model = LSTM_CNN_Model(config=eval_config, is_training=False)
            test_model = LSTM_CNN_Model(config=eval_config, is_training=False)

        # add summary
        # train_summary_op = tf.merge_summary([model.loss_summary,model.accuracy])
        train_summary_dir = os.path.join(config.out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, session.graph)

        # dev_summary_op = tf.merge_summary([valid_model.loss_summary,valid_model.accuracy])
        dev_summary_dir = os.path.join(eval_config.out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, session.graph)

        # add checkpoint
        checkpoint_dir = os.path.abspath(os.path.join(config.out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
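        # checkpoints would be written as runs/checkpoints/model-<global_step>;
        # note that the saver.save() call in the epoch loop below is commented out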
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables())

        tf.global_variables_initializer().run()
        global_steps = 1
        begin_time = int(time.time())

        for i in range(config.num_epoch):
            print("the %d epoch training..." % (i + 1))
            lr_decay = config.lr_decay ** max(i - config.max_decay_epoch, 0.0)
            model.assign_new_lr(session, config.lr * lr_decay)
            print("learning_rate : %f" % (config.lr * lr_decay))
            global_steps = run_epoch(model, session, train_data, global_steps, train_summary_writer, dev_summary_writer)

            # if i % config.checkpoint_every == 0:
            #     path = saver.save(session, checkpoint_prefix, global_steps)
            #     print("Saved model checkpoint to {}\n".format(path))

        print("the training is finished")
        end_time = int(time.time())
        print("training takes %d seconds already\n" % (end_time - begin_time))
        test_accuracy = evaluate(test_model, session, test_data)
        valid_accuracy = evaluate(valid_model, session, valid_data)
        print("RNN_CNN_model: the test data accuracy is %f" % test_accuracy)
        print("RNN_CNN_model: the valid data accuracy is %f" % valid_accuracy)
        print("program end!")


def main(_):
    train_step()


if __name__ == "__main__":
    tf.app.run()

--------------------------------------------------------------------------------
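A minimal way to run the trainer, assuming TensorFlow 1.x and that data/subj0.pkl is in place (the flag values shown are simply the defaults already defined in train_rnn_cnn_classify.py):

    python train_rnn_cnn_classify.py --batch_size 64 --max_len 40 --num_epoch 30 --lr 0.05

Training and dev summaries are written under runs/summaries/ and can be viewed with TensorBoard; checkpoints would land in runs/checkpoints/ once the commented-out saver.save() call in train_step() is re-enabled.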