├── README.md
├── data
│   └── subj0.pkl
├── data_helper.py
├── rnn_CNN_model.py
└── train_rnn_cnn_classify.py

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# LSTM-CNN_CLASSIFICATION

--------------------------------------------------------------------------------
/data/subj0.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zjrn/LSTM-CNN_CLASSIFICATION/47b5da4c1daf745e1dbee2d782e3ccedf2d65121/data/subj0.pkl

--------------------------------------------------------------------------------
/data_helper.py:
--------------------------------------------------------------------------------
"""
description: this file loads the raw pickle file and generates batches of x, y
author: luchi
date: 22/11/2016
"""
import numpy as np
import pickle as pkl


# file path
dataset_path = 'data/subj0.pkl'

def set_dataset_path(path):
    global dataset_path
    dataset_path = path


def load_data(max_len, batch_size, n_words=20000, valid_portion=0.1, sort_by_len=True):
    f = open(dataset_path, 'rb')
    print('load data from %s' % dataset_path)
    train_set = np.array(pkl.load(f))
    test_set = np.array(pkl.load(f))
    f.close()

    train_set_x, train_set_y = train_set

    # train_set length
    n_samples = len(train_set_x)
    # shuffle and split into train and valid datasets
    sidx = np.random.permutation(n_samples)
    n_train = int(np.round(n_samples * (1. - valid_portion)))
    valid_set_x = [train_set_x[s] for s in sidx[n_train:]]
    valid_set_y = [train_set_y[s] for s in sidx[n_train:]]
    train_set_x = [train_set_x[s] for s in sidx[:n_train]]
    train_set_y = [train_set_y[s] for s in sidx[:n_train]]

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)

    # map out-of-vocabulary word ids (>= n_words) to index 1, the unknown word
    def remove_unk(x):
        return [[1 if w >= n_words else w for w in sen] for sen in x]

    test_set_x, test_set_y = test_set
    valid_set_x, valid_set_y = valid_set
    train_set_x, train_set_y = train_set

    train_set_x = remove_unk(train_set_x)
    valid_set_x = remove_unk(valid_set_x)
    test_set_x = remove_unk(test_set_x)

    def len_argsort(seq):
        return sorted(range(len(seq)), key=lambda x: len(seq[x]))

    if sort_by_len:
        sorted_index = len_argsort(test_set_x)
        test_set_x = [test_set_x[i] for i in sorted_index]
        test_set_y = [test_set_y[i] for i in sorted_index]

        sorted_index = len_argsort(valid_set_x)
        valid_set_x = [valid_set_x[i] for i in sorted_index]
        valid_set_y = [valid_set_y[i] for i in sorted_index]

        sorted_index = len_argsort(train_set_x)
        train_set_x = [train_set_x[i] for i in sorted_index]
        train_set_y = [train_set_y[i] for i in sorted_index]

    train_set = (train_set_x, train_set_y)
    valid_set = (valid_set_x, valid_set_y)
    test_set = (test_set_x, test_set_y)

    new_train_set_x = np.zeros([len(train_set[0]), max_len])
    new_train_set_y = np.zeros(len(train_set[0]))

    new_valid_set_x = np.zeros([len(valid_set[0]), max_len])
    new_valid_set_y = np.zeros(len(valid_set[0]))

    new_test_set_x = np.zeros([len(test_set[0]), max_len])
    new_test_set_y = np.zeros(len(test_set[0]))

    mask_train_x = np.zeros([max_len, len(train_set[0])])
    mask_test_x = np.zeros([max_len, len(test_set[0])])
    mask_valid_x = np.zeros([max_len, len(valid_set[0])])
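    # layout note: each new_*_set_x row is one padded sentence of word ids,
    # shape [n_samples, max_len]; the mask_* arrays are transposed to
    # [max_len, n_samples] so they line up with the per-time-step LSTM outputs
    # that rnn_CNN_model.py multiplies by mask_x[:, :, None]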
    def padding_and_generate_mask(x, y, new_x, new_y, new_mask_x):

        for i, (sen_x, sen_y) in enumerate(zip(x, y)):
            # sentences longer than max_len are truncated rather than removed
            if len(sen_x) <= max_len:
                new_x[i, 0:len(sen_x)] = sen_x
                new_mask_x[0:len(sen_x), i] = 1
                new_y[i] = sen_y
            else:
                new_x[i] = sen_x[0:max_len]
                new_mask_x[:, i] = 1
                new_y[i] = sen_y
        new_set = (new_x, new_y, new_mask_x)
        return new_set

    train_set = padding_and_generate_mask(train_set[0], train_set[1], new_train_set_x, new_train_set_y, mask_train_x)
    test_set = padding_and_generate_mask(test_set[0], test_set[1], new_test_set_x, new_test_set_y, mask_test_x)
    valid_set = padding_and_generate_mask(valid_set[0], valid_set[1], new_valid_set_x, new_valid_set_y, mask_valid_x)

    return train_set, valid_set, test_set


# return batch dataset
def batch_iter(data, batch_size):

    # get dataset and label
    x, y, mask_x = data
    x = np.array(x)
    y = np.array(y)
    data_size = len(x)
    num_batches_per_epoch = data_size // batch_size
    for batch_index in range(num_batches_per_epoch):
        start_index = batch_index * batch_size
        end_index = min((batch_index + 1) * batch_size, data_size)
        return_x = x[start_index:end_index]
        return_y = y[start_index:end_index]
        return_mask_x = mask_x[:, start_index:end_index]
        yield (return_x, return_y, return_mask_x)

--------------------------------------------------------------------------------
/rnn_CNN_model.py:
--------------------------------------------------------------------------------
# [truncated in this dump: the imports, the LSTM_CNN_Model class and __init__
#  signature, the input_data/target/mask_x placeholders, the embedding lookup
#  producing `inputs`, and the LSTM cell / initial-state setup are missing; the
#  surviving code resumes at the per-time-step LSTM unroll loop, whose header
#  and enclosing scope name are reconstructed from context.]

        out_put = []
        state = self._initial_state
        with tf.variable_scope("LSTM_layer"):
            for time_step in range(num_step):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                out_put.append(cell_output)
        # zero out the outputs at padded time steps; out_put stacks to
        # [num_step, batch_size, hidden] and mask_x is [num_step, batch_size]
        out_put = out_put * self.mask_x[:, :, None]

        with tf.name_scope("Conv_layer"):
            # [num_step, batch, hidden] -> [batch, hidden, num_step], plus a channel dim
            out_put = tf.transpose(out_put, [1, 2, 0])
            out_put = tf.reshape(out_put, [self.batch_size, hidden_neural_size, num_step, -1])

            print(out_put)
            # the filter spans the full hidden dimension (600 = hidden_neural_size)
            # and a window of 5 time steps -> conv output [batch, 1, num_step - 4, 200]
            W_conv = tf.get_variable(name="conv_w", initializer=tf.truncated_normal(shape=[600, 5, 1, 200], stddev=0.1))
            B_conv = tf.get_variable(name="conv_b", initializer=tf.constant(0.1, shape=[200]))

            conv_output = tf.nn.relu(tf.nn.conv2d(out_put, W_conv, strides=[1, 1, 1, 1], padding='VALID') + B_conv)
            conv_output = tf.reshape(conv_output, [self.batch_size, 36, 200, 1])
            # max-pool over the 36 (= 40 - 5 + 1) convolution positions -> [batch, 200]
            max_pool_out = tf.nn.max_pool(conv_output, ksize=[1, 36, 1, 1], strides=[1, 1, 1, 1], padding='VALID')
            max_pool_out = tf.reshape(max_pool_out, [self.batch_size, 200])

        with tf.name_scope("Softmax_layer_and_output"):
            softmax_w = tf.get_variable("softmax_w", [200, class_num], dtype=tf.float32)
            softmax_b = tf.get_variable("softmax_b", [class_num], dtype=tf.float32)
            self.logits = tf.matmul(max_pool_out, softmax_w) + softmax_b

        with tf.name_scope("loss"):
            self.loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=self.logits + 1e-10, labels=self.target)
            self.cost = tf.reduce_mean(self.loss)

        with tf.name_scope("accuracy"):
            self.prediction = tf.argmax(self.logits, 1)
            correct_prediction = tf.equal(self.prediction, self.target)
            self.correct_num = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))
            self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32), name="accuracy")

        # add summary
        loss_summary = tf.summary.scalar("loss", self.cost)
        accuracy_summary = tf.summary.scalar("accuracy_summary", self.accuracy)

        if not is_training:
            return

        self.global_step = tf.Variable(tf.constant(0), dtype=tf.int32, name="global_step", trainable=False)
        self.lr = tf.Variable(tf.constant(0.8), dtype=tf.float32, trainable=False)
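        # training ops: gradients are clipped by global norm (config.max_grad_norm)
        # and applied with plain SGD; the learning rate lives in a non-trainable
        # variable so the training loop can decay it through assign_new_lr()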
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                                          config.max_grad_norm)

        # Keep track of gradient values and sparsity (optional)
        grad_summaries = []
        for g, v in zip(grads, tvars):
            if g is not None:
                grad_hist_summary = tf.summary.histogram("{}/grad/hist".format(v.name), g)
                sparsity_summary = tf.summary.scalar("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
                grad_summaries.append(grad_hist_summary)
                grad_summaries.append(sparsity_summary)
        self.grad_summaries_merged = tf.summary.merge(grad_summaries)

        self.summary = tf.summary.merge([loss_summary, accuracy_summary, self.grad_summaries_merged])

        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self.train_op = optimizer.apply_gradients(zip(grads, tvars))

        self.new_lr = tf.placeholder(tf.float32, shape=[], name="new_learning_rate")
        self._lr_update = tf.assign(self.lr, self.new_lr)

    def assign_new_lr(self, session, lr_value):
        session.run(self._lr_update, feed_dict={self.new_lr: lr_value})

--------------------------------------------------------------------------------
/train_rnn_cnn_classify.py:
--------------------------------------------------------------------------------
import tensorflow as tf
import numpy as np
import os
import time
import datetime
from rnn_CNN_model import LSTM_CNN_Model
import data_helper


flags = tf.app.flags
FLAGS = flags.FLAGS


flags.DEFINE_integer('batch_size', 64, 'the batch_size of the training procedure')
flags.DEFINE_float('lr', 0.05, 'the learning rate')
flags.DEFINE_float('lr_decay', 0.6, 'the learning rate decay')
flags.DEFINE_integer('vocabulary_size', 20000, 'vocabulary_size')
flags.DEFINE_integer('embedding_dim', 128, 'embedding dim')
flags.DEFINE_integer('hidden_neural_size', 600, 'LSTM hidden neural size')
flags.DEFINE_integer('hidden_layer_num', 1, 'LSTM hidden layer num')
flags.DEFINE_string('dataset_path', 'data/subj0.pkl', 'dataset path')
flags.DEFINE_integer('max_len', 40, 'max_len of training sentence')
flags.DEFINE_integer('valid_num', 100, 'epoch num of validation')
flags.DEFINE_integer('checkpoint_num', 1000, 'epoch num of checkpoint')
flags.DEFINE_float('init_scale', 0.1, 'init scale')
flags.DEFINE_integer('class_num', 2, 'class num')
flags.DEFINE_float('keep_prob', 0.5, 'dropout keep probability')
flags.DEFINE_integer('num_epoch', 30, 'num epoch')
flags.DEFINE_integer('max_decay_epoch', 30, 'epoch after which the learning rate starts to decay')
flags.DEFINE_integer('max_grad_norm', 5, 'max_grad_norm')
flags.DEFINE_string('out_dir', os.path.abspath(os.path.join(os.path.curdir, "runs")), 'output directory')
flags.DEFINE_integer('check_point_every', 10, 'save a checkpoint every this many epochs')

class Config(object):

    hidden_neural_size = FLAGS.hidden_neural_size
    vocabulary_size = FLAGS.vocabulary_size
    embed_dim = FLAGS.embedding_dim
    hidden_layer_num = FLAGS.hidden_layer_num
    class_num = FLAGS.class_num
    keep_prob = FLAGS.keep_prob
    lr = FLAGS.lr
    lr_decay = FLAGS.lr_decay
    batch_size = FLAGS.batch_size
    num_step = FLAGS.max_len  # LSTM unroll length = padded sentence length from data_helper.load_data
    max_grad_norm = FLAGS.max_grad_norm
    num_epoch = FLAGS.num_epoch
    max_decay_epoch = FLAGS.max_decay_epoch
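    # evaluation / checkpoint cadence and output location, taken from the flags above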
    valid_num = FLAGS.valid_num
    out_dir = FLAGS.out_dir
    checkpoint_every = FLAGS.check_point_every


def evaluate(model, session, data, global_steps=None, summary_writer=None):

    correct_num = 0
    total_num = len(data[0])
    for step, (x, y, mask_x) in enumerate(data_helper.batch_iter(data, batch_size=FLAGS.batch_size)):

        fetches = model.correct_num
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.target] = y
        feed_dict[model.mask_x] = mask_x
        state = session.run(model._initial_state)
        for i, (c, h) in enumerate(model._initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        count = session.run(fetches, feed_dict)
        correct_num += count

    accuracy = float(correct_num) / total_num
    dev_summary = tf.summary.scalar('dev_accuracy', accuracy)
    dev_summary = session.run(dev_summary)
    if summary_writer:
        summary_writer.add_summary(dev_summary, global_steps)
        summary_writer.flush()
    return accuracy


def run_epoch(model, session, data, global_steps, train_summary_writer, valid_summary_writer=None):
    for step, (x, y, mask_x) in enumerate(data_helper.batch_iter(data, batch_size=FLAGS.batch_size)):

        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.target] = y
        feed_dict[model.mask_x] = mask_x
        fetches = [model.cost, model.accuracy, model.train_op, model.summary]
        state = session.run(model._initial_state)
        for i, (c, h) in enumerate(model._initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        cost, accuracy, _, summary = session.run(fetches, feed_dict)
        train_summary_writer.add_summary(summary, global_steps)
        train_summary_writer.flush()
        if global_steps % 100 == 0:
            print("the %i step, train cost is: %f and the train accuracy is %f " % (global_steps, cost, accuracy))
        global_steps += 1

    return global_steps


def train_step():

    print("loading the dataset...")
    config = Config()
    eval_config = Config()
    eval_config.keep_prob = 1.0

    train_data, valid_data, test_data = data_helper.load_data(FLAGS.max_len, batch_size=config.batch_size)

    print("begin training")

    # gpu_config=tf.ConfigProto()
    # gpu_config.gpu_options.allow_growth=True
    with tf.Graph().as_default(), tf.Session() as session:
        initializer = tf.random_uniform_initializer(-1 * FLAGS.init_scale, 1 * FLAGS.init_scale)
        # session.run(tf.global_variables_initializer())
        with tf.variable_scope("model", reuse=None, initializer=initializer):
            model = LSTM_CNN_Model(config=config, is_training=True)

        with tf.variable_scope("model", reuse=True, initializer=initializer):
            valid_model = LSTM_CNN_Model(config=eval_config, is_training=False)
            test_model = LSTM_CNN_Model(config=eval_config, is_training=False)

        # add summary
        # train_summary_op = tf.merge_summary([model.loss_summary,model.accuracy])
        train_summary_dir = os.path.join(config.out_dir, "summaries", "train")
        train_summary_writer = tf.summary.FileWriter(train_summary_dir, session.graph)

        # dev_summary_op = tf.merge_summary([valid_model.loss_summary,valid_model.accuracy])
        dev_summary_dir = os.path.join(eval_config.out_dir, "summaries", "dev")
        dev_summary_writer = tf.summary.FileWriter(dev_summary_dir, session.graph)

        # add checkpoint
        checkpoint_dir = os.path.abspath(os.path.join(config.out_dir, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
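        # checkpoints would be written as runs/checkpoints/model-<global_step>;
        # note that the saver.save() call in the epoch loop below is commented out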
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver(tf.global_variables())

        tf.global_variables_initializer().run()
        global_steps = 1
        begin_time = int(time.time())

        for i in range(config.num_epoch):
            print("the %d epoch training..." % (i + 1))
            lr_decay = config.lr_decay ** max(i - config.max_decay_epoch, 0.0)
            model.assign_new_lr(session, config.lr * lr_decay)
            print("learning_rate : %f" % (config.lr * lr_decay))
            global_steps = run_epoch(model, session, train_data, global_steps, train_summary_writer, dev_summary_writer)

            # if i % config.checkpoint_every == 0:
            #     path = saver.save(session, checkpoint_prefix, global_steps)
            #     print("Saved model checkpoint to {}\n".format(path))

        print("the training is finished")
        end_time = int(time.time())
        print("training takes %d seconds already\n" % (end_time - begin_time))
        test_accuracy = evaluate(test_model, session, test_data)
        valid_accuracy = evaluate(valid_model, session, valid_data)
        print("RNN_CNN_model: the test data accuracy is %f" % test_accuracy)
        print("RNN_CNN_model: the valid data accuracy is %f" % valid_accuracy)
        print("program end!")


def main(_):
    train_step()


if __name__ == "__main__":
    tf.app.run()

--------------------------------------------------------------------------------
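A minimal way to run the trainer, assuming TensorFlow 1.x and that data/subj0.pkl is in place (the flag values shown are simply the defaults already defined in train_rnn_cnn_classify.py):

    python train_rnn_cnn_classify.py --batch_size 64 --max_len 40 --num_epoch 30 --lr 0.05

Training and dev summaries are written under runs/summaries/ and can be viewed with TensorBoard; checkpoints would land in runs/checkpoints/ once the commented-out saver.save() call in train_step() is re-enabled.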