├── LICENSE
├── README.md
├── generator_model.py
├── pointer_decoder.py
└── pointer_model.py

/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2018 Ujjawal Prasad

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# question-generation
Neural Models for Key Phrase Detection and Question Generation

(IN PROGRESS)

TensorFlow implementation of the paper:

https://arxiv.org/pdf/1706.04560v2.pdf
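
## Model sketch

A rough sketch of the pipeline this code targets, based on the paper and the files in
this repository: `pointer_model.py` points at a key phrase (answer span) in the
passage, and `generator_model.py` generates a question conditioned on the passage and
that span. The snippet below illustrates the pointer-style attention that the decoder
in `generator_model.py` sketches, `alpha_t = softmax(W_2 tanh(W_1 [h_d_i ; h_a ; s_t] + b_1) + b_2)`;
the function name, the shapes, and the `tanh` nonlinearity are illustrative choices,
not this repository's API.

```python
import numpy as np

def pointer_attention(h_d, h_a, s_t, W_1, b_1, W_2, b_2):
    """Score every passage position given answer encoding h_a and decoder state s_t."""
    passage_len = h_d.shape[0]
    # Concatenate each passage annotation with the (broadcast) answer and decoder states.
    x = np.concatenate([h_d,
                        np.tile(h_a, (passage_len, 1)),
                        np.tile(s_t, (passage_len, 1))], axis=1)
    v_t = np.tanh(x @ W_1 + b_1) @ W_2 + b_2   # one score per position: [passage_len, 1]
    e = np.exp(v_t - v_t.max())
    return (e / e.sum()).ravel()               # attention weights over passage positions

# Made-up shapes, just to run the function end to end.
rng = np.random.default_rng(0)
passage_len, enc_dim, attn_dim = 6, 8, 16
h_d = rng.normal(size=(passage_len, enc_dim))   # encoder annotation per passage token
h_a = rng.normal(size=(enc_dim,))               # encoded answer span
s_t = rng.normal(size=(enc_dim,))               # current decoder state
W_1 = rng.normal(size=(3 * enc_dim, attn_dim))
b_1 = np.zeros(attn_dim)
W_2 = rng.normal(size=(attn_dim, 1))
b_2 = np.zeros(1)
print(pointer_attention(h_d, h_a, s_t, W_1, b_1, W_2, b_2))
```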
--------------------------------------------------------------------------------
/generator_model.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell, DropoutWrapper
from tensorflow.contrib import seq2seq


class Question_Generator(object):
    """Answer-aware question generation graph.

    The draft constructor referenced several free names (input_dim, hidden_dim,
    char_vocab, ...); they are taken as arguments here.  Shapes and dtypes
    marked "assumed" were not specified in the draft.
    """

    def __init__(self, input_dim, hidden_dim, max_question_len,
                 with_char=False, char_vocab=None, char_lstm_dim=None,
                 dropout_rate=0.0, is_training=True):

        self.input_dim = input_dim
        # Start/stop positions of the answer span in the passage (assumed int32, [batch_size]).
        self.start_index = tf.placeholder(tf.int32, [None], name='start_index')
        self.stop_index = tf.placeholder(tf.int32, [None], name='stop_index')
        # Target question word ids (assumed int32, [batch_size, question_len]).
        self.question = tf.placeholder(tf.int32, [None, None], name='question')

        with tf.name_scope('word-repres'):
            # Pre-computed word vectors for the passage: [batch_size, passage_len, input_dim].
            self.passage_repres = tf.placeholder(tf.float32, [None, None, None])
        passage_repres = [self.passage_repres]

        if with_char and char_vocab is not None:
            self.passage_char_lengths = tf.placeholder(tf.int32, [None, None])
            self.passage_chars = tf.placeholder(tf.int32, [None, None, None])  # [batch_size, passage_len, p_char_len]
            self.answer_chars = tf.placeholder(tf.int32, [None, None, None])   # [batch_size, answer_len, a_char_len] (assumed; the draft used it without defining it)

            input_shape = tf.shape(self.answer_chars)
            batch_size = input_shape[0]
            a_char_len = input_shape[2]
            input_shape = tf.shape(self.passage_chars)
            passage_len = input_shape[1]
            p_char_len = input_shape[2]
            char_dim = char_vocab.word_dim
            self.char_embedding = tf.get_variable("char_embedding",
                                                  initializer=tf.constant(char_vocab.word_vecs),
                                                  dtype=tf.float32)
            passage_char_repres = tf.nn.embedding_lookup(self.char_embedding, self.passage_chars)  # [batch_size, passage_len, p_char_len, char_dim]
            passage_char_repres = tf.reshape(passage_char_repres, shape=[-1, p_char_len, char_dim])
            passage_char_lengths = tf.reshape(self.passage_char_lengths, [-1])
            with tf.variable_scope('char_lstm'):
                # Character-level LSTM, with dropout while training.
                char_lstm_cell = LSTMCell(char_lstm_dim)
                if is_training:
                    char_lstm_cell = DropoutWrapper(char_lstm_cell,
                                                    output_keep_prob=(1 - dropout_rate))
                char_lstm_cell = MultiRNNCell([char_lstm_cell])

                # Character-level representation of each passage token: the last
                # output of the char LSTM over that token's characters.
                passage_char_outputs = tf.nn.dynamic_rnn(char_lstm_cell, passage_char_repres,
                                                         sequence_length=passage_char_lengths,
                                                         dtype=tf.float32)[0]  # [batch_size*passage_len, p_char_len, char_lstm_dim]
                passage_char_outputs = passage_char_outputs[:, -1, :]
                passage_char_outputs = tf.reshape(passage_char_outputs,
                                                  [batch_size, passage_len, char_lstm_dim])

            passage_repres.append(passage_char_outputs)
            self.input_dim += char_lstm_dim

        self.passage_repres = tf.concat(passage_repres, 2)  # [batch_size, passage_len, dim]

        with tf.name_scope('encoder-1'):
            # Bidirectional passage encoder; h_d holds the document annotations.
            encoder_cell_f = LSTMCell(hidden_dim)
            encoder_cell_b = LSTMCell(hidden_dim)

            encoder_outputs, _ = tf.nn.bidirectional_dynamic_rnn(encoder_cell_f, encoder_cell_b,
                                                                 self.passage_repres,
                                                                 dtype=tf.float32,
                                                                 scope='passage_birnn')

            h_d = tf.concat(encoder_outputs, axis=2)  # [batch_size, passage_len, 2*hidden_dim]

        with tf.name_scope('answer-encoding'):
            # Slice out the encoder annotations covering the answer span of every
            # example (tf.unstack needs a statically known batch size, and the
            # spans must be padded to equal length for tf.stack below).
            unstacked_h_d = tf.unstack(h_d)
            h_a_ = []

            for i in range(len(unstacked_h_d)):
                temp = unstacked_h_d[i]
                h_a_.append(temp[self.start_index[i]:self.stop_index[i]])

            answer_encoder_f = LSTMCell(hidden_dim)
            answer_encoder_b = LSTMCell(hidden_dim)

            h_a, _ = tf.nn.bidirectional_dynamic_rnn(answer_encoder_f, answer_encoder_b,
                                                     inputs=tf.stack(h_a_),
                                                     dtype=tf.float32,
                                                     scope='answer_birnn')

            h_a_argmax = tf.argmax(h_a, 2)
            # The argmax above yields integer indices, which cannot feed the float
            # decoder below; the mean annotation over the answer span is used as
            # the answer representation instead (assumption).
            answer_repres = tf.reduce_mean(tf.concat(h_a, 2), axis=1)  # [batch_size, 2*hidden_dim]

        with tf.name_scope('decoder'):
            # Two cascading decoder cells: the first drives the pointer decoder,
            # the second the generative (vocabulary) decoder.
            cascading_cell_1 = LSTMCell(hidden_dim)
            cascading_cell_2 = LSTMCell(hidden_dim)

            def cascading_cells_body(t, state, tensor_):
                """One decoding step, run inside tf.while_loop below.

                Weight names/shapes are assumptions; the draft called
                tf.get_variable() without arguments.
                """
                ############### POINTER DECODER ###############
                with tf.variable_scope('weights-pointer', reuse=tf.AUTO_REUSE):
                    W_1 = tf.get_variable('W_1', [5 * hidden_dim, hidden_dim])
                    W_2 = tf.get_variable('W_2', [hidden_dim, 1])
                    b_1 = tf.get_variable('b_1', [hidden_dim])
                    b_2 = tf.get_variable('b_2', [1])

                # The draft never defined h_d_i or cascading_cell_1_output; here the
                # first cascading cell is advanced on the answer representation and
                # every passage annotation h_d[:, i] is scored against the answer
                # representation and the cell output.
                cell_1_output, new_state = cascading_cell_1(answer_repres, state)
                passage_len = tf.shape(h_d)[1]
                context = tf.concat([answer_repres, cell_1_output], axis=1)            # [batch, 3*hidden_dim]
                context = tf.tile(tf.expand_dims(context, 1), [1, passage_len, 1])
                temp_input = tf.reshape(tf.concat([h_d, context], 2), [-1, 5 * hidden_dim])
                v_t = tf.matmul(tf.matmul(temp_input, W_1) + b_1, W_2) + b_2           # [batch*passage_len, 1]
                v_t = tf.reshape(v_t, [-1, passage_len])
                # Pointer distribution over passage positions (left blank in the draft).
                alpha_t = tf.nn.softmax(v_t)

                ############### GENERATIVE DECODER ###############
                with tf.variable_scope('weights-generator', reuse=tf.AUTO_REUSE):
                    W_1_g = tf.get_variable('W_1_g', [hidden_dim, hidden_dim])
                    W_2_g = tf.get_variable('W_2_g', [hidden_dim, 1])
                    b_1_g = tf.get_variable('b_1_g', [hidden_dim])
                    b_2_g = tf.get_variable('b_2_g', [1])
                # TODO (unfinished in the draft): run cascading_cell_2, compute the
                # vocabulary distribution with the weights above and combine it with
                # alpha_t.

                # Collect the per-step pointer distribution (assumed purpose of tensor_).
                tensor_ = tensor_.write(t, alpha_t)

                t += 1
                return t, new_state, tensor_

            with tf.variable_scope('cascading-cells'):
                # Per-step outputs; the draft sized this with hidden_dim, but the
                # number of decoding steps is what is needed here.
                tensor_ = tf.TensorArray(dtype=tf.float32, size=max_question_len)

                # Decode for at most max_question_len steps (bound left blank in the draft).
                condition = lambda t, state, tensor_: tf.less(t, max_question_len)
                t = tf.constant(0)
                initial_state = cascading_cell_1.zero_state(tf.shape(h_d)[0], tf.float32)

                cascading_loop = tf.while_loop(cond=condition,
                                               body=cascading_cells_body,
                                               loop_vars=(t, initial_state, tensor_))

        with tf.name_scope('predictions'):
            # TODO (unfinished in the draft): map the collected pointer/generator
            # outputs to predicted question tokens.
            self.pointer_distributions = cascading_loop[2].stack()  # [steps, batch, passage_len]
--------------------------------------------------------------------------------
/pointer_decoder.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib import layers
from tensorflow.contrib import seq2seq
from tensorflow.contrib.rnn import LSTMCell, MultiRNNCell
from tensorflow.python.util import nest

# Aliases for the legacy (TF 1.0-era) contrib seq2seq decoder API.
dynamic_rnn_decoder = seq2seq.dynamic_rnn_decoder
simple_decoder_fn_train = seq2seq.simple_decoder_fn_train

def Decoder(cell, inputs, enc_outputs, enc_final_states,
            seq_length, hidden_dim, batch_size, is_train,
            num_glimpse, initializer=None, max_length=None):
    """Pointer decoder built on the legacy contrib seq2seq decoder_fn API."""
    with tf.variable_scope('decoder-network') as scope:

        if is_train:
            # Teacher forcing: feed the ground-truth decoder inputs at every step.
            decoder_fn = simple_decoder_fn_train(enc_final_states)

        else:
            maximum_length = tf.convert_to_tensor(max_length, tf.int32)

            def decoder_fn(time, cell_state, cell_input, cell_output, context_state):
                """Minimal inference-time decoder_fn (signature fixed by dynamic_rnn_decoder).

                Only state threading and the stop condition are implemented; the
                attention/glimpse step over enc_outputs, and feeding the pointed-to
                token back in, are still to be written.  The zero first input and
                the emitted output size are assumptions.
                """
                if cell_output is None:
                    # First call: start from the encoder's final state.
                    cell_state = enc_final_states
                    cell_input = tf.zeros([batch_size, hidden_dim], dtype=tf.float32)
                    cell_output = tf.zeros([hidden_dim], dtype=tf.float32)
                done = tf.cond(tf.greater_equal(time, maximum_length),
                               lambda: tf.ones([batch_size], dtype=tf.bool),
                               lambda: tf.zeros([batch_size], dtype=tf.bool))
                return done, cell_state, cell_input, cell_output, context_state

        outputs, final_state, final_context_state = dynamic_rnn_decoder(
            cell, decoder_fn,
            inputs=inputs if is_train else None,
            sequence_length=seq_length if is_train else None,
            scope=scope)
        return outputs, final_state, final_context_state
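

# ---------------------------------------------------------------------------
# Illustration only -- not called by Decoder() above.  The num_glimpse argument
# suggests the "glimpse" trick used with pointer networks: attend over the
# encoder outputs, replace the query with the attention-weighted sum of those
# outputs, repeat num_glimpse times, and only then emit the pointer
# distribution.  The helper below sketches that with plain dot-product
# attention; the name, signature, and shapes are assumptions, not part of this
# repository's API.
def glimpse_example(query, enc_outputs, num_glimpse=1):
    """query: [batch, hidden_dim]; enc_outputs: [batch, seq_len, hidden_dim]."""
    for _ in range(num_glimpse):
        # Attention scores over encoder positions: [batch, seq_len].
        scores = tf.reduce_sum(enc_outputs * tf.expand_dims(query, 1), axis=2)
        weights = tf.nn.softmax(scores)
        # New query: attention-weighted sum of the encoder outputs.
        query = tf.reduce_sum(enc_outputs * tf.expand_dims(weights, 2), axis=1)
    # Final pointer distribution over input positions.
    scores = tf.reduce_sum(enc_outputs * tf.expand_dims(query, 1), axis=2)
    return tf.nn.softmax(scores)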
--------------------------------------------------------------------------------
/pointer_model.py:
--------------------------------------------------------------------------------
from __future__ import print_function

import tensorflow as tf
from tensorflow.contrib.rnn import BasicLSTMCell, MultiRNNCell, LSTMCell

# Pointer decoder defined in this repository (used in _build_model).
from pointer_decoder import Decoder


class Pointer_Model(object):
    """Pointer network that marks key-phrase (answer) spans in the input passage."""

    def __init__(self, config):
        self.task = config.task
        self.debug = config.debug
        self.config = config

        self.input_dim = config.input_dim
        self.hidden_dim = config.hidden_dim
        self.num_layers = config.num_layers

        self.max_enc_length = config.max_enc_length
        self.max_dec_length = config.max_dec_length
        self.num_glimpse = config.num_glimpse

        # Used by _build_model; assumed to be present in config.
        self.vocab_size = config.vocab_size
        self.embedding_dim = config.embedding_dim

        self.init_min_val = config.init_min_val
        self.init_max_val = config.init_max_val

        self.initializer = tf.random_uniform_initializer(self.init_min_val, self.init_max_val)

        self.use_terminal_symbol = config.use_terminal_symbol

        self.lr_start = config.lr_start
        self.lr_decay_step = config.lr_decay_step
        self.lr_decay_rate = config.lr_decay_rate
        self.max_grad_norm = config.max_grad_norm

        self.layer_dict = {}

        # Dtypes/shapes below are assumptions; the draft created the placeholders
        # without arguments.
        self.encoder_inputs = tf.placeholder(tf.int32, [None, self.max_enc_length], name='encoder_inputs')
        self.decoder_targets = tf.placeholder(tf.int32, [None, self.max_dec_length], name='decoder_targets')
        self.encoder_seq_length = tf.placeholder(tf.int32, [None], name='encoder_seq_length')
        self.decoder_seq_length = tf.placeholder(tf.int32, [None], name='decoder_seq_length')
        self.mask = tf.placeholder(tf.float32, [None, self.max_dec_length], name='mask')

        if self.use_terminal_symbol:
            # Account for the end-of-sequence symbol appended to each target.
            self.decoder_seq_length += 1

    def _build_model(self, inputs):
        self.global_step = tf.Variable(0, trainable=False)
        self.batch_size = tf.shape(self.encoder_inputs)[0]

        with tf.variable_scope('embedding'):
            self.embedding = tf.get_variable(name='embedding',
                                             shape=[self.vocab_size, self.embedding_dim],
                                             initializer=self.initializer)

            self.embedding_lookup = tf.nn.embedding_lookup(self.embedding, self.encoder_inputs)

        with tf.variable_scope('encoder'):
            encoder_cell = LSTMCell(self.hidden_dim,
                                    initializer=self.initializer)

            if self.num_layers > 1:
                # Build a fresh cell per layer; re-using one cell object in
                # MultiRNNCell would share (or clash on) its variables.
                cells = [LSTMCell(self.hidden_dim, initializer=self.initializer)
                         for _ in range(self.num_layers)]
                encoder_cell = MultiRNNCell(cells)

            # Encode the embedded inputs (the raw ids cannot be fed to the LSTM).
            self.encoder_outputs, self.encoder_final_state = tf.nn.dynamic_rnn(
                encoder_cell, self.embedding_lookup,
                sequence_length=self.encoder_seq_length, dtype=tf.float32)

        with tf.variable_scope('decoder'):
            self.decoder_cell = LSTMCell(self.hidden_dim, initializer=self.initializer)

            if self.num_layers > 1:
                cells = [LSTMCell(self.hidden_dim, initializer=self.initializer)
                         for _ in range(self.num_layers)]
                self.decoder_cell = MultiRNNCell(cells)

            # Training-time decoding with the pointer decoder from pointer_decoder.py.
            # The wiring below is an assumption -- the draft left the call blank -- and
            # it treats the decoder targets as token ids; if they are positions, gather
            # the corresponding encoder outputs instead.
            decoder_inputs = tf.nn.embedding_lookup(self.embedding, self.decoder_targets)
            self.dec_pred_logits, _, _ = Decoder(
                self.decoder_cell, decoder_inputs, self.encoder_outputs,
                self.encoder_final_state, self.decoder_seq_length,
                self.hidden_dim, self.batch_size, is_train=True,
                num_glimpse=self.num_glimpse, initializer=self.initializer,
                max_length=self.max_dec_length)

            self.dec_pred_prob = tf.nn.softmax(
                self.dec_pred_logits, 2, name="dec_pred_prob")

            self.dec_pred = tf.argmax(
                self.dec_pred_logits, 2, name="dec_pred")

        with tf.variable_scope('decoder', reuse=True):
            # Inference-time decoding re-uses the same decoder variables.
            self.dec_inference_logits, _, _ = Decoder(
                self.decoder_cell, None, self.encoder_outputs,
                self.encoder_final_state, None,
                self.hidden_dim, self.batch_size, is_train=False,
                num_glimpse=self.num_glimpse, initializer=self.initializer,
                max_length=self.max_dec_length)

            self.dec_inference_prob = tf.nn.softmax(
                self.dec_inference_logits, 2, name="dec_inference_prob")
            self.dec_inference = tf.argmax(
                self.dec_inference_logits, 2, name="dec_inference")

    def _build_optim(self):
        # Loss, learning-rate schedule and clipped-gradient training op.  The draft
        # left this method empty; the schedule and clipping follow the config fields
        # set in __init__, while the masked cross-entropy loss is an assumption.
        losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=self.decoder_targets, logits=self.dec_pred_logits)
        self.loss = tf.reduce_sum(losses * self.mask) / tf.reduce_sum(self.mask)

        self.lr = tf.train.exponential_decay(
            self.lr_start, self.global_step,
            self.lr_decay_step, self.lr_decay_rate, staircase=True)

        optimizer = tf.train.AdamOptimizer(self.lr)
        grads_and_vars = optimizer.compute_gradients(self.loss)
        clipped = [(tf.clip_by_norm(grad, self.max_grad_norm), var)
                   for grad, var in grads_and_vars if grad is not None]
        self.optim = optimizer.apply_gradients(clipped, global_step=self.global_step)
--------------------------------------------------------------------------------