├── README.md
├── data
│   ├── download.py
│   ├── fetch_and_preprocess.sh
│   ├── filter_glove.py
│   ├── preprocess_data.py
│   └── preprocess_vocab.py
├── model
│   ├── __init__.py
│   ├── data_helpers.py
│   ├── eval.py
│   ├── model_ESIM.py
│   └── train.py
└── scripts
    ├── test.sh
    └── train.sh
/README.md:
--------------------------------------------------------------------------------
1 | # Enhanced LSTM for Natural Language Inference
2 | A TensorFlow implementation of the ESIM model for natural language inference.
3 |
4 | This repository contains a TensorFlow implementation of the sequential model presented in the paper ["Enhanced LSTM for Natural Language Inference"](http://www.aclweb.org/anthology/P17-1152) by Chen et al. (2017).
5 |
6 | # Dependencies
7 | - Python 2.7
8 | - TensorFlow 1.4.0
9 |
10 | # Running the scripts
11 | ## Download and preprocess
12 | ```
13 | cd data
14 | bash fetch_and_preprocess.sh
15 | ```
16 |
17 | ## Train and test a new model
18 | ```
19 | cd scripts
20 | bash train.sh
21 | ```
22 | The training progress and results are written to the `log.txt` file in the `scripts` directory.
23 |
24 | ## Test a trained model
25 | ```
26 | bash test.sh
27 | ```
28 | The test results are written to the `log_test.txt` file in the `scripts` directory.
29 |
--------------------------------------------------------------------------------
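After `fetch_and_preprocess.sh` finishes, each SNLI split is stored as three line-aligned files under `data/word_sequence`. A minimal sanity check, as a sketch assuming the default paths above and run from the repository root:

```
# Prints the first dev example; the label mapping comes from data/preprocess_data.py.
import os

base = 'data/word_sequence'
label_names = {'0': 'entailment', '1': 'neutral', '2': 'contradiction'}

with open(os.path.join(base, 'premise_snli_1.0_dev.txt')) as fp, \
     open(os.path.join(base, 'hypothesis_snli_1.0_dev.txt')) as fh, \
     open(os.path.join(base, 'label_snli_1.0_dev.txt')) as fl:
    print('premise:    {}'.format(fp.readline().strip()))
    print('hypothesis: {}'.format(fh.readline().strip()))
    print('label:      {}'.format(label_names[fl.readline().strip()]))
```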
/data/download.py:
--------------------------------------------------------------------------------
1 | """
2 | Downloads the following:
3 | - Glove vectors
4 | - Stanford Natural Language Inference (SNLI) Corpus
5 |
6 | """
7 |
8 | import sys
9 | import os
10 | import zipfile
11 | import gzip
12 |
13 | def download(url, dirpath):
14 | filename = url.split('/')[-1]
15 | filepath = os.path.join(dirpath, filename)
16 | os.system('wget {} -O {}'.format(url, filepath))
17 | return filepath
18 |
19 | def unzip(filepath):
20 | print("Extracting: " + filepath)
21 | dirpath = os.path.dirname(filepath)
22 | with zipfile.ZipFile(filepath) as zf:
23 | zf.extractall(dirpath)
24 | os.remove(filepath)
25 |
26 | def download_wordvecs(dirpath):
27 | if os.path.exists(dirpath):
28 | print('Found Glove vectors - skip')
29 | return
30 | else:
31 | os.makedirs(dirpath)
32 | url = 'http://www-nlp.stanford.edu/data/glove.840B.300d.zip'
33 | unzip(download(url, dirpath))
34 |
35 | def download_snli(dirpath):
36 | if os.path.exists(dirpath):
37 | print('Found SNLI dataset - skip')
38 | return
39 | else:
40 | os.makedirs(dirpath)
41 | url = 'https://nlp.stanford.edu/projects/snli/snli_1.0.zip'
42 | unzip(download(url, dirpath))
43 |
44 |
45 | if __name__ == '__main__':
46 | base_dir = os.path.dirname(os.path.realpath(__file__))
47 | snli_dir = os.path.join(base_dir, 'snli')
48 | wordvec_dir = os.path.join(base_dir, 'glove')
49 | download_snli(snli_dir)
50 | download_wordvecs(wordvec_dir)
51 |
52 |
--------------------------------------------------------------------------------
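`download()` above shells out to `wget`, so it needs `wget` on the PATH. If that is not available, a sketch of an equivalent helper using only the Python 2.7 standard library (the version stated in the README):

```
# Hypothetical wget-free variant of download(); urllib.urlretrieve is part of
# the Python 2.7 standard library and blocks until the file has been written.
import os
import urllib

def download(url, dirpath):
    filename = url.split('/')[-1]
    filepath = os.path.join(dirpath, filename)
    print('Downloading {} -> {}'.format(url, filepath))
    urllib.urlretrieve(url, filepath)
    return filepath
```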
/data/fetch_and_preprocess.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | set -e
3 | python download.py
4 | python preprocess_data.py
5 | python preprocess_vocab.py
6 | python filter_glove.py
--------------------------------------------------------------------------------
/data/filter_glove.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | base_dir = os.path.dirname(os.path.realpath(__file__))
4 |
5 | vocab_file = os.path.join(base_dir, 'word_sequence/vocab.txt')
6 | vocab = []
7 | with open(vocab_file, 'rb') as f:
8 | for line in f:
9 | line = line.decode('utf-8').strip()
10 | vocab.append(line)
11 | print("Vocabulary size: {}".format(len(vocab)))
12 |
13 |
14 | print("Filtering glove embedding ...")
15 | glove_file = os.path.join(base_dir, 'glove/glove.840B.300d.txt')
16 | vectors = {}
17 | with open(glove_file, 'rt') as f:
18 | for line in f:
19 | items = line.strip().split(' ')
20 |         if len(items) != 300 + 1:
21 |             continue  # a few 840B tokens contain spaces; skip those malformed lines
22 | vec = [float(items[i]) for i in range(1, 300+1)]
23 | vectors[items[0]] = vec
24 | print("Glove size: {}".format(len(vectors)))
25 |
26 |
27 | filtered_vectors = {}
28 | NOT = 0
29 | for word in vocab:
30 | if word in vectors:
31 | filtered_vectors[word] = vectors[word]
32 | else:
33 | NOT += 1
34 | print("Filtered vectors size: {}".format(len(filtered_vectors)))
35 | print("Words not in glove size: {}".format(NOT))
36 |
37 |
38 | filtered_glove_file = os.path.join(base_dir, 'glove/filtered_glove_840B_300d.txt')
39 | with open(filtered_glove_file, 'w') as f:
40 | for word,vector in filtered_vectors.items():
41 | to_write = []
42 | to_write.append(word)
43 | vector = [str(ele) for ele in vector]
44 | to_write.extend(vector)
45 | f.write(" ".join(to_write))
46 | f.write("\n")
47 | print("Write to {} finished.".format(filtered_glove_file))
48 |
--------------------------------------------------------------------------------
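`filter_glove.py` loads all of `glove.840B.300d.txt` (roughly 2.2M vectors) into memory before filtering. The same filtering step can be done in a streaming fashion; a sketch under the same path assumptions, which copies matching lines verbatim and never builds the full table:

```
# Streaming variant of the GloVe filtering step: a line is kept only if its
# first token appears in word_sequence/vocab.txt.
import os

base_dir = os.path.dirname(os.path.realpath(__file__))

with open(os.path.join(base_dir, 'word_sequence/vocab.txt')) as f:
    vocab = set(line.strip() for line in f)

src = os.path.join(base_dir, 'glove/glove.840B.300d.txt')
dst = os.path.join(base_dir, 'glove/filtered_glove_840B_300d.txt')
kept = 0
with open(src) as fin, open(dst, 'w') as fout:
    for line in fin:
        if line.split(' ', 1)[0] in vocab:
            fout.write(line)
            kept += 1
print('Kept {} of {} vocabulary words.'.format(kept, len(vocab)))
```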
/data/preprocess_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | import sys
3 | import os
4 | import numpy
5 | import cPickle as pkl
6 |
7 | from collections import OrderedDict
8 |
9 | dic = {'entailment': '0', 'neutral': '1', 'contradiction': '2'}
10 |
11 | def build_dictionary(filepaths, dst_path, lowercase=False):
12 | word_freqs = OrderedDict()
13 | for filepath in filepaths:
14 | print 'Processing', filepath
15 | with open(filepath, 'r') as f:
16 | for line in f:
17 | if lowercase:
18 | line = line.lower()
19 | words_in = line.strip().split(' ')
20 | for w in words_in:
21 | if w not in word_freqs:
22 | word_freqs[w] = 0
23 | word_freqs[w] += 1
24 |
25 | words = word_freqs.keys()
26 | freqs = word_freqs.values()
27 |
28 | sorted_idx = numpy.argsort(freqs)
29 | sorted_words = [words[ii] for ii in sorted_idx[::-1]]
30 |
31 | worddict = OrderedDict()
32 |     worddict['_PAD_'] = 0  # padding
33 |     worddict['_UNK_'] = 1  # out-of-vocabulary
34 |     worddict['_BOS_'] = 2  # beginning-of-sentence token
35 |     worddict['_EOS_'] = 3  # end-of-sentence token
36 |
37 | for ii, ww in enumerate(sorted_words):
38 | worddict[ww] = ii + 4
39 |
40 | with open(dst_path, 'wb') as f:
41 | pkl.dump(worddict, f)
42 |
43 | print 'Dict size', len(worddict)
44 | print 'Done'
45 |
46 |
47 | def build_sequence(filepath, dst_dir):
48 | filename = os.path.basename(filepath)
49 | print filename
50 | len_p = []
51 | len_h = []
52 | with open(filepath) as f, \
53 | open(os.path.join(dst_dir, 'premise_%s'%filename), 'w') as f1, \
54 | open(os.path.join(dst_dir, 'hypothesis_%s'%filename), 'w') as f2, \
55 | open(os.path.join(dst_dir, 'label_%s'%filename), 'w') as f3:
56 | next(f) # skip the header row
57 | for line in f:
58 | sents = line.strip().split('\t')
59 |             if sents[0] == '-':  # no gold label (annotators disagreed)
60 | continue
61 |
62 | words_in = sents[1].strip().split(' ')
63 | words_in = [x for x in words_in if x not in ('(',')')]
64 | f1.write(' '.join(words_in) + '\n')
65 | len_p.append(len(words_in))
66 |
67 | words_in = sents[2].strip().split(' ')
68 | words_in = [x for x in words_in if x not in ('(',')')]
69 | f2.write(' '.join(words_in) + '\n')
70 | len_h.append(len(words_in))
71 |
72 | f3.write(dic[sents[0]] + '\n')
73 |
74 | print 'max min len premise', max(len_p), min(len_p)
75 | print 'max min len hypothesis', max(len_h), min(len_h)
76 |
77 |
78 | def make_dirs(dirs):
79 | for d in dirs:
80 | if not os.path.exists(d):
81 | os.makedirs(d)
82 |
83 | if __name__ == '__main__':
84 | print('=' * 80)
85 | print('Preprocessing snli_1.0 dataset')
86 | print('=' * 80)
87 | base_dir = os.path.dirname(os.path.realpath(__file__))
88 | dst_dir = os.path.join(base_dir, 'word_sequence')
89 | snli_dir = os.path.join(base_dir, 'snli/snli_1.0')
90 | make_dirs([dst_dir])
91 |
92 | build_sequence(os.path.join(snli_dir, 'snli_1.0_dev.txt'), dst_dir)
93 | build_sequence(os.path.join(snli_dir, 'snli_1.0_test.txt'), dst_dir)
94 | build_sequence(os.path.join(snli_dir, 'snli_1.0_train.txt'), dst_dir)
95 |
96 | build_dictionary([os.path.join(dst_dir, 'premise_snli_1.0_train.txt'),
97 | os.path.join(dst_dir, 'hypothesis_snli_1.0_train.txt')],
98 | os.path.join(dst_dir, 'vocab_cased.pkl'))
99 |
100 |
--------------------------------------------------------------------------------
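`build_sequence` reads the binary-parse columns (fields 1 and 2 of each tab-separated line) and recovers plain token sequences by dropping the parenthesis tokens. An illustrative example of that step (the parse string below is made up):

```
# The binary-parse column is whitespace-tokenised and '(' / ')' are dropped.
parse = "( ( A man ) ( ( is walking ) . ) )"
tokens = [x for x in parse.strip().split(' ') if x not in ('(', ')')]
print(' '.join(tokens))   # -> A man is walking .
```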
/data/preprocess_vocab.py:
--------------------------------------------------------------------------------
1 | import os
2 | import cPickle
3 |
4 | base_dir = os.path.dirname(os.path.realpath(__file__))
5 | dictionary = os.path.join(base_dir, 'word_sequence/vocab_cased.pkl')
6 | vocab = os.path.join(base_dir, 'word_sequence/vocab.txt')
7 |
8 | with open(dictionary, 'rb') as f:
9 | worddicts = cPickle.load(f)
10 |
11 | with open(vocab, 'w') as f:
12 | for k, v in worddicts.items():
13 | f.write(k)
14 | f.write('\n')
15 | print("Preprocess vocab done.")
16 |
--------------------------------------------------------------------------------
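Because `worddicts` is an `OrderedDict`, `vocab.txt` lists words in id order: the four special tokens first, then corpus words by descending frequency. A quick check, assuming it is run from the `data` directory:

```
# Line i of vocab.txt holds the word with id i (ids are assigned in preprocess_data.py).
with open('word_sequence/vocab.txt') as f:
    for word_id in range(6):
        print('{} {}'.format(word_id, f.readline().strip()))
# expected: 0 _PAD_, 1 _UNK_, 2 _BOS_, 3 _EOS_, then the most frequent corpus words
```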
/model/__init__.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/model/data_helpers.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import random
3 |
4 |
5 | def loadVocab(fname):
6 | '''
7 |     vocab = {"word": word_id, ...}, built from the line order of the vocab file
8 |     idf is returned empty; no IDF values are loaded here
9 | '''
10 | vocab={}
11 | idf={}
12 | with open(fname, 'rt') as f:
13 | for index, word in enumerate(f):
14 | word = word.decode('utf-8').strip()
15 | vocab[word] = index
16 | return vocab, idf
17 |
18 | def toVec(tokens, vocab, maxlen):
19 | '''
20 |     length: number of tokens in the input sequence
21 |     vec: the tokens mapped to vocab ids (unknown words map to _UNK_), a variable-length array [3, 6, 4, ...]
22 | '''
23 | n = len(tokens)
24 | length = 0
25 | vec=[]
26 | for i in range(n):
27 | length += 1
28 | if tokens[i] in vocab:
29 | vec.append(vocab[tokens[i]])
30 | else:
31 | vec.append(vocab["_UNK_"])
32 |
33 | return length, np.array(vec)
34 |
35 |
36 | def loadDataset(premise_file, hypothesis_file, label_file, vocab, maxlen):
37 |
38 | # premise
39 | premise_tokens = []
40 | premise_vec = []
41 | premise_len = []
42 | with open(premise_file, 'rt') as f1:
43 | for line in f1:
44 | line = line.decode('utf-8').strip()
45 | p_tokens = line.split(' ')[:maxlen]
46 | p_len, p_vec = toVec(p_tokens, vocab, maxlen)
47 | premise_tokens.append(p_tokens)
48 | premise_vec.append(p_vec)
49 | premise_len.append(p_len)
50 |
51 | # hypothesis
52 | hypothesis_tokens = []
53 | hypothesis_vec = []
54 | hypothesis_len = []
55 | with open(hypothesis_file, 'rt') as f2:
56 | for line in f2:
57 | line = line.decode('utf-8').strip()
58 | h_tokens = line.split(' ')[:maxlen]
59 | h_len, h_vec = toVec(h_tokens, vocab, maxlen)
60 | hypothesis_tokens.append(h_tokens)
61 | hypothesis_vec.append(h_vec)
62 | hypothesis_len.append(h_len)
63 |
64 | # label
65 | label = []
66 | with open(label_file, 'rt') as f3:
67 | for line in f3:
68 | line = line.decode('utf-8').strip()
69 | label.append(int(line))
70 |
71 | assert len(premise_tokens) == len(hypothesis_tokens)
72 | assert len(hypothesis_tokens) == len(label)
73 |
74 | # dataset
75 | dataset = []
76 | for i in range(len(label)):
77 | dataset.append( (premise_tokens[i], premise_vec[i], premise_len[i],
78 | label[i],
79 | hypothesis_tokens[i], hypothesis_vec[i], hypothesis_len[i]) )
80 |
81 | return dataset
82 |
83 |
84 | def word_count(q_vec, a_vec, q_len, a_len, idf):
85 |     q_set = set([q_vec[i] for i in range(q_len) if q_vec[i] > 100])  # ids > 100 only, i.e. skip the most frequent words
86 | a_set = set([a_vec[i] for i in range(a_len) if a_vec[i] > 100])
87 | new_q_len = float(max(len(q_set), 1))
88 | count1 = 0.0
89 | count2 = 0.0
90 | for id1 in q_set:
91 | if id1 in a_set:
92 | count1 += 1.0
93 | if id1 in idf:
94 | count2 += idf[id1]
95 | return count1/new_q_len, count2/new_q_len
96 |
97 | def common_words(q_vec, a_vec, q_len, a_len):
98 | q_set = set([q_vec[i] for i in range(q_len) if q_vec[i] > 100])
99 | a_set = set([a_vec[i] for i in range(a_len) if a_vec[i] > 100])
100 | return q_set.intersection(a_set)
101 |
102 | def tfidf_feature(id_list, common_id_set, idf):
103 | word_freq={}
104 | for t in id_list:
105 | if t in common_id_set:
106 | if t in word_freq:
107 | word_freq[t] += 1
108 | else:
109 | word_freq[t] = 1
110 | tfidf_feature={}
111 | for t in common_id_set:
112 | if t in idf:
113 | tfidf_feature[t] = word_freq[t] * idf[t]
114 | else:
115 | tfidf_feature[t] = word_freq[t]
116 | return tfidf_feature
117 |
118 | def word_feature(id_list, tfidf):
119 | len1 = len(id_list)
120 | features = np.zeros((len1, 2), dtype='float32')
121 | for idx, t in enumerate(id_list):
122 | if t in tfidf:
123 | features[idx, 0] = 1
124 | features[idx, 1] = tfidf[t]
125 | return features
126 |
127 | def normalize_vec(vec, maxlen):
128 | '''
129 | pad the original vec to the same maxlen
130 | [3, 4, 7] maxlen=5 --> [3, 4, 7, 0, 0]
131 | '''
132 | if len(vec) == maxlen:
133 | return vec
134 |
135 | new_vec = np.zeros(maxlen, dtype='int32')
136 | for i in range(len(vec)):
137 | new_vec[i] = vec[i]
138 | return new_vec
139 |
140 |
141 | def batch_iter(data, batch_size, num_epochs, idf, maxlen, shuffle=True):
142 | """
143 | Generates a batch iterator for a dataset.
144 | """
145 | data_size = len(data)
146 |     num_batches_per_epoch = int((data_size - 1) / batch_size) + 1  # ceiling division; avoids an empty final batch
147 | for epoch in range(num_epochs):
148 | # Shuffle the data at each epoch
149 | if shuffle:
150 | random.shuffle(data)
151 | for batch_num in range(num_batches_per_epoch):
152 | start_index = batch_num * batch_size
153 | end_index = min((batch_num + 1) * batch_size, data_size)
154 |
155 | x_premise = []
156 | x_hypothesis = []
157 | x_premise_len = []
158 | x_hypothesis_len = []
159 |
160 | targets = []
161 | p_features=[]
162 | h_features=[]
163 | extra_feature =[]
164 |
165 | for rowIdx in range(start_index, end_index):
166 | premise_tokens, premise_vec, premise_len,\
167 | label, \
168 | hypothesis_tokens, hypothesis_vec, hypothesis_len = data[rowIdx]
169 |
170 | # feature 1
171 | word_count_feature1, word_count_feature2 = word_count(premise_vec, hypothesis_vec, premise_len, hypothesis_len, idf) # scalar feature
172 | common_ids = common_words(premise_vec, hypothesis_vec, premise_len, hypothesis_len) # list: q_set.intersection(a_set) when word_id > 100
173 | tfidf = tfidf_feature(premise_vec, common_ids, idf) # dict: { id: scalar feature }
174 |
175 | # normalize premise_vec and hypothesis_vec
176 | new_premise_vec = normalize_vec(premise_vec, maxlen) # pad the original vec to the same maxlen
177 | new_hypothesis_vec = normalize_vec(hypothesis_vec, maxlen)
178 |
179 | # feature 2
180 | p_word_feature = word_feature(new_premise_vec, tfidf) # feature of np.array( maxlen, 2 )
181 | h_word_feature = word_feature(new_hypothesis_vec, tfidf)
182 |
183 | x_premise.append(new_premise_vec)
184 | x_premise_len.append(premise_len)
185 | x_hypothesis.append(new_hypothesis_vec)
186 | x_hypothesis_len.append(hypothesis_len)
187 | targets.append(label)
188 |
189 | p_features.append(p_word_feature)
190 | h_features.append(h_word_feature)
191 |
192 | extra_feature.append(np.array([word_count_feature1, word_count_feature2], dtype="float32") )
193 |
194 | yield np.array(x_premise), np.array(x_hypothesis), np.array(x_premise_len), np.array(x_hypothesis_len),\
195 | np.array(targets), np.array(extra_feature), np.array(p_features), np.array(h_features)
196 |
197 |
--------------------------------------------------------------------------------
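A minimal usage sketch of `data_helpers` (paths assumed to be the preprocessed dev split; the repository root must be on `PYTHONPATH`, as the shell scripts arrange). Each batch packs padded id matrices plus per-example lengths, labels, and the hand-crafted features:

```
from model import data_helpers

vocab, idf = data_helpers.loadVocab('data/word_sequence/vocab.txt')
dataset = data_helpers.loadDataset('data/word_sequence/premise_snli_1.0_dev.txt',
                                   'data/word_sequence/hypothesis_snli_1.0_dev.txt',
                                   'data/word_sequence/label_snli_1.0_dev.txt',
                                   vocab, maxlen=100)

batches = data_helpers.batch_iter(dataset, batch_size=32, num_epochs=1,
                                  idf=idf, maxlen=100, shuffle=False)
x_p, x_h, x_p_len, x_h_len, y, extra, p_feat, h_feat = next(batches)
print('{} {} {}'.format(x_p.shape, x_h.shape, y.shape))   # (32, 100) (32, 100) (32,)
print('{} {}'.format(extra.shape, p_feat.shape))          # (32, 2) (32, 100, 2)
```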
/model/eval.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | from model import data_helpers
4 |
5 |
6 | # Files
7 | tf.flags.DEFINE_string("test_premise_file", "", "test premise file")
8 | tf.flags.DEFINE_string("test_hypothesis_file", "", "test hypothesis file")
9 | tf.flags.DEFINE_string("test_label_file", "", "test label file")
10 | tf.flags.DEFINE_string("vocab_file", "", "vocabulary file (map word to integer)")
11 |
12 | # Data Parameters
13 | tf.flags.DEFINE_integer("batch_size", 128, "Batch Size (default: 128)")
14 | tf.flags.DEFINE_string("checkpoint_dir", "", "Checkpoint directory from training run")
15 | tf.flags.DEFINE_integer("max_sequence_length", 100, "max sequence length")
16 |
17 | # Misc Parameters
18 | tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
19 | tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
20 |
21 |
22 | FLAGS = tf.flags.FLAGS
23 | FLAGS._parse_flags()
24 | print("\nParameters:")
25 | for attr, value in sorted(FLAGS.__flags.items()):
26 | print("{}={}".format(attr.upper(), value))
27 | print("")
28 |
29 | vocab, idf = data_helpers.loadVocab(FLAGS.vocab_file)
30 | print('vocabulary size: {}'.format(len(vocab)))
31 |
32 | SEQ_LEN = FLAGS.max_sequence_length
33 | test_dataset = data_helpers.loadDataset(FLAGS.test_premise_file, FLAGS.test_hypothesis_file, FLAGS.test_label_file, vocab, SEQ_LEN)
34 | print('test_dataset: {}'.format(len(test_dataset)))
35 |
36 | print("\nEvaluating...\n")
37 |
38 | checkpoint_file = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
39 | print(checkpoint_file)
40 |
41 | graph = tf.Graph()
42 | with graph.as_default():
43 | session_conf = tf.ConfigProto(
44 | allow_soft_placement=FLAGS.allow_soft_placement,
45 | log_device_placement=FLAGS.log_device_placement)
46 | sess = tf.Session(config=session_conf)
47 | with sess.as_default():
48 | # Load the saved meta graph and restore variables
49 | saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
50 | saver.restore(sess, checkpoint_file)
51 |
52 | # Get the placeholders from the graph by name
53 | premise = graph.get_operation_by_name("premise").outputs[0]
54 | hypothesis = graph.get_operation_by_name("hypothesis").outputs[0]
55 |
56 | premise_len = graph.get_operation_by_name("premise_len").outputs[0]
57 | hypothesis_len = graph.get_operation_by_name("hypothesis_len").outputs[0]
58 |
59 | dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]
60 | model_extra_feature = graph.get_operation_by_name("extra_feature").outputs[0]
61 |
62 | premise_word_feature = graph.get_operation_by_name("premise_word_feature").outputs[0]
63 | hypothesis_word_feature = graph.get_operation_by_name("hypothesis_word_feature").outputs[0]
64 |
65 | # Tensors we want to evaluate
66 | prob = graph.get_operation_by_name("prediction_layer/prob").outputs[0]
67 |
68 | num_test = 0
69 | prob_list = []
70 | target_list = []
71 | test_batches = data_helpers.batch_iter(test_dataset, FLAGS.batch_size, 1, idf, SEQ_LEN, shuffle=False)
72 | for test_batch in test_batches:
73 | x_premise, x_hypothesis, x_premise_len, x_hypothesis_len, \
74 | targets, extra_feature, p_features, h_features = test_batch
75 | feed_dict = {
76 | premise: x_premise,
77 | hypothesis: x_hypothesis,
78 | premise_len: x_premise_len,
79 | hypothesis_len: x_hypothesis_len,
80 | dropout_keep_prob: 1.0,
81 | model_extra_feature: extra_feature,
82 | premise_word_feature: p_features,
83 | hypothesis_word_feature: h_features,
84 | }
85 | predicted_prob = sess.run(prob, feed_dict)
86 | prob_list.append(predicted_prob)
87 | target_list.append(targets)
88 | num_test += len(predicted_prob)
89 | print('num_test_sample={}'.format(num_test))
90 |
91 | probs_aggre = np.concatenate(prob_list, axis=0)
92 | labels_aggre = np.concatenate(target_list, axis=0)
93 |
94 | prediction = np.argmax(probs_aggre, axis=1)
95 | accuracy = np.equal(prediction, labels_aggre)
96 | accuracy = np.mean(accuracy)
97 |
98 | print('num_test_samples: {} accuracy: {}'.format(num_test, round(accuracy, 3)))
99 |
--------------------------------------------------------------------------------
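`eval.py` reports only the overall accuracy. Since the class ids are fixed in `data/preprocess_data.py`, a per-class breakdown can be derived from the aggregated predictions; a hypothetical helper that could be called after the aggregation step, e.g. `per_class_accuracy(prediction, labels_aggre)`:

```
# Hypothetical addition to eval.py: per-class accuracy from the aggregated predictions.
import numpy as np

def per_class_accuracy(prediction, labels):
    class_names = {0: 'entailment', 1: 'neutral', 2: 'contradiction'}
    for c in sorted(class_names):
        mask = labels == c
        if mask.any():
            acc = np.mean(prediction[mask] == labels[mask])
            print('{}: {:.3f} ({} examples)'.format(class_names[c], acc, int(mask.sum())))
```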
/model/model_ESIM.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 |
4 | FLAGS = tf.flags.FLAGS
5 |
6 | def get_embeddings(vocab):
7 | print("get_embedding")
8 | initializer = load_word_embeddings(vocab, FLAGS.embedding_dim)
9 | return tf.constant(initializer, name="word_embedding")
10 | # return tf.get_variable(initializer=initializer, name="word_embedding")
11 |
12 | def load_embed_vectors(fname, dim):
13 | vectors = {}
14 | for line in open(fname, 'rt'):
15 | items = line.strip().split(' ')
16 | if len(items[0]) <= 0:
17 | continue
18 | vec = [float(items[i]) for i in range(1, dim+1)]
19 | vectors[items[0]] = vec
20 |
21 | return vectors
22 |
23 | def load_word_embeddings(vocab, dim):
24 | vectors = load_embed_vectors(FLAGS.embedded_vector_file, dim)
25 | vocab_size = len(vocab)
26 | embeddings = np.zeros((vocab_size, dim), dtype='float32')
27 | for word, code in vocab.items():
28 | if word in vectors:
29 | embeddings[code] = vectors[word]
30 | else:
31 | embeddings[code] = np.random.uniform(-0.25, 0.25, dim)
32 |
33 | return embeddings
34 |
35 |
36 | def lstm_layer(inputs, input_seq_len, rnn_size, dropout_keep_prob, scope, scope_reuse=False):
37 | with tf.variable_scope(scope, reuse=scope_reuse) as vs:
38 | fw_cell = tf.contrib.rnn.LSTMCell(rnn_size, forget_bias=1.0, state_is_tuple=True, reuse=scope_reuse)
39 | fw_cell = tf.contrib.rnn.DropoutWrapper(fw_cell, output_keep_prob=dropout_keep_prob)
40 | bw_cell = tf.contrib.rnn.LSTMCell(rnn_size, forget_bias=1.0, state_is_tuple=True, reuse=scope_reuse)
41 | bw_cell = tf.contrib.rnn.DropoutWrapper(bw_cell, output_keep_prob=dropout_keep_prob)
42 | rnn_outputs, rnn_states = tf.nn.bidirectional_dynamic_rnn(cell_fw=fw_cell, cell_bw=bw_cell,
43 | inputs=inputs,
44 | sequence_length=input_seq_len,
45 | dtype=tf.float32)
46 | return rnn_outputs, rnn_states
47 |
48 | # output = relu( xW + b )
49 | def ffnn_layer(inputs, output_size, dropout_keep_prob, scope, scope_reuse=False):
50 | with tf.variable_scope(scope, reuse=scope_reuse):
51 | input_size = inputs.get_shape()[-1].value
52 | W = tf.get_variable("W_trans", shape=[input_size, output_size], initializer=tf.orthogonal_initializer())
53 | b = tf.get_variable("b_trans", shape=[output_size, ], initializer=tf.zeros_initializer())
54 | outputs = tf.nn.relu(tf.einsum('aij,jk->aik', inputs, W) + b)
55 | outputs = tf.nn.dropout(outputs, keep_prob=dropout_keep_prob)
56 | return outputs
57 |
58 | def premise_hypothesis_similarity_matrix(premise, hypothesis):
59 | #[batch_size, dim, p_len]
60 | p2 = tf.transpose(premise, perm=[0,2,1])
61 |
62 | #[batch_size, h_len, p_len]
63 | similarity = tf.matmul(hypothesis, p2, name='similarity_matrix')
64 |
65 | return similarity
66 |
67 | def self_attended(similarity_matrix, inputs):
68 | #similarity_matrix: [batch_size, len, len]
69 | #inputs: [batch_size, len, dim]
70 |
71 | attended_w = tf.nn.softmax(similarity_matrix, dim=-1)
72 |
73 | #[batch_size, len, dim]
74 | attended_out = tf.matmul(attended_w, inputs)
75 | return attended_out
76 |
77 | def attend_hypothesis(similarity_matrix, premise, premise_len, maxlen):
78 | #similarity_matrix: [batch_size, h_len, p_len]
79 | #premise: [batch_size, p_len, dim]
80 |
81 | # masked similarity_matrix
82 | mask_p = tf.sequence_mask(premise_len, maxlen, dtype=tf.float32) # [batch_size, p_len]
83 | mask_p = tf.expand_dims(mask_p, 1) # [batch_size, 1, p_len]
84 | similarity_matrix = similarity_matrix * mask_p + -1e9 * (1-mask_p) # [batch_size, h_len, p_len]
85 |
86 | #[batch_size, h_len, p_len]
87 | attention_weight_for_p = tf.nn.softmax(similarity_matrix, dim=-1)
88 |
89 |     #[batch_size, h_len, dim]
90 | attended_hypothesis = tf.matmul(attention_weight_for_p, premise)
91 | return attended_hypothesis
92 |
93 | def attend_premise(similarity_matrix, hypothesis, hypothesis_len, maxlen):
94 | #similarity_matrix: [batch_size, h_len, p_len]
95 | #hypothesis: [batch_size, h_len, dim]
96 |
97 | # masked similarity_matrix
98 | mask_h = tf.sequence_mask(hypothesis_len, maxlen, dtype=tf.float32) # [batch_size, h_len]
99 | mask_h = tf.expand_dims(mask_h, 2) # [batch_size, h_len, 1]
100 | similarity_matrix = similarity_matrix * mask_h + -1e9 * (1-mask_h) # [batch_size, h_len, p_len]
101 |
102 | #[batch_size, p_len, h_len]
103 | attention_weight_for_h = tf.nn.softmax(tf.transpose(similarity_matrix, perm=[0,2,1]), dim=-1)
104 |
105 | #[batch_size, p_len, dim]
106 | attended_premise = tf.matmul(attention_weight_for_h, hypothesis)
107 | return attended_premise
108 |
109 |
110 | class ESIM(object):
111 | def __init__(
112 | self, sequence_length, vocab_size, embedding_size, vocab, rnn_size, l2_reg_lambda=0.0):
113 |
114 | self.premise = tf.placeholder(tf.int32, [None, sequence_length], name="premise")
115 | self.hypothesis = tf.placeholder(tf.int32, [None, sequence_length], name="hypothesis")
116 |
117 | self.premise_len = tf.placeholder(tf.int32, [None], name="premise_len")
118 | self.hypothesis_len = tf.placeholder(tf.int32, [None], name="hypothesis_len")
119 |
120 | self.target = tf.placeholder(tf.int64, [None], name="target")
121 | self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
122 | self.extra_feature = tf.placeholder(tf.float32, [None, 2], name="extra_feature")
123 |
124 | self.p_word_feature = tf.placeholder(tf.float32, [None, sequence_length, 2], name="premise_word_feature")
125 | self.h_word_feature = tf.placeholder(tf.float32, [None, sequence_length, 2], name="hypothesis_word_feature")
126 |
127 | l2_loss = tf.constant(0.0)
128 |
129 | # =============================== Embedding layer ===============================
130 | # 1. word embedding layer
131 | with tf.name_scope("embedding"):
132 |             W = get_embeddings(vocab)  # tf.constant embedding matrix of shape [vocab_size, embedding_dim]
133 | premise_embedded = tf.nn.embedding_lookup(W, self.premise) # [batch_size, q_len, word_dim]
134 | hypothesis_embedded = tf.nn.embedding_lookup(W, self.hypothesis)
135 |
136 | premise_embedded = tf.nn.dropout(premise_embedded, keep_prob=self.dropout_keep_prob)
137 | hypothesis_embedded = tf.nn.dropout(hypothesis_embedded, keep_prob=self.dropout_keep_prob)
138 | print("shape of premise_embedded: {}".format(premise_embedded.get_shape()))
139 | print("shape of hypothesis_embedded: {}".format(hypothesis_embedded.get_shape()))
140 |
141 | # =============================== Encoding layer ===============================
142 | with tf.variable_scope("encoding_layer") as vs:
143 | rnn_scope_name = "bidirectional_rnn"
144 | p_rnn_output, p_rnn_states = lstm_layer(premise_embedded, self.premise_len, rnn_size, self.dropout_keep_prob, rnn_scope_name, scope_reuse=False) # [batch_size, sequence_length, rnn_size(200)]
145 | premise_output = tf.concat(axis=2, values=p_rnn_output) # [batch_size, maxlen, rnn_size*2]
146 | h_rnn_output, h_rnn_states = lstm_layer(hypothesis_embedded, self.hypothesis_len, rnn_size, self.dropout_keep_prob, rnn_scope_name, scope_reuse=True)
147 | hypothesis_output = tf.concat(axis=2, values=h_rnn_output) # [batch_size, maxlen, rnn_size*2]
148 | print('Incorporate single_lstm_layer successfully.')
149 |
150 | # =============================== Matching layer ===============================
151 | with tf.variable_scope("matching_layer") as vs:
152 |             similarity = premise_hypothesis_similarity_matrix(premise_output, hypothesis_output) #[batch_size, h_len, p_len]
153 | attended_premise = attend_premise(similarity, hypothesis_output, self.hypothesis_len, sequence_length) #[batch_size, maxlen, dim]
154 | attended_hypothesis = attend_hypothesis(similarity, premise_output, self.premise_len, sequence_length) #[batch_size, maxlen, dim]
155 |
156 | m_p = tf.concat(axis=2, values=[premise_output, attended_premise, tf.multiply(premise_output, attended_premise), premise_output-attended_premise])
157 | m_h = tf.concat(axis=2, values=[hypothesis_output, attended_hypothesis, tf.multiply(hypothesis_output, attended_hypothesis), hypothesis_output-attended_hypothesis])
158 |
159 | # m_ffnn
160 | m_input_size = m_p.get_shape()[-1].value
161 | m_output_size = m_input_size
162 | m_p = ffnn_layer(m_p, m_output_size, self.dropout_keep_prob, "m_ffnn", scope_reuse=False)
163 | m_h = ffnn_layer(m_h, m_output_size, self.dropout_keep_prob, "m_ffnn", scope_reuse=True)
164 | print('Incorporate ffnn_layer after cross attention successfully.')
165 |
166 | rnn_scope_cross = 'bidirectional_rnn_cross'
167 | rnn_size_layer_2 = rnn_size
168 | rnn_output_p_2, rnn_states_p_2 = lstm_layer(m_p, self.premise_len, rnn_size_layer_2, self.dropout_keep_prob, rnn_scope_cross, scope_reuse=False)
169 | rnn_output_h_2, rnn_states_h_2 = lstm_layer(m_h, self.hypothesis_len, rnn_size_layer_2, self.dropout_keep_prob, rnn_scope_cross, scope_reuse=True)
170 |
171 | premise_output_cross = tf.concat(axis=2, values=rnn_output_p_2) # [batch_size, sequence_length, 2*rnn_size(400)]
172 | hypothesis_output_cross = tf.concat(axis=2, values=rnn_output_h_2)
173 |
174 | # =============================== Aggregation layer ===============================
175 | with tf.variable_scope("aggregation_layer") as vs:
176 | premise_max = tf.reduce_max(premise_output_cross, axis=1) # [batch_size, 2*rnn_size(400)]
177 | hypothesis_max = tf.reduce_max(hypothesis_output_cross, axis=1)
178 |
179 | premise_mean = tf.reduce_mean(premise_output_cross, axis=1) # [batch_size, 2*rnn_size(400)]
180 | hypothesis_mean = tf.reduce_mean(hypothesis_output_cross, axis=1)
181 |
182 | # premise_state = tf.concat(axis=1, values=[rnn_states_p_2[0].h, rnn_states_p_2[1].h]) # [batch_size, 2*rnn_size(400)]
183 | # hypothesis_state = tf.concat(axis=1, values=[rnn_states_h_2[0].h, rnn_states_h_2[1].h])
184 |
185 | joined_feature = tf.concat(axis=1, values=[premise_max, hypothesis_max, premise_mean, hypothesis_mean]) # [batch_size, 8*rnn_size(1600)]
186 | print("shape of joined feature: {}".format(joined_feature.get_shape()))
187 |
188 | # =============================== Prediction layer ===============================
189 | with tf.variable_scope("prediction_layer") as vs:
190 | hidden_input_size = joined_feature.get_shape()[1].value
191 | hidden_output_size = 256
192 | regularizer = tf.contrib.layers.l2_regularizer(l2_reg_lambda)
193 | #regularizer = None
194 | joined_feature = tf.nn.dropout(joined_feature, keep_prob=self.dropout_keep_prob)
195 | full_out = tf.contrib.layers.fully_connected(joined_feature, hidden_output_size,
196 | activation_fn=tf.nn.relu,
197 | reuse=False,
198 | trainable=True,
199 | scope="projected_layer") # [batch_size, hidden_output_size(256)]
200 | full_out = tf.nn.dropout(full_out, keep_prob=self.dropout_keep_prob)
201 | #full_out = tf.concat(axis=1, values=[full_out, self.extra_feature])
202 |
203 | last_weight_dim = full_out.get_shape()[1].value
204 | print("last_weight_dim: {}".format(last_weight_dim))
205 | bias = tf.Variable(tf.constant(0.1, shape=[3]), name="bias")
206 | s_w = tf.get_variable("s_w", shape=[last_weight_dim, 3], initializer=tf.contrib.layers.xavier_initializer())
207 | logits = tf.matmul(full_out, s_w) + bias # [batch_size, 3]
208 | print("shape of logits: {}".format(logits.get_shape()))
209 |
210 | self.probs = tf.nn.softmax(logits, name="prob") # [batch_size, n_class(3)]
211 |
212 | losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=self.target)
213 | self.mean_loss = tf.reduce_mean(losses, name="mean_loss") + l2_reg_lambda * l2_loss + sum(
214 | tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
215 |
216 | with tf.name_scope("accuracy"):
217 | correct_prediction = tf.equal(tf.argmax(self.probs, 1), self.target)
218 | self.accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"), name="accuracy")
219 |
--------------------------------------------------------------------------------
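The core of the matching layer is the masked soft alignment in `attend_hypothesis` / `attend_premise`: padded positions receive a score of roughly -1e9 before the softmax, so they get essentially zero attention weight. A small NumPy sketch of that masking step:

```
# NumPy illustration of the masking + softmax used in attend_hypothesis().
import numpy as np

def softmax(x, axis=-1):
    e = np.exp(x - x.max(axis=axis, keepdims=True))
    return e / e.sum(axis=axis, keepdims=True)

similarity = np.array([[[2.0, 1.0, 0.5, 0.3]]])   # [batch=1, h_len=1, p_len=4]
premise_len, maxlen = 2, 4
mask_p = (np.arange(maxlen) < premise_len).astype('float32')[None, None, :]   # [1, 1, 4]

masked = similarity * mask_p + -1e9 * (1.0 - mask_p)
weights = softmax(masked, axis=-1)
print(weights.round(3))   # only the first two (real) premise positions get weight
```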
/model/train.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | import numpy as np
3 | import os
4 | import time
5 | import datetime
6 | from model import data_helpers
7 | from model.model_ESIM import ESIM
8 | import operator
9 | from collections import defaultdict
10 |
11 | # Files
12 | tf.flags.DEFINE_string("train_premise_file", "", "train premise file")
13 | tf.flags.DEFINE_string("train_hypothesis_file", "", "train hypothesis file")
14 | tf.flags.DEFINE_string("train_label_file", "", "train label file")
15 | tf.flags.DEFINE_string("dev_premise_file", "", "dev premise file")
16 | tf.flags.DEFINE_string("dev_hypothesis_file", "", "dev hypothesis file")
17 | tf.flags.DEFINE_string("dev_label_file", "", "dev label file")
18 | tf.flags.DEFINE_string("test_premise_file", "", "test premise file")
19 | tf.flags.DEFINE_string("test_hypothesis_file", "", "test hypothesis file")
20 | tf.flags.DEFINE_string("test_label_file", "", "test label file")
21 | tf.flags.DEFINE_string("embedded_vector_file", "", "pre-trained embedded word vector")
22 | tf.flags.DEFINE_string("vocab_file", "", "vocabulary file (map word to integer)")
23 |
24 | # Training parameters
25 | tf.flags.DEFINE_integer("batch_size", 1024, "Batch Size (default: 1024)")
26 | tf.flags.DEFINE_integer("num_epochs", 5000000, "Number of training epochs (default: 5000000)")
27 | tf.flags.DEFINE_integer("evaluate_every", 1000, "Evaluate model on dev set after this many steps (default: 1000)")
28 | 
29 | # Model Hyperparameters
30 | tf.flags.DEFINE_integer("embedding_dim", 100, "Dimensionality of word embeddings (default: 100)")
31 | tf.flags.DEFINE_float("dropout_keep_prob", 1.0, "Dropout keep probability (default: 1.0)")
32 | tf.flags.DEFINE_float("l2_reg_lambda", 0.000005, "L2 regularization lambda (default: 0.000005)")
33 | tf.flags.DEFINE_integer("max_sequence_length", 200, "max sequence length")
34 | tf.flags.DEFINE_integer("rnn_size", 200, "number of RNN units")
35 |
36 | # Misc Parameters
37 | tf.flags.DEFINE_boolean("allow_soft_placement", True, "Allow device soft device placement")
38 | tf.flags.DEFINE_boolean("log_device_placement", False, "Log placement of ops on devices")
39 |
40 | FLAGS = tf.flags.FLAGS
41 | FLAGS._parse_flags()
42 | print("\nParameters:")
43 | for attr, value in sorted(FLAGS.__flags.items()):
44 | print("{}={}".format(attr.upper(), value))
45 | print("")
46 |
47 |
48 | # Data Preparation
49 | print("Loading data...")
50 |
51 | # vocab = {"": 0, ...}
52 | vocab, idf = data_helpers.loadVocab(FLAGS.vocab_file)
53 | print('vocabulary size: {}'.format(len(vocab)))
54 |
55 | SEQ_LEN = FLAGS.max_sequence_length
56 | train_dataset = data_helpers.loadDataset(FLAGS.train_premise_file, FLAGS.train_hypothesis_file, FLAGS.train_label_file, vocab, SEQ_LEN)
57 | print('train_dataset: {}'.format(len(train_dataset)))
58 | dev_dataset = data_helpers.loadDataset(FLAGS.dev_premise_file, FLAGS.dev_hypothesis_file, FLAGS.dev_label_file, vocab, SEQ_LEN)
59 | print('dev_dataset: {}'.format(len(dev_dataset)))
60 | test_dataset = data_helpers.loadDataset(FLAGS.test_premise_file, FLAGS.test_hypothesis_file, FLAGS.test_label_file, vocab, SEQ_LEN)
61 | print('test_dataset: {}'.format(len(test_dataset)))
62 |
63 |
64 | with tf.Graph().as_default():
65 | session_conf = tf.ConfigProto(
66 | allow_soft_placement=FLAGS.allow_soft_placement,
67 | log_device_placement=FLAGS.log_device_placement)
68 | sess = tf.Session(config=session_conf)
69 | with sess.as_default():
70 | esim = ESIM(
71 | sequence_length=SEQ_LEN,
72 | vocab_size=len(vocab),
73 | embedding_size=FLAGS.embedding_dim,
74 | vocab=vocab,
75 | rnn_size=FLAGS.rnn_size,
76 | l2_reg_lambda=FLAGS.l2_reg_lambda)
77 | # Define Training procedure
78 | global_step = tf.Variable(0, name="global_step", trainable=False)
79 | starter_learning_rate = 0.001
80 | learning_rate = tf.train.exponential_decay(starter_learning_rate, global_step,
81 | 5000, 0.96, staircase=True)
82 | optimizer = tf.train.AdamOptimizer(learning_rate)
83 | grads_and_vars = optimizer.compute_gradients(esim.mean_loss)
84 | train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)
85 |
86 | # Keep track of gradient values and sparsity (optional)
87 | """
88 | grad_summaries = []
89 | for g, v in grads_and_vars:
90 | if g is not None:
91 | grad_hist_summary = tf.histogram_summary("{}/grad/hist".format(v.name), g)
92 | sparsity_summary = tf.scalar_summary("{}/grad/sparsity".format(v.name), tf.nn.zero_fraction(g))
93 | grad_summaries.append(grad_hist_summary)
94 | grad_summaries.append(sparsity_summary)
95 | grad_summaries_merged = tf.merge_summary(grad_summaries)
96 | """
97 |
98 | # Output directory for models and summaries
99 | timestamp = str(int(time.time()))
100 | out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
101 | print("Writing to {}\n".format(out_dir))
102 |
103 | # Summaries for loss and accuracy
104 | """
105 | loss_summary = tf.scalar_summary("loss", esim.mean_loss)
106 | acc_summary = tf.scalar_summary("accuracy", esim.accuracy)
107 |
108 | # Train Summaries
109 | train_summary_op = tf.merge_summary([loss_summary, acc_summary, grad_summaries_merged])
110 | train_summary_dir = os.path.join(out_dir, "summaries", "train")
111 | train_summary_writer = tf.train.SummaryWriter(train_summary_dir, sess.graph_def)
112 |
113 | # Dev summaries
114 | dev_summary_op = tf.merge_summary([loss_summary, acc_summary])
115 | dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
116 | dev_summary_writer = tf.train.SummaryWriter(dev_summary_dir, sess.graph_def)
117 | """
118 |
119 | # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
120 | checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
121 | checkpoint_prefix = os.path.join(checkpoint_dir, "model")
122 | if not os.path.exists(checkpoint_dir):
123 | os.makedirs(checkpoint_dir)
124 | saver = tf.train.Saver(tf.global_variables())
125 |
126 | # Initialize all variables
127 | sess.run(tf.global_variables_initializer())
128 |
129 | def train_step(x_premise, x_hypothesis, x_premise_len, x_hypothesis_len,
130 | targets, extra_feature, p_features, h_features):
131 | """
132 | A single training step
133 | """
134 | feed_dict = {
135 | esim.premise: x_premise,
136 | esim.hypothesis: x_hypothesis,
137 | esim.premise_len: x_premise_len,
138 | esim.hypothesis_len: x_hypothesis_len,
139 | esim.target: targets,
140 | esim.dropout_keep_prob: FLAGS.dropout_keep_prob,
141 | esim.extra_feature: extra_feature,
142 | esim.p_word_feature: p_features,
143 | esim.h_word_feature: h_features
144 | }
145 |
146 | _, step, loss, accuracy, predicted_prob = sess.run(
147 | [train_op, global_step, esim.mean_loss, esim.accuracy, esim.probs],
148 | feed_dict)
149 |
150 | time_str = datetime.datetime.now().isoformat()
151 | if step % 100 == 0:
152 | print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
153 | #train_summary_writer.add_summary(summaries, step)
154 |
155 |
156 | def check_step(dataset, shuffle=False):
157 | results = defaultdict(list)
158 | num_test = 0
159 | num_correct = 0.0
160 | batches = data_helpers.batch_iter(dataset, FLAGS.batch_size, 1, idf, SEQ_LEN, shuffle=shuffle)
161 | for batch in batches:
162 | x_premise, x_hypothesis, x_premise_len, x_hypothesis_len, \
163 | targets, extra_feature, p_features, h_features = batch
164 | feed_dict = {
165 | esim.premise: x_premise,
166 | esim.hypothesis: x_hypothesis,
167 | esim.premise_len: x_premise_len,
168 | esim.hypothesis_len: x_hypothesis_len,
169 | esim.target: targets,
170 | esim.dropout_keep_prob: 1.0,
171 | esim.extra_feature: extra_feature,
172 | esim.p_word_feature: p_features,
173 | esim.h_word_feature: h_features
174 | }
175 | batch_accuracy, predicted_prob = sess.run([esim.accuracy, esim.probs], feed_dict)
176 | num_test += len(predicted_prob)
177 | if num_test % 1000 == 0:
178 | print(num_test)
179 |
180 | num_correct += len(predicted_prob) * batch_accuracy
181 |
182 | # calculate Accuracy
183 | acc = num_correct / num_test
184 | print('num_test_samples: {} accuracy: {}'.format(num_test, acc))
185 |
186 | return acc
187 |
188 | best_acc = 0.0
189 | EPOCH = 0
190 | batches = data_helpers.batch_iter(train_dataset, FLAGS.batch_size, FLAGS.num_epochs, idf, SEQ_LEN, shuffle=True)
191 | for batch in batches:
192 | x_premise, x_hypothesis, x_premise_len, x_hypothesis_len, \
193 | targets, extra_feature, p_features, h_features = batch
194 | train_step(x_premise, x_hypothesis, x_premise_len, x_hypothesis_len, targets, extra_feature, p_features, h_features)
195 | current_step = tf.train.global_step(sess, global_step)
196 | if current_step % FLAGS.evaluate_every == 0:
197 | EPOCH += 1
198 | print("\nEPOCH: {}".format(EPOCH))
199 | print("Evaluation on dev:")
200 | valid_acc = check_step(dev_dataset, shuffle=True)
201 | print("\nEvaluation on test:")
202 | test_acc = check_step(test_dataset, shuffle=False)
203 | if valid_acc > best_acc:
204 | best_acc = valid_acc
205 | path = saver.save(sess, checkpoint_prefix, global_step=current_step)
206 | print("Saved model checkpoint to {}\n".format(path))
207 |
208 |
--------------------------------------------------------------------------------
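`train.py` decays the Adam learning rate with `tf.train.exponential_decay` in staircase mode, i.e. the rate at a given global step is `0.001 * 0.96 ** (step // 5000)`. A two-line check of the schedule:

```
# The staircase schedule defined in train.py.
for step in (0, 5000, 50000, 100000):
    print('step {:>6d}: lr = {:.6f}'.format(step, 0.001 * 0.96 ** (step // 5000)))
```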
/scripts/test.sh:
--------------------------------------------------------------------------------
1 | cur_dir=`pwd`
2 | parentdir="$(dirname $cur_dir)"
3 |
4 | DATA_DIR=${parentdir}/data
5 |
6 | latest_run=`ls -dt runs/* |head -n 1`
7 | latest_checkpoint=${latest_run}/checkpoints
8 | # latest_checkpoint=runs/1541064267/checkpoints  # or set this to the checkpoint directory of the model you want to test
9 | echo $latest_checkpoint
10 |
11 | test_premise_file=$DATA_DIR/word_sequence/premise_snli_1.0_test.txt
12 | test_hypothesis_file=$DATA_DIR/word_sequence/hypothesis_snli_1.0_test.txt
13 | test_label_file=$DATA_DIR/word_sequence/label_snli_1.0_test.txt
14 | vocab_file=$DATA_DIR/word_sequence/vocab.txt
15 |
16 | batch_size=128
17 | max_sequence_length=100
18 |
19 | PKG_DIR=${parentdir}
20 |
21 | PYTHONPATH=${PKG_DIR}:$PYTHONPATH CUDA_VISIBLE_DEVICES=1 python -u ${PKG_DIR}/model/eval.py \
22 | --test_premise_file $test_premise_file \
23 | --test_hypothesis_file $test_hypothesis_file \
24 | --test_label_file $test_label_file \
25 | --vocab_file $vocab_file \
26 | --max_sequence_length $max_sequence_length \
27 | --batch_size $batch_size \
28 | --checkpoint_dir $latest_checkpoint > log_test.txt 2>&1 &
29 |
--------------------------------------------------------------------------------
/scripts/train.sh:
--------------------------------------------------------------------------------
1 | cur_dir=`pwd`
2 | parentdir="$(dirname $cur_dir)"
3 |
4 | DATA_DIR=${parentdir}/data
5 |
6 | train_premise_file=$DATA_DIR/word_sequence/premise_snli_1.0_train.txt
7 | train_hypothesis_file=$DATA_DIR/word_sequence/hypothesis_snli_1.0_train.txt
8 | train_label_file=$DATA_DIR/word_sequence/label_snli_1.0_train.txt
9 |
10 | dev_premise_file=$DATA_DIR/word_sequence/premise_snli_1.0_dev.txt
11 | dev_hypothesis_file=$DATA_DIR/word_sequence/hypothesis_snli_1.0_dev.txt
12 | dev_label_file=$DATA_DIR/word_sequence/label_snli_1.0_dev.txt
13 |
14 | test_premise_file=$DATA_DIR/word_sequence/premise_snli_1.0_test.txt
15 | test_hypothesis_file=$DATA_DIR/word_sequence/hypothesis_snli_1.0_test.txt
16 | test_label_file=$DATA_DIR/word_sequence/label_snli_1.0_test.txt
17 |
18 | embedded_vector_file=$DATA_DIR/glove/filtered_glove_840B_300d.txt
19 | vocab_file=$DATA_DIR/word_sequence/vocab.txt
20 |
21 | lambda=0
22 | dropout_keep_prob=0.8
23 | batch_size=128
24 | max_sequence_length=100
25 | DIM=300
26 | rnn_size=300
27 | evaluate_every=4292
28 |
29 | PKG_DIR=${parentdir}
30 |
31 | PYTHONPATH=${PKG_DIR}:$PYTHONPATH CUDA_VISIBLE_DEVICES=1 python -u ${PKG_DIR}/model/train.py \
32 | --train_premise_file $train_premise_file \
33 | --train_hypothesis_file $train_hypothesis_file \
34 | --train_label_file $train_label_file \
35 | --dev_premise_file $dev_premise_file \
36 | --dev_hypothesis_file $dev_hypothesis_file \
37 | --dev_label_file $dev_label_file \
38 | --test_premise_file $test_premise_file \
39 | --test_hypothesis_file $test_hypothesis_file \
40 | --test_label_file $test_label_file \
41 | --embedded_vector_file $embedded_vector_file \
42 | --vocab_file $vocab_file \
43 | --max_sequence_length $max_sequence_length \
44 | --embedding_dim $DIM \
45 | --l2_reg_lambda $lambda \
46 | --dropout_keep_prob $dropout_keep_prob \
47 | --batch_size $batch_size \
48 | --rnn_size $rnn_size \
49 | --evaluate_every $evaluate_every > log.txt 2>&1 &
50 |
--------------------------------------------------------------------------------