├── CITATION.cff ├── MLBiNet.py ├── README.md ├── ace_model_evaluation.py ├── data-ACE ├── example_new.dev ├── example_new.test └── example_new.train ├── dict ├── dict_gen.py ├── event_types.txt ├── ner_1.txt ├── ner_2.txt └── vocab.txt ├── embedding └── embeddings.txt ├── requirements.txt ├── run_experiments_multi.py ├── train_MLBiNet.py └── utils_init.py /CITATION.cff: -------------------------------------------------------------------------------- 1 | cff-version: "1.0.0" 2 | message: "If you use this code, please cite it using these metadata." 3 | title: "doced" 4 | repository-code: "https://github.com/zjunlp/DocED" 5 | authors: 6 | - family-names: Lou 7 | given-names: Dongfang 8 | - family-names: Liao 9 | given-names: Zhilin 10 | - family-names: Deng 11 | given-names: Shumin 12 | - family-names: Zhang 13 | given-names: Ningyu 14 | - family-names: Chen 15 | given-names: Huajun 16 | preferred-citation: 17 | type: article 18 | title: "MLBiNet: A Cross-Sentence Collective Event Detection Network" 19 | authors: 20 | - family-names: Lou 21 | given-names: Dongfang 22 | - family-names: Liao 23 | given-names: Zhilin 24 | - family-names: Deng 25 | given-names: Shumin 26 | - family-names: Zhang 27 | given-names: Ningyu 28 | - family-names: Chen 29 | given-names: Huajun 30 | journal: "arXiv preprint arXiv:2105.09458" 31 | year: 2021 32 | -------------------------------------------------------------------------------- /MLBiNet.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | 4 | import tensorflow as tf 5 | from tensorflow.contrib.layers.python.layers import initializers 6 | from tensorflow.contrib import rnn 7 | 8 | 9 | class MLBiNet: 10 | def __init__(self, 11 | encode_h, # hidden size of sentence encoding 12 | decode_h, # hidden size of sentence decoding 13 | tag_dim, # hidden size of event tag 14 | event_info_h, # hidden size of event info integration model 15 | word_emb_mat, # word embedding matrix 16 | batch_size, # batch size 17 | max_doc_len, # max length of doc 18 | max_seq_len, # max length of sequence 19 | id_O, # location of other event / negative event 20 | num_tag_layers, # number of tagging layers 21 | weight_decay, # weight decay of each tagging layer 22 | reverse_seq, # reverse the sequence or not when aggregating information of next sentence 23 | class_size, # class size 24 | tagging_mechanism="bidirectional_decoder", # forward_decoder, backward_decoder, bidirectional_decoder 25 | ner_size_1=None, # size of level-1 ner vocab 26 | ner_dim_1=None, # dimension of level-1 ner embedding 27 | ner_size_2=None, # size of level-2 ner vocab 28 | ner_dim_2=None, # dimension of level-2 ner embedding 29 | self_att_not=1, # concat word embedding or not 30 | context_info=1, # 0: single sentence information, 1: information of two neighbor sentences 31 | event_vector_trans=1 # nonlinear transformation for the event vector 32 | ): 33 | self.encode_h = encode_h 34 | self.decode_h = decode_h 35 | self.tag_dim = tag_dim 36 | self.event_info_h = event_info_h 37 | self.word_emb_mat = word_emb_mat 38 | self.batch_size = batch_size 39 | self.max_doc_len = max_doc_len 40 | self.max_seq_len = max_seq_len 41 | self.id_O = id_O 42 | self.num_tag_layers = num_tag_layers 43 | self.weight_decay = weight_decay 44 | self.reverse_seq = reverse_seq 45 | self.class_size = class_size 46 | self.tagging_mechanism = tagging_mechanism 47 | 48 | self.ner_size_1 = ner_size_1 49 | self.ner_dim_1 = ner_dim_1 50 | self.ner_size_2 = 
ner_size_2 51 | self.ner_dim_2 = ner_dim_2 52 | self.self_att_not = self_att_not 53 | 54 | self.context_info = context_info 55 | self.event_vector_trans = event_vector_trans 56 | 57 | # global initializer 58 | self.initializer = initializers.xavier_initializer() 59 | 60 | # initialize the word embedding matrix 61 | self.word_emb_mat = tf.cast(self.word_emb_mat, dtype=tf.float32) 62 | self.word_embedding_init() 63 | 64 | # placeholders 65 | self.input_docs = tf.placeholder(dtype=tf.int32, shape=[self.batch_size, 66 | self.max_doc_len, self.max_seq_len], name='input_docs') 67 | self.ner_docs_1 = tf.placeholder(dtype=tf.int32, shape=[self.batch_size, 68 | self.max_doc_len, self.max_seq_len], name='ner_docs_1') 69 | self.ner_docs_2 = tf.placeholder(dtype=tf.int32, shape=[self.batch_size, 70 | self.max_doc_len, self.max_seq_len], name='ner_docs_2') 71 | self.input_label_docs = tf.placeholder(dtype=tf.int32, 72 | shape=[self.batch_size, self.max_doc_len, self.max_seq_len], 73 | name='input_label_docs') 74 | self.valid_batch = tf.placeholder(dtype=tf.int32, shape=(), name='valid_batch') 75 | self.valid_sent_len = tf.placeholder(dtype=tf.int32, shape=[self.batch_size], name='valid_sent_len') 76 | self.valid_words_len = tf.placeholder(dtype=tf.int32, shape=[self.batch_size, self.max_doc_len], 77 | name='valid_words_len') 78 | self.dropout_rate = tf.placeholder(dtype=tf.float32, shape=(), name='dropout_rate') 79 | self.positive_weights = tf.placeholder(dtype=tf.float32, shape=(), name='positive_weights') 80 | 81 | # embedding layer 82 | self.word_embedding_lookup = self.embedding_layer() 83 | 84 | # [unk] event and semantic information aggregation embedding 85 | self.unk_event_semantic = tf.Variable(tf.truncated_normal(shape=[1, self.event_info_h], stddev=0.1), 86 | trainable=True, name="unk_event_semantic") 87 | # self.unk_event_semantic = tf.zeros(shape=[1,self.event_info_h]) 88 | 89 | # sentence encoding layer 90 | emb_size_curr = self.word_embedding_lookup.get_shape().as_list()[-1] 91 | self.lstm_inputs = tf.nn.dropout(self.word_embedding_lookup, keep_prob=1 - self.dropout_rate) 92 | 93 | print("embedding dimension before encoding layer:\t", emb_size_curr) 94 | 95 | words_enc, _, _ = self.sent_encode_layer( 96 | tf.reshape(self.lstm_inputs, [self.batch_size * self.max_doc_len, 97 | self.max_seq_len, emb_size_curr]), 98 | tf.reshape(self.valid_words_len, shape=[-1]), name='sent_enc_model') 99 | 100 | print("embedding dimension after encoding layer:\t", words_enc.get_shape().as_list()[-1]) 101 | 102 | # self-attention 103 | words_enc = tf.reshape(words_enc, [self.batch_size, self.max_doc_len, self.max_seq_len, -1]) 104 | if self.self_att_not: 105 | words_enc = self.sent_self_att(words_enc, self.valid_words_len) 106 | 107 | print("embedding dimension after self-attention:\t", words_enc.get_shape().as_list()[-1]) 108 | 109 | # concat with looking up embedding 110 | words_enc = tf.concat([words_enc, self.word_embedding_lookup], axis=-1) 111 | words_enc = tf.nn.dropout(words_enc, keep_prob=1 - self.dropout_rate) 112 | 113 | print("embedding dimension before decoding:\t", words_enc.get_shape().as_list()[-1]) 114 | 115 | # mask all padding vectors 116 | dim_curr = words_enc.get_shape().as_list()[-1] 117 | mask_padding_ind = tf.sequence_mask(self.valid_words_len, maxlen=self.max_seq_len, dtype=tf.float32) 118 | self.mask_padding_ind = tf.tile(tf.expand_dims(mask_padding_ind, axis=3), multiples=[1, 1, 1, dim_curr]) 119 | 120 | self.words_enc = words_enc * self.mask_padding_ind 121 | 122 | # tagging via 
multi-tagging network 123 |         if self.tagging_mechanism == "forward_decoder": 124 |             tag_vect, tag_vect_layerwise = self.forward_cross_sent_ED(words_enc=self.words_enc, tag_dim=self.tag_dim, 125 |                                                                       num_tag_layers=self.num_tag_layers, 126 |                                                                       weight_decay=self.weight_decay) 127 |         elif self.tagging_mechanism == "backward_decoder": 128 |             tag_vect, tag_vect_layerwise = self.backward_cross_sent_ED(words_enc=self.words_enc, tag_dim=self.tag_dim, 129 |                                                                        num_tag_layers=self.num_tag_layers, 130 |                                                                        weight_decay=self.weight_decay) 131 |         elif self.tagging_mechanism == "bidirectional_decoder": 132 |             tag_vect_fw, tag_vect_bw, tag_vect_lw_fw, tag_vect_lw_bw = self.biderectional_cross_sent_ED( 133 |                 words_enc=self.words_enc, tag_dim=self.tag_dim, num_tag_layers=self.num_tag_layers, 134 |                 weight_decay=self.weight_decay) 135 |             tag_vect = tf.concat([tag_vect_fw, tag_vect_bw], axis=-1) 136 |             tag_vect_layerwise = tf.concat([tag_vect_lw_fw, tag_vect_lw_bw], axis=-1) 137 |         elif self.tagging_mechanism == "agg_average": 138 |             tag_vect_fw, tag_vect_bw, tag_vect_lw_fw, tag_vect_lw_bw = self.agg_choice_cross_sent_ED( 139 |                 words_enc=self.words_enc, 140 |                 tag_dim=self.tag_dim, 141 |                 num_tag_layers=self.num_tag_layers, 142 |                 weight_decay=self.weight_decay, 143 |                 agg_choice="average") 144 |             tag_vect = tf.concat([tag_vect_fw, tag_vect_bw], axis=-1) 145 |             tag_vect_layerwise = tf.concat([tag_vect_lw_fw, tag_vect_lw_bw], axis=-1) 146 |         elif self.tagging_mechanism == "agg_concat": 147 |             tag_vect_fw, tag_vect_bw, tag_vect_lw_fw, tag_vect_lw_bw = self.agg_choice_cross_sent_ED( 148 |                 words_enc=self.words_enc, 149 |                 tag_dim=self.tag_dim, 150 |                 num_tag_layers=self.num_tag_layers, 151 |                 weight_decay=self.weight_decay, 152 |                 agg_choice="concat") 153 |             tag_vect = tf.concat([tag_vect_fw, tag_vect_bw], axis=-1) 154 |             tag_vect_layerwise = tf.concat([tag_vect_lw_fw, tag_vect_lw_bw], axis=-1) 155 |         else: 156 |             raise ValueError("tagging_mechanism assigned is not supported!") 157 | 158 |         # loss function 159 |         self.loss, self.label_true, self.label_pred, self.valid_len_list = self.loss_layer(tag_vect) 160 | 161 |     def word_embedding_init(self): 162 |         """ 163 |         initialize the word embedding matrix 164 |         """ 165 |         if self.word_emb_mat is None: 166 |             print("The embedding matrix must be initialized!") 167 |         else: 168 |             self.word_emb_mat = tf.Variable(self.word_emb_mat, trainable=True, name='word_emb_mat') 169 | 170 |     def embedding_layer(self): 171 |         """ 172 |         embedding layer with respect to the word embedding matrix 173 |         """ 174 |         embedding_tmp = tf.nn.embedding_lookup(self.word_emb_mat, self.input_docs) 175 |         # looking up the level-1 ner embedding 176 |         if self.ner_size_1 is not None: 177 |             ner_mat_1 = tf.get_variable(name="ner_mat_1", shape=[self.ner_size_1, self.ner_dim_1], 178 |                                         dtype=tf.float32, initializer=self.initializer) 179 |             emb_ner1_tmp = tf.nn.embedding_lookup(ner_mat_1, self.ner_docs_1) 180 |             embedding_tmp = tf.concat([embedding_tmp, emb_ner1_tmp], axis=-1) 181 |         # looking up the level-2 ner embedding 182 |         if self.ner_size_2 is not None: 183 |             ner_mat_2 = tf.get_variable(name="ner_mat_2", shape=[self.ner_size_2, self.ner_dim_2], 184 |                                         dtype=tf.float32, initializer=self.initializer) 185 |             emb_ner2_tmp = tf.nn.embedding_lookup(ner_mat_2, self.ner_docs_2) 186 |             embedding_tmp = tf.concat([embedding_tmp, emb_ner2_tmp], axis=-1) 187 |         return embedding_tmp 188 | 189 |     def sent_encode_layer(self, embedding_input, valid_len, name): 190 |         """ 191 |         sentence encoding layer to get representation of each word 192 |         """ 193 |         with tf.variable_scope(name, 
reuse=tf.AUTO_REUSE): 194 | lstm_cell = {} 195 | for direction in ["forward", "backward"]: 196 | with tf.variable_scope(direction): 197 | lstm_cell[direction] = rnn.CoupledInputForgetGateLSTMCell( 198 | self.encode_h, 199 | use_peepholes=True, 200 | initializer=self.initializer, 201 | state_is_tuple=True 202 | ) 203 | (outputs, 204 | (encoder_fw_final_state, 205 | encoder_bw_final_state)) = tf.nn.bidirectional_dynamic_rnn( 206 | lstm_cell["forward"], 207 | lstm_cell["backward"], 208 | inputs=embedding_input, 209 | dtype=tf.float32, 210 | sequence_length=valid_len 211 | ) 212 | words_out = tf.concat(outputs, axis=-1) 213 | final_state = tf.concat((encoder_fw_final_state.h, encoder_bw_final_state.h), -1) 214 | final_state_add = (encoder_fw_final_state.h + encoder_bw_final_state.h) / 2 215 | return words_out, final_state, final_state_add 216 | 217 | def sent_self_att(self, words_enc, valid_words_len): 218 | """ 219 | sentence-level self-attention 220 | :param words_enc: batch_size * max_doc_size * max_seq_len * dim 221 | :param valid_words_len: batch_size * max_doc_size 222 | """ 223 | enc_dim_tmp = words_enc.get_shape().as_list()[-1] 224 | words_enc_new0 = tf.reshape(words_enc, [self.batch_size * self.max_doc_len, self.max_seq_len, enc_dim_tmp]) 225 | valid_words_len_new = tf.reshape(valid_words_len, shape=[-1]) 226 | 227 | def self_att(variable_scope="attention", weight_name="att_W"): 228 | """ 229 | sentence level self attention with different window size 230 | """ 231 | with tf.variable_scope(variable_scope, reuse=tf.AUTO_REUSE): 232 | W = tf.get_variable(weight_name, 233 | shape=[enc_dim_tmp, enc_dim_tmp], 234 | dtype=tf.float32, 235 | initializer=self.initializer, 236 | ) 237 | # x'Wx 238 | words_enc_new = tf.reshape(words_enc, 239 | [self.batch_size * self.max_doc_len * self.max_seq_len, enc_dim_tmp]) 240 | words_enc_new = tf.matmul(words_enc_new, W) 241 | words_enc_new = tf.reshape(words_enc_new, 242 | [self.batch_size * self.max_doc_len, self.max_seq_len, enc_dim_tmp]) 243 | # tanh(x'Wx) 244 | logit_self_att = tf.matmul(words_enc_new, tf.transpose(words_enc_new0, perm=[0, 2, 1])) 245 | logit_self_att = tf.tanh(logit_self_att) 246 | probs = tf.nn.softmax(logit_self_att) 247 | 248 | # mask invalid words 249 | mask_words = tf.sequence_mask(valid_words_len_new, maxlen=self.max_seq_len, 250 | dtype=tf.float32) # 160 * 100 251 | mask_words = tf.tile(tf.expand_dims(mask_words, axis=1), 252 | multiples=[1, self.max_seq_len, 1]) # 160 * 100 * 100 253 | probs = probs * mask_words 254 | probs = tf.matmul(tf.matrix_diag(1 / (tf.reduce_sum(probs, axis=-1) + 1e-8)), 255 | probs) # re-standardize the probability 256 | # attention output 257 | att_output = tf.matmul(probs, words_enc_new0) 258 | att_output = tf.reshape(att_output, 259 | shape=[self.batch_size, self.max_doc_len, self.max_seq_len, enc_dim_tmp]) 260 | return att_output 261 | 262 | att_output = self_att(variable_scope="attention", weight_name="att_W") 263 | return att_output 264 | 265 | def info_agg_layer(self, pred_tag_vect, reverse_seq=False): 266 | """ 267 | sentence-level event and semantic information aggregation layer 268 | """ 269 | dim_curr = pred_tag_vect.get_shape().as_list()[-1] 270 | 271 | # mask invalid words 272 | mask_padding_ind = tf.sequence_mask(self.valid_words_len, maxlen=self.max_seq_len, dtype=tf.float32) 273 | mask_padding_ind = tf.tile(tf.expand_dims(mask_padding_ind, axis=3), multiples=[1, 1, 1, dim_curr]) 274 | pred_tag_vect = pred_tag_vect * mask_padding_ind 275 | 276 | # reverse the sequence 277 | if 
reverse_seq: 278 | pred_tag_vect = pred_tag_vect[:, :, ::-1, :] 279 | var_name = "reversed_sent_info_agg_layer" 280 | else: 281 | var_name = "sent_info_agg_layer" 282 | 283 | info_agg_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.event_info_h, forget_bias=0.0, state_is_tuple=True, 284 | name=var_name, reuse=tf.AUTO_REUSE) 285 | info_agg_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(info_agg_lstm_cell, output_keep_prob=1 - self.dropout_rate) 286 | # todo, change to bidirectional_dynamic_rnn 287 | # _, _, sent_event_sematic_info = self.sent_encode_layer( 288 | # embedding_input=tf.reshape(pred_tag_vect, shape=[self.batch_size * self.max_doc_len, self.max_seq_len, -1]), 289 | # valid_len=tf.reshape(self.valid_words_len, [-1]), 290 | # name=var_name 291 | # ) 292 | _, (_, sent_event_sematic_info) = tf.nn.dynamic_rnn(cell=info_agg_lstm_cell, 293 | inputs=tf.reshape(pred_tag_vect, 294 | shape=[self.batch_size * self.max_doc_len, 295 | self.max_seq_len, -1]), 296 | sequence_length=tf.reshape(self.valid_words_len, [-1]), 297 | dtype=tf.float32 298 | ) 299 | sent_event_sematic_info = tf.reshape(sent_event_sematic_info, 300 | shape=[self.batch_size, self.max_doc_len, -1]) 301 | return sent_event_sematic_info 302 | 303 | def info_agg_layer_bi(self, pred_tag_vect, reverse_seq=False): 304 | """ 305 | sentence-level event and semantic information aggregation layer 306 | """ 307 | dim_curr = pred_tag_vect.get_shape().as_list()[-1] 308 | 309 | # mask invalid words 310 | mask_padding_ind = tf.sequence_mask(self.valid_words_len, maxlen=self.max_seq_len, dtype=tf.float32) 311 | mask_padding_ind = tf.tile(tf.expand_dims(mask_padding_ind, axis=3), multiples=[1, 1, 1, dim_curr]) 312 | pred_tag_vect = pred_tag_vect * mask_padding_ind 313 | 314 | # reverse the sequence 315 | if reverse_seq: 316 | pred_tag_vect = pred_tag_vect[:, :, ::-1, :] 317 | var_name = "reversed_sent_info_agg_layer" 318 | else: 319 | var_name = "sent_info_agg_layer" 320 | 321 | # info_agg_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.event_info_h, forget_bias=0.0, state_is_tuple=True, 322 | # name=var_name, reuse=tf.AUTO_REUSE) 323 | # info_agg_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(info_agg_lstm_cell, output_keep_prob=1 - self.dropout_rate) 324 | # todo, change to bidirectional_dynamic_rnn 325 | _, _, sent_event_sematic_info = self.sent_encode_layer( 326 | embedding_input=tf.reshape(pred_tag_vect, shape=[self.batch_size * self.max_doc_len, self.max_seq_len, -1]), 327 | valid_len=tf.reshape(self.valid_words_len, [-1]), 328 | name=var_name 329 | ) 330 | # _, (_, sent_event_sematic_info) = tf.nn.dynamic_rnn(cell=info_agg_lstm_cell, 331 | # inputs=tf.reshape(pred_tag_vect, 332 | # shape=[self.batch_size * self.max_doc_len, 333 | # self.max_seq_len, -1]), 334 | # sequence_length=tf.reshape(self.valid_words_len, [-1]), 335 | # dtype=tf.float32 336 | # ) 337 | sent_event_sematic_info = tf.reshape(sent_event_sematic_info, 338 | shape=[self.batch_size, self.max_doc_len, -1]) 339 | return sent_event_sematic_info 340 | 341 | def project(self, h_state, lstm_dim): 342 | """ 343 | project the output of decoder model to a tag vector 344 | """ 345 | enc_dim = h_state.get_shape().as_list()[-1] 346 | with tf.variable_scope("tag_project_layer", reuse=tf.AUTO_REUSE): 347 | W = tf.get_variable("W", 348 | shape=[enc_dim, lstm_dim], 349 | dtype=tf.float32, 350 | initializer=self.initializer, 351 | ) 352 | b = tf.get_variable("b", 353 | shape=[lstm_dim], 354 | dtype=tf.float32, 355 | initializer=tf.zeros_initializer() 356 | ) 357 | y_pre = tf.add(tf.matmul(h_state, 
W), b) 358 | tag_pre = tf.cast(tf.argmax(tf.nn.softmax(y_pre), axis=-1), tf.float32) 359 | return y_pre, tag_pre 360 | 361 | def forward_cross_sent_ED(self, words_enc, tag_dim, num_tag_layers, weight_decay): 362 | """ 363 | forward-wise cross-sentence event tag event detection, modeling the forward-wise event correlation 364 | :param words_enc: words encoding 365 | :param num_tag_layers: number of tagging layers 366 | :param weight_decay: weight decay of tagging vectors of different layers 367 | """ 368 | # decoding layer 369 | # all layers share the same decoder layer 370 | # for the first decoder layer, we set c_{i-1} and c_{i+1} with unk_event_semantic 371 | lstm_outputs = tf.reshape(words_enc, shape=[self.batch_size * self.max_doc_len, self.max_seq_len, -1]) 372 | lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.decode_h, forget_bias=0.0, state_is_tuple=True, 373 | name="forward_rnn_decoder", reuse=tf.AUTO_REUSE) 374 | lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=1 - self.dropout_rate) 375 | 376 | # mutli-tagging block 377 | tag_final = tf.zeros(shape=[self.batch_size, self.max_doc_len, self.max_seq_len, tag_dim], dtype=tf.float32) 378 | tag_final_list = [] 379 | 380 | init_state = lstm_cell.zero_state(self.batch_size * self.max_doc_len, dtype=tf.float32) 381 | # event and semantic information of the previous sentence and next sentence sentence 382 | info_event_sem_pre_sent = tf.tile(self.unk_event_semantic, 383 | multiples=[self.batch_size * self.max_doc_len, 1]) 384 | info_event_sem_next_sent = tf.tile(self.unk_event_semantic, 385 | multiples=[self.batch_size * self.max_doc_len, 1]) 386 | 387 | # event and semantic information of the beginning sentence 388 | info_event_sem_init_sent = tf.tile(self.unk_event_semantic, multiples=[self.batch_size, 1]) 389 | info_event_sem_init_sent = tf.expand_dims(info_event_sem_init_sent, axis=1) 390 | info_event_sem_mat0 = tf.tile(tf.expand_dims(self.unk_event_semantic, axis=0), 391 | multiples=[self.batch_size, self.max_doc_len, 1]) 392 | with tf.variable_scope("forward_rnn_decoding_layer", reuse=tf.AUTO_REUSE): 393 | for layer_id in range(num_tag_layers): 394 | # initialize for each layer 395 | c_state, h_state = init_state 396 | tag_pre = tf.zeros([self.batch_size * self.max_doc_len, tag_dim]) 397 | tag_outputs = [] 398 | for time_step in range(self.max_seq_len): 399 | if time_step > 0: 400 | tf.get_variable_scope().reuse_variables() 401 | if self.num_tag_layers > 1: 402 | two_info = tf.concat([info_event_sem_pre_sent, info_event_sem_next_sent], axis=-1) 403 | input_all = tf.concat([lstm_outputs[:, time_step, :], two_info, tag_pre], axis=-1) 404 | else: 405 | input_all = tf.concat([lstm_outputs[:, time_step, :], tag_pre], axis=-1) 406 | (cell_output, (c_state, h_state)) = lstm_cell(input_all, (c_state, h_state)) 407 | tag_pre, tag_result = self.project(cell_output, tag_dim) 408 | if self.event_vector_trans: 409 | tag_pre = tf.tanh(tag_pre) 410 | tag_outputs.append(tag_pre) 411 | tag_outputs = tf.reshape(tf.transpose(tag_outputs, [1, 0, 2]), [self.batch_size, self.max_doc_len, 412 | self.max_seq_len, tag_dim]) 413 | if self.num_tag_layers > 1: 414 | # info aggregation of current sentence, [batch_size, max_doc_len,event_info_h] 415 | info_event_sem_current_sent = self.info_agg_layer(tag_outputs, reverse_seq=False) 416 | 417 | # corresponds to the information of previous sentence 418 | info_event_sem_pre_sent = tf.concat([info_event_sem_init_sent, 419 | info_event_sem_current_sent[:, :-1, :]], axis=1) 420 | 
info_event_sem_pre_sent = tf.reshape(info_event_sem_pre_sent, 421 | shape=[self.batch_size * self.max_doc_len, -1]) 422 | 423 | # find valid sentence firstly, and replace with emebedding of unk 424 | info_event_sem_current_sent_bw = self.info_agg_layer(tag_outputs, reverse_seq=self.reverse_seq) 425 | 426 | valid_sent_ind = tf.sequence_mask(self.valid_sent_len, maxlen=self.max_doc_len, dtype=tf.float32) 427 | valid_sent_ind = tf.tile(tf.expand_dims(valid_sent_ind, axis=2), multiples=[1, 1, self.event_info_h]) 428 | info_event_sem_current_sent_bw = info_event_sem_current_sent_bw * valid_sent_ind + \ 429 | info_event_sem_mat0 * (1 - valid_sent_ind) 430 | 431 | # corresponds to the information of previous sentence 432 | info_event_sem_next_sent = tf.concat([info_event_sem_current_sent_bw[:, 1:, :], info_event_sem_init_sent], 433 | axis=1) 434 | info_event_sem_next_sent = tf.reshape(info_event_sem_next_sent, 435 | shape=[self.batch_size * self.max_doc_len, -1]) 436 | 437 | tag_final += weight_decay ** layer_id * tag_outputs 438 | tag_final_list.append(tag_outputs) 439 | return tag_final, tag_final_list 440 | 441 | 442 | def backward_cross_sent_ED(self, words_enc, tag_dim, num_tag_layers, weight_decay): 443 | """ 444 | backward-wise cross-sentence event tag event detection, modeling the backward-wise event correlation 445 | """ 446 | # reshape the inputs and reverse it to cater to backward event extraction 447 | lstm_outputs = tf.reshape(words_enc, shape=[self.batch_size * self.max_doc_len, self.max_seq_len, -1]) 448 | lstm_outputs = lstm_outputs[:, ::-1, :] 449 | 450 | # decoding layer 451 | # all layers share the same decoder layer 452 | # for the first decoder layer, we set c_{i-1} and c_{i+1} with unk_event_semantic 453 | lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.decode_h, forget_bias=0.0, state_is_tuple=True, 454 | name="backward_rnn_decoder", reuse=tf.AUTO_REUSE) 455 | lstm_cell = tf.nn.rnn_cell.DropoutWrapper(lstm_cell, output_keep_prob=1 - self.dropout_rate) 456 | 457 | # mutli-tagging block 458 | tag_final = tf.zeros(shape=[self.batch_size, self.max_doc_len, self.max_seq_len, tag_dim], dtype=tf.float32) 459 | tag_final_list = [] 460 | 461 | init_state = lstm_cell.zero_state(self.batch_size * self.max_doc_len, dtype=tf.float32) 462 | # event and semantic information of the previous sentence and next sentence sentence 463 | info_event_sem_pre_sent = tf.tile(self.unk_event_semantic, 464 | multiples=[self.batch_size * self.max_doc_len, 1]) 465 | info_event_sem_next_sent = tf.tile(self.unk_event_semantic, 466 | multiples=[self.batch_size * self.max_doc_len, 1]) 467 | 468 | # event and semantic information of the final sentence 469 | info_event_sem_init_sent = tf.tile(self.unk_event_semantic, multiples=[self.batch_size, 1]) 470 | info_event_sem_init_sent = tf.expand_dims(info_event_sem_init_sent, axis=1) 471 | info_event_sem_mat0 = tf.tile(tf.expand_dims(self.unk_event_semantic, axis=0), 472 | multiples=[self.batch_size, self.max_doc_len, 1]) 473 | 474 | with tf.variable_scope("backward_rnn_decoding_layer", reuse=tf.AUTO_REUSE): 475 | for layer_id in range(num_tag_layers): 476 | # initialize for each layer 477 | c_state, h_state = init_state 478 | tag_next = tf.zeros([self.batch_size * self.max_doc_len, tag_dim]) 479 | tag_outputs = [] 480 | for time_step in range(self.max_seq_len): 481 | if time_step > 0: 482 | tf.get_variable_scope().reuse_variables() 483 | if self.num_tag_layers > 1: 484 | two_info = tf.concat([info_event_sem_pre_sent, info_event_sem_next_sent], axis=-1) 485 | 
input_all = tf.concat([lstm_outputs[:, time_step, :], two_info, tag_next], axis=-1) 486 | else: 487 | input_all = tf.concat([lstm_outputs[:, time_step, :], tag_next], axis=-1) 488 | (cell_output, (c_state, h_state)) = lstm_cell(input_all, (c_state, h_state)) 489 | tag_next, tag_result = self.project(cell_output, tag_dim) 490 | if self.event_vector_trans: 491 | tag_next = tf.tanh(tag_next) 492 | tag_outputs.append(tag_next) 493 | tag_outputs = tf.reshape(tf.transpose(tag_outputs, [1, 0, 2]), [self.batch_size, self.max_doc_len, 494 | self.max_seq_len, tag_dim]) 495 | # recover the tag_outputs in order 496 | tag_outputs = tag_outputs[:, :, ::-1, :] 497 | 498 | if self.num_tag_layers > 1: 499 | # info aggregation of current sentence, [batch_size, max_doc_len,event_info_h] 500 | info_event_sem_current_sent = self.info_agg_layer(tag_outputs, reverse_seq=self.reverse_seq) 501 | 502 | # find valid sentence firstly, and replace with emebedding of unk 503 | valid_sent_ind = tf.sequence_mask(self.valid_sent_len, maxlen=self.max_doc_len, dtype=tf.float32) 504 | valid_sent_ind = tf.tile(tf.expand_dims(valid_sent_ind, axis=2), multiples=[1, 1, self.event_info_h]) 505 | info_event_sem_current_sent = info_event_sem_current_sent * valid_sent_ind + \ 506 | info_event_sem_mat0 * (1 - valid_sent_ind) 507 | 508 | # corresponds to the information of previous sentence 509 | info_event_sem_next_sent = tf.concat([info_event_sem_current_sent[:, 1:, :], info_event_sem_init_sent], 510 | axis=1) 511 | info_event_sem_next_sent = tf.reshape(info_event_sem_next_sent, 512 | shape=[self.batch_size * self.max_doc_len, -1]) 513 | 514 | # information of previous sentence, [batch_size, max_doc_len,event_info_h] 515 | info_event_sem_current_sent = self.info_agg_layer(tag_outputs, reverse_seq=False) 516 | info_event_sem_pre_sent = tf.concat([info_event_sem_init_sent, info_event_sem_current_sent[:, :-1, :]], 517 | axis=1) 518 | info_event_sem_pre_sent = tf.reshape(info_event_sem_pre_sent, 519 | shape=[self.batch_size * self.max_doc_len, -1]) 520 | 521 | tag_final += weight_decay ** layer_id * tag_outputs 522 | tag_final_list.append(tag_outputs) 523 | return tag_final, tag_final_list 524 | 525 | 526 | def biderectional_cross_sent_ED(self, words_enc, tag_dim, num_tag_layers, weight_decay): 527 | """ 528 | birectional cross-sentence event tag event detection, modeling birectional event correlation 529 | """ 530 | # decoding layer 531 | # all layers share the same decoder layer 532 | # for the first decoder layer, we set c_{i-1} and c_{i+1} with unk_event_semantic 533 | lstm_outputs = tf.reshape(words_enc, shape=[self.batch_size * self.max_doc_len, self.max_seq_len, -1]) 534 | backward_lstm_outputs = lstm_outputs[:, ::-1, :] 535 | 536 | fw_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.decode_h, forget_bias=0.0, state_is_tuple=True, 537 | name="forward_rnn_decoder", reuse=tf.AUTO_REUSE) 538 | fw_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(fw_lstm_cell, output_keep_prob=1 - self.dropout_rate) 539 | 540 | bw_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.decode_h, forget_bias=0.0, state_is_tuple=True, 541 | name="backward_rnn_decoder", reuse=tf.AUTO_REUSE) 542 | bw_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(bw_lstm_cell, output_keep_prob=1 - self.dropout_rate) 543 | 544 | # mutli-tagging block 545 | tag_final_fw = tf.zeros(shape=[self.batch_size, self.max_doc_len, self.max_seq_len, tag_dim], dtype=tf.float32) 546 | tag_final_bw = tf.zeros(shape=[self.batch_size, self.max_doc_len, self.max_seq_len, tag_dim], dtype=tf.float32) 547 | 
tag_final_list_fw = [] 548 | tag_final_list_bw = [] 549 | 550 | fw_init_state = fw_lstm_cell.zero_state(self.batch_size * self.max_doc_len, dtype=tf.float32) 551 | bw_init_state = bw_lstm_cell.zero_state(self.batch_size * self.max_doc_len, dtype=tf.float32) 552 | # event and semantic information of the previous sentence and next sentence sentence 553 | info_event_sem_pre_sent = tf.tile(self.unk_event_semantic, 554 | multiples=[self.batch_size * self.max_doc_len, 1]) 555 | info_event_sem_next_sent = tf.tile(self.unk_event_semantic, 556 | multiples=[self.batch_size * self.max_doc_len, 1]) 557 | 558 | # event and semantic information of the beginning sentence 559 | info_event_sem_init_sent = tf.tile(self.unk_event_semantic, multiples=[self.batch_size, 1]) 560 | info_event_sem_init_sent = tf.expand_dims(info_event_sem_init_sent, axis=1) 561 | info_event_sem_mat0 = tf.tile(tf.expand_dims(self.unk_event_semantic, axis=0), 562 | multiples=[self.batch_size, self.max_doc_len, 1]) 563 | 564 | with tf.variable_scope("bidirectional_rnn_decoding_layer", reuse=tf.AUTO_REUSE): 565 | for layer_id in range(num_tag_layers): 566 | # initialize for each layer 567 | fw_c_state, fw_h_state = fw_init_state 568 | bw_c_state, bw_h_state = bw_init_state 569 | tag_fw = tf.zeros([self.batch_size * self.max_doc_len, tag_dim]) 570 | tag_bw = tf.zeros([self.batch_size * self.max_doc_len, tag_dim]) 571 | fw_tag_outputs = [] 572 | bw_tag_outputs = [] 573 | for time_step in range(self.max_seq_len): 574 | if time_step > 0: 575 | tf.get_variable_scope().reuse_variables() 576 | # concat two event information 577 | if self.num_tag_layers > 1: 578 | if not self.context_info: 579 | fw_input_all = tf.concat([lstm_outputs[:, time_step, :], info_event_sem_pre_sent, tag_fw], 580 | axis=-1) 581 | bw_input_all = tf.concat([backward_lstm_outputs[:, time_step, :], 582 | info_event_sem_next_sent, tag_bw], axis=-1) 583 | else: 584 | two_info = tf.concat([info_event_sem_pre_sent, info_event_sem_next_sent], axis=-1) 585 | fw_input_all = tf.concat([lstm_outputs[:, time_step, :], two_info, tag_fw], axis=-1) 586 | bw_input_all = tf.concat([backward_lstm_outputs[:, time_step, :], two_info, tag_bw], axis=-1) 587 | else: 588 | fw_input_all = tf.concat([lstm_outputs[:, time_step, :], tag_fw], 589 | axis=-1) 590 | bw_input_all = tf.concat([backward_lstm_outputs[:, time_step, :], tag_bw], axis=-1) 591 | # forward decoder 592 | (fw_cell_output, (fw_c_state, fw_h_state)) = fw_lstm_cell(fw_input_all, (fw_c_state, fw_h_state)) 593 | tag_fw, _ = self.project(fw_cell_output, tag_dim) 594 | if self.event_vector_trans: 595 | tag_fw = tf.tanh(tag_fw) 596 | fw_tag_outputs.append(tag_fw) 597 | 598 | # backward decoder 599 | (bw_cell_output, (bw_c_state, bw_h_state)) = bw_lstm_cell(bw_input_all, (bw_c_state, bw_h_state)) 600 | tag_bw, _ = self.project(bw_cell_output, tag_dim) 601 | if self.event_vector_trans: 602 | tag_bw = tf.tanh(tag_bw) 603 | bw_tag_outputs.append(tag_bw) 604 | 605 | fw_tag_outputs = tf.reshape(tf.transpose(fw_tag_outputs, [1, 0, 2]), [self.batch_size, self.max_doc_len, 606 | self.max_seq_len, tag_dim]) 607 | bw_tag_outputs = tf.reshape(tf.transpose(bw_tag_outputs, [1, 0, 2]), [self.batch_size, self.max_doc_len, 608 | self.max_seq_len, tag_dim]) 609 | # recover the bw_tag_outputs in order 610 | bw_tag_outputs = bw_tag_outputs[:, :, ::-1, :] 611 | 612 | tag_final_fw += weight_decay ** layer_id * fw_tag_outputs 613 | tag_final_list_fw.append(fw_tag_outputs) 614 | tag_final_bw += weight_decay ** layer_id * bw_tag_outputs 615 | 
tag_final_list_bw.append(bw_tag_outputs) 616 | if self.num_tag_layers > 1: 617 | # -----------update event and semantic information for the previous and next setence---------- 618 | # info aggregation of current sentence, [batch_size, max_doc_len,event_info_h] 619 | info_event_sem_current_sent_fw = self.info_agg_layer(tf.concat([fw_tag_outputs, bw_tag_outputs], 620 | axis=-1), reverse_seq=False) 621 | # corresponds to the information of previous sentence 622 | info_event_sem_pre_sent = tf.concat([info_event_sem_init_sent, info_event_sem_current_sent_fw[:, :-1, :]], 623 | axis=1) 624 | info_event_sem_pre_sent = tf.reshape(info_event_sem_pre_sent, 625 | shape=[self.batch_size * self.max_doc_len, -1]) 626 | 627 | # find valid sentence firstly, and replace with emebedding of unk 628 | # if self.reverse_seq: 629 | info_event_sem_current_sent_bw = self.info_agg_layer(tf.concat([fw_tag_outputs, bw_tag_outputs], 630 | axis=-1), reverse_seq=self.reverse_seq) 631 | # else: 632 | # info_event_sem_current_sent_bw = info_event_sem_current_sent_fw 633 | 634 | valid_sent_ind = tf.sequence_mask(self.valid_sent_len, maxlen=self.max_doc_len, dtype=tf.float32) 635 | valid_sent_ind = tf.tile(tf.expand_dims(valid_sent_ind, axis=2), multiples=[1, 1, self.event_info_h]) 636 | info_event_sem_current_sent_bw = info_event_sem_current_sent_bw * valid_sent_ind + \ 637 | info_event_sem_mat0 * (1 - valid_sent_ind) 638 | 639 | # corresponds to the information of previous sentence 640 | info_event_sem_next_sent = tf.concat([info_event_sem_current_sent_bw[:, 1:, :], info_event_sem_init_sent], 641 | axis=1) 642 | info_event_sem_next_sent = tf.reshape(info_event_sem_next_sent, 643 | shape=[self.batch_size * self.max_doc_len, -1]) 644 | return tag_final_fw, tag_final_bw, tag_final_list_fw, tag_final_list_bw 645 | 646 | 647 | def agg_choice_cross_sent_ED(self, words_enc, tag_dim, num_tag_layers, weight_decay, agg_choice="lstm"): 648 | """ 649 | different choice of aggregation function 650 | agg_choice: average, lstm, or concat (concat state) 651 | """ 652 | # decoding layer 653 | # all layers share the same decoder layer 654 | # for the first decoder layer, we set c_{i-1} and c_{i+1} with unk_event_semantic 655 | lstm_outputs = tf.reshape(words_enc, shape=[self.batch_size * self.max_doc_len, self.max_seq_len, -1]) 656 | backward_lstm_outputs = lstm_outputs[:, ::-1, :] 657 | 658 | fw_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.decode_h, forget_bias=0.0, state_is_tuple=True, 659 | name="forward_rnn_decoder", reuse=tf.AUTO_REUSE) 660 | fw_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(fw_lstm_cell, output_keep_prob=1 - self.dropout_rate) 661 | 662 | bw_lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(self.decode_h, forget_bias=0.0, state_is_tuple=True, 663 | name="backward_rnn_decoder", reuse=tf.AUTO_REUSE) 664 | bw_lstm_cell = tf.nn.rnn_cell.DropoutWrapper(bw_lstm_cell, output_keep_prob=1 - self.dropout_rate) 665 | 666 | # mutli-tagging block 667 | tag_final_fw = tf.zeros(shape=[self.batch_size, self.max_doc_len, self.max_seq_len, tag_dim], dtype=tf.float32) 668 | tag_final_bw = tf.zeros(shape=[self.batch_size, self.max_doc_len, self.max_seq_len, tag_dim], dtype=tf.float32) 669 | tag_final_list_fw = [] 670 | tag_final_list_bw = [] 671 | 672 | fw_init_state = fw_lstm_cell.zero_state(self.batch_size * self.max_doc_len, dtype=tf.float32) 673 | bw_init_state = bw_lstm_cell.zero_state(self.batch_size * self.max_doc_len, dtype=tf.float32) 674 | # event and semantic information of the previous sentence and next sentence sentence 675 | if 
agg_choice == "lstm": 676 | info_event_sem_pre_sent = tf.tile(self.unk_event_semantic, 677 | multiples=[self.batch_size * self.max_doc_len, 1]) 678 | info_event_sem_next_sent = tf.tile(self.unk_event_semantic, 679 | multiples=[self.batch_size * self.max_doc_len, 1]) 680 | else: 681 | info_event_sem_pre_sent = tf.zeros(shape=[self.batch_size * self.max_doc_len, 1 * tag_dim]) 682 | info_event_sem_next_sent = tf.zeros(shape=[self.batch_size * self.max_doc_len, 1 * tag_dim]) 683 | 684 | # event and semantic information of the beginning sentence 685 | info_event_sem_init_sent = tf.tile(self.unk_event_semantic, multiples=[self.batch_size, 1]) 686 | info_event_sem_init_sent = tf.expand_dims(info_event_sem_init_sent, axis=1) 687 | info_event_sem_mat0 = tf.tile(tf.expand_dims(self.unk_event_semantic, axis=0), 688 | multiples=[self.batch_size, self.max_doc_len, 1]) 689 | 690 | with tf.variable_scope("bidirectional_rnn_decoding_layer", reuse=tf.AUTO_REUSE): 691 | for layer_id in range(num_tag_layers): 692 | # initialize for each layer 693 | fw_c_state, fw_h_state = fw_init_state 694 | bw_c_state, bw_h_state = bw_init_state 695 | tag_fw = tf.zeros([self.batch_size * self.max_doc_len, tag_dim]) 696 | tag_bw = tf.zeros([self.batch_size * self.max_doc_len, tag_dim]) 697 | fw_tag_outputs = [] 698 | bw_tag_outputs = [] 699 | for time_step in range(self.max_seq_len): 700 | if time_step > 0: 701 | tf.get_variable_scope().reuse_variables() 702 | # concat two event information 703 | if self.num_tag_layers > 1: 704 | if not self.context_info: 705 | fw_input_all = tf.concat([lstm_outputs[:, time_step, :], info_event_sem_pre_sent, tag_fw], 706 | axis=-1) 707 | bw_input_all = tf.concat([backward_lstm_outputs[:, time_step, :], 708 | info_event_sem_next_sent, tag_bw], axis=-1) 709 | else: 710 | two_info = tf.concat([info_event_sem_pre_sent, info_event_sem_next_sent], axis=-1) 711 | fw_input_all = tf.concat([lstm_outputs[:, time_step, :], two_info, tag_fw], axis=-1) 712 | bw_input_all = tf.concat([backward_lstm_outputs[:, time_step, :], two_info, tag_bw], 713 | axis=-1) 714 | else: 715 | fw_input_all = tf.concat([lstm_outputs[:, time_step, :], tag_fw], 716 | axis=-1) 717 | bw_input_all = tf.concat([backward_lstm_outputs[:, time_step, :], tag_bw], axis=-1) 718 | # forward decoder 719 | (fw_cell_output, (fw_c_state, fw_h_state)) = fw_lstm_cell(fw_input_all, (fw_c_state, fw_h_state)) 720 | tag_fw, _ = self.project(fw_cell_output, tag_dim) 721 | if self.event_vector_trans: 722 | tag_fw = tf.tanh(tag_fw) 723 | fw_tag_outputs.append(tag_fw) 724 | 725 | # backward decoder 726 | (bw_cell_output, (bw_c_state, bw_h_state)) = bw_lstm_cell(bw_input_all, (bw_c_state, bw_h_state)) 727 | tag_bw, _ = self.project(bw_cell_output, tag_dim) 728 | if self.event_vector_trans: 729 | tag_bw = tf.tanh(tag_bw) 730 | bw_tag_outputs.append(tag_bw) 731 | 732 | fw_tag_outputs = tf.reshape(tf.transpose(fw_tag_outputs, [1, 0, 2]), [self.batch_size, self.max_doc_len, 733 | self.max_seq_len, tag_dim]) 734 | bw_tag_outputs = tf.reshape(tf.transpose(bw_tag_outputs, [1, 0, 2]), [self.batch_size, self.max_doc_len, 735 | self.max_seq_len, tag_dim]) 736 | # recover the bw_tag_outputs in order 737 | bw_tag_outputs = bw_tag_outputs[:, :, ::-1, :] 738 | 739 | tag_final_fw += weight_decay ** layer_id * fw_tag_outputs 740 | tag_final_list_fw.append(fw_tag_outputs) 741 | tag_final_bw += weight_decay ** layer_id * bw_tag_outputs 742 | tag_final_list_bw.append(bw_tag_outputs) 743 | if self.num_tag_layers > 1: 744 | # -----------update event and semantic 
information for the previous and next setence---------- 745 | # info aggregation of current sentence, [batch_size, max_doc_len,event_info_h] 746 | if agg_choice == "lstm": 747 | info_event_sem_current_sent_fw = self.info_agg_layer(tf.concat([fw_tag_outputs, bw_tag_outputs], 748 | axis=-1), reverse_seq=False) 749 | # corresponds to the information of previous sentence 750 | info_event_sem_pre_sent = tf.concat( 751 | [info_event_sem_init_sent, info_event_sem_current_sent_fw[:, :-1, :]], 752 | axis=1) 753 | info_event_sem_pre_sent = tf.reshape(info_event_sem_pre_sent, 754 | shape=[self.batch_size * self.max_doc_len, -1]) 755 | 756 | # find valid sentence firstly, and replace with emebedding of unk 757 | info_event_sem_current_sent_bw = self.info_agg_layer( 758 | tf.concat([fw_tag_outputs, bw_tag_outputs],axis=-1), 759 | reverse_seq=self.reverse_seq) 760 | 761 | valid_sent_ind = tf.sequence_mask(self.valid_sent_len, maxlen=self.max_doc_len, dtype=tf.float32) 762 | valid_sent_ind = tf.tile(tf.expand_dims(valid_sent_ind, axis=2), 763 | multiples=[1, 1, self.event_info_h]) 764 | info_event_sem_current_sent_bw = info_event_sem_current_sent_bw * valid_sent_ind + \ 765 | info_event_sem_mat0 * (1 - valid_sent_ind) 766 | 767 | # corresponds to the information of previous sentence 768 | info_event_sem_next_sent = tf.concat( 769 | [info_event_sem_current_sent_bw[:, 1:, :], info_event_sem_init_sent], 770 | axis=1) 771 | info_event_sem_next_sent = tf.reshape(info_event_sem_next_sent, 772 | shape=[self.batch_size * self.max_doc_len, -1]) 773 | elif agg_choice == "average": 774 | # two_outputs = tf.concat([fw_tag_outputs, bw_tag_outputs], axis=-1) 775 | two_outputs = (fw_tag_outputs + bw_tag_outputs) / 2 776 | dim_tmp = two_outputs.get_shape().as_list()[-1] 777 | 778 | valid_sent_ind = tf.sequence_mask(self.valid_words_len, maxlen= self.max_seq_len) 779 | valid_sent_ind = tf.tile(tf.expand_dims(valid_sent_ind, axis=3), [1, 1, 1, dim_tmp]) 780 | avg_vect = tf.reduce_sum(two_outputs * tf.cast(valid_sent_ind, dtype=tf.float32), axis=-2) 781 | 782 | valid_words_inv = tf.tile(tf.expand_dims(1/self.valid_words_len, axis=2), 783 | [1, 1, dim_tmp]) 784 | avg_vect = avg_vect * tf.cast(valid_words_inv, dtype=tf.float32) 785 | pad_vect = tf.zeros(shape=[self.batch_size, 1, dim_tmp]) 786 | 787 | info_event_sem_pre_sent = tf.concat([pad_vect, avg_vect[:, :-1, :]], axis=1) 788 | info_event_sem_pre_sent = tf.reshape(info_event_sem_pre_sent, 789 | shape=[self.batch_size * self.max_doc_len, -1]) 790 | info_event_sem_next_sent = tf.concat([avg_vect[:, 1:, :], pad_vect], axis=1) 791 | info_event_sem_next_sent = tf.reshape(info_event_sem_next_sent, 792 | shape=[self.batch_size * self.max_doc_len, -1]) 793 | elif agg_choice == "concat": 794 | """ 795 | element-wise sum 796 | """ 797 | # two_outputs = tf.concat([fw_tag_outputs, bw_tag_outputs], axis=-1) 798 | two_outputs = (fw_tag_outputs + bw_tag_outputs) / 2 799 | dim_tmp = two_outputs.get_shape().as_list()[-1] 800 | 801 | valid_sent_ind = tf.one_hot(self.valid_words_len, depth=self.max_seq_len) 802 | valid_sent_ind = tf.tile(tf.expand_dims(valid_sent_ind, axis=3),[1,1,1,dim_tmp]) 803 | print("shape of two_outputs:\t",two_outputs.get_shape()) 804 | print("shape of valid_sent_ind:\t",valid_sent_ind.get_shape()) 805 | 806 | first_vect = two_outputs[:, :, 0, :] 807 | last_vect = tf.reduce_sum(two_outputs * valid_sent_ind,axis=-2) 808 | print("shape of last_vect:\t",last_vect.get_shape()) 809 | 810 | # sent_vect = tf.concat([first_vect, last_vect], axis=-1) 811 | sent_vect = 
(first_vect + last_vect) / 2 812 |                         pad_vect = tf.zeros(shape=[self.batch_size, 1, sent_vect.get_shape().as_list()[-1]]) 813 |                         info_event_sem_pre_sent = tf.concat([pad_vect, sent_vect[:, :-1, :]], axis=1) 814 |                         info_event_sem_pre_sent = tf.reshape(info_event_sem_pre_sent, 815 |                                                              shape=[self.batch_size * self.max_doc_len, -1]) 816 |                         info_event_sem_next_sent = tf.concat([sent_vect[:, 1:, :], pad_vect], axis=1) 817 |                         info_event_sem_next_sent = tf.reshape(info_event_sem_next_sent, 818 |                                                               shape=[self.batch_size * self.max_doc_len, -1]) 819 |                     else: 820 |                         raise ValueError("agg_choice is not supported!") 821 |         return tag_final_fw, tag_final_bw, tag_final_list_fw, tag_final_list_bw 822 | 823 | 824 |     def fully_connected_layer(self, tag_vects): 825 |         """ 826 |         fully connected layer 827 |         """ 828 |         tag_vects = tf.nn.dropout(tag_vects, keep_prob=1 - self.dropout_rate) 829 |         enc_dim = tag_vects.get_shape().as_list()[-1] 830 |         with tf.variable_scope("logits"): 831 |             W = tf.get_variable("W", 832 |                                 shape=[enc_dim, self.class_size], 833 |                                 dtype=tf.float32, 834 |                                 initializer=self.initializer 835 |                                 ) 836 |             b = tf.get_variable("b", 837 |                                 shape=[self.class_size], 838 |                                 dtype=tf.float32, 839 |                                 initializer=tf.zeros_initializer() 840 |                                 ) 841 |             output = tf.reshape(tag_vects, shape=[-1, enc_dim]) 842 |             logits_ed = tf.nn.xw_plus_b(output, W, b) 843 |             logits_ed = tf.reshape(logits_ed, [self.batch_size, self.max_doc_len, self.max_seq_len, self.class_size]) 844 |         return logits_ed 845 | 846 | 847 |     def loss_layer(self, tag_vects): 848 |         """ 849 |         define the loss function 850 |         """ 851 |         # projection layer 852 |         logits_ed = self.fully_connected_layer(tag_vects) 853 | 854 |         # calculate loss 855 |         with tf.variable_scope("loss"): 856 |             losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits_ed, labels=self.input_label_docs) 857 | 858 |             # mask invalid batches 859 |             mask_batches_0 = tf.sequence_mask(self.valid_batch, maxlen=self.batch_size) 860 |             valid_len_list = tf.boolean_mask(self.valid_words_len, mask_batches_0) 861 |             mask_batches = tf.tile(tf.expand_dims(tf.expand_dims(mask_batches_0, 1), 2), 862 |                                    multiples=[1, self.max_doc_len, self.max_seq_len]) 863 |             # mask invalid sents 864 |             mask_sents = tf.sequence_mask(self.valid_sent_len, maxlen=self.max_doc_len) 865 |             valid_len_list = tf.boolean_mask(valid_len_list, tf.boolean_mask(mask_sents, mask_batches_0)) 866 |             mask_sents = tf.tile(tf.expand_dims(mask_sents, axis=2), multiples=[1, 1, self.max_seq_len]) 867 | 868 |             # mask invalid words 869 |             mask_words = tf.sequence_mask(self.valid_words_len, maxlen=self.max_seq_len) 870 | 871 |             valid_ind = tf.cast(mask_batches, tf.float32) * tf.cast(mask_sents, tf.float32) * tf.cast(mask_words, 872 |                                                                                                       tf.float32) 873 |             losses = losses * valid_ind 874 | 875 |             # weight the loss of positive events 876 |             ind_id_O = tf.cast(tf.equal(self.input_label_docs, self.id_O), tf.float32) 877 |             losses = losses * ind_id_O + self.positive_weights * losses * (1 - ind_id_O) 878 | 879 |             loss = tf.reduce_sum(losses) / tf.reduce_sum(valid_ind) 880 | 881 |             mask_all_invalid = tf.cast(valid_ind, dtype=tf.bool) 882 | 883 |             label_pred = tf.boolean_mask(tf.cast(tf.argmax(logits_ed, axis=-1), tf.float32), mask_all_invalid) 884 |             label_true = tf.boolean_mask(tf.cast(self.input_label_docs, dtype=tf.float32), mask_all_invalid) 885 | 886 |             self.final_words_id = tf.boolean_mask(self.input_docs, mask_all_invalid) 887 | 888 |         return loss, label_true, label_pred, valid_len_list 889 | 890 | 891 | if __name__ == "__main__": 892 |     pass 893 | 
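Note on usage: MLBiNet builds its entire TensorFlow 1.x graph in the constructor, from the input placeholders down to self.loss. The following minimal smoke-test sketch is not part of the repository; the hyperparameter values, id_O=0, and class_size=67 are illustrative assumptions read off dict/event_types.txt, and train_MLBiNet.py remains the actual training entry point.

```python
# Minimal smoke test, assuming TensorFlow 1.x (the model relies on tf.contrib).
import numpy as np
import tensorflow as tf

from MLBiNet import MLBiNet

batch_size, max_doc_len, max_seq_len = 2, 4, 10
toy_emb = np.random.randn(100, 50).astype(np.float32)  # toy vocab: 100 words, dim 50

model = MLBiNet(encode_h=64, decode_h=64, tag_dim=100, event_info_h=64,
                word_emb_mat=toy_emb, batch_size=batch_size,
                max_doc_len=max_doc_len, max_seq_len=max_seq_len,
                id_O=0,                # assumes "O" has id 0, as in dict/event_types.txt
                num_tag_layers=2, weight_decay=1.0, reverse_seq=False,
                class_size=67)         # 33 B-/I- event type pairs plus "O"

train_op = tf.train.AdamOptimizer(1e-3).minimize(model.loss)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed = {
        model.input_docs: np.random.randint(0, 100, (batch_size, max_doc_len, max_seq_len)),
        model.input_label_docs: np.zeros((batch_size, max_doc_len, max_seq_len), np.int32),
        model.valid_batch: batch_size,  # all documents in the batch are valid
        model.valid_sent_len: np.full((batch_size,), max_doc_len, np.int32),
        model.valid_words_len: np.full((batch_size, max_doc_len), max_seq_len, np.int32),
        model.dropout_rate: 0.0,
        model.positive_weights: 1.0,
    }
    loss_val, _ = sess.run([model.loss, train_op], feed_dict=feed)
    print("toy loss:", loss_val)
```

The ner_docs_1 and ner_docs_2 placeholders only enter the graph when ner_size_1 / ner_size_2 are given, so they need not be fed here.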
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DocED 2 | This repository is the official implementation of the ACL 2021 paper [MLBiNet: A Cross-Sentence Collective Event Detection Network](https://arxiv.org/pdf/2105.09458v1.pdf). 3 | 4 | ## Requirements 5 | ### To install basic requirements: 6 | pip install -r requirements.txt 7 | 8 | ## Datasets 9 | ACE2005 can be found here: https://catalog.ldc.upenn.edu/LDC2006T06 10 | 11 | ## Basic training 12 | ### To evaluate a setting with several random trials, execute 13 | python run_experiments_multi.py 14 | 15 | #### Main hyperparameters in train_MLBiNet.py include (see the example invocation below): 16 | --tagging_mechanism, the mechanism used to model event inter-dependency; choose one of "forward_decoder", "backward_decoder" or "bidirectional_decoder" 17 | 18 | --num_tag_layers, number of tagging layers; 1 performs sentence-level ED, 2 additionally aggregates information from adjacent sentences, and so on 19 | 20 | --max_doc_len, maximum number of consecutive sentences extracted as a mini-document; typical values are 8 or 16 21 | 22 | --tag_dim, dimension of a uni-directional event tagging vector 23 | 24 | --self_att_not, whether to apply the self-attention mechanism in the sentence encoder 25 | 26 | ## Main results 27 | ### Overall performance on ACE2005 28 | ![image](https://user-images.githubusercontent.com/32415352/118842889-252e6900-b8fc-11eb-9de8-dba5f82377f4.png) 29 | 30 | ### Performance on detecting multiple events collectively 31 | ![image](https://user-images.githubusercontent.com/32415352/118843522-b9003500-b8fc-11eb-8e3f-759f6d37f98a.png) 32 | 33 | where 1/1 denotes sentences containing exactly one event and 1/n denotes sentences containing multiple events. 
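For reference, a typical single run combining the flags above might look like the following; the flag names come from the hyperparameter list, while the values shown are illustrative rather than the paper's tuned settings:

python train_MLBiNet.py --tagging_mechanism bidirectional_decoder --num_tag_layers 2 --max_doc_len 8 --tag_dim 100 --self_att_not 1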
34 | 35 | ### Performance of our proposed method with different multi-layer settings or decoder methods 36 | ![image](https://user-images.githubusercontent.com/32415352/118843910-11cfcd80-b8fd-11eb-965c-fbcde1319983.png) 37 | 38 | ## How to Cite 39 | 40 | ```bibtex 41 | @inproceedings{ACL2021_MLBiNet, 42 | author = {Dongfang Lou and 43 | Zhilin Liao and 44 | Shumin Deng and 45 | Ningyu Zhang and 46 | Huajun Chen}, 47 | title = {MLBiNet: A Cross-Sentence Collective Event Detection Network}, 48 | booktitle = {{ACL}}, 49 | publisher = {Association for Computational Linguistics}, 50 | year = {2021} 51 | } 52 | ``` 53 | -------------------------------------------------------------------------------- /ace_model_evaluation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding: utf-8 -*- 3 | 4 | 5 | def ace_pred_result_stat(filename): 6 | wlast_true = "" 7 | wlast_pred = "" 8 | true_dict = set() 9 | pred_dict = set() 10 | id_true_init, id_true_end, id_pred_init, id_pred_end = 0, 0, 0, 0 11 | with open(filename,encoding='utf-8',mode='r') as f: 12 | for i,line in enumerate(f): 13 | line = line.strip() 14 | if len(line) > 0: 15 | line_split = line.split('\t') 16 | ## true label stats 17 | if line_split[1].startswith("B-"): 18 | if wlast_true != '': 19 | true_dict.add('\t'.join([str(id_true_init), str(max(id_true_init, id_true_end)), wlast_true])) 20 | id_true_init = i # init id 21 | id_true_end = i # end id 22 | wlast_true = line_split[1][2:] 23 | elif "I-" + wlast_true != line_split[1]: # the last id is end of a trigger 24 | if wlast_true != '': # the last one is a trigger 25 | true_dict.add('\t'.join([str(id_true_init), str(max(id_true_init, id_true_end)), wlast_true])) 26 | wlast_true = "" 27 | elif "I-" + wlast_true == line_split[1]: # the same with the last event type 28 | id_true_end = i 29 | wlast_true = line_split[1][2:] 30 | else: # different from last label, and not start with B- 31 | if wlast_true != '': 32 | true_dict.add('\t'.join([str(id_true_init), str(max(id_true_init, id_true_end)), wlast_true])) 33 | wlast_true = "" 34 | 35 | ## pred label stats 36 | if line_split[2].startswith("B-"): 37 | if wlast_pred != '': 38 | pred_dict.add('\t'.join([str(id_pred_init), str(max(id_pred_init, id_pred_end)), wlast_pred])) 39 | id_pred_init = i 40 | id_pred_end = i 41 | wlast_pred = line_split[2][2:] 42 | elif "I-" + wlast_pred != line_split[2]: # begging of new trigger 43 | if wlast_pred != '': 44 | pred_dict.add('\t'.join([str(id_pred_init), str(max(id_pred_init, id_pred_end)), wlast_pred])) 45 | wlast_pred = "" 46 | elif "I-" + wlast_pred == line_split[2]: 47 | id_pred_end = i 48 | wlast_pred = line_split[2][2:] 49 | else: 50 | if wlast_pred != '': 51 | pred_dict.add('\t'.join([str(id_pred_init), str(max(id_pred_init, id_pred_end)), wlast_pred])) 52 | wlast_pred = "" 53 | else: 54 | if wlast_true != '': 55 | true_dict.add('\t'.join([str(id_true_init), str(max(id_true_init, id_true_end)), wlast_true])) 56 | if wlast_pred != '': 57 | pred_dict.add('\t'.join([str(id_pred_init), str(max(id_pred_init, id_pred_end)), wlast_pred])) 58 | wlast_true = "" 59 | wlast_pred = "" 60 | 61 | true_cnt = len(true_dict) 62 | pred_cnt = len(pred_dict) 63 | acc_cnt = len(pred_dict & true_dict) 64 | prec_tmp = acc_cnt / (pred_cnt + 1e-8) 65 | recall_tmp = acc_cnt / (true_cnt + 1e-8) 66 | f1_tmp = 2 * prec_tmp * recall_tmp / (prec_tmp + recall_tmp + 1e-8) 67 | return prec_tmp,recall_tmp,f1_tmp 68 | 69 | 70 | def write_2_file(filename, ED_2_id, 
label_true_list,valid_len_list,words_sents, label_pred_list, id_2_vocab): 71 | id_to_ner_final = {v: u for u, v in ED_2_id.items()} 72 | with open(filename, encoding='utf-8', mode='w') as f: 73 | init_step = 0 74 | k = 0 75 | len_all = len(label_true_list) 76 | while init_step < len_all: 77 | end_step = init_step + valid_len_list[k] 78 | words_tmp = words_sents[init_step:end_step] 79 | pred_label_tmp_tmp = label_pred_list[init_step:end_step] 80 | true_label_tmp_tmp = label_true_list[init_step:end_step] 81 | for i in range(len(words_tmp)): 82 | f.write('\t'.join([id_2_vocab[words_tmp[i]], 83 | id_to_ner_final[true_label_tmp_tmp[i]], 84 | id_to_ner_final[pred_label_tmp_tmp[i]]]) + '\n') 85 | f.write('\n') 86 | init_step = end_step 87 | k += 1 88 | 89 | 90 | if __name__ == "__main__": 91 | pass 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /data-ACE/example_new.dev: -------------------------------------------------------------------------------- 1 | BEGIN CNN_IP_20030405.1600.01-1 O O O 2 | VIDEOTAPE CNN_IP_20030405.1600.01-1 O O O 3 | -RRB- CNN_IP_20030405.1600.01-1 O O O 4 | MILES CNN_IP_20030405.1600.01-1 B-1_PER B-2_Individual O 5 | O'BRIEN CNN_IP_20030405.1600.01-1 I-1_PER I-2_Individual O 6 | , CNN_IP_20030405.1600.01-1 O O O 7 | CNN CNN_IP_20030405.1600.01-1 B-1_ORG B-2_Media O 8 | CORRESPONDENT CNN_IP_20030405.1600.01-1 B-1_PER B-2_Individual O 9 | -LRB- CNN_IP_20030405.1600.01-1 O O O 10 | voice-over CNN_IP_20030405.1600.01-1 O O O 11 | -RRB- CNN_IP_20030405.1600.01-1 O O O 12 | Seven-eleven CNN_IP_20030405.1600.01-1 B-1_Time B-2_Time O 13 | a.m. CNN_IP_20030405.1600.01-1 I-1_Time I-2_Time O 14 | Eastern CNN_IP_20030405.1600.01-1 I-1_Time I-2_Time O 15 | , CNN_IP_20030405.1600.01-1 O O O 16 | 4:11 CNN_IP_20030405.1600.01-1 B-1_Time B-2_Time O 17 | p.m. 
CNN_IP_20030405.1600.01-1 I-1_Time I-2_Time O 18 | in CNN_IP_20030405.1600.01-1 I-1_Time I-2_Time O 19 | Iraq CNN_IP_20030405.1600.01-1 B-1_GPE B-2_Nation O 20 | -------------------------------------------------------------------------------- /data-ACE/example_new.test: -------------------------------------------------------------------------------- 1 | WASHINGTON APW_ENG_20030304.0555 B-1_GPE B-2_Population_Center O 2 | -LRB- APW_ENG_20030304.0555 O O O 3 | AP APW_ENG_20030304.0555 B-1_ORG B-2_Media O 4 | -RRB- APW_ENG_20030304.0555 O O O 5 | With APW_ENG_20030304.0555 O O O 6 | opposition APW_ENG_20030304.0555 O O O 7 | hardening APW_ENG_20030304.0555 O O O 8 | , APW_ENG_20030304.0555 O O O 9 | the APW_ENG_20030304.0555 O O O 10 | White APW_ENG_20030304.0555 B-1_GPE B-2_Nation O 11 | House APW_ENG_20030304.0555 I-1_GPE I-2_Nation O 12 | left APW_ENG_20030304.0555 O O O 13 | open APW_ENG_20030304.0555 O O O 14 | the APW_ENG_20030304.0555 O O O 15 | possibility APW_ENG_20030304.0555 O O O 16 | Tuesday APW_ENG_20030304.0555 B-1_Time B-2_Time O 17 | that APW_ENG_20030304.0555 O O O 18 | it APW_ENG_20030304.0555 B-1_GPE B-2_Nation O 19 | would APW_ENG_20030304.0555 O O O 20 | not APW_ENG_20030304.0555 O O O 21 | seek APW_ENG_20030304.0555 O O O 22 | a APW_ENG_20030304.0555 O O O 23 | United APW_ENG_20030304.0555 B-1_ORG B-2_Non_Governmental O 24 | Nations APW_ENG_20030304.0555 I-1_ORG I-2_Non_Governmental O 25 | vote APW_ENG_20030304.0555 O O O 26 | on APW_ENG_20030304.0555 O O O 27 | its APW_ENG_20030304.0555 B-1_GPE B-2_Nation O 28 | war-making APW_ENG_20030304.0555 O O O 29 | resolution APW_ENG_20030304.0555 O O O 30 | if APW_ENG_20030304.0555 O O O 31 | the APW_ENG_20030304.0555 O O O 32 | measure APW_ENG_20030304.0555 O O O 33 | was APW_ENG_20030304.0555 O O O 34 | headed APW_ENG_20030304.0555 O O O 35 | for APW_ENG_20030304.0555 O O O 36 | defeat APW_ENG_20030304.0555 O O O 37 | '' APW_ENG_20030304.0555 O O O 38 | 39 | The APW_ENG_20030304.0555 O O O 40 | vote APW_ENG_20030304.0555 O O O 41 | is APW_ENG_20030304.0555 O O O 42 | desirable APW_ENG_20030304.0555 O O O 43 | -------------------------------------------------------------------------------- /data-ACE/example_new.train: -------------------------------------------------------------------------------- 1 | the CNN_ENG_20030512_190454.7 O O O 2 | story CNN_ENG_20030512_190454.7 O O O 3 | of CNN_ENG_20030512_190454.7 O O O 4 | a CNN_ENG_20030512_190454.7 O O O 5 | woman CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 6 | in CNN_ENG_20030512_190454.7 O O O 7 | an CNN_ENG_20030512_190454.7 O O O 8 | east CNN_ENG_20030512_190454.7 B-1_LOC B-2_Region_General O 9 | texas CNN_ENG_20030512_190454.7 I-1_LOC I-2_Region_General O 10 | jail CNN_ENG_20030512_190454.7 O O O 11 | cell CNN_ENG_20030512_190454.7 B-1_FAC B-2_Subarea_Facility O 12 | tonight CNN_ENG_20030512_190454.7 B-1_Time B-2_Time O 13 | 14 | we CNN_ENG_20030512_190454.7 B-1_PER B-2_Group O 15 | 're CNN_ENG_20030512_190454.7 O O O 16 | told CNN_ENG_20030512_190454.7 O O O 17 | sometimes CNN_ENG_20030512_190454.7 O O O 18 | she CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 19 | sits CNN_ENG_20030512_190454.7 O O O 20 | in CNN_ENG_20030512_190454.7 O O O 21 | the CNN_ENG_20030512_190454.7 O O O 22 | fetal CNN_ENG_20030512_190454.7 O O O 23 | position CNN_ENG_20030512_190454.7 O O O 24 | , CNN_ENG_20030512_190454.7 O O O 25 | other CNN_ENG_20030512_190454.7 O O O 26 | times CNN_ENG_20030512_190454.7 O O O 27 | singing CNN_ENG_20030512_190454.7 O O O 28 | gospel 
CNN_ENG_20030512_190454.7 O O O 29 | music CNN_ENG_20030512_190454.7 O O O 30 | 31 | occasionally CNN_ENG_20030512_190454.7 O O O 32 | she CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 33 | 'll CNN_ENG_20030512_190454.7 O O O 34 | pray CNN_ENG_20030512_190454.7 O O O 35 | , CNN_ENG_20030512_190454.7 O O O 36 | sometimes CNN_ENG_20030512_190454.7 O O O 37 | cries CNN_ENG_20030512_190454.7 O O O 38 | hysterically CNN_ENG_20030512_190454.7 O O O 39 | 40 | her CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 41 | name CNN_ENG_20030512_190454.7 O O O 42 | is CNN_ENG_20030512_190454.7 O O O 43 | deanna CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 44 | lejeune CNN_ENG_20030512_190454.7 I-1_PER I-2_Individual O 45 | laney CNN_ENG_20030512_190454.7 I-1_PER I-2_Individual O 46 | 47 | she CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 48 | 's CNN_ENG_20030512_190454.7 O O O 49 | accused CNN_ENG_20030512_190454.7 O O O 50 | of CNN_ENG_20030512_190454.7 O O O 51 | beating CNN_ENG_20030512_190454.7 O O O 52 | two CNN_ENG_20030512_190454.7 B-1_PER B-2_Group O 53 | of CNN_ENG_20030512_190454.7 O O O 54 | her CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 55 | three CNN_ENG_20030512_190454.7 O O O 56 | children CNN_ENG_20030512_190454.7 B-1_PER B-2_Group O 57 | to CNN_ENG_20030512_190454.7 O O O 58 | death CNN_ENG_20030512_190454.7 O O B-Life_Die 59 | because CNN_ENG_20030512_190454.7 O O O 60 | , CNN_ENG_20030512_190454.7 O O O 61 | she CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 62 | says CNN_ENG_20030512_190454.7 O O O 63 | , CNN_ENG_20030512_190454.7 O O O 64 | god CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 65 | told CNN_ENG_20030512_190454.7 O O O 66 | her CNN_ENG_20030512_190454.7 B-1_PER B-2_Individual O 67 | to CNN_ENG_20030512_190454.7 O O O 68 | 69 | -------------------------------------------------------------------------------- /dict/dict_gen.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding: utf-8 -*- 3 | 4 | """ 5 | @version: 6 | @software:PyCharm 7 | @file:dict_gen.py 8 | @time:2020/10/27 16:29 9 | """ 10 | 11 | def event_dict_gen(): 12 | files = ["../data-ACE/example_new.train", "../data-ACE/example_new.dev", "../data-ACE/example_new.test"] 13 | vocab_set = set() 14 | vocab_set_ner_1 = set() 15 | vocab_set_ner_2 = set() 16 | for filei in files: 17 | with open(filei, encoding="utf-8", mode="r") as f: 18 | for line in f: 19 | line = line.strip().split(" ") 20 | if len(line) == 5: 21 | vocab_set.add(line[-1]) 22 | vocab_set_ner_1.add(line[-3]) 23 | vocab_set_ner_2.add(line[-2]) 24 | vocab_list = list(vocab_set) 25 | vocab_list = [x for x in vocab_list if x == "O" or x.startswith("B-")] 26 | vocab_list += ["I-" + x[2:] for x in vocab_list if x.startswith("B-")] 27 | vocab_list = sorted(vocab_list, key=lambda x: x, reverse=True) 28 | with open("event_types.txt", encoding="utf-8", mode="w") as fw: 29 | for line in vocab_list: 30 | fw.write(line + "\n") 31 | 32 | vocab_list_ner_1 = list(vocab_set_ner_1) 33 | vocab_list_ner_1 = sorted(vocab_list_ner_1, key=lambda x: x, reverse=True) 34 | vocab_set_ner_2 = list(vocab_set_ner_2) 35 | vocab_set_ner_2 = sorted(vocab_set_ner_2, key=lambda x: x, reverse=True) 36 | 37 | with open("ner_1.txt", encoding="utf-8", mode="w") as fw: 38 | for line in vocab_list_ner_1: 39 | fw.write(line + "\n") 40 | 41 | with open("ner_2.txt", encoding="utf-8", mode="w") as fw: 42 | for line in vocab_set_ner_2: 43 | fw.write(line + "\n") 44 | 45 | 46 | if __name__ == 
"__main__": 47 | event_dict_gen() 48 | 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /dict/event_types.txt: -------------------------------------------------------------------------------- 1 | O 2 | I-Transaction_Transfer_Ownership 3 | I-Transaction_Transfer_Money 4 | I-Personnel_Start_Position 5 | I-Personnel_Nominate 6 | I-Personnel_End_Position 7 | I-Personnel_Elect 8 | I-Movement_Transport 9 | I-Life_Marry 10 | I-Life_Injure 11 | I-Life_Divorce 12 | I-Life_Die 13 | I-Life_Be_Born 14 | I-Justice_Trial_Hearing 15 | I-Justice_Sue 16 | I-Justice_Sentence 17 | I-Justice_Release_Parole 18 | I-Justice_Pardon 19 | I-Justice_Fine 20 | I-Justice_Extradite 21 | I-Justice_Execute 22 | I-Justice_Convict 23 | I-Justice_Charge_Indict 24 | I-Justice_Arrest_Jail 25 | I-Justice_Appeal 26 | I-Justice_Acquit 27 | I-Contact_Phone_Write 28 | I-Contact_Meet 29 | I-Conflict_Demonstrate 30 | I-Conflict_Attack 31 | I-Business_Start_Org 32 | I-Business_Merge_Org 33 | I-Business_End_Org 34 | I-Business_Declare_Bankruptcy 35 | B-Transaction_Transfer_Ownership 36 | B-Transaction_Transfer_Money 37 | B-Personnel_Start_Position 38 | B-Personnel_Nominate 39 | B-Personnel_End_Position 40 | B-Personnel_Elect 41 | B-Movement_Transport 42 | B-Life_Marry 43 | B-Life_Injure 44 | B-Life_Divorce 45 | B-Life_Die 46 | B-Life_Be_Born 47 | B-Justice_Trial_Hearing 48 | B-Justice_Sue 49 | B-Justice_Sentence 50 | B-Justice_Release_Parole 51 | B-Justice_Pardon 52 | B-Justice_Fine 53 | B-Justice_Extradite 54 | B-Justice_Execute 55 | B-Justice_Convict 56 | B-Justice_Charge_Indict 57 | B-Justice_Arrest_Jail 58 | B-Justice_Appeal 59 | B-Justice_Acquit 60 | B-Contact_Phone_Write 61 | B-Contact_Meet 62 | B-Conflict_Demonstrate 63 | B-Conflict_Attack 64 | B-Business_Start_Org 65 | B-Business_Merge_Org 66 | B-Business_End_Org 67 | B-Business_Declare_Bankruptcy 68 | -------------------------------------------------------------------------------- /dict/ner_1.txt: -------------------------------------------------------------------------------- 1 | O 2 | I-1_WEA 3 | I-1_VEH 4 | I-1_Time 5 | I-1_Sentence 6 | I-1_PER 7 | I-1_ORG 8 | I-1_Numeric 9 | I-1_LOC 10 | I-1_Job_Title 11 | I-1_GPE 12 | I-1_FAC 13 | I-1_Crime 14 | I-1_Contact_Info 15 | B-1_WEA 16 | B-1_VEH 17 | B-1_Time 18 | B-1_Sentence 19 | B-1_PER 20 | B-1_ORG 21 | B-1_Numeric 22 | B-1_LOC 23 | B-1_Job_Title 24 | B-1_GPE 25 | B-1_FAC 26 | B-1_Crime 27 | B-1_Contact_Info 28 | -------------------------------------------------------------------------------- /dict/ner_2.txt: -------------------------------------------------------------------------------- 1 | O 2 | I-2_Water_Body 3 | I-2_Water 4 | I-2_Underspecified 5 | I-2_Time 6 | I-2_Subarea_Vehicle 7 | I-2_Subarea_Facility 8 | I-2_State_or_Province 9 | I-2_Sports 10 | I-2_Special 11 | I-2_Shooting 12 | I-2_Sentence 13 | I-2_Religious 14 | I-2_Region_International 15 | I-2_Region_General 16 | I-2_Projectile 17 | I-2_Population_Center 18 | I-2_Path 19 | I-2_Numeric 20 | I-2_Non_Governmental 21 | I-2_Nation 22 | I-2_Medical_Science 23 | I-2_Media 24 | I-2_Land_Region_Natural 25 | I-2_Land 26 | I-2_Job_Title 27 | I-2_Individual 28 | I-2_Indeterminate 29 | I-2_Group 30 | I-2_Government 31 | I-2_GPE_Cluster 32 | I-2_Exploding 33 | I-2_Entertainment 34 | I-2_Educational 35 | I-2_Crime 36 | I-2_County_or_District 37 | I-2_Continent 38 | I-2_Contact_Info 39 | I-2_Commercial 40 | I-2_Celestial 41 | I-2_Building_Grounds 42 | I-2_Biological 43 | I-2_Airport 44 | I-2_Air 45 | I-2_Address 46 | 
B-2_Water_Body 47 | B-2_Water 48 | B-2_Underspecified 49 | B-2_Time 50 | B-2_Subarea_Vehicle 51 | B-2_Subarea_Facility 52 | B-2_State_or_Province 53 | B-2_Sports 54 | B-2_Special 55 | B-2_Shooting 56 | B-2_Sharp 57 | B-2_Sentence 58 | B-2_Religious 59 | B-2_Region_International 60 | B-2_Region_General 61 | B-2_Projectile 62 | B-2_Population_Center 63 | B-2_Plant 64 | B-2_Path 65 | B-2_Numeric 66 | B-2_Nuclear 67 | B-2_Non_Governmental 68 | B-2_Nation 69 | B-2_Medical_Science 70 | B-2_Media 71 | B-2_Land_Region_Natural 72 | B-2_Land 73 | B-2_Job_Title 74 | B-2_Individual 75 | B-2_Indeterminate 76 | B-2_Group 77 | B-2_Government 78 | B-2_GPE_Cluster 79 | B-2_Exploding 80 | B-2_Entertainment 81 | B-2_Educational 82 | B-2_Crime 83 | B-2_County_or_District 84 | B-2_Continent 85 | B-2_Contact_Info 86 | B-2_Commercial 87 | B-2_Chemical 88 | B-2_Celestial 89 | B-2_Building_Grounds 90 | B-2_Boundary 91 | B-2_Blunt 92 | B-2_Biological 93 | B-2_Airport 94 | B-2_Air 95 | B-2_Address 96 | -------------------------------------------------------------------------------- /dict/vocab.txt: -------------------------------------------------------------------------------- 1 | EU 2 | German 3 | call 4 | to 5 | boycott 6 | British 7 | . 8 | Peter 9 | Blackburn 10 | BRUSSELS 11 | 1996-08-22 12 | The 13 | European 14 | Commission 15 | said 16 | on 17 | Thursday 18 | it 19 | with 20 | advice 21 | consumers 22 | until 23 | scientists 24 | determine 25 | whether 26 | mad 27 | cow 28 | disease 29 | can 30 | be 31 | sheep 32 | Germany 33 | 's 34 | representative 35 | the 36 | Union 37 | veterinary 38 | committee 39 | Wednesday 40 | should 41 | buy 42 | from 43 | countries 44 | other 45 | than 46 | Britain 47 | scientific 48 | was 49 | " 50 | We 51 | do 52 | n't 53 | support 54 | any 55 | such 56 | because 57 | we 58 | see 59 | grounds 60 | for 61 | , 62 | chief 63 | spokesman 64 | van 65 | der 66 | told 67 | a 68 | news 69 | briefing 70 | He 71 | further 72 | study 73 | required 74 | and 75 | if 76 | found 77 | that 78 | action 79 | needed 80 | taken 81 | by 82 | proposal 83 | last 84 | month 85 | Commissioner 86 | Fischler 87 | ban 88 | spinal 89 | human 90 | animal 91 | food 92 | chains 93 | highly 94 | specific 95 | move 96 | health 97 | proposed 98 | measures 99 | after 100 | reports 101 | France 102 | under 103 | conditions 104 | could 105 | contract 106 | ( 107 | BSE 108 | ) 109 | -- 110 | But 111 | agreed 112 | review 113 | his 114 | standing 115 | officials 116 | questioned 117 | as 118 | there 119 | only 120 | slight 121 | risk 122 | Spanish 123 | Minister 124 | de 125 | had 126 | earlier 127 | accused 128 | at 129 | an 130 | farm 131 | ministers 132 | ' 133 | meeting 134 | of 135 | through 136 | dangerous 137 | Only 138 | backed 139 | are 140 | due 141 | issue 142 | early 143 | next 144 | make 145 | senior 146 | have 147 | long 148 | been 149 | known 150 | similar 151 | which 152 | is 153 | believed 154 | cattle 155 | feed 156 | containing 157 | waste 158 | farmers 159 | denied 160 | danger 161 | their 162 | but 163 | expressed 164 | concern 165 | government 166 | avoid 167 | might 168 | across 169 | Europe 170 | What 171 | extremely 172 | how 173 | going 174 | take 175 | lead 176 | National 177 | chairman 178 | John 179 | Lloyd 180 | Jones 181 | radio 182 | Bonn 183 | has 184 | led 185 | efforts 186 | public 187 | consumer 188 | confidence 189 | in 190 | March 191 | report 192 | illness 193 | eating 194 | beef 195 | imported 196 | year 197 | nearly 198 | half 199 | total 200 | imports 201 | It 202 | brought 
203 | tonnes 204 | some 205 | 10 206 | percent 207 | overall 208 | -DOCSTART- 209 | Hendrix 210 | draft 211 | almost 212 | $ 213 | LONDON 214 | A 215 | rare 216 | U.S. 217 | sold 218 | auction 219 | late 220 | Florida 221 | restaurant 222 | paid 223 | pounds 224 | no 225 | telling 226 | piece 227 | London 228 | hotel 229 | At 230 | end 231 | January 232 | English 233 | city 234 | Nottingham 235 | he 236 | threw 237 | sheet 238 | paper 239 | into 240 | audience 241 | where 242 | also 243 | snapped 244 | up 245 | 16 246 | items 247 | were 248 | put 249 | former 250 | who 251 | lived 252 | him 253 | They 254 | included 255 | black 256 | mother 257 | box 258 | used 259 | store 260 | drugs 261 | Australian 262 | bought 263 | died 264 | aged 265 | 27 266 | China 267 | says 268 | Taiwan 269 | atmosphere 270 | talks 271 | BEIJING 272 | Taipei 273 | visit 274 | Ukraine 275 | President 276 | this 277 | week 278 | Beijing 279 | hours 280 | Chinese 281 | state 282 | media 283 | time 284 | right 285 | political 286 | Foreign 287 | Ministry 288 | Reuters 289 | : 290 | necessary 291 | opening 292 | authorities 293 | State 294 | quoted 295 | top 296 | negotiator 297 | Tang 298 | visiting 299 | group 300 | rivals 301 | hold 302 | Now 303 | two 304 | sides 305 | ... 306 | overseas 307 | edition 308 | People 309 | Daily 310 | saying 311 | foreign 312 | ministry 313 | interview 314 | read 315 | comments 316 | gave 317 | details 318 | why 319 | considered 320 | renegade 321 | province 322 | opposed 323 | all 324 | gain 325 | international 326 | rival 327 | island 328 | towards 329 | goal 330 | held 331 | set 332 | official 333 | agency 334 | executive 335 | vice 336 | Association 337 | July 338 | car 339 | registrations 340 | pct 341 | yr 342 | / 343 | FRANKFURT 344 | motor 345 | vehicles 346 | period 347 | Federal 348 | office 349 | new 350 | cars 351 | registered 352 | 1996 353 | passenger 354 | figures 355 | increase 356 | decline 357 | 1995 358 | registration 359 | rose 360 | growth 361 | partly 362 | increased 363 | number 364 | buying 365 | abroad 366 | while 367 | manufacturers 368 | domestic 369 | demand 370 | weak 371 | federal 372 | posted 373 | gains 374 | numbers 375 | AG 376 | won 377 | slightly 378 | more 379 | quarter 380 | together 381 | General 382 | came 383 | second 384 | place 385 | figure 386 | Third 387 | Ford 388 | or 389 | fewer 390 | compared 391 | fell 392 | TO 393 | PM 394 | FOR 395 | ATHENS 396 | Greek 397 | party 398 | bureau 399 | green 400 | light 401 | Prime 402 | Costas 403 | Simitis 404 | snap 405 | elections 406 | its 407 | general 408 | secretary 409 | reporters 410 | announcement 411 | cabinet 412 | later 413 | Dimitris 414 | Kontogiannis 415 | Athens 416 | Newsroom 417 | +301 418 | 3311812-4 419 | sets 420 | C$ 421 | 100 422 | million 423 | bond 424 | following 425 | announced 426 | manager 427 | Toronto 428 | ISS 429 | PRICE 430 | PAY 431 | DATE 432 | BP 433 | MOODY 434 | = 435 | S&P 436 | US 437 | UK 438 | GERMAN 439 | 7.0 440 | PCT 441 | 2001 442 | NOTES 443 | +44 444 | 171 445 | 542 446 | 300 447 | 1999 448 | Lehman 449 | International 450 | NATIONAL 451 | - 452 | LAST 453 | FIRST 454 | 2 455 | ENGLISH 456 | 5 457 | Port 458 | Syria 459 | Service 460 | Aug 461 | waiting 462 | 24 463 | Israel 464 | plays 465 | down 466 | fears 467 | war 468 | JERUSALEM 469 | peace 470 | current 471 | between 472 | appeared 473 | storm 474 | ambassador 475 | Washington 476 | conducted 477 | negotiations 478 | Radio 479 | looked 480 | like 481 | Damascus 482 | wanted 483 | talk 484 | 
rather 485 | fight 486 | appears 487 | me 488 | still 489 | they 490 | definitely 491 | tense 492 | assessment 493 | here 494 | term 495 | will 496 | replaced 497 | Israeli 498 | envoy 499 | Egypt 500 | right-wing 501 | politician 502 | sent 503 | message 504 | via 505 | committed 506 | open 507 | without 508 | what 509 | called 510 | campaign 511 | against 512 | television 513 | reported 514 | recently 515 | test 516 | fired 517 | arms 518 | ready 519 | enter 520 | David 521 | Levy 522 | since 523 | Benjamin 524 | Netanyahu 525 | took 526 | June 527 | retain 528 | captured 529 | Middle 530 | East 531 | over 532 | 1991 533 | despite 534 | previous 535 | Peace 536 | February 537 | coming 538 | out 539 | bad 540 | not 541 | good 542 | full 543 | must 544 | very 545 | those 546 | become 547 | prisoners 548 | expect 549 | face 550 | answer 551 | our 552 | want 553 | God 554 | No 555 | one 556 | Two 557 | signal 558 | source 559 | confirm 560 | Cairo 561 | United 562 | States 563 | Moscow 564 | Polish 565 | diplomat 566 | nurses 567 | Libya 568 | trying 569 | return 570 | home 571 | working 572 | North 573 | African 574 | country 575 | This 576 | true 577 | Up 578 | today 579 | kept 580 | her 581 | received 582 | embassy 583 | charge 584 | telephone 585 | Poland 586 | labour 587 | would 588 | send 589 | team 590 | probe 591 | prompted 592 | about 593 | work 594 | estimated 595 | 800 596 | Iranian 597 | opposition 598 | leaders 599 | meet 600 | Baghdad 601 | Hassan 602 | BAGHDAD 603 | An 604 | exile 605 | based 606 | Iraq 607 | vowed 608 | Iran 609 | Kurdish 610 | rebels 611 | attacked 612 | troops 613 | inside 614 | statement 615 | leader 616 | met 617 | Kurdistan 618 | Democratic 619 | Party 620 | rebel 621 | Kurds 622 | continue 623 | stand 624 | side 625 | movement 626 | level 627 | cooperation 628 | heavily 629 | targets 630 | northern 631 | pursuit 632 | guerrillas 633 | Iraqi 634 | areas 635 | outside 636 | control 637 | Patriotic 638 | PUK 639 | KDP 640 | main 641 | factions 642 | forces 643 | ousted 644 | Kuwait 645 | Gulf 646 | War 647 | parties 648 | broke 649 | weekend 650 | most 651 | serious 652 | fighting 653 | ceasefire 654 | shelling 655 | positions 656 | region 657 | near 658 | border 659 | days 660 | killed 661 | wounded 662 | attack 663 | Both 664 | Turkey 665 | air 666 | land 667 | strikes 668 | own 669 | U.S.-led 670 | force 671 | southern 672 | possible 673 | attacks 674 | Saudi 675 | riyal 676 | rates 677 | steady 678 | quiet 679 | summer 680 | trade 681 | spot 682 | dollar 683 | deposit 684 | mainly 685 | dealers 686 | kingdom 687 | There 688 | changes 689 | market 690 | dealer 691 | three 692 | months 693 | 1/2 694 | six 695 | 5/8 696 | funds 697 | Arafat 698 | flight 699 | West 700 | Bank 701 | Palestinian 702 | Yasser 703 | permission 704 | fly 705 | territory 706 | ending 707 | brief 708 | crisis 709 | problem 710 | president 711 | aircraft 712 | pass 713 | expected 714 | travel 715 | before 716 | Monday 717 | Abu 718 | scheduled 719 | prime 720 | minister 721 | Shimon 722 | Peres 723 | town 724 | Ramallah 725 | venue 726 | changed 727 | Gaza 728 | stop 729 | keeping 730 | cancelled 731 | PLO 732 | civilian 733 | affairs 734 | Allenby 735 | Bridge 736 | crossing 737 | Jordan 738 | decided 739 | flying 740 | lifted 741 | schedule 742 | free 743 | Palestinians 744 | barred 745 | planned 746 | helicopter 747 | attempt 748 | defeated 749 | May 750 | Afghan 751 | UAE 752 | Taleban 753 | guards 754 | DUBAI 755 | Three 756 | Arab 757 | Russian 758 | hostages 759 | escaped 
760 | militia 761 | Afghanistan 762 | few 763 | Our 764 | Their 765 | them 766 | documents 767 | added 768 | Islamic 769 | seven 770 | Friday 771 | board 772 | Kandahar 773 | hand 774 | Red 775 | possibly 776 | Tuesday 777 | When 778 | asked 779 | back 780 | capital 781 | Kabul 782 | That 783 | headquarters 784 | controlled 785 | Rabbani 786 | men 787 | currently 788 | did 789 | Russians 790 | firm 791 | republic 792 | hostage 793 | forced 794 | cargo 795 | plane 796 | August 797 | shipment 798 | Albania 799 | evidence 800 | military 801 | crew 802 | diplomatic 803 | attempts 804 | failed 805 | armed 806 | doing 807 | regular 808 | 76 809 | left 810 | Sunday 811 | Saddam 812 | meets 813 | Russia 814 | Zhirinovsky 815 | Hussein 816 | Vladimir 817 | maintain 818 | newspapers 819 | during 820 | parliament 821 | calling 822 | immediate 823 | lifting 824 | embargo 825 | imposed 826 | 1990 827 | press 828 | help 829 | U.N. 830 | sanctions 831 | blamed 832 | establishment 833 | ties 834 | economic 835 | resume 836 | visited 837 | twice 838 | Last 839 | October 840 | invited 841 | attend 842 | referendum 843 | presidency 844 | extended 845 | years 846 | PRESS 847 | DIGEST 848 | 22 849 | These 850 | leading 851 | stories 852 | verified 853 | these 854 | does 855 | vouch 856 | accuracy 857 | democratic 858 | Turkish 859 | part 860 | fair 861 | November 862 | 12 863 | rice 864 | arrives 865 | port 866 | Lebanon 867 | Beirut 868 | threats 869 | serve 870 | Parliament 871 | Speaker 872 | preparing 873 | battle 874 | .. 875 | prepared 876 | law 877 | violation 878 | incidents 879 | occurred 880 | Financial 881 | Pakistan 882 | step 883 | election 884 | list 885 | violations 886 | live 887 | calls 888 | range 889 | mixed 890 | CHICAGO 891 | futures 892 | cent 893 | higher 894 | lower 895 | livestock 896 | analysts 897 | continued 898 | strong 899 | cash 900 | markets 901 | prompt 902 | bullish 903 | However 904 | likely 905 | prices 906 | evening 907 | ahead 908 | Cash 909 | record 910 | amount 911 | traded 912 | debt 913 | hit 914 | results 915 | Inc 916 | mean 917 | loss 918 | 1.2 919 | fiscal 920 | 1997 921 | first 922 | company 923 | began 924 | 1 925 | 30 926 | par 927 | value 928 | outstanding 929 | Philip 930 | financial 931 | officer 932 | child 933 | care 934 | offered 935 | opportunity 936 | reduce 937 | average 938 | interest 939 | costs 940 | improve 941 | future 942 | earnings 943 | RESEARCH 944 | ALERT 945 | starts 946 | analyst 947 | started 948 | Southern 949 | New 950 | England 951 | Corp 952 | outperform 953 | rating 954 | price 955 | target 956 | 45 957 | estimate 958 | per 959 | share 960 | immediately 961 | available 962 | closed 963 | Wall 964 | Street 965 | Data 966 | Q2 967 | net 968 | rises 969 | Summary 970 | In 971 | Thousands 972 | except 973 | data 974 | Six 975 | Jul 976 | 31 977 | Income 978 | Total 979 | Revenue 980 | Operating 981 | 599 982 | Net 983 | Jan 984 | Capital 985 | 93 986 | give 987 | backing 988 | Le 989 | Monde 990 | PARIS 991 | afternoon 992 | daily 993 | dated 994 | 23 995 | seeking 996 | residence 997 | rights 998 | say 999 | Alain 1000 | Juppe 1001 | proposals 1002 | strike 1003 | day 1004 | Paris 1005 | church 1006 | rally 1007 | 8,000 1008 | nationalist 1009 | truce 1010 | night 1011 | French 1012 | points 1013 | industry 1014 | competition 1015 | failure 1016 | keep 1017 | trends 1018 | Secretary 1019 | union 1020 | social 1021 | unrest 1022 | weeks 1023 | 42 1024 | 21 1025 | 53 1026 | 81 1027 | lift 1028 | oil 1029 | output 1030 | water 1031 | wells 
1032 | off 1033 | reopened 1034 | operator 1035 | AS 1036 | 30,000 1037 | barrels 1038 | bpd 1039 | according 1040 | problems 1041 | newsroom 1042 | 50 1043 | 41 1044 | April 1045 | surplus 1046 | 3.8 1047 | billion 1048 | markka 1049 | HELSINKI 1050 | Finland 1051 | Board 1052 | exports 1053 | Trade 1054 | balance 1055 | 96 1056 | 95 1057 | import 1058 | revised 1059 | export 1060 | 3.2 1061 | monthly 1062 | behind 1063 | customs 1064 | when 1065 | joined 1066 | start 1067 | 0 1068 | Dutch 1069 | sale 1070 | AMSTERDAM 1071 | Finance 1072 | raised 1073 | sales 1074 | September 1075 | being 1076 | GMT 1077 | guilders 1078 | close 1079 | Amsterdam 1080 | +31 1081 | 20 1082 | 504 1083 | 5000 1084 | BONN 1085 | Agriculture 1086 | animals 1087 | cleared 1088 | done 1089 | quickly 1090 | I 1091 | concrete 1092 | too 1093 | many 1094 | holes 1095 | know 1096 | filled 1097 | ensure 1098 | protection 1099 | dealing 1100 | erupted 1101 | use 1102 | suspect 1103 | experts 1104 | members 1105 | given 1106 | question 1107 | After 1108 | admitted 1109 | link 1110 | fatal 1111 | equivalent 1112 | worldwide 1113 | takes 1114 | GOLF 1115 | SCORES 1116 | AT 1117 | WORLD 1118 | SERIES 1119 | OF 1120 | AKRON 1121 | Ohio 1122 | Scores 1123 | NEC 1124 | World 1125 | Series 1126 | Golf 1127 | round 1128 | 70 1129 | course 1130 | players 1131 | unless 1132 | stated 1133 | 66 1134 | Paul 1135 | Billy 1136 | Mayfair 1137 | Japan 1138 | 68 1139 | Steve 1140 | 69 1141 | Justin 1142 | Mark 1143 | Brooks 1144 | Tim 1145 | Davis 1146 | Anders 1147 | Sweden 1148 | Nick 1149 | Phil 1150 | Mickelson 1151 | Greg 1152 | Norman 1153 | Australia 1154 | 71 1155 | Els 1156 | South 1157 | Africa 1158 | Scott 1159 | 72 1160 | Rose 1161 | Fred 1162 | Sven 1163 | Alexander 1164 | Tom 1165 | 73 1166 | Brad 1167 | Craig 1168 | Stewart 1169 | Stadler 1170 | 74 1171 | Costantino 1172 | Rocca 1173 | Italy 1174 | 75 1175 | Jim 1176 | 77 1177 | Wayne 1178 | 79 1179 | SOCCER 1180 | BEAT 1181 | 2-1 1182 | F.C. 1183 | Gloria 1184 | Bistrita 1185 | Romania 1186 | beat 1187 | halftime 1188 | 1-1 1189 | Valletta 1190 | Malta 1191 | Cup 1192 | winners 1193 | match 1194 | leg 1195 | preliminary 1196 | Scorers 1197 | La 1198 | Gilbert 1199 | 24th 1200 | Attendance 1201 | 4-2 1202 | aggregate 1203 | qualified 1204 | RACING 1205 | YORK 1206 | Sir 1207 | landed 1208 | victory 1209 | 25 1210 | chance 1211 | veteran 1212 | George 1213 | short 1214 | head 1215 | deny 1216 | trained 1217 | Henry 1218 | Chris 1219 | Prix 1220 | winner 1221 | finished 1222 | third 1223 | away 1224 | 7-4 1225 | Games 1226 | fourth 1227 | Royal 1228 | may 1229 | now 1230 | aimed 1231 | season 1232 | sprint 1233 | race 1234 | reluctant 1235 | go 1236 | result 1237 | never 1238 | so 1239 | thought 1240 | better 1241 | wait 1242 | bit 1243 | longer 1244 | am 1245 | daughter 1246 | Jack 1247 | gone 1248 | search 1249 | success 1250 | around 1251 | disappointed 1252 | feel 1253 | well 1254 | metres 1255 | just 1256 | RESULTS 1257 | Result 1258 | run 1259 | five 1260 | km 1261 | 1. 1262 | 2. 1263 | 3. 
1264 | Jason 1265 | Weaver 1266 | Eight 1267 | ran 1268 | owned 1269 | Park 1270 | sterling 1271 | TENNIS 1272 | TOSHIBA 1273 | CLASSIC 1274 | CARLSBAD 1275 | California 1276 | 1996-08-21 1277 | Results 1278 | 450,000 1279 | Toshiba 1280 | Classic 1281 | tennis 1282 | tournament 1283 | prefix 1284 | denotes 1285 | seeding 1286 | Second 1287 | Arantxa 1288 | Sanchez 1289 | Vicario 1290 | Spain 1291 | Naoko 1292 | 1-6 1293 | 6-4 1294 | 6-3 1295 | 4 1296 | Kimiko 1297 | Date 1298 | 6-2 1299 | 7-5 1300 | 7 1301 | 4-6 1302 | 8 1303 | Nathalie 1304 | Tauziat 1305 | Wang 1306 | CUP 1307 | York 1308 | Hamlet 1309 | Michael 1310 | Chang 1311 | Sergi 1312 | Bruguera 1313 | Joyce 1314 | 3 1315 | 3-6 1316 | Martin 1317 | Damm 1318 | Czech 1319 | Republic 1320 | 6 1321 | El 1322 | Morocco 1323 | 5-7 1324 | 3-0 1325 | retired 1326 | Karol 1327 | Kucera 1328 | Slovakia 1329 | 7-6 1330 | Kenny 1331 | spoke 1332 | leaving 1333 | club 1334 | premier 1335 | league 1336 | title 1337 | mutual 1338 | confessed 1339 | taking 1340 | division 1341 | local 1342 | newspaper 1343 | holiday 1344 | same 1345 | opinion 1346 | little 1347 | If 1348 | opened 1349 | my 1350 | stayed 1351 | way 1352 | let 1353 | get 1354 | job 1355 | then 1356 | past 1357 | 15 1358 | director 1359 | football 1360 | CRICKET 1361 | COUNTY 1362 | CHAMPIONSHIP 1363 | Close 1364 | play 1365 | scores 1366 | four-day 1367 | County 1368 | Championship 1369 | cricket 1370 | matches 1371 | Durham 1372 | 326 1373 | D. 1374 | S. 1375 | Campbell 1376 | ; 1377 | G. 1378 | Somerset 1379 | M. 1380 | 85 1381 | Colchester 1382 | Gloucestershire 1383 | 280 1384 | J. 1385 | Russell 1386 | 63 1387 | A. 1388 | 52 1389 | Essex 1390 | Cardiff 1391 | Kent 1392 | Walker 1393 | 59 1394 | v 1395 | Glamorgan 1396 | Leicester 1397 | Leicestershire 1398 | P. 1399 | 108 1400 | 67 1401 | Hampshire 1402 | Northampton 1403 | Sussex 1404 | N. 1405 | 145 1406 | V. 1407 | Wells 1408 | 51 1409 | Northamptonshire 1410 | Trent 1411 | Nottinghamshire 1412 | 107 1413 | Surrey 1414 | Warwickshire 1415 | Giles 1416 | 57 1417 | W. 1418 | Khan 1419 | Worcestershire 1420 | Headingley 1421 | Yorkshire 1422 | C. 1423 | White 1424 | Moxon 1425 | Lancashire 1426 | ENGLAND 1427 | V 1428 | PAKISTAN 1429 | FINAL 1430 | TEST 1431 | SCOREBOARD 1432 | Scoreboard 1433 | final 1434 | Oval 1435 | innings 1436 | Atherton 1437 | b 1438 | Waqar 1439 | Younis 1440 | Mushtaq 1441 | Ahmed 1442 | 44 1443 | Hussain 1444 | c 1445 | Saeed 1446 | Anwar 1447 | Thorpe 1448 | lbw 1449 | Mohammad 1450 | Akram 1451 | 54 1452 | Crawley 1453 | Knight 1454 | 17 1455 | Lewis 1456 | Wasim 1457 | Salisbury 1458 | Extras 1459 | wickets 1460 | 278 1461 | Fall 1462 | To 1463 | bat 1464 | R. 
1465 | Croft 1466 | Cork 1467 | Mullally 1468 | Bowling 1469 | date 1470 | Aamir 1471 | Sohail 1472 | Ijaz 1473 | Inzamam-ul-Haq 1474 | Salim 1475 | Malik 1476 | Asif 1477 | Mujtaba 1478 | Moin 1479 | Akam 1480 | IN 1481 | SCOTTISH 1482 | SQUAD 1483 | AFTER 1484 | Everton 1485 | Ferguson 1486 | scored 1487 | Manchester 1488 | picked 1489 | Scottish 1490 | squad 1491 | Rangers 1492 | striker 1493 | another 1494 | man 1495 | form 1496 | four 1497 | named 1498 | qualifier 1499 | Austria 1500 | Vienna 1501 | served 1502 | jail 1503 | Scotland 1504 | caps 1505 | December 1506 | 1994 1507 | Brown 1508 | 've 1509 | great 1510 | scoring 1511 | moment 1512 | successful 1513 | ON 1514 | THIRD 1515 | lunch 1516 | WITH 1517 | MANCHESTER 1518 | Ireland 1519 | midfielder 1520 | signed 1521 | champions 1522 | deal 1523 | game 1524 | Alex 1525 | CANADIAN 1526 | OPEN 1527 | TORONTO 1528 | Canadian 1529 | Open 1530 | Daniel 1531 | Nestor 1532 | Canada 1533 | Thomas 1534 | Muster 1535 | Mikael 1536 | Tillstrom 1537 | Goran 1538 | Ivanisevic 1539 | Croatia 1540 | 6-7 1541 | Ferreira 1542 | Marcelo 1543 | Rios 1544 | Chile 1545 | Kenneth 1546 | Denmark 1547 | MaliVai 1548 | Todd 1549 | 7-3 1550 | Philippoussis 1551 | Marc 1552 | Rosset 1553 | Switzerland 1554 | 8-6 1555 | 9 1556 | Cedric 1557 | Pioline 1558 | 7-1 1559 | Patrick 1560 | Rafter 1561 | 11 1562 | Alberto 1563 | 6-1 1564 | Petr 1565 | Korda 1566 | Francisco 1567 | Vacek 1568 | 13 1569 | Stoltenberg 1570 | Woodbridge 1571 | O'Brien 1572 | Black 1573 | Zimbabwe 1574 | 7-2 1575 | Bohdan 1576 | Ulihrach 1577 | Andrea 1578 | Henman 1579 | walkover 1580 | provincial 1581 | fast 1582 | professional 1583 | EUROPEAN 1584 | TIRANA 1585 | Winners 1586 | qualifying 1587 | soccer 1588 | Tirana 1589 | Chemlon 1590 | Humenne 1591 | 0-0 1592 | minute 1593 | 54th 1594 | 5,000 1595 | win 1596 | Chorzow 1597 | Ruch 1598 | Wales 1599 | 1-0 1600 | 47th 1601 | Larnaca 1602 | Cyprus 1603 | 2-0 1604 | 60th 1605 | penalty 1606 | 5-1 1607 | Lithuania 1608 | Sion 1609 | Nyva 1610 | Estonia 1611 | 3,000 1612 | Aggregate 1613 | score 1614 | 2-2 1615 | goals 1616 | rule 1617 | Brann 1618 | Norway 1619 | 10th 1620 | 5-2 1621 | Sofia 1622 | Bulgaria 1623 | Slovenia 1624 | 58th 1625 | 25,000 1626 | 4-3 1627 | penalties 1628 | Vaduz 1629 | Latvia 1630 | 90th 1631 | Luxembourg 1632 | Dynamo 1633 | Georgia 1634 | Prague 1635 | Sparta 1636 | Northern 1637 | 4-0 1638 | 26th 1639 | 19th 1640 | 80th 1641 | 86th 1642 | Hearts 1643 | Star 1644 | Belgrade 1645 | Yugoslavia 1646 | 59th 1647 | Hapoel 1648 | Moldova 1649 | Hungary 1650 | Add 1651 | 1,500 1652 | OUT 1653 | BUDAPEST 1654 | drew 1655 | tie 1656 | played 1657 | 4-1 1658 | 15th 1659 | Andreas 1660 | DE 1661 | Brazilian 1662 | championship 1663 | Atletico 1664 | SYDNEY 1665 | captain 1666 | Newcombe 1667 | resignation 1668 | Wimbledon 1669 | champion 1670 | coach 1671 | Tony 1672 | determined 1673 | events 1674 | lose 1675 | look 1676 | giving 1677 | someone 1678 | else 1679 | Sydney 1680 | world 1681 | Under 1682 | leadership 1683 | slipped 1684 | Since 1685 | doubles 1686 | partner 1687 | wins 1688 | losses 1689 | selected 1690 | semifinalist 1691 | Olympic 1692 | Croatian 1693 | best 1694 | described 1695 | faced 1696 | 1986 1697 | beaten 1698 | ago 1699 | Men 1700 | singles 1701 | 9/16 1702 | Korea 1703 | 15-7 1704 | Malaysia 1705 | Abdul 1706 | 3/4 1707 | Van 1708 | Netherlands 1709 | 15-11 1710 | Indonesia 1711 | 15-6 1712 | 15-8 1713 | 15-12 1714 | Women 1715 | 11-6 1716 | Sun 1717 | Liu 1718 | Zealand 1719 | 'S 1720 | DRAW 
1721 | NEW 1722 | draw 1723 | championships 1724 | beginning 1725 | U.S 1726 | Tennis 1727 | Centre 1728 | Pete 1729 | Sampras 1730 | vs. 1731 | Adrian 1732 | Magnus 1733 | vs 1734 | Qualifier 1735 | Andrei 1736 | Roberto 1737 | ------------------------ 1738 | Christian 1739 | Grant 1740 | Fernando 1741 | Brazil 1742 | Kafelnikov 1743 | Johansson 1744 | Medvedev 1745 | Fleurian 1746 | 14 1747 | Costa 1748 | Jonathan 1749 | Bernd 1750 | Stefan 1751 | Edberg 1752 | Richard 1753 | Krajicek 1754 | Andre 1755 | Agassi 1756 | Colombia 1757 | Carlos 1758 | Kevin 1759 | Kim 1760 | Nicolas 1761 | Ecuador 1762 | Alami 1763 | Enqvist 1764 | Stephane 1765 | Belgium 1766 | Bahamas 1767 | Stich 1768 | Adams 1769 | Javier 1770 | Argentina 1771 | Stefano 1772 | Venezuela 1773 | Jeff 1774 | BASEBALL 1775 | BALTIMORE 1776 | Baltimore 1777 | Orioles 1778 | Johnson 1779 | miss 1780 | Seattle 1781 | Mariners 1782 | hospital 1783 | treated 1784 | William 1785 | adding 1786 | released 1787 | Andy 1788 | manage 1789 | absence 1790 | Angels 1791 | Columbia 1792 | Hospital 1793 | blood 1794 | eight 1795 | seasons 1796 | Cincinnati 1797 | Reds 1798 | League 1799 | games 1800 | pull 1801 | within 1802 | Yankees 1803 | American 1804 | Division 1805 | MAJOR 1806 | LEAGUE 1807 | STANDINGS 1808 | WEDNESDAY 1809 | GAMES 1810 | Major 1811 | Baseball 1812 | standings 1813 | tabulate 1814 | lost 1815 | winning 1816 | percentage 1817 | AMERICAN 1818 | EASTERN 1819 | DIVISION 1820 | W 1821 | L 1822 | GB 1823 | 58 1824 | BOSTON 1825 | 64 1826 | .496 1827 | .457 1828 | DETROIT 1829 | 82 1830 | 28 1831 | CENTRAL 1832 | CLEVELAND 1833 | MINNESOTA 1834 | .500 1835 | MILWAUKEE 1836 | 60 1837 | .469 1838 | KANSAS 1839 | CITY 1840 | 18 1841 | WESTERN 1842 | TEXAS 1843 | SEATTLE 1844 | 61 1845 | OAKLAND 1846 | 62 1847 | CALIFORNIA 1848 | THURSDAY 1849 | AUGUST 1850 | SCHEDULE 1851 | ATLANTA 1852 | 46 1853 | MONTREAL 1854 | FLORIDA 1855 | PHILADELPHIA 1856 | HOUSTON 1857 | ST 1858 | LOUIS 1859 | .504 1860 | CINCINNATI 1861 | PITTSBURGH 1862 | SAN 1863 | DIEGO 1864 | .543 1865 | LOS 1866 | ANGELES 1867 | COLORADO 1868 | 65 1869 | FRANCISCO 1870 | CAPS 1871 | Chicago 1872 | Milwaukee 1873 | Oakland 1874 | Texas 1875 | Pittsburgh 1876 | St 1877 | Louis 1878 | Philadelphia 1879 | Montreal 1880 | two-run 1881 | homer 1882 | inning 1883 | rallied 1884 | Cleveland 1885 | Indians 1886 | rubber 1887 | three-game 1888 | series 1889 | With 1890 | pitch 1891 | tried 1892 | big 1893 | crowd 1894 | shot 1895 | Rodriguez 1896 | double 1897 | Johnny 1898 | Terry 1899 | Kennedy 1900 | 40 1901 | nine 1902 | meetings 1903 | Western 1904 | Field 1905 | teams 1906 | Central 1907 | extra 1908 | ninth 1909 | single 1910 | save 1911 | got 1912 | allowing 1913 | hits 1914 | walks 1915 | strikeouts 1916 | scoreless 1917 | Dean 1918 | 30th 1919 | Ripken 1920 | solo 1921 | Bobby 1922 | Bonilla 1923 | three-run 1924 | seventh 1925 | power 1926 | runs 1927 | fifth 1928 | bottom 1929 | 21st 1930 | starter 1931 | blast 1932 | Young 1933 | allowed 1934 | relief 1935 | RBI 1936 | cut 1937 | straight 1938 | homers 1939 | dropped 1940 | row 1941 | became 1942 | major-league 1943 | history 1944 | 34 1945 | scattered 1946 | debut 1947 | each 1948 | Derek 1949 | settled 1950 | Jimmy 1951 | Key 1952 | interim 1953 | Boston 1954 | Mike 1955 | Sox 1956 | Athletics 1957 | owns 1958 | career 1959 | bases 1960 | loaded 1961 | went 1962 | walk 1963 | stole 1964 | homered 1965 | drove 1966 | Detroit 1967 | capped 1968 | eighth 1969 | Tigers 1970 | consecutive 1971 | Kansas 1972 | 
City 1973 | Juan 1974 | Guzman 1975 | span 1976 | earned 1977 | Minnesota 1978 | five-run 1979 | Brewers 1980 | Twins 1981 | Jose 1982 | PSV 1983 | WIN 1984 | Cocu 1985 | Eindhoven 1986 | Nijmegen 1987 | kick 1988 | minutes 1989 | Arthur 1990 | Nilis 1991 | Ajax 1992 | defence 1993 | NAC 1994 | Breda 1995 | DUTCH 1996 | SUMMARY 1997 | 11th 1998 | Halftime 1999 | 1-2 2000 | RESULT 2001 | GENEVA 2002 | 26 2003 | recalled 2004 | Jorge 2005 | finals 2006 | clearly 2007 | progress 2008 | beyond 2009 | phase 2010 | Euro 2011 | Squad 2012 | Marco 2013 | Grasshoppers 2014 | Hamburg 2015 | Antonio 2016 | Stuttgart 2017 | Milan 2018 | Borussia 2019 | Dortmund 2020 | ATHLETICS 2021 | RECORD 2022 | 40,000 2023 | THE 2024 | Brussels 2025 | grand 2026 | prix 2027 | athletes 2028 | glass 2029 | Belgian 2030 | pay 2031 | organisers 2032 | rounds 2033 | ever 2034 | already 2035 | records 2036 | broken 2037 | women 2038 | 1,000 2039 | ROUND 2040 | Leading 2041 | golf 2042 | Broadhurst 2043 | Raymond 2044 | Ian 2045 | Woosnam 2046 | Roe 2047 | Carl 2048 | Stephen 2049 | Lawrie 2050 | Max 2051 | Carter 2052 | Lee 2053 | Miguel 2054 | Angel 2055 | Gary 2056 | Eales 2057 | Williams 2058 | Andrew 2059 | Robert 2060 | Marcus 2061 | Pedro 2062 | Linhart 2063 | Price 2064 | UEFA 2065 | additional 2066 | headed 2067 | rankings 2068 | account 2069 | factors 2070 | including 2071 | red 2072 | national 2073 | allocated 2074 | places 2075 | 4. 2076 | 5. 2077 | 6. 2078 | 7. 2079 | 8. 2080 | Belarus 2081 | 9. 2082 | 10. 2083 | 13. 2084 | 15. 2085 | 16. 2086 | Portugal 2087 | Greece 2088 | MATCH 2089 | COLOMBO 2090 | Armed 2091 | police 2092 | ground 2093 | tour 2094 | Sri 2095 | Lanka 2096 | youth 2097 | limited 2098 | overs 2099 | includes 2100 | India 2101 | promised 2102 | presence 2103 | policemen 2104 | making 2105 | ethnic 2106 | violence 2107 | balls 2108 | fours 2109 | made 2110 | SOFIA 2111 | One 2112 | Romanian 2113 | others 2114 | injured 2115 | bus 2116 | collided 2117 | Bulgarian 2118 | morning 2119 | road 2120 | towns 2121 | woman 2122 | Maria 2123 | 35 2124 | accident 2125 | OFFICIAL 2126 | OJ 2127 | * 2128 | Note 2129 | contents 2130 | displayed 2131 | order 2132 | printed 2133 | Journal 2134 | Regulation 2135 | invitation 2136 | tender 2137 | refunds 2138 | white 2139 | certain 2140 | payments 2141 | system 2142 | producers 2143 | values 2144 | END 2145 | Home 2146 | Health 2147 | appeal 2148 | District 2149 | Court 2150 | decision 2151 | reimbursement 2152 | previously 2153 | regarding 2154 | related 2155 | community 2156 | personnel 2157 | continues 2158 | believe 2159 | majority 2160 | terms 2161 | Medicare 2162 | program 2163 | resolution 2164 | recorded 2165 | reserve 2166 | equal 2167 | revenue 2168 | ruled 2169 | court 2170 | dispute 2171 | pleased 2172 | significant 2173 | toward 2174 | Newsdesk 2175 | div 2176 | distribution 2177 | rate 2178 | quarterly 2179 | improved 2180 | ended 2181 | declared 2182 | partnership 2183 | unit 2184 | Best 2185 | sees 2186 | Co 2187 | Chairman 2188 | Chief 2189 | retailer 2190 | annual 2191 | even 2192 | emerged 2193 | always 2194 | particularly 2195 | you 2196 | something 2197 | stores 2198 | fall 2199 | closing 2200 | plan 2201 | states 2202 | For 2203 | 29 2204 | 1996-08-23 2205 | researchers 2206 | Swedish 2207 | born 2208 | caught 2209 | developed 2210 | cases 2211 | School 2212 | delivered 2213 | University 2214 | 1949 2215 | Four 2216 | children 2217 | wrote 2218 | medical 2219 | sometimes 2220 | require 2221 | surgery 2222 | pain 2223 | weight 2224 | 
involved 2225 | especially 2226 | often 2227 | cause 2228 | Most 2229 | high 2230 | baby 2231 | All 2232 | key 2233 | industrial 2234 | Following 2235 | survey 2236 | Industry 2237 | AUG 2238 | book 2239 | stocks 2240 | goods 2241 | expectations 2242 | above 2243 | normal 2244 | below 2245 | companies 2246 | representing 2247 | employees 2248 | brokers 2249 | built 2250 | 1989 2251 | buyers 2252 | Some 2253 | subject 2254 | pills 2255 | cholesterol 2256 | finds 2257 | show 2258 | doctors 2259 | Oxford 2260 | people 2261 | benefit 2262 | garlic 2263 | levels 2264 | 900 2265 | groups 2266 | receiving 2267 | Several 2268 | pressure 2269 | either 2270 | special 2271 | trial 2272 | makes 2273 | address 2274 | whole 2275 | affect 2276 | gives 2277 | aid 2278 | Caribbean 2279 | 39 2280 | development 2281 | much 2282 | population 2283 | living 2284 | south 2285 | fled 2286 | times 2287 | Plymouth 2288 | north 2289 | provide 2290 | track 2291 | programme 2292 | area 2293 | active 2294 | recent 2295 | shown 2296 | remain 2297 | she 2298 | Bill 2299 | Grand 2300 | Slam 2301 | seed 2302 | Graf 2303 | aiming 2304 | able 2305 | major 2306 | begins 2307 | opens 2308 | crown 2309 | Basuki 2310 | ceremony 2311 | both 2312 | holders 2313 | rising 2314 | defeat 2315 | While 2316 | semifinal 2317 | star 2318 | tested 2319 | probably 2320 | repeat 2321 | landing 2322 | sixth 2323 | avoided 2324 | possibility 2325 | quarter-finals 2326 | ranked 2327 | Monica 2328 | Seles 2329 | Anne 2330 | Miller 2331 | victim 2332 | Austrian 2333 | seeded 2334 | faces 2335 | playing 2336 | yet 2337 | unfortunate 2338 | first-round 2339 | popular 2340 | affair 2341 | Frenchman 2342 | injury 2343 | runner-up 2344 | Anke 2345 | Huber 2346 | Conchita 2347 | Martinez 2348 | Lindsay 2349 | Davenport 2350 | looking 2351 | opponents 2352 | Amanda 2353 | Coetzer 2354 | young 2355 | Hingis 2356 | clash 2357 | Jana 2358 | Novotna 2359 | 61-2 2360 | 9373-1800 2361 | RTRS 2362 | worked 2363 | along 2364 | unseeded 2365 | Other 2366 | hour 2367 | 55 2368 | point 2369 | break 2370 | compatriot 2371 | 13th 2372 | knew 2373 | service 2374 | serving 2375 | really 2376 | chances 2377 | every 2378 | come 2379 | Playing 2380 | maybe 2381 | difference 2382 | deficit 2383 | surprise 2384 | 32 2385 | quick 2386 | arrived 2387 | And 2388 | soon 2389 | 'm 2390 | fought 2391 | hoped 2392 | tight 2393 | helped 2394 | Soccer 2395 | Korean 2396 | SEOUL 2397 | Anyang 2398 | Puchon 2399 | Suwon 2400 | Standings 2401 | drawn 2402 | D 2403 | G 2404 | F 2405 | P 2406 | Chonan 2407 | outbreak 2408 | kills 2409 | central 2410 | staff 2411 | appear 2412 | 160 2413 | miles 2414 | southeast 2415 | Nigerian 2416 | Liberia 2417 | commander 2418 | latest 2419 | civil 2420 | officers 2421 | dozen 2422 | accords 2423 | difficult 2424 | painful 2425 | Nations 2426 | observers 2427 | travelling 2428 | western 2429 | delayed 2430 | shooting 2431 | highway 2432 | Anthony 2433 | finally 2434 | faction 2435 | Saturday 2436 | breaking 2437 | disarmament 2438 | 10,000 2439 | Community 2440 | Guinea 2441 | prayer 2442 | repeatedly 2443 | dead 2444 | reason 2445 | clear 2446 | Organisation 2447 | Conference 2448 | prayers 2449 | army 2450 | ordered 2451 | crackdown 2452 | seized 2453 | death 2454 | 1993 2455 | bottle 2456 | JOHANNESBURG 2457 | boy 2458 | girl 2459 | whose 2460 | Nelson 2461 | old 2462 | prison 2463 | son 2464 | Island 2465 | winter 2466 | letter 2467 | ordinary 2468 | post 2469 | age 2470 | reply 2471 | Atlantic 2472 | Ocean 2473 | belonging 2474 | couple 2475 | 
front 2476 | garden 2477 | house 2478 | Johannesburg 2479 | His 2480 | body 2481 | parents 2482 | unclear 2483 | enough 2484 | becoming 2485 | CPI 2486 | m 2487 | Current 2488 | NBH 2489 | bln 2490 | % 2491 | Government 2492 | 1998 2493 | & 2494 | Thomson 2495 | Moody 2496 | Investors 2497 | Rating 2498 | Agency 2499 | flow 2500 | Budapest 2501 | 36 2502 | die 2503 | MOSCOW 2504 | least 2505 | separatist 2506 | Chechen 2507 | Grozny 2508 | Interfax 2509 | command 2510 | Chechnya 2511 | 200 2512 | interior 2513 | mission 2514 | confirmed 2515 | Lebed 2516 | chief-of-staff 2517 | Aslan 2518 | Maskhadov 2519 | agreement 2520 | noon 2521 | letters 2522 | threatening 2523 | explained 2524 | money 2525 | residents 2526 | lives 2527 | nearby 2528 | railway 2529 | station 2530 | school 2531 | books 2532 | case 2533 | poor 2534 | family 2535 | trouble 2536 | index 2537 | pts 2538 | sign 2539 | Boris 2540 | Yeltsin 2541 | security 2542 | renewed 2543 | document 2544 | negotiated 2545 | village 2546 | Itar-Tass 2547 | provided 2548 | aide 2549 | Press 2550 | completed 2551 | showed 2552 | nominee 2553 | returned 2554 | Kremlin 2555 | two-day 2556 | Bosnia 2557 | Sarajevo 2558 | Bosnian 2559 | federation 2560 | common 2561 | taxes 2562 | kuna 2563 | mark 2564 | currency 2565 | introduced 2566 | Serbian 2567 | mines 2568 | According 2569 | mine 2570 | briefly 2571 | overnight 2572 | planes 2573 | artillery 2574 | although 2575 | heard 2576 | speaking 2577 | flew 2578 | firing 2579 | anything 2580 | separatists 2581 | halt 2582 | threatened 2583 | bombing 2584 | assault 2585 | passengers 2586 | rescued 2587 | Colombian 2588 | coast 2589 | BOGOTA 2590 | Coast 2591 | boat 2592 | Pacific 2593 | missing 2594 | trip 2595 | fuel 2596 | sea 2597 | Argentine 2598 | BUENOS 2599 | AIRES 2600 | iron 2601 | production 2602 | cold 2603 | Buenos 2604 | Aires 2605 | Peru 2606 | kill 2607 | terrorist 2608 | sources 2609 | Maoist 2610 | small 2611 | northeast 2612 | propaganda 2613 | centre 2614 | By 2615 | 1992 2616 | activities 2617 | guerrilla 2618 | cost 2619 | damage 2620 | 1980 2621 | Former 2622 | Brunswijk 2623 | custody 2624 | charged 2625 | attempted 2626 | murder 2627 | turned 2628 | himself 2629 | Pinas 2630 | mining 2631 | 56 2632 | 90 2633 | east 2634 | showing 2635 | cuts 2636 | wife 2637 | charges 2638 | merely 2639 | less 2640 | regime 2641 | conflict 2642 | 500 2643 | caused 2644 | thousands 2645 | neighbouring 2646 | 1980s 2647 | eventually 2648 | Despite 2649 | businessman 2650 | interests 2651 | saw 2652 | leads 2653 | Thai 2654 | heroin 2655 | BANGKOK 2656 | Hong 2657 | Kong 2658 | arrested 2659 | kg 2660 | searched 2661 | Police 2662 | several 2663 | street 2664 | baht 2665 | Officials 2666 | detained 2667 | pending 2668 | formal 2669 | follows 2670 | nations 2671 | consulate 2672 | colony 2673 | sell 2674 | issues 2675 | include 2676 | plans 2677 | pact 2678 | Canberra 2679 | Tibet 2680 | exiled 2681 | conservative 2682 | Information 2683 | written 2684 | critics 2685 | self-rule 2686 | deals 2687 | anyone 2688 | legal 2689 | Amman 2690 | owner 2691 | copies 2692 | citizen 2693 | accord 2694 | On 2695 | confiscated 2696 | insisted 2697 | Authority 2698 | strategy 2699 | freedom 2700 | resulted 2701 | mistakes 2702 | explain 2703 | journalists 2704 | banned 2705 | sure 2706 | selling 2707 | think 2708 | ? 
2709 | Jewish 2710 | handed 2711 | parts 2712 | Istanbul 2713 | CAIRO 2714 | airport 2715 | fire 2716 | taxi 2717 | onto 2718 | line 2719 | Mohamed 2720 | conference 2721 | instead 2722 | yards 2723 | aviation 2724 | noted 2725 | pilot 2726 | Its 2727 | private 2728 | wants 2729 | nothing 2730 | Sudanese 2731 | Egyptian 2732 | militants 2733 | Mubarak 2734 | speech 2735 | Moslem 2736 | Security 2737 | Council 2738 | flights 2739 | Khartoum 2740 | incident 2741 | effect 2742 | Sudan 2743 | fails 2744 | cannot 2745 | Ethiopia 2746 | Front 2747 | far 2748 | relations 2749 | shares 2750 | shed 2751 | profit-taking 2752 | amid 2753 | volume 2754 | trillion 2755 | rise 2756 | profit 2757 | session 2758 | gained 2759 | Shares 2760 | Of 2761 | stable 2762 | Miss 2763 | Universe 2764 | Machado 2765 | Mexico 2766 | questions 2767 | claims 2768 | crash 2769 | 19 2770 | Los 2771 | Angeles 2772 | attended 2773 | USA 2774 | Inc. 2775 | drop 2776 | losing 2777 | 112 2778 | blocked 2779 | appearance 2780 | stage 2781 | She 2782 | stay 2783 | returning 2784 | Sept 2785 | alleged 2786 | putting 2787 | fine 2788 | happened 2789 | associated 2790 | routine 2791 | Kevorkian 2792 | suicide 2793 | bringing 2794 | suffering 2795 | multiple 2796 | emergency 2797 | room 2798 | Patricia 2799 | Smith 2800 | midday 2801 | attending 2802 | starting 2803 | doctor 2804 | lawyer 2805 | husband 2806 | father 2807 | James 2808 | Judith 2809 | Massachusetts 2810 | suffered 2811 | life 2812 | As 2813 | Sale 2814 | Amount 2815 | Municipal 2816 | Desk 2817 | 212-859-1650 2818 | jailed 2819 | sentenced 2820 | Lauck 2821 | base 2822 | Lincoln 2823 | abuse 2824 | struggle 2825 | network 2826 | material 2827 | judge 2828 | greatest 2829 | Nazi 2830 | slaughter 2831 | millions 2832 | complex 2833 | prosecutor 2834 | demanded 2835 | five-year 2836 | satisfied 2837 | sentence 2838 | illegal 2839 | laws 2840 | produce 2841 | magazines 2842 | Interior 2843 | biggest 2844 | Social 2845 | Democrats 2846 | suit 2847 | spent 2848 | nor 2849 | extradition 2850 | truth 2851 | actions 2852 | carried 2853 | Socialist 2854 | Workers 2855 | name 2856 | Battle 2857 | magazine 2858 | request 2859 | convicted 2860 | organisations 2861 | arrest 2862 | NATIONS 2863 | arrangements 2864 | quite 2865 | monitors 2866 | carry 2867 | Department 2868 | deputy 2869 | Algeria 2870 | TV 2871 | Algerian 2872 | suspected 2873 | killing 2874 | sought 2875 | Roman 2876 | Catholic 2877 | bomb 2878 | Algiers 2879 | 50,000 2880 | 110 2881 | foreigners 2882 | radical 2883 | Islamists 2884 | commanding 2885 | flown 2886 | table 2887 | shows 2888 | airports 2889 | association 2890 | Berlin 2891 | 202 2892 | Bremen 2893 | 4.4 2894 | Frankfurt 2895 | 1.5 2896 | 3.5 2897 | Cologne 2898 | Munich 2899 | TOTAL 2900 | Air 2901 | research 2902 | Not 2903 | forecast 2904 | performance 2905 | EPS 2906 | Dividend 2907 | Fax 2908 | Clinton 2909 | Ballybunion 2910 | fans 2911 | resort 2912 | Irish 2913 | America 2914 | Dick 2915 | Spring 2916 | convention 2917 | bring 2918 | addressed 2919 | packed 2920 | Dublin 2921 | us 2922 | Frank 2923 | process 2924 | Ischinger 2925 | seek 2926 | solution 2927 | Cooperation 2928 | OSCE 2929 | personal 2930 | threat 2931 | positive 2932 | responsible 2933 | global 2934 | nuclear 2935 | treaty 2936 | Delhi 2937 | intended 2938 | Gujral 2939 | block 2940 | Geneva 2941 | entering 2942 | signing 2943 | weapons 2944 | Treaty 2945 | bilateral 2946 | position 2947 | clause 2948 | Asked 2949 | Assembly 2950 | cross 2951 | fact 2952 | weapon 2953 | 
concerns 2954 | impossible 2955 | option 2956 | carrying 2957 | tests 2958 | accept 2959 | DHAKA 2960 | Dhaka 2961 | Bangladesh 2962 | importance 2963 | Mia 2964 | Commonwealth 2965 | Affairs 2966 | Liam 2967 | Fox 2968 | arriving 2969 | injuries 2970 | complained 2971 | wrist 2972 | act 2973 | seriously 2974 | reasons 2975 | important 2976 | leave 2977 | Nepal 2978 | governments 2979 | House 2980 | matter 2981 | investigation 2982 | outcome 2983 | connection 2984 | business 2985 | Kashmir 2986 | polls 2987 | planning 2988 | troubled 2989 | seems 2990 | 1987 2991 | direct 2992 | create 2993 | among 2994 | growing 2995 | independence 2996 | hopes 2997 | 20,000 2998 | Over 2999 | engineering 3000 | banks 3001 | edged 3002 | sharply 3003 | investors 3004 | Stock 3005 | Exchange 3006 | turnover 3007 | taka 3008 | remained 3009 | unchanged 3010 | recovered 3011 | edge 3012 | ratio 3013 | Reserve 3014 | bank 3015 | governor 3016 | Rangarajan 3017 | maintained 3018 | reduced 3019 | having 3020 | supply 3021 | increasing 3022 | stood 3023 | commitment 3024 | response 3025 | gross 3026 | product 3027 | GDP 3028 | real 3029 | Bombay 3030 | Mother 3031 | Teresa 3032 | Nobel 3033 | Prize 3034 | hope 3035 | love 3036 | nun 3037 | Albanian 3038 | need 3039 | missionary 3040 | intensive 3041 | Indian 3042 | heart 3043 | 86 3044 | condition 3045 | task 3046 | alone 3047 | Calcutta 3048 | grew 3049 | 80 3050 | highest 3051 | award 3052 | Her 3053 | Vatican 3054 | Missionaries 3055 | Charity 3056 | More 3057 | delegates 3058 | elect 3059 | Rome 3060 | receive 3061 | failing 3062 | stopped 3063 | Serbia 3064 | religious 3065 | begin 3066 | approved 3067 | training 3068 | missionaries 3069 | rest 3070 | 150 3071 | homes 3072 | destitute 3073 | founded 3074 | forecasts 3075 | boost 3076 | Bernard 3077 | News 3078 | Ltd 3079 | 1995/96 3080 | profits 3081 | film 3082 | Day 3083 | 1996/97 3084 | From 3085 | begun 3086 | motion 3087 | picture 3088 | orders 3089 | advertising 3090 | shareholders 3091 | A$ 3092 | US$ 3093 | cents 3094 | Analysts 3095 | disappointing 3096 | outlook 3097 | First 3098 | offset 3099 | operations 3100 | hard 3101 | operating 3102 | cover 3103 | Times 3104 | revenues 3105 | arm 3106 | operation 3107 | hurt 3108 | San 3109 | minimum 3110 | setting 3111 | pulled 3112 | 're 3113 | internal 3114 | declined 3115 | budget 3116 | realised 3117 | raising 3118 | pledged 3119 | effects 3120 | activity 3121 | generally 3122 | long-term 3123 | investment 3124 | NZ 3125 | lending 3126 | WELLINGTON 3127 | cutting 3128 | 10.5 3129 | Wellington 3130 | Power 3131 | approach 3132 | followed 3133 | equipment 3134 | hunt 3135 | Thailand 3136 | launched 3137 | Bangkok 3138 | drug 3139 | escape 3140 | confident 3141 | trafficking 3142 | window 3143 | climbed 3144 | bed 3145 | department 3146 | prevent 3147 | 266 3148 | Tokyo 3149 | parent 3150 | TOKYO 3151 | Year 3152 | billions 3153 | yen 3154 | specified 3155 | LATEST 3156 | ACTUAL 3157 | FORECAST 3158 | YEAR-AGO 3159 | Sales 3160 | 400 3161 | NOTE 3162 | HK$ 3163 | 43 3164 | mln 3165 | HONG 3166 | KONG 3167 | placed 3168 | basis 3169 | 852 3170 | applications 3171 | believes 3172 | respect 3173 | safety 3174 | maximum 3175 | 2000 3176 | JAKARTA 3177 | Indonesian 3178 | ** 3179 | Jakarta 3180 | involving 3181 | demonstrators 3182 | Dow 3183 | streak 3184 | beating 3185 | rupiah 3186 | trading 3187 | Super 3188 | offer 3189 | subsidiary 3190 | loans 3191 | management 3192 | build 3193 | property 3194 | projects 3195 | worth 3196 | Surabaya 3197 | stock 3198 | 
change 3199 | lows 3200 | Also 3201 | Japanese 3202 | pound 3203 | gold 3204 | closes 3205 | .... 3206 | Gold 3207 | AND 3208 | Nikkei 3209 | Dec 3210 | CAC-40 3211 | marks 3212 | post-Soviet 3213 | effectively 3214 | blow 3215 | Soviet 3216 | largest 3217 | initially 3218 | accompanied 3219 | signs 3220 | economy 3221 | Palace 3222 | reforms 3223 | doubt 3224 | replace 3225 | 33 3226 | postpone 3227 | NATO 3228 | policy 3229 | protest 3230 | Kiev 3231 | itself 3232 | eastern 3233 | strategic 3234 | though 3235 | push 3236 | membership 3237 | anniversary 3238 | factor 3239 | united 3240 | complete 3241 | example 3242 | Gazeta 3243 | published 3244 | language 3245 | barter 3246 | concerned 3247 | Volkova 3248 | Committee 3249 | issued 3250 | decree 3251 | situation 3252 | traders 3253 | 180 3254 | preferred 3255 | systems 3256 | various 3257 | +7095 3258 | 941 3259 | 8520 3260 | release 3261 | version 3262 | Goldman 3263 | Brian 3264 | De 3265 | criticised 3266 | batsman 3267 | delay 3268 | century 3269 | rain 3270 | improvement 3271 | weather 3272 | announce 3273 | event 3274 | unbeaten 3275 | bowled 3276 | MOTOR 3277 | BELGIAN 3278 | GRAND 3279 | PRIX 3280 | TIMES 3281 | Gerhard 3282 | Berger 3283 | Benetton 3284 | seconds 3285 | McLaren 3286 | Jacques 3287 | Villeneuve 3288 | Mika 3289 | Hakkinen 3290 | Jean 3291 | Alesi 3292 | Damon 3293 | Hill 3294 | Schumacher 3295 | 11. 3296 | Herbert 3297 | Sauber 3298 | 12. 3299 | Olivier 3300 | Ligier 3301 | [ 3302 | ] 3303 | Karina 3304 | Habsudova 3305 | Portsmouth 3306 | Queens 3307 | Tranmere 3308 | Grimsby 3309 | Stirling 3310 | Gooch 3311 | 389 3312 | K. 3313 | 310 3314 | 83 3315 | 181 3316 | 109 3317 | Fairbrother 3318 | Pollock 3319 | county 3320 | returns 3321 | 106 3322 | 116 3323 | wicket 3324 | 1-106 3325 | 4-95 3326 | Middlesbrough 3327 | Italian 3328 | seen 3329 | forward 3330 | fun 3331 | foot 3332 | Chelsea 3333 | reached 3334 | advanced 3335 | Americans 3336 | 2-6 3337 | getting 3338 | pretty 3339 | things 3340 | missed 3341 | felt 3342 | midnight 3343 | person 3344 | aggressive 3345 | affected 3346 | Tour 3347 | hitting 3348 | ball 3349 | lot 3350 | shots 3351 | mostly 3352 | RUGBY 3353 | UNION 3354 | SECOND 3355 | check 3356 | means 3357 | speculation 3358 | wing 3359 | shortly 3360 | seedings 3361 | 15-10 3362 | 15-9 3363 | Rubin 3364 | player 3365 | moved 3366 | Mary 3367 | McGrath 3368 | 47 3369 | .531 3370 | slam 3371 | pair 3372 | Brady 3373 | Anderson 3374 | walked 3375 | none 3376 | struck 3377 | Charlton 3378 | Ken 3379 | Edgar 3380 | advantage 3381 | heading 3382 | Roger 3383 | pinch-hitter 3384 | majors 3385 | pitched 3386 | 0-1 3387 | apiece 3388 | claimed 3389 | Thompson 3390 | route 3391 | Erik 3392 | Fernandez 3393 | LISBON 3394 | Sporting 3395 | Luis 3396 | 3-1 3397 | Although 3398 | 35th 3399 | restored 3400 | 38th 3401 | 57th 3402 | reigning 3403 | Porto 3404 | Benfica 3405 | Portuguese 3406 | Pauli 3407 | candidates 3408 | produced 3409 | Bundesliga 3410 | Schalke 3411 | 64th 3412 | thanks 3413 | Hansa 3414 | Rostock 3415 | clocked 3416 | SUMMARIES 3417 | Summaries 3418 | FRENCH 3419 | Nancy 3420 | Germain 3421 | 15,000 3422 | Svetlana 3423 | Masterkova 3424 | Zurich 3425 | lap 3426 | Mozambique 3427 | Mutola 3428 | stadium 3429 | pushed 3430 | capacity 3431 | Atlanta 3432 | plus 3433 | 100,000 3434 | tabulated 3435 | 129 3436 | Diego 3437 | Trinidad 3438 | Cea 3439 | Barry 3440 | Dennis 3441 | Mitchell 3442 | present 3443 | bright 3444 | Donovan 3445 | Bailey 3446 | Linford 3447 | Christie 3448 | 1988 3449 | 
sport 3450 | bronze 3451 | medallist 3452 | hurdles 3453 | Allen 3454 | Colin 3455 | Kingdom 3456 | seemed 3457 | finish 3458 | faster 3459 | speed 3460 | stepped 3461 | crashed 3462 | Michelle 3463 | Freeman 3464 | Cuban 3465 | Lopez 3466 | Seven 3467 | medal 3468 | settle 3469 | 11.00 3470 | Natalya 3471 | Irina 3472 | Jamaica 3473 | Cuba 3474 | Julie 3475 | Johan 3476 | Kenya 3477 | Nigeria 3478 | Jon 3479 | Sarah 3480 | Joseph 3481 | Uganda 3482 | Frankie 3483 | Fredericks 3484 | Namibia 3485 | Bob 3486 | jump 3487 | Edwards 3488 | Wellman 3489 | Burundi 3490 | Rwanda 3491 | Anton 3492 | UP 3493 | Paulo 3494 | Juventus 3495 | member 3496 | Santos 3497 | entire 3498 | friendly 3499 | Zabrze 3500 | nominated 3501 | Jens 3502 | however 3503 | defender 3504 | formally 3505 | 'll 3506 | themselves 3507 | Oliver 3508 | Klinsmann 3509 | cup 3510 | Legia 3511 | Warsaw 3512 | Brugge 3513 | SRI 3514 | LANKA 3515 | guilty 3516 | controversial 3517 | excellent 3518 | Colombo 3519 | Lankan 3520 | heavy 3521 | Healy 3522 | Angolan 3523 | Unita 3524 | joint 3525 | administration 3526 | installed 3527 | timetable 3528 | sending 3529 | estimates 3530 | MON 3531 | Gencor 3532 | YR 3533 | DIV 3534 | N 3535 | McCarthy 3536 | Group 3537 | falls 3538 | Shr 3539 | shr 3540 | Bureau 3541 | 416 3542 | discuss 3543 | fever 3544 | junior 3545 | changing 3546 | hands 3547 | developments 3548 | formed 3549 | Research 3550 | newsdesk 3551 | stake 3552 | WASHINGTON 3553 | 8.0 3554 | Securities 3555 | holding 3556 | holds 3557 | investments 3558 | large 3559 | reach 3560 | allow 3561 | operate 3562 | Miami 3563 | shift 3564 | weekly 3565 | add 3566 | status 3567 | services 3568 | Mideast 3569 | IOC 3570 | options 3571 | Samsung 3572 | Singapore 3573 | LG 3574 | Akron 3575 | worst 3576 | My 3577 | Maybe 3578 | your 3579 | field 3580 | Taibe 3581 | fields 3582 | Arabs 3583 | crowds 3584 | thing 3585 | promoted 3586 | Jerusalem 3587 | Haj 3588 | Yihye 3589 | treatment 3590 | driving 3591 | hearing 3592 | load 3593 | elsewhere 3594 | mayor 3595 | Football 3596 | Liverpool 3597 | Lynch 3598 | C 3599 | Rapid 3600 | Old 3601 | Trafford 3602 | Barcelona 3603 | suspects 3604 | Rwandan 3605 | Hutu 3606 | refugees 3607 | Zaire 3608 | Rally 3609 | Democracy 3610 | blown 3611 | Serb 3612 | casualties 3613 | find 3614 | Serbs 3615 | Yugoslav 3616 | minority 3617 | 200,000 3618 | reserves 3619 | assets 3620 | crime 3621 | WARSAW 3622 | organised 3623 | Kohl 3624 | sealed 3625 | links 3626 | materials 3627 | Chechens 3628 | fighters 3629 | soldiers 3630 | effective 3631 | relatively 3632 | roads 3633 | soldier 3634 | dark 3635 | try 3636 | BELGRADE 3637 | aboard 3638 | jet 3639 | diplomats 3640 | ends 3641 | powers 3642 | presidential 3643 | palace 3644 | different 3645 | sectors 3646 | elected 3647 | platform 3648 | nation 3649 | BRASILIA 3650 | bid 3651 | difficulties 3652 | huge 3653 | river 3654 | Tsang 3655 | Ali 3656 | Deputy 3657 | bread 3658 | riots 3659 | passed 3660 | Ibrahim 3661 | spying 3662 | comment 3663 | assistant 3664 | tension 3665 | King 3666 | Jordanian 3667 | secure 3668 | Islamist 3669 | houses 3670 | streets 3671 | communist 3672 | smaller 3673 | clashes 3674 | policies 3675 | blame 3676 | protests 3677 | Kurd 3678 | ANKARA 3679 | PKK 3680 | 12-year-old 3681 | autonomy 3682 | Davies 3683 | contacts 3684 | pursue 3685 | apparently 3686 | intent 3687 | sexual 3688 | a.m. 
3689 | girls 3690 | Virginia 3691 | entered 3692 | sexually 3693 | II 3694 | detective 3695 | allegedly 3696 | stabbed 3697 | once 3698 | identified 3699 | USDA 3700 | monitoring 3701 | Dan 3702 | corn 3703 | purchase 3704 | wheat 3705 | Republican 3706 | Gov 3707 | Weld 3708 | Senate 3709 | seat 3710 | incumbent 3711 | vote 3712 | facing 3713 | Cambridge 3714 | friend 3715 | Roosevelt 3716 | politics 3717 | friends 3718 | transport 3719 | Dm 3720 | 125 3721 | inch 3722 | mm 3723 | showers 3724 | isolated 3725 | Corporation 3726 | Tamil 3727 | directed 3728 | terrorism 3729 | finance 3730 | Liberation 3731 | king 3732 | Premier 3733 | giant 3734 | neighbours 3735 | distance 3736 | Queen 3737 | Nepali 3738 | Post 3739 | volatility 3740 | falling 3741 | trader 3742 | low 3743 | size 3744 | perhaps 3745 | You 3746 | makers 3747 | potential 3748 | yield 3749 | Bonds 3750 | Sachs 3751 | warrants 3752 | controls 3753 | DEM 3754 | X 3755 | challenge 3756 | Diana 3757 | Princess 3758 | criminal 3759 | Stenning 3760 | contest 3761 | everything 3762 | driver 3763 | rider 3764 | compensation 3765 | Prince 3766 | Charles 3767 | remove 3768 | asking 3769 | spring 3770 | effort 3771 | Necmettin 3772 | Erbakan 3773 | B 3774 | Turnover 3775 | listed 3776 | Index 3777 | editorial 3778 | Zenith 3779 | plant 3780 | set-top 3781 | boxes 3782 | gets 3783 | develop 3784 | addition 3785 | partners 3786 | Corp. 3787 | SBC 3788 | join 3789 | jobs 3790 | profitable 3791 | retail 3792 | Law 3793 | hundreds 3794 | comes 3795 | candidate 3796 | running 3797 | Many 3798 | involves 3799 | word 3800 | Windows 3801 | Microsoft 3802 | computer 3803 | fallen 3804 | software 3805 | launch 3806 | lines 3807 | customers 3808 | dollars 3809 | building 3810 | impact 3811 | lawsuit 3812 | tied 3813 | space 3814 | products 3815 | Office 3816 | handling 3817 | technical 3818 | corporate 3819 | adopted 3820 | 4.0 3821 | sweeping 3822 | slow 3823 | intercepted 3824 | everyone 3825 | hijacking 3826 | hijackers 3827 | hijacked 3828 | commercial 3829 | welfare 3830 | Wisconsin 3831 | administrative 3832 | Tommy 3833 | Human 3834 | reform 3835 | acquire 3836 | directly 3837 | limits 3838 | train 3839 | Alaska 3840 | boxcar 3841 | Phan 3842 | agents 3843 | Arkansas 3844 | churches 3845 | kms 3846 | located 3847 | investigating 3848 | idea 3849 | established 3850 | scene 3851 | St. 3852 | Turner 3853 | surrounded 3854 | cotton 3855 | rural 3856 | shared 3857 | suspicion 3858 | income 3859 | short-term 3860 | Calif. 
3861 | Systems 3862 | Business 3863 | 212 3864 | PTT 3865 | largely 3866 | view 3867 | ING 3868 | continuing 3869 | sound 3870 | weaker 3871 | bourse 3872 | prospects 3873 | first-half 3874 | ordering 3875 | poll 3876 | wounds 3877 | investigations 3878 | H1 3879 | traffic 3880 | freight 3881 | whales 3882 | lions 3883 | ca 3884 | article 3885 | model 3886 | using 3887 | Howard 3888 | warned 3889 | sharp 3890 | spending 3891 | Greens 3892 | Toyota 3893 | workers 3894 | 2,000 3895 | striking 3896 | voted 3897 | Melbourne 3898 | assembly 3899 | Niugini 3900 | surge 3901 | 38 3902 | considering 3903 | copper 3904 | project 3905 | students 3906 | gathered 3907 | staged 3908 | violent 3909 | university 3910 | intervention 3911 | supporting 3912 | nil 3913 | 48 3914 | Shanghai 3915 | stockpile 3916 | Just 3917 | metal 3918 | Export 3919 | CNIEC 3920 | Traders 3921 | tax 3922 | otherwise 3923 | spend 3924 | moving 3925 | exact 3926 | LME 3927 | behalf 3928 | tonne 3929 | secret 3930 | owners 3931 | posts 3932 | Company 3933 | Holdings 3934 | construction 3935 | exchange 3936 | acquisition 3937 | maker 3938 | yuan 3939 | Promodes 3940 | decide 3941 | Metro 3942 | discussions 3943 | 4221 3944 | highlights 3945 | fund 3946 | Taylor 3947 | council 3948 | yesterday 3949 | Merrill 3950 | 1996-08-24 3951 | truck 3952 | vehicle 3953 | engine 3954 | RALLYING 3955 | JYVASKYLA 3956 | Juha 3957 | Kankkunen 3958 | 37 3959 | stages 3960 | Lakes 3961 | prospect 3962 | Tommi 3963 | Makinen 3964 | Mitsubishi 3965 | fined 3966 | sports 3967 | Subaru 3968 | 6.5 3969 | boss 3970 | pace 3971 | Spaniard 3972 | kph 3973 | Ferrari 3974 | Eddie 3975 | Tyrrell 3976 | 14. 3977 | Did 3978 | RALLY 3979 | unnamed 3980 | suspended 3981 | suspension 3982 | +1 3983 | +3 3984 | SUPER 3985 | rugby 3986 | Bradford 3987 | Wigan 3988 | 78 3989 | Helens 3990 | Warrington 3991 | 555 3992 | Sheffield 3993 | Oldham 3994 | Leeds 3995 | 398 3996 | 325 3997 | TITLE 3998 | favour 3999 | removed 4000 | Graham 4001 | 88 4002 | Notts 4003 | successive 4004 | follow 4005 | Darren 4006 | Gough 4007 | 176 4008 | Final 4009 | Match 4010 | abandoned 4011 | 111 4012 | Irani 4013 | Raith 4014 | Dundee 4015 | Aberdeen 4016 | Hamilton 4017 | Ross 4018 | Played 4019 | Newcastle 4020 | Alan 4021 | header 4022 | Tottenham 4023 | Gianluca 4024 | Arsenal 4025 | Villa 4026 | Southampton 4027 | Bolton 4028 | Norwich 4029 | Bromwich 4030 | Crystal 4031 | Ipswich 4032 | Reading 4033 | Southend 4034 | Birmingham 4035 | Stoke 4036 | Swindon 4037 | Vale 4038 | Wolverhampton 4039 | Barnsley 4040 | Huddersfield 4041 | Bristol 4042 | Blackpool 4043 | Chesterfield 4044 | Preston 4045 | Rovers 4046 | Swansea 4047 | Doncaster 4048 | SOUTH 4049 | Blacks 4050 | tries 4051 | Wilson 4052 | replacement 4053 | Zinzan 4054 | Brooke 4055 | Simon 4056 | Partizan 4057 | Spartak 4058 | Krakow 4059 | Wroclaw 4060 | Odra 4061 | GKS 4062 | Polonia 4063 | Lodz 4064 | BASKETBALL 4065 | KOREAN 4066 | Hyundai 4067 | Haitai 4068 | Hanwha 4069 | OB 4070 | Lotte 4071 | Ssangbangwool 4072 | .527 4073 | .538 4074 | Heerenveen 4075 | Veldman 4076 | goalkeeper 4077 | defenders 4078 | strikers 4079 | Groningen 4080 | Antwerp 4081 | Charleroi 4082 | LEADING 4083 | Bastia 4084 | Marseille 4085 | Lille 4086 | Monaco 4087 | Smicer 4088 | Lens 4089 | Christopher 4090 | Guingamp 4091 | Nantes 4092 | Nice 4093 | Rennes 4094 | Bordeaux 4095 | Auxerre 4096 | Metz 4097 | Strasbourg 4098 | Havre 4099 | Caen 4100 | 70th 4101 | Lyon 4102 | Montpellier 4103 | Cannes 4104 | Graafschap 4105 | Doetinchem 4106 | 
RKC 4107 | Waalwijk 4108 | Willem 4109 | Tilburg 4110 | Fortuna 4111 | Sittard 4112 | Rotterdam 4113 | Twente 4114 | Enschede 4115 | Vitesse 4116 | Arnhem 4117 | Utrecht 4118 | Feyenoord 4119 | Roda 4120 | JC 4121 | Kerkrade 4122 | Volendam 4123 | AZ 4124 | Alkmaar 4125 | Bochum 4126 | Arminia 4127 | Bielefeld 4128 | Moenchengladbach 4129 | Karlsruhe 4130 | 33rd 4131 | Werder 4132 | 1860 4133 | Bayer 4134 | Leverkusen 4135 | Duesseldorf 4136 | Freiburg 4137 | VfB 4138 | Bayern 4139 | MSV 4140 | Duisburg 4141 | FC 4142 | Linz 4143 | SV 4144 | one-day 4145 | Waugh 4146 | Tendulkar 4147 | Frederic 4148 | ransom 4149 | tanks 4150 | civilians 4151 | whom 4152 | spokeswoman 4153 | role 4154 | Doctors 4155 | Arabia 4156 | About 4157 | climb 4158 | racing 4159 | mountain 4160 | Team 4161 | walking 4162 | servants 4163 | Public 4164 | dismissed 4165 | Labour 4166 | Florence 4167 | firms 4168 | demands 4169 | wage 4170 | Opposition 4171 | unions 4172 | Civil 4173 | inflation 4174 | camps 4175 | genocide 4176 | fear 4177 | counterpart 4178 | skull 4179 | scientist 4180 | forest 4181 | arrests 4182 | Eritrea 4183 | alliance 4184 | accuses 4185 | accusing 4186 | congress 4187 | turn 4188 | electoral 4189 | class 4190 | stance 4191 | consensus 4192 | Nicaraguan 4193 | Nicaragua 4194 | Chamorro 4195 | column 4196 | wo 4197 | split 4198 | widespread 4199 | activists 4200 | communications 4201 | Revolutionary 4202 | site 4203 | executed 4204 | lb 4205 | Movement 4206 | MDS 4207 | sentences 4208 | judicial 4209 | withdrew 4210 | camp 4211 | west 4212 | independent 4213 | allegations 4214 | grenades 4215 | intends 4216 | gathering 4217 | Dole 4218 | Congress 4219 | sex 4220 | offers 4221 | again 4222 | heads 4223 | drive 4224 | happen 4225 | bill 4226 | remarkable 4227 | offering 4228 | demanding 4229 | stronger 4230 | families 4231 | fellow 4232 | attention 4233 | Hurricane 4234 | Edouard 4235 | winds 4236 | ETA 4237 | Basque 4238 | Defence 4239 | mandate 4240 | Bossi 4241 | ancient 4242 | Corsica 4243 | talking 4244 | bodies 4245 | Stevanin 4246 | unable 4247 | investigators 4248 | paedophile 4249 | scandal 4250 | kidnapping 4251 | throughout 4252 | Dutroux 4253 | managed 4254 | freed 4255 | abduction 4256 | imprisonment 4257 | Bourlet 4258 | Melissa 4259 | kidnapped 4260 | Eefje 4261 | unknown 4262 | Bratislava 4263 | Five 4264 | Among 4265 | information 4266 | theft 4267 | accepted 4268 | hear 4269 | inquiry 4270 | Rights 4271 | cannabis 4272 | 7.5 4273 | container 4274 | MADRID 4275 | supported 4276 | neither 4277 | margin 4278 | Pakistani 4279 | inches 4280 | rail 4281 | witnesses 4282 | Army 4283 | Internet 4284 | Startup 4285 | Java 4286 | 1996-08-25 4287 | JavaSoft 4288 | venture 4289 | acting 4290 | experience 4291 | aims 4292 | individual 4293 | marketing 4294 | WINS 4295 | style 4296 | easy 4297 | feet 4298 | 12th 4299 | defending 4300 | 6-0 4301 | LAKES 4302 | Celica 4303 | Escort 4304 | Eriksson 4305 | Honda 4306 | Kawasaki 4307 | Yamaha 4308 | placings 4309 | GTR 4310 | laps 4311 | 170 4312 | 162 4313 | Fastest 4314 | drivers 4315 | Takeda 4316 | Haga 4317 | Yoshikawa 4318 | Corser 4319 | Ducati 4320 | Kocinski 4321 | Slight 4322 | Fogarty 4323 | Aoki 4324 | names 4325 | strongly 4326 | riders 4327 | reaching 4328 | 521-8 4329 | PREMIER 4330 | 87 4331 | mate 4332 | forcing 4333 | ONE-DAY 4334 | Headley 4335 | Togo 4336 | BUCHAREST 4337 | Bucharest 4338 | Dinamo 4339 | 49 4340 | Brisbane 4341 | Queensland 4342 | Auckland 4343 | 10-15 4344 | .470 4345 | reliever 4346 | contact 4347 | acquired 
4348 | 6-5 4349 | O'Neill 4350 | Cardinals 4351 | Padres 4352 | Braves 4353 | Cubs 4354 | Colorado 4355 | Pirates 4356 | Marlins 4357 | Dodgers 4358 | Phillies 4359 | Expos 4360 | shut 4361 | Giants 4362 | Houston 4363 | Astros 4364 | Shane 4365 | equaliser 4366 | lack 4367 | knee 4368 | MILAN 4369 | Batistuta 4370 | Fiorentina 4371 | serie 4372 | 83rd 4373 | ITALIAN 4374 | Norwegian 4375 | Swiss 4376 | Neuchatel 4377 | TOUR 4378 | Montgomerie 4379 | CYCLING 4380 | cycling 4381 | Ferrigato 4382 | Lance 4383 | Armstrong 4384 | narrow 4385 | Briton 4386 | 199 4387 | Baker 4388 | FROM 4389 | Ronald 4390 | Club 4391 | Arnold 4392 | Blind 4393 | Azharuddin 4394 | Singer 4395 | 1996-08-26 4396 | choice 4397 | riding 4398 | supporters 4399 | Rugby 4400 | apartheid 4401 | commission 4402 | paint 4403 | amnesty 4404 | tell 4405 | abuses 4406 | achieve 4407 | 1948 4408 | express 4409 | Doboj 4410 | Moslems 4411 | Dayton 4412 | refugee 4413 | thrown 4414 | warning 4415 | pullout 4416 | Mexican 4417 | MEXICO 4418 | leftist 4419 | EPR 4420 | Guerrero 4421 | Commander 4422 | protesters 4423 | Ten 4424 | northwest 4425 | dragged 4426 | linking 4427 | allied 4428 | supplies 4429 | reveal 4430 | finding 4431 | democracy 4432 | Kabariti 4433 | speaker 4434 | Weizman 4435 | speak 4436 | Earlier 4437 | cool 4438 | education 4439 | establish 4440 | parliamentary 4441 | voters 4442 | thin 4443 | stations 4444 | employee 4445 | skilled 4446 | contribute 4447 | Sandra 4448 | O'Neal 4449 | customer 4450 | agenda 4451 | ask 4452 | rated 4453 | decisions 4454 | insurance 4455 | fraud 4456 | Salang 4457 | Supreme 4458 | postponement 4459 | Hasina 4460 | Bogra 4461 | ruling 4462 | Nearly 4463 | drought 4464 | regions 4465 | diesel 4466 | grain 4467 | sector 4468 | transition 4469 | reputation 4470 | dictator 4471 | quality 4472 | Wheat 4473 | Institute 4474 | Celsius 4475 | rainfall 4476 | farms 4477 | acres 4478 | 220 4479 | badly 4480 | autumn 4481 | negative 4482 | attributable 4483 | Gilbertson 4484 | rand 4485 | substantial 4486 | shutdown 4487 | rains 4488 | merger 4489 | Plc 4490 | portfolio 4491 | Advanced 4492 | Medical 4493 | IVAC 4494 | infusion 4495 | therapy 4496 | IMED 4497 | plants 4498 | 120 4499 | expects 4500 | delivery 4501 | Morris 4502 | boosted 4503 | bonds 4504 | Treasury 4505 | secured 4506 | hat-trick 4507 | decisive 4508 | Sharpe 4509 | Botham 4510 | Middlesex 4511 | BY 4512 | FRIENDLY 4513 | Rica 4514 | Mauritius 4515 | crude 4516 | Gabon 4517 | legs 4518 | postponed 4519 | swimming 4520 | Popov 4521 | titles 4522 | medicine 4523 | Olympics 4524 | Slovak 4525 | Hungarian 4526 | Petra 4527 | SK 4528 | Leon 4529 | catch 4530 | Sao 4531 | Rio 4532 | usual 4533 | hot 4534 | Perry 4535 | Larry 4536 | Magdalena 4537 | Maleeva 4538 | cement 4539 | tough 4540 | Olsza 4541 | Barbara 4542 | ranking 4543 | mind 4544 | original 4545 | MONDAY 4546 | Stephanie 4547 | Oncins 4548 | 6- 4549 | Elena 4550 | 3-2 4551 | .466 4552 | Sosa 4553 | Boer 4554 | Madrid 4555 | ZIMBABWE 4556 | Strang 4557 | Whittall 4558 | Brandes 4559 | Hogg 4560 | Reiffel 4561 | Flower 4562 | filed 4563 | existing 4564 | ferry 4565 | boats 4566 | voting 4567 | municipal 4568 | irregularities 4569 | Election 4570 | consider 4571 | 600,000 4572 | Ambassador 4573 | cast 4574 | ballot 4575 | voter 4576 | High 4577 | appointed 4578 | compromise 4579 | settlement 4580 | insistence 4581 | quit 4582 | Federation 4583 | recover 4584 | Zastava 4585 | factory 4586 | wages 4587 | settlements 4588 | 5.5 4589 | controllers 4590 | Raichev 4591 | 
Tass 4592 | eye 4593 | Mostostal 4594 | premium 4595 | 2.6 4596 | JOSE 4597 | kidnappers 4598 | tourist 4599 | photographs 4600 | Rican 4601 | peso 4602 | limit 4603 | Dealers 4604 | Hashimoto 4605 | credit 4606 | auctions 4607 | gunman 4608 | 1996-08-27 4609 | gun 4610 | Petroleum 4611 | contracts 4612 | Spot 4613 | liquidity 4614 | deposed 4615 | Specter 4616 | resign 4617 | Airways 4618 | Airbus 4619 | surrender 4620 | asylum 4621 | dozens 4622 | grade 4623 | Midwest 4624 | 6.0 4625 | 859 4626 | tree 4627 | bounce 4628 | fairly 4629 | hog 4630 | sparked 4631 | Gore 4632 | Gingrich 4633 | Convention 4634 | re-election 4635 | coalition 4636 | justice 4637 | painted 4638 | Republicans 4639 | praised 4640 | Reeve 4641 | politicians 4642 | Gephardt 4643 | Reagan 4644 | legislature 4645 | abortion 4646 | nomination 4647 | gas 4648 | Oklahoma 4649 | handgun 4650 | Karachi 4651 | fob 4652 | ARA 4653 | sentiment 4654 | bids 4655 | frost 4656 | 5.0 4657 | moisture 4658 | 32.0 4659 | 12.0 4660 | Brush 4661 | suits 4662 | Salomon 4663 | margins 4664 | versus 4665 | CALL 4666 | CONFIRMED 4667 | Tapie 4668 | instructions 4669 | procedure 4670 | francs 4671 | Rabobank 4672 | epidemic 4673 | debate 4674 | Banharn 4675 | fix 4676 | unq 4677 | Ekeus 4678 | Tamils 4679 | LTTE 4680 | Tankan 4681 | franc 4682 | Iraqis 4683 | diverted 4684 | Stansted 4685 | explosives 4686 | larger 4687 | festival 4688 | Ata-ur-Rehman 4689 | Cozma 4690 | lei 4691 | tunnel 4692 | 12-15 4693 | TUESDAY 4694 | 85th 4695 | Cyprien 4696 | Moura 4697 | STAGE 4698 | NETHERLANDS 4699 | kilometre 4700 | Colonna 4701 | Mapei 4702 | McEwen 4703 | Koerts 4704 | Palmans 4705 | Teutenberg 4706 | Postal 4707 | Aki 4708 | Capiot 4709 | Collstrop 4710 | Neths 4711 | TVM 4712 | Wolf 4713 | Motorola 4714 | Cofinec 4715 | MFS 4716 | certainly 4717 | Faulding 4718 | N.J. 
4719 | Kelly 4720 | killer 4721 | SPLA 4722 | representatives 4723 | weakening 4724 | Iliescu 4725 | PUNR 4726 | oppose 4727 | Meri 4728 | Ruutel 4729 | votes 4730 | college 4731 | Reform 4732 | MPs 4733 | acts 4734 | Gajdos 4735 | Interpol 4736 | films 4737 | offices 4738 | victims 4739 | plantation 4740 | note 4741 | CVG 4742 | privatization 4743 | Banespa 4744 | Globo 4745 | Bamerindus 4746 | loan 4747 | Lanusse 4748 | coup 4749 | collision 4750 | Seoul 4751 | remote 4752 | mainland 4753 | humanitarian 4754 | understanding 4755 | Democrat 4756 | bushel 4757 | crop 4758 | Chg 4759 | .02 4760 | unc 4761 | --- 4762 | dn 4763 | HRW 4764 | ABC 4765 | Perot 4766 | bulk 4767 | barley 4768 | Vermont 4769 | medals 4770 | recovery 4771 | Rifkind 4772 | normally 4773 | Hamas 4774 | Rohrabacher 4775 | Tehran 4776 | rupees 4777 | Soyoil 4778 | refined 4779 | Yellow 4780 | n.a 4781 | Helibor 4782 | Barrick 4783 | Arequipa 4784 | Insurance 4785 | Commercial 4786 | AOL 4787 | provinces 4788 | Nasdaq 4789 | Latest 4790 | CDU 4791 | CSU 4792 | SPD 4793 | FDP 4794 | PDS 4795 | Emnid 4796 | Elect 4797 | Res 4798 | Heathrow 4799 | Gatwick 4800 | birds 4801 | Drew 4802 | commentary 4803 | Sakai 4804 | Kan 4805 | shipments 4806 | inventories 4807 | Commodities 4808 | publicly 4809 | SEC 4810 | rules 4811 | 1996-08-28 4812 | Saskatchewan 4813 | units 4814 | Heeswijk 4815 | Telekom 4816 | Boardman 4817 | Collinelli 4818 | 4,000 4819 | Fowler 4820 | 56th 4821 | Albert 4822 | Parma 4823 | Genoa 4824 | Pires 4825 | Foe 4826 | PSG 4827 | relay 4828 | Owens 4829 | Jayasuriya 4830 | Kaluwitharana 4831 | Silva 4832 | airliner 4833 | Arch 4834 | Leduc 4835 | Reef 4836 | Biogen 4837 | Berlex 4838 | Avonex 4839 | Betaseron 4840 | Oct 4841 | noise 4842 | exclusive 4843 | Sierra 4844 | dissidents 4845 | Ivorian 4846 | Bedie 4847 | ADRs 4848 | Gazprom 4849 | Rosati 4850 | quake 4851 | 1996-08-29 4852 | Nov 4853 | endorse 4854 | marijuana 4855 | SER 4856 | Amoco 4857 | Yemen 4858 | defends 4859 | Bhutto 4860 | Sharif 4861 | ACC 4862 | Palkhivala 4863 | pipes 4864 | fishermen 4865 | ND 4866 | Evert 4867 | BOJ 4868 | Lamm 4869 | Names 4870 | gestures 4871 | prosecutors 4872 | GPG 4873 | bush 4874 | Khmer 4875 | Ieng 4876 | Pol 4877 | Pot 4878 | Kubo 4879 | Sakigake 4880 | Takemura 4881 | Hatoyama 4882 | Banisadr 4883 | Ballanger 4884 | 1996-08-30 4885 | Grobbelaar 4886 | SEP 4887 | Kornblum 4888 | roubles 4889 | virus 4890 | ICAC 4891 | Yassin 4892 | A-rated 4893 | muscle 4894 | soybeans 4895 | jewelry 4896 | Simpson 4897 | surgeon 4898 | meal 4899 | Gluten 4900 | Gas 4901 | KV 4902 | Pro 4903 | Slough 4904 | Warburg 4905 | kerosene 4906 | cargoes 4907 | OM 4908 | PULPEX 4909 | pulp 4910 | pesetas 4911 | trend 4912 | Hope 4913 | Daewoo 4914 | Dacom 4915 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | tensorflow==1.13.1 2 | -------------------------------------------------------------------------------- /run_experiments_multi.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | #-*- coding: utf-8 -*- 3 | 4 | 5 | 6 | from train_MLBiNet import train 7 | 8 | 9 | if __name__ == "__main__": 10 | niter = 10 11 | for i in range(niter): 12 | train(seed_id=i) 13 | -------------------------------------------------------------------------------- /train_MLBiNet.py: 
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding: utf-8 -*-
3 | 
4 | import os
5 | import time
6 | import json
7 | import random
8 | import numpy as np
9 | import tensorflow as tf
10 | 
11 | tf.flags.DEFINE_integer('encode_h', 100, 'dim of encoding layer')
12 | tf.flags.DEFINE_integer('decode_h', 200, 'dim of decoding layer')
13 | tf.flags.DEFINE_integer('tag_dim', 100, 'dimension of tags')
14 | tf.flags.DEFINE_integer('event_info_h', 100, 'hidden size of sentence level information aggregation layer')
15 | tf.flags.DEFINE_integer('batch_size', 64, 'batch size')
16 | tf.flags.DEFINE_integer('max_doc_len', 8, 'max number of sentences in a document')
17 | tf.flags.DEFINE_integer('max_seq_len', 50, 'maximum length of sequence')
18 | tf.flags.DEFINE_integer('num_tag_layers', 2, 'number of tagging layers')
19 | tf.flags.DEFINE_integer('reverse_seq', 1, 'reverse the sequence when aggregating next-sentence information')
20 | tf.flags.DEFINE_string('tagging_mechanism', "backward_decoder", 'decoder mechanism')
21 | tf.flags.DEFINE_integer('ner_dim_1', 20, 'embedding size of level-1 NER')
22 | tf.flags.DEFINE_integer('ner_dim_2', 20, 'embedding size of level-2 NER')
23 | tf.flags.DEFINE_integer('self_att_not', 1, 'self attention or not')
24 | tf.flags.DEFINE_integer('context_info', 1,
25 |                         '0: single sentence information, 1: information of two neighbor sentences')
26 | tf.flags.DEFINE_float('penalty_coef', 2e-5, 'penalty coefficient')
27 | tf.flags.DEFINE_float('event_vector_trans', 1, 'nonlinear transformation for the event vector')
28 | 
29 | tf.flags.DEFINE_integer('num_epochs', 50, 'number of training epochs')
30 | tf.flags.DEFINE_integer('eval_every_steps', 100, 'evaluate on dev/test every this many steps')
31 | tf.flags.DEFINE_integer('num_epochs_warm', 0, 'number of warm-start epochs')
32 | tf.flags.DEFINE_integer('nconsect_epoch', 3, 'early-stopping patience (consecutive evaluations without improvement)')
33 | tf.flags.DEFINE_float('weight_decay', 1, 'weight decay of each tagging layer')
34 | 
35 | tf.flags.DEFINE_float('warm_learning_rate', 1e-5, 'warm-up learning rate')
36 | tf.flags.DEFINE_float('learning_rate', 5e-4, 'learning rate')
37 | tf.flags.DEFINE_float('decay_rate', 0.99, 'decay rate')
38 | 
39 | tf.flags.DEFINE_float('dropout_rate', 0.5, 'dropout rate')
40 | tf.flags.DEFINE_float('grad_clip', 10, 'gradient clipping threshold to prevent exploding gradients')
41 | tf.flags.DEFINE_float('positive_weights', 1, 'weight for positive samples')
42 | 
43 | tf.flags.DEFINE_string('train_file', './data-ACE/example_new.train', 'train file')
44 | tf.flags.DEFINE_string('dev_file', './data-ACE/example_new.dev', 'dev file')
45 | tf.flags.DEFINE_string('test_file', './data-ACE/example_new.test', 'test file')
46 | tf.flags.DEFINE_string('embedding_file', './embedding/embeddings.txt', 'pretrained embedding file')
47 | tf.flags.DEFINE_integer('word_emb_dim', 100, 'word embedding size')
48 | 
49 | tf.flags.DEFINE_string('NER_dict_file', './dict/event_types.txt', 'event type dict file')
50 | tf.flags.DEFINE_string('ner_1_dict_file', './dict/ner_1.txt', 'level-1 ner dict file')
51 | tf.flags.DEFINE_string('ner_2_dict_file', './dict/ner_2.txt', 'level-2 ner dict file')
52 | 
53 | FLAGS = tf.flags.FLAGS
54 | 
55 | lower_case = False
56 | 
57 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
58 | config_gpu = tf.ConfigProto()
59 | config_gpu.gpu_options.per_process_gpu_memory_fraction = 0.6
60 | 
61 | 
62 | def train(seed_id=1):
63 |     # set seed
64 |     tf.set_random_seed(seed_id)
65 | 
66 |     from MLBiNet import MLBiNet
67 | 
68 |     from utils_init import load_ED_data
69 |     from utils_init import data_transformation_doc
70 |     from utils_init import batch_generation_doc
71 | 
72 |     from utils_init import load_vocab
73 |     from utils_init import load_pretrain
74 | 
75 |     from ace_model_evaluation import write_2_file, ace_pred_result_stat
76 | 
77 |     with tf.Graph().as_default() as g:
78 |         # loading the embedding matrix
79 |         embedding_matrix, vocab_words, vocab_2_id, id_2_vocab = load_pretrain(FLAGS.embedding_file,
80 |                                                                               FLAGS.word_emb_dim)
81 |         print('shape of embedding_matrix is:', np.asmatrix(embedding_matrix).shape)
82 | 
83 |         # load train, dev, test data
84 |         sents_train, ners_train, ner_vocab, ner_1_train, ner_2_train, doc_file_to_sents_train = \
85 |             load_ED_data(FLAGS.train_file, lower_case=lower_case)
86 | 
87 |         # load the vocab of event type
88 |         _, ED_2_id = load_vocab(FLAGS.NER_dict_file)
89 |         print("ED_2_id is:\t", ED_2_id)
90 | 
91 |         sents_dev, ners_dev, _, ner_1_dev, ner_2_dev, doc_file_to_sents_dev = \
92 |             load_ED_data(FLAGS.dev_file, lower_case=lower_case)
93 |         sents_test, ners_test, _, ner_1_test, ner_2_test, doc_file_to_sents_test = \
94 |             load_ED_data(FLAGS.test_file, lower_case=lower_case)
95 |         print("load_ner_data finished!")
96 |         print("doc_file_to_sents_test:\t", doc_file_to_sents_test)
97 | 
98 |         # load NER label
99 |         ner_vocab_1, ner_to_id_1 = load_vocab(FLAGS.ner_1_dict_file)
100 |         ner_vocab_2, ner_to_id_2 = load_vocab(FLAGS.ner_2_dict_file)
101 |         print("NER vocab loaded!")
102 | 
103 |         # encoding the train, dev, test data; the OOV token literal was lost in extraction, '<unk>' is assumed below
104 |         encode_train = data_transformation_doc(sents_train, ner_1_train, ner_2_train, ners_train,
105 |                                                vocab_2_id, ED_2_id, vocab_2_id['<unk>'], ner_to_id_1, ner_to_id_2)
106 |         encode_dev = data_transformation_doc(sents_dev, ner_1_dev, ner_2_dev, ners_dev,
107 |                                              vocab_2_id, ED_2_id, vocab_2_id['<unk>'], ner_to_id_1, ner_to_id_2)
108 |         encode_test = data_transformation_doc(sents_test, ner_1_test, ner_2_test, ners_test,
109 |                                               vocab_2_id, ED_2_id, vocab_2_id['<unk>'], ner_to_id_1, ner_to_id_2)
110 |         print("Document data transformation finished!")
111 | 
112 |         # batch generating
113 |         train_batches = batch_generation_doc(doc_file_to_sents_train, encode_train, FLAGS.batch_size, FLAGS.max_doc_len,
114 |                                              FLAGS.max_seq_len, vocab_2_id, ED_2_id, num_epoches=FLAGS.num_epochs)
115 |         dev_batches = batch_generation_doc(doc_file_to_sents_dev, encode_dev, FLAGS.batch_size, FLAGS.max_doc_len,
116 |                                            FLAGS.max_seq_len, vocab_2_id, ED_2_id, num_epoches=1)
117 |         test_batches = batch_generation_doc(doc_file_to_sents_test, encode_test, FLAGS.batch_size, FLAGS.max_doc_len,
118 |                                             FLAGS.max_seq_len, vocab_2_id, ED_2_id, num_epoches=1)
119 |         print("batch_generation_doc finished!")
120 | 
121 |         print('Begin model initialization!')
122 |         with tf.Session(config=config_gpu) as sess:
123 |             model = MLBiNet(
124 |                 encode_h = FLAGS.encode_h,
125 |                 decode_h = FLAGS.decode_h,
126 |                 tag_dim = FLAGS.tag_dim,
127 |                 event_info_h = FLAGS.event_info_h,
128 |                 word_emb_mat = np.array(embedding_matrix),
129 |                 batch_size = FLAGS.batch_size,
130 |                 max_doc_len = FLAGS.max_doc_len,
131 |                 max_seq_len = FLAGS.max_seq_len,
132 |                 id_O = ED_2_id['O'],
133 |                 num_tag_layers = FLAGS.num_tag_layers,
134 |                 weight_decay = FLAGS.weight_decay,
135 |                 reverse_seq = FLAGS.reverse_seq,
136 |                 class_size = len(ED_2_id),
137 |                 tagging_mechanism = FLAGS.tagging_mechanism,
138 |                 ner_size_1 = len(ner_to_id_1),
139 |                 ner_dim_1 = FLAGS.ner_dim_1,
140 |                 ner_size_2 = len(ner_to_id_2),
141 |                 ner_dim_2 = FLAGS.ner_dim_2,
142 |                 self_att_not = FLAGS.self_att_not,
143 |                 context_info = FLAGS.context_info,
144 |                 event_vector_trans = FLAGS.event_vector_trans
145 |             )
146 |             print('encoder-decoder model initialized!')
147 | 
148 |             loss_ed = model.loss
149 |             for tvarsi in tf.trainable_variables():
150 |                 if tvarsi.name != 'word_emb_mat:0':
151 |                     loss_ed += FLAGS.penalty_coef * tf.reduce_sum(tvarsi ** 2)
152 |                 else:
153 |                     print("\n\n{} is not penalized!\n\n".format(tvarsi))
154 | 
155 |             with tf.name_scope('accuracy'):
156 |                 label_pred_naive = model.label_pred  # note: points to the same tensor as label_pred below
157 |                 label_pred = model.label_pred
158 |                 label_true = model.label_true
159 |                 acc_cnt_naive = tf.reduce_sum(tf.cast(tf.equal(label_pred_naive, label_true), dtype=tf.float32))
160 |                 acc_cnt = tf.reduce_sum(tf.cast(tf.equal(label_pred, label_true), dtype=tf.float32))
161 |                 cnt_all = tf.reduce_sum(tf.cast(tf.greater(label_true, -1), dtype=tf.float32))
162 |                 acc_rate = acc_cnt / cnt_all
163 | 
164 |             valid_len_final = model.valid_len_list
165 | 
166 |             timestamp = str(int(time.time()))
167 |             out_dir = os.path.join('./runs', timestamp)
168 |             checkpoint_dir = os.path.join(out_dir, "checkpoints")
169 |             if not os.path.exists(checkpoint_dir):
170 |                 os.makedirs(checkpoint_dir)
171 | 
172 |             param_dict = FLAGS.flag_values_dict()
173 |             param_dict['lower_case'] = lower_case
174 | 
175 |             with open(os.path.join(checkpoint_dir, 'config.json'), "w") as f:
176 |                 f.write(json.dumps(param_dict, indent=2, ensure_ascii=False))
177 | 
178 |             tvars = tf.trainable_variables()
179 |             for kk, tvarsi in enumerate(tvars):
180 |                 print('The %d-th trainable variable is %s' % (kk, tvarsi))
181 | 
182 |             global_step = tf.Variable(0, trainable=False)
183 | 
184 |             learning_rate = tf.train.exponential_decay(learning_rate=FLAGS.learning_rate,
185 |                                                        global_step=global_step,
186 |                                                        decay_steps=len(train_batches) // int(FLAGS.num_epochs),
187 |                                                        decay_rate=FLAGS.decay_rate)
188 |             tvars_no_emb = [x for x in tvars if 'word_emb_mat' not in x.name]
189 |             opt_ed_NO_emb_sent = tf.train.AdamOptimizer(learning_rate)
190 | 
191 |             grads_trig_sent_NO_EMB, _ = tf.clip_by_global_norm(tf.gradients(loss_ed, tvars_no_emb), FLAGS.grad_clip)
192 |             grads_and_vars_trig_sent_NO_EMB = tuple(zip(grads_trig_sent_NO_EMB, tvars_no_emb))
193 |             train_ed_NO_emb = opt_ed_NO_emb_sent.apply_gradients(grads_and_vars_trig_sent_NO_EMB, global_step=global_step)
194 |             sess.run(tf.global_variables_initializer())
195 | 
196 |             def train_step(train_batch, epoch):
197 |                 positive_weights = FLAGS.positive_weights
198 | 
199 |                 feed_dict = {
200 |                     model.dropout_rate: FLAGS.dropout_rate,
201 |                     model.input_docs: np.array(train_batch[0]),
202 |                     model.ner_docs_1: np.array(train_batch[1]),
203 |                     model.ner_docs_2: np.array(train_batch[2]),
204 |                     model.input_label_docs: np.array(train_batch[3]),
205 |                     model.valid_batch: train_batch[4],
206 |                     model.valid_sent_len: np.array(train_batch[5]),
207 |                     model.valid_words_len: np.array(train_batch[6]),
208 |                     model.positive_weights: positive_weights
209 |                 }
210 | 
211 |                 _, loss_trigger_tmp, acc_rate_tmp, step_curr = sess.run([train_ed_NO_emb, loss_ed, acc_rate, global_step],
212 |                                                                         feed_dict)
213 |                 return loss_trigger_tmp, step_curr, acc_rate_tmp
214 | 
215 | 
216 |             def dev_test_step(dev_batches):
217 |                 def dev_one_batch(dev_batch):
218 |                     feed_dict = {
219 |                         model.dropout_rate: 0,
220 |                         model.input_docs: np.array(dev_batch[0]),
221 |                         model.ner_docs_1: np.array(dev_batch[1]),
222 |                         model.ner_docs_2: np.array(dev_batch[2]),
223 |                         model.input_label_docs: np.array(dev_batch[3]),
224 |                         model.valid_batch: dev_batch[4],
225 |                         model.valid_sent_len: np.array(dev_batch[5]),
226 |                         model.valid_words_len: np.array(dev_batch[6]),
227 |                         model.positive_weights: 1.0
228 |                     }
229 |                     acc_cnt_tmp, cnt_all_tmp, acc_cnt_naive_tmp, valid_len_tmp,\
230 |                         label_pred_tmp, label_pred_naive_tmp, label_true_tmp, final_words_id_tmp, loss_tmp \
231 |                         = sess.run([acc_cnt, cnt_all, acc_cnt_naive, valid_len_final, label_pred, label_pred_naive,
232 |                                     label_true, model.final_words_id, loss_ed], feed_dict)
233 |                     return acc_cnt_tmp, cnt_all_tmp, acc_cnt_naive_tmp, valid_len_tmp, label_pred_tmp, \
234 |                            label_pred_naive_tmp, label_true_tmp, final_words_id_tmp, loss_tmp
235 | 
236 |                 acc_cnt_list, cnt_all_list = [], []
237 |                 acc_cnt_naive_list, cnt_all_naive_list = [], []
238 |                 label_pred_list, label_pred_naive_list, label_true_list = [], [], []
239 |                 valid_len_list = []
240 |                 words_sents = []
241 |                 loss_dev_test = 0
242 |                 len_seq_all = 0
243 |                 for dev_batchi in dev_batches:
244 |                     acc_cnt_tmp, cnt_all_tmp, acc_cnt_naive_tmp, valid_len_tmp,\
245 |                         label_pred_tmp, label_pred_naive_tmp, label_true_tmp, final_words_id_tmp, loss_tmp_i\
246 |                         = dev_one_batch(dev_batchi)
247 |                     acc_cnt_list.append(acc_cnt_tmp)
248 |                     cnt_all_list.append(cnt_all_tmp)
249 |                     acc_cnt_naive_list.append(acc_cnt_naive_tmp)
250 |                     label_pred_list.extend(label_pred_tmp)
251 |                     label_pred_naive_list.extend(label_pred_naive_tmp)
252 |                     label_true_list.extend(label_true_tmp)
253 |                     valid_len_list.extend(valid_len_tmp)
254 |                     words_sents.extend(final_words_id_tmp)
255 |                     loss_dev_test += loss_tmp_i * len(label_pred_naive_tmp)
256 |                     len_seq_all += len(label_pred_naive_tmp)
257 |                 loss_dev_test = loss_dev_test / (len_seq_all + 1e-8)
258 | 
259 |                 prec_dev = sum(acc_cnt_list) / sum(cnt_all_list)
260 |                 prec_dev_naive = sum(acc_cnt_naive_list) / sum(cnt_all_list)
261 |                 return prec_dev, prec_dev_naive, words_sents, label_pred_list,\
262 |                        label_true_list, valid_len_list, loss_dev_test
263 | 
264 |             print('Total train batch is:\t', len(train_batches), flush=True)
265 | 
266 |             prec_test_best = 0
267 |             loss_dev_best = 10000
268 |             loss_dev_second = 10000
269 |             loss_dev_list = []
270 |             nconsect = 0
271 |             print("total train steps:\t", len(train_batches))
272 |             for i, train_batchi in enumerate(train_batches):
273 |                 epoch = i // FLAGS.eval_every_steps  # counts evaluation rounds rather than true passes over the data
274 |                 loss_trigger_tmp, step_curr, acc_rate_tmp = train_step(train_batchi, epoch)
275 |                 if i % 10 == 0:
276 |                     print('epoch {}, step: {}, loss: {}, acc_rate: {}'.format(
277 |                         epoch, step_curr, loss_trigger_tmp, acc_rate_tmp), flush=True)
278 | 
279 |                 if i % FLAGS.eval_every_steps == 0 or i == len(train_batches) - 1:
280 |                     prec_dev, prec_dev_naive, words_sents, label_pred_list,\
281 |                         label_true_list, valid_len_list, loss_dev_ = dev_test_step(dev_batches)
282 |                     print('epoch {} prec_dev is: \n'.format(epoch), prec_dev, flush=True)
283 |                     if epoch == 0:
284 |                         os.makedirs(os.path.join(checkpoint_dir, 'dev'))
285 |                     filename_dev = os.path.join(checkpoint_dir, 'dev/test_result_{}.txt').format(step_curr)
286 |                     write_2_file(filename_dev, ED_2_id, label_true_list, valid_len_list,
287 |                                  words_sents, label_pred_list, id_2_vocab)
288 |                     prec_event_dev, recall_event_dev, f1_event_dev = ace_pred_result_stat(filename_dev)
289 |                     print('epoch: {}, loss_dev_: {}'.format(epoch, loss_dev_), flush=True)
290 |                     print('epoch: {}, prec_event_dev: {}, recall_event_dev: {}, f1_event_dev: {}'.format(
291 |                         epoch, prec_event_dev, recall_event_dev, f1_event_dev), flush=True)
292 | 
293 |                     loss_dev_list.append(loss_dev_)
294 |                     loss_dev_list = sorted(loss_dev_list)
295 |                     if len(loss_dev_list) > 2:
296 |                         loss_dev_second = loss_dev_list[2]  # third-smallest dev loss, used as the early-stopping threshold
297 |                     if loss_dev_ > loss_dev_best:
298 |                         if loss_dev_ > loss_dev_second:
299 |                             nconsect += 1
300 |                         else:
301 |                             nconsect = 0
302 |                     else:
303 |                         nconsect = 0
304 |                         loss_dev_best = loss_dev_
305 | 
306 |                     print('\n')
307 |                     prec_test, prec_test_naive, words_sents, label_pred_list,\
308 |                         label_true_list, valid_len_list, loss_test_ = dev_test_step(test_batches)
309 |                     print('epoch {} prec_test is: \n'.format(epoch), prec_test, flush=True)
310 |                     print('\n')
311 |                     # write to file
312 |                     if epoch == 0:
313 |                         os.makedirs(os.path.join(checkpoint_dir, 'test'))
314 |                     filename_test = os.path.join(checkpoint_dir, 'test/test_result_{}.txt').format(step_curr)
315 |                     write_2_file(filename_test, ED_2_id, label_true_list, valid_len_list,
316 |                                  words_sents, label_pred_list, id_2_vocab)
317 |                     prec_event_test, recall_event_test, f1_event_test = ace_pred_result_stat(filename_test)
318 |                     print('epoch: {}, prec_event_test: {}, recall_event_test: {}, f1_event_test: {}'.format(
319 |                         epoch, prec_event_test, recall_event_test, f1_event_test), flush=True)
320 | 
321 |                     if prec_test_best < f1_event_test:
322 |                         prec_test_best = f1_event_test
323 | 
324 |                     print('The best dev loss value is:\t', [loss_dev_best, nconsect])
325 |                     # print('The best dev f1 value is:\t', [prec_dev_best,nconsect])
326 |                     print('The best test f1 value is:\t', prec_test_best)
327 |                     with open(os.path.join(checkpoint_dir, 'test_result.txt'), encoding='utf-8', mode='a') as f:
328 |                         f.write('\t'.join([str(epoch), str(prec_event_test), str(recall_event_test),
329 |                                            str(f1_event_test), str(loss_dev_best), str(loss_dev_second), str(nconsect)]) + '\n')
330 | 
331 |                     if nconsect >= FLAGS.nconsect_epoch:
332 |                         break
333 |     tf.reset_default_graph()
334 | 
335 | if __name__ == "__main__":
336 |     # train()
337 |     pass
--------------------------------------------------------------------------------
/utils_init.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | #-*- coding: utf-8 -*-
3 | 
4 | 
5 | import random
6 | import numpy as np
7 | 
8 | 
9 | def load_vocab(filename):
10 |     vocab = []
11 |     with open(filename, encoding='utf-8', mode='r') as f:
12 |         for line in f:
13 |             vocab.append(line.strip())
14 |     vocab_to_id = {u: i for i, u in enumerate(vocab)}
15 |     return vocab, vocab_to_id
16 | 
17 | 
18 | def load_pretrain(glove_file, word_emb_dim):
19 |     embedding_matrix, vocab = [], []
20 |     with open(glove_file, encoding='utf-8', mode='r') as f:
21 |         for i, line in enumerate(f):
22 |             if i % 1e5 == 0:
23 |                 print('Current index is %d' % i)
24 |             try:
25 |                 line_split = line.strip().split()
26 |                 if len(line_split) == word_emb_dim + 1:
27 |                     # if line_split[0] in vocab_set:
28 |                     vocab.append(line_split[0])
29 |                     embedding_matrix.append([float(x) for x in line_split[1:]])
30 |             except:  # skip malformed embedding lines
31 |                 pass
32 |     vocab_to_id = {u: i for i, u in enumerate(vocab)}
33 |     id_to_vocab = {v: u for u, v in vocab_to_id.items()}
34 |     return embedding_matrix, vocab, vocab_to_id, id_to_vocab
35 | 
36 | 
37 | def load_ED_data(filename, lower_case=False):
38 |     """
39 |     load event-detection data: sentences with their word-level event labels, NER labels and doc-to-sentence mapping
40 |     """
41 |     sents_all = []
42 |     ners_all = []
43 |     ner_1 = []
44 |     ner_2 = []
45 |     sent_tmp = []
46 |     ner_tmp = []
47 |     ner_1_tmp = []
48 |     ner_2_tmp = []
49 |     ner_vocab = set()
50 |     doc_file_to_sents = {}
51 |     with open(filename, encoding='utf-8', mode='r') as f:
52 |         w_last = ''
53 |         for line in f:
54 |             line = line.strip()
55 |             line_split = line.split(' ')
56 |             if len(line_split) == 5:
57 |                 doc_file = line_split[1]
58 |                 if lower_case:
59 |                     line_split[0] = line_split[0].lower()
60 |                 sent_tmp.append(line_split[0])
61 | 
62 |                 ner_tmp.append(line_split[-1])
63 |                 ner_vocab.add(line_split[-1])
64 |                 ner_1_tmp_tmp = line_split[2]
65 |                 ner_1_tmp_tmp = ner_1_tmp_tmp
66 |                 ner_1_tmp.append(ner_1_tmp_tmp)
67 |                 ner_2_tmp_tmp = line_split[3]
68 |                 ner_2_tmp_tmp = ner_2_tmp_tmp
69 |                 ner_2_tmp.append(ner_2_tmp_tmp)
70 |             else:
71 |                 if len(sent_tmp):
72 |                     sents_all.append(sent_tmp)
73 |                     ners_all.append(ner_tmp)
74 |                     ner_1.append(ner_1_tmp)
75 |                     ner_2.append(ner_2_tmp)
76 |                     sent_tmp = []
77 |                     ner_tmp = []
78 |                     ner_1_tmp = []
79 |                     ner_2_tmp = []
80 |                     if doc_file not in doc_file_to_sents:
81 |                         doc_file_to_sents[doc_file] = [len(sents_all) - 1]
82 |                     else:
83 |                         doc_file_to_sents[doc_file] += [len(sents_all) - 1]
84 |             w_last = line_split[0]
85 |         if len(sent_tmp) > 0:
86 |             sents_all.append(sent_tmp)
87 |             ners_all.append(ner_tmp)
88 |             ner_1.append(ner_1_tmp)
89 |             ner_2.append(ner_2_tmp)
90 |             if doc_file not in doc_file_to_sents:
91 |                 doc_file_to_sents[doc_file] = [len(sents_all) - 1]
92 |             else:
93 |                 doc_file_to_sents[doc_file] += [len(sents_all) - 1]
94 |     return sents_all, ners_all, ner_vocab, ner_1, ner_2, doc_file_to_sents
95 | 
96 | 
97 | def data_transformation_doc(sents_list, ner_1_list, ner_2_list, ner_list, vocab_2_id, ner_2_id, word_unk_id, ner_to_id_1, ner_to_id_2):
98 |     """
99 |     transform the raw tokens and labels into integer ids
100 |     """
101 |     encode_res = []
102 |     for i, senti in enumerate(sents_list):
103 |         neri = ner_list[i]
104 |         ner_1_i = ner_1_list[i]
105 |         ner_2_i = ner_2_list[i]
106 |         ner_tmp = []
107 |         sent_tmp = []
108 |         ner_1_tmp = []
109 |         ner_2_tmp = []
110 |         for k, wordk in enumerate(senti):
111 |             nerk = neri[k]
112 |             try:
113 |                 sent_tmp.append(vocab_2_id[wordk])
114 |             except:  # out-of-vocabulary word
115 |                 sent_tmp.append(word_unk_id)
116 |             ner_tmp.append(ner_2_id[nerk])
117 |             ner_1_tmp.append(ner_to_id_1[ner_1_i[k]])
118 |             ner_2_tmp.append(ner_to_id_2[ner_2_i[k]])
119 |         encode_res.append([sent_tmp, ner_1_tmp, ner_2_tmp, ner_tmp])
120 |     return encode_res
121 | 
122 | 
123 | def batch_generation_doc(doc_to_sents, enc_list, batch_size, max_doc_len, max_seq_len, vocab_2_id, ner_2_id, num_epoches=1):
124 |     # padding and trimming
125 |     ner_pad = ner_2_id['O']
126 |     word_pad = vocab_2_id['<unk>']  # pad token literal was lost in extraction; '<unk>' assumed
127 |     valid_len_list = []
128 |     for i, linei in enumerate(enc_list):
129 |         senti = linei[0]
130 |         ner_1_i = linei[1]
131 |         ner_2_i = linei[2]
132 |         neri = linei[3]
133 |         valid_len_list.append(min(len(senti), max_seq_len))
134 |         senti = senti[:max_seq_len]
135 |         senti = senti + [word_pad] * max(0, max_seq_len - len(senti))
136 |         neri = neri[:max_seq_len]
137 |         neri = neri + [ner_pad] * max(0, max_seq_len - len(neri))
138 |         ner_1_i = ner_1_i[:max_seq_len]
139 |         ner_1_i = ner_1_i + [0] * max(0, max_seq_len - len(ner_1_i))
140 |         ner_2_i = ner_2_i[:max_seq_len]
141 |         ner_2_i = ner_2_i + [0] * max(0, max_seq_len - len(ner_2_i))
142 |         enc_list[i] = [senti, ner_1_i, ner_2_i, neri]
143 | 
144 |     docs_all = []
145 |     for kk, dockk in enumerate(list(doc_to_sents.keys())):
146 |         sent_ids = doc_to_sents[dockk]
147 |         if len(sent_ids) <= max_doc_len:
148 |             sent_all = []
149 |             ner_1_all = []
150 |             ner_2_all = []
151 |             ner_all = []
152 |             valid_sents = len(sent_ids)
153 |             valid_words = []
154 |             for idi in sent_ids:
155 |                 sent_all.append(enc_list[idi][0])
156 |                 ner_1_all.append(enc_list[idi][1])
157 |                 ner_2_all.append(enc_list[idi][2])
158 |                 ner_all.append(enc_list[idi][3])
159 |                 valid_words.append(valid_len_list[idi])
160 |             for _ in range(max_doc_len - valid_sents):  # pad the document by repeating its last sentence
161 |                 sent_all.append(enc_list[idi][0])
162 |                 ner_1_all.append(enc_list[idi][1])
163 |                 ner_2_all.append(enc_list[idi][2])
164 |                 ner_all.append(enc_list[idi][3])
165 |                 valid_words.append(valid_len_list[idi])
166 |             docs_all.append([sent_all, ner_1_all, ner_2_all, ner_all, valid_sents, valid_words])
167 |         else:
168 |             len_all = len(sent_ids)
169 |             ndocs_mini = int(np.ceil(len_all / max_doc_len))
170 |             for kk in range(ndocs_mini):
171 |                 init_step = kk * max_doc_len
172 |                 end_step = kk * max_doc_len + max_doc_len
173 |                 ids_tmp = sent_ids[init_step:end_step]
174 |                 sent_all = []
175 |                 ner_1_all = []
176 |                 ner_2_all = []
177 |                 ner_all = []
178 |                 valid_sents = len(ids_tmp)
179 |                 valid_words = []
180 |                 for idi in ids_tmp:
181 |                     sent_all.append(enc_list[idi][0])
182 |                     ner_1_all.append(enc_list[idi][1])
183 |                     ner_2_all.append(enc_list[idi][2])
184 |                     ner_all.append(enc_list[idi][3])
185 |                     valid_words.append(valid_len_list[idi])
186 |                 for _ in range(max_doc_len - valid_sents):  # pad the document by repeating its last sentence
187 |                     sent_all.append(enc_list[idi][0])
188 |                     ner_1_all.append(enc_list[idi][1])
189 |                     ner_2_all.append(enc_list[idi][2])
190 |                     ner_all.append(enc_list[idi][3])
191 |                     valid_words.append(valid_len_list[idi])
192 |                 docs_all.append([sent_all, ner_1_all, ner_2_all, ner_all, valid_sents, valid_words])
193 |     random.shuffle(docs_all)
194 | 
195 |     batches_all = []
196 |     sent_alls = []
197 |     ner_1_alls = []
198 |     ner_2_alls = []
199 |     ner_alls = []
200 |     valid_sentss = []
201 |     valid_wordss = []
202 | 
203 |     docs_all = docs_all * num_epoches  # repeat the shuffled documents once per epoch (same order each epoch)
204 | 
205 |     for k, dock in enumerate(docs_all):
206 |         if k % batch_size == 0 and k > 0:
207 |             batches_all.append([sent_alls, ner_1_alls, ner_2_alls, ner_alls, batch_size, valid_sentss, valid_wordss])
208 |             sent_alls = []
209 |             ner_1_alls = []
210 |             ner_2_alls = []
211 |             ner_alls = []
212 |             valid_sentss = []
213 |             valid_wordss = []
214 |             sent_alls.append(dock[0])
215 |             ner_1_alls.append(dock[1])
216 |             ner_2_alls.append(dock[2])
217 |             ner_alls.append(dock[3])
218 |             valid_sentss.append(dock[4])
219 |             valid_wordss.append(dock[5])
220 |         else:
221 |             sent_alls.append(dock[0])
222 |             ner_1_alls.append(dock[1])
223 |             ner_2_alls.append(dock[2])
224 |             ner_alls.append(dock[3])
225 |             valid_sentss.append(dock[4])
226 |             valid_wordss.append(dock[5])
227 |     # flush the final (possibly partial) batch, padding it to batch_size with copies of the last document
228 |     len_valid = len(sent_alls)
229 |     if len_valid == batch_size:
230 |         batches_all.append([sent_alls, ner_1_alls, ner_2_alls, ner_alls, len_valid, valid_sentss, valid_wordss])
231 |     else:
232 |         sent_alls += [sent_alls[-1]] * (batch_size - len_valid)
233 |         ner_1_alls += [ner_1_alls[-1]] * (batch_size - len_valid)
234 |         ner_2_alls += [ner_2_alls[-1]] * (batch_size - len_valid)
235 |         ner_alls += [ner_alls[-1]] * (batch_size - len_valid)
236 |         valid_sentss += [valid_sentss[-1]] * (batch_size - len_valid)
237 |         valid_wordss += [valid_wordss[-1]] * (batch_size - len_valid)
238 |         batches_all.append([sent_alls, ner_1_alls, ner_2_alls, ner_alls, len_valid, valid_sentss, valid_wordss])
239 |     return batches_all
240 | 
241 | 
242 | 
243 | 
244 | if __name__ == "__main__":
245 |     pass
246 | 
247 | 
--------------------------------------------------------------------------------
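
For reference, load_ED_data expects a CoNLL-style file with five space-separated columns per token (word, source-document id, level-1 NER tag, level-2 NER tag, and the event trigger label), with sentences closed by any non-5-column line such as a blank line, and sentences grouped into documents by the id in column two. The sketch below walks a toy file through the full utils_init.py pipeline; the file contents, tag values and the '<unk>' vocabulary entry are illustrative assumptions, not data shipped with the repository.

#!/usr/bin/env python
# Toy walk-through of the utils_init.py pipeline. The rows, tag inventories
# and '<unk>' vocabulary entry below are illustrative assumptions, not data
# shipped with the repository.

from utils_init import load_ED_data, data_transformation_doc, batch_generation_doc

# Five space-separated columns: word, doc id, level-1 NER, level-2 NER, event label.
rows = [
    "Troops doc_001 B-ORG O O",
    "attacked doc_001 O O B-Conflict:Attack",
    "yesterday doc_001 O O O",
    "",  # a non-5-column line (e.g. blank) closes the sentence
    "Nobody doc_001 O O O",
    "died doc_001 O O B-Life:Die",
]
with open("toy.train", "w", encoding="utf-8") as f:
    f.write("\n".join(rows) + "\n")

sents, events, event_vocab, ner_1, ner_2, doc_to_sents = load_ED_data("toy.train")
# doc_to_sents maps each document id to the indices of its sentences.

vocab_2_id = {w: i for i, w in enumerate(["<unk>"] + sorted({w for s in sents for w in s}))}
ED_2_id = {t: i for i, t in enumerate(sorted(event_vocab))}
ner_to_id_1 = {t: i for i, t in enumerate(sorted({t for s in ner_1 for t in s}))}
ner_to_id_2 = {t: i for i, t in enumerate(sorted({t for s in ner_2 for t in s}))}

encoded = data_transformation_doc(sents, ner_1, ner_2, events, vocab_2_id, ED_2_id,
                                  vocab_2_id["<unk>"], ner_to_id_1, ner_to_id_2)
batches = batch_generation_doc(doc_to_sents, encoded, batch_size=2, max_doc_len=8,
                               max_seq_len=50, vocab_2_id=vocab_2_id, ner_2_id=ED_2_id,
                               num_epoches=1)
# Each batch holds word ids of shape [batch_size, max_doc_len, max_seq_len]; prints 1 2 8 50.
print(len(batches), len(batches[0][0]), len(batches[0][0][0]), len(batches[0][0][0][0]))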
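
The early-stopping rule in train_MLBiNet.py is easy to misread: loss_dev_second actually holds the third-smallest dev loss observed so far, and training stops after nconsect_epoch consecutive evaluations whose loss improves on neither the best loss nor that threshold. A standalone restatement of the rule, assuming the comparison logic in the training loop is the intended behaviour:

def early_stop_step(dev_losses, patience=3):
    # Mirrors the loop in train_MLBiNet.py: nconsect counts consecutive
    # evaluations whose dev loss improves on neither the best loss so far
    # nor the third-smallest loss so far; stop once it reaches `patience`.
    best = float("inf")
    threshold = float("inf")
    seen = []
    nconsect = 0
    for step, loss in enumerate(dev_losses):
        seen.append(loss)
        seen.sort()
        if len(seen) > 2:
            threshold = seen[2]  # third-smallest dev loss
        if loss > best:
            nconsect = nconsect + 1 if loss > threshold else 0
        else:
            nconsect = 0
            best = loss
        if nconsect >= patience:
            return step  # index of the evaluation that triggers the stop
    return None

# e.g. early_stop_step([1.0, 0.8, 0.9, 0.95, 0.97, 0.99, 1.01]) returns 6.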