├── .gitignore ├── ALL.mfs.txt ├── README.md ├── Scorer.class ├── backoff_mfs.py ├── bert_input_handler.py ├── instances_reader.py ├── model.py ├── runall_s1.bash ├── semeval2007.mfs.txt ├── semeval2013.mfs.txt ├── semeval2015.mfs.txt ├── senseval2.mfs.txt ├── senseval3.mfs.txt ├── test.py ├── test_postproc.sh ├── train.py └── utils.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Word Sense Disambiguation (WSD) Using Contextualized Word Representations 2 | This repository contains the source code for the paper 3 | 4 | ``` 5 | @inproceedings{hadiwinoto-et-al-2019-improved, 6 | title = "Improved word sense disambiguation using pre-trained contextualized word representations (to appear)", 7 | author = "Hadiwinoto, Christian and 8 | Ng, Hwee Tou and 9 | Gan, Wee Chung", 10 | booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and 9th International Joint Conference on Natural Language Processing", 11 | month = "nov", 12 | year = "2019", 13 | } 14 | ``` 15 | -------------------------------------------------------------------------------- /Scorer.class: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nusnlp/contextemb-wsd/467ef910848208363bea2defa8ee274bd2533cec/Scorer.class -------------------------------------------------------------------------------- /backoff_mfs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: backoff_mfs.py 4 | # @author: chrhad 5 | # Given neural-tagged WSD output and MFS output, back-off to MFS output if the former is unknown 6 | import argparse 7 | import io 8 | import sys 9 | import os 10 | 11 | from instances_reader import open_file 12 | 13 | if __name__ == '__main__': 14 | argparser = argparse.ArgumentParser( 15 | "Run most-frequent sense backoff if the prediction is unknown") 16 | argparser.add_argument('pred_path', help="Output prediction by neural system") 17 | argparser.add_argument('mfs_path', help="Output prediction by MFS system") 18 | args = argparser.parse_args() 19 | 20 | mfs_senses = {} 21 | with open_file(args.mfs_path, 'r') as f: 22 | for line in f: 23 | l = line.strip() 24 | toks = l.split() 25 | mfs_senses[toks[0]] = toks[1] 26 | f.close() 27 | 28 | with open_file(args.pred_path, 'r') as f: 29 | for line in f: 30 | l = line.strip() 31 | toks = l.split() 32 | iid = toks[0] 33 | sense = mfs_senses.get(iid, 'U') if toks[1] == 'U' else toks[1] 34 | print("{0} {1}".format(iid, sense)) 35 | f.close() 36 | -------------------------------------------------------------------------------- /bert_input_handler.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: bert_input_handler.py 4 | # @author: chrhad 5 | # Input handler to tokenize and indexify text 6 | import sys 7 | import torch 8 | from pytorch_pretrained_bert import BertTokenizer 9 | 10 | PAD_TOK = "[PAD]" 11 | MAXLEN = 512 12 | 13 | def split_tuple_list(tupseq): 14 | return map(lambda i:list(map(lambda x:x[i], tupseq)), range(3)) 15 | 16 | class BertInputHandler: 17 | def __init__(self, pretrained_model_name): 18 | do_lower_case = pretrained_model_name.endswith("-uncased") 19 | self.tokenizer = BertTokenizer.from_pretrained(pretrained_model_name, 20 | do_lower_case=do_lower_case) 21 | 22 | """ 23 | Tokenize left, head, and right and convert to vocab index in the BERT vocab 24 | """ 25 | def tokenize_indexify(self, triplet, bos=None, maxlen=MAXLEN): # beginning of sentence, e.g. [CLS] 26 | if maxlen <= 0: 27 | maxlen = MAXLEN 28 | # sentence is a list of list of tuples 29 | left, sentence, right = triplet 30 | left_tups = list(self.tokenize_tuples(left)) 31 | sentence_tups = list(self.tokenize_tuples(sentence)) 32 | right_tups = list(self.tokenize_tuples(right)) 33 | while len(left_tups) + len(sentence_tups) + len(right_tups) >= maxlen and len(left_tups) + len(right_tups) > 0: 34 | if len(left_tups) > 0: 35 | left_tups.pop(0) 36 | if len(right_tups) > 0: 37 | right_tups.pop(-1) 38 | if bos is not None and len(bos) > 0: 39 | left_tups = [(bos, '#', '#')] + left_tups 40 | tokens, all_lexels, all_iids = tuple(split_tuple_list(left_tups + sentence_tups + right_tups)) 41 | tokens_ids = torch.tensor(self.tokenizer.convert_tokens_to_ids(tokens), 42 | dtype=torch.long) 43 | lexels = [lex for lex in all_lexels if (lex != '*' and lex != '#')] 44 | iids = [iid for iid in all_iids if (iid != '*' and iid != '#')] 45 | head_offsets = torch.tensor([i for (i, x) in enumerate(all_iids) if (x != '*' and x!= '#')], dtype=torch.long) 46 | return (tokens_ids, lexels, iids, head_offsets, all_lexels) 47 | 48 | def tokenize_tuples(self, sentence): 49 | for tup in sentence: 50 | subtoks = self.tokenizer.tokenize(tup[0]) 51 | yield (subtoks[0], tup[1], tup[2]) 52 | for t in subtoks[1:]: 53 | yield (t, '*', '*') 54 | 55 | def pad_idx(self): 56 | return self.tokenizer.convert_tokens_to_ids([PAD_TOK])[0] 57 | -------------------------------------------------------------------------------- /instances_reader.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # @author: chrhad 4 | # File: instances_reader.py 5 | # Interface to read files and load to annotations 6 | import gzip 7 | import io 8 | import glob 9 | import os 10 | import sys 11 | import re 12 | import nltk 13 | from nltk.tokenize import TreebankWordTokenizer 14 | from unicodedata import category 15 | import xml.etree.ElementTree as ET 16 | from lxml import etree 17 | from utils import open_file 18 | 19 | class AbstractLoader: 20 | def __init__(self, fpath, n_surrounding=1): 21 | """ 22 | Parameters 23 | ---------- 24 | fpath : str 25 | The file path containing the XML file 26 | n_surrounding : int 27 | The number of surrounding sentences to capture context 28 | """ 29 | self.sent_instances = [] 30 | 31 | def __iter__(self): 32 | self._n = 0 33 | return self 34 | 35 | def __next__(self): 36 | if self._n < len(self.sent_instances): 37 | result = self.sent_instances[self._n] 38 | self._n += 1 39 | return result 40 | else: 41 | raise StopIteration 42 | 43 | @classmethod 44 | def record_keys(cls, fpath): 45 | raise NotImplementedError 46 | 47 | @classmethod 48 | def write_output(cls, iid, sense): 49 | raise NotImplementedError 50 | 51 | 52 | class SemEval13Loader(AbstractLoader): 53 | def __init__(self, fpath, n_surrounding=1): 54 | """ 55 | Parameters 56 | ---------- 57 | fpath : str 58 | The file path containing the XML file 59 | n_surrounding : int 60 | The number of surrounding sentences to capture context 61 | """ 62 | super(SemEval13Loader, self).__init__(fpath, n_surrounding) 63 | tree = ET.parse(fpath) 64 | root = tree.getroot() 65 | assert root.tag.lower() == "corpus", "Root element must be named 'root'" 66 | for text in root: 67 | text_buffer = [] # all sentences in a text 68 | for sent in text: 69 | sent_buffer = [] 70 | for tok in sent: 71 | entries = [] 72 | if tok.tag == "instance": 73 | lexel = tok.get("lemma") + '.' + tok.get("pos") 74 | iid = tok.get("id") 75 | tokspl = tok.text.split(' ') 76 | entries.append((tokspl[0], lexel, iid)) 77 | for t in tokspl[1:]: 78 | entries.append((t, '*', '*')) 79 | elif tok.tag == "wf": 80 | entries.append((tok.text, '#', '#')) 81 | sent_buffer += entries 82 | text_buffer.append(sent_buffer) 83 | 84 | # create instances 85 | for i in range(len(text_buffer)): 86 | left_instance = [] 87 | for j in range(max(i-n_surrounding, 0), i, 1): 88 | left_instance += [(t[0], '#', '#') for t in text_buffer[j]] + [("[SEP]", '#', '#')] 89 | right_instance = [] 90 | for j in range(i+1, min(i+n_surrounding+1, len(text_buffer)), 1): 91 | right_instance += [("[SEP]", '#', '#')] + [(t[0], '#', '#') for t in text_buffer[j]] 92 | self.sent_instances.append([left_instance, text_buffer[i], right_instance]) 93 | 94 | @classmethod 95 | def record_keys(cls, fpath): 96 | keys = {} 97 | with open_file(fpath, 'r') as f: 98 | for line in f: 99 | l = line.strip() 100 | toks = l.split(' ') 101 | key_str = toks[1] 102 | keys[toks[0]] = key_str 103 | f.close() 104 | return keys 105 | 106 | @classmethod 107 | def write_output(cls, iid, sense): 108 | return "{0} {1}".format(iid, sense) 109 | 110 | 111 | class Senseval2LSLoader(AbstractLoader): 112 | def __init__(self, fpath, n_surrounding=1): 113 | """ 114 | Parameters 115 | ---------- 116 | fpath : str 117 | The file path containing the XML file 118 | n_surrounding : int 119 | The number of surrounding sentences to capture context 120 | """ 121 | super(Senseval2LSLoader, self).__init__(fpath, n_surrounding) 122 | parser = etree.XMLParser(dtd_validation=True) 123 | tree = ET.parse(fpath, parser) 124 | root = tree.getroot() 125 | assert root.tag.lower() == "corpus", "Root element must be named 'root'" 126 | self.tokenizer = TreebankWordTokenizer() 127 | self.sent_segmenter = nltk.data.load('tokenizers/punkt/english.pickle') 128 | for lexelt in root: 129 | lexel_orig = lexelt.get("item") 130 | print(lexel_orig, file=sys.stderr, flush=True) 131 | for instance in lexelt: 132 | # record instance ID 133 | iid = instance.get("id") 134 | context = instance.find('context') 135 | sentences = [] 136 | leftstr = context.text # everything before the head 137 | headelem = context[0] # head element 138 | headstr = headelem.text 139 | 140 | # left part of sentence: left context + head 141 | leftstr += headstr 142 | sentences += [self.remove_control_characters(s) for s in self.sent_segmenter.tokenize(leftstr.replace('\n', ' '))] 143 | sent_offset = len(sentences) - 1 144 | tok_char_end_offset = len(sentences[-1]) # offset after the last char of head 145 | tok_char_offset = tok_char_end_offset - len(headstr) 146 | 147 | sats_attr = headelem.get("sats") 148 | rightstr = '' 149 | if headelem.tail is not None: 150 | rightstr += headelem.tail 151 | lexel_sat = None 152 | if sats_attr is not None: 153 | lexel_sat = sats_attr.split(' ')[0].split('.')[0] + '.' + lexel.split('.')[-1] 154 | for i in range(1, len(context)): 155 | rightstr += context[i].text 156 | rightstr += context[i].tail 157 | righttoks = [self.remove_control_characters(s) for s in self.sent_segmenter.tokenize(rightstr.replace('\n', ' '))] 158 | if len(righttoks) > 0: 159 | sentences[-1] += righttoks[0] 160 | sentences += righttoks[1:] 161 | 162 | # tokenize sentences 163 | tok_sentences, char2toks = zip(*[self.tokenize(sen) for sen in sentences]) 164 | 165 | # create instances 166 | left_instance = [] 167 | left_begin = max(0, sent_offset - n_surrounding) if n_surrounding >= 0 else 0 168 | for i in range(left_begin, sent_offset): 169 | left_instance += [(t, '#', '#') for t in tok_sentences[i]] + [("[SEP]", '#', '#')] 170 | 171 | head_instance = [] 172 | tok_offset = char2toks[sent_offset][tok_char_offset] 173 | tok_end_offset = char2toks[sent_offset][tok_char_end_offset-1] 174 | 175 | head_instance += [(t, '#', '#') for t in tok_sentences[sent_offset][:tok_offset]] 176 | lexel = lexel_sat if lexel_sat is not None else lexel_orig 177 | head_instance.append((tok_sentences[sent_offset][tok_offset], lexel, iid)) 178 | head_instance += [(t, '*', '*') for t in tok_sentences[sent_offset][tok_offset+1:tok_end_offset+1]] 179 | head_instance += [(t, '#', '#') for t in tok_sentences[sent_offset][tok_end_offset+1:]] 180 | 181 | right_instance = [] 182 | right_end = min(sent_offset + n_surrounding + 1, len(sentences)) 183 | for i in range(sent_offset + 1, right_end): 184 | right_instance += [("[SEP]", '#', '#')] + [(t, '#', '#') for t in tok_sentences[i]] 185 | 186 | self.sent_instances.append([left_instance, head_instance, right_instance]) 187 | 188 | @classmethod 189 | def record_keys(cls, fpath): 190 | keys = {} 191 | with open_file(fpath, 'r') as f: 192 | for line in f: 193 | l = line.strip() 194 | toks = l.split(' ') 195 | iid = toks[1] 196 | key_strs = [t for t in toks[2:] if (t != 'P' and t != 'U')] 197 | key_str = 'U' if len(key_strs) == 0 else key_strs[0] 198 | keys[iid] = key_str 199 | f.close() 200 | return keys 201 | 202 | @classmethod 203 | def write_output(cls, iid, sense): 204 | return "{0} {1} {2}".format(iid.split('.')[0], iid, sense) 205 | 206 | def remove_control_characters(self, s): 207 | return ''.join(ch for ch in s if category(ch)[0]!='C' or category(ch) == 'Cf') 208 | 209 | def tokenize(self, sentence): 210 | tokspans = self.tokenizer.span_tokenize(sentence) 211 | char2tok = {} 212 | tokens = [] 213 | for i, (s, e) in enumerate(tokspans): 214 | tokens.append(sentence[s:e]) 215 | for j in range(s, e): 216 | char2tok[j] = i 217 | return (tokens, char2tok) 218 | 219 | class Senseval3LSLoader(Senseval2LSLoader): 220 | def __init__(self, fpath, n_surrounding=1): 221 | """ 222 | Parameters 223 | ---------- 224 | fpath : str 225 | The file path containing the XML file 226 | n_surrounding : int 227 | The number of surrounding sentences to capture context 228 | """ 229 | super(Senseval3LSLoader, self).__init__(fpath, n_surrounding) 230 | 231 | @classmethod 232 | def write_output(cls, iid, sense): 233 | return "{0} {1} {2}".format('.'.join(iid.split('.')[0:2]), iid, sense) 234 | 235 | class SemEval13InductionLoader(AbstractLoader): 236 | def __init__(self, fpath, n_surrounding=1): 237 | """ 238 | Parameters 239 | ---------- 240 | fpath : str 241 | The file path containing the XML file 242 | n_surrounding : int 243 | The number of surrounding sentences to capture context 244 | """ 245 | super(SemEval13InductionLoader, self).__init__(fpath, n_surrounding) 246 | self.tokenizer = TreebankWordTokenizer() 247 | self.sent_segmenter = nltk.data.load('tokenizers/punkt/english.pickle') 248 | fnames = sorted(glob.glob(fpath + "/*.xml")) 249 | for fname in fnames: 250 | tree = ET.parse(fname) 251 | root = tree.getroot() 252 | assert root.tag.lower() == "instances", "Root element must be named 'instances'" 253 | for instance in root: 254 | lexel = instance.get("lemma") + '.' + instance.get("partOfSpeech") 255 | iid = instance.get("id") 256 | start_offset = int(instance.get("tokenStart")) 257 | end_offset = int(instance.get("tokenEnd")) 258 | sentence = instance.text 259 | 260 | # tokenize sentences 261 | tok_sentence, char2tok = self.tokenize(sentence) 262 | 263 | head_instance = [] 264 | tok_offset = char2tok[start_offset] 265 | tok_end_offset = char2tok[end_offset-1] 266 | 267 | head_instance += [(t, '#', '#') for t in tok_sentence[:tok_offset]] 268 | head_instance.append((tok_sentence[tok_offset], lexel, iid)) 269 | head_instance += [(t, '*', '*') for t in tok_sentence[tok_offset+1:tok_end_offset+1]] 270 | head_instance += [(t, '#', '#') for t in tok_sentence[tok_end_offset+1:]] 271 | 272 | self.sent_instances.append([[], head_instance, []]) 273 | 274 | @classmethod 275 | def record_keys(cls, fpath): 276 | keys = {} 277 | with open_file(fpath, 'r') as f: 278 | for line in f: 279 | l = line.strip() 280 | toks = l.split(' ') 281 | key_str = toks[1] 282 | keys[toks[0]] = key_str 283 | f.close() 284 | return keys 285 | 286 | @classmethod 287 | def write_output(cls, iid, sense): 288 | return "{0} {1}".format(iid, sense) 289 | 290 | def remove_control_characters(self, s): 291 | return ''.join(ch for ch in s if category(ch)[0]!='C' or category(ch) == 'Cf') 292 | 293 | def tokenize(self, sentence): 294 | tokspans = self.tokenizer.span_tokenize(sentence) 295 | char2tok = {} 296 | tokens = [] 297 | for i, (s, e) in enumerate(tokspans): 298 | tokens.append(sentence[s:e]) 299 | for j in range(s, e): 300 | char2tok[j] = i 301 | return (tokens, char2tok) 302 | 303 | 304 | class InputLoaderFactory: 305 | def __init__(self, xml_format): 306 | self._xml_format = xml_format 307 | 308 | def load(self, train_path, num_context, key_path=None): 309 | InputLoader = self._get_input_loader(self._xml_format) 310 | inputs = InputLoader(train_path, num_context) 311 | keys = InputLoader.record_keys(key_path) if key_path is not None else None 312 | return (inputs, keys) 313 | 314 | def _get_input_loader(self, xml_format): 315 | if xml_format == 'semeval13': 316 | return SemEval13Loader 317 | elif xml_format == 'senseval2ls': 318 | return Senseval2LSLoader 319 | elif xml_format == 'senseval3ls': 320 | return Senseval3LSLoader 321 | elif xml_format == 'semeval13induction': 322 | return SemEval13InductionLoader 323 | else: 324 | raise ValueError(format) 325 | 326 | 327 | if __name__ == '__main__': 328 | instances = SemEval13InductionLoader(sys.argv[1], 2) 329 | for inst in instances: 330 | print(inst) 331 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | # /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: model.py 4 | # @author: chrhad 5 | # BERT classifier model for various lexelts 6 | import copy 7 | import math 8 | import sys 9 | import torch 10 | import torch.nn as nn 11 | import torch.nn.functional as F 12 | from apex.normalization.fused_layer_norm import FusedLayerNorm as LayerNorm 13 | from pytorch_pretrained_bert import BertConfig, BertModel 14 | 15 | def pad(seqs, pad_ix=0): 16 | maxlen = max([len(seq) for seq in seqs]) 17 | ret = torch.zeros(len(seqs), maxlen, dtype=torch.long) 18 | if pad_ix != 0: 19 | ret.fill_(pad_ix) 20 | for i, seq in enumerate(seqs): 21 | ret[i][:len(seq)] = seq 22 | return ret 23 | 24 | # Multi-headed self-attention 25 | class MultiHeadedAttention(nn.Module): 26 | def __init__(self, hidden_dim, num_heads, transform_value=True, dropout=0.): 27 | super(MultiHeadedAttention, self).__init__() 28 | if hidden_dim % num_heads != 0: 29 | raise ValueError( 30 | "The hidden size ({0:d}) is not a multiple of the number of attention \ 31 | heads ({1:d})".format(hidden_dim, num_heads)) 32 | self.num_heads = num_heads 33 | self.attn_head_dim = hidden_dim // num_heads 34 | self.all_head_dim = self.num_heads * self.attn_head_dim 35 | 36 | self.query = nn.Linear(hidden_dim, self.all_head_dim) 37 | self.key = nn.Linear(hidden_dim, self.all_head_dim) 38 | self.value = nn.Linear(hidden_dim, self.all_head_dim) if transform_value \ 39 | else None 40 | 41 | self.dropout = nn.Dropout(dropout) 42 | 43 | def transpose_for_scores(self, x): 44 | new_x_shape = x.size()[:-1] + (self.num_heads, self.attn_head_dim) 45 | x = x.view(*new_x_shape) 46 | return x.permute(0, 2, 1, 3) 47 | 48 | def forward(self, query_in, key_in, target, attention_mask): 49 | mx_query_layer = self.query(query_in) 50 | mx_key_layer = self.key(key_in) 51 | mx_value_layer = self.value(target) if self.value is not None else target 52 | 53 | query_layer = self.transpose_for_scores(mx_query_layer) 54 | key_layer = self.transpose_for_scores(mx_key_layer) 55 | value_layer = self.transpose_for_scores(mx_value_layer) 56 | 57 | # Dot product between query and key 58 | attn_scores = torch.matmul(query_layer, key_layer.transpose(-1, -2)) 59 | attn_scores = attn_scores / math.sqrt(self.attn_head_dim) 60 | ext_attn_mask = attention_mask.unsqueeze(1).unsqueeze(2) 61 | attn_scores = attn_scores + (1. - ext_attn_mask) * -10000. 62 | attn_probs = F.softmax(attn_scores, dim=-1) 63 | 64 | attn_probs = self.dropout(attn_probs) 65 | 66 | context_layer = torch.matmul(attn_probs, value_layer) 67 | context_layer = context_layer.permute(0, 2, 1, 3).contiguous() 68 | new_context_layer_dim = context_layer.size()[:-2] + (self.all_head_dim, ) 69 | context_layer = context_layer.view(*new_context_layer_dim) 70 | return context_layer 71 | 72 | class AddNorm(nn.Module): # add input and output 73 | def __init__(self, in_dim, hidden_dim, dropout=0.): 74 | super(AddNorm, self).__init__() 75 | self.dense = nn.Linear(in_dim, hidden_dim) # W^O (Vaswani et al, 2017) 76 | self.layer_norm = LayerNorm(hidden_dim, eps=1e-12) 77 | self.dropout = nn.Dropout(dropout) 78 | 79 | def forward(self, hidden_states, in_tensor): 80 | hidden_states = self.dense(hidden_states) 81 | hidden_states = self.dropout(hidden_states) 82 | hidden_states = self.layer_norm(hidden_states + in_tensor) 83 | return hidden_states 84 | 85 | def gelu(x): 86 | """Implementation of the Gaussian linear unit activation function. 87 | """ 88 | return x * 0.5 * (1.0 + torch.erf(x / math.sqrt(2.0))) 89 | 90 | ACT2FN = {'gelu': gelu, "relu": F.relu} 91 | 92 | class PositionwiseFeedForward(nn.Module): 93 | def __init__(self, hidden_dim, intermediate_dim, hidden_act='gelu'): 94 | super(PositionwiseFeedForward, self).__init__() 95 | self.dense = nn.Linear(hidden_dim, intermediate_dim) 96 | self.act_fn = ACT2FN[hidden_act] 97 | 98 | def forward(self, hidden_states): 99 | hidden_states = self.dense(hidden_states) 100 | hidden_states = self.act_fn(hidden_states) 101 | return hidden_states 102 | 103 | class EncoderLayer(nn.Module): 104 | def __init__(self, hidden_dim, num_heads, intermediate_dim, hidden_act='gelu', 105 | dropout=0.): 106 | super(EncoderLayer, self).__init__() 107 | self.attn = MultiHeadedAttention(hidden_dim, num_heads, dropout) 108 | self.attn_output = AddNorm(hidden_dim, hidden_dim, dropout) 109 | self.intermediate = PositionwiseFeedForward(hidden_dim, intermediate_dim, 110 | hidden_act) 111 | self.output = AddNorm(intermediate_dim, hidden_dim, dropout) 112 | 113 | def forward(self, in_tensor, attention_mask): 114 | attn_out = self.attn(in_tensor, attention_mask) 115 | attn_out = self.attn_output(attn_out, in_tensor) 116 | inter_out = self.intermediate(attn_out) 117 | output = self.output(inter_out, attn_out) 118 | return output 119 | 120 | # Multi-layer perceptron (MLP) with dropout 121 | class MLP(nn.Module): 122 | def __init__(self, input_dim, output_dim, hidden_dims=None, dropout=0.): 123 | super(MLP, self).__init__() 124 | affines = [] 125 | prev = input_dim 126 | if hidden_dims is not None: # create hidden layers of affine transformation 127 | for dim in hidden_dims: 128 | affines.append(nn.Linear(prev, dim)) 129 | nn.init.xavier_normal_(affines[-1].weight) 130 | prev = dim 131 | affines.append(nn.Linear(prev, output_dim)) # the last, might be the only one 132 | self.num_layers = len(affines) 133 | self.affine_seq = nn.ModuleList(affines) 134 | self.dropout = nn.Dropout(dropout) 135 | 136 | def forward(self, x): 137 | for i, tr in enumerate(self.affine_seq): 138 | x = tr(x) 139 | if i < self.num_layers - 1: 140 | x = F.tanh(x) 141 | x = self.dropout(x) # dropout only applied to intermediate layers 142 | return x 143 | 144 | 145 | class BertSenseClassifier(nn.Module): 146 | def __init__(self, model_name, lexelt_sense_num, sense_lex_filter, mlp_dropout=0., 147 | attn_dropout=0., pad_ix=0, unk_ix=0, layer=-1, use_glu=False, residual_glu=False, 148 | act_fn='gelu', top_attn_head=1, sent_attn_query=False, freeze_bert=False): 149 | super(BertSenseClassifier, self).__init__() 150 | 151 | # BERT parameters 152 | self.bert_model = BertModel.from_pretrained(model_name) 153 | self.hidden_size = self.bert_model.config.hidden_size 154 | self.maxlen = self.bert_model.config.max_position_embeddings 155 | for p in self.bert_model.parameters(): 156 | p.requires_grad = False 157 | 158 | self.pad_ix = pad_ix 159 | self.unk_ix = unk_ix 160 | # layer-wise attention to weight different layer outputs 161 | self.layer = layer 162 | self.layer_attn = MultiHeadedAttention(self.hidden_size, top_attn_head, 163 | transform_value=False, dropout=attn_dropout) if layer < 0 else None 164 | if self.layer_attn is not None: 165 | self.layer_attn.apply(self.init_weights) 166 | self.uquery = nn.Parameter(torch.empty(1, 1, self.hidden_size, dtype=torch.float)) \ 167 | if (layer < 0 and not sent_attn_query) else None 168 | if self.uquery is not None: 169 | self.uquery.data.normal_(mean=0.0, std=self.bert_model.config.initializer_range) 170 | 171 | # Sense classifier 172 | self.use_glu = use_glu 173 | self.residual_glu = residual_glu 174 | self.glu_gate = nn.Linear(self.hidden_size, 2 * self.hidden_size) if use_glu else None 175 | self.dropout = nn.Dropout(mlp_dropout) 176 | self.mlp_in_size = self.hidden_size 177 | self.mlp = MLP(self.mlp_in_size, lexelt_sense_num, dropout=mlp_dropout) 178 | 179 | # sense_lex_filter 180 | self.sense_lex_filter = nn.Parameter(torch.tensor( 181 | [sense_lex_filter], dtype=torch.long), requires_grad=False) 182 | 183 | def init_weights(self, module): 184 | if isinstance(module, nn.Linear): 185 | module.weight.data.normal_(mean=0.0, std=self.bert_model.config.initializer_range) 186 | elif isinstance(module, LayerNorm): 187 | module.bias.data.zero_() 188 | module.weight.data.fill_(1.0) 189 | if isinstance(module, nn.Linear) and module.bias is not None: 190 | module.bias.data.zero_() 191 | 192 | def forward(self, sentences, offsets, lexelts, is_log=True): 193 | encoded = self._bert_encode(sentences) # encoded: batch_size x maxlen x hidden_size 194 | #attention_mask = sentences.ne(self.pad_ix) 195 | maxlen = encoded.size(1) 196 | offset_cat = self._flatten_offsets(offsets, maxlen) 197 | lexelt_cat = torch.cat(lexelts) 198 | encoded_btflat = encoded.view(-1, encoded.size(2), self.hidden_size) # batch_size * maxlen x num_layers x hidden_size 199 | slices = encoded_btflat.index_select(0, offset_cat) 200 | 201 | if self.layer_attn is not None: # compute layerwise attention 202 | sent_offset_cat = offset_cat // maxlen * maxlen # sent_offset_cat: batch_size x maxlen x num_layers x hidden_size 203 | query_exp = self.uquery.expand(slices.size(0), -1, -1) if self.uquery is not None \ 204 | else encoded_btflat.index_select(0, sent_offset_cat)[:,-1:,:] 205 | layer_attn_mask = torch.ones(slices.size(0), slices.size(1)) 206 | layer_attn_mask = layer_attn_mask.to(dtype=query_exp.dtype, device=query_exp.device) 207 | slices = self.layer_attn(query_exp, slices, slices, layer_attn_mask).squeeze(1) 208 | else: 209 | slices = slices[:,self.layer,:] 210 | 211 | if self.use_glu: 212 | glu_in = self.dropout(slices) 213 | glu_in = self.glu_gate(glu_in) 214 | if self.residual_glu: 215 | glu_in[:,:slices.size(1)] = (glu_in[:,:slices.size(1)] + slices) * math.sqrt(0.5) 216 | glu_out = F.glu(glu_in, dim=-1) 217 | slices = glu_out 218 | 219 | logits = self.mlp(slices) 220 | logits = logits + F.relu(1. - self._create_mask(lexelt_cat)) * -10000. 221 | if is_log: 222 | return F.log_softmax(logits, dim=-1) 223 | else: 224 | return F.softmax(logits, dim=-1) 225 | 226 | def train_parameters(self): 227 | return filter(lambda p: p.requires_grad, self.parameters()) 228 | 229 | def _bert_encode(self, sentences): 230 | # Pass to BERT model 231 | with torch.no_grad(): 232 | encoded, _ = self.bert_model(sentences, 233 | attention_mask=sentences.ne(self.pad_ix)) 234 | return torch.stack(encoded, dim=2) 235 | 236 | def _create_mask(self, lexelts): 237 | return self.sense_lex_filter.eq(lexelts.unsqueeze(1)).float() + \ 238 | self.sense_lex_filter.eq(self.unk_ix).float() 239 | 240 | def _flatten_offsets(self, offsets, maxlen): 241 | return torch.cat([i * maxlen + x for i, x in enumerate(offsets)]) 242 | 243 | -------------------------------------------------------------------------------- /runall_s1.bash: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | WSD_DATA_DIR=${HOME}/demo/data/WSD_Unified_Evaluation_Datasets 3 | WSD_TRAIN_DIR=${HOME}/demo/data/WSD_Training_Corpora/SemCor 4 | MODELS_DIR=models/AW 5 | 6 | mkdir -p ${MODELS_DIR} 7 | for SD in 111 222 123; do 8 | # 1. Simple (1sent+1sur) 9 | python train.py --bert-model bert-base-cased --dropout 0 --optimizer bert-adam --layer 11 \ 10 | --num-context 1 --seed ${SD} \ 11 | --devset-path ${WSD_DATA_DIR}/semeval2007/semeval2007.data.xml \ 12 | --devkey-path ${WSD_DATA_DIR}/semeval2007/semeval2007.gold.key.txt \ 13 | ${WSD_TRAIN_DIR}/semcor.data.xml ${WSD_TRAIN_DIR}/semcor.gold.key.txt \ 14 | ${MODELS_DIR}/semcor_enbert_s1_sd${SD}_model 15 | 16 | for TESTSET in semeval2007 senseval2 senseval3 semeval2013 semeval2015; do 17 | bash test_postproc.sh ${TESTSET} ${MODELS_DIR}/semcor_enbert_s1_sd${SD}_model 18 | done 19 | # 2. LW (1sent+1sur) 20 | python train.py --bert-model bert-base-cased --dropout 0 --optimizer bert-adam \ 21 | --num-context 1 --seed ${SD} \ 22 | --devset-path ${WSD_DATA_DIR}/semeval2007/semeval2007.data.xml \ 23 | --devkey-path ${WSD_DATA_DIR}/semeval2007/semeval2007.gold.key.txt \ 24 | ${WSD_TRAIN_DIR}/semcor.data.xml ${WSD_TRAIN_DIR}/semcor.gold.key.txt \ 25 | ${MODELS_DIR}/semcor_lwa_enbert_s1_sd${SD}_model 26 | 27 | for TESTSET in semeval2007 senseval2 senseval3 semeval2013 semeval2015; do 28 | bash test_postproc.sh ${TESTSET} ${MODELS_DIR}/semcor_lwa_enbert_s1_sd${SD}_model 29 | done 30 | # 3. GLU (1sent+1sur) 31 | python train.py --bert-model bert-base-cased --dropout 0.1 --optimizer bert-adam --layer 11 \ 32 | --num-context 1 --use-glu --residual-glu --seed ${SD} \ 33 | --devset-path ${WSD_DATA_DIR}/semeval2007/semeval2007.data.xml \ 34 | --devkey-path ${WSD_DATA_DIR}/semeval2007/semeval2007.gold.key.txt \ 35 | ${WSD_TRAIN_DIR}/semcor.data.xml ${WSD_TRAIN_DIR}/semcor.gold.key.txt \ 36 | ${MODELS_DIR}/semcor_glur_do0.1_enbert_s1_sd${SD}_model 37 | 38 | for TESTSET in semeval2007 senseval2 senseval3 semeval2013 semeval2015; do 39 | bash test_postproc.sh ${TESTSET} ${MODELS_DIR}/semcor_glur_do0.1_enbert_s1_sd${SD}_model 40 | done 41 | # 4. GLU+LW (1sent+1sur) 42 | python train.py --bert-model bert-base-cased --dropout 0.1 --optimizer bert-adam \ 43 | --num-context 1 --use-glu --residual-glu --seed ${SD} \ 44 | --devset-path ${WSD_DATA_DIR}/semeval2007/semeval2007.data.xml \ 45 | --devkey-path ${WSD_DATA_DIR}/semeval2007/semeval2007.gold.key.txt \ 46 | ${WSD_TRAIN_DIR}/semcor.data.xml ${WSD_TRAIN_DIR}/semcor.gold.key.txt \ 47 | ${MODELS_DIR}/semcor_lwaglur_do0.1_enbert_s1_sd${SD}_model 48 | 49 | for TESTSET in semeval2007 senseval2 senseval3 semeval2013 semeval2015; do 50 | bash test_postproc.sh ${TESTSET} ${MODELS_DIR}/semcor_lwaglur_do0.1_enbert_s1_sd${SD}_model 51 | done 52 | 53 | done 54 | -------------------------------------------------------------------------------- /semeval2007.mfs.txt: -------------------------------------------------------------------------------- 1 | d000.s021.t000 fall%2:38:03:: 2 | d001.s027.t000 enter%2:38:00:: 3 | d001.s007.t001 mandate%2:41:00:: 4 | d001.s032.t002 seem%2:39:00:: 5 | d002.s008.t000 seem%2:39:00:: 6 | d002.s042.t000 yell%2:32:01:: 7 | d002.s043.t000 yell%2:32:01:: 8 | d000.s000.t001 research%1:04:00:: 9 | d000.s026.t000 research%1:04:00:: 10 | d001.s016.t001 receive%2:40:00:: 11 | d001.s019.t000 receive%2:40:00:: 12 | d001.s035.t002 describe%2:32:00:: 13 | d002.s010.t000 hold%2:42:00:: 14 | d002.s013.t000 hold%2:42:00:: 15 | d002.s027.t009 hold%2:42:00:: 16 | d002.s049.t007 hitch%1:28:00:: 17 | d000.s011.t001 prevent%2:41:01:: 18 | d000.s009.t001 problem%1:26:00:: 19 | d000.s016.t000 problem%1:26:00:: 20 | d001.s003.t005 book%1:10:00:: 21 | d001.s005.t000 book%1:10:00:: 22 | d001.s013.t005 career%1:04:00:: 23 | d002.s017.t001 hate%2:37:00:: 24 | d002.s046.t004 scramble%2:38:00:: 25 | d002.s046.t006 scramble%2:38:00:: 26 | d000.s009.t002 predispose%2:31:00:: 27 | d000.s014.t000 study%1:04:00:: 28 | d000.s015.t000 study%1:04:00:: 29 | d001.s025.t000 wrestle%2:33:01:: 30 | d002.s004.t002 scoff%2:32:00:: 31 | d002.s006.t000 balloon%2:38:00:: 32 | d002.s017.t002 balloon%2:38:00:: 33 | d000.s008.t002 shelter%1:06:00:: 34 | d000.s009.t007 shelter%1:06:00:: 35 | d002.s043.t001 leap%2:38:00:: 36 | d002.s004.t001 light%2:30:00:: 37 | d001.s036.t001 government%1:14:00:: 38 | d002.s009.t004 estimate%1:09:00:: 39 | d002.s008.t002 lead%1:07:02:: 40 | d001.s028.t004 scandal%1:10:00:: 41 | d001.s031.t000 one%1:23:00:: 42 | d002.s024.t000 pilot%1:18:00:: 43 | d002.s042.t001 pilot%1:18:00:: 44 | d002.s044.t000 pilot%1:18:00:: 45 | d002.s043.t002 wear%2:29:00:: 46 | d000.s025.t001 consider%2:31:00:: 47 | d001.s026.t000 change%2:30:01:: 48 | d001.s037.t003 change%2:30:01:: 49 | d001.s036.t006 maintain%2:42:00:: 50 | d002.s009.t001 pass%2:38:00:: 51 | d000.s028.t000 march%1:28:00:: 52 | d001.s011.t000 locate%2:40:00:: 53 | d001.s024.t001 clothes%1:06:00:: 54 | d002.s010.t002 attract%2:35:00:: 55 | d001.s005.t001 revolve_around%2:42:00:: 56 | d002.s042.t003 plunge%2:35:01:: 57 | d002.s045.t001 plunge%2:35:01:: 58 | d000.s002.t001 statement%1:10:00:: 59 | d000.s004.t001 understand%2:31:00:: 60 | d000.s033.t002 increase%2:30:00:: 61 | d001.s029.t004 end%2:42:00:: 62 | d002.s025.t002 end%2:42:00:: 63 | d002.s041.t001 hiss%2:32:00:: 64 | d000.s008.t001 lack%2:42:00:: 65 | d000.s008.t003 lack%2:42:00:: 66 | d000.s009.t006 lack%2:42:00:: 67 | d001.s006.t000 start%2:30:00:: 68 | d001.s013.t000 start%2:30:00:: 69 | d002.s002.t000 man%1:18:00:: 70 | d002.s052.t005 tee%1:15:00:: 71 | d000.s028.t007 know%2:31:01:: 72 | d001.s036.t000 know%2:31:01:: 73 | d001.s027.t002 arrest%2:35:00:: 74 | d000.s003.t000 become%2:30:00:: 75 | d001.s005.t002 become%2:30:00:: 76 | d001.s010.t000 become%2:30:00:: 77 | d001.s017.t000 become%2:30:00:: 78 | d001.s026.t002 become%2:30:00:: 79 | d001.s006.t004 programme%1:10:00:: 80 | d000.s018.t001 director%1:18:00:: 81 | d002.s057.t001 resist%2:42:01:: 82 | d002.s051.t000 exercise%1:04:00:: 83 | d000.s009.t004 category%1:14:00:: 84 | d001.s028.t003 program%1:09:00:: 85 | d001.s029.t005 program%1:09:00:: 86 | d001.s032.t001 program%1:09:00:: 87 | d001.s033.t000 program%1:09:00:: 88 | d001.s016.t000 seek%2:40:00:: 89 | d002.s031.t002 hand%1:08:00:: 90 | d002.s003.t002 come_to%2:39:00:: 91 | d002.s036.t003 burner%1:06:00:: 92 | d002.s044.t002 burner%1:06:00:: 93 | d002.s000.t002 ask%2:32:00:: 94 | d000.s015.t004 combination%1:14:00:: 95 | d001.s030.t001 gold%1:21:00:: 96 | d002.s050.t010 yank%2:35:00:: 97 | d000.s029.t000 force%1:07:01:: 98 | d000.s028.t002 cite%2:32:00:: 99 | d000.s010.t000 interaction%1:04:00:: 100 | d001.s015.t001 use%1:04:00:: 101 | d002.s028.t002 listen%2:39:00:: 102 | d001.s030.t000 leave%2:38:01:: 103 | d002.s056.t000 leave%2:38:01:: 104 | d002.s050.t011 punch%2:35:00:: 105 | d002.s005.t001 response%1:19:00:: 106 | d002.s011.t002 status%1:26:00:: 107 | d002.s014.t000 feel%2:37:00:: 108 | d002.s030.t000 feel%2:37:00:: 109 | d002.s036.t006 feel%2:37:00:: 110 | d001.s015.t002 retain%2:42:00:: 111 | d001.s004.t003 company%1:14:01:: 112 | d001.s012.t000 company%1:14:01:: 113 | d001.s013.t003 company%1:14:01:: 114 | d001.s036.t010 influence%2:41:00:: 115 | d001.s003.t004 read%2:31:00:: 116 | d002.s001.t000 read%2:31:00:: 117 | d000.s011.t000 look_to%2:31:01:: 118 | d002.s000.t004 ride%2:38:01:: 119 | d002.s036.t007 ride%2:38:01:: 120 | d002.s056.t001 ride%2:38:01:: 121 | d002.s022.t000 talk%2:32:01:: 122 | d000.s031.t005 survey%1:04:02:: 123 | d002.s027.t005 stand%2:35:00:: 124 | d002.s049.t008 pull_out%2:38:00:: 125 | d002.s050.t006 craft%1:04:00:: 126 | d002.s004.t004 morning%1:28:00:: 127 | d002.s039.t001 cruise%2:38:03:: 128 | d002.s046.t001 rest%2:35:00:: 129 | d002.s050.t003 rest%2:35:00:: 130 | d002.s029.t002 drift%2:38:02:: 131 | d000.s024.t005 number%1:07:00:: 132 | d002.s009.t000 number%1:07:00:: 133 | d000.s009.t000 point_out%2:32:01:: 134 | d002.s030.t002 point_out%2:32:01:: 135 | d002.s001.t001 take_up%2:30:00:: 136 | d002.s041.t000 do%2:41:01:: 137 | d002.s029.t006 amble%2:38:00:: 138 | d001.s004.t002 travel%2:38:00:: 139 | d000.s006.t003 have%2:40:00:: 140 | d000.s020.t000 have%2:40:00:: 141 | d001.s009.t001 have%2:40:00:: 142 | d001.s031.t001 have%2:40:00:: 143 | d001.s037.t000 have%2:40:00:: 144 | d002.s003.t006 have%2:40:00:: 145 | d002.s046.t002 have%2:40:00:: 146 | d002.s051.t001 have%2:40:00:: 147 | d001.s030.t004 walk%2:38:00:: 148 | d001.s036.t007 establishment%1:04:00:: 149 | d002.s049.t000 drive%2:38:01:: 150 | d002.s051.t003 drive%2:38:01:: 151 | d001.s012.t004 angle%1:25:00:: 152 | d001.s013.t006 bribe%2:40:00:: 153 | d001.s023.t000 bribe%2:40:00:: 154 | d002.s041.t002 companion%1:18:00:: 155 | d001.s030.t003 surprise%1:12:00:: 156 | d001.s020.t001 fate%1:11:00:: 157 | d002.s036.t005 top%1:15:01:: 158 | d000.s000.t000 refer%2:32:01:: 159 | d002.s021.t000 refer%2:32:01:: 160 | d002.s025.t000 refer%2:32:01:: 161 | d001.s021.t000 fall_short%2:37:12:: 162 | d000.s024.t001 view%1:09:02:: 163 | d002.s044.t003 lift%2:38:00:: 164 | d002.s004.t003 interrupt%2:32:00:: 165 | d000.s015.t001 show%2:39:02:: 166 | d001.s022.t000 show%2:39:02:: 167 | d000.s010.t001 defy%2:42:00:: 168 | d000.s032.t003 put%2:35:00:: 169 | d002.s031.t001 put%2:35:00:: 170 | d001.s006.t002 roll%2:38:00:: 171 | d002.s003.t003 try%2:41:00:: 172 | d001.s028.t005 take_place%2:30:00:: 173 | d001.s032.t000 take_place%2:30:00:: 174 | d000.s009.t003 person%1:03:00:: 175 | d000.s016.t004 person%1:03:00:: 176 | d001.s020.t003 person%1:03:00:: 177 | d001.s000.t001 begin%2:30:00:: 178 | d001.s013.t004 begin%2:30:00:: 179 | d002.s025.t001 begin%2:30:00:: 180 | d000.s001.t003 cause%1:11:00:: 181 | d000.s004.t000 cause%1:11:00:: 182 | d002.s040.t001 descend%2:38:00:: 183 | d002.s050.t000 rendezvous%2:41:00:: 184 | d000.s024.t003 play%2:33:00:: 185 | d000.s009.t005 compose%2:42:00:: 186 | d001.s036.t002 redistribute%2:35:00:: 187 | d000.s001.t004 find%2:40:02:: 188 | d000.s033.t001 find%2:40:02:: 189 | d001.s020.t000 find%2:40:02:: 190 | d001.s022.t002 find%2:40:02:: 191 | d002.s003.t004 find%2:40:02:: 192 | d000.s028.t008 thing%1:26:00:: 193 | d002.s015.t000 thing%1:26:00:: 194 | d002.s043.t003 loafer%1:18:00:: 195 | d002.s020.t001 go%2:38:00:: 196 | d002.s036.t000 go%2:38:00:: 197 | d002.s038.t000 go%2:38:00:: 198 | d001.s002.t003 reader%1:18:00:: 199 | d002.s050.t007 activity%1:04:00:: 200 | d000.s005.t000 quote%2:32:00:: 201 | d000.s018.t000 quote%2:32:00:: 202 | d000.s001.t000 comment%1:10:00:: 203 | d000.s028.t011 base%2:31:00:: 204 | d000.s032.t001 executive%1:18:00:: 205 | d001.s023.t001 shut_up%2:32:00:: 206 | d002.s002.t002 attempt%1:04:00:: 207 | d000.s032.t002 include%2:42:00:: 208 | d001.s013.t008 include%2:42:00:: 209 | d001.s015.t003 include%2:42:00:: 210 | d002.s010.t001 include%2:42:00:: 211 | d002.s050.t008 include%2:42:00:: 212 | d000.s025.t000 bother%2:41:00:: 213 | d001.s003.t000 question%1:10:00:: 214 | d001.s004.t004 contractor%1:18:00:: 215 | d002.s000.t001 lean%2:38:00:: 216 | d002.s028.t001 progress%1:04:01:: 217 | d002.s057.t003 salute%2:34:00:: 218 | d001.s009.t003 ownership%1:21:00:: 219 | d002.s024.t001 speak%2:32:00:: 220 | d000.s018.t002 center%1:15:01:: 221 | d001.s020.t002 befall%2:30:01:: 222 | d000.s016.t003 connect%2:35:00:: 223 | d001.s004.t001 path%1:04:00:: 224 | d002.s034.t001 steer%2:38:00:: 225 | d002.s050.t014 cram%2:35:00:: 226 | d002.s003.t001 state%1:15:01:: 227 | d000.s006.t000 note%2:32:00:: 228 | d001.s002.t004 accept%2:31:00:: 229 | d001.s012.t002 keep%2:42:00:: 230 | d002.s030.t001 keep%2:42:00:: 231 | d002.s013.t001 attraction%1:19:00:: 232 | d002.s027.t006 decide%2:31:00:: 233 | d000.s016.t001 create%2:36:00:: 234 | d000.s032.t004 deprive%2:40:01:: 235 | d001.s035.t003 belong%2:40:00:: 236 | d002.s002.t001 represent%2:42:02:: 237 | d000.s031.t003 suggest%2:32:00:: 238 | d000.s012.t001 develop%2:36:01:: 239 | d000.s012.t004 develop%2:36:01:: 240 | d000.s024.t000 dismiss%2:32:00:: 241 | d001.s034.t000 provide%2:40:00:: 242 | d000.s030.t002 participate%2:41:00:: 243 | d001.s036.t011 broker%2:40:00:: 244 | d002.s046.t005 half%1:23:00:: 245 | d001.s031.t002 characteristic%1:09:00:: 246 | d002.s009.t002 test%1:09:02:: 247 | d000.s028.t010 crusade%1:04:00:: 248 | d001.s002.t005 write_about%2:36:00:: 249 | d000.s000.t002 report%2:32:00:: 250 | d002.s055.t000 clamber%2:38:00:: 251 | d001.s036.t003 wealth%1:26:00:: 252 | d002.s052.t006 sit%2:35:00:: 253 | d002.s002.t004 bit%1:23:01:: 254 | d000.s006.t002 examine%2:31:00:: 255 | d001.s003.t001 author%1:18:00:: 256 | d001.s026.t003 author%1:18:00:: 257 | d001.s028.t001 author%1:18:00:: 258 | d001.s002.t006 decline%2:30:01:: 259 | d000.s015.t003 exhibit%2:42:00:: 260 | d002.s010.t003 balloon%1:06:00:: 261 | d002.s011.t000 balloon%1:06:00:: 262 | d002.s027.t003 balloon%1:06:00:: 263 | d002.s039.t000 balloon%1:06:00:: 264 | d002.s050.t001 balloon%1:06:00:: 265 | d002.s050.t009 routine%1:04:00:: 266 | d001.s036.t008 money%1:21:00:: 267 | d001.s029.t000 come_around%2:31:00:: 268 | d002.s003.t000 embody%2:42:01:: 269 | d001.s016.t002 assistance%1:04:00:: 270 | d002.s008.t003 take_to%2:37:00:: 271 | d001.s008.t000 realize%2:31:01:: 272 | d000.s031.t004 conduct%2:41:00:: 273 | d002.s052.t000 mean%2:32:01:: 274 | d002.s026.t001 occur%2:30:00:: 275 | d002.s026.t000 flight%1:14:01:: 276 | d000.s031.t002 examination%1:04:00:: 277 | d002.s014.t001 sign_up%2:41:00:: 278 | d001.s004.t000 lead%2:38:01:: 279 | d002.s029.t005 cow%1:05:01:: 280 | d001.s009.t002 falsify%2:32:00:: 281 | d002.s038.t002 hour%1:28:00:: 282 | d002.s046.t003 pleasure%1:12:00:: 283 | d002.s049.t002 get_stuck%2:38:00:: 284 | d002.s009.t005 run%2:38:00:: 285 | d000.s006.t001 people%1:14:00:: 286 | d000.s008.t000 people%1:14:00:: 287 | d001.s035.t001 people%1:14:00:: 288 | d000.s024.t006 woman%1:18:00:: 289 | d000.s012.t003 possess%2:42:00:: 290 | d000.s006.t004 multitude%1:23:00:: 291 | d001.s004.t005 entrust%2:40:00:: 292 | d001.s039.t000 issue%1:09:01:: 293 | d002.s015.t003 zip%1:23:00:: 294 | d001.s033.t001 eliminate%2:30:01:: 295 | d002.s027.t004 inflate%2:30:01:: 296 | d002.s008.t001 follow%2:38:00:: 297 | d002.s028.t000 follow%2:38:00:: 298 | d002.s051.t002 follow%2:38:00:: 299 | d002.s049.t006 get_out%2:38:01:: 300 | d002.s057.t000 streak%2:38:00:: 301 | d000.s012.t002 understanding%1:09:01:: 302 | d001.s026.t001 name%1:10:00:: 303 | d000.s029.t002 advertise%2:32:01:: 304 | d000.s028.t004 insinuate%2:38:00:: 305 | d000.s033.t000 predict%2:32:00:: 306 | d001.s005.t003 partner%1:18:01:: 307 | d001.s009.t000 partner%1:18:01:: 308 | d001.s017.t001 partner%1:18:01:: 309 | d001.s019.t001 equity%1:21:01:: 310 | d002.s052.t003 duffer%1:18:00:: 311 | d002.s050.t013 roll_up%2:30:01:: 312 | d002.s011.t001 deny%2:32:00:: 313 | d002.s045.t000 scuttle%2:38:00:: 314 | d000.s016.t002 result%2:42:00:: 315 | d002.s029.t004 rise%2:38:00:: 316 | d001.s000.t000 account%1:10:00:: 317 | d001.s004.t006 produce%2:36:02:: 318 | d002.s037.t001 car%1:06:00:: 319 | d001.s018.t000 management%1:04:00:: 320 | d001.s012.t003 use%2:34:01:: 321 | d001.s014.t000 use%2:34:01:: 322 | d001.s018.t001 use%2:34:01:: 323 | d002.s038.t001 average%2:42:00:: 324 | d000.s018.t003 sleep%2:29:00:: 325 | d000.s016.t005 live%2:42:08:: 326 | d000.s028.t003 group%1:03:00:: 327 | d001.s035.t000 group%1:03:00:: 328 | d000.s001.t001 imply%2:32:00:: 329 | d001.s022.t001 ingenuity%1:09:01:: 330 | d000.s007.t000 suffer%2:39:01:: 331 | d001.s013.t002 sentence%1:10:00:: 332 | d001.s024.t000 want%2:37:00:: 333 | d001.s029.t003 want%2:37:00:: 334 | d002.s000.t003 want%2:37:00:: 335 | d002.s020.t002 want%2:37:00:: 336 | d002.s000.t000 reach%2:38:01:: 337 | d000.s030.t001 organization%1:14:00:: 338 | d002.s005.t000 win%2:33:00:: 339 | d002.s049.t005 farmer%1:18:00:: 340 | d001.s002.t000 offer%2:40:02:: 341 | d001.s039.t001 raise%2:30:01:: 342 | d002.s016.t000 look%2:39:00:: 343 | d002.s018.t001 look%2:39:00:: 344 | d002.s047.t000 look%2:39:00:: 345 | d002.s027.t002 watch%2:39:00:: 346 | d002.s029.t003 watch%2:39:00:: 347 | d002.s050.t004 watch%2:39:00:: 348 | d002.s052.t002 watch%2:39:00:: 349 | d001.s012.t001 rebuild%2:36:00:: 350 | d000.s028.t001 choose%2:31:00:: 351 | d000.s032.t000 choose%2:31:00:: 352 | d001.s034.t001 clue%1:10:00:: 353 | d001.s038.t002 pocket%1:06:00:: 354 | d002.s029.t001 minute%1:28:00:: 355 | d002.s049.t004 aid%1:07:00:: 356 | d002.s004.t000 diner%1:18:00:: 357 | d000.s010.t002 generalization%1:09:01:: 358 | d000.s030.t000 mention%2:32:02:: 359 | d001.s007.t000 creation%1:04:00:: 360 | d002.s002.t003 introduce%2:32:00:: 361 | d000.s002.t000 make%2:41:00:: 362 | d000.s026.t003 make%2:41:00:: 363 | d001.s011.t002 make%2:41:00:: 364 | d001.s015.t000 make%2:41:00:: 365 | d001.s036.t009 make%2:41:00:: 366 | d002.s036.t004 make%2:41:00:: 367 | d002.s037.t000 make%2:41:00:: 368 | d002.s036.t008 current%1:19:01:: 369 | d000.s028.t005 get%2:40:00:: 370 | d001.s006.t001 get%2:40:00:: 371 | d001.s027.t001 get%2:40:00:: 372 | d002.s020.t003 get%2:40:00:: 373 | d002.s049.t001 get%2:40:00:: 374 | d002.s054.t000 figure%2:31:01:: 375 | d002.s052.t004 maul%2:35:01:: 376 | d002.s050.t005 disassemble%2:36:00:: 377 | d000.s031.t001 undergo%2:39:04:: 378 | d000.s018.t004 rob%2:40:00:: 379 | d002.s010.t004 shape%2:31:00:: 380 | d000.s008.t004 necessity%1:26:00:: 381 | d002.s003.t005 machine%1:06:00:: 382 | d001.s003.t002 believe%2:31:00:: 383 | d000.s026.t002 assertion%1:10:00:: 384 | d002.s028.t004 holler%2:32:06:: 385 | d002.s052.t001 return%2:38:00:: 386 | d000.s001.t002 discover%2:39:03:: 387 | d001.s006.t003 discover%2:39:03:: 388 | d001.s030.t005 scoop%2:35:01:: 389 | d001.s036.t004 regulate%2:30:00:: 390 | d002.s027.t007 fly%2:38:00:: 391 | d001.s029.t002 court%1:14:00:: 392 | d002.s010.t005 resemble%2:42:00:: 393 | d001.s037.t001 wish%1:12:00:: 394 | d001.s007.t003 award%2:40:00:: 395 | d001.s002.t001 trip%1:04:00:: 396 | d001.s018.t002 system%1:06:00:: 397 | d001.s008.t001 qualify%2:42:00:: 398 | d001.s007.t002 contract%1:10:00:: 399 | d001.s030.t002 express%2:32:01:: 400 | d001.s036.t012 decision%1:04:00:: 401 | d001.s002.t002 tell%2:32:04:: 402 | d002.s015.t001 tell%2:32:04:: 403 | d002.s017.t000 tell%2:32:04:: 404 | d002.s020.t000 tell%2:32:04:: 405 | d002.s036.t002 air%1:27:00:: 406 | d002.s050.t012 air%1:27:00:: 407 | d000.s014.t001 say%2:32:00:: 408 | d001.s029.t001 say%2:32:00:: 409 | d002.s018.t000 say%2:32:00:: 410 | d002.s031.t003 squint%2:29:00:: 411 | d001.s038.t000 exist%2:42:00:: 412 | d000.s005.t001 emphasize%2:32:00:: 413 | d000.s024.t004 role%1:04:00:: 414 | d001.s001.t000 give%2:40:03:: 415 | d002.s028.t003 driver%1:18:00:: 416 | d000.s015.t002 make_up%2:42:00:: 417 | d002.s052.t007 ego%1:12:01:: 418 | d001.s028.t000 absorb%2:35:01:: 419 | d000.s028.t009 see%2:39:00:: 420 | d001.s037.t002 see%2:39:00:: 421 | d002.s034.t000 see%2:39:00:: 422 | d002.s036.t001 heat%2:30:01:: 423 | d001.s013.t007 official%1:18:01:: 424 | d001.s003.t003 answer%2:32:00:: 425 | d001.s036.t005 commerce%1:04:00:: 426 | d000.s026.t001 stop%2:38:00:: 427 | d002.s027.t000 come%2:38:00:: 428 | d002.s029.t000 come%2:38:00:: 429 | d002.s031.t000 come%2:38:00:: 430 | d002.s033.t000 come%2:38:00:: 431 | d002.s046.t000 come%2:38:00:: 432 | d002.s050.t002 come%2:38:00:: 433 | d000.s019.t000 fend%2:41:00:: 434 | d002.s027.t001 lot%1:23:00:: 435 | d001.s011.t001 area%1:15:01:: 436 | d000.s029.t001 subscribe%2:40:01:: 437 | d000.s024.t002 reduction%1:04:00:: 438 | d002.s009.t003 swell%2:30:02:: 439 | d002.s057.t002 rear%2:38:00:: 440 | d002.s044.t001 laugh%2:29:00:: 441 | d001.s001.t001 example%1:09:00:: 442 | d001.s013.t001 serve%2:42:03:: 443 | d002.s027.t008 basket%1:06:00:: 444 | d002.s042.t002 basket%1:06:00:: 445 | d002.s050.t015 basket%1:06:00:: 446 | d002.s049.t003 enlist%2:33:00:: 447 | d000.s028.t006 support%1:04:04:: 448 | d002.s019.t000 ascend%2:38:02:: 449 | d002.s040.t000 ascend%2:38:02:: 450 | d000.s031.t000 homeless%1:18:00:: 451 | d000.s020.t001 addiction%1:26:00:: 452 | d001.s038.t001 line%1:14:03:: 453 | d000.s012.t000 require%2:42:00:: 454 | d002.s015.t002 require%2:42:00:: 455 | d001.s028.t002 gloss_over%2:41:00:: 456 | -------------------------------------------------------------------------------- /semeval2013.mfs.txt: -------------------------------------------------------------------------------- 1 | d011.s007.t003 horde%1:14:00:: 2 | d011.s018.t007 arrival%1:04:00:: 3 | d011.s020.t009 nobel_prize%1:10:00:: 4 | d004.s008.t003 investment%1:04:00:: 5 | d006.s019.t002 degree%1:07:00:: 6 | d011.s006.t001 degree%1:07:00:: 7 | d011.s009.t001 degree%1:07:00:: 8 | d004.s006.t002 kitty%1:21:00:: 9 | d007.s014.t001 research%1:04:00:: 10 | d007.s026.t005 research%1:04:00:: 11 | d007.s027.t000 research%1:04:00:: 12 | d008.s028.t007 ecuador%1:15:00:: 13 | d010.s002.t001 costa_rica%1:15:00:: 14 | d010.s003.t001 costa_rica%1:15:00:: 15 | d010.s021.t001 costa_rica%1:15:00:: 16 | d012.s022.t000 legal_action%1:04:00:: 17 | d003.s018.t004 fee%1:21:00:: 18 | d003.s010.t002 bombing%1:04:00:: 19 | d002.s006.t007 recovery%1:11:00:: 20 | d008.s016.t003 recovery%1:11:00:: 21 | d001.s003.t003 triumph%1:11:00:: 22 | d000.s001.t010 burden%1:09:01:: 23 | d011.s000.t001 burden%1:09:01:: 24 | d011.s008.t002 burden%1:09:01:: 25 | d011.s011.t002 burden%1:09:01:: 26 | d003.s002.t005 firm%1:14:00:: 27 | d003.s011.t001 firm%1:14:00:: 28 | d003.s011.t007 firm%1:14:00:: 29 | d003.s013.t001 firm%1:14:00:: 30 | d009.s006.t002 firm%1:14:00:: 31 | d009.s007.t000 firm%1:14:00:: 32 | d000.s023.t001 urgency%1:26:00:: 33 | d005.s006.t000 reporting%1:10:00:: 34 | d011.s021.t000 migrant%1:18:00:: 35 | d005.s015.t006 death_penalty%1:04:00:: 36 | d005.s020.t001 death_penalty%1:04:00:: 37 | d009.s020.t001 critic%1:18:00:: 38 | d009.s024.t000 critic%1:18:00:: 39 | d006.s007.t000 ethos%1:07:00:: 40 | d007.s010.t000 study%1:04:00:: 41 | d007.s017.t003 study%1:04:00:: 42 | d009.s023.t001 administration%1:04:00:: 43 | d004.s004.t006 bonus%1:09:00:: 44 | d005.s012.t008 carnival%1:04:00:: 45 | d000.s008.t006 period%1:28:00:: 46 | d001.s013.t000 period%1:28:00:: 47 | d011.s015.t001 reality%1:09:00:: 48 | d000.s020.t003 treaty%1:10:00:: 49 | d000.s021.t002 treaty%1:10:00:: 50 | d005.s013.t003 movie%1:10:00:: 51 | d011.s021.t002 social_system%1:14:00:: 52 | d008.s028.t006 distinction%1:09:00:: 53 | d004.s010.t003 capital%1:21:01:: 54 | d002.s006.t004 opinion%1:09:00:: 55 | d012.s000.t002 usa%1:15:00:: 56 | d012.s001.t005 usa%1:15:00:: 57 | d012.s002.t005 usa%1:15:00:: 58 | d012.s018.t002 usa%1:15:00:: 59 | d002.s008.t004 estimate%1:09:00:: 60 | d003.s001.t003 government%1:14:00:: 61 | d003.s011.t005 government%1:14:00:: 62 | d008.s014.t003 government%1:14:00:: 63 | d005.s004.t006 imposition%1:04:02:: 64 | d000.s006.t004 representative%1:18:00:: 65 | d000.s015.t006 common_sense%1:09:00:: 66 | d002.s000.t001 momentum%1:07:00:: 67 | d007.s013.t001 environment%1:26:00:: 68 | d008.s001.t004 private_enterprise%1:14:00:: 69 | d008.s024.t002 private_enterprise%1:14:00:: 70 | d008.s024.t006 private_enterprise%1:14:00:: 71 | d008.s026.t003 private_enterprise%1:14:00:: 72 | d000.s006.t002 process%1:04:00:: 73 | d000.s022.t005 process%1:04:00:: 74 | d000.s026.t002 wednesday%1:28:00:: 75 | d004.s002.t000 wednesday%1:28:00:: 76 | d001.s012.t003 help%1:04:00:: 77 | d000.s003.t000 advance%1:11:00:: 78 | d005.s028.t004 adaptation%1:10:00:: 79 | d006.s022.t000 assumption%1:10:00:: 80 | d003.s008.t001 door%1:06:00:: 81 | d001.s006.t002 first_period%1:28:00:: 82 | d002.s001.t007 dow_jones%1:10:00:: 83 | d000.s002.t003 greenhouse_gas%1:27:00:: 84 | d000.s018.t003 greenhouse_gas%1:27:00:: 85 | d000.s015.t003 america%1:15:00:: 86 | d011.s012.t000 economist%1:18:00:: 87 | d010.s004.t000 europe%1:17:00:: 88 | d009.s004.t004 march%1:28:00:: 89 | d008.s002.t001 worry%1:09:00:: 90 | d002.s001.t004 indicator%1:10:00:: 91 | d002.s006.t002 indicator%1:10:00:: 92 | d008.s004.t001 poll%1:09:00:: 93 | d008.s005.t000 poll%1:09:00:: 94 | d008.s006.t003 poll%1:09:00:: 95 | d008.s022.t001 poll%1:09:00:: 96 | d000.s008.t000 outline%1:15:00:: 97 | d011.s018.t001 surgeon%1:18:00:: 98 | d007.s013.t010 science%1:09:00:: 99 | d001.s007.t000 quarter%1:23:01:: 100 | d001.s012.t008 quarter%1:23:01:: 101 | d002.s000.t000 wall_street%1:15:00:: 102 | d006.s008.t000 part%1:24:00:: 103 | d006.s021.t002 part%1:24:00:: 104 | d007.s014.t000 part%1:24:00:: 105 | d007.s026.t004 part%1:24:00:: 106 | d006.s008.t008 honor%1:10:00:: 107 | d005.s005.t000 lawyer%1:18:00:: 108 | d005.s014.t000 lawyer%1:18:00:: 109 | d005.s017.t001 lawyer%1:18:00:: 110 | d005.s025.t004 lawyer%1:18:00:: 111 | d005.s028.t000 lawyer%1:18:00:: 112 | d005.s029.t000 lawyer%1:18:00:: 113 | d009.s005.t000 lawyer%1:18:00:: 114 | d011.s022.t006 age%1:07:00:: 115 | d010.s004.t003 record%1:10:03:: 116 | d010.s009.t007 record%1:10:03:: 117 | d010.s010.t001 record%1:10:03:: 118 | d000.s001.t004 focus%1:09:00:: 119 | d005.s026.t000 focus%1:09:00:: 120 | d000.s012.t003 language%1:10:00:: 121 | d000.s005.t005 rise%1:11:00:: 122 | d002.s009.t001 rise%1:11:00:: 123 | d011.s019.t000 rise%1:11:00:: 124 | d002.s012.t004 evening%1:28:00:: 125 | d002.s012.t007 evening%1:28:00:: 126 | d008.s008.t000 second%1:28:00:: 127 | d001.s004.t002 scoreboard%1:06:00:: 128 | d001.s012.t004 scoreboard%1:06:00:: 129 | d008.s007.t001 mood%1:12:00:: 130 | d008.s019.t000 mood%1:12:00:: 131 | d000.s010.t011 deforestation%1:26:00:: 132 | d008.s015.t001 woe%1:26:00:: 133 | d012.s021.t005 mortgage%1:21:00:: 134 | d003.s017.t003 commitment%1:07:01:: 135 | d000.s010.t000 european_union%1:14:00:: 136 | d009.s015.t008 program%1:09:00:: 137 | d011.s010.t000 program%1:09:00:: 138 | d012.s002.t008 dispute%1:10:00:: 139 | d012.s014.t000 U 140 | d011.s002.t003 talent%1:09:00:: 141 | d000.s022.t008 india%1:15:00:: 142 | d000.s010.t010 drought%1:26:00:: 143 | d005.s025.t003 courthouse%1:06:01:: 144 | d005.s013.t002 television_show%1:10:00:: 145 | d004.s010.t000 thursday%1:28:00:: 146 | d005.s004.t000 thursday%1:28:00:: 147 | d007.s005.t003 thursday%1:28:00:: 148 | d009.s002.t001 thursday%1:28:00:: 149 | d011.s011.t004 asset%1:07:00:: 150 | d002.s007.t004 row%1:14:00:: 151 | d005.s001.t002 row%1:14:00:: 152 | d010.s007.t001 world_cup%1:11:00:: 153 | d007.s004.t000 phosphorus%1:27:00:: 154 | d007.s006.t006 phosphorus%1:27:00:: 155 | d007.s007.t004 phosphorus%1:27:00:: 156 | d007.s018.t001 phosphorus%1:27:00:: 157 | d007.s019.t003 phosphorus%1:27:00:: 158 | d007.s021.t001 phosphorus%1:27:00:: 159 | d007.s023.t001 phosphorus%1:27:00:: 160 | d007.s024.t004 phosphorus%1:27:00:: 161 | d005.s006.t003 use%1:04:00:: 162 | d000.s003.t003 world%1:17:01:: 163 | d000.s016.t001 world%1:17:01:: 164 | d000.s017.t007 world%1:17:01:: 165 | d003.s010.t009 world%1:17:01:: 166 | d006.s006.t004 world%1:17:01:: 167 | d008.s016.t000 world%1:17:01:: 168 | d011.s005.t008 world%1:17:01:: 169 | d011.s012.t006 world%1:17:01:: 170 | d011.s022.t000 world%1:17:01:: 171 | d000.s019.t000 game%1:04:00:: 172 | d001.s004.t000 game%1:04:00:: 173 | d001.s007.t002 game%1:04:00:: 174 | d001.s009.t000 game%1:04:00:: 175 | d001.s009.t004 game%1:04:00:: 176 | d001.s015.t003 game%1:04:00:: 177 | d006.s002.t002 game%1:04:00:: 178 | d006.s003.t002 game%1:04:00:: 179 | d006.s004.t005 game%1:04:00:: 180 | d006.s005.t002 game%1:04:00:: 181 | d006.s011.t008 game%1:04:00:: 182 | d006.s016.t003 game%1:04:00:: 183 | d006.s017.t001 game%1:04:00:: 184 | d006.s018.t007 game%1:04:00:: 185 | d006.s019.t010 game%1:04:00:: 186 | d010.s001.t002 game%1:04:00:: 187 | d010.s002.t003 game%1:04:00:: 188 | d010.s003.t004 game%1:04:00:: 189 | d010.s004.t005 game%1:04:00:: 190 | d010.s005.t002 game%1:04:00:: 191 | d010.s009.t005 game%1:04:00:: 192 | d010.s013.t001 game%1:04:00:: 193 | d010.s016.t000 game%1:04:00:: 194 | d010.s021.t000 game%1:04:00:: 195 | d012.s001.t000 trouble%1:09:00:: 196 | d005.s012.t001 united_states_supreme_court%1:14:00:: 197 | d003.s016.t001 track%1:17:00:: 198 | d006.s015.t003 violator%1:18:01:: 199 | d012.s004.t000 regulator%1:06:00:: 200 | d012.s013.t000 regulator%1:06:00:: 201 | d012.s019.t001 regulator%1:06:00:: 202 | d012.s023.t000 regulator%1:06:00:: 203 | d011.s021.t004 european_country%1:15:00:: 204 | d011.s001.t010 status%1:26:00:: 205 | d009.s004.t007 swing%1:26:01:: 206 | d011.s005.t001 tone%1:10:01:: 207 | d001.s002.t002 involvement%1:04:00:: 208 | d003.s018.t003 barrel%1:06:01:: 209 | d008.s008.t001 stability%1:07:00:: 210 | d003.s003.t000 company%1:14:01:: 211 | d003.s004.t000 company%1:14:01:: 212 | d003.s006.t000 company%1:14:01:: 213 | d003.s008.t005 company%1:14:01:: 214 | d003.s012.t000 company%1:14:01:: 215 | d003.s014.t000 company%1:14:01:: 216 | d003.s015.t002 company%1:14:01:: 217 | d003.s017.t000 company%1:14:01:: 218 | d003.s018.t000 company%1:14:01:: 219 | d008.s026.t005 company%1:14:01:: 220 | d009.s016.t003 company%1:14:01:: 221 | d012.s016.t002 company%1:14:01:: 222 | d011.s022.t002 organisation%1:14:01:: 223 | d004.s004.t001 restriction%1:09:00:: 224 | d011.s006.t002 generosity%1:07:00:: 225 | d011.s009.t002 generosity%1:07:00:: 226 | d000.s024.t003 hall%1:06:03:: 227 | d000.s003.t005 copenhagen%1:15:00:: 228 | d000.s020.t004 copenhagen%1:15:00:: 229 | d000.s026.t003 copenhagen%1:15:00:: 230 | d005.s015.t000 reason%1:16:00:: 231 | d002.s008.t000 confidence%1:09:00:: 232 | d008.s014.t000 confidence%1:09:00:: 233 | d000.s002.t002 deal%1:04:02:: 234 | d000.s008.t001 deal%1:04:02:: 235 | d000.s014.t001 deal%1:04:02:: 236 | d000.s020.t007 deal%1:04:02:: 237 | d000.s023.t005 deal%1:04:02:: 238 | d003.s000.t000 deal%1:04:02:: 239 | d003.s002.t000 deal%1:04:02:: 240 | d003.s006.t001 deal%1:04:02:: 241 | d003.s012.t001 deal%1:04:02:: 242 | d009.s016.t000 proximity%1:07:00:: 243 | d005.s012.t004 osteopath%1:18:00:: 244 | d002.s009.t006 sector%1:25:00:: 245 | d011.s016.t001 sector%1:25:00:: 246 | d011.s016.t004 sector%1:25:00:: 247 | d002.s006.t005 relation%1:03:00:: 248 | d005.s006.t006 detail%1:09:00:: 249 | d007.s011.t002 detail%1:09:00:: 250 | d006.s018.t004 championship%1:26:00:: 251 | d005.s001.t001 courtroom%1:06:00:: 252 | d005.s007.t004 courtroom%1:06:00:: 253 | d005.s008.t003 courtroom%1:06:00:: 254 | d005.s014.t008 courtroom%1:06:00:: 255 | d005.s016.t005 courtroom%1:06:00:: 256 | d012.s002.t010 credit_union%1:14:00:: 257 | d012.s004.t004 credit_union%1:14:00:: 258 | d012.s005.t003 credit_union%1:14:00:: 259 | d012.s010.t001 credit_union%1:14:00:: 260 | d011.s011.t000 notion%1:09:02:: 261 | d011.s014.t003 notion%1:09:02:: 262 | d000.s021.t005 morning%1:28:00:: 263 | d005.s017.t003 execution%1:04:00:: 264 | d012.s020.t000 wave%1:11:01:: 265 | d012.s001.t001 uproar%1:26:00:: 266 | d002.s010.t002 ground%1:17:00:: 267 | d008.s018.t000 united_nations%1:14:00:: 268 | d005.s009.t007 evidence%1:09:00:: 269 | d011.s004.t002 left%1:15:00:: 270 | d011.s005.t006 left%1:15:00:: 271 | d011.s007.t005 left%1:15:00:: 272 | d006.s006.t005 football%1:04:00:: 273 | d002.s001.t003 market%1:04:00:: 274 | d002.s004.t000 market%1:04:00:: 275 | d002.s006.t003 market%1:04:00:: 276 | d003.s010.t010 reserve%1:07:01:: 277 | d001.s007.t007 home%1:15:04:: 278 | d012.s002.t001 home%1:15:04:: 279 | d006.s005.t003 trainer%1:18:00:: 280 | d004.s010.t004 increase%1:23:00:: 281 | d008.s013.t007 increase%1:23:00:: 282 | d011.s019.t002 increase%1:23:00:: 283 | d011.s007.t001 invasion%1:04:00:: 284 | d000.s001.t003 recrimination%1:10:00:: 285 | d002.s005.t000 narrow_margin%1:07:00:: 286 | d006.s001.t000 trick%1:04:05:: 287 | d000.s018.t002 cost%1:21:00:: 288 | d006.s022.t002 application%1:04:02:: 289 | d007.s019.t001 chemical%1:27:00:: 290 | d000.s013.t003 bush_administration%1:14:00:: 291 | d002.s006.t008 analyst%1:18:00:: 292 | d003.s007.t005 analyst%1:18:00:: 293 | d003.s010.t011 analyst%1:18:00:: 294 | d002.s012.t002 yield%1:04:00:: 295 | d002.s012.t006 yield%1:04:00:: 296 | d000.s016.t002 major_power%1:14:00:: 297 | d000.s018.t001 major_power%1:14:00:: 298 | d005.s012.t005 murder%1:04:00:: 299 | d009.s019.t000 appearance%1:07:00:: 300 | d011.s020.t010 winner%1:18:00:: 301 | d009.s004.t005 outcome%1:11:00:: 302 | d009.s008.t003 outcome%1:11:00:: 303 | d000.s017.t006 nation%1:14:00:: 304 | d002.s002.t000 closing%1:04:01:: 305 | d004.s002.t004 fund%1:21:00:: 306 | d000.s025.t000 gathering%1:14:00:: 307 | d007.s003.t001 capability%1:07:00:: 308 | d006.s013.t000 thing%1:26:00:: 309 | d008.s006.t000 thing%1:26:00:: 310 | d000.s015.t005 negotiator%1:18:00:: 311 | d006.s019.t011 cycle%1:28:00:: 312 | d006.s019.t017 cycle%1:28:00:: 313 | d011.s004.t004 subject%1:10:00:: 314 | d000.s017.t010 imperative%1:24:00:: 315 | d002.s002.t001 figure%1:10:00:: 316 | d008.s013.t005 figure%1:10:00:: 317 | d008.s023.t005 figure%1:10:00:: 318 | d006.s008.t001 debate%1:10:01:: 319 | d006.s009.t000 debate%1:10:01:: 320 | d003.s002.t007 iraq%1:15:00:: 321 | d003.s007.t003 iraq%1:15:00:: 322 | d003.s008.t000 iraq%1:15:00:: 323 | d003.s009.t002 iraq%1:15:00:: 324 | d003.s010.t008 iraq%1:15:00:: 325 | d003.s017.t004 iraq%1:15:00:: 326 | d002.s001.t008 nasdaq%1:06:00:: 327 | d002.s002.t004 nasdaq%1:06:00:: 328 | d007.s002.t005 element%1:09:00:: 329 | d007.s006.t000 element%1:09:00:: 330 | d007.s013.t003 element%1:09:00:: 331 | d007.s013.t006 element%1:09:00:: 332 | d007.s015.t002 element%1:09:00:: 333 | d007.s020.t003 element%1:09:00:: 334 | d005.s013.t000 crime%1:04:00:: 335 | d008.s002.t000 crime%1:04:00:: 336 | d008.s021.t003 crime%1:04:00:: 337 | d008.s022.t005 crime%1:04:00:: 338 | d008.s023.t002 crime%1:04:00:: 339 | d012.s004.t001 string%1:06:00:: 340 | d005.s019.t002 victim%1:18:00:: 341 | d008.s023.t001 victim%1:18:00:: 342 | d006.s011.t005 aspect%1:09:00:: 343 | d011.s001.t002 aspect%1:09:00:: 344 | d000.s014.t006 observer%1:18:00:: 345 | d003.s016.t002 power%1:07:00:: 346 | d007.s027.t002 department_of_energy%1:14:00:: 347 | d007.s024.t001 microbiology%1:09:00:: 348 | d012.s023.t002 responsibility%1:04:00:: 349 | d008.s020.t005 progress%1:04:01:: 350 | d006.s020.t000 round%1:06:01:: 351 | d007.s027.t003 national_institutes_of_health%1:14:00:: 352 | d011.s000.t002 opportunity%1:26:00:: 353 | d008.s020.t003 president%1:18:01:: 354 | d008.s026.t004 president%1:18:01:: 355 | d009.s017.t008 president%1:18:01:: 356 | d008.s014.t002 congress%1:14:01:: 357 | d009.s003.t006 congress%1:14:01:: 358 | d000.s019.t003 central_america%1:15:00:: 359 | d008.s021.t001 central_america%1:15:00:: 360 | d008.s028.t004 central_america%1:15:00:: 361 | d000.s013.t000 move%1:04:00:: 362 | d004.s000.t001 state%1:15:01:: 363 | d004.s001.t002 state%1:15:01:: 364 | d004.s003.t003 state%1:15:01:: 365 | d004.s009.t002 state%1:15:01:: 366 | d005.s009.t001 state%1:15:01:: 367 | d009.s003.t003 state%1:15:01:: 368 | d005.s006.t008 passion%1:12:00:: 369 | d005.s009.t009 passion%1:12:00:: 370 | d000.s013.t002 china%1:15:00:: 371 | d000.s015.t009 china%1:15:00:: 372 | d000.s017.t003 china%1:15:00:: 373 | d000.s022.t007 china%1:15:00:: 374 | d003.s002.t002 china%1:15:00:: 375 | d003.s016.t000 china%1:15:00:: 376 | d003.s018.t006 output%1:06:00:: 377 | d008.s011.t003 output%1:06:00:: 378 | d011.s023.t001 mandate%1:10:00:: 379 | d011.s001.t005 service%1:04:08:: 380 | d011.s020.t012 yahoo%1:18:01:: 381 | d000.s022.t003 push%1:04:00:: 382 | d000.s027.t002 policy%1:09:00:: 383 | d005.s027.t007 policy%1:09:00:: 384 | d011.s001.t001 policy%1:09:00:: 385 | d011.s001.t006 policy%1:09:00:: 386 | d011.s001.t009 policy%1:09:00:: 387 | d005.s028.t003 change%1:11:00:: 388 | d006.s019.t005 change%1:11:00:: 389 | d008.s005.t001 change%1:11:00:: 390 | d003.s001.t000 concern%1:09:00:: 391 | d003.s010.t000 concern%1:09:00:: 392 | d005.s015.t001 concern%1:09:00:: 393 | d009.s012.t001 event%1:03:00:: 394 | d009.s015.t003 event%1:03:00:: 395 | d009.s015.t009 event%1:03:00:: 396 | d009.s019.t002 event%1:03:00:: 397 | d011.s001.t008 compatibility%1:12:00:: 398 | d001.s001.t006 point%1:09:00:: 399 | d001.s008.t001 point%1:09:00:: 400 | d001.s008.t002 point%1:09:00:: 401 | d002.s002.t002 point%1:09:00:: 402 | d002.s002.t003 point%1:09:00:: 403 | d002.s002.t007 point%1:09:00:: 404 | d002.s002.t008 point%1:09:00:: 405 | d002.s003.t000 point%1:09:00:: 406 | d002.s003.t001 point%1:09:00:: 407 | d010.s023.t000 point%1:09:00:: 408 | d011.s023.t002 point%1:09:00:: 409 | d005.s014.t003 half%1:23:00:: 410 | d008.s012.t000 half%1:23:00:: 411 | d010.s001.t001 spain%1:15:00:: 412 | d010.s018.t001 spain%1:15:00:: 413 | d003.s012.t002 ministry%1:14:01:: 414 | d003.s018.t005 ministry%1:14:01:: 415 | d007.s006.t005 oxygen%1:27:00:: 416 | d008.s005.t002 attitude%1:09:00:: 417 | d008.s008.t002 attitude%1:09:00:: 418 | d008.s011.t002 drop%1:25:00:: 419 | d003.s013.t003 advice%1:10:00:: 420 | d001.s000.t000 victory%1:11:00:: 421 | d001.s001.t000 victory%1:11:00:: 422 | d001.s002.t005 victory%1:11:00:: 423 | d001.s015.t004 victory%1:11:00:: 424 | d008.s001.t001 democracy%1:09:00:: 425 | d008.s008.t003 democracy%1:09:00:: 426 | d008.s009.t001 democracy%1:09:00:: 427 | d008.s010.t005 democracy%1:09:00:: 428 | d008.s013.t003 democracy%1:09:00:: 429 | d008.s015.t004 democracy%1:09:00:: 430 | d006.s025.t001 factor%1:11:00:: 431 | d002.s005.t003 case%1:11:00:: 432 | d003.s011.t002 case%1:11:00:: 433 | d005.s011.t003 case%1:11:00:: 434 | d005.s014.t010 case%1:11:00:: 435 | d005.s023.t003 case%1:11:00:: 436 | d005.s024.t002 case%1:11:00:: 437 | d005.s031.t000 case%1:11:00:: 438 | d006.s008.t002 case%1:11:00:: 439 | d006.s022.t004 case%1:11:00:: 440 | d006.s024.t005 case%1:11:00:: 441 | d008.s014.t005 case%1:11:00:: 442 | d009.s000.t004 case%1:11:00:: 443 | d009.s001.t009 case%1:11:00:: 444 | d009.s003.t000 case%1:11:00:: 445 | d009.s009.t006 case%1:11:00:: 446 | d009.s016.t002 case%1:11:00:: 447 | d009.s018.t003 case%1:11:00:: 448 | d009.s024.t001 case%1:11:00:: 449 | d008.s000.t000 routine%1:04:00:: 450 | d005.s012.t003 cleveland%1:15:00:: 451 | d012.s018.t001 wind%1:19:00:: 452 | d002.s001.t001 heading%1:10:00:: 453 | d007.s013.t004 biochemistry%1:09:00:: 454 | d007.s015.t003 biochemistry%1:09:00:: 455 | d007.s023.t004 life_form%1:08:00:: 456 | d007.s025.t004 life_form%1:08:00:: 457 | d005.s009.t004 ruling%1:04:00:: 458 | d005.s012.t000 ruling%1:04:00:: 459 | d009.s019.t001 assistance%1:04:00:: 460 | d007.s002.t003 life%1:26:01:: 461 | d007.s005.t001 life%1:26:01:: 462 | d007.s006.t001 life%1:26:01:: 463 | d007.s009.t002 life%1:26:01:: 464 | d007.s009.t005 life%1:26:01:: 465 | d007.s009.t006 life%1:26:01:: 466 | d007.s013.t007 life%1:26:01:: 467 | d007.s026.t001 life%1:26:01:: 468 | d007.s026.t002 life%1:26:01:: 469 | d000.s000.t001 plan%1:09:00:: 470 | d005.s003.t002 message%1:10:01:: 471 | d005.s006.t004 circus%1:14:00:: 472 | d009.s000.t002 challenger%1:18:00:: 473 | d005.s008.t006 defense_lawyer%1:18:00:: 474 | d005.s018.t000 defense_lawyer%1:18:00:: 475 | d005.s020.t000 defense_lawyer%1:18:00:: 476 | d007.s015.t004 structure%1:06:00:: 477 | d009.s013.t001 ethic%1:09:00:: 478 | d009.s017.t001 ethic%1:09:00:: 479 | d008.s012.t006 democrat%1:18:01:: 480 | d000.s012.t002 envoy%1:18:00:: 481 | d004.s004.t007 charge%1:04:01:: 482 | d000.s015.t001 hour%1:28:00:: 483 | d009.s004.t002 hour%1:28:00:: 484 | d009.s009.t003 hour%1:28:00:: 485 | d005.s010.t000 last_word%1:10:00:: 486 | d003.s011.t004 rival%1:18:00:: 487 | d006.s016.t001 rival%1:18:00:: 488 | d005.s012.t006 wife%1:18:00:: 489 | d009.s021.t000 wife%1:18:00:: 490 | d005.s003.t006 bed%1:06:00:: 491 | d011.s015.t004 labor%1:14:00:: 492 | d011.s015.t007 labor%1:14:00:: 493 | d011.s016.t002 labor%1:14:00:: 494 | d011.s022.t001 labor%1:14:00:: 495 | d005.s023.t008 candidate%1:18:01:: 496 | d008.s003.t002 region%1:15:00:: 497 | d008.s005.t003 region%1:15:00:: 498 | d008.s012.t004 region%1:15:00:: 499 | d008.s013.t001 region%1:15:00:: 500 | d008.s024.t005 region%1:15:00:: 501 | d008.s027.t003 region%1:15:00:: 502 | d001.s010.t002 light%1:19:00:: 503 | d007.s002.t000 researcher%1:18:00:: 504 | d007.s007.t000 researcher%1:18:00:: 505 | d007.s009.t004 researcher%1:18:00:: 506 | d007.s022.t000 researcher%1:18:00:: 507 | d007.s025.t003 researcher%1:18:00:: 508 | d006.s024.t000 border%1:15:00:: 509 | d009.s025.t000 indication%1:10:00:: 510 | d011.s006.t000 difference%1:07:00:: 511 | d011.s009.t000 difference%1:07:00:: 512 | d010.s004.t004 selection%1:04:00:: 513 | d010.s009.t001 selection%1:04:00:: 514 | d000.s025.t001 people%1:14:00:: 515 | d011.s012.t004 people%1:14:00:: 516 | d010.s006.t003 england%1:15:00:: 517 | d010.s009.t004 england%1:15:00:: 518 | d004.s001.t000 us%1:15:00:: 519 | d004.s003.t001 us%1:15:00:: 520 | d004.s010.t001 us%1:15:00:: 521 | d012.s010.t000 us%1:15:00:: 522 | d012.s016.t003 us%1:15:00:: 523 | d012.s017.t000 us%1:15:00:: 524 | d012.s021.t001 us%1:15:00:: 525 | d012.s002.t009 bankruptcy%1:26:01:: 526 | d012.s010.t002 bankruptcy%1:26:01:: 527 | d002.s010.t001 security%1:26:00:: 528 | d003.s010.t001 security%1:26:00:: 529 | d012.s005.t001 security%1:26:00:: 530 | d004.s002.t003 protection%1:04:00:: 531 | d008.s022.t004 whole%1:09:00:: 532 | d006.s008.t007 nobility%1:14:00:: 533 | d002.s007.t002 november%1:28:00:: 534 | d008.s017.t001 safety_net%1:10:00:: 535 | d001.s008.t003 night%1:28:00:: 536 | d001.s004.t003 start%1:11:00:: 537 | d011.s023.t000 start%1:11:00:: 538 | d007.s001.t001 bacterium%1:05:00:: 539 | d007.s002.t001 bacterium%1:05:00:: 540 | d007.s007.t001 bacterium%1:05:00:: 541 | d007.s022.t001 bacterium%1:05:00:: 542 | d000.s017.t001 economy%1:14:00:: 543 | d008.s011.t001 economy%1:14:00:: 544 | d011.s000.t003 economy%1:14:00:: 545 | d011.s008.t003 economy%1:14:00:: 546 | d003.s016.t003 spokesman%1:18:00:: 547 | d004.s011.t000 spokesman%1:18:00:: 548 | d012.s006.t003 spokesman%1:18:00:: 549 | d009.s022.t000 conservative%1:18:00:: 550 | d011.s012.t009 specialist%1:18:00:: 551 | d000.s021.t000 member%1:18:00:: 552 | d005.s004.t002 member%1:18:00:: 553 | d008.s018.t003 proportion%1:24:00:: 554 | d011.s020.t006 proportion%1:24:00:: 555 | d005.s022.t004 interest%1:09:00:: 556 | d009.s016.t004 interest%1:09:00:: 557 | d009.s017.t000 breach%1:04:01:: 558 | d001.s002.t004 end%1:15:00:: 559 | d001.s006.t001 end%1:15:00:: 560 | d001.s012.t007 end%1:15:00:: 561 | d002.s005.t004 end%1:15:00:: 562 | d005.s003.t000 end%1:15:00:: 563 | d005.s004.t004 end%1:15:00:: 564 | d006.s003.t001 end%1:15:00:: 565 | d002.s005.t001 trading%1:04:00:: 566 | d012.s010.t003 customer%1:18:00:: 567 | d003.s011.t008 risk%1:26:00:: 568 | d000.s001.t000 climate%1:26:00:: 569 | d000.s007.t003 climate%1:26:00:: 570 | d000.s012.t001 climate%1:26:00:: 571 | d000.s015.t004 climate%1:26:00:: 572 | d000.s022.t000 climate%1:26:00:: 573 | d010.s002.t000 side%1:15:02:: 574 | d010.s015.t000 side%1:15:02:: 575 | d010.s019.t000 side%1:15:02:: 576 | d011.s008.t000 side%1:15:02:: 577 | d011.s022.t008 receipt%1:04:00:: 578 | d011.s015.t003 replacement%1:04:00:: 579 | d008.s018.t004 poverty%1:26:00:: 580 | d006.s014.t001 complaint%1:26:00:: 581 | d012.s009.t000 chairman%1:18:01:: 582 | d002.s006.t001 publication%1:10:00:: 583 | d000.s019.t001 hegemony%1:14:00:: 584 | d000.s010.t008 climate_change%1:22:00:: 585 | d000.s019.t004 climate_change%1:22:00:: 586 | d000.s020.t002 climate_change%1:22:00:: 587 | d001.s000.t001 israel%1:15:00:: 588 | d000.s007.t005 picture%1:06:00:: 589 | d006.s010.t000 matter%1:09:01:: 590 | d000.s027.t000 big_league%1:14:00:: 591 | d012.s005.t000 value%1:09:00:: 592 | d012.s022.t002 value%1:09:00:: 593 | d008.s012.t001 respondent%1:18:01:: 594 | d008.s013.t000 respondent%1:18:01:: 595 | d008.s022.t002 respondent%1:18:01:: 596 | d008.s024.t004 respondent%1:18:01:: 597 | d008.s026.t000 respondent%1:18:01:: 598 | d008.s027.t004 respondent%1:18:01:: 599 | d008.s028.t002 respondent%1:18:01:: 600 | d000.s001.t001 conference%1:14:00:: 601 | d009.s002.t003 conference%1:14:00:: 602 | d009.s009.t005 conference%1:14:00:: 603 | d009.s015.t000 code%1:10:01:: 604 | d009.s017.t003 code%1:10:01:: 605 | d002.s012.t001 bond%1:19:00:: 606 | d002.s012.t005 bond%1:19:00:: 607 | d011.s012.t015 employment%1:26:00:: 608 | d011.s013.t002 employment%1:26:00:: 609 | d011.s016.t007 employment%1:26:00:: 610 | d001.s012.t000 player%1:18:01:: 611 | d006.s002.t001 player%1:18:01:: 612 | d006.s002.t005 player%1:18:01:: 613 | d006.s003.t004 player%1:18:01:: 614 | d006.s004.t003 player%1:18:01:: 615 | d006.s005.t006 player%1:18:01:: 616 | d006.s008.t009 player%1:18:01:: 617 | d006.s011.t004 player%1:18:01:: 618 | d006.s012.t003 player%1:18:01:: 619 | d006.s013.t002 player%1:18:01:: 620 | d006.s016.t007 player%1:18:01:: 621 | d006.s017.t003 player%1:18:01:: 622 | d006.s018.t002 player%1:18:01:: 623 | d006.s019.t008 player%1:18:01:: 624 | d006.s022.t001 player%1:18:01:: 625 | d010.s001.t000 player%1:18:01:: 626 | d010.s002.t002 player%1:18:01:: 627 | d010.s003.t002 player%1:18:01:: 628 | d010.s008.t000 player%1:18:01:: 629 | d010.s011.t000 player%1:18:01:: 630 | d010.s012.t001 player%1:18:01:: 631 | d007.s024.t000 assistant_professor%1:18:00:: 632 | d011.s002.t000 harassment%1:12:00:: 633 | d011.s008.t001 immigrant%1:18:00:: 634 | d011.s009.t003 immigrant%1:18:00:: 635 | d011.s012.t017 immigrant%1:18:00:: 636 | d011.s014.t002 immigrant%1:18:00:: 637 | d011.s015.t002 immigrant%1:18:00:: 638 | d011.s016.t000 immigrant%1:18:00:: 639 | d011.s018.t006 immigrant%1:18:00:: 640 | d011.s018.t008 immigrant%1:18:00:: 641 | d011.s020.t002 immigrant%1:18:00:: 642 | d011.s020.t007 immigrant%1:18:00:: 643 | d011.s020.t013 immigrant%1:18:00:: 644 | d011.s022.t005 immigrant%1:18:00:: 645 | d011.s024.t001 immigrant%1:18:00:: 646 | d001.s003.t000 success%1:11:00:: 647 | d003.s008.t008 news%1:10:00:: 648 | d005.s023.t004 news%1:10:00:: 649 | d005.s027.t003 news%1:10:00:: 650 | d011.s000.t000 immigration%1:04:00:: 651 | d011.s001.t000 immigration%1:04:00:: 652 | d011.s010.t002 immigration%1:04:00:: 653 | d011.s011.t001 immigration%1:04:00:: 654 | d011.s012.t003 immigration%1:04:00:: 655 | d011.s013.t000 immigration%1:04:00:: 656 | d006.s008.t012 history%1:28:00:: 657 | d003.s002.t010 war%1:04:00:: 658 | d003.s008.t003 war%1:04:00:: 659 | d006.s010.t002 damage%1:11:00:: 660 | d006.s026.t000 damage%1:11:00:: 661 | d005.s020.t006 verdict%1:04:00:: 662 | d005.s023.t001 verdict%1:04:00:: 663 | d001.s002.t001 absence%1:26:00:: 664 | d007.s021.t002 machinery%1:06:00:: 665 | d010.s004.t001 number_one%1:18:00:: 666 | d007.s001.t003 nutrient%1:03:00:: 667 | d011.s022.t004 germany%1:15:00:: 668 | d000.s006.t003 washington%1:15:01:: 669 | d009.s009.t001 washington%1:15:01:: 670 | d006.s008.t005 organization%1:14:00:: 671 | d009.s015.t004 organization%1:14:00:: 672 | d010.s009.t000 peak%1:23:00:: 673 | d010.s000.t000 familiarity%1:09:00:: 674 | d011.s007.t000 fear%1:12:00:: 675 | d011.s014.t000 fear%1:12:00:: 676 | d003.s005.t000 bid%1:10:03:: 677 | d003.s015.t000 bid%1:10:03:: 678 | d003.s018.t001 bid%1:10:03:: 679 | d000.s024.t005 news_conference%1:10:00:: 680 | d007.s011.t001 news_conference%1:10:00:: 681 | d000.s004.t000 working_group%1:14:00:: 682 | d000.s017.t002 planet%1:17:00:: 683 | d007.s023.t007 planet%1:17:00:: 684 | d001.s013.t001 minute%1:28:00:: 685 | d006.s003.t000 minute%1:28:00:: 686 | d003.s003.t001 stake%1:21:02:: 687 | d003.s015.t003 stake%1:21:02:: 688 | d009.s008.t002 stake%1:21:02:: 689 | d004.s001.t003 shackle%1:06:00:: 690 | d007.s013.t011 open_university%1:14:00:: 691 | d011.s020.t003 creation%1:04:00:: 692 | d006.s013.t004 violation%1:04:00:: 693 | d000.s010.t001 talk%1:10:00:: 694 | d000.s011.t002 talk%1:10:00:: 695 | d000.s020.t006 talk%1:10:00:: 696 | d000.s023.t000 talk%1:10:00:: 697 | d000.s020.t005 participant%1:18:00:: 698 | d010.s007.t000 experience%1:09:01:: 699 | d010.s008.t001 experience%1:09:01:: 700 | d011.s015.t000 experience%1:09:01:: 701 | d000.s012.t004 cut%1:21:00:: 702 | d000.s014.t005 cut%1:21:00:: 703 | d005.s018.t005 photograph%1:06:00:: 704 | d005.s019.t001 photograph%1:06:00:: 705 | d010.s003.t006 san_jose%1:15:01:: 706 | d008.s009.t004 table%1:14:00:: 707 | d003.s008.t006 way%1:07:01:: 708 | d005.s014.t007 way%1:07:01:: 709 | d006.s004.t000 way%1:07:01:: 710 | d006.s025.t003 way%1:07:01:: 711 | d007.s026.t000 way%1:07:01:: 712 | d011.s018.t000 way%1:07:01:: 713 | d000.s001.t006 release%1:06:00:: 714 | d007.s009.t001 search%1:04:00:: 715 | d008.s003.t000 brazil%1:15:00:: 716 | d008.s012.t003 brazil%1:15:00:: 717 | d008.s020.t000 brazil%1:15:00:: 718 | d008.s027.t000 brazil%1:15:00:: 719 | d009.s006.t004 legal_principle%1:09:00:: 720 | d011.s022.t013 lifetime%1:28:00:: 721 | d007.s013.t012 united_kingdom%1:15:00:: 722 | d005.s005.t001 court%1:14:00:: 723 | d005.s005.t005 court%1:14:00:: 724 | d005.s027.t006 court%1:14:00:: 725 | d005.s028.t005 court%1:14:00:: 726 | d009.s000.t003 court%1:14:00:: 727 | d009.s004.t009 court%1:14:00:: 728 | d009.s005.t001 court%1:14:00:: 729 | d009.s009.t004 court%1:14:00:: 730 | d009.s017.t006 court%1:14:00:: 731 | d009.s018.t004 court%1:14:00:: 732 | d009.s020.t003 court%1:14:00:: 733 | d012.s006.t002 court%1:14:00:: 734 | d006.s019.t016 place%1:15:00:: 735 | d000.s012.t000 u.s.%1:14:00:: 736 | d003.s008.t004 u.s.%1:14:00:: 737 | d003.s010.t005 u.s.%1:14:00:: 738 | d003.s011.t000 u.s.%1:14:00:: 739 | d003.s013.t000 u.s.%1:14:00:: 740 | d002.s009.t007 situation%1:26:00:: 741 | d006.s016.t000 situation%1:26:00:: 742 | d006.s011.t003 integrity%1:26:00:: 743 | d008.s025.t001 system%1:06:00:: 744 | d003.s013.t002 tie%1:06:01:: 745 | d003.s014.t001 contract%1:10:00:: 746 | d003.s015.t001 contract%1:10:00:: 747 | d003.s015.t004 contract%1:10:00:: 748 | d005.s012.t007 decision%1:04:00:: 749 | d005.s020.t007 decision%1:04:00:: 750 | d005.s030.t002 decision%1:04:00:: 751 | d006.s021.t003 decision%1:04:00:: 752 | d006.s023.t000 decision%1:04:00:: 753 | d009.s004.t010 decision%1:04:00:: 754 | d005.s016.t004 communication%1:10:01:: 755 | d012.s000.t000 pound%1:23:09:: 756 | d012.s001.t003 pound%1:23:09:: 757 | d006.s013.t003 legal_fraud%1:04:00:: 758 | d006.s022.t005 legal_fraud%1:04:00:: 759 | d012.s023.t004 sum%1:21:00:: 760 | d008.s024.t007 development%1:04:01:: 761 | d010.s009.t006 london%1:15:00:: 762 | d000.s008.t004 level%1:07:00:: 763 | d003.s018.t007 level%1:07:00:: 764 | d007.s017.t005 level%1:07:00:: 765 | d009.s023.t005 level%1:07:00:: 766 | d010.s008.t003 level%1:07:00:: 767 | d012.s021.t003 fannie_mae%1:14:00:: 768 | d005.s004.t005 judge%1:18:00:: 769 | d005.s007.t002 judge%1:18:00:: 770 | d005.s016.t002 judge%1:18:00:: 771 | d005.s018.t002 judge%1:18:00:: 772 | d005.s020.t002 judge%1:18:00:: 773 | d009.s015.t002 judge%1:18:00:: 774 | d009.s015.t005 judge%1:18:00:: 775 | d006.s002.t004 behavior%1:04:00:: 776 | d006.s009.t002 behavior%1:04:00:: 777 | d007.s019.t002 behavior%1:04:00:: 778 | d001.s001.t003 loss%1:21:01:: 779 | d012.s011.t001 loss%1:21:01:: 780 | d012.s023.t003 loss%1:21:01:: 781 | d005.s004.t003 family%1:14:02:: 782 | d000.s007.t004 official%1:18:01:: 783 | d000.s013.t004 official%1:18:01:: 784 | d000.s024.t000 official%1:18:01:: 785 | d011.s002.t005 waiter%1:18:00:: 786 | d000.s010.t009 flood%1:19:00:: 787 | d003.s002.t009 decade%1:28:00:: 788 | d005.s011.t000 decade%1:28:00:: 789 | d008.s015.t000 decade%1:28:00:: 790 | d005.s006.t005 atmosphere%1:26:01:: 791 | d005.s012.t009 atmosphere%1:26:01:: 792 | d006.s009.t003 principle%1:09:03:: 793 | d006.s026.t001 principle%1:09:03:: 794 | d000.s001.t002 posturing%1:07:00:: 795 | d009.s020.t000 regard%1:09:00:: 796 | d010.s003.t005 today%1:28:00:: 797 | d010.s010.t000 today%1:28:00:: 798 | d004.s004.t002 acceptance%1:09:00:: 799 | d005.s003.t004 rape%1:20:00:: 800 | d009.s003.t002 suit%1:06:00:: 801 | d009.s004.t001 suit%1:06:00:: 802 | d000.s003.t002 time%1:11:00:: 803 | d001.s015.t005 time%1:11:00:: 804 | d005.s014.t009 time%1:11:00:: 805 | d006.s003.t005 time%1:11:00:: 806 | d007.s025.t001 time%1:11:00:: 807 | d008.s022.t000 time%1:11:00:: 808 | d009.s004.t008 time%1:11:00:: 809 | d007.s015.t001 substitution%1:11:00:: 810 | d007.s009.t000 find%1:09:00:: 811 | d007.s011.t003 find%1:09:00:: 812 | d005.s018.t001 example%1:09:00:: 813 | d007.s024.t005 example%1:09:00:: 814 | d008.s010.t000 example%1:09:00:: 815 | d011.s022.t003 example%1:09:00:: 816 | d005.s015.t005 connecticut%1:15:00:: 817 | d005.s023.t005 connecticut%1:15:00:: 818 | d004.s005.t000 investor%1:18:00:: 819 | d012.s019.t000 investor%1:18:00:: 820 | d003.s008.t007 editor%1:18:00:: 821 | d005.s003.t007 house%1:06:00:: 822 | d011.s005.t002 unemployed%1:14:00:: 823 | d009.s001.t006 justice%1:07:00:: 824 | d009.s002.t002 justice%1:07:00:: 825 | d009.s004.t000 justice%1:07:00:: 826 | d009.s012.t000 justice%1:07:00:: 827 | d009.s014.t001 justice%1:07:00:: 828 | d009.s014.t004 justice%1:07:00:: 829 | d009.s017.t005 justice%1:07:00:: 830 | d009.s018.t000 justice%1:07:00:: 831 | d009.s022.t001 justice%1:07:00:: 832 | d000.s022.t004 pressure%1:19:00:: 833 | d001.s009.t003 pressure%1:19:00:: 834 | d002.s009.t005 pressure%1:19:00:: 835 | d005.s020.t005 pressure%1:19:00:: 836 | d011.s014.t006 pressure%1:19:00:: 837 | d007.s011.t005 internet%1:06:00:: 838 | d011.s007.t002 territory%1:15:00:: 839 | d001.s001.t001 visit%1:04:02:: 840 | d002.s009.t004 euro%1:23:00:: 841 | d011.s022.t011 euro%1:23:00:: 842 | d012.s002.t007 euro%1:23:00:: 843 | d003.s009.t000 long_time%1:28:00:: 844 | d005.s008.t002 inclusion%1:26:00:: 845 | d006.s025.t000 controversy%1:10:00:: 846 | d009.s018.t002 attorney%1:18:00:: 847 | d003.s001.t007 saturday%1:28:00:: 848 | d010.s009.t003 saturday%1:28:00:: 849 | d012.s012.t001 kind%1:09:00:: 850 | d001.s011.t000 disadvantage%1:07:00:: 851 | d003.s011.t003 disadvantage%1:07:00:: 852 | d009.s003.t009 rallying_cry%1:10:00:: 853 | d004.s003.t002 broadcaster%1:18:00:: 854 | d000.s018.t000 delegate%1:18:00:: 855 | d000.s023.t002 delegate%1:18:00:: 856 | d002.s012.t003 tuesday%1:28:00:: 857 | d003.s010.t003 tuesday%1:28:00:: 858 | d011.s003.t000 viewpoint%1:09:00:: 859 | d006.s008.t010 act%1:10:01:: 860 | d002.s006.t006 robustness%1:07:00:: 861 | d000.s017.t005 obligation%1:04:00:: 862 | d006.s025.t004 transmission%1:04:01:: 863 | d008.s022.t007 problem%1:26:00:: 864 | d011.s005.t003 problem%1:26:00:: 865 | d011.s010.t003 problem%1:26:00:: 866 | d011.s011.t003 problem%1:26:00:: 867 | d012.s002.t000 problem%1:26:00:: 868 | d011.s012.t001 book%1:10:00:: 869 | d010.s012.t004 striker%1:18:02:: 870 | d010.s015.t002 striker%1:18:02:: 871 | d005.s022.t001 effect%1:19:00:: 872 | d011.s017.t000 effect%1:19:00:: 873 | d011.s018.t009 effect%1:19:00:: 874 | d011.s024.t002 effect%1:19:00:: 875 | d011.s001.t003 congestion%1:26:00:: 876 | d000.s006.t001 flexibility%1:07:02:: 877 | d000.s024.t001 minister%1:18:00:: 878 | d000.s026.t001 minister%1:18:00:: 879 | d012.s001.t002 ceo%1:18:00:: 880 | d002.s006.t000 trend%1:15:02:: 881 | d004.s003.t000 report%1:10:03:: 882 | d005.s002.t000 report%1:10:03:: 883 | d005.s015.t007 report%1:10:03:: 884 | d005.s027.t004 report%1:10:03:: 885 | d007.s005.t002 report%1:10:03:: 886 | d007.s008.t000 report%1:10:03:: 887 | d007.s011.t006 report%1:10:03:: 888 | d006.s017.t000 strategy%1:09:00:: 889 | d005.s003.t001 defense%1:04:00:: 890 | d006.s004.t008 phase%1:28:00:: 891 | d006.s018.t003 phase%1:28:00:: 892 | d007.s001.t005 form%1:10:00:: 893 | d007.s005.t000 form%1:10:00:: 894 | d009.s015.t007 guest_of_honor%1:18:00:: 895 | d000.s013.t001 hard_line%1:09:00:: 896 | d003.s011.t009 shareholder%1:18:00:: 897 | d011.s008.t004 society%1:14:00:: 898 | d005.s018.t003 juror%1:18:00:: 899 | d005.s019.t000 juror%1:18:00:: 900 | d005.s020.t003 juror%1:18:00:: 901 | d005.s021.t000 juror%1:18:00:: 902 | d008.s010.t002 economic_growth%1:22:00:: 903 | d011.s012.t013 economic_growth%1:22:00:: 904 | d011.s020.t005 economic_growth%1:22:00:: 905 | d005.s008.t005 right%1:07:00:: 906 | d011.s004.t003 right%1:07:00:: 907 | d011.s005.t000 right%1:07:00:: 908 | d011.s007.t006 right%1:07:00:: 909 | d000.s005.t001 option%1:21:00:: 910 | d006.s008.t011 dramatization%1:04:01:: 911 | d004.s004.t000 institution%1:14:00:: 912 | d008.s008.t004 institution%1:14:00:: 913 | d001.s007.t008 lead%1:07:02:: 914 | d001.s012.t005 lead%1:07:02:: 915 | d005.s019.t003 daughter%1:18:00:: 916 | d012.s021.t002 lender%1:18:00:: 917 | d008.s028.t009 nicaragua%1:15:00:: 918 | d000.s004.t004 one%1:23:00:: 919 | d000.s012.t006 one%1:23:00:: 920 | d011.s014.t005 one%1:23:00:: 921 | d011.s016.t003 one%1:23:00:: 922 | d005.s011.t006 fairness%1:07:00:: 923 | d001.s006.t000 scorer%1:18:00:: 924 | d010.s022.t000 scorer%1:18:00:: 925 | d012.s002.t004 past%1:28:00:: 926 | d003.s014.t002 weekend%1:28:00:: 927 | d005.s020.t009 weekend%1:28:00:: 928 | d004.s000.t000 bank%1:17:01:: 929 | d004.s001.t001 bank%1:17:01:: 930 | d004.s002.t002 bank%1:17:01:: 931 | d004.s007.t000 bank%1:17:01:: 932 | d004.s008.t000 bank%1:17:01:: 933 | d004.s008.t004 bank%1:17:01:: 934 | d004.s010.t002 bank%1:17:01:: 935 | d012.s004.t002 bank%1:17:01:: 936 | d012.s007.t000 bank%1:17:01:: 937 | d012.s009.t002 bank%1:17:01:: 938 | d012.s013.t001 bank%1:17:01:: 939 | d012.s018.t000 bank%1:17:01:: 940 | d012.s021.t000 bank%1:17:01:: 941 | d012.s023.t001 bank%1:17:01:: 942 | d000.s024.t006 claim%1:10:00:: 943 | d005.s007.t000 claim%1:10:00:: 944 | d005.s009.t003 claim%1:10:00:: 945 | d005.s027.t002 claim%1:10:00:: 946 | d005.s029.t001 claim%1:10:00:: 947 | d009.s017.t004 claim%1:10:00:: 948 | d009.s019.t003 claim%1:10:00:: 949 | d012.s019.t003 claim%1:10:00:: 950 | d012.s020.t001 claim%1:10:00:: 951 | d012.s024.t000 claim%1:10:00:: 952 | d012.s024.t002 claim%1:10:00:: 953 | d000.s002.t001 basis%1:24:00:: 954 | d001.s005.t002 speculation%1:10:03:: 955 | d007.s011.t004 speculation%1:10:03:: 956 | d009.s015.t006 speaker%1:18:00:: 957 | d001.s015.t006 season%1:28:02:: 958 | d005.s009.t002 superior_court%1:14:00:: 959 | d010.s022.t001 goal%1:09:00:: 960 | d011.s012.t007 future%1:28:00:: 961 | d008.s018.t001 economic_commission_for_latin_america%1:14:00:: 962 | d000.s001.t011 term%1:10:00:: 963 | d000.s004.t006 term%1:10:00:: 964 | d006.s019.t000 term%1:10:00:: 965 | d007.s006.t003 hydrogen%1:27:00:: 966 | d005.s030.t001 supreme_court%1:14:00:: 967 | d009.s001.t001 supreme_court%1:14:00:: 968 | d005.s027.t001 journalist%1:18:00:: 969 | d009.s003.t005 overhaul%1:04:00:: 970 | d012.s007.t002 settlement%1:14:00:: 971 | d012.s012.t000 settlement%1:14:00:: 972 | d002.s007.t001 sale%1:04:00:: 973 | d004.s006.t003 sale%1:04:00:: 974 | d012.s003.t001 sale%1:04:00:: 975 | d000.s021.t003 proposal%1:10:00:: 976 | d000.s025.t007 proposal%1:10:00:: 977 | d003.s017.t002 competitor%1:18:00:: 978 | d010.s019.t001 presence%1:26:00:: 979 | d005.s004.t007 jury%1:14:00:: 980 | d005.s006.t007 jury%1:14:00:: 981 | d005.s009.t008 jury%1:14:00:: 982 | d005.s020.t008 jury%1:14:00:: 983 | d010.s006.t008 shock%1:12:01:: 984 | d011.s004.t001 consensus%1:26:00:: 985 | d011.s012.t008 consensus%1:26:00:: 986 | d007.s007.t002 calif.%1:15:00:: 987 | d011.s012.t002 knowledge%1:03:00:: 988 | d001.s009.t002 hand%1:08:00:: 989 | d000.s003.t006 week%1:28:00:: 990 | d000.s011.t003 week%1:28:00:: 991 | d000.s015.t007 week%1:28:00:: 992 | d000.s024.t007 week%1:28:00:: 993 | d005.s009.t005 week%1:28:00:: 994 | d008.s018.t002 week%1:28:00:: 995 | d010.s012.t000 week%1:28:00:: 996 | d008.s012.t007 average%1:09:00:: 997 | d011.s022.t010 average%1:09:00:: 998 | d003.s007.t000 representation%1:09:00:: 999 | d002.s008.t005 index%1:24:00:: 1000 | d002.s009.t000 index%1:24:00:: 1001 | d006.s016.t004 result%1:19:00:: 1002 | d007.s020.t000 result%1:19:00:: 1003 | d008.s006.t001 result%1:19:00:: 1004 | d009.s018.t001 fundraiser%1:18:00:: 1005 | d003.s006.t003 june%1:28:00:: 1006 | d004.s012.t001 june%1:28:00:: 1007 | d009.s002.t004 petition%1:10:00:: 1008 | d005.s005.t004 argument%1:10:02:: 1009 | d005.s014.t005 argument%1:10:02:: 1010 | d005.s022.t000 argument%1:10:02:: 1011 | d005.s028.t002 argument%1:10:02:: 1012 | d009.s004.t003 argument%1:10:02:: 1013 | d006.s019.t015 accumulation%1:22:00:: 1014 | d006.s019.t003 indetermination%1:07:00:: 1015 | d012.s011.t002 billion%1:23:01:: 1016 | d006.s009.t001 type%1:09:00:: 1017 | d006.s010.t001 type%1:09:00:: 1018 | d006.s015.t001 type%1:09:00:: 1019 | d007.s000.t002 type%1:09:00:: 1020 | d000.s024.t004 meeting%1:14:00:: 1021 | d011.s007.t004 plumber%1:18:00:: 1022 | d005.s023.t009 united_states_senate%1:14:00:: 1023 | d006.s006.t002 circumstance%1:26:01:: 1024 | d000.s022.t002 behalf%1:04:00:: 1025 | d010.s003.t000 contrast%1:24:00:: 1026 | d004.s004.t004 limit%1:07:00:: 1027 | d006.s003.t007 card%1:06:00:: 1028 | d006.s004.t007 card%1:06:00:: 1029 | d006.s006.t001 card%1:06:00:: 1030 | d006.s018.t005 card%1:06:00:: 1031 | d006.s019.t012 card%1:06:00:: 1032 | d006.s019.t014 card%1:06:00:: 1033 | d005.s020.t004 prosecutor%1:18:00:: 1034 | d007.s020.t002 hard_time%1:26:00:: 1035 | d009.s010.t001 senate%1:14:00:: 1036 | d007.s002.t002 earth%1:17:00:: 1037 | d007.s009.t003 earth%1:17:00:: 1038 | d007.s010.t002 earth%1:17:00:: 1039 | d007.s023.t006 earth%1:17:00:: 1040 | d007.s025.t002 earth%1:17:00:: 1041 | d007.s026.t003 earth%1:17:00:: 1042 | d006.s016.t002 benefit%1:21:00:: 1043 | d011.s002.t002 france%1:15:00:: 1044 | d011.s005.t007 france%1:15:00:: 1045 | d000.s008.t003 percent%1:24:00:: 1046 | d000.s008.t007 percent%1:24:00:: 1047 | d000.s009.t002 percent%1:24:00:: 1048 | d005.s003.t003 trial%1:04:00:: 1049 | d005.s004.t001 trial%1:04:00:: 1050 | d005.s006.t001 trial%1:04:00:: 1051 | d005.s008.t000 trial%1:04:00:: 1052 | d005.s011.t005 trial%1:04:00:: 1053 | d005.s012.t010 trial%1:04:00:: 1054 | d005.s015.t003 trial%1:04:00:: 1055 | d005.s016.t006 trial%1:04:00:: 1056 | d005.s022.t003 trial%1:04:00:: 1057 | d005.s027.t009 trial%1:04:00:: 1058 | d005.s030.t004 trial%1:04:00:: 1059 | d012.s013.t002 summer%1:28:00:: 1060 | d007.s001.t004 scope%1:07:00:: 1061 | d004.s009.t001 liability%1:26:01:: 1062 | d009.s001.t010 high_court%1:14:00:: 1063 | d010.s005.t000 number%1:07:00:: 1064 | d010.s013.t000 number%1:07:00:: 1065 | d011.s001.t007 number%1:07:00:: 1066 | d012.s016.t001 number%1:07:00:: 1067 | d012.s019.t002 number%1:07:00:: 1068 | d006.s002.t003 ink%1:27:01:: 1069 | d000.s001.t007 document%1:10:00:: 1070 | d000.s007.t000 document%1:10:00:: 1071 | d001.s005.t001 perimeter%1:25:00:: 1072 | d006.s011.t000 advantage%1:07:00:: 1073 | d006.s012.t000 advantage%1:07:00:: 1074 | d006.s012.t004 advantage%1:07:00:: 1075 | d006.s015.t002 advantage%1:07:00:: 1076 | d006.s018.t000 advantage%1:07:00:: 1077 | d006.s019.t001 advantage%1:07:00:: 1078 | d006.s020.t001 advantage%1:07:00:: 1079 | d006.s021.t004 advantage%1:07:00:: 1080 | d003.s009.t001 offer%1:10:01:: 1081 | d003.s018.t002 offer%1:10:01:: 1082 | d011.s016.t006 offer%1:10:01:: 1083 | d012.s016.t000 fate%1:11:00:: 1084 | d001.s001.t007 top%1:15:01:: 1085 | d010.s004.t002 top%1:15:01:: 1086 | d012.s021.t006 transaction%1:04:00:: 1087 | d012.s022.t001 transaction%1:04:00:: 1088 | d005.s027.t005 view%1:09:02:: 1089 | d008.s026.t002 view%1:09:02:: 1090 | d009.s002.t005 review%1:09:00:: 1091 | d002.s012.t000 treasury%1:21:00:: 1092 | d005.s025.t001 camera%1:06:00:: 1093 | d000.s003.t004 leader%1:18:00:: 1094 | d006.s004.t001 leader%1:18:00:: 1095 | d001.s007.t006 return%1:10:01:: 1096 | d001.s010.t001 return%1:10:01:: 1097 | d007.s000.t000 microbe%1:05:00:: 1098 | d007.s010.t001 microbe%1:05:00:: 1099 | d007.s018.t002 microbe%1:05:00:: 1100 | d007.s023.t000 microbe%1:05:00:: 1101 | d007.s006.t007 sulfur%1:27:00:: 1102 | d008.s023.t000 relative%1:18:00:: 1103 | d007.s017.t001 astrobiology%1:09:00:: 1104 | d001.s007.t004 whistle%1:11:00:: 1105 | d003.s002.t004 oil%1:27:00:: 1106 | d003.s007.t001 oil%1:27:00:: 1107 | d000.s025.t009 e-mail%1:10:00:: 1108 | d006.s017.t002 club%1:14:01:: 1109 | d010.s006.t002 club%1:14:01:: 1110 | d007.s011.t000 announcement%1:10:01:: 1111 | d009.s011.t000 guest%1:18:00:: 1112 | d011.s001.t004 police%1:14:00:: 1113 | d008.s004.t000 finding%1:04:00:: 1114 | d000.s027.t001 vice_president%1:18:00:: 1115 | d010.s009.t009 match%1:06:00:: 1116 | d010.s010.t002 match%1:06:00:: 1117 | d010.s011.t001 match%1:06:00:: 1118 | d010.s012.t003 match%1:06:00:: 1119 | d010.s015.t001 match%1:06:00:: 1120 | d010.s018.t000 match%1:06:00:: 1121 | d000.s017.t008 two_dozen%1:23:00:: 1122 | d009.s006.t001 two_dozen%1:23:00:: 1123 | d009.s013.t000 nothing%1:23:00:: 1124 | d004.s008.t002 crisis%1:26:00:: 1125 | d012.s005.t002 crisis%1:26:00:: 1126 | d012.s017.t003 crisis%1:26:00:: 1127 | d010.s006.t006 cup%1:06:00:: 1128 | d011.s021.t001 contributor%1:18:00:: 1129 | d008.s002.t002 unemployment%1:26:00:: 1130 | d008.s022.t006 unemployment%1:26:00:: 1131 | d005.s012.t002 conviction%1:09:00:: 1132 | d005.s014.t002 conviction%1:09:00:: 1133 | d005.s030.t003 conviction%1:09:00:: 1134 | d007.s006.t002 carbon%1:27:00:: 1135 | d001.s005.t004 wrist%1:08:00:: 1136 | d005.s002.t001 testimony%1:10:00:: 1137 | d006.s019.t006 addition%1:06:00:: 1138 | d008.s017.t000 addition%1:06:00:: 1139 | d007.s022.t002 concentration%1:07:02:: 1140 | d008.s027.t002 leadership%1:04:00:: 1141 | d000.s015.t000 comment%1:10:00:: 1142 | d005.s014.t004 century%1:28:00:: 1143 | d005.s031.t001 century%1:28:00:: 1144 | d002.s007.t003 month%1:28:01:: 1145 | d002.s008.t003 month%1:28:01:: 1146 | d005.s023.t002 month%1:28:01:: 1147 | d006.s000.t000 sport%1:04:00:: 1148 | d006.s007.t002 sport%1:04:00:: 1149 | d006.s009.t004 sport%1:04:00:: 1150 | d006.s009.t008 sport%1:04:00:: 1151 | d006.s011.t001 sport%1:04:00:: 1152 | d006.s026.t002 sport%1:04:00:: 1153 | d008.s010.t006 low%1:26:00:: 1154 | d005.s006.t002 reporter%1:18:00:: 1155 | d005.s025.t002 reporter%1:18:00:: 1156 | d000.s002.t004 obstacle%1:09:00:: 1157 | d005.s009.t006 news_media%1:10:00:: 1158 | d005.s012.t011 news_media%1:10:00:: 1159 | d005.s024.t000 news_media%1:10:00:: 1160 | d006.s013.t008 punishment%1:04:00:: 1161 | d006.s023.t001 punishment%1:04:00:: 1162 | d011.s012.t014 wage%1:21:00:: 1163 | d011.s014.t007 wage%1:21:00:: 1164 | d011.s017.t001 wage%1:21:00:: 1165 | d000.s026.t000 intensity%1:07:03:: 1166 | d009.s014.t002 code_of_conduct%1:09:00:: 1167 | d001.s007.t003 work%1:04:00:: 1168 | d007.s015.t000 work%1:04:00:: 1169 | d008.s001.t003 backing%1:04:00:: 1170 | d000.s011.t001 sort%1:09:00:: 1171 | d005.s011.t001 appeals_court%1:14:00:: 1172 | d005.s014.t001 appeals_court%1:14:00:: 1173 | d005.s028.t001 appeals_court%1:14:00:: 1174 | d012.s021.t004 freddie_mac%1:14:00:: 1175 | d000.s005.t002 question%1:10:00:: 1176 | d005.s007.t003 question%1:10:00:: 1177 | d005.s011.t004 question%1:10:00:: 1178 | d006.s015.t000 question%1:10:00:: 1179 | d009.s001.t002 question%1:10:00:: 1180 | d011.s024.t000 question%1:10:00:: 1181 | d006.s001.t001 wittiness%1:10:00:: 1182 | d006.s024.t001 wittiness%1:10:00:: 1183 | d006.s024.t006 wittiness%1:10:00:: 1184 | d000.s004.t005 global_warming%1:22:00:: 1185 | d000.s015.t010 global_warming%1:22:00:: 1186 | d010.s006.t000 football_player%1:18:00:: 1187 | d005.s023.t007 campaign%1:11:00:: 1188 | d007.s006.t004 nitrogen%1:27:00:: 1189 | d000.s004.t003 country%1:14:00:: 1190 | d000.s009.t000 country%1:14:00:: 1191 | d000.s010.t006 country%1:14:00:: 1192 | d000.s017.t009 country%1:14:00:: 1193 | d000.s018.t004 country%1:14:00:: 1194 | d000.s020.t000 country%1:14:00:: 1195 | d000.s025.t005 country%1:14:00:: 1196 | d008.s004.t002 country%1:14:00:: 1197 | d008.s009.t002 country%1:14:00:: 1198 | d008.s012.t005 country%1:14:00:: 1199 | d008.s013.t002 country%1:14:00:: 1200 | d008.s014.t001 country%1:14:00:: 1201 | d008.s016.t001 country%1:14:00:: 1202 | d008.s019.t001 country%1:14:00:: 1203 | d008.s019.t002 country%1:14:00:: 1204 | d008.s022.t008 country%1:14:00:: 1205 | d008.s025.t002 country%1:14:00:: 1206 | d008.s027.t001 country%1:14:00:: 1207 | d008.s028.t001 country%1:14:00:: 1208 | d010.s003.t003 country%1:14:00:: 1209 | d011.s001.t011 country%1:14:00:: 1210 | d011.s001.t012 country%1:14:00:: 1211 | d011.s005.t004 country%1:14:00:: 1212 | d011.s012.t016 country%1:14:00:: 1213 | d011.s018.t002 country%1:14:00:: 1214 | d000.s002.t005 path%1:04:00:: 1215 | d000.s028.t000 heavyweight%1:18:04:: 1216 | d006.s008.t013 technician%1:18:00:: 1217 | d004.s002.t005 amount%1:21:00:: 1218 | d004.s006.t001 amount%1:21:00:: 1219 | d006.s005.t005 amount%1:21:00:: 1220 | d000.s017.t000 battle%1:04:00:: 1221 | d003.s002.t003 russia%1:15:00:: 1222 | d006.s006.t003 practice%1:04:00:: 1223 | d008.s013.t004 practice%1:04:00:: 1224 | d009.s008.t004 litigation%1:04:00:: 1225 | d002.s009.t003 high%1:07:00:: 1226 | d011.s018.t005 complementarity%1:24:01:: 1227 | d011.s012.t012 flow%1:11:00:: 1228 | d011.s019.t001 flow%1:11:00:: 1229 | d012.s017.t001 september%1:28:00:: 1230 | d006.s019.t013 suspension%1:27:00:: 1231 | d003.s003.t002 field%1:15:00:: 1232 | d003.s006.t002 field%1:15:00:: 1233 | d003.s015.t005 field%1:15:00:: 1234 | d003.s018.t008 field%1:15:00:: 1235 | d006.s012.t002 lawbreaker%1:18:00:: 1236 | d007.s003.t000 organism%1:03:00:: 1237 | d007.s013.t002 organism%1:03:00:: 1238 | d007.s020.t001 organism%1:03:00:: 1239 | d003.s011.t006 energy%1:19:00:: 1240 | d003.s010.t006 military%1:14:00:: 1241 | d005.s023.t006 election%1:04:01:: 1242 | d007.s023.t008 moon%1:17:01:: 1243 | d006.s000.t002 law%1:14:00:: 1244 | d006.s024.t002 law%1:14:00:: 1245 | d009.s000.t001 law%1:14:00:: 1246 | d009.s001.t005 law%1:14:00:: 1247 | d009.s003.t008 law%1:14:00:: 1248 | d009.s005.t002 law%1:14:00:: 1249 | d009.s007.t002 law%1:14:00:: 1250 | d009.s023.t003 law%1:14:00:: 1251 | d009.s024.t002 law%1:14:00:: 1252 | d007.s001.t000 discovery%1:04:00:: 1253 | d007.s011.t007 discovery%1:04:00:: 1254 | d007.s013.t000 discovery%1:04:00:: 1255 | d007.s016.t000 discovery%1:04:00:: 1256 | d000.s005.t000 range%1:07:00:: 1257 | d004.s010.t005 reference%1:10:02:: 1258 | d006.s018.t006 participation%1:04:00:: 1259 | d001.s001.t004 performance%1:10:00:: 1260 | d008.s020.t001 performance%1:10:00:: 1261 | d008.s024.t000 performance%1:10:00:: 1262 | d009.s008.t000 sponsor%1:18:00:: 1263 | d000.s004.t002 money%1:21:00:: 1264 | d004.s004.t003 money%1:21:00:: 1265 | d000.s024.t002 plane%1:06:01:: 1266 | d003.s001.t006 auction%1:04:01:: 1267 | d003.s004.t001 auction%1:04:01:: 1268 | d005.s024.t001 emotion%1:12:00:: 1269 | d005.s008.t001 danger%1:26:00:: 1270 | d006.s011.t007 danger%1:26:00:: 1271 | d004.s008.t001 height%1:07:00:: 1272 | d012.s017.t002 height%1:07:00:: 1273 | d009.s010.t002 opponent%1:18:02:: 1274 | d002.s008.t001 consumer%1:18:00:: 1275 | d009.s010.t000 attendance%1:04:00:: 1276 | d000.s025.t003 activist%1:18:00:: 1277 | d009.s003.t010 activist%1:18:00:: 1278 | d009.s016.t005 activist%1:18:00:: 1279 | d009.s021.t001 activist%1:18:00:: 1280 | d005.s025.t000 crush%1:27:00:: 1281 | d005.s025.t005 new_haven%1:15:00:: 1282 | d009.s023.t004 trial_court%1:14:00:: 1283 | d000.s019.t005 adviser%1:18:00:: 1284 | d004.s010.t006 circle%1:25:00:: 1285 | d000.s010.t007 impact%1:11:00:: 1286 | d000.s020.t001 impact%1:11:00:: 1287 | d005.s015.t002 impact%1:11:00:: 1288 | d011.s012.t010 impact%1:11:00:: 1289 | d011.s014.t001 impact%1:11:00:: 1290 | d002.s001.t000 new_york_stock_exchange%1:06:00:: 1291 | d004.s006.t004 business%1:14:00:: 1292 | d009.s020.t002 business%1:14:00:: 1293 | d006.s004.t006 league%1:14:01:: 1294 | d010.s006.t001 league%1:14:01:: 1295 | d010.s006.t005 league%1:14:01:: 1296 | d011.s005.t005 worker%1:18:00:: 1297 | d011.s014.t004 worker%1:18:00:: 1298 | d007.s017.t002 institute%1:14:00:: 1299 | d005.s007.t005 rule%1:09:00:: 1300 | d005.s014.t006 rule%1:09:00:: 1301 | d005.s030.t006 rule%1:09:00:: 1302 | d006.s007.t001 rule%1:09:00:: 1303 | d006.s013.t005 rule%1:09:00:: 1304 | d006.s021.t005 rule%1:09:00:: 1305 | d009.s013.t002 rule%1:09:00:: 1306 | d009.s017.t007 rule%1:09:00:: 1307 | d004.s005.t001 loan%1:21:00:: 1308 | d009.s023.t000 solicitor_general%1:18:00:: 1309 | d011.s018.t010 pay%1:21:01:: 1310 | d011.s019.t003 pay%1:21:01:: 1311 | d012.s002.t002 industry%1:14:00:: 1312 | d001.s015.t002 last_minute%1:28:00:: 1313 | d011.s022.t009 expenditure%1:21:00:: 1314 | d011.s020.t001 diversity%1:07:02:: 1315 | d000.s023.t004 issue%1:09:01:: 1316 | d005.s010.t001 issue%1:09:01:: 1317 | d005.s017.t000 issue%1:09:01:: 1318 | d009.s003.t001 issue%1:09:01:: 1319 | d009.s022.t002 issue%1:09:01:: 1320 | d012.s003.t000 issue%1:09:01:: 1321 | d012.s006.t000 issue%1:09:01:: 1322 | d005.s027.t000 interview%1:10:01:: 1323 | d009.s009.t002 hotel%1:06:00:: 1324 | d007.s013.t009 space%1:03:00:: 1325 | d012.s006.t004 new_york%1:15:01:: 1326 | d000.s000.t002 emission%1:04:00:: 1327 | d000.s008.t002 emission%1:04:00:: 1328 | d000.s009.t001 emission%1:04:00:: 1329 | d000.s012.t005 emission%1:04:00:: 1330 | d004.s006.t005 unit%1:23:00:: 1331 | d002.s008.t002 december%1:28:00:: 1332 | d000.s014.t000 united_states%1:15:00:: 1333 | d000.s015.t008 united_states%1:15:00:: 1334 | d000.s017.t004 united_states%1:15:00:: 1335 | d002.s001.t005 united_states%1:15:00:: 1336 | d002.s007.t000 united_states%1:15:00:: 1337 | d008.s003.t001 united_states%1:15:00:: 1338 | d008.s027.t006 united_states%1:15:00:: 1339 | d008.s028.t000 united_states%1:15:00:: 1340 | d007.s005.t004 edition%1:10:02:: 1341 | d012.s004.t003 promise%1:10:00:: 1342 | d005.s015.t004 professor%1:18:00:: 1343 | d005.s016.t000 professor%1:18:00:: 1344 | d007.s013.t008 professor%1:18:00:: 1345 | d000.s002.t000 text%1:10:00:: 1346 | d000.s004.t001 text%1:10:00:: 1347 | d000.s007.t002 text%1:10:00:: 1348 | d000.s014.t004 text%1:10:00:: 1349 | d000.s019.t002 mexico%1:15:00:: 1350 | d008.s011.t000 mexico%1:15:00:: 1351 | d008.s012.t002 mexico%1:15:00:: 1352 | d008.s021.t000 mexico%1:15:00:: 1353 | d008.s028.t003 mexico%1:15:00:: 1354 | d008.s024.t003 market_economy%1:14:00:: 1355 | d008.s025.t000 market_economy%1:14:00:: 1356 | d003.s001.t005 infrastructure%1:06:01:: 1357 | d012.s000.t001 fine%1:21:00:: 1358 | d012.s001.t004 fine%1:21:00:: 1359 | d010.s006.t007 competition%1:24:01:: 1360 | d000.s023.t003 fact%1:09:01:: 1361 | d006.s008.t003 fact%1:09:01:: 1362 | d006.s018.t001 fact%1:09:01:: 1363 | d009.s014.t000 fact%1:09:01:: 1364 | d011.s020.t000 fact%1:09:01:: 1365 | d003.s001.t002 instability%1:26:00:: 1366 | d003.s010.t004 instability%1:26:00:: 1367 | d011.s013.t001 growth%1:22:00:: 1368 | d011.s023.t003 growth%1:22:00:: 1369 | d001.s003.t001 sideline%1:15:00:: 1370 | d006.s009.t010 sense%1:09:05:: 1371 | d008.s020.t004 sense%1:09:05:: 1372 | d011.s015.t005 trade%1:04:05:: 1373 | d002.s000.t002 dollar%1:23:00:: 1374 | d002.s001.t006 dollar%1:23:00:: 1375 | d002.s009.t002 dollar%1:23:00:: 1376 | d012.s002.t006 dollar%1:23:00:: 1377 | d012.s008.t000 dollar%1:23:00:: 1378 | d012.s021.t007 dollar%1:23:00:: 1379 | d012.s022.t003 dollar%1:23:00:: 1380 | d007.s001.t002 arsenic%1:27:01:: 1381 | d007.s004.t001 arsenic%1:27:01:: 1382 | d007.s007.t003 arsenic%1:27:01:: 1383 | d007.s017.t004 arsenic%1:27:01:: 1384 | d007.s018.t000 arsenic%1:27:01:: 1385 | d007.s019.t000 arsenic%1:27:01:: 1386 | d007.s021.t000 arsenic%1:27:01:: 1387 | d007.s022.t003 arsenic%1:27:01:: 1388 | d007.s023.t002 arsenic%1:27:01:: 1389 | d007.s023.t005 arsenic%1:27:01:: 1390 | d007.s023.t009 arsenic%1:27:01:: 1391 | d007.s024.t003 arsenic%1:27:01:: 1392 | d007.s025.t000 arsenic%1:27:01:: 1393 | d011.s005.t009 misery%1:26:00:: 1394 | d000.s014.t003 action%1:04:02:: 1395 | d000.s017.t011 action%1:04:02:: 1396 | d000.s018.t005 action%1:04:02:: 1397 | d000.s022.t006 action%1:04:02:: 1398 | d006.s009.t005 action%1:04:02:: 1399 | d006.s009.t007 action%1:04:02:: 1400 | d006.s011.t006 action%1:04:02:: 1401 | d006.s013.t001 action%1:04:02:: 1402 | d006.s013.t007 action%1:04:02:: 1403 | d006.s015.t004 action%1:04:02:: 1404 | d006.s016.t005 action%1:04:02:: 1405 | d006.s019.t007 action%1:04:02:: 1406 | d006.s021.t000 action%1:04:02:: 1407 | d006.s026.t003 action%1:04:02:: 1408 | d009.s014.t003 action%1:04:02:: 1409 | d003.s008.t009 outlet%1:06:01:: 1410 | d000.s025.t002 day%1:28:00:: 1411 | d009.s001.t000 day%1:28:00:: 1412 | d006.s005.t000 committee%1:14:00:: 1413 | d000.s016.t000 trajectory%1:19:00:: 1414 | d002.s005.t002 volume%1:23:00:: 1415 | d000.s001.t005 friday%1:28:00:: 1416 | d000.s010.t003 friday%1:28:00:: 1417 | d000.s011.t000 friday%1:28:00:: 1418 | d000.s021.t004 friday%1:28:00:: 1419 | d002.s001.t002 friday%1:28:00:: 1420 | d003.s004.t002 friday%1:28:00:: 1421 | d010.s005.t001 friday%1:28:00:: 1422 | d005.s005.t002 filing%1:10:00:: 1423 | d005.s030.t000 filing%1:10:00:: 1424 | d008.s020.t002 popularity%1:07:00:: 1425 | d001.s009.t001 halftime%1:28:00:: 1426 | d000.s005.t003 upper_limit%1:23:00:: 1427 | d000.s008.t005 developing_country%1:14:00:: 1428 | d000.s014.t002 developing_country%1:14:00:: 1429 | d006.s005.t001 discipline%1:09:00:: 1430 | d008.s012.t008 chart%1:10:00:: 1431 | d008.s013.t008 chart%1:10:00:: 1432 | d008.s019.t003 chart%1:10:00:: 1433 | d008.s022.t009 chart%1:10:00:: 1434 | d008.s024.t008 chart%1:10:00:: 1435 | d000.s000.t000 group%1:03:00:: 1436 | d001.s001.t008 group%1:03:00:: 1437 | d006.s004.t002 group%1:03:00:: 1438 | d009.s020.t004 group%1:03:00:: 1439 | d010.s009.t002 goalkeeper%1:18:00:: 1440 | d010.s009.t008 goalkeeper%1:18:00:: 1441 | d010.s005.t003 panama%1:15:00:: 1442 | d001.s002.t003 teamwork%1:04:00:: 1443 | d000.s001.t009 year%1:28:01:: 1444 | d000.s010.t004 year%1:28:01:: 1445 | d000.s010.t005 year%1:28:01:: 1446 | d000.s013.t005 year%1:28:01:: 1447 | d002.s005.t005 year%1:28:01:: 1448 | d003.s008.t002 year%1:28:01:: 1449 | d003.s013.t004 year%1:28:01:: 1450 | d005.s005.t006 year%1:28:01:: 1451 | d005.s017.t002 year%1:28:01:: 1452 | d008.s006.t002 year%1:28:01:: 1453 | d008.s010.t003 year%1:28:01:: 1454 | d008.s010.t007 year%1:28:01:: 1455 | d008.s011.t004 year%1:28:01:: 1456 | d008.s013.t006 year%1:28:01:: 1457 | d008.s014.t004 year%1:28:01:: 1458 | d008.s018.t005 year%1:28:01:: 1459 | d008.s023.t003 year%1:28:01:: 1460 | d008.s023.t004 year%1:28:01:: 1461 | d008.s025.t003 year%1:28:01:: 1462 | d008.s027.t005 year%1:28:01:: 1463 | d008.s027.t007 year%1:28:01:: 1464 | d008.s027.t009 year%1:28:01:: 1465 | d009.s003.t007 year%1:28:01:: 1466 | d009.s012.t002 year%1:28:01:: 1467 | d010.s006.t004 year%1:28:01:: 1468 | d012.s018.t003 year%1:28:01:: 1469 | d012.s007.t001 culpability%1:26:00:: 1470 | d000.s021.t001 draft%1:21:00:: 1471 | d000.s025.t006 draft%1:21:00:: 1472 | d011.s010.t001 banality%1:10:00:: 1473 | d006.s003.t006 referee%1:18:00:: 1474 | d006.s013.t009 referee%1:18:00:: 1475 | d009.s001.t008 law_firm%1:14:00:: 1476 | d009.s006.t000 law_firm%1:14:00:: 1477 | d009.s016.t001 law_firm%1:14:00:: 1478 | d008.s009.t003 south_america%1:17:00:: 1479 | d002.s002.t005 technology%1:04:00:: 1480 | d002.s010.t000 technology%1:04:00:: 1481 | d008.s016.t002 recession%1:26:00:: 1482 | d008.s021.t002 recession%1:26:00:: 1483 | d012.s011.t000 compensation%1:21:00:: 1484 | d012.s019.t004 compensation%1:21:00:: 1485 | d000.s025.t008 paper%1:27:00:: 1486 | d000.s025.t004 show%1:04:00:: 1487 | d008.s026.t001 venezuela%1:15:00:: 1488 | d008.s027.t008 venezuela%1:15:00:: 1489 | d008.s028.t005 venezuela%1:15:00:: 1490 | d009.s017.t002 indifference%1:12:00:: 1491 | d008.s028.t008 dominican_republic%1:15:00:: 1492 | d011.s022.t007 contribution%1:04:02:: 1493 | d011.s001.t013 human_right%1:07:00:: 1494 | d012.s019.t005 misconduct%1:04:01:: 1495 | d005.s003.t005 child%1:18:00:: 1496 | d000.s016.t003 fault_line%1:15:00:: 1497 | d004.s009.t000 cash%1:21:00:: 1498 | d004.s006.t000 rest%1:24:00:: 1499 | d010.s020.t000 rest%1:24:00:: 1500 | d005.s018.t004 reaction%1:22:00:: 1501 | d001.s012.t001 couple%1:14:00:: 1502 | d002.s002.t006 predominance%1:26:00:: 1503 | d003.s017.t001 par%1:23:00:: 1504 | d006.s008.t006 hymn%1:10:00:: 1505 | d009.s015.t001 canon%1:10:00:: 1506 | d012.s009.t001 responsiveness%1:09:00:: 1507 | d011.s002.t004 bricklayer%1:18:00:: 1508 | d005.s013.t001 inspiration%1:09:02:: 1509 | d006.s010.t003 spirit%1:18:01:: 1510 | d006.s011.t002 spirit%1:18:01:: 1511 | d006.s021.t001 spirit%1:18:01:: 1512 | d003.s002.t008 oil_industry%1:14:00:: 1513 | d003.s007.t004 oil_industry%1:14:00:: 1514 | d003.s001.t001 violence%1:04:01:: 1515 | d007.s013.t005 set%1:14:00:: 1516 | d005.s016.t003 idea%1:09:00:: 1517 | d007.s024.t002 idea%1:09:00:: 1518 | d011.s020.t004 idea%1:09:00:: 1519 | d009.s001.t007 dinner%1:13:00:: 1520 | d009.s006.t003 dinner%1:13:00:: 1521 | d009.s007.t001 dinner%1:13:00:: 1522 | d009.s009.t000 dinner%1:13:00:: 1523 | d009.s011.t001 dinner%1:13:00:: 1524 | d000.s015.t002 foreign_minister%1:18:00:: 1525 | d008.s006.t004 october%1:28:00:: 1526 | d012.s024.t001 intention%1:09:00:: 1527 | d006.s006.t000 provocation%1:04:00:: 1528 | d011.s015.t006 shortage%1:07:00:: 1529 | d011.s018.t003 shortage%1:07:00:: 1530 | d001.s007.t005 applause%1:10:00:: 1531 | d011.s021.t003 budget%1:21:02:: 1532 | d011.s022.t012 budget%1:21:02:: 1533 | d001.s012.t002 play%1:10:01:: 1534 | d001.s001.t002 team%1:14:00:: 1535 | d001.s001.t005 team%1:14:00:: 1536 | d001.s002.t000 team%1:14:00:: 1537 | d001.s007.t001 team%1:14:00:: 1538 | d001.s008.t000 team%1:14:00:: 1539 | d001.s012.t006 team%1:14:00:: 1540 | d001.s014.t000 team%1:14:00:: 1541 | d001.s014.t001 team%1:14:00:: 1542 | d001.s015.t001 team%1:14:00:: 1543 | d006.s003.t003 team%1:14:00:: 1544 | d006.s005.t004 team%1:14:00:: 1545 | d006.s008.t004 team%1:14:00:: 1546 | d006.s012.t001 team%1:14:00:: 1547 | d006.s012.t005 team%1:14:00:: 1548 | d006.s016.t006 team%1:14:00:: 1549 | d006.s019.t004 team%1:14:00:: 1550 | d007.s014.t002 team%1:14:00:: 1551 | d007.s026.t006 team%1:14:00:: 1552 | d010.s012.t002 team%1:14:00:: 1553 | d007.s005.t005 journal%1:10:01:: 1554 | d006.s002.t000 expulsion%1:04:01:: 1555 | d006.s025.t002 expulsion%1:04:01:: 1556 | d009.s019.t004 impartiality%1:09:00:: 1557 | d012.s010.t004 deposit%1:19:00:: 1558 | d011.s016.t005 training%1:04:00:: 1559 | d009.s004.t006 race%1:11:01:: 1560 | d001.s004.t001 favor%1:04:00:: 1561 | d004.s004.t005 manager%1:18:00:: 1562 | d001.s010.t000 role%1:04:00:: 1563 | d003.s002.t006 role%1:04:00:: 1564 | d005.s009.t000 trial_judge%1:18:00:: 1565 | d011.s020.t008 american%1:18:00:: 1566 | d006.s024.t004 occasion%1:11:00:: 1567 | d009.s002.t000 occasion%1:11:00:: 1568 | d007.s000.t001 possibility%1:09:03:: 1569 | d007.s002.t004 possibility%1:09:03:: 1570 | d007.s023.t003 possibility%1:09:03:: 1571 | d005.s000.t000 appeal%1:10:00:: 1572 | d005.s005.t003 appeal%1:10:00:: 1573 | d005.s007.t001 appeal%1:10:00:: 1574 | d005.s016.t001 appeal%1:10:00:: 1575 | d005.s001.t000 half_a_dozen%1:23:00:: 1576 | d006.s009.t009 regulation%1:10:00:: 1577 | d011.s012.t005 migration%1:04:00:: 1578 | d011.s012.t011 migration%1:04:00:: 1579 | d003.s000.t001 oil_company%1:14:00:: 1580 | d003.s001.t004 oil_company%1:14:00:: 1581 | d003.s002.t001 oil_company%1:14:00:: 1582 | d003.s010.t007 oil_company%1:14:00:: 1583 | d003.s007.t002 giant%1:05:00:: 1584 | d009.s008.t001 giant%1:05:00:: 1585 | d012.s002.t003 giant%1:05:00:: 1586 | d000.s006.t000 lot%1:23:00:: 1587 | d001.s003.t002 lot%1:23:00:: 1588 | d011.s004.t000 lot%1:23:00:: 1589 | d010.s008.t002 area%1:15:01:: 1590 | d011.s020.t011 google%1:10:00:: 1591 | d001.s015.t000 spite%1:12:00:: 1592 | d005.s011.t002 coverage%1:21:00:: 1593 | d005.s022.t002 coverage%1:21:00:: 1594 | d005.s026.t001 coverage%1:21:00:: 1595 | d005.s027.t008 coverage%1:21:00:: 1596 | d005.s030.t005 coverage%1:21:00:: 1597 | d000.s025.t010 challenge%1:26:00:: 1598 | d005.s023.t000 challenge%1:26:00:: 1599 | d009.s001.t003 challenge%1:26:00:: 1600 | d009.s023.t002 challenge%1:26:00:: 1601 | d000.s001.t008 reduction%1:04:00:: 1602 | d012.s006.t001 party%1:14:01:: 1603 | d008.s001.t002 latin_america%1:15:00:: 1604 | d008.s007.t000 latin_america%1:15:00:: 1605 | d008.s015.t002 latin_america%1:15:00:: 1606 | d008.s022.t003 latin_america%1:15:00:: 1607 | d011.s002.t001 student%1:18:00:: 1608 | d000.s005.t004 temperature%1:07:00:: 1609 | d008.s010.t001 peru%1:15:00:: 1610 | d000.s010.t002 boost%1:04:00:: 1611 | d001.s005.t000 basket%1:06:00:: 1612 | d006.s001.t002 will%1:09:00:: 1613 | d004.s000.t002 support%1:04:04:: 1614 | d004.s002.t001 support%1:04:04:: 1615 | d004.s003.t004 support%1:04:04:: 1616 | d004.s012.t000 support%1:04:04:: 1617 | d008.s001.t000 support%1:04:04:: 1618 | d008.s009.t000 support%1:04:04:: 1619 | d008.s010.t004 support%1:04:04:: 1620 | d008.s015.t003 support%1:04:04:: 1621 | d008.s024.t001 support%1:04:04:: 1622 | d005.s008.t004 individual%1:03:00:: 1623 | d011.s018.t004 anaesthetist%1:18:00:: 1624 | d006.s025.t005 directive%1:10:00:: 1625 | d000.s003.t001 negotiation%1:10:00:: 1626 | d000.s007.t001 negotiation%1:10:00:: 1627 | d000.s022.t001 negotiation%1:10:00:: 1628 | d001.s014.t002 line%1:14:03:: 1629 | d003.s002.t011 sanction%1:10:00:: 1630 | d006.s001.t003 sanction%1:10:00:: 1631 | d006.s004.t004 sanction%1:10:00:: 1632 | d006.s013.t006 sanction%1:10:00:: 1633 | d006.s014.t000 sanction%1:10:00:: 1634 | d006.s019.t009 sanction%1:10:00:: 1635 | d006.s022.t003 sanction%1:10:00:: 1636 | d009.s000.t000 healthcare%1:04:01:: 1637 | d009.s001.t004 healthcare%1:04:01:: 1638 | d009.s003.t004 healthcare%1:04:01:: 1639 | d006.s000.t001 fraud%1:04:00:: 1640 | d006.s009.t006 fraud%1:04:00:: 1641 | d006.s024.t003 fraud%1:04:00:: 1642 | d001.s005.t003 net%1:06:01:: 1643 | d007.s017.t000 nasa%1:14:00:: 1644 | d007.s027.t001 nasa%1:14:00:: 1645 | -------------------------------------------------------------------------------- /semeval2015.mfs.txt: -------------------------------------------------------------------------------- 1 | d001.s005.t002 window%1:06:00:: 2 | d001.s006.t001 window%1:06:00:: 3 | d001.s007.t001 window%1:06:00:: 4 | d001.s035.t001 window%1:06:00:: 5 | d001.s050.t006 window%1:06:00:: 6 | d001.s018.t000 parser%1:10:00:: 7 | d000.s031.t001 survival%1:26:00:: 8 | d000.s034.t005 survival%1:26:00:: 9 | d000.s035.t006 survival%1:26:00:: 10 | d000.s036.t009 survival%1:26:00:: 11 | d000.s037.t007 survival%1:26:00:: 12 | d002.s018.t001 alternative%1:09:00:: 13 | d000.s005.t004 drip%2:38:00:: 14 | d003.s018.t002 owner%1:18:00:: 15 | d002.s009.t004 portfolio%1:06:00:: 16 | d002.s018.t007 concern%2:42:00:: 17 | d002.s018.t006 primarily%4:02:01:: 18 | d001.s002.t000 actually%4:02:01:: 19 | d000.s001.t003 product%1:06:01:: 20 | d000.s020.t004 product%1:06:01:: 21 | d001.s028.t009 product%1:06:01:: 22 | d003.s001.t003 product%1:06:01:: 23 | d000.s008.t004 pleural%3:01:00:: 24 | d000.s025.t002 pleural%3:01:00:: 25 | d000.s031.t005 pleural%3:01:00:: 26 | d002.s020.t007 small%3:00:00:: 27 | d000.s017.t002 prevent%2:41:01:: 28 | d000.s017.t005 prevent%2:41:01:: 29 | d003.s008.t002 prevent%2:41:01:: 30 | d000.s023.t004 block%2:35:00:: 31 | d003.s012.t000 block%2:35:00:: 32 | d000.s009.t019 chemotherapy%1:04:00:: 33 | d000.s010.t008 chemotherapy%1:04:00:: 34 | d000.s013.t006 chemotherapy%1:04:00:: 35 | d000.s025.t010 chemotherapy%1:04:00:: 36 | d000.s027.t015 chemotherapy%1:04:00:: 37 | d000.s028.t010 chemotherapy%1:04:00:: 38 | d000.s034.t002 chemotherapy%1:04:00:: 39 | d000.s035.t003 chemotherapy%1:04:00:: 40 | d003.s008.t006 chemotherapy%1:04:00:: 41 | d002.s017.t000 thirdly%4:02:00:: 42 | d002.s005.t003 comparative%3:01:00:: 43 | d000.s001.t007 study%1:04:00:: 44 | d000.s025.t006 study%1:04:00:: 45 | d000.s027.t007 study%1:04:00:: 46 | d000.s028.t005 study%1:04:00:: 47 | d000.s029.t000 study%1:04:00:: 48 | d000.s030.t002 study%1:04:00:: 49 | d000.s036.t001 study%1:04:00:: 50 | d002.s005.t004 study%1:04:00:: 51 | d002.s006.t000 study%1:04:00:: 52 | d002.s013.t001 study%1:04:00:: 53 | d003.s001.t007 study%1:04:00:: 54 | d003.s014.t001 study%1:04:00:: 55 | d003.s015.t002 study%1:04:00:: 56 | d003.s016.t001 study%1:04:00:: 57 | d001.s018.t003 check%2:31:00:: 58 | d000.s009.t008 malignant%3:00:02:: 59 | d000.s025.t001 malignant%3:00:02:: 60 | d000.s031.t004 malignant%3:00:02:: 61 | d002.s007.t003 metal%1:27:00:: 62 | d002.s017.t009 especially%4:02:00:: 63 | d001.s015.t003 root%1:20:00:: 64 | d002.s004.t008 government%1:14:00:: 65 | d000.s003.t007 discussion%1:10:02:: 66 | d001.s010.t002 common_sense%1:09:00:: 67 | d002.s006.t007 representative%1:18:00:: 68 | d000.s009.t017 not%4:02:00:: 69 | d000.s009.t026 not%4:02:00:: 70 | d000.s010.t003 not%4:02:00:: 71 | d000.s019.t000 not%4:02:00:: 72 | d000.s025.t008 not%4:02:00:: 73 | d000.s028.t008 not%4:02:00:: 74 | d000.s035.t001 not%4:02:00:: 75 | d000.s036.t004 not%4:02:00:: 76 | d001.s002.t001 not%4:02:00:: 77 | d001.s009.t009 not%4:02:00:: 78 | d001.s010.t003 not%4:02:00:: 79 | d001.s019.t002 not%4:02:00:: 80 | d001.s029.t000 not%4:02:00:: 81 | d001.s049.t003 not%4:02:00:: 82 | d003.s017.t007 not%4:02:00:: 83 | d003.s023.t000 not%4:02:00:: 84 | d003.s021.t010 period_of_time%1:28:00:: 85 | d001.s030.t002 times%1:28:01:: 86 | d000.s023.t014 material%1:27:00:: 87 | d002.s008.t010 directorate%1:14:00:: 88 | d000.s022.t008 cancer_cell%1:08:00:: 89 | d002.s009.t010 recruitment%1:04:00:: 90 | d002.s015.t007 recruitment%1:04:00:: 91 | d001.s039.t005 previous%5:00:00:preceding:00 92 | d002.s005.t009 direct%3:00:00:: 93 | d000.s015.t006 last%2:42:00:: 94 | d001.s009.t001 understand%2:31:00:: 95 | d000.s031.t000 increase%2:30:00:: 96 | d002.s005.t013 increase%2:30:00:: 97 | d001.s041.t005 reuse%2:34:00:: 98 | d000.s029.t002 measure%1:04:01:: 99 | d003.s009.t002 measure%1:04:01:: 100 | d003.s019.t007 measure%1:04:01:: 101 | d002.s016.t006 pay%2:40:00:: 102 | d003.s022.t005 contact%1:04:02:: 103 | d002.s008.t008 relations%1:24:00:: 104 | d003.s017.t003 start%2:30:00:: 105 | d000.s002.t010 part%1:24:00:: 106 | d000.s003.t009 part%1:24:00:: 107 | d000.s009.t014 part%1:24:00:: 108 | d000.s020.t006 part%1:24:00:: 109 | d002.s005.t005 part%1:24:00:: 110 | d002.s016.t000 perspective%1:09:00:: 111 | d002.s015.t004 age%1:07:00:: 112 | d002.s016.t009 age%1:07:00:: 113 | d002.s017.t007 age%1:07:00:: 114 | d002.s019.t005 age%1:07:00:: 115 | d002.s020.t004 age%1:07:00:: 116 | d001.s048.t001 focus%1:09:00:: 117 | d001.s011.t004 similar%3:00:00:: 118 | d001.s008.t002 input%1:10:00:: 119 | d001.s009.t007 input%1:10:00:: 120 | d001.s048.t002 input%1:10:00:: 121 | d000.s006.t001 active%3:00:02:: 122 | d000.s022.t000 active%3:00:02:: 123 | d000.s023.t002 active%3:00:02:: 124 | d003.s003.t001 active%3:00:02:: 125 | d003.s009.t007 diet%1:13:00:: 126 | d001.s038.t003 sequentially%4:02:00:: 127 | d001.s046.t000 program%1:09:00:: 128 | d002.s018.t000 seek%2:40:00:: 129 | d002.s020.t006 rural_area%1:15:00:: 130 | d000.s016.t001 side_effect%1:26:00:: 131 | d000.s018.t011 side_effect%1:26:00:: 132 | d000.s009.t007 alone%5:00:00:unsocial:00 133 | d003.s004.t005 so%4:02:02:: 134 | d000.s002.t013 pharmacist%1:18:00:: 135 | d003.s003.t003 available%3:00:00:: 136 | d000.s008.t012 asbestos%1:27:00:: 137 | d000.s009.t028 squamous_cell%1:08:00:: 138 | d000.s036.t006 squamous_cell%1:08:00:: 139 | d000.s037.t004 squamous_cell%1:08:00:: 140 | d000.s001.t005 use%1:04:00:: 141 | d000.s013.t004 use%1:04:00:: 142 | d000.s019.t002 use%1:04:00:: 143 | d003.s001.t005 use%1:04:00:: 144 | d001.s012.t002 multiplication%1:04:01:: 145 | d000.s009.t016 patient%1:18:00:: 146 | d000.s010.t002 patient%1:18:00:: 147 | d000.s010.t005 patient%1:18:00:: 148 | d000.s014.t007 patient%1:18:00:: 149 | d000.s016.t002 patient%1:18:00:: 150 | d000.s018.t005 patient%1:18:00:: 151 | d000.s019.t003 patient%1:18:00:: 152 | d000.s025.t007 patient%1:18:00:: 153 | d000.s027.t009 patient%1:18:00:: 154 | d000.s028.t007 patient%1:18:00:: 155 | d000.s029.t005 patient%1:18:00:: 156 | d000.s031.t003 patient%1:18:00:: 157 | d000.s032.t000 patient%1:18:00:: 158 | d000.s033.t001 patient%1:18:00:: 159 | d000.s034.t000 patient%1:18:00:: 160 | d000.s035.t000 patient%1:18:00:: 161 | d000.s036.t002 patient%1:18:00:: 162 | d000.s037.t001 patient%1:18:00:: 163 | d003.s014.t005 patient%1:18:00:: 164 | d002.s004.t003 spanish%3:01:00:: 165 | d000.s009.t025 lung_cancer%1:26:00:: 166 | d000.s027.t002 lung_cancer%1:26:00:: 167 | d000.s028.t011 lung_cancer%1:26:00:: 168 | d000.s033.t003 lung_cancer%1:26:00:: 169 | d001.s014.t001 possible%3:00:00:: 170 | d002.s007.t000 important%3:00:00:: 171 | d003.s014.t008 european_country%1:15:00:: 172 | d000.s009.t002 anticancer%3:01:00:: 173 | d000.s013.t005 anticancer%3:01:00:: 174 | d000.s027.t005 anticancer%3:01:00:: 175 | d000.s028.t002 anticancer%3:01:00:: 176 | d000.s008.t007 lining%1:06:00:: 177 | d000.s029.t006 survive%2:42:01:: 178 | d000.s032.t002 survive%2:42:01:: 179 | d002.s002.t014 common%3:00:02:: 180 | d000.s002.t001 more%3:00:01:: 181 | d000.s003.t001 more%3:00:01:: 182 | d000.s020.t000 more%3:00:01:: 183 | d002.s002.t009 involvement%1:04:00:: 184 | d002.s002.t008 planning%1:04:00:: 185 | d002.s011.t001 analyse%2:31:00:: 186 | d002.s015.t009 analyse%2:31:00:: 187 | d002.s001.t015 organisation%1:14:01:: 188 | d002.s006.t010 organisation%1:14:01:: 189 | d002.s011.t008 organisation%1:14:01:: 190 | d000.s002.t006 read%2:31:00:: 191 | d000.s003.t005 read%2:31:00:: 192 | d000.s026.t003 alone%4:02:00:: 193 | d000.s032.t007 alone%4:02:00:: 194 | d000.s027.t010 locally%4:02:00:: 195 | d000.s014.t000 dose%1:06:00:: 196 | d000.s017.t011 dose%1:06:00:: 197 | d000.s018.t003 dose%1:06:00:: 198 | d003.s021.t008 dose%1:06:00:: 199 | d002.s002.t013 more%4:02:00:: 200 | d003.s016.t003 more%4:02:00:: 201 | d002.s007.t002 sector%1:25:00:: 202 | d002.s007.t006 sector%1:25:00:: 203 | d002.s017.t006 sector%1:25:00:: 204 | d002.s001.t011 authority%1:07:00:: 205 | d001.s026.t001 useful%3:00:00:: 206 | d001.s031.t000 useful%3:00:00:: 207 | d001.s007.t002 consist%2:42:00:: 208 | d003.s016.t016 prevention%1:04:00:: 209 | d000.s018.t008 have%2:40:00:: 210 | d000.s036.t007 have%2:40:00:: 211 | d000.s037.t005 have%2:40:00:: 212 | d001.s030.t003 have%2:40:00:: 213 | d001.s032.t001 have%2:40:00:: 214 | d003.s004.t001 have%2:40:00:: 215 | d002.s008.t004 left%1:15:00:: 216 | d001.s044.t006 publish%2:36:01:: 217 | d002.s005.t002 publish%2:36:01:: 218 | d002.s014.t003 publish%2:36:01:: 219 | d000.s025.t011 disease%1:26:00:: 220 | d000.s027.t013 disease%1:26:00:: 221 | d001.s016.t003 variable%1:17:00:: 222 | d001.s032.t003 variable%1:17:00:: 223 | d001.s034.t003 variable%1:17:00:: 224 | d000.s016.t003 take%2:41:04:: 225 | d002.s013.t007 overall%5:00:00:general:00 226 | d000.s019.t001 recommend%2:32:01:: 227 | d003.s021.t007 recommend%2:32:01:: 228 | d001.s009.t003 best%3:00:00:: 229 | d002.s010.t002 select%2:31:00:: 230 | d000.s014.t009 weight%1:07:00:: 231 | d001.s042.t000 generate%2:36:00:: 232 | d001.s038.t001 instruction%1:10:04:: 233 | d001.s041.t001 instruction%1:10:04:: 234 | d001.s034.t002 expression%1:07:00:: 235 | d001.s041.t003 begin%2:30:00:: 236 | d002.s015.t005 barrier%1:06:00:: 237 | d002.s016.t010 barrier%1:06:00:: 238 | d002.s017.t008 barrier%1:06:00:: 239 | d003.s022.t001 person%1:03:00:: 240 | d002.s015.t010 cause%1:11:00:: 241 | d003.s019.t013 cause%1:11:00:: 242 | d001.s001.t003 simple%3:00:02:: 243 | d002.s010.t003 further%5:00:00:far:00 244 | d002.s002.t011 provision%1:10:00:: 245 | d001.s009.t004 thing%1:26:00:: 246 | d000.s000.t001 summary%1:10:00:: 247 | d000.s020.t003 summary%1:10:00:: 248 | d003.s000.t001 summary%1:10:00:: 249 | d003.s009.t004 veterinary%3:01:00:: 250 | d003.s014.t006 veterinary%3:01:00:: 251 | d003.s019.t009 veterinary%3:01:00:: 252 | d002.s009.t006 present%2:39:00:: 253 | d000.s023.t005 activity%1:04:00:: 254 | d002.s003.t003 different%3:00:00:: 255 | d001.s013.t000 power%1:07:00:: 256 | d001.s015.t000 power%1:07:00:: 257 | d003.s004.t006 halve%2:31:00:: 258 | d000.s009.t027 affect%2:30:00:: 259 | d000.s036.t005 affect%2:30:00:: 260 | d000.s037.t003 affect%2:30:00:: 261 | d000.s023.t006 enzyme%1:27:00:: 262 | d001.s021.t001 definition%1:10:00:: 263 | d002.s002.t004 attempt%1:04:00:: 264 | d002.s001.t009 include%2:42:00:: 265 | d002.s007.t008 include%2:42:00:: 266 | d002.s017.t005 private%3:00:00:: 267 | d002.s019.t001 ethnic%5:00:00:social:00 268 | d002.s005.t006 project%1:04:00:: 269 | d002.s017.t001 project%1:04:00:: 270 | d002.s018.t005 project%1:04:00:: 271 | d002.s020.t000 project%1:04:00:: 272 | d002.s021.t003 project%1:04:00:: 273 | d002.s001.t003 attend%2:42:00:: 274 | d001.s016.t002 bounded%5:00:00:finite:00 275 | d002.s021.t001 state%1:15:01:: 276 | d000.s002.t004 condition%1:26:00:: 277 | d001.s021.t000 condition%1:26:00:: 278 | d001.s023.t001 condition%1:26:00:: 279 | d001.s023.t003 condition%1:26:00:: 280 | d001.s023.t007 condition%1:26:00:: 281 | d001.s016.t001 specify%2:32:03:: 282 | d001.s017.t001 specify%2:32:03:: 283 | d001.s049.t005 specify%2:32:03:: 284 | d002.s001.t017 service%1:04:08:: 285 | d002.s002.t007 service%1:04:08:: 286 | d002.s002.t010 service%1:04:08:: 287 | d002.s004.t017 service%1:04:08:: 288 | d002.s016.t004 particular%5:00:00:specific:00 289 | d003.s023.t002 particular%5:00:00:specific:00 290 | d003.s009.t011 vomiting%1:04:00:: 291 | d003.s016.t007 vomiting%1:04:00:: 292 | d003.s016.t017 vomiting%1:04:00:: 293 | d003.s017.t001 vomiting%1:04:00:: 294 | d003.s017.t005 vomiting%1:04:00:: 295 | d003.s019.t002 vomiting%1:04:00:: 296 | d003.s019.t014 vomiting%1:04:00:: 297 | d000.s008.t001 treat%2:41:00:: 298 | d000.s010.t004 treat%2:41:00:: 299 | d001.s019.t000 treat%2:41:00:: 300 | d001.s019.t003 treat%2:41:00:: 301 | d000.s009.t009 spread%2:35:00:: 302 | d000.s009.t011 spread%2:35:00:: 303 | d002.s009.t007 policy%1:09:00:: 304 | d003.s023.t001 represent%2:42:02:: 305 | d003.s012.t002 act%2:41:00:: 306 | d002.s013.t002 form%2:41:00:: 307 | d002.s020.t008 enterprise%1:04:00:: 308 | d002.s004.t004 ministry%1:14:01:: 309 | d000.s023.t010 building_block%1:17:00:: 310 | d002.s016.t003 examine%2:31:00:: 311 | d002.s019.t007 examine%2:31:00:: 312 | d002.s018.t004 redundancy%1:10:00:: 313 | d002.s002.t002 case%1:11:00:: 314 | d001.s048.t004 box%1:06:00:: 315 | d001.s019.t004 user%1:18:00:: 316 | d001.s027.t002 user%1:18:00:: 317 | d002.s001.t018 user%1:18:00:: 318 | d002.s002.t006 user%1:18:00:: 319 | d002.s000.t006 critical%3:00:01:: 320 | d000.s001.t002 medicinal%5:00:00:healthful:00 321 | d003.s001.t002 medicinal%5:00:00:healthful:00 322 | d001.s026.t000 mainly%4:02:00:: 323 | d000.s017.t010 after%4:02:01:: 324 | d000.s036.t000 however%4:02:00:: 325 | d002.s001.t008 actor%1:18:00:: 326 | d003.s014.t000 large_number%1:23:00:: 327 | d000.s008.t010 cause%2:36:00:: 328 | d003.s008.t005 cause%2:36:00:: 329 | d003.s008.t012 cause%2:36:00:: 330 | d001.s009.t006 realize%2:31:01:: 331 | d001.s037.t000 load%2:35:01:: 332 | d000.s023.t013 genetic%5:00:00:inheritable:00 333 | d002.s006.t008 national%3:01:00:: 334 | d002.s009.t001 national%3:01:00:: 335 | d002.s013.t000 national%3:01:00:: 336 | d002.s009.t000 case_study%1:10:00:: 337 | d002.s010.t005 case_study%1:10:00:: 338 | d002.s011.t000 case_study%1:10:00:: 339 | d001.s052.t004 mode%1:07:00:: 340 | d000.s002.t012 doctor%1:18:00:: 341 | d000.s013.t003 doctor%1:18:00:: 342 | d002.s000.t003 organise%2:30:00:: 343 | d001.s004.t000 initially%4:02:00:: 344 | d001.s017.t002 high%3:00:02:: 345 | d002.s008.t013 high%3:00:02:: 346 | d002.s019.t000 gender%1:10:00:: 347 | d001.s017.t003 priority%1:26:00:: 348 | d002.s021.t005 kingdom%1:26:00:: 349 | d003.s023.t004 people%1:14:00:: 350 | d001.s049.t002 typical%3:00:00:: 351 | d002.s008.t015 member%1:18:00:: 352 | d002.s021.t000 member%1:18:00:: 353 | d000.s001.t012 medicine%1:09:00:: 354 | d000.s009.t003 medicine%1:09:00:: 355 | d000.s009.t020 medicine%1:09:00:: 356 | d000.s011.t000 medicine%1:09:00:: 357 | d000.s017.t001 medicine%1:09:00:: 358 | d000.s022.t003 medicine%1:09:00:: 359 | d000.s022.t004 medicine%1:09:00:: 360 | d000.s027.t006 medicine%1:09:00:: 361 | d000.s028.t003 medicine%1:09:00:: 362 | d003.s001.t012 medicine%1:09:00:: 363 | d003.s008.t007 medicine%1:09:00:: 364 | d003.s016.t011 medicine%1:09:00:: 365 | d003.s022.t003 medicine%1:09:00:: 366 | d003.s009.t014 follow%2:38:00:: 367 | d003.s012.t001 receptor%1:08:01:: 368 | d003.s020.t000 risk%1:26:00:: 369 | d003.s023.t003 risk%1:26:00:: 370 | d001.s002.t002 necessary%3:00:00:: 371 | d001.s009.t010 necessary%3:00:00:: 372 | d001.s049.t004 necessary%3:00:00:: 373 | d003.s004.t004 side%1:15:02:: 374 | d003.s005.t003 side%1:15:02:: 375 | d003.s005.t007 side%1:15:02:: 376 | d003.s021.t009 extended%5:00:00:long:02 377 | d000.s009.t006 surgery%1:09:00:: 378 | d002.s007.t005 banking%1:04:01:: 379 | d000.s006.t000 contain%2:42:00:: 380 | d002.s009.t003 contain%2:42:00:: 381 | d003.s003.t000 contain%2:42:00:: 382 | d000.s009.t001 together_with%4:02:00:: 383 | d003.s019.t004 together_with%4:02:00:: 384 | d000.s009.t023 metastatic%3:01:00:: 385 | d000.s027.t012 metastatic%3:01:00:: 386 | d000.s029.t003 effectiveness%1:07:00:: 387 | d001.s020.t002 value%1:09:00:: 388 | d001.s033.t003 value%1:09:00:: 389 | d001.s034.t004 value%1:09:00:: 390 | d000.s023.t008 produce%2:36:02:: 391 | d003.s019.t010 therapy%1:04:00:: 392 | d002.s000.t004 conference%1:14:00:: 393 | d002.s004.t010 conference%1:14:00:: 394 | d002.s008.t003 conference%1:14:00:: 395 | d002.s014.t005 conference%1:14:00:: 396 | d002.s008.t006 employment%1:26:00:: 397 | d002.s011.t005 employment%1:26:00:: 398 | d000.s022.t011 antimetabolite%1:06:00:: 399 | d000.s001.t011 use%2:34:01:: 400 | d000.s007.t000 use%2:34:01:: 401 | d000.s008.t000 use%2:34:01:: 402 | d000.s009.t000 use%2:34:01:: 403 | d000.s010.t000 use%2:34:01:: 404 | d000.s012.t000 use%2:34:01:: 405 | d000.s014.t006 use%2:34:01:: 406 | d001.s002.t004 use%2:34:01:: 407 | d001.s004.t003 use%2:34:01:: 408 | d001.s013.t001 use%2:34:01:: 409 | d001.s014.t002 use%2:34:01:: 410 | d001.s017.t000 use%2:34:01:: 411 | d001.s020.t000 use%2:34:01:: 412 | d001.s023.t002 use%2:34:01:: 413 | d001.s025.t000 use%2:34:01:: 414 | d001.s030.t000 use%2:34:01:: 415 | d001.s042.t004 use%2:34:01:: 416 | d001.s049.t001 use%2:34:01:: 417 | d001.s052.t001 use%2:34:01:: 418 | d003.s001.t011 use%2:34:01:: 419 | d003.s006.t000 use%2:34:01:: 420 | d003.s008.t000 use%2:34:01:: 421 | d003.s008.t008 use%2:34:01:: 422 | d003.s009.t009 use%2:34:01:: 423 | d000.s013.t002 supervision%1:04:00:: 424 | d002.s014.t007 autumn%1:28:00:: 425 | d000.s023.t011 dna%1:27:00:: 426 | d001.s012.t001 subtraction%1:04:01:: 427 | d000.s005.t005 vein%1:08:00:: 428 | d001.s028.t004 sin%1:07:00:: 429 | d003.s005.t005 quantity%1:03:00:: 430 | d000.s003.t000 want%2:37:00:: 431 | d001.s009.t000 want%2:37:00:: 432 | d001.s039.t001 want%2:37:00:: 433 | d001.s049.t000 want%2:37:00:: 434 | d001.s001.t007 graph%1:10:00:: 435 | d001.s003.t001 graph%1:10:00:: 436 | d001.s007.t004 graph%1:10:00:: 437 | d001.s007.t006 graph%1:10:00:: 438 | d001.s047.t002 graph%1:10:00:: 439 | d001.s047.t005 graph%1:10:00:: 440 | d001.s050.t004 graph%1:10:00:: 441 | d001.s051.t002 graph%1:10:00:: 442 | d001.s053.t001 graph%1:10:00:: 443 | d000.s001.t009 reach%2:38:01:: 444 | d003.s001.t009 reach%2:38:01:: 445 | d000.s009.t022 advanced%5:00:00:precocious:00 446 | d000.s027.t011 advanced%5:00:00:precocious:00 447 | d003.s014.t002 carry_out%2:36:00:: 448 | d002.s018.t009 reintegrate%2:30:00:: 449 | d000.s005.t000 powder%1:27:00:: 450 | d000.s002.t007 package%1:14:00:: 451 | d002.s001.t014 community%1:14:00:: 452 | d002.s009.t017 community%1:14:00:: 453 | d003.s021.t000 generally%4:02:02:: 454 | d000.s015.t007 minute%1:28:00:: 455 | d000.s008.t008 lung%1:08:00:: 456 | d000.s009.t012 easily%4:02:01:: 457 | d001.s001.t002 make%2:41:00:: 458 | d001.s015.t002 make%2:41:00:: 459 | d002.s001.t000 participant%1:18:00:: 460 | d003.s021.t001 well%4:02:00:: 461 | d002.s016.t005 attention%1:09:00:: 462 | d003.s021.t013 duration%1:28:02:: 463 | d002.s000.t005 deal%2:32:08:: 464 | d002.s021.t004 united%3:00:00:: 465 | d003.s007.t000 antiemetic%1:06:00:: 466 | d001.s039.t000 nice%3:00:00:: 467 | d003.s014.t003 laboratory%1:06:00:: 468 | d001.s011.t001 closely%4:02:00:: 469 | d001.s015.t001 way%1:07:01:: 470 | d001.s016.t000 way%1:07:01:: 471 | d001.s022.t000 way%1:07:01:: 472 | d001.s033.t005 way%1:07:01:: 473 | d002.s004.t002 commission%1:14:00:: 474 | d002.s008.t012 commission%1:14:00:: 475 | d001.s046.t001 down%4:02:00:: 476 | d002.s018.t008 retention%1:04:00:: 477 | d000.s018.t001 delay%2:30:00:: 478 | d000.s022.t005 kill%2:35:00:: 479 | d000.s025.t005 main%5:00:00:important:00 480 | d000.s029.t001 main%5:00:00:important:00 481 | d001.s005.t001 main%5:00:00:important:00 482 | d001.s006.t000 main%5:00:00:important:00 483 | d001.s007.t000 main%5:00:00:important:00 484 | d001.s050.t005 main%5:00:00:important:00 485 | d002.s001.t001 main%5:00:00:important:00 486 | d002.s009.t016 place%1:15:00:: 487 | d002.s004.t012 exclusion%1:26:00:: 488 | d002.s008.t002 exclusion%1:26:00:: 489 | d000.s001.t000 explain%2:32:00:: 490 | d003.s001.t000 explain%2:32:00:: 491 | d001.s033.t007 log%1:27:01:: 492 | d001.s036.t000 log%1:27:01:: 493 | d001.s043.t001 log%1:27:01:: 494 | d001.s044.t001 log%1:27:01:: 495 | d002.s019.t004 relate%2:31:00:: 496 | d002.s001.t004 debate%2:31:00:: 497 | d001.s028.t008 sum%1:21:00:: 498 | d000.s013.t001 give%2:40:03:: 499 | d000.s015.t000 give%2:40:03:: 500 | d000.s017.t000 give%2:40:03:: 501 | d000.s017.t008 give%2:40:03:: 502 | d003.s010.t001 give%2:40:03:: 503 | d003.s022.t002 give%2:40:03:: 504 | d002.s003.t007 welfare%1:04:00:: 505 | d002.s004.t016 welfare%1:04:00:: 506 | d002.s015.t002 document%2:32:00:: 507 | d000.s005.t001 make_up%2:42:00:: 508 | d000.s016.t008 injection%1:04:00:: 509 | d003.s003.t006 injection%1:04:00:: 510 | d003.s009.t012 injection%1:04:00:: 511 | d003.s009.t013 injection%1:04:00:: 512 | d003.s017.t004 injection%1:04:00:: 513 | d002.s009.t012 development%1:04:01:: 514 | d002.s006.t009 level%1:07:00:: 515 | d000.s017.t003 vomit%2:29:00:: 516 | d003.s007.t003 vomit%2:29:00:: 517 | d003.s008.t003 vomit%2:29:00:: 518 | d003.s008.t004 vomit%2:29:00:: 519 | d003.s008.t013 vomit%2:29:00:: 520 | d001.s047.t000 add%2:30:00:: 521 | d001.s047.t006 add%2:30:00:: 522 | d001.s047.t007 add%2:30:00:: 523 | d001.s052.t002 add%2:30:00:: 524 | d000.s020.t002 see%2:39:00:: 525 | d001.s033.t000 see%2:39:00:: 526 | d003.s016.t008 see%2:39:00:: 527 | d001.s023.t009 last%5:00:00:past:00 528 | d001.s034.t006 last%5:00:00:past:00 529 | d001.s053.t002 color%1:07:00:: 530 | d002.s006.t005 official%1:18:01:: 531 | d003.s001.t004 veterinary%1:18:00:: 532 | d001.s028.t006 trigonometric%3:01:00:: 533 | d003.s007.t002 stop%2:38:00:: 534 | d002.s007.t001 industrial%3:01:00:: 535 | d002.s008.t007 industrial%3:01:00:: 536 | d001.s022.t002 conditional%5:00:00:qualified:02 537 | d001.s000.t002 markup%1:21:00:: 538 | d000.s031.t002 time%1:11:00:: 539 | d000.s034.t006 time%1:11:00:: 540 | d000.s035.t007 time%1:11:00:: 541 | d000.s036.t010 time%1:11:00:: 542 | d000.s037.t008 time%1:11:00:: 543 | d001.s034.t000 time%1:11:00:: 544 | d003.s021.t006 time%1:11:00:: 545 | d003.s021.t011 time%1:11:00:: 546 | d002.s014.t006 stage%2:36:00:: 547 | d002.s017.t010 old%3:00:02:: 548 | d002.s018.t011 old%3:00:02:: 549 | d000.s001.t006 assess%2:31:00:: 550 | d002.s005.t007 assess%2:31:00:: 551 | d002.s011.t009 assess%2:31:00:: 552 | d002.s017.t002 assess%2:31:00:: 553 | d003.s001.t006 assess%2:31:00:: 554 | d001.s023.t005 true%3:00:00:: 555 | d002.s012.t000 visit%1:04:02:: 556 | d001.s029.t002 kind%1:09:00:: 557 | d001.s023.t008 enter%2:38:00:: 558 | d001.s034.t001 enter%2:38:00:: 559 | d001.s050.t000 enter%2:38:00:: 560 | d000.s016.t007 vitamin%1:27:00:: 561 | d000.s009.t018 receive%2:40:00:: 562 | d000.s010.t006 receive%2:40:00:: 563 | d000.s025.t009 receive%2:40:00:: 564 | d000.s027.t014 receive%2:40:00:: 565 | d000.s028.t009 receive%2:40:00:: 566 | d000.s032.t001 receive%2:40:00:: 567 | d000.s034.t001 receive%2:40:00:: 568 | d000.s035.t002 receive%2:40:00:: 569 | d003.s016.t010 receive%2:40:00:: 570 | d003.s016.t013 receive%2:40:00:: 571 | d000.s002.t009 also%4:02:00:: 572 | d000.s003.t008 also%4:02:00:: 573 | d000.s017.t007 also%4:02:00:: 574 | d000.s020.t005 also%4:02:00:: 575 | d000.s028.t000 also%4:02:00:: 576 | d001.s014.t000 also%4:02:00:: 577 | d002.s003.t001 also%4:02:00:: 578 | d002.s007.t007 also%4:02:00:: 579 | d002.s012.t004 also%4:02:00:: 580 | d002.s016.t002 also%4:02:00:: 581 | d002.s020.t001 also%4:02:00:: 582 | d003.s009.t008 also%4:02:00:: 583 | d003.s019.t000 also%4:02:00:: 584 | d001.s042.t005 text_editor%1:18:00:: 585 | d000.s019.t007 problem%1:26:00:: 586 | d002.s011.t011 problem%1:26:00:: 587 | d003.s022.t000 precaution%1:04:00:: 588 | d001.s050.t002 ok%5:00:00:satisfactory:00 589 | d002.s000.t001 recently%4:02:00:: 590 | d002.s005.t001 recently%4:02:00:: 591 | d000.s026.t000 effect%1:19:00:: 592 | d000.s027.t003 effect%1:19:00:: 593 | d000.s000.t005 report%1:10:03:: 594 | d002.s009.t002 report%1:10:03:: 595 | d002.s013.t005 report%1:10:03:: 596 | d002.s014.t000 report%1:10:03:: 597 | d003.s000.t005 report%1:10:03:: 598 | d000.s023.t003 form%1:10:00:: 599 | d000.s002.t000 need%2:42:00:: 600 | d000.s022.t007 divide%2:41:00:: 601 | d002.s019.t006 discrimination%1:04:00:: 602 | d002.s020.t005 discrimination%1:04:00:: 603 | d002.s018.t003 retirement%1:26:00:: 604 | d000.s015.t004 intravenous%3:01:00:: 605 | d002.s004.t007 regional%3:01:00:: 606 | d002.s008.t005 right%1:07:00:: 607 | d000.s010.t009 past%1:28:00:: 608 | d000.s027.t016 past%1:28:00:: 609 | d000.s028.t012 past%1:28:00:: 610 | d000.s034.t003 past%1:28:00:: 611 | d000.s035.t004 past%1:28:00:: 612 | d001.s039.t003 library%1:06:01:: 613 | d001.s011.t003 very%4:02:00:: 614 | d002.s001.t012 social%3:01:00:: 615 | d002.s004.t005 social%3:01:00:: 616 | d002.s004.t011 social%3:01:00:: 617 | d002.s006.t006 social%3:01:00:: 618 | d002.s008.t001 social%3:01:00:: 619 | d002.s008.t022 social%3:01:00:: 620 | d002.s016.t001 social%3:01:00:: 621 | d003.s009.t006 special%5:00:01:specific:00 622 | d002.s020.t002 consider%2:31:00:: 623 | d001.s033.t002 change%2:30:01:: 624 | d001.s034.t005 change%2:30:01:: 625 | d000.s003.t003 basis%1:24:00:: 626 | d002.s013.t003 basis%1:24:00:: 627 | d002.s004.t013 major%3:00:06:: 628 | d002.s008.t000 speaker%1:18:00:: 629 | d003.s005.t002 logo%1:10:00:: 630 | d001.s044.t005 print%2:36:02:: 631 | d000.s001.t008 perform%2:36:00:: 632 | d003.s001.t008 perform%2:36:00:: 633 | d001.s002.t003 know%2:31:01:: 634 | d001.s009.t011 know%2:31:01:: 635 | d003.s008.t011 know%2:31:01:: 636 | d002.s014.t001 casebook%1:10:00:: 637 | d003.s004.t002 score%1:09:00:: 638 | d000.s008.t005 mesothelioma%1:26:00:: 639 | d000.s025.t003 mesothelioma%1:26:00:: 640 | d000.s031.t006 mesothelioma%1:26:00:: 641 | d002.s016.t007 proposal%1:10:00:: 642 | d000.s036.t008 long%3:00:02:: 643 | d001.s012.t003 division%1:14:00:: 644 | d001.s001.t000 nowadays%4:02:00:: 645 | d001.s029.t001 matter%2:42:00:: 646 | d001.s008.t000 below%4:02:01:: 647 | d001.s004.t006 knowledge%1:03:00:: 648 | d000.s029.t004 long%4:02:00:: 649 | d003.s005.t004 letter%1:10:00:: 650 | d003.s017.t010 intact%5:00:00:whole:00 651 | d000.s015.t003 week%1:28:00:: 652 | d000.s032.t003 average%1:09:00:: 653 | d000.s005.t003 infusion%1:27:00:: 654 | d000.s015.t005 infusion%1:27:00:: 655 | d001.s027.t001 ask%2:32:00:: 656 | d001.s045.t000 quit%2:42:04:: 657 | d000.s008.t009 usually%4:02:00:: 658 | d001.s034.t007 result%1:19:00:: 659 | d001.s044.t002 result%1:19:00:: 660 | d003.s016.t000 result%1:19:00:: 661 | d000.s017.t004 fluid%1:27:02:: 662 | d000.s010.t001 combination%1:14:00:: 663 | d000.s026.t001 combination%1:14:00:: 664 | d000.s028.t004 combination%1:14:00:: 665 | d003.s009.t000 combination%1:14:00:: 666 | d002.s009.t009 improve%2:30:01:: 667 | d001.s001.t006 3d%1:10:00:: 668 | d001.s007.t005 3d%1:10:00:: 669 | d000.s008.t002 type%1:09:00:: 670 | d000.s010.t007 type%1:09:00:: 671 | d000.s016.t006 type%1:09:00:: 672 | d001.s004.t001 orient%2:42:00:: 673 | d000.s023.t007 involve%2:42:01:: 674 | d000.s027.t008 involve%2:42:01:: 675 | d000.s028.t006 involve%2:42:01:: 676 | d002.s000.t002 involve%2:42:01:: 677 | d002.s002.t005 involve%2:42:01:: 678 | d001.s028.t001 operate%2:41:00:: 679 | d000.s003.t006 scientific%3:01:00:: 680 | d003.s022.t004 come_into%2:40:00:: 681 | d000.s019.t006 kidney%1:08:00:: 682 | d001.s010.t001 basically%4:02:00:: 683 | d000.s016.t004 corticosteroid%1:27:00:: 684 | d002.s005.t014 profitability%1:07:00:: 685 | d000.s023.t001 convert%2:30:00:: 686 | d001.s009.t008 convert%2:30:00:: 687 | d000.s037.t000 contrast%1:24:00:: 688 | d001.s028.t000 easy%3:00:01:: 689 | d001.s042.t002 easy%3:00:01:: 690 | d000.s030.t000 benefit%1:21:00:: 691 | d002.s011.t010 benefit%1:21:00:: 692 | d003.s015.t000 benefit%1:21:00:: 693 | d000.s006.t002 substance%1:03:00:: 694 | d000.s022.t001 substance%1:03:00:: 695 | d003.s003.t002 substance%1:03:00:: 696 | d002.s003.t002 variety%1:14:01:: 697 | d001.s044.t003 html%1:10:00:: 698 | d001.s008.t006 do%2:41:01:: 699 | d001.s009.t005 do%2:41:01:: 700 | d001.s047.t003 do%2:41:01:: 701 | d000.s002.t003 medical%3:01:00:: 702 | d000.s011.t003 prescription%1:10:00:: 703 | d000.s000.t000 document%1:10:00:: 704 | d003.s000.t000 document%1:10:00:: 705 | d000.s002.t002 information%1:10:00:: 706 | d000.s003.t002 information%1:10:00:: 707 | d000.s020.t001 information%1:10:00:: 708 | d001.s051.t000 set%2:35:00:: 709 | d001.s053.t000 set%2:35:00:: 710 | d003.s003.t004 tablet%1:06:02:: 711 | d003.s004.t000 tablet%1:06:02:: 712 | d003.s005.t000 tablet%1:06:02:: 713 | d003.s009.t015 tablet%1:06:02:: 714 | d003.s017.t009 tablet%1:06:02:: 715 | d003.s017.t011 tablet%1:06:02:: 716 | d003.s018.t004 tablet%1:06:02:: 717 | d001.s051.t001 several%5:00:00:some:00 718 | d003.s014.t007 several%5:00:00:some:00 719 | d001.s051.t004 view%1:09:02:: 720 | d003.s016.t005 placebo%1:06:00:: 721 | d003.s016.t014 placebo%1:06:00:: 722 | d000.s030.t001 show%2:39:02:: 723 | d003.s015.t001 show%2:39:02:: 724 | d003.s016.t002 show%2:39:02:: 725 | d001.s008.t007 calculation%1:04:00:: 726 | d001.s042.t003 fix%2:30:01:: 727 | d000.s019.t005 severe%5:00:00:intense:00 728 | d002.s008.t018 vice_president%1:18:00:: 729 | d001.s040.t000 save%2:41:01:: 730 | d001.s041.t000 save%2:41:01:: 731 | d001.s043.t000 save%2:41:01:: 732 | d001.s044.t000 save%2:41:01:: 733 | d001.s039.t004 resume%2:30:00:: 734 | d002.s010.t004 depth%1:07:00:: 735 | d003.s016.t006 less%3:00:00:: 736 | d002.s003.t005 play%2:33:00:: 737 | d001.s035.t000 new%3:00:00:: 738 | d001.s047.t001 new%3:00:00:: 739 | d001.s047.t008 new%3:00:00:: 740 | d002.s011.t006 pattern%1:09:00:: 741 | d000.s018.t006 blood_count%1:23:00:: 742 | d000.s016.t009 vitamin_b12%1:27:00:: 743 | d001.s008.t001 find%2:40:02:: 744 | d001.s018.t001 find%2:40:02:: 745 | d001.s023.t006 find%2:40:02:: 746 | d002.s002.t012 find%2:40:02:: 747 | d001.s004.t004 little%3:00:01:: 748 | d001.s047.t004 go%2:38:00:: 749 | d001.s012.t000 addition%1:06:00:: 750 | d000.s023.t009 nucleotide%1:27:00:: 751 | d000.s032.t004 month%1:28:01:: 752 | d000.s032.t006 month%1:28:01:: 753 | d000.s034.t007 month%1:28:01:: 754 | d000.s034.t009 month%1:28:01:: 755 | d000.s035.t008 month%1:28:01:: 756 | d003.s005.t001 mark%2:35:09:: 757 | d000.s017.t006 dehydration%1:26:00:: 758 | d001.s000.t001 base%2:31:00:: 759 | d001.s003.t000 base%2:31:00:: 760 | d001.s010.t000 base%2:31:00:: 761 | d002.s006.t001 base%2:31:00:: 762 | d003.s019.t012 underlie%2:42:00:: 763 | d001.s039.t006 work%1:04:00:: 764 | d002.s005.t012 work%1:04:00:: 765 | d002.s009.t015 work%1:04:00:: 766 | d002.s011.t007 work%1:04:00:: 767 | d000.s005.t002 solution%1:27:00:: 768 | d003.s003.t005 solution%1:27:00:: 769 | d000.s000.t003 public%3:00:00:: 770 | d002.s001.t010 public%3:00:00:: 771 | d002.s003.t006 public%3:00:00:: 772 | d002.s004.t015 public%3:00:00:: 773 | d002.s017.t004 public%3:00:00:: 774 | d003.s000.t003 public%3:00:00:: 775 | d001.s028.t003 cos%1:24:00:: 776 | d001.s000.t000 mathematical%3:01:00:: 777 | d001.s004.t005 mathematical%3:01:00:: 778 | d000.s009.t010 likely%3:00:00:: 779 | d001.s038.t000 execute%2:41:00:: 780 | d002.s006.t013 country%1:14:00:: 781 | d002.s010.t000 country%1:14:00:: 782 | d001.s030.t001 plus%1:07:00:: 783 | d002.s000.t000 foundation%1:24:00:: 784 | d002.s004.t000 foundation%1:24:00:: 785 | d002.s005.t000 foundation%1:24:00:: 786 | d002.s015.t000 foundation%1:24:00:: 787 | d000.s008.t003 cancer%1:26:00:: 788 | d000.s008.t006 cancer%1:26:00:: 789 | d000.s009.t004 cancer%1:26:00:: 790 | d000.s009.t021 cancer%1:26:00:: 791 | d000.s036.t003 cancer%1:26:00:: 792 | d000.s037.t002 cancer%1:26:00:: 793 | d003.s008.t010 cancer%1:26:00:: 794 | d002.s009.t008 practice%1:04:00:: 795 | d002.s013.t008 practice%1:04:00:: 796 | d001.s023.t010 instance%1:11:00:: 797 | d002.s005.t011 humanise%2:37:00:: 798 | d002.s014.t004 provide%2:40:00:: 799 | d000.s009.t015 body%1:08:00:: 800 | d000.s014.t003 body%1:08:00:: 801 | d000.s023.t000 body%1:08:00:: 802 | d000.s018.t007 abnormal%3:00:00:: 803 | d002.s009.t005 initiative%1:07:00:: 804 | d002.s010.t001 initiative%1:07:00:: 805 | d002.s011.t003 initiative%1:07:00:: 806 | d002.s011.t012 initiative%1:07:00:: 807 | d002.s014.t002 initiative%1:07:00:: 808 | d002.s017.t003 initiative%1:07:00:: 809 | d002.s020.t003 initiative%1:07:00:: 810 | d003.s017.t012 again%4:02:00:: 811 | d002.s015.t003 characteristic%1:09:00:: 812 | d003.s010.t000 purpose%1:09:00:: 813 | d001.s008.t003 field%1:15:00:: 814 | d002.s002.t003 report%2:32:00:: 815 | d003.s021.t005 dosage%1:23:00:: 816 | d001.s015.t004 too%4:02:00:: 817 | d000.s037.t006 short%3:00:02:: 818 | d002.s021.t002 cover%2:35:00:: 819 | d003.s018.t003 administer%2:41:00:: 820 | d003.s021.t003 administer%2:41:00:: 821 | d000.s001.t004 human%3:01:00:: 822 | d002.s005.t010 participation%1:04:00:: 823 | d000.s016.t005 folic_acid%1:27:00:: 824 | d000.s024.t000 study%2:31:02:: 825 | d000.s025.t004 study%2:31:02:: 826 | d003.s013.t000 study%2:31:02:: 827 | d000.s033.t004 as%4:02:00:: 828 | d000.s033.t006 as%4:02:00:: 829 | d002.s008.t021 economic%3:01:01:: 830 | d003.s018.t000 follow-up%1:06:00:: 831 | d002.s001.t007 key%5:00:00:important:00 832 | d002.s012.t002 key%5:00:00:important:00 833 | d000.s016.t000 reduce%2:30:00:: 834 | d000.s018.t004 reduce%2:30:00:: 835 | d001.s032.t002 list%1:10:00:: 836 | d001.s052.t003 list%1:10:00:: 837 | d000.s000.t004 assessment%1:09:00:: 838 | d003.s000.t004 assessment%1:09:00:: 839 | d000.s014.t008 height%1:07:00:: 840 | d003.s007.t001 mean%2:32:01:: 841 | d002.s006.t004 senior%3:00:00:: 842 | d002.s013.t006 synthesise%2:31:00:: 843 | d002.s011.t004 impact%1:11:00:: 844 | d002.s001.t016 as_well%4:02:01:: 845 | d000.s009.t024 cell%1:06:03:: 846 | d000.s022.t006 cell%1:06:03:: 847 | d000.s023.t015 cell%1:06:03:: 848 | d000.s027.t001 cell%1:06:03:: 849 | d000.s033.t002 cell%1:06:03:: 850 | d000.s033.t005 effective%3:00:00:: 851 | d003.s016.t004 effective%3:00:00:: 852 | d000.s018.t009 certain%5:00:00:definite:00 853 | d002.s009.t014 worker%1:18:00:: 854 | d002.s017.t011 worker%1:18:00:: 855 | d002.s018.t012 worker%1:18:00:: 856 | d002.s019.t003 worker%1:18:00:: 857 | d000.s002.t011 contact%2:32:00:: 858 | d002.s007.t004 industry%1:14:00:: 859 | d002.s012.t003 personnel%1:14:00:: 860 | d000.s014.t005 calculate%2:31:00:: 861 | d002.s006.t003 interview%1:10:01:: 862 | d002.s012.t001 interview%1:10:01:: 863 | d002.s001.t005 issue%1:09:01:: 864 | d001.s050.t003 display%2:39:00:: 865 | d001.s048.t000 then%4:02:00:: 866 | d001.s018.t004 operator%1:24:00:: 867 | d001.s019.t001 operator%1:24:00:: 868 | d001.s030.t004 operator%1:24:00:: 869 | d002.s005.t008 extent%1:26:00:: 870 | d002.s015.t006 job%1:04:00:: 871 | d002.s009.t013 age%2:30:01:: 872 | d001.s048.t003 text%1:10:00:: 873 | d002.s015.t001 aim%2:33:00:: 874 | d000.s034.t004 average%5:00:02:normal:01 875 | d000.s035.t005 average%5:00:02:normal:01 876 | d002.s011.t002 background%1:07:00:: 877 | d002.s019.t002 background%1:07:00:: 878 | d000.s009.t013 other%3:00:00:: 879 | d000.s018.t010 other%3:00:00:: 880 | d001.s028.t005 other%3:00:00:: 881 | d003.s005.t006 other%3:00:00:: 882 | d003.s009.t003 other%3:00:00:: 883 | d003.s019.t005 other%3:00:00:: 884 | d003.s019.t008 other%3:00:00:: 885 | d002.s006.t002 personal%3:00:00:: 886 | d001.s001.t001 capable%3:00:00:: 887 | d001.s023.t004 only_if%4:02:00:: 888 | d001.s033.t001 let%2:41:00:: 889 | d001.s008.t004 type%2:32:00:: 890 | d001.s041.t002 type%2:32:00:: 891 | d001.s048.t005 type%2:32:00:: 892 | d002.s002.t000 relatively%4:02:00:: 893 | d002.s001.t002 interest_group%1:14:00:: 894 | d000.s017.t009 before%4:02:03:: 895 | d000.s025.t012 before%4:02:03:: 896 | d000.s001.t001 committee%1:14:00:: 897 | d002.s008.t014 committee%1:14:00:: 898 | d002.s008.t023 committee%1:14:00:: 899 | d003.s001.t001 committee%1:14:00:: 900 | d003.s010.t002 day%1:28:00:: 901 | d001.s032.t000 there%4:02:00:: 902 | d000.s022.t009 belong_to%2:42:00:: 903 | d000.s026.t002 compare%2:31:00:: 904 | d000.s027.t004 compare%2:31:00:: 905 | d000.s028.t001 compare%2:31:00:: 906 | d000.s032.t005 compare%2:31:00:: 907 | d000.s034.t008 compare%2:31:00:: 908 | d001.s004.t002 now%4:02:05:: 909 | d001.s027.t000 now%4:02:05:: 910 | d000.s015.t001 once%4:02:00:: 911 | d000.s022.t010 group%1:03:00:: 912 | d000.s035.t009 group%1:03:00:: 913 | d002.s018.t010 retrain%2:31:01:: 914 | d002.s002.t001 few%3:00:00:: 915 | d000.s009.t005 remove%2:30:00:: 916 | d002.s016.t008 remove%2:30:00:: 917 | d000.s019.t004 moderate%3:00:00:: 918 | d001.s038.t002 file%1:10:00:: 919 | d003.s017.t006 animal%1:03:00:: 920 | d003.s022.t006 animal%1:03:00:: 921 | d001.s001.t004 operation%1:26:00:: 922 | d001.s022.t003 operation%1:26:00:: 923 | d001.s020.t001 define%2:42:00:: 924 | d001.s022.t001 define%2:42:00:: 925 | d001.s025.t001 define%2:42:00:: 926 | d001.s039.t002 define%2:42:00:: 927 | d001.s011.t000 look%2:39:00:: 928 | d001.s011.t002 look%2:39:00:: 929 | d001.s007.t007 dictionary%1:10:00:: 930 | d001.s041.t004 able%3:00:00:: 931 | d001.s044.t004 able%3:00:00:: 932 | d003.s017.t008 able%3:00:00:: 933 | d000.s000.t002 european%3:01:00:: 934 | d002.s004.t001 european%3:01:00:: 935 | d002.s004.t009 european%3:01:00:: 936 | d002.s006.t012 european%3:01:00:: 937 | d002.s008.t011 european%3:01:00:: 938 | d002.s008.t016 european%3:01:00:: 939 | d002.s008.t019 european%3:01:00:: 940 | d002.s013.t004 european%3:01:00:: 941 | d003.s000.t002 european%3:01:00:: 942 | d000.s014.t001 mg%1:23:00:: 943 | d000.s022.t002 cytotoxic%3:01:00:: 944 | d002.s018.t002 early%3:00:00:: 945 | d001.s007.t003 console%1:06:03:: 946 | d001.s023.t000 introduce%2:32:00:: 947 | d000.s023.t012 rna%1:27:00:: 948 | d001.s042.t001 text_file%1:10:00:: 949 | d000.s011.t002 obtain%2:40:00:: 950 | d002.s006.t011 fifteen%5:00:00:cardinal:00 951 | d003.s012.t003 central_nervous_system%1:08:00:: 952 | d000.s008.t011 exposure%1:07:00:: 953 | d001.s033.t006 trick%2:41:00:: 954 | d000.s002.t005 treatment%1:04:00:: 955 | d000.s016.t010 treatment%1:04:00:: 956 | d000.s018.t000 treatment%1:04:00:: 957 | d000.s025.t000 treatment%1:04:00:: 958 | d000.s027.t000 treatment%1:04:00:: 959 | d000.s033.t000 treatment%1:04:00:: 960 | d003.s008.t009 treatment%1:04:00:: 961 | d003.s009.t005 treatment%1:04:00:: 962 | d003.s009.t010 treatment%1:04:00:: 963 | d003.s016.t015 treatment%1:04:00:: 964 | d003.s017.t000 treatment%1:04:00:: 965 | d003.s018.t001 treatment%1:04:00:: 966 | d003.s019.t001 treatment%1:04:00:: 967 | d003.s021.t014 treatment%1:04:00:: 968 | d002.s008.t009 social_affair%1:14:00:: 969 | d003.s008.t001 dog%1:05:00:: 970 | d003.s014.t004 dog%1:05:00:: 971 | d003.s016.t009 dog%1:05:00:: 972 | d003.s016.t012 dog%1:05:00:: 973 | d003.s017.t002 dog%1:05:00:: 974 | d003.s018.t005 dog%1:05:00:: 975 | d003.s021.t004 daily%4:02:00:: 976 | d001.s033.t004 just%4:02:00:: 977 | d001.s052.t000 just%4:02:00:: 978 | d003.s021.t002 tolerate%2:31:00:: 979 | d002.s008.t017 parliament%1:14:00:: 980 | d002.s008.t020 parliament%1:14:00:: 981 | d000.s011.t001 only%4:02:02:: 982 | d000.s013.t000 only%4:02:02:: 983 | d003.s019.t003 only%4:02:02:: 984 | d002.s009.t011 training%1:04:00:: 985 | d002.s015.t008 training%1:04:00:: 986 | d000.s014.t002 square_metre%1:23:00:: 987 | d002.s001.t006 role%1:04:00:: 988 | d002.s003.t004 role%1:04:00:: 989 | d003.s009.t001 supportive%3:00:00:: 990 | d003.s019.t006 supportive%3:00:00:: 991 | d001.s010.t004 hard%3:00:06:: 992 | d002.s001.t013 voluntary%3:00:01:: 993 | d000.s015.t002 every%5:00:00:all:00 994 | d000.s001.t010 recommendation%1:10:00:: 995 | d000.s003.t004 recommendation%1:10:00:: 996 | d003.s001.t010 recommendation%1:10:00:: 997 | d000.s014.t004 surface_area%1:07:00:: 998 | d001.s008.t005 function%1:24:00:: 999 | d001.s016.t004 function%1:24:00:: 1000 | d001.s018.t002 function%1:24:00:: 1001 | d001.s019.t005 function%1:24:00:: 1002 | d001.s028.t002 function%1:24:00:: 1003 | d001.s028.t007 function%1:24:00:: 1004 | d001.s047.t009 function%1:24:00:: 1005 | d001.s048.t006 function%1:24:00:: 1006 | d001.s050.t001 function%1:24:00:: 1007 | d002.s004.t006 affairs%1:09:00:: 1008 | d000.s018.t002 discontinue%2:42:00:: 1009 | d002.s012.t005 undertake%2:36:00:: 1010 | d003.s019.t011 address%2:32:00:: 1011 | d000.s002.t008 leaflet%1:08:00:: 1012 | d003.s021.t012 maximum%3:00:00:: 1013 | d001.s009.t002 work%2:41:02:: 1014 | d001.s026.t002 work%2:41:02:: 1015 | d001.s005.t000 here%4:02:00:: 1016 | d002.s000.t007 challenge%1:26:00:: 1017 | d002.s004.t014 challenge%1:26:00:: 1018 | d003.s020.t001 associate%2:31:00:: 1019 | d002.s003.t000 note%1:10:00:: 1020 | d001.s051.t003 same%3:00:02:: 1021 | d003.s004.t003 line%1:14:03:: 1022 | d001.s001.t005 arithmetic%3:01:00:: 1023 | -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: test.py 4 | # @author: chrhad 5 | # Run BERT classifier for all word WSD task 6 | import argparse 7 | import io 8 | import sys 9 | import os 10 | 11 | import torch 12 | import torch.nn as nn 13 | import torch.nn.functional as F 14 | 15 | from bert_input_handler import BertInputHandler 16 | from instances_reader import InputLoaderFactory 17 | from utils import open_file 18 | from model import BertSenseClassifier, pad 19 | 20 | 21 | if __name__ == '__main__': 22 | argparser = argparse.ArgumentParser( 23 | "Run word sense disambiguation model for all lexical items") 24 | argparser.add_argument('test_path', help="Test instance file in XML format") 25 | argparser.add_argument('model_dir', help="Directory to pre-trained model") 26 | argparser.add_argument('output_dir', help="Directory to output predictions") 27 | 28 | # Optional arguments 29 | argparser.add_argument('--batch-size', type=int, default=32, 30 | help="Training batch size (default: 32)") 31 | argparser.add_argument('--xml-format', choices=[None, 'semeval13', 'senseval2ls', 'senseval3ls', 'semeval13induction'], 32 | default=None, help="Input XML file format (default: training model default)") 33 | args = argparser.parse_args() 34 | 35 | mdir = args.model_dir # path to store trained model 36 | odir = args.output_dir # path to store results 37 | if not os.path.exists(odir): 38 | os.mkdir(odir) 39 | elif not os.path.isdir(odir): 40 | sys.exit("{0} exists but not a directory".format(odir)) 41 | 42 | # 0. Load model and instances, assign indices 43 | lexelt2ix = torch.load("{0}/lexelt_idx.bin".format(mdir)) 44 | ix2key = torch.load("{0}/sense_idx.bin".format(mdir)) 45 | key2lexelt = torch.load("{0}/sense_lexelt".format(mdir)) 46 | params = torch.load("{0}/params.bin".format(mdir)) 47 | 48 | print("| Loding BERT tokenizer from {0}".format(params.bert_model), 49 | file=sys.stderr, flush=True) 50 | input_handler = BertInputHandler(params.bert_model) 51 | pad_ix = input_handler.pad_idx() 52 | 53 | print("| Loading model from {0}".format(mdir)) 54 | use_glu = params.use_glu if 'use_glu' in params else False 55 | residual_glu = params.residual_glu if 'residual_glu' in params else False 56 | sent_attn_query = params.sent_attn_query if 'sent_attn_query' in params else False 57 | model_fname = "{0}/model.bin".format(mdir) 58 | model = BertSenseClassifier(params.bert_model, len(ix2key), key2lexelt, 59 | mlp_dropout=params.dropout, attn_dropout=params.attn_dropout, 60 | layer=params.layer, use_glu=use_glu, residual_glu=residual_glu, 61 | top_attn_head=params.top_attn_head, sent_attn_query=sent_attn_query) 62 | model.load_state_dict(torch.load(model_fname)) 63 | model.cuda() 64 | model.eval() 65 | 66 | xml_format = params.xml_format if args.xml_format is None else args.xml_format 67 | print("| Loading test instances from {0} with {1} XML format".format(args.test_path, xml_format), 68 | file=sys.stderr, flush=True) 69 | input_loader = InputLoaderFactory(xml_format) 70 | sentences, _ = input_loader.load(args.test_path, params.num_context) 71 | instances = [] 72 | iids = [] 73 | lexelts = [] 74 | heads = [] 75 | 76 | def predict_batch(instances, iids, lexelts, heads): 77 | batch_sents = pad(instances).cuda() 78 | batch_heads = [x.cuda() for x in heads] 79 | batch_lexelts = [x.cuda() for x in lexelts] 80 | 81 | lprobs = model(batch_sents, batch_heads, batch_lexelts) 82 | _, argmaxs = lprobs.max(dim=-1) 83 | i = 0 # iterate returned element 84 | for sent_iid in iids: 85 | for iid in sent_iid: 86 | yield (iid, ix2key[argmaxs[i]]) 87 | i += 1 88 | 89 | bsz = args.batch_size 90 | cnt = 0 91 | with open_file("{0}/result.result".format(odir), 'w') as f: 92 | for inst in sentences: 93 | if cnt == bsz: 94 | # predict batch 95 | for iid, out in predict_batch(instances, iids, lexelts, heads): 96 | print("{0} {1}".format(iid, out), file=f, flush=True) 97 | 98 | # clear buffer 99 | del(instances[:]) 100 | del(iids[:]) 101 | del(lexelts[:]) 102 | del(heads[:]) 103 | cnt = 0 104 | 105 | token_ids, sent_lexelt, sent_iid, sent_head, _ = \ 106 | input_handler.tokenize_indexify(inst, ('[CLS]' if sent_attn_query else None)) 107 | instances.append(token_ids) 108 | 109 | # find involved lexelts and senses 110 | sent_lexid = [] 111 | for lexelt, iid in zip(sent_lexelt, sent_iid): 112 | lex_id = lexelt2ix.get(lexelt, 0) 113 | sent_lexid.append(lex_id) 114 | 115 | iids.append(sent_iid) 116 | lexelts.append(torch.tensor(sent_lexid, dtype=torch.long)) 117 | heads.append(sent_head) 118 | cnt += 1 119 | 120 | # predict batch 121 | for iid, out in predict_batch(instances, iids, lexelts, heads): 122 | print("{0} {1}".format(iid, out), file=f, flush=True) 123 | f.close() 124 | -------------------------------------------------------------------------------- /test_postproc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | DATA_DIR=${HOME}/demo/data 3 | WSD_TESTDIR=${DATA_DIR}/WSD_Unified_Evaluation_Datasets 4 | TESTSET=$1 5 | WSD_TESTPF=${WSD_TESTDIR}/${TESTSET}/${TESTSET} 6 | MODEL_DIR=$2 7 | OUTPUT_DIR=${MODEL_DIR%_model}_${TESTSET}_output 8 | 9 | python test.py ${WSD_TESTPF}.data.xml ${MODEL_DIR} ${OUTPUT_DIR} 10 | python backoff_mfs.py ${OUTPUT_DIR}/result.result ${TESTSET}.mfs.txt > ${OUTPUT_DIR}/pp.result 11 | java -cp ${WSD_TESTDIR}:. Scorer ${WSD_TESTPF}.gold.key.txt ${OUTPUT_DIR}/pp.result 12 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | # /usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # File: train.py 4 | # @author: chrhad 5 | # Train BERT classifier for all words WSD 6 | import argparse 7 | import glob 8 | import io 9 | import random 10 | import sys 11 | import os 12 | 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.functional as F 16 | import torch.optim as optim 17 | from sklearn.model_selection import train_test_split 18 | from pytorch_pretrained_bert import BertAdam 19 | from bert_input_handler import BertInputHandler 20 | from instances_reader import InputLoaderFactory 21 | from utils import open_file 22 | from model import BertSenseClassifier, pad 23 | 24 | def split_tuple_list(tuplist): 25 | return tuple(map(lambda i: list(map(lambda x:x[i], tuplist)), range(len(tuplist[0])))) 26 | 27 | if __name__ == '__main__': 28 | argparser = argparse.ArgumentParser( 29 | "Train word sense disambiguation model for one lexelt") 30 | argparser.add_argument('train_path', help="Training instance file in XML format") 31 | argparser.add_argument('key_path', help="Training key file") 32 | argparser.add_argument('model_dir', help="Directory to output model") 33 | 34 | # Optional arguments 35 | argparser.add_argument('--xml-format', choices=['semeval13', 'senseval2ls', 'senseval3ls'], 36 | default='semeval13', help='Input XML file format (default: semeval13)') 37 | argparser.add_argument('--dev-ratio', type=float, default=0.2, 38 | help="ratio of development set to take from the training set (default: 0.2)") 39 | argparser.add_argument('--devset-path', help='File path to the development data') 40 | argparser.add_argument('--devkey-path', help='File path to the development key') 41 | argparser.add_argument('--bert-model', default="bert-base-cased", 42 | help="BERT pre-trained model to use") 43 | argparser.add_argument('--num-context', type=int, default=1, 44 | help="Number of sentences to the left and right each (default: 1)") 45 | argparser.add_argument('--layer', type=int, default=11, 46 | help="BERT layer for word representation (default: 11 = top layer of bert-base-cased)") 47 | argparser.add_argument('--use-glu', action='store_true', 48 | help="Use gated linear unit on the word by the sentence representation (default: not used)") 49 | argparser.add_argument('--residual-glu', action='store_true', 50 | help="Add the original BERT output to the GLU output (default: no)") 51 | argparser.add_argument('--top-attn-head', type=int, default=1, 52 | help="Number of attention head before FFNN prediction (default: 1)") 53 | argparser.add_argument('--sent-attn-query', action='store_true', 54 | help="Use attention query from sentence vector instead of a common random variable (default: random)") 55 | argparser.add_argument('--optimizer', choices=['sgd', 'adam', 'bert-adam'], default='bert-adam', 56 | help="Gradient-based training algorithm (default: bert-adam)") 57 | argparser.add_argument('--lr', type=float, default=1e-3, 58 | help="Learning rate (default: 1e-3)") 59 | argparser.add_argument('--dropout', type=float, default=0, 60 | help="Dropout rate for top layer FFNN (default: 0)") 61 | argparser.add_argument('--attn-dropout', type=float, default=0., 62 | help="Dropout rate for top layer attention (default: 0)") 63 | argparser.add_argument('--num-epochs', type=int, default=50, 64 | help="Number of training epochs (default: 50)") 65 | argparser.add_argument('--batch-size', type=int, default=16, 66 | help="Training batch size (default: 16)") 67 | argparser.add_argument('--dev-batch-size', type=int, default=16, 68 | help="Development batch size (default: 16)") 69 | argparser.add_argument('--no-shuffle', action='store_true', default=False, 70 | help="Do not shuffle training data (default: shuffle)") 71 | argparser.add_argument('--patience', type=int, default=10, 72 | help="Number of epochs after best development set result does not improve (default: 10)") 73 | argparser.add_argument('--iter-checkpoint', type=int, default=1000, 74 | help="Number of iterations to show training progress outside epoch (default: 1000)") 75 | argparser.add_argument('--seed', type=int, default=123, help="Random seed (default: 123)") 76 | args = argparser.parse_args() 77 | 78 | torch.manual_seed(args.seed) # set fixed random seed 79 | torch.cuda.manual_seed(args.seed) # set fixed random seed 80 | torch.backends.cudnn.deterministic=True 81 | 82 | mdir = args.model_dir # path to store trained model 83 | if not os.path.exists(mdir): 84 | os.mkdir(mdir) 85 | elif not os.path.isdir(mdir): 86 | sys.exit("{0} exists but not a directory".format(mdir)) 87 | 88 | # 0. Load keys and instances, assign indices 89 | print("| Loding BERT tokenizer from {0}".format(args.bert_model), 90 | file=sys.stderr, flush=True) 91 | input_handler = BertInputHandler(args.bert_model) 92 | pad_ix = input_handler.pad_idx() 93 | 94 | print("| Loading training instances from {0} with format of {1}".format(args.train_path, args.xml_format), 95 | file=sys.stderr, flush=True) 96 | input_loader = InputLoaderFactory(args.xml_format) 97 | train_sentences, train_keys = input_loader.load(args.train_path, args.num_context, args.key_path) 98 | 99 | dev_sentences, dev_keys = (None, None) 100 | if args.devset_path is not None: 101 | print("| Loading development instances from {0}".format(args.devset_path), file=sys.stderr, flush=True) 102 | dev_sentences, dev_keys = input_loader.load(args.devset_path, args.num_context, args.devkey_path) 103 | 104 | lexelt2ix = {'unk.UNK': 0} 105 | ix2lexelt = ['unk.UNK'] 106 | key2ix = {'U': 0} 107 | ix2key = ['U'] 108 | key2lexelt = [0] 109 | 110 | train_instances = [] 111 | train_iids = [] 112 | train_lexelts = [] 113 | train_heads = [] 114 | train_senses = [] 115 | for inst in train_sentences: 116 | token_ids, sent_lexelt, sent_iid, sent_head, _ = \ 117 | input_handler.tokenize_indexify(inst, ('[CLS]' if args.sent_attn_query else None)) 118 | train_instances.append(token_ids) 119 | 120 | # find or record index for involved lexelts and senses 121 | sent_senses = [] 122 | sent_lexid = [] 123 | for lexelt, iid in zip(sent_lexelt, sent_iid): 124 | lex_id = lexelt2ix.setdefault(lexelt, len(ix2lexelt)) 125 | sent_lexid.append(lex_id) 126 | sense = train_keys.get(iid, 'U') 127 | if lex_id == len(ix2lexelt): 128 | ix2lexelt.append(lexelt) 129 | sense_id = key2ix.setdefault(sense, len(ix2key)) 130 | sent_senses.append(sense_id) 131 | if sense_id == len(ix2key): 132 | ix2key.append(sense) 133 | key2lexelt.append(lex_id) 134 | 135 | train_iids.append(sent_iid) 136 | train_lexelts.append(torch.tensor(sent_lexid, dtype=torch.long)) 137 | train_heads.append(sent_head) 138 | train_senses.append(torch.tensor(sent_senses, dtype=torch.long)) 139 | train_len = len(train_instances) 140 | unk_sensidx = 0 141 | 142 | torch.save(lexelt2ix, "{0}/lexelt_idx.bin".format(mdir)) 143 | torch.save(ix2key, "{0}/sense_idx.bin".format(mdir)) 144 | torch.save(key2lexelt, "{0}/sense_lexelt".format(mdir)) 145 | torch.save(args, "{0}/params.bin".format(mdir)) 146 | 147 | # 0.2 Developement data 148 | dev_instances = [] 149 | dev_iids = [] 150 | dev_lexelts = [] 151 | dev_heads = [] 152 | dev_senses = [] 153 | if dev_sentences is not None: 154 | for inst in dev_sentences: 155 | token_ids, sent_lexelt, sent_iid, sent_head, _ = \ 156 | input_handler.tokenize_indexify(inst, ('[CLS]' if args.sent_attn_query else None)) 157 | dev_instances.append(token_ids) 158 | 159 | # find involved lexelts and senses 160 | sent_senses = [] 161 | sent_lexid = [] 162 | for lexelt, iid in zip(sent_lexelt, sent_iid): 163 | lex_id = lexelt2ix.get(lexelt, 0) 164 | sense = dev_keys.get(iid, 'U') 165 | sense_id = key2ix.get(sense, key2ix['U']) 166 | sent_lexid.append(lex_id) 167 | sent_senses.append(sense_id) 168 | 169 | dev_iids.append(sent_iid) 170 | dev_lexelts.append(torch.tensor(sent_lexid, dtype=torch.long)) 171 | dev_heads.append(sent_head) 172 | dev_senses.append(torch.tensor(sent_senses, dtype=torch.long)) 173 | elif args.dev_ratio > 0.: 174 | all_X = list(zip(train_instances, train_iids, train_lexelts, train_heads)) 175 | train_X, dev_X, train_y, dev_senses = train_test_split( 176 | all_X, train_senses, test_size=args.dev_ratio, random_state=args.seed) 177 | 178 | # repopulate train_instances 179 | train_instances, train_iids, train_lexelts, train_heads = split_tuple_list(train_X) 180 | dev_instances, dev_iids, dev_lexelts, dev_heads = split_tuple_list(dev_X) 181 | train_senses = train_y 182 | train_len = len(train_instances) 183 | dev_len = len(dev_instances) 184 | 185 | # 2. Construct model and define loss function 186 | print("| Building network architecture for {1:d} lexical items with BERT pretrained model: {0}".format( 187 | args.bert_model, len(lexelt2ix)), file=sys.stderr, flush=True) 188 | model = BertSenseClassifier(args.bert_model, len(ix2key), key2lexelt, 189 | mlp_dropout=args.dropout, attn_dropout=args.attn_dropout, 190 | layer=args.layer, use_glu=args.use_glu, residual_glu=args.residual_glu, 191 | top_attn_head=args.top_attn_head, sent_attn_query=args.sent_attn_query) 192 | model.cuda() 193 | 194 | # loss function definition 195 | print("| Defining optimization with the algorithm: {0}".format(args.optimizer), 196 | file=sys.stderr, flush=True) 197 | loss_fn = nn.NLLLoss(ignore_index=unk_sensidx) 198 | 199 | # optimizer definition 200 | optimizer = None 201 | if args.optimizer == 'bert-adam': 202 | optimizer = BertAdam(model.train_parameters(), lr=args.lr) 203 | elif args.optimizer == 'adam': 204 | optimizer = optim.Adam(model.train_parameters(), lr=args.lr) 205 | elif args.optimizer == 'sgd': 206 | optimizer = optim.SGD(model.train_parameters(), lr=args.lr) 207 | 208 | # 3. Train model (shuffle lexelt list, then shuffle each item inside) 209 | bsz = args.batch_size 210 | dbsz = args.dev_batch_size 211 | best_error = float('inf') 212 | stalled = 0 213 | model_fname = "{0}/model.bin".format(mdir) 214 | iter_chk = args.iter_checkpoint 215 | patience = args.patience 216 | for epoch in range(args.num_epochs): 217 | print("| Epoch {0:3d} started.".format(epoch+1), file=sys.stderr, flush=True) 218 | model.train() 219 | train_loss = 0. 220 | num_iter = 0 221 | num_inst = 0 222 | # Shuffle training instances to prevent overfitting, first the lexelts 223 | if not args.no_shuffle: 224 | random.seed(args.seed * (epoch+1)) # reset random seed at each epoch, useful if training is restartable 225 | train_lex_tuples = list(zip(train_instances, train_iids, train_heads, train_lexelts, train_senses)) 226 | random.shuffle(train_lex_tuples) 227 | train_instances, train_iids, train_heads, train_lexelts, train_senses = \ 228 | split_tuple_list(train_lex_tuples) 229 | 230 | for bstart in range(0, train_len, bsz): 231 | model.zero_grad() 232 | bend = min(bstart + bsz, train_len) 233 | 234 | batch_sents = pad(train_instances[bstart:bend], pad_ix) 235 | batch_heads = train_heads[bstart:bend] 236 | batch_lexelts = train_lexelts[bstart:bend] 237 | batch_senses = train_senses[bstart:bend] 238 | 239 | batch_sents = batch_sents.cuda() 240 | batch_heads = [x.cuda() for x in batch_heads] 241 | batch_lexelts = [x.cuda() for x in batch_lexelts] 242 | batch_senses = torch.cat([x.cuda() for x in batch_senses]) 243 | 244 | # Compute log-probabilities of each sense 245 | lprobs = model(batch_sents, batch_heads, batch_lexelts) 246 | loss = loss_fn(lprobs, batch_senses) 247 | loss.backward() # compute gradient 248 | optimizer.step() # update parameters 249 | loss_val = loss.item() # keep track of loss value, to print at the epoch end 250 | 251 | # record statistics 252 | train_loss += loss_val * batch_senses.size(0) 253 | num_iter += 1 254 | num_inst += batch_senses.size(0) 255 | if num_iter % iter_chk == 0: 256 | print("| Epoch {0:3d} iter {1:d} training loss = {2:.4f}.".format( 257 | epoch+1, num_iter, loss_val), file=sys.stderr, 258 | flush=True) 259 | 260 | train_loss /= num_inst 261 | 262 | # Measure on development data (seen items only) 263 | with torch.no_grad(): 264 | model.eval() 265 | errcnt = 0 266 | instcnt = 0 267 | for bstart in range(0, dev_len, dbsz): 268 | bend = min(bstart + dbsz, dev_len) 269 | 270 | batch_sents = pad(dev_instances[bstart:bend], pad_ix) 271 | batch_heads = dev_heads[bstart:bend] 272 | batch_lexelts = dev_lexelts[bstart:bend] 273 | batch_senses = dev_senses[bstart:bend] 274 | 275 | batch_sents = batch_sents.cuda() 276 | batch_heads = [x.cuda() for x in batch_heads] 277 | batch_lexelts = [x.cuda() for x in batch_lexelts] 278 | batch_senses = torch.cat([x.cuda() for x in batch_senses]) 279 | 280 | # Compute model prediction 281 | lprobs = model(batch_sents, batch_heads, batch_lexelts) 282 | _ , argmax = lprobs.max(dim=-1) 283 | batch_errcnt = argmax.ne(batch_senses).sum().item() 284 | errcnt += batch_errcnt 285 | instcnt += sum([len(x) for x in batch_heads]) 286 | error_rate = errcnt / instcnt if instcnt > 0 else 0 287 | is_best = (error_rate == 0 or error_rate < best_error) 288 | best_str = "" 289 | if is_best: 290 | stalled = 0 291 | best_error = error_rate 292 | torch.save(model.state_dict(), model_fname) 293 | best_str += " new best" 294 | else: 295 | stalled += 1 296 | 297 | print("| Epoch {0:3d} final training loss = {1:.4f}, dev error = {2:.4f}{3}".format( 298 | epoch+1, train_loss, error_rate, best_str), file=sys.stderr, flush=True) 299 | 300 | if stalled >= patience: 301 | print("| Early stopping, no further improvement.", file=sys.stderr, flush=True) 302 | break 303 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- encoding: utf-8 -*- 3 | # @author: chrhad 4 | # File: utils.py 5 | import gzip 6 | 7 | # Open file descriptor based on extension 8 | def open_file(fname, mode='rt', encoding='utf-8'): 9 | if mode == 'r' or mode == 'w': 10 | mode += 't' 11 | if mode.endswith('b'): 12 | f = gzip.open(fname, mode) if fname.endswith('.gz') \ 13 | else open(fname, mode) 14 | return f 15 | else: 16 | f = gzip.open(fname, mode, encoding=encoding) if fname.endswith('.gz') \ 17 | else open(fname, mode, encoding=encoding) 18 | return f 19 | --------------------------------------------------------------------------------