├── text_classification ├── __init__.py ├── __pycache__ │ ├── nets.cpython-35.pyc │ ├── nets.cpython-36.pyc │ ├── nets.cpython-37.pyc │ ├── __init__.cpython-35.pyc │ ├── __init__.cpython-36.pyc │ ├── __init__.cpython-37.pyc │ ├── nlp_utils.cpython-35.pyc │ ├── nlp_utils.cpython-36.pyc │ ├── nlp_utils.cpython-37.pyc │ ├── text_datasets.cpython-35.pyc │ ├── text_datasets.cpython-36.pyc │ └── text_datasets.cpython-37.pyc ├── nlp_utils.py ├── text_datasets.py └── nets.py ├── global.config ├── evaluator.py ├── README.md ├── args_of_text_classifier.py ├── triggers.py ├── utils.py ├── train_text_classifier.py ├── cbert_finetune.py ├── cbert_augdata.py ├── cbert_utils.py ├── nets.py ├── aug_data ├── TREC │ └── test.tsv └── mpqa │ ├── dev.tsv │ └── test.tsv └── datasets ├── TREC └── test.tsv └── mpqa └── dev.tsv /text_classification/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /text_classification/__pycache__/nets.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/nets.cpython-35.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/nets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/nets.cpython-36.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/nets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/nets.cpython-37.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/nlp_utils.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/nlp_utils.cpython-35.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/nlp_utils.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/nlp_utils.cpython-36.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/nlp_utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/nlp_utils.cpython-37.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/text_datasets.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/text_datasets.cpython-35.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/text_datasets.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/text_datasets.cpython-36.pyc -------------------------------------------------------------------------------- /text_classification/__pycache__/text_datasets.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/1024er/cbert_aug/HEAD/text_classification/__pycache__/text_datasets.cpython-37.pyc -------------------------------------------------------------------------------- /global.config: -------------------------------------------------------------------------------- 1 | { 2 | "dataset":"subj", 3 | "bilm_mode": "sampling", 4 | "stop_epoch": 20, 5 | "bilm_wordwise": true, 6 | "bilm_gumbel": false, 7 | "no_label": false, 8 | "dropout": 0.5, 9 | "epoch": 100, 10 | "bilm_dropout": 0.0, 11 | "unit": 300, 12 | "bilm_layer": 1, 13 | "save_model": false, 14 | "learning_rate": 0.0001, 15 | "bilm_residual": 0.0, 16 | "bilm_add_original": 0.0, 17 | "bilm_temp": 1.0, 18 | "out": "result", 19 | "validation": true, 20 | "seed": 2018, 21 | "bilm_ratio": 0.25, 22 | "layer": 1, 23 | "model": "rnn", 24 | "bilm": null, 25 | "gpu": 0, 26 | "bilm_unit": 1024, 27 | "resume_vocab": null, 28 | "batchsize": 64 29 | } 30 | -------------------------------------------------------------------------------- /evaluator.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import copy 3 | 4 | import six 5 | 6 | import chainer 7 | from chainer import configuration 8 | from chainer.dataset import convert 9 | from chainer.dataset import iterator as iterator_module 10 | from chainer import function 11 | from chainer import link 12 | from chainer import reporter as reporter_module 13 | from chainer.training import extension 14 | 15 | 16 | class MicroEvaluator(chainer.training.extensions.Evaluator): 17 | 18 | def evaluate(self): 19 | iterator = self._iterators['main'] 20 | eval_func = self.eval_func or self._targets['main'] 21 | 22 | if self.eval_hook: 23 | self.eval_hook(self) 24 | 25 | if hasattr(iterator, 'reset'): 26 | iterator.reset() 27 | it = iterator 28 | else: 29 | it = copy.copy(iterator) 30 | 31 | # summary = reporter_module.DictSummary() 32 | summary = collections.defaultdict(list) 33 | 34 | for batch in it: 35 | observation = {} 36 | with reporter_module.report_scope(observation): 37 | in_arrays = self.converter(batch, self.device) 38 | with function.no_backprop_mode(): 39 | if isinstance(in_arrays, tuple): 40 | eval_func(*in_arrays) 41 | elif isinstance(in_arrays, dict): 42 | eval_func(**in_arrays) 43 | else: 44 | eval_func(in_arrays) 45 | n_data = len(batch) 46 | summary['n'].append(n_data) 47 | # summary.add(observation) 48 | for k, v in observation.items(): 49 | summary[k].append(v) 50 | 51 | mean = dict() 52 | ns = summary['n'] 53 | del summary['n'] 54 | for k, vs in summary.items(): 55 | mean[k] = sum(v * n for v, n in zip(vs, ns)) / sum(ns) 56 | return mean 57 | # return summary.compute_mean() 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cbert_aug 2 | 3 | Thanks @liuyaxin 's effort to rewrite the code with huggingface's latest transformer library. 4 | If you want to reproduce the results in paper, you can switch to the develop branch. 5 | 6 | 7 | We arrange the original code of cbert from https://github.com/1024er/cbert_aug.git. 8 | Our original implementation was two-stage, for convenience, we rewrite the code. 9 | 10 | The *global.config* contains the global configuration for bert and classifier. 11 | The datasets directory contains files for bert, and the aug_data directory contain augmented files for classifier. 12 | 13 | You can run the code by: 14 | 15 | 1.finetune bert on each dataset before run cbert_augdata.py 16 | 17 | ```python cbert_finetune.py``` 18 | 19 | you can use *python cbert_finetune.py --task_name='TREC'* to change the task you want to perform, you can also set your own parameters in the same way to acquire different results. 20 | 21 | 2.then load fine-tuned bert in cbert_augdata.py 22 | 23 | ```python cbert_augdata.py``` 24 | 25 | notice that if you want to change the default dataset used in original code, you have to alter the parameter "dataset" in *global.config* firstly. 26 | 27 | The hyperparameters of the models and training were selected by a grid-search using baseline models without data augmentation in each task’s validation set individually. 28 | 29 | We upload the runing log with dropout=0.5 for all datasets, this is very close to the results in paper. You can achieve the results in paper by grid-search the hyperparameters. 30 | 31 | If you have any question, please open an issue. 32 | 33 | Please cite this paper if you use this method or codes: 34 | ```sh 35 | @inproceedings{wu2019conditional, 36 | title={Conditional BERT Contextual Augmentation}, 37 | author={Wu, Xing and Lv, Shangwen and Zang, Liangjun and Han, Jizhong and Hu, Songlin}, 38 | booktitle={International Conference on Computational Science}, 39 | pages={84--95}, 40 | year={2019}, 41 | organization={Springer} 42 | } 43 | ``` 44 | 45 | 46 | The classifier code is from , thanks to the author. 47 | -------------------------------------------------------------------------------- /text_classification/nlp_utils.py: -------------------------------------------------------------------------------- 1 | import collections 2 | import io 3 | 4 | import numpy 5 | 6 | import chainer 7 | from chainer import cuda 8 | 9 | 10 | def split_text(text, char_based=False): 11 | if char_based: 12 | return list(text) 13 | else: 14 | return text.split() 15 | 16 | 17 | def normalize_text(text): 18 | return text.strip().lower() 19 | 20 | 21 | def make_vocab(dataset, max_vocab_size=50000, min_freq=1): 22 | counts = collections.defaultdict(int) 23 | for tokens, _ in dataset: 24 | for token in tokens: 25 | counts[token] += 1 26 | 27 | vocab = {'': 0, '': 1} 28 | for w, c in sorted(counts.items(), key=lambda x: (-x[1], x[0])): 29 | if len(vocab) >= max_vocab_size or c < min_freq: 30 | break 31 | vocab[w] = len(vocab) 32 | return vocab 33 | 34 | 35 | def read_vocab_list(path, max_vocab_size=20000): 36 | vocab = {'': 0, '': 1} 37 | with io.open(path, encoding='utf-8', errors='ignore') as f: 38 | for l in f: 39 | w = l.strip() 40 | if w not in vocab and w: 41 | vocab[w] = len(vocab) 42 | if len(vocab) >= max_vocab_size: 43 | break 44 | return vocab 45 | 46 | 47 | def make_array(tokens, vocab, add_eos=True, add_bos=True): 48 | unk_id = vocab[''] 49 | eos_id = vocab[''] 50 | ids = [vocab.get(token, unk_id) for token in tokens] 51 | if add_eos: 52 | ids.append(eos_id) 53 | if add_bos: 54 | ids = [eos_id] + ids 55 | return numpy.array(ids, 'i') 56 | 57 | 58 | def transform_to_array(dataset, vocab, with_label=True): 59 | if with_label: 60 | return [(make_array(tokens, vocab), numpy.array([cls], 'i')) 61 | for tokens, cls in dataset] 62 | else: 63 | return [make_array(tokens, vocab) 64 | for tokens in dataset] 65 | 66 | 67 | def convert_seq(batch, device=None, with_label=True): 68 | def to_device_batch(batch): 69 | #if device is None: 70 | # return batch 71 | #elif device < 0: 72 | # return [chainer.dataset.to_device(device, x) for x in batch] 73 | #else: 74 | xp = cuda.cupy.get_array_module(*batch) 75 | concat = xp.concatenate(batch, axis=0) 76 | sections = numpy.cumsum([len(x) for x in batch[:-1]], dtype='i') 77 | concat_dev = chainer.dataset.to_device(device, concat) 78 | batch_dev = cuda.cupy.split(concat_dev, sections) 79 | return batch_dev 80 | 81 | if with_label: 82 | return [to_device_batch([x for x, _ in batch]), 83 | to_device_batch([y for _, y in batch])] 84 | # return {'xs': to_device_batch([x for x, _ in batch]), 85 | # 'ys': to_device_batch([y for _, y in batch])} 86 | else: 87 | return to_device_batch([x for x in batch]) 88 | -------------------------------------------------------------------------------- /args_of_text_classifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | import argparse 5 | import json 6 | 7 | 8 | def get_basic_arg_parser(): 9 | parser = argparse.ArgumentParser() 10 | parser.add_argument('--batchsize', '-b', type=int, default=64, 11 | help='Number of images in each mini-batch') 12 | parser.add_argument('--epoch', '-e', type=int, default=100, 13 | help='Number of sweeps over the dataset to train') 14 | parser.add_argument('--gpu', '-g', type=int, default=0, 15 | help='GPU ID (negative value indicates CPU)') 16 | parser.add_argument('--out', '-o', default='result', 17 | help='Directory to output the result') 18 | parser.add_argument('--unit', '-u', type=int, default=300, 19 | help='Number of units') 20 | parser.add_argument('--layer', '-l', type=int, default=1, 21 | help='Number of layers of RNN or MLP following CNN') 22 | parser.add_argument('--dropout', '-d', type=float, default=0.4, 23 | help='Dropout rate') 24 | parser.add_argument('--learning-rate', '-lr', type=float, default=1e-4, 25 | help='Learning rate') 26 | parser.add_argument('--dataset', '-data', default='mpqa', 27 | choices=['dbpedia', 'imdb.binary', 'imdb.fine', 28 | 'TREC', 'stsa.binary', 'stsa.fine', 29 | 'custrev', 'mpqa', 'rt-polarity', 'subj'], 30 | help='Name of dataset.') 31 | parser.add_argument('--model', '-model', default='cnn', 32 | choices=['cnn', 'rnn', 'bow'], 33 | help='Name of encoder model type.') 34 | 35 | parser.add_argument('--bilm', '-bilm') 36 | parser.add_argument('--bilm-unit', '-bilm-u', type=int, default=1024) 37 | parser.add_argument('--bilm-layer', '-bilm-l', type=int, default=1) 38 | parser.add_argument('--bilm-dropout', '-bilm-d', type=float, default=0.) 39 | 40 | parser.add_argument('--bilm-ratio', '-bilm-r', type=float, default=0.25) 41 | parser.add_argument('--bilm-temp', '-bilm-t', type=float, default=1.) 42 | parser.add_argument('--bilm-mode', '-bilm-m', default='sampling', 43 | choices=['weighted_sum', 'sampling']) 44 | parser.add_argument('--bilm-gumbel', action='store_true') 45 | parser.add_argument('--bilm-wordwise', action='store_true', default=True) 46 | parser.add_argument('--bilm-add-original', type=float, default=0.) 47 | parser.add_argument('--bilm-residual', type=float, default=0., 48 | help='if not 0, (original + context) * THIS') 49 | 50 | parser.add_argument('--resume-vocab') 51 | 52 | parser.add_argument('--validation', default=True) 53 | parser.add_argument('--seed', type=int, default=2018) 54 | parser.add_argument('--save-model', action='store_true') 55 | parser.add_argument('--stop-epoch', type=int, default=20) 56 | 57 | parser.add_argument('--no-label', action='store_true') 58 | 59 | return parser 60 | 61 | 62 | if __name__ == '__main__': 63 | parser = get_basic_arg_parser() 64 | args = parser.parse_args() 65 | print(json.dumps(args.__dict__, indent=2)) 66 | -------------------------------------------------------------------------------- /triggers.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | import chainer 4 | from chainer import cuda 5 | from chainer import function 6 | from chainer import reporter 7 | from chainer.training import util 8 | from chainer import utils 9 | from chainer.utils import type_check 10 | from chainer.training import triggers 11 | 12 | 13 | class FailBestValueTrigger(triggers.IntervalTrigger): 14 | 15 | """Trigger invoked when specific value fails to become best. 16 | 17 | Args: 18 | key (str): Key of value. 19 | compare (function): Compare function which takes current best value and 20 | new value and returns whether new value is better than current 21 | best. 22 | trigger: Trigger that decides the comparison interval between current 23 | best value and new value. This must be a tuple in the form of 24 | ``, 'epoch'`` or ``, 'iteration'`` which is passed to 25 | :class:`~chainer.training.triggers.IntervalTrigger`. 26 | 27 | """ 28 | 29 | def __init__(self, key, compare, trigger=(1, 'epoch'), 30 | n_times=5, max_trigger=None, print_triger=False): 31 | self.period = max_trigger 32 | self.unit = 'epoch' 33 | 34 | self._key = key 35 | self._best_value = None 36 | self._interval_trigger = util.get_trigger(trigger) 37 | self._init_summary() 38 | self._compare = compare 39 | self._print_triger = print_triger 40 | 41 | self._n_times = n_times 42 | self._n_fails = 0 43 | 44 | self._max_trigger = max_trigger 45 | self._n_triggers = 0 46 | 47 | def __call__(self, trainer): 48 | """Decides whether the extension should be called on this iteration. 49 | 50 | Args: 51 | trainer (~chainer.training.Trainer): Trainer object that this 52 | trigger is associated with. The ``observation`` of this trainer 53 | is used to determine if the trigger should fire. 54 | 55 | Returns: 56 | bool: ``True`` if the corresponding extension should be invoked in 57 | this iteration. 58 | 59 | """ 60 | 61 | observation = trainer.observation 62 | summary = self._summary 63 | key = self._key 64 | if key in observation: 65 | summary.add({key: observation[key]}) 66 | 67 | if not self._interval_trigger(trainer): 68 | return False 69 | 70 | stats = summary.compute_mean() 71 | value = float(stats[key]) # copy to CPU 72 | self._init_summary() 73 | 74 | self._n_triggers += 1 75 | if self._n_triggers == 1: 76 | return False 77 | if self._max_trigger is not None \ 78 | and self._n_triggers >= self._max_trigger: 79 | return True 80 | 81 | if self._best_value is None or self._compare(self._best_value, value): 82 | self._best_value = value 83 | self._n_fails = 0 84 | return False 85 | self._n_fails += 1 86 | if self._n_fails >= self._n_times: 87 | return True 88 | else: 89 | return False 90 | 91 | def _init_summary(self): 92 | self._summary = reporter.DictSummary() 93 | 94 | 95 | class FailMaxValueTrigger(FailBestValueTrigger): 96 | def __init__(self, key, trigger=(1, 'epoch'), n_times=5, max_trigger=None): 97 | super(FailMaxValueTrigger, self).__init__( 98 | key, lambda max_value, new_value: new_value > max_value, 99 | trigger, n_times, max_trigger) 100 | 101 | 102 | class FailMinValueTrigger(FailBestValueTrigger): 103 | def __init__(self, key, trigger=(1, 'epoch'), n_times=5, max_trigger=None): 104 | super(FailMinValueTrigger, self).__init__( 105 | key, lambda min_value, new_value: new_value < min_value, 106 | trigger, n_times, max_trigger) 107 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | import collections 4 | import io 5 | import json 6 | import os 7 | 8 | import numpy as np 9 | import progressbar 10 | 11 | import chainer 12 | from chainer import cuda 13 | from chainer.dataset import convert 14 | 15 | class Outer(list): 16 | def __init__(self): 17 | list.__init__(self) 18 | def write(self, item): 19 | if '\n' not in item: 20 | self.append(item.strip()) 21 | 22 | class UnkDropout(chainer.dataset.DatasetMixin): 23 | def __init__(self, dataset, unk, ratio=0.01): 24 | self.dataset = dataset 25 | self.unk = unk 26 | self.ratio = ratio 27 | 28 | def __len__(self): 29 | return len(self.dataset) 30 | 31 | def get_example(self, i): 32 | x, y = self.dataset[i] 33 | if chainer.config.train: 34 | rand = np.random.rand(x.size - 2) < self.ratio 35 | ## keep bos and eos 36 | _x = x.copy() 37 | _x[1:-1] = np.where(rand, self.unk, _x[1:-1]) 38 | return (_x, y) 39 | return (x, y) 40 | 41 | 42 | def convert_xt_batch_seq(xt_batch_seq, gpu): 43 | batchsize = len(xt_batch_seq[0]) 44 | seq_len = len(xt_batch_seq) 45 | xt_batch_seq = np.array(xt_batch_seq, 'i') 46 | ## (bproplen, batch, 2) 47 | xt_batch_seq = convert.to_device(gpu, xt_batch_seq) 48 | xp = cuda.get_array_module(xt_batch_seq) 49 | x_seq_batch = xp.split( 50 | xt_batch_seq[:, :, 0].T.reshape(batchsize * seq_len), 51 | batchsize, axis=0) 52 | t_seq_batch = xp.split( 53 | xt_batch_seq[:, :, 1].T.reshape(batchsize * seq_len), 54 | batchsize, axis=0) 55 | return x_seq_batch, t_seq_batch 56 | 57 | 58 | def count_words_from_file(counts, file_path): 59 | bar = progressbar.ProgressBar() 60 | for l in bar(io.open(file_path, encoding='utf-8')): 61 | ## TODO: parallel 62 | if l.strip(): 63 | words = l.strip().split() 64 | for word in words: 65 | counts[word] += 1 66 | return counts 67 | 68 | 69 | def count_words(dataset, alpha=0.4): 70 | counts = collections.defaultdict(int) 71 | for w in dataset: 72 | counts[w] += 1 73 | counts = [counts[i] for i in range(len(counts))] 74 | counts = np.array(counts, 'f') 75 | counts /= counts.sum() 76 | counts = counts ** alpha 77 | counts = counts.tolist() 78 | return counts 79 | 80 | 81 | def make_chain_dataset(file_path, vocab={}, update_vocab=False, 82 | chain_length=2): 83 | dataset = [] 84 | chain = [] 85 | unk_id = vocab[''] 86 | 87 | def make_array(chain): 88 | array_chain = [] 89 | for words in chain: 90 | tokens = [] 91 | for word in words: 92 | if update_vocab: 93 | if word not in vocab: 94 | vocab[word] = len(vocab) 95 | tokens.append(vocab.get(word, unk_id)) 96 | array_chain.append(np.array(tokens, 'i')) 97 | return array_chain 98 | 99 | for line in io.open(file_path, encoding='utf-8'): 100 | if not line.strip(): 101 | if len(chain) >= chain_length: 102 | dataset.append(make_array(chain)) 103 | chain = [] 104 | continue 105 | words = line.strip().split() + [''] 106 | chain.append(words) 107 | if len(chain) >= chain_length: 108 | dataset.append(make_array(chain)) 109 | return dataset, vocab 110 | 111 | 112 | def tokenize_text(file_path, vocab={}, update_vocab=False): 113 | tokens = [] 114 | unk_id = vocab[''] 115 | with io.open(file_path, encoding='utf-8') as f: 116 | for line in f: 117 | words = line.split() + [''] 118 | for word in words: 119 | if update_vocab: 120 | if word not in vocab: 121 | vocab[word] = len(vocab) 122 | tokens.append(vocab.get(word, unk_id)) 123 | return tokens, vocab 124 | 125 | 126 | def get_wikitext_words_and_vocab( 127 | name='wikitext-2', base_dir='datasets', vocab=None): 128 | assert(name in ['wikitext-2', 'wikitext-103']) 129 | base_dir2 = os.path.join(base_dir, name) 130 | predata_path = os.path.join(base_dir2, 'preprocessed_data.json') 131 | if os.path.exists(predata_path) and vocab is None: 132 | train, valid, test, vocab = json.load(open(predata_path)) 133 | else: 134 | prepared_vocab = (vocab is not None) 135 | if not prepared_vocab: 136 | vocab = {'': 0, '': 1} 137 | train, vocab = tokenize_text( 138 | os.path.join(base_dir2, 'wiki.train.tokens'), 139 | vocab, update_vocab=not prepared_vocab) 140 | valid, _ = tokenize_text( 141 | os.path.join(base_dir2, 'wiki.valid.tokens'), 142 | vocab, update_vocab=False) 143 | test, _ = tokenize_text( 144 | os.path.join(base_dir2, 'wiki.test.tokens'), 145 | vocab, update_vocab=False) 146 | json.dump([train, valid, test, vocab], open(predata_path, 'w')) 147 | return train, valid, test, vocab 148 | -------------------------------------------------------------------------------- /train_text_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import argparse 4 | import datetime 5 | import json 6 | import os 7 | import numpy 8 | 9 | import cupy 10 | import nets as bilm_nets 11 | import chainer 12 | from chainer import training 13 | from chainer.training import extensions 14 | from evaluator import MicroEvaluator 15 | 16 | from text_classification import nets as class_nets 17 | from text_classification.nlp_utils import convert_seq 18 | from text_classification import text_datasets 19 | 20 | import args_of_text_classifier 21 | from utils import UnkDropout, Outer 22 | 23 | class DottableDict(dict): 24 | def __init__(self, *args, **kwargs): 25 | dict.__init__(self, *args, **kwargs) 26 | self.__dict__ = self 27 | def allowDotting(self, state=True): 28 | if state: 29 | self.__dict__ = self 30 | else: 31 | self.__dict__ = dict() 32 | 33 | """load global parameters""" 34 | with open("global.config", "r", encoding='utf-8') as f: 35 | args = DottableDict(json.load(f)) 36 | 37 | def main(): 38 | print(json.dumps(args.__dict__, indent=2)) 39 | train(dir="aug_data", print_log=True) 40 | 41 | def train(dir="datasets", print_log=False): 42 | chainer.CHAINER_SEED = args.seed 43 | numpy.random.seed(args.seed) 44 | 45 | vocab = None 46 | 47 | """load a dataset""" 48 | if args.dataset == 'dbpedia': 49 | train, test, vocab = text_datasets.get_dbpedia( 50 | vocab=vocab) 51 | elif args.dataset.startswith('imdb.'): 52 | train, test, vocab = text_datasets.get_imdb( 53 | fine_grained=args.dataset.endswith('.fine'), 54 | vocab=vocab) 55 | elif args.dataset in ['TREC', 'stsa.binary', 'stsa.fine', 56 | 'custrev', 'mpqa', 'rt-polarity', 'subj']: 57 | train, test, real_test, vocab = text_datasets.read_text_dataset( 58 | args.dataset, vocab=None, dir=dir) 59 | n_class = len(set([int(d[1]) for d in train])) 60 | 61 | ## str.format() uses '{}' and ':' to replace '%' 62 | print(' # train data: {}'.format(len(train))) 63 | print(' # test data: {}'.format(len(test))) 64 | print(' # vocab: {}'.format(len(vocab))) 65 | print(' # class: {}'.format(n_class)) 66 | 67 | chainer.CHAINER_SEED = args.seed 68 | numpy.random.seed(args.seed) 69 | train = UnkDropout(train, vocab[''], 0.01) 70 | train_iter = chainer.iterators.SerialIterator(train, args.batchsize) 71 | test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) 72 | 73 | ## Setup a model 74 | chainer.CHAINER_SEED = args.seed 75 | numpy.random.seed(args.seed) 76 | if args.model == 'rnn': 77 | Encoder = class_nets.RNNEncoder 78 | elif args.model == 'cnn': 79 | Encoder = class_nets.CNNEncoder 80 | elif args.model == 'bow': 81 | Encoder = class_nets.BOWMLPEncoder 82 | encoder = Encoder(n_layers=args.layer, n_vocab=len(vocab), 83 | n_units=args.unit, dropout=args.dropout) 84 | model = class_nets.TextClassifier(encoder, n_class) 85 | 86 | if args.bilm: 87 | bilm = bilm_nets.BiLanguageModel( 88 | len(vocab), args.bilm_units, args.bilm_layer, args.bilm_dropout) 89 | n_labels = len(set([int(v[1]) for v in test])) 90 | print('# labels = ', n_labels) 91 | if not args.no_label: 92 | print('add label') 93 | bilm.add_label_condition_nets(n_labels, args.bilm_unit) 94 | else: 95 | print('not using label') 96 | chainer.serializers.load_npz(args.bilm, bilm) 97 | with model.encoder.init_scope(): 98 | initialW = numpy.array(model.encoder.embed.W.data) 99 | del model.encoder.embed 100 | model.encoder.embed = bilm_nets.PredictiveEmbed( 101 | len(vocab), args.unit, bilm, args.dropout, 102 | initialW=initialW) 103 | model.encoder.use_predict_embed = True 104 | 105 | model.encoder.embed.setup( 106 | mode=args.bilm_mode, 107 | temp=args.bilm_temp, 108 | word_lower_bound=0., 109 | gold_lower_bound=0., 110 | gumbel=args.bilm_gumbel, 111 | residual=args.bilm_residual, 112 | wordwise=args.bilm_wordwise, 113 | add_original=args.bilm_add_original, 114 | augment_ratio=args.bilm_ratio, 115 | ignore_unk=vocab['']) 116 | 117 | if args.gpu >= 0: 118 | ## Make a specified GPU current 119 | chainer.cuda.get_device_from_id(args.gpu).use() 120 | model.to_gpu() # copy the model to the GPU 121 | model.xp.random.seed(args.seed) 122 | chainer.CHAINER_SEED = args.seed 123 | numpy.random.seed(args.seed) 124 | 125 | ## Setup an optimizer 126 | optimizer = chainer.optimizers.Adam(args.learning_rate) 127 | optimizer.setup(model) 128 | 129 | ## Setup a trainer 130 | updater = training.StandardUpdater( 131 | train_iter, optimizer, 132 | converter=convert_seq, device=args.gpu) 133 | 134 | from triggers import FailMaxValueTrigger 135 | stop_trigger = FailMaxValueTrigger( 136 | key='validation/main/accuracy', trigger=(1, 'epoch'), 137 | n_times=args.stop_epoch, max_trigger=args.epoch) 138 | trainer = training.Trainer( 139 | updater, stop_trigger, out=args.out) 140 | 141 | ## Evaluate the model with the test dataset for each epoch 142 | ## validation set 143 | trainer.extend(MicroEvaluator( 144 | test_iter, model, 145 | converter=convert_seq, device=args.gpu)) 146 | 147 | if args.validation: 148 | real_test_iter = chainer.iterators.SerialIterator( 149 | real_test, args.batchsize, 150 | repeat=False, shuffle=False) 151 | eval_on_real_test = MicroEvaluator( 152 | real_test_iter, model, 153 | converter=convert_seq, device=args.gpu) 154 | eval_on_real_test.default_name = 'test' 155 | trainer.extend(eval_on_real_test) 156 | 157 | ## Take a best snapshot 158 | record_trigger = training.triggers.MaxValueTrigger( 159 | 'validation/main/accuracy', (1, 'epoch')) 160 | if args.save_model: 161 | trainer.extend(extensions.snapshot_object( 162 | model, 'best_model.npz'), 163 | trigger=record_trigger) 164 | 165 | ## Write a log of evaluation statistics for each epoch 166 | out = Outer() 167 | trainer.extend(extensions.LogReport()) 168 | if print_log: 169 | trainer.extend(extensions.PrintReport( 170 | ['epoch', 'main/loss', 'validation/main/loss', 171 | 'main/accuracy', 'validation/main/accuracy', 172 | 'test/main/loss', 'test/main/accuracy', 173 | 'elapsed_time']), trigger=record_trigger) 174 | else: 175 | trainer.extend(extensions.PrintReport( 176 | ['main/accuracy', 'validation/main/accuracy', 177 | 'test/main/accuracy'], out=out), trigger=record_trigger) 178 | 179 | ## Run the training 180 | trainer.run() 181 | 182 | ## Free all unused memory blocks "cached" in the memory pool 183 | mempool = cupy.get_default_memory_pool() 184 | mempool.free_all_blocks() 185 | return float(out[-1]) 186 | 187 | if __name__ == '__main__': 188 | main() 189 | 190 | -------------------------------------------------------------------------------- /text_classification/text_datasets.py: -------------------------------------------------------------------------------- 1 | import csv 2 | import glob 3 | import io 4 | import os 5 | import shutil 6 | import tarfile 7 | import tempfile 8 | 9 | import numpy 10 | 11 | import chainer 12 | 13 | from .nlp_utils import make_vocab 14 | from .nlp_utils import normalize_text 15 | from .nlp_utils import split_text 16 | from .nlp_utils import transform_to_array 17 | import json 18 | URL_DBPEDIA = 'https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz' # NOQA 19 | URL_IMDB = 'http://ai.stanford.edu/~amaas/data/sentiment/aclImdb_v1.tar.gz' 20 | URL_OTHER_BASE = 'https://raw.githubusercontent.com/harvardnlp/sent-conv-torch/master/data/' # NOQA 21 | 22 | 23 | def download_dbpedia(): 24 | path = chainer.dataset.cached_download(URL_DBPEDIA) 25 | tf = tarfile.open(path, 'r') 26 | return tf 27 | 28 | 29 | def read_dbpedia(tf, split, shrink=1, char_based=False): 30 | dataset = [] 31 | f = tf.extractfile('dbpedia_csv/{}.csv'.format(split)) 32 | for i, (label, title, text) in enumerate(csv.reader(f)): 33 | if i % shrink != 0: 34 | continue 35 | label = int(label) - 1 # Index begins from 1 36 | tokens = split_text(normalize_text(text), char_based) 37 | dataset.append((tokens, label)) 38 | return dataset 39 | 40 | 41 | def get_dbpedia(vocab=None, shrink=1, char_based=False): 42 | tf = download_dbpedia() 43 | 44 | print('read dbpedia') 45 | train = read_dbpedia(tf, 'train', shrink=shrink, char_based=char_based) 46 | test = read_dbpedia(tf, 'test', shrink=shrink, char_based=char_based) 47 | 48 | if vocab is None: 49 | print('constract vocabulary based on frequency') 50 | vocab = make_vocab(train) 51 | 52 | train = transform_to_array(train, vocab) 53 | test = transform_to_array(test, vocab) 54 | 55 | return train, test, vocab 56 | 57 | 58 | def download_imdb(): 59 | path = chainer.dataset.cached_download(URL_IMDB) 60 | tf = tarfile.open(path, 'r') 61 | # To read many files fast, tarfile is untared 62 | path = tempfile.mkdtemp() 63 | tf.extractall(path) 64 | return path 65 | 66 | 67 | def read_imdb(path, split, 68 | shrink=1, fine_grained=False, char_based=False): 69 | fg_label_dict = {'1': 0, '2': 0, '3': 1, '4': 1, 70 | '7': 2, '8': 2, '9': 3, '10': 3} 71 | 72 | def read_and_label(posneg, label): 73 | dataset = [] 74 | target = os.path.join(path, 'aclImdb', split, posneg, '*') 75 | for i, f_path in enumerate(glob.glob(target)): 76 | if i % shrink != 0: 77 | continue 78 | with io.open(f_path, encoding='utf-8', errors='ignore') as f: 79 | text = f.read().strip() 80 | tokens = split_text(normalize_text(text), char_based) 81 | if fine_grained: 82 | # extract from f_path. e.g. /pos/200_8.txt -> 8 83 | label = fg_label_dict[f_path.split('_')[-1][:-4]] 84 | dataset.append((tokens, label)) 85 | else: 86 | dataset.append((tokens, label)) 87 | return dataset 88 | 89 | pos_dataset = read_and_label('pos', 0) 90 | neg_dataset = read_and_label('neg', 1) 91 | return pos_dataset + neg_dataset 92 | 93 | 94 | def get_imdb(vocab=None, shrink=1, fine_grained=False, 95 | char_based=False): 96 | tmp_path = download_imdb() 97 | 98 | print('read imdb') 99 | train = read_imdb(tmp_path, 'train', 100 | shrink=shrink, fine_grained=fine_grained, 101 | char_based=char_based) 102 | test = read_imdb(tmp_path, 'test', 103 | shrink=shrink, fine_grained=fine_grained, 104 | char_based=char_based) 105 | 106 | shutil.rmtree(tmp_path) 107 | 108 | if vocab is None: 109 | print('constract vocabulary based on frequency') 110 | vocab = make_vocab(train) 111 | 112 | train = transform_to_array(train, vocab) 113 | test = transform_to_array(test, vocab) 114 | 115 | return train, test, vocab 116 | 117 | 118 | def download_other_dataset(name): 119 | if name in ['custrev', 'mpqa', 'rt-polarity', 'subj']: 120 | files = [name + '.all'] 121 | elif name == 'TREC': 122 | files = [name + suff for suff in ['.train.all', '.test.all']] 123 | else: 124 | files = [name + suff for suff in ['.train', '.test']] 125 | file_paths = [] 126 | for f_name in files: 127 | url = os.path.join(URL_OTHER_BASE, f_name) 128 | path = chainer.dataset.cached_download(url) 129 | file_paths.append(path) 130 | return file_paths 131 | 132 | 133 | def read_other_dataset(path, shrink=1, char_based=False): 134 | dataset = [] 135 | with io.open(path, encoding='utf-8', errors='ignore') as f: 136 | for i, l in enumerate(f): 137 | if i % shrink != 0 or not len(l.strip()) >= 3: 138 | continue 139 | label, text = l.strip().split(None, 1) 140 | label = int(label) 141 | tokens = split_text(normalize_text(text), char_based) 142 | dataset.append((tokens, label)) 143 | return dataset 144 | 145 | 146 | def _read_tsv(input_file, quotechar=None): 147 | """Reads a tab separated value file.""" 148 | with open(input_file, "r") as f: 149 | reader = csv.reader(f, delimiter="\t", quotechar=quotechar) 150 | lines = [] 151 | for line in reader: 152 | lines.append((split_text(normalize_text(line[0])), line[1])) 153 | return lines[1:] 154 | 155 | def read_text_dataset(name, vocab=None, dir="datasets"): 156 | assert(name in ['TREC', 'stsa.binary', 'stsa.fine', 157 | 'custrev', 'mpqa', 'rt-polarity', 'subj']) 158 | train_path = os.path.join(dir, name, "train.tsv") 159 | eval_path = os.path.join("datasets", name, "dev.tsv") 160 | test_path = os.path.join("datasets", name, "test.tsv") 161 | 162 | train = _read_tsv(train_path) 163 | eval = _read_tsv(eval_path) 164 | test = _read_tsv(test_path) 165 | 166 | if vocab is None: 167 | print('constract vocabulary based on frequency') 168 | all_data = [] 169 | all_data.extend(train) 170 | all_data.extend(eval) 171 | vocab = make_vocab(all_data) 172 | 173 | train = transform_to_array(train, vocab) 174 | eval = transform_to_array(eval, vocab) 175 | test = transform_to_array(test, vocab) 176 | 177 | return train, eval, test, vocab 178 | 179 | def get_other_text_dataset(name, vocab=None, shrink=1, 180 | char_based=False, seed=777): 181 | assert(name in ['TREC', 'stsa.binary', 'stsa.fine', 182 | 'custrev', 'mpqa', 'rt-polarity', 'subj']) 183 | datasets = download_other_dataset(name) 184 | train = read_other_dataset( 185 | datasets[0], shrink=shrink, char_based=char_based) 186 | if len(datasets) == 2: 187 | test = read_other_dataset( 188 | datasets[1], shrink=shrink, char_based=char_based) 189 | else: 190 | numpy.random.seed(seed) 191 | alldata = numpy.random.permutation(train) 192 | train = alldata[:-len(alldata) // 10] 193 | test = alldata[-len(alldata) // 10:] 194 | 195 | if vocab is None: 196 | print('constract vocabulary based on frequency') 197 | vocab = make_vocab(train) 198 | 199 | train = transform_to_array(train, vocab) 200 | test = transform_to_array(test, vocab) 201 | 202 | return train, test, vocab 203 | -------------------------------------------------------------------------------- /cbert_finetune.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import shutil 7 | import logging 8 | import argparse 9 | import random 10 | import json 11 | from tqdm import tqdm, trange 12 | 13 | import numpy as np 14 | import torch 15 | from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler 16 | 17 | from transformers import BertTokenizer, BertModel, BertForMaskedLM, AdamW, WarmupLinearSchedule 18 | #import train_text_classifier_new 19 | 20 | import cbert_utils 21 | 22 | """initialize logger""" 23 | logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', 24 | datefmt='%m/%d/%Y %H:%M:%S', 25 | level=logging.INFO) 26 | logger = logging.getLogger(__name__) 27 | 28 | """cuda or cpu""" 29 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 30 | 31 | 32 | def main(): 33 | parser = argparse.ArgumentParser() 34 | 35 | ## Required parameters 36 | parser.add_argument("--data_dir", default="datasets", type=str, 37 | help="The input data dir. Should contain the .tsv files (or other data files) for the task.") 38 | parser.add_argument("--output_dir", default="aug_data", type=str, 39 | help="The output dir for augmented dataset.") 40 | parser.add_argument("--save_model_dir", default="cbert_model", type=str, 41 | help="The cache dir for saved model.") 42 | parser.add_argument("--bert_model", default="bert-base-uncased", type=str, 43 | help="The path of pretrained bert model.") 44 | parser.add_argument("--task_name", default="subj", type=str, 45 | help="The name of the task to train.") 46 | parser.add_argument("--max_seq_length", default=64, type=int, 47 | help="The maximum total input sequence length after WordPiece tokenization. \n" 48 | "Sequence longer than this will be truncated, and sequences shorter \n" 49 | "than this wille be padded.") 50 | parser.add_argument("--do_lower_case", default=False, action='store_true', 51 | help="Set this flag if you are using an uncased model.") 52 | parser.add_argument("--train_batch_size", default=32, type=int, 53 | help="Total batch size for training.") 54 | parser.add_argument("--learning_rate", default=5e-5, type=float, 55 | help="The initial learning rate for Adam.") 56 | parser.add_argument("--num_train_epochs", default=10.0, type=float, 57 | help="Total number of training epochs to perform.") 58 | parser.add_argument("--warmup_proportion", default=0.1, type=float, 59 | help="Proportion of training to perform linear learning rate warmup for." 60 | "E.g., 0.1 = 10%% of training.") 61 | parser.add_argument("--seed", type=int, default=42, 62 | help="random seed for initialization") 63 | parser.add_argument("--save_every_epoch", default=True, action='store_true') 64 | 65 | args = parser.parse_args() 66 | print(args) 67 | 68 | """prepare processors""" 69 | AugProcessor = cbert_utils.AugProcessor() 70 | processors = { 71 | ## you can add your processor here 72 | "TREC": AugProcessor, 73 | "stsa.fine": AugProcessor, 74 | "stsa.binary": AugProcessor, 75 | "mpqa": AugProcessor, 76 | "rt-polarity": AugProcessor, 77 | "subj": AugProcessor, 78 | } 79 | 80 | task_name = args.task_name 81 | if task_name not in processors: 82 | raise ValueError("Task not found: %s" % (task_name)) 83 | processor = processors[task_name] 84 | label_list = processor.get_labels(task_name) 85 | 86 | """prepare model""" 87 | random.seed(args.seed) 88 | np.random.seed(args.seed) 89 | torch.manual_seed(args.seed) 90 | 91 | ## leveraging lastest bert module in Transformers to load pre-trained model tokenizer 92 | tokenizer = BertTokenizer.from_pretrained(args.bert_model) 93 | 94 | ## leveraging lastest bert module in Transformers to load pre-trained model (weights) 95 | model = BertForMaskedLM.from_pretrained(args.bert_model) 96 | 97 | if task_name == 'stsa.fine': 98 | model.bert.embeddings.token_type_embeddings = torch.nn.Embedding(5, 768) 99 | model.bert.embeddings.token_type_embeddings.weight.data.normal_(mean=0.0, std=0.02) 100 | elif task_name == 'TREC': 101 | model.bert.embeddings.token_type_embeddings = torch.nn.Embedding(6, 768) 102 | model.bert.embeddings.token_type_embeddings.weight.data.normal_(mean=0.0, std=0.02) 103 | 104 | args.data_dir = os.path.join(args.data_dir, task_name) 105 | args.output_dir = os.path.join(args.output_dir, task_name) 106 | os.makedirs(args.output_dir, exist_ok=True) 107 | 108 | train_examples = processor.get_train_examples(args.data_dir) 109 | train_features, num_train_steps, train_dataloader = \ 110 | cbert_utils.construct_train_dataloader(train_examples, label_list, args.max_seq_length, 111 | args.train_batch_size, args.num_train_epochs, tokenizer, device) 112 | 113 | ## if you have a GPU, put everything on cuda 114 | model.cuda() 115 | 116 | logger.info("***** Running training *****") 117 | logger.info(" Num examples = %d", len(train_features)) 118 | logger.info(" Batch size = %d", args.train_batch_size) 119 | logger.info(" Num steps = %d", num_train_steps) 120 | 121 | ## in Transformers, optimizer and schedules are splitted and instantiated like this: 122 | param_optimizer = list(model.named_parameters()) 123 | no_decay = ['bias', 'gamma', 'beta'] 124 | optimizer_grounded_parameters = [ 125 | {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.01}, 126 | {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 'weight_decay_rate': 0.0} 127 | ] 128 | optimizer = AdamW(optimizer_grounded_parameters, lr=args.learning_rate, correct_bias=False) 129 | model.train() 130 | 131 | os.makedirs(args.save_model_dir, exist_ok=True) 132 | save_model_dir = os.path.join(args.save_model_dir, task_name) 133 | if not os.path.exists(save_model_dir): 134 | os.mkdir(save_model_dir) 135 | 136 | for e in trange(int(args.num_train_epochs), desc="Epoch"): 137 | avg_loss = 0. 138 | 139 | for step, batch in enumerate(train_dataloader): 140 | batch = tuple(t.cuda() for t in batch) 141 | _, input_ids, input_mask, segment_ids, masked_ids = batch 142 | """train generator at each batch""" 143 | optimizer.zero_grad() 144 | outputs = model(input_ids, input_mask, segment_ids, 145 | masked_lm_labels=masked_ids) 146 | loss = outputs[0] 147 | loss.backward() 148 | avg_loss += loss.item() 149 | optimizer.step() 150 | if (step + 1) % 50 == 0: 151 | print("avg_loss: {}".format(avg_loss / 50)) 152 | avg_loss = 0 153 | if args.save_every_epoch: 154 | save_model_name = "BertForMaskedLM_" + task_name + "_epoch_" + str(e + 1) 155 | save_model_path = os.path.join(save_model_dir, save_model_name) 156 | torch.save(model, save_model_path) 157 | else: 158 | if (e + 1) % 10 == 0: 159 | save_model_name = "BertForMaskedLM_" + task_name + "_epoch_" + str(e + 1) 160 | save_model_path = os.path.join(save_model_dir, save_model_name) 161 | torch.save(model, save_model_path) 162 | 163 | if __name__ == "__main__": 164 | main() -------------------------------------------------------------------------------- /cbert_augdata.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import csv 6 | import os 7 | import shutil 8 | import logging 9 | import argparse 10 | import random 11 | from tqdm import tqdm, trange 12 | import json 13 | 14 | import numpy as np 15 | import torch 16 | from torch.utils.data import TensorDataset, DataLoader, RandomSampler 17 | 18 | from transformers import BertTokenizer, BertModel, BertForMaskedLM, AdamW 19 | 20 | import cbert_utils 21 | import train_text_classifier 22 | 23 | #PYTORCH_PRETRAINED_BERT_CACHE = ".pytorch_pretrained_bert" 24 | 25 | """initialize logger""" 26 | logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', 27 | datefmt='%m/%d/%Y %H:%M:%S', 28 | level=logging.INFO) 29 | logger = logging.getLogger(__name__) 30 | 31 | """cuda or cpu""" 32 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 33 | 34 | def convert_ids_to_str(ids, tokenizer): 35 | """converts token_ids into str.""" 36 | tokens = [] 37 | for token_id in ids: 38 | token = tokenizer._convert_id_to_token(token_id) 39 | tokens.append(token) 40 | outputs = cbert_utils.rev_wordpiece(tokens) 41 | return outputs 42 | 43 | def main(): 44 | parser = argparse.ArgumentParser() 45 | 46 | ## required parameters 47 | parser.add_argument("--data_dir", default="datasets", type=str, 48 | help="The input data dir. Should contain the .tsv files (or other data files) for the task.") 49 | parser.add_argument("--output_dir", default="aug_data", type=str, 50 | help="The output dir for augmented dataset") 51 | parser.add_argument("--save_model_dir", default="cbert_model", type=str, 52 | help="The cache dir for saved model.") 53 | parser.add_argument("--bert_model", default="bert-base-uncased", type=str, 54 | help="The path of pretrained bert model.") 55 | parser.add_argument("--task_name", default="subj",type=str, 56 | help="The name of the task to train.") 57 | parser.add_argument("--max_seq_length", default=64, type=int, 58 | help="The maximum total input sequence length after WordPiece tokenization. \n" 59 | "Sequences longer than this will be truncated, and sequences shorter \n" 60 | "than this will be padded.") 61 | parser.add_argument("--do_lower_case", default=False, action='store_true', 62 | help="Set this flag if you are using an uncased model.") 63 | parser.add_argument("--train_batch_size", default=32, type=int, 64 | help="Total batch size for training.") 65 | parser.add_argument("--learning_rate", default=5e-5, type=float, 66 | help="The initial learning rate for Adam.") 67 | parser.add_argument("--num_train_epochs", default=9.0, type=float, 68 | help="Total number of training epochs to perform.") 69 | parser.add_argument("--warmup_proportion", default=0.1, type=float, 70 | help="Proportion of training to perform linear learning rate warmup for. " 71 | "E.g., 0.1 = 10%% of training.") 72 | parser.add_argument('--seed', default=42, type=int, 73 | help="random seed for initialization") 74 | parser.add_argument('--sample_num', default=1, type=int, 75 | help="sample number") 76 | parser.add_argument('--sample_ratio', default=7, type=int, 77 | help="sample ratio") 78 | parser.add_argument('--gpu', default=0, type=int, 79 | help="gpu id") 80 | parser.add_argument('--temp', default=1.0, type=float, 81 | help="temperature") 82 | 83 | 84 | args = parser.parse_args() 85 | with open("global.config", 'r') as f: 86 | configs_dict = json.load(f) 87 | 88 | args.task_name = configs_dict.get("dataset") 89 | args.output_dir = args.output_dir + '_{}_{}_{}_{}'.format(args.sample_num, args.sample_ratio, args.gpu, args.temp) 90 | print(args) 91 | 92 | """prepare processors""" 93 | AugProcessor = cbert_utils.AugProcessor() 94 | processors = { 95 | ## you can add your processor here 96 | "TREC": AugProcessor, 97 | "stsa.fine": AugProcessor, 98 | "stsa.binary": AugProcessor, 99 | "mpqa": AugProcessor, 100 | "rt-polarity": AugProcessor, 101 | "subj": AugProcessor, 102 | } 103 | 104 | task_name = args.task_name 105 | if task_name not in processors: 106 | raise ValueError("Task not found: %s" % (task_name)) 107 | processor = processors[task_name] 108 | label_list = processor.get_labels(task_name) 109 | 110 | ## prepare for model 111 | random.seed(args.seed) 112 | np.random.seed(args.seed) 113 | torch.manual_seed(args.seed) 114 | tokenizer = BertTokenizer.from_pretrained(args.bert_model, do_lower_case=args.do_lower_case) 115 | 116 | def load_model(model_name): 117 | weights_path = os.path.join(args.save_model_dir, model_name) 118 | model = torch.load(weights_path) 119 | return model 120 | 121 | args.data_dir = os.path.join(args.data_dir, task_name) 122 | args.output_dir = os.path.join(args.output_dir, task_name) 123 | if os.path.exists(args.output_dir): 124 | shutil.rmtree(args.output_dir) 125 | shutil.copytree("aug_data/{}".format(task_name), args.output_dir) 126 | 127 | ## prepare for training 128 | train_examples = processor.get_train_examples(args.data_dir) 129 | train_features, num_train_steps, train_dataloader = \ 130 | cbert_utils.construct_train_dataloader(train_examples, label_list, args.max_seq_length, 131 | args.train_batch_size, args.num_train_epochs, tokenizer, device) 132 | 133 | logger.info("***** Running training *****") 134 | logger.info(" Num examples = %d", len(train_examples)) 135 | logger.info(" Batch size = %d", args.train_batch_size) 136 | logger.info(" Num steps = %d", num_train_steps) 137 | 138 | save_model_dir = os.path.join(args.save_model_dir, task_name) 139 | if not os.path.exists(save_model_dir): 140 | os.mkdir(save_model_dir) 141 | MASK_id = cbert_utils.convert_tokens_to_ids(['[MASK]'], tokenizer)[0] 142 | 143 | origin_train_path = os.path.join(args.output_dir, "train_origin.tsv") 144 | save_train_path = os.path.join(args.output_dir, "train.tsv") 145 | shutil.copy(origin_train_path, save_train_path) 146 | best_test_acc = train_text_classifier.train("aug_data_{}_{}_{}_{}".format(args.sample_num, args.sample_ratio, args.gpu, args.temp)) 147 | print("before augment best acc:{}".format(best_test_acc)) 148 | 149 | for e in trange(int(args.num_train_epochs), desc="Epoch"): 150 | torch.cuda.empty_cache() 151 | cbert_name = "{}/BertForMaskedLM_{}_epoch_{}".format(task_name.lower(), task_name.lower(), e+1) 152 | model = load_model(cbert_name) 153 | model.cuda() 154 | shutil.copy(origin_train_path, save_train_path) 155 | save_train_file = open(save_train_path, 'a') 156 | tsv_writer = csv.writer(save_train_file, delimiter='\t') 157 | for _, batch in enumerate(train_dataloader): 158 | model.eval() 159 | batch = tuple(t.cuda() for t in batch) 160 | init_ids, _, input_mask, segment_ids, _ = batch 161 | input_lens = [sum(mask).item() for mask in input_mask] 162 | masked_idx = np.squeeze([np.random.randint(0, l, max(l//args.sample_ratio, 1)) for l in input_lens]) 163 | for ids, idx in zip(init_ids, masked_idx): 164 | ids[idx] = MASK_id 165 | predictions = model(init_ids, input_mask, segment_ids) 166 | predictions = torch.nn.functional.softmax(predictions[0]/args.temp, dim=2) 167 | for ids, idx, preds, seg in zip(init_ids, masked_idx, predictions, segment_ids): 168 | preds = torch.multinomial(preds, args.sample_num, replacement=True)[idx] 169 | if len(preds.size()) == 2: 170 | preds = torch.transpose(preds, 0, 1) 171 | for pred in preds: 172 | ids[idx] = pred 173 | new_str = convert_ids_to_str(ids.cpu().numpy(), tokenizer) 174 | tsv_writer.writerow([new_str, seg[0].item()]) 175 | torch.cuda.empty_cache() 176 | predictions = predictions.detach().cpu() 177 | model.cpu() 178 | torch.cuda.empty_cache() 179 | bak_train_path = os.path.join(args.output_dir, "train_epoch_{}.tsv".format(e)) 180 | shutil.copy(save_train_path, bak_train_path) 181 | best_test_acc = train_text_classifier.train("aug_data_{}_{}_{}_{}".format(args.sample_num, args.sample_ratio, args.gpu, args.temp)) 182 | print("epoch {} augment best acc:{}".format(e, best_test_acc)) 183 | 184 | if __name__ == "__main__": 185 | main() 186 | -------------------------------------------------------------------------------- /text_classification/nets.py: -------------------------------------------------------------------------------- 1 | import numpy 2 | 3 | import chainer 4 | import chainer.functions as F 5 | import chainer.links as L 6 | from chainer import reporter 7 | 8 | embed_init = chainer.initializers.Uniform(.25) 9 | 10 | 11 | def sequence_embed(embed, xs, dropout=0.): 12 | """Efficient embedding function for variable-length sequences 13 | 14 | This output is equally to 15 | "return [F.dropout(embed(x), ratio=dropout) for x in xs]". 16 | However, calling the functions is one-shot and faster. 17 | 18 | Args: 19 | embed (callable): A :func:`~chainer.functions.embed_id` function 20 | or :class:`~chainer.links.EmbedID` link. 21 | xs (list of :class:`~chainer.Variable` or :class:`numpy.ndarray` or \ 22 | :class:`cupy.ndarray`): i-th element in the list is an input variable, 23 | which is a :math:`(L_i, )`-shaped int array. 24 | dropout (float): Dropout ratio. 25 | 26 | Returns: 27 | list of ~chainer.Variable: Output variables. i-th element in the 28 | list is an output variable, which is a :math:`(L_i, N)`-shaped 29 | float array. :math:`(N)` is the number of dimensions of word embedding. 30 | 31 | """ 32 | x_len = [len(x) for x in xs] 33 | x_section = numpy.cumsum(x_len[:-1]) 34 | ex = embed(F.concat(xs, axis=0)) 35 | ex = F.dropout(ex, ratio=dropout) 36 | exs = F.split_axis(ex, x_section, 0) 37 | return exs 38 | 39 | 40 | def block_embed(embed, x, dropout=0.): 41 | """Embedding function followed by convolution 42 | 43 | Args: 44 | embed (callable): A :func:`~chainer.functions.embed_id` function 45 | or :class:`~chainer.links.EmbedID` link. 46 | x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ 47 | :class:`cupy.ndarray`): Input variable, which 48 | is a :math:`(B, L)`-shaped int array. Its first dimension 49 | :math:`(B)` is assumed to be the *minibatch dimension*. 50 | The second dimension :math:`(L)` is the length of padded 51 | sentences. 52 | dropout (float): Dropout ratio. 53 | 54 | Returns: 55 | ~chainer.Variable: Output variable. A float array with shape 56 | of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions 57 | of word embedding. 58 | 59 | """ 60 | e = embed(x) 61 | e = F.dropout(e, ratio=dropout) 62 | e = F.transpose(e, (0, 2, 1)) 63 | e = e[:, :, :, None] 64 | return e 65 | 66 | 67 | class TextClassifier(chainer.Chain): 68 | 69 | """A classifier using a given encoder. 70 | 71 | This chain encodes a sentence and classifies it into classes. 72 | 73 | Args: 74 | encoder (Link): A callable encoder, which extracts a feature. 75 | Input is a list of variables whose shapes are 76 | "(sentence_length, )". 77 | Output is a variable whose shape is "(batchsize, n_units)". 78 | n_class (int): The number of classes to be predicted. 79 | 80 | """ 81 | 82 | def __init__(self, encoder, n_class, dropout=0.1): 83 | super(TextClassifier, self).__init__() 84 | with self.init_scope(): 85 | self.encoder = encoder 86 | self.output = L.Linear(encoder.out_units, n_class) 87 | self.dropout = dropout 88 | 89 | def __call__(self, xs, ys=None): 90 | if ys is None: 91 | xs, ys = xs 92 | concat_outputs = self.predict(xs, ys=ys) 93 | concat_truths = F.concat(ys, axis=0) 94 | 95 | loss = F.softmax_cross_entropy(concat_outputs, concat_truths) 96 | accuracy = F.accuracy(concat_outputs, concat_truths) 97 | reporter.report({'loss': loss.data}, self) 98 | reporter.report({'accuracy': accuracy.data}, self) 99 | return loss 100 | 101 | def predict(self, xs, ys=None, softmax=False, argmax=False): 102 | concat_encodings = F.dropout(self.encoder(xs, labels=ys), 103 | ratio=self.dropout) 104 | concat_outputs = self.output(concat_encodings) 105 | if softmax: 106 | return F.softmax(concat_outputs).data 107 | elif argmax: 108 | return self.xp.argmax(concat_outputs.data, axis=1) 109 | else: 110 | return concat_outputs 111 | 112 | 113 | class RNNEncoder(chainer.Chain): 114 | 115 | """A LSTM-RNN Encoder with Word Embedding. 116 | 117 | This model encodes a sentence sequentially using LSTM. 118 | 119 | Args: 120 | n_layers (int): The number of LSTM layers. 121 | n_vocab (int): The size of vocabulary. 122 | n_units (int): The number of units of a LSTM layer and word embedding. 123 | dropout (float): The dropout ratio. 124 | 125 | """ 126 | 127 | def __init__(self, n_layers, n_vocab, n_units, dropout=0.1): 128 | super(RNNEncoder, self).__init__( 129 | embed=L.EmbedID(n_vocab, n_units, 130 | initialW=embed_init), 131 | encoder=L.NStepLSTM(n_layers, n_units, n_units, dropout), 132 | ) 133 | self.n_layers = n_layers 134 | self.out_units = n_units 135 | self.dropout = dropout 136 | self.use_predict_embed = False 137 | 138 | def __call__(self, xs, labels=None): 139 | exs = sequence_embed(self.embed, xs, self.dropout) 140 | if self.use_predict_embed and chainer.config.train: 141 | exs = self.embed.embed_xs_with_prediction( 142 | xs, labels=labels, batch='list') 143 | last_h, last_c, ys = self.encoder(None, None, exs) 144 | assert(last_h.shape == (self.n_layers, len(xs), self.out_units)) 145 | concat_outputs = last_h[-1] 146 | return concat_outputs 147 | 148 | 149 | class CNNEncoder(chainer.Chain): 150 | 151 | """A CNN encoder with word embedding. 152 | 153 | This model encodes a sentence as a set of n-gram chunks 154 | using convolutional filters. 155 | Following the convolution, max-pooling is applied over time. 156 | Finally, the output is fed into a multilayer perceptron. 157 | 158 | Args: 159 | n_layers (int): The number of layers of MLP. 160 | n_vocab (int): The size of vocabulary. 161 | n_units (int): The number of units of MLP and word embedding. 162 | dropout (float): The dropout ratio. 163 | 164 | """ 165 | 166 | def __init__(self, n_layers, n_vocab, n_units, dropout=0.1): 167 | out_units = n_units // 3 168 | super(CNNEncoder, self).__init__( 169 | embed=L.EmbedID(n_vocab, n_units, ignore_label=-1, 170 | initialW=embed_init), 171 | cnn_w3=L.Convolution2D( 172 | n_units, out_units, ksize=(3, 1), stride=1, pad=(2, 0), 173 | nobias=True), 174 | cnn_w4=L.Convolution2D( 175 | n_units, out_units, ksize=(4, 1), stride=1, pad=(3, 0), 176 | nobias=True), 177 | cnn_w5=L.Convolution2D( 178 | n_units, out_units, ksize=(5, 1), stride=1, pad=(4, 0), 179 | nobias=True), 180 | mlp=MLP(n_layers, out_units * 3, dropout) 181 | ) 182 | self.out_units = out_units * 3 183 | self.dropout = dropout 184 | self.use_predict_embed = False 185 | 186 | def __call__(self, xs, labels=None): 187 | x_block = chainer.dataset.convert.concat_examples(xs, padding=-1) 188 | ex_block = block_embed(self.embed, x_block, self.dropout) 189 | if self.use_predict_embed and chainer.config.train: 190 | ex_block = self.embed.embed_xs_with_prediction( 191 | xs, labels=labels, batch='concat') 192 | h_w3 = F.max(self.cnn_w3(ex_block), axis=2) 193 | h_w4 = F.max(self.cnn_w4(ex_block), axis=2) 194 | h_w5 = F.max(self.cnn_w5(ex_block), axis=2) 195 | h = F.concat([h_w3, h_w4, h_w5], axis=1) 196 | h = F.relu(h) 197 | h = F.dropout(h, ratio=self.dropout) 198 | h = self.mlp(h) 199 | return h 200 | 201 | 202 | class MLP(chainer.ChainList): 203 | 204 | """A multilayer perceptron. 205 | 206 | Args: 207 | n_vocab (int): The size of vocabulary. 208 | n_units (int): The number of units in a hidden or output layer. 209 | dropout (float): The dropout ratio. 210 | 211 | """ 212 | 213 | def __init__(self, n_layers, n_units, dropout=0.1): 214 | super(MLP, self).__init__() 215 | for i in range(n_layers): 216 | self.add_link(L.Linear(None, n_units)) 217 | self.dropout = dropout 218 | self.out_units = n_units 219 | 220 | def __call__(self, x): 221 | for i, link in enumerate(self.children()): 222 | x = F.dropout(x, ratio=self.dropout) 223 | x = F.relu(link(x)) 224 | return x 225 | 226 | 227 | class BOWEncoder(chainer.Chain): 228 | 229 | """A BoW encoder with word embedding. 230 | 231 | This model encodes a sentence as just a set of words by averaging. 232 | 233 | Args: 234 | n_vocab (int): The size of vocabulary. 235 | n_units (int): The number of units of word embedding. 236 | dropout (float): The dropout ratio. 237 | 238 | """ 239 | 240 | def __init__(self, n_vocab, n_units, dropout=0.1): 241 | super(BOWEncoder, self).__init__( 242 | embed=L.EmbedID(n_vocab, n_units, ignore_label=-1, 243 | initialW=embed_init), 244 | ) 245 | self.out_units = n_units 246 | self.dropout = dropout 247 | 248 | def __call__(self, xs): 249 | x_block = chainer.dataset.convert.concat_examples(xs, padding=-1) 250 | ex_block = block_embed(self.embed, x_block) 251 | x_len = self.xp.array([len(x) for x in xs], 'i')[:, None, None] 252 | h = F.sum(ex_block, axis=2) / x_len 253 | return h 254 | 255 | 256 | class BOWMLPEncoder(chainer.Chain): 257 | 258 | """A BOW encoder with word embedding and MLP. 259 | 260 | This model encodes a sentence as just a set of words by averaging. 261 | Additionally, its output is fed into a multilayer perceptron. 262 | 263 | Args: 264 | n_layers (int): The number of layers of MLP. 265 | n_vocab (int): The size of vocabulary. 266 | n_units (int): The number of units of MLP and word embedding. 267 | dropout (float): The dropout ratio. 268 | 269 | """ 270 | 271 | def __init__(self, n_layers, n_vocab, n_units, dropout=0.1): 272 | super(BOWMLPEncoder, self).__init__( 273 | bow_encoder=BOWEncoder(n_vocab, n_units, dropout), 274 | mlp_encoder=MLP(n_layers, n_units, dropout) 275 | ) 276 | self.out_units = n_units 277 | 278 | def __call__(self, xs): 279 | h = self.bow_encoder(xs) 280 | h = self.mlp_encoder(h) 281 | return h 282 | -------------------------------------------------------------------------------- /cbert_utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import csv 3 | import logging 4 | import random 5 | 6 | import torch 7 | from torch.utils.data import TensorDataset, DataLoader, RandomSampler 8 | 9 | """initialize logger""" 10 | logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s', 11 | datefmt='%m/%d/%Y %H:%M:%S', 12 | level=logging.INFO) 13 | logger = logging.getLogger(__name__) 14 | 15 | class InputExample(object): 16 | """A single training/test example for simple sequence classification.""" 17 | def __init__(self, guid, text_a, label=None): 18 | """Constructs a InputExample/ 19 | 20 | Args: 21 | guid: Unique id for the example. 22 | text: string. The untokenized text of the first sequence. For single 23 | sequence tasks, only this sequence must be specified. 24 | """ 25 | self.guid = guid 26 | self.text_a = text_a 27 | self.label = label 28 | 29 | class InputFeature(object): 30 | """A single set of features of data.""" 31 | 32 | def __init__(self, init_ids, input_ids, input_mask, segment_ids, masked_lm_labels): 33 | self.init_ids = init_ids 34 | self.input_ids = input_ids 35 | self.input_mask = input_mask 36 | self.segment_ids = segment_ids 37 | self.masked_lm_labels = masked_lm_labels 38 | 39 | class DataProcessor(object): 40 | """Base class for data converters for sequence classification data sets.""" 41 | 42 | def get_train_examples(self, data_dir): 43 | """Gets a collection of 'InputExample's for the train set.""" 44 | raise NotImplementedError() 45 | 46 | def get_dev_examples(self, data_dir): 47 | """Gets a collection of 'InputExample's for the dev set.""" 48 | raise NotImplementedError() 49 | 50 | def get_labels(self): 51 | """Gets the list of labels for this data set.""" 52 | raise NotImplementedError() 53 | 54 | @classmethod 55 | def _read_tsv(cls, input_file, quotechar=None): 56 | """Reads a tab separated value file.""" 57 | with open(input_file, "r") as f: 58 | reader = csv.reader(f, delimiter="\t", quotechar=quotechar) 59 | lines = [] 60 | for line in reader: 61 | lines.append(line) 62 | return lines 63 | 64 | class AugProcessor(DataProcessor): 65 | """Processor for dataset to be augmented.""" 66 | 67 | def get_train_examples(self, data_dir): 68 | """See base calss.""" 69 | return self._create_examples( 70 | self._read_tsv(os.path.join(data_dir, "train.tsv")), "train") 71 | 72 | def get_dev_examples(self, data_dir): 73 | """See base class.""" 74 | return self._create_examples( 75 | self._read_tsv(os.path.join(data_dir, "dev.tsv")), "dev") 76 | 77 | def get_labels(self, name): 78 | """add your dataset here""" 79 | if name in ['stsa.binary', 'mpqa', 'rt-polarity', 'subj']: 80 | return ["0", "1"] 81 | elif name in ['stsa.fine']: 82 | return ["0", "1", "2", "3", "4"] 83 | elif name in ['TREC']: 84 | return ["0", "1", "2", "3", "4", "5"] 85 | 86 | def _create_examples(self, lines, set_type): 87 | """Create examples for the training and dev sets.""" 88 | examples = [] 89 | for (i, line) in enumerate(lines): 90 | if i == 0: 91 | continue 92 | guid = "%s-%s" % (set_type, i) 93 | text_a = line[0] 94 | label = line[-1] 95 | examples.append( 96 | InputExample(guid=guid, text_a=text_a, label=label)) 97 | return examples 98 | 99 | def convert_examples_to_features(examples, label_list, max_seq_length, tokenizer): 100 | """Loads a data file into a list of 'InputBatch's.""" 101 | 102 | label_map = {} 103 | for (i, label) in enumerate(label_list): 104 | label_map[label] = i 105 | 106 | features = [] 107 | for (ex_index, example) in enumerate(examples): 108 | # The convention in BERT is: 109 | # tokens: [CLS] is this jack ##son ##ville ? [SEP] 110 | # type_ids: 0 0 0 0 0 0 0 0 111 | tokens_a = tokenizer._tokenize(example.text_a) 112 | tokens_label = label_map[example.label] 113 | tokens, init_ids, input_ids, input_mask, segment_ids, masked_lm_labels = \ 114 | extract_features(tokens_a, tokens_label, max_seq_length, tokenizer) 115 | 116 | """convert label to label_id""" 117 | label_id = label_map[example.label] 118 | 119 | """consturct features""" 120 | features.append( 121 | InputFeature( 122 | init_ids=init_ids, 123 | input_ids=input_ids, 124 | input_mask=input_mask, 125 | segment_ids=segment_ids, 126 | masked_lm_labels=masked_lm_labels)) 127 | 128 | """print examples""" 129 | if ex_index < 5: 130 | logger.info("[cbert] *** Example ***") 131 | logger.info("[cbert] guid: %s" % (example.guid)) 132 | logger.info("[cbert] tokens: %s" % " ".join( 133 | [str(x) for x in tokens])) 134 | logger.info("[cbert] init_ids: %s" % " ".join([str(x) for x in init_ids])) 135 | logger.info("[cbert] input_ids: %s" % " ".join([str(x) for x in input_ids])) 136 | logger.info("[cbert] input_mask: %s" % " ".join([str(x) for x in input_mask])) 137 | logger.info("[cbert] segment_ids: %s" % " ".join([str(x) for x in segment_ids])) 138 | logger.info("[cbert] masked_lm_labels: %s" % " ".join([str(x) for x in masked_lm_labels])) 139 | return features 140 | 141 | def construct_train_dataloader(train_examples, label_list, max_seq_length, train_batch_size, num_train_epochs, tokenizer, device): 142 | """construct dataloader for training data""" 143 | 144 | num_train_steps = None 145 | global_step = 0 146 | train_features = convert_examples_to_features( 147 | train_examples, label_list, max_seq_length, tokenizer) 148 | num_train_steps = int(len(train_features) / train_batch_size * num_train_epochs) 149 | 150 | all_init_ids = torch.tensor([f.init_ids for f in train_features], dtype=torch.long, device=device) 151 | all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long, device=device) 152 | all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long, device=device) 153 | all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long, device=device) 154 | all_masked_lm_labels = torch.tensor([f.masked_lm_labels for f in train_features], dtype=torch.long, device=device) 155 | 156 | tensor_dataset = TensorDataset(all_init_ids, all_input_ids, all_input_mask, 157 | all_segment_ids, all_masked_lm_labels) 158 | train_sampler = RandomSampler(tensor_dataset) 159 | train_dataloader = DataLoader(tensor_dataset, sampler=train_sampler, batch_size=train_batch_size) 160 | return train_features, num_train_steps, train_dataloader 161 | 162 | def rev_wordpiece(str): 163 | """wordpiece function used in cbert""" 164 | 165 | #print(str) 166 | if len(str) > 1: 167 | for i in range(len(str)-1, 0, -1): 168 | if str[i] == '[PAD]': 169 | str.remove(str[i]) 170 | elif len(str[i]) > 1 and str[i][0]=='#' and str[i][1]=='#': 171 | str[i-1] += str[i][2:] 172 | str.remove(str[i]) 173 | return " ".join(str[1:-1]) 174 | 175 | 176 | def extract_features(tokens_a, tokens_label, max_seq_length, tokenizer): 177 | """extract features from tokens""" 178 | 179 | if len(tokens_a) > max_seq_length - 2: 180 | tokens_a = tokens_a[0: (max_seq_length - 2)] 181 | 182 | tokens = [] 183 | segment_ids = [] 184 | tokens.append('[CLS]') 185 | segment_ids.append(tokens_label) 186 | for token in tokens_a: 187 | tokens.append(token) 188 | segment_ids.append(tokens_label) 189 | tokens.append('[SEP]') 190 | segment_ids.append(tokens_label) 191 | 192 | ## construct init_ids for each example 193 | init_ids = convert_tokens_to_ids(tokens, tokenizer) 194 | 195 | ## construct input_ids for each example, we replace the word_id using 196 | ## the ids of masked words (mask words based on original sentence) 197 | masked_lm_probs = 0.15 198 | max_predictions_per_seq = 20 199 | rng = random.Random(12345) 200 | original_masked_lm_labels = [-1] * max_seq_length 201 | (output_tokens, masked_lm_positions, 202 | masked_lm_labels) = create_masked_lm_predictions( 203 | tokens, masked_lm_probs, original_masked_lm_labels, max_predictions_per_seq, rng, tokenizer) 204 | input_ids = convert_tokens_to_ids(output_tokens, tokenizer) 205 | 206 | # The mask has 1 for real tokens and 0 for padding tokens. Only real 207 | # tokens are attended to. 208 | input_mask = [1] * len(input_ids) 209 | 210 | # Zero-pad up to the sequence length. 211 | while len(input_ids) < max_seq_length: 212 | init_ids.append(0) 213 | input_ids.append(0) 214 | input_mask.append(0) 215 | segment_ids.append(0) 216 | 217 | assert len(init_ids) == max_seq_length 218 | assert len(input_ids) == max_seq_length 219 | assert len(input_mask) == max_seq_length 220 | assert len(segment_ids) == max_seq_length 221 | 222 | return tokens, init_ids, input_ids, input_mask, segment_ids, masked_lm_labels 223 | 224 | def convert_tokens_to_ids(tokens, tokenizer): 225 | """Converts tokens into ids using the vocab.""" 226 | ids = [] 227 | for token in tokens: 228 | token_id = tokenizer._convert_token_to_id(token) 229 | ids.append(token_id) 230 | return ids 231 | 232 | def create_masked_lm_predictions(tokens, masked_lm_probs, masked_lm_labels, 233 | max_predictions_per_seq, rng, tokenizer): 234 | """Creates the predictions for the masked LM objective.""" 235 | 236 | #vocab_words = list(tokenizer.vocab.keys()) 237 | 238 | cand_indexes = [] 239 | for (i, token) in enumerate(tokens): 240 | if token == "[CLS]" or token == "[SEP]": 241 | continue 242 | cand_indexes.append(i) 243 | 244 | rng.shuffle(cand_indexes) 245 | len_cand = len(cand_indexes) 246 | output_tokens = list(tokens) 247 | num_to_predict = min(max_predictions_per_seq, 248 | max(1, int(round(len(tokens) * masked_lm_probs)))) 249 | 250 | masked_lm_positions = [] 251 | covered_indexes = set() 252 | for index in cand_indexes: 253 | if len(masked_lm_positions) >= num_to_predict: 254 | break 255 | if index in covered_indexes: 256 | continue 257 | covered_indexes.add(index) 258 | 259 | masked_token = None 260 | ## 80% of the time, replace with [MASK] 261 | if rng.random() < 0.8: 262 | masked_token = "[MASK]" 263 | else: 264 | ## 10% of the time, keep original 265 | if rng.random() < 0.5: 266 | masked_token = tokens[index] 267 | ## 10% of the time, replace with random word 268 | else: 269 | masked_token = tokens[cand_indexes[rng.randint(0, len_cand - 1)]] 270 | 271 | masked_lm_labels[index] = convert_tokens_to_ids([tokens[index]], tokenizer)[0] 272 | output_tokens[index] = masked_token 273 | masked_lm_positions.append(index) 274 | return output_tokens, masked_lm_positions, masked_lm_labels -------------------------------------------------------------------------------- /nets.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """Sample script of recurrent neural network language model. 3 | 4 | This code is ported from the following implementation written in Torch. 5 | https://github.com/tomsercu/lstm 6 | 7 | """ 8 | from __future__ import division 9 | from __future__ import print_function 10 | import argparse 11 | import json 12 | import warnings 13 | 14 | import numpy as np 15 | 16 | import chainer 17 | from chainer import cuda 18 | import chainer.functions as F 19 | import chainer.links as L 20 | from chainer import training 21 | from chainer.training import extensions 22 | from chainer import reporter 23 | 24 | embed_init = chainer.initializers.Uniform(.25) 25 | 26 | 27 | def embed_seq_batch(embed, seq_batch, dropout=0., context=None): 28 | x_len = [len(seq) for seq in seq_batch] 29 | x_section = np.cumsum(x_len[:-1]) 30 | ex = embed(F.concat(seq_batch, axis=0)) 31 | ex = F.dropout(ex, dropout) 32 | if context is not None: 33 | ids = [embed.xp.full((l, ), i).astype('i') 34 | for i, l in enumerate(x_len)] 35 | ids = embed.xp.concatenate(ids, axis=0) 36 | cx = F.embed_id(ids, context) 37 | ex = F.concat([ex, cx], axis=1) 38 | exs = F.split_axis(ex, x_section, 0) 39 | return exs 40 | 41 | 42 | class NormalOutputLayer(L.Linear): 43 | 44 | def __init__(self, *args, **kwargs): 45 | super(NormalOutputLayer, self).__init__(*args, **kwargs) 46 | 47 | def output_and_loss(self, h, t, reduce='mean'): 48 | logit = self(h) 49 | return F.softmax_cross_entropy( 50 | logit, t, normalize=False, reduce=reduce) 51 | 52 | def output(self, h, t=None): 53 | return self(h) 54 | 55 | 56 | class MLP(chainer.Chain): 57 | def __init__(self, n_hidden, in_units, hidden_units, out_units, dropout=0.): 58 | super(MLP, self).__init__() 59 | with self.init_scope(): 60 | self.l1 = L.Linear(in_units, hidden_units) 61 | self.lo = L.Linear(hidden_units, out_units) 62 | for i in range(2, n_hidden + 2): 63 | setattr(self, 'l{}'.format(i), 64 | L.Linear(hidden_units, hidden_units)) 65 | self.n_hidden = n_hidden 66 | self.dropout = dropout 67 | 68 | def __call__(self, x, label=None): 69 | x = self.l1(x) 70 | for i in range(2, self.n_hidden + 2): 71 | x = F.relu(x) 72 | x = F.dropout(x, self.dropout) 73 | x = getattr(self, 'l{}'.format(i))(x) 74 | x = F.relu(x) 75 | x = F.dropout(x, self.dropout) 76 | x = self.lo(x) 77 | x = F.relu(x) 78 | if hasattr(self, 'l1_label') and label is not None: 79 | x += self.l1_label(label) 80 | return x 81 | 82 | 83 | class BiLanguageModel(chainer.Chain): 84 | 85 | def __init__(self, n_vocab, n_units, n_layers=2, dropout=0.5): 86 | super(BiLanguageModel, self).__init__() 87 | with self.init_scope(): 88 | self.embed = L.EmbedID(n_vocab, n_units) 89 | RNN = L.NStepLSTM 90 | self.encoder_fw = RNN(n_layers, n_units, n_units, dropout) 91 | self.encoder_bw = RNN(n_layers, n_units, n_units, dropout) 92 | self.output = NormalOutputLayer(n_units, n_vocab) 93 | self.mlp = MLP(1, n_units * 2, n_units, n_units, dropout) 94 | self.dropout = dropout 95 | self.n_units = n_units 96 | self.n_layers = n_layers 97 | 98 | def add_label_condition_nets(self, n_labels, label_units): 99 | with self.init_scope(): 100 | self.mlp.add_link( 101 | 'l1_label', 102 | L.Linear(None, self.mlp.l1.b.size, nobias=True, 103 | initialW=chainer.initializers.Uniform(0.4))) 104 | self.n_labels = n_labels 105 | 106 | def encode(self, seq_batch, labels=None): 107 | seq_batch_wo_2bos = [seq[2::] for seq in seq_batch] 108 | revseq_batch_wo_2bos = [seq[::-1] for seq in seq_batch_wo_2bos] 109 | seq_batch_wo_2eos = [seq[:-2] for seq in seq_batch] 110 | bwe_seq_batch = self.embed_seq_batch(revseq_batch_wo_2bos) 111 | fwe_seq_batch = self.embed_seq_batch(seq_batch_wo_2eos) 112 | bwt_out_batch = self.encode_seq_batch( 113 | bwe_seq_batch, self.encoder_bw)[-1] 114 | fwt_out_batch = self.encode_seq_batch( 115 | fwe_seq_batch, self.encoder_fw)[-1] 116 | revbwt_concat = F.concat( 117 | [b[::-1] for b in bwt_out_batch], axis=0) 118 | fwt_concat = F.concat(fwt_out_batch, axis=0) 119 | t_out_concat = F.concat([fwt_concat, revbwt_concat], axis=1) 120 | t_out_concat = F.dropout(t_out_concat, self.dropout) 121 | if hasattr(self.mlp, 'l1_label') and labels is not None: 122 | labels = [[labels[i]] * f.shape[0] 123 | for i, f in enumerate(fwt_out_batch)] 124 | labels = self.xp.concatenate(sum(labels, []), axis=0) 125 | label_concat = self.xp.zeros( 126 | (t_out_concat.shape[0], self.n_labels)).astype('f') 127 | label_concat[self.xp.arange(len(labels)), labels] = 1. 128 | t_out_concat = self.mlp(t_out_concat, label_concat) 129 | else: 130 | t_out_concat = self.mlp(t_out_concat) 131 | return t_out_concat 132 | 133 | def embed_seq_batch(self, x_seq_batch, context=None): 134 | e_seq_batch = embed_seq_batch( 135 | self.embed, x_seq_batch, 136 | dropout=self.dropout, 137 | context=context) 138 | return e_seq_batch 139 | 140 | def encode_seq_batch(self, e_seq_batch, encoder): 141 | hs, cs, y_seq_batch = encoder(None, None, e_seq_batch) 142 | return hs, cs, y_seq_batch 143 | 144 | def calculate_loss(self, input_chain, **args): 145 | seq_batch = sum(input_chain, []) 146 | t_out_concat = self.encode(seq_batch) 147 | seq_batch_mid = [seq[1:-1] for seq in seq_batch] 148 | seq_mid_concat = F.concat(seq_batch_mid, axis=0) 149 | n_tok = sum(len(s) for s in seq_batch_mid) 150 | loss = self.output_and_loss_from_concat( 151 | t_out_concat, seq_mid_concat, 152 | normalize=n_tok) 153 | reporter.report({'perp': self.xp.exp(loss.data)}, self) 154 | return loss 155 | 156 | def output_and_loss_from_concat(self, y, t, normalize=None): 157 | y = F.dropout(y, ratio=self.dropout) 158 | loss = self.output.output_and_loss(y, t) 159 | if normalize is not None: 160 | loss *= 1. * t.shape[0] / normalize 161 | else: 162 | loss *= t.shape[0] 163 | return loss 164 | 165 | def calculate_loss_with_labels(self, seq_batch_with_labels): 166 | seq_batch, labels = seq_batch_with_labels 167 | t_out_concat = self.encode(seq_batch, labels=labels) 168 | seq_batch_mid = [seq[1:-1] for seq in seq_batch] 169 | seq_mid_concat = F.concat(seq_batch_mid, axis=0) 170 | n_tok = sum(len(s) for s in seq_batch_mid) 171 | loss = self.output_and_loss_from_concat( 172 | t_out_concat, seq_mid_concat, 173 | normalize=n_tok) 174 | reporter.report({'perp': self.xp.exp(loss.data)}, self) 175 | return loss 176 | 177 | def predict(self, xs, labels=None): 178 | with chainer.using_config('train', False), chainer.no_backprop_mode(): 179 | t_out_concat = self.encode(xs, labels=labels, add_original=0.) 180 | prob_concat = F.softmax(self.output.output(t_out_concat)).data 181 | x_len = [len(x) for x in xs] 182 | x_section = np.cumsum(x_len[:-1]) 183 | ps = np.split(cuda.to_cpu(prob_concat), x_section, 0) 184 | return ps 185 | 186 | def predict_embed(self, 187 | xs, embedW, 188 | labels=None, 189 | dropout=0., 190 | mode='sampling', 191 | temp=1., 192 | word_lower_bound=0., 193 | gold_lower_bound=0., 194 | gumbel=True, 195 | residual=0., 196 | wordwise=True, 197 | add_original=0., 198 | augment_ratio=0.25): 199 | x_len = [len(x) for x in xs] 200 | with chainer.using_config('train', False), chainer.no_backprop_mode(): 201 | t_out_concat = self.encode(xs, labels=labels) 202 | prob_concat = self.output.output(t_out_concat).data 203 | prob_concat /= temp 204 | prob_concat += self.xp.random.gumbel( 205 | size=prob_concat.shape).astype('f') 206 | prob_concat = F.softmax(prob_concat).data 207 | 208 | out_concat = F.embed_id( 209 | self.xp.argmax(prob_concat, axis=1).astype(np.int32), embedW) 210 | 211 | # insert eos 212 | eos = embedW[0][None] 213 | new_out = [] 214 | count = 0 215 | for i, x in enumerate(xs): 216 | new_out.append(eos) 217 | new_out.append(out_concat[count:count + len(xs) - 2]) 218 | new_out.append(eos) 219 | count += len(xs) - 2 220 | out_concat = F.concat(new_out, axis=0) 221 | 222 | def embed_func(x): return F.embed_id(x, embedW, ignore_label=-1) 223 | raw_concat = F.concat( 224 | sequence_embed(embed_func, xs, self.dropout), axis=0) 225 | b, u = raw_concat.shape 226 | 227 | mask = self.xp.broadcast_to( 228 | (self.xp.random.rand(b, 1) < augment_ratio), 229 | raw_concat.shape) 230 | out_concat = F.where(mask, out_concat, raw_concat) 231 | 232 | x_len = [len(x) for x in xs] 233 | x_section = np.cumsum(x_len[:-1]) 234 | out_concat = F.dropout(out_concat, dropout) 235 | exs = F.split_axis(out_concat, x_section, 0) 236 | return exs 237 | 238 | 239 | def sequence_embed(embed, xs, dropout=0.): 240 | """Efficient embedding function for variable-length sequences 241 | 242 | This output is equally to 243 | "return [F.dropout(embed(x), ratio=dropout) for x in xs]". 244 | However, calling the functions is one-shot and faster. 245 | 246 | Args: 247 | embed (callable): A :func:`~chainer.functions.embed_id` function 248 | or :class:`~chainer.links.EmbedID` link. 249 | xs (list of :class:`~chainer.Variable` or :class:`numpy.ndarray` or \ 250 | :class:`cupy.ndarray`): i-th element in the list is an input variable, 251 | which is a :math:`(L_i, )`-shaped int array. 252 | dropout (float): Dropout ratio. 253 | 254 | Returns: 255 | list of ~chainer.Variable: Output variables. i-th element in the 256 | list is an output variable, which is a :math:`(L_i, N)`-shaped 257 | float array. :math:`(N)` is the number of dimensions of word embedding. 258 | 259 | """ 260 | x_len = [len(x) for x in xs] 261 | x_section = np.cumsum(x_len[:-1]) 262 | ex = embed(F.concat(xs, axis=0)) 263 | ex = F.dropout(ex, ratio=dropout) 264 | exs = F.split_axis(ex, x_section, 0) 265 | return exs 266 | 267 | 268 | def block_embed(embed, x, dropout=0.): 269 | """Embedding function followed by convolution 270 | 271 | Args: 272 | embed (callable): A :func:`~chainer.functions.embed_id` function 273 | or :class:`~chainer.links.EmbedID` link. 274 | x (:class:`~chainer.Variable` or :class:`numpy.ndarray` or \ 275 | :class:`cupy.ndarray`): Input variable, which 276 | is a :math:`(B, L)`-shaped int array. Its first dimension 277 | :math:`(B)` is assumed to be the *minibatch dimension*. 278 | The second dimension :math:`(L)` is the length of padded 279 | sentences. 280 | dropout (float): Dropout ratio. 281 | 282 | Returns: 283 | ~chainer.Variable: Output variable. A float array with shape 284 | of :math:`(B, N, L, 1)`. :math:`(N)` is the number of dimensions 285 | of word embedding. 286 | 287 | """ 288 | e = embed(x) 289 | e = F.dropout(e, ratio=dropout) 290 | e = F.transpose(e, (0, 2, 1)) 291 | e = e[:, :, :, None] 292 | return e 293 | 294 | 295 | class PredictiveEmbed(chainer.Chain): 296 | def __init__(self, n_vocab, n_units, bilm, 297 | dropout=0., initialW=embed_init): 298 | super(PredictiveEmbed, self).__init__() 299 | with self.init_scope(): 300 | self.embed = L.EmbedID(n_vocab, n_units, ignore_label=-1, 301 | initialW=initialW) 302 | self.bilm = bilm 303 | self.n_vocab = n_vocab 304 | self.n_units = n_units 305 | self.dropout = dropout 306 | 307 | def __call__(self, x): 308 | return self.embed(x) 309 | 310 | def setup(self, 311 | mode='weighted_sum', 312 | temp=1., 313 | word_lower_bound=0., 314 | gold_lower_bound=0., 315 | gumbel=True, 316 | residual=0., 317 | wordwise=True, 318 | add_original=1., 319 | augment_ratio=0.5, 320 | ignore_unk=-1): 321 | self.config = { 322 | 'dropout': self.dropout, 323 | 'mode': mode, 324 | 'temp': temp, 325 | 'word_lower_bound': 0., 326 | 'gold_lower_bound': 0., 327 | 'gumbel': gumbel, 328 | 'residual': residual, 329 | 'wordwise': wordwise, 330 | 'add_original': add_original, 331 | 'augment_ratio': augment_ratio 332 | } 333 | if ignore_unk >= 0: 334 | self.bilm.output.b.data[ignore_unk] = -1e5 335 | 336 | def embed_xs(self, xs, batch='concat'): 337 | if batch == 'concat': 338 | x_block = chainer.dataset.convert.concat_examples(xs, padding=-1) 339 | ex_block = block_embed(self.embed, x_block, self.dropout) 340 | return ex_block 341 | elif batch == 'list': 342 | exs = sequence_embed(self.embed, xs, self.dropout) 343 | return exs 344 | else: 345 | raise NotImplementedError 346 | 347 | def embed_xs_with_prediction(self, xs, labels=None, batch='concat'): 348 | predicted_exs = self.bilm.predict_embed( 349 | xs, self.embed.W, 350 | labels=labels, 351 | dropout=self.config['dropout'], 352 | mode=self.config['mode'], 353 | temp=self.config['temp'], 354 | word_lower_bound=self.config['word_lower_bound'], 355 | gold_lower_bound=self.config['gold_lower_bound'], 356 | gumbel=self.config['gumbel'], 357 | residual=self.config['residual'], 358 | wordwise=self.config['wordwise'], 359 | add_original=self.config['add_original'], 360 | augment_ratio=self.config['augment_ratio']) 361 | if batch == 'concat': 362 | predicted_ex_block = F.pad_sequence(predicted_exs, padding=0.) 363 | predicted_ex_block = F.transpose( 364 | predicted_ex_block, (0, 2, 1))[:, :, :, None] 365 | return predicted_ex_block 366 | elif batch == 'list': 367 | return predicted_exs 368 | else: 369 | raise NotImplementedError 370 | -------------------------------------------------------------------------------- /aug_data/TREC/test.tsv: -------------------------------------------------------------------------------- 1 | sentence label 2 | How far is it from Denver to Aspen ? 5 3 | What county is Modesto , California in ? 4 4 | Who was Galileo ? 3 5 | What is an atom ? 0 6 | When did Hawaii become a state ? 5 7 | How tall is the Sears Building ? 5 8 | George Bush purchased a small interest in which baseball team ? 3 9 | What is Australia 's national flower ? 1 10 | Why does the moon turn orange ? 0 11 | What is autism ? 0 12 | What city had a world fair in 1900 ? 4 13 | What person 's head is on a dime ? 3 14 | What is the average weight of a Yellow Labrador ? 5 15 | Who was the first man to fly across the Pacific Ocean ? 3 16 | When did Idaho become a state ? 5 17 | What is the life expectancy for crickets ? 5 18 | What metal has the highest melting point ? 1 19 | Who developed the vaccination against polio ? 3 20 | What is epilepsy ? 0 21 | What year did the Titanic sink ? 5 22 | Who was the first American to walk in space ? 3 23 | What is a biosphere ? 0 24 | What river in the US is known as the Big Muddy ? 4 25 | What is bipolar disorder ? 0 26 | What is cholesterol ? 0 27 | Who developed the Macintosh computer ? 3 28 | What is caffeine ? 0 29 | What imaginary line is halfway between the North and South Poles ? 4 30 | Where is John Wayne airport ? 4 31 | What hemisphere is the Philippines in ? 4 32 | What is the average speed of the horses at the Kentucky Derby ? 5 33 | Where are the Rocky Mountains ? 4 34 | What are invertebrates ? 0 35 | What is the temperature at the center of the earth ? 5 36 | When did John F. Kennedy get elected as President ? 5 37 | How old was Elvis Presley when he died ? 5 38 | Where is the Orinoco River ? 4 39 | How far is the service line from the net in tennis ? 5 40 | How much fiber should you have per day ? 5 41 | How many Great Lakes are there ? 5 42 | Material called linen is made from what plant ? 1 43 | What is Teflon ? 0 44 | What is amitriptyline ? 0 45 | What is a shaman ? 0 46 | What is the proper name for a female walrus ? 1 47 | What is a group of turkeys called ? 1 48 | How long did Rip Van Winkle sleep ? 5 49 | What are triglycerides ? 0 50 | How many liters in a gallon ? 5 51 | What is the name of the chocolate company in San Francisco ? 3 52 | What are amphibians ? 0 53 | Who discovered x-rays ? 3 54 | Which comedian 's signature line is `` Can we talk '' ? 3 55 | What is fibromyalgia ? 0 56 | What is done with worn or outdated flags ? 0 57 | What does cc in engines mean ? 0 58 | When did Elvis Presley die ? 5 59 | What is the capital of Yugoslavia ? 4 60 | Where is Milan ? 4 61 | What is the speed hummingbirds fly ? 5 62 | What is the oldest city in the United States ? 4 63 | What was W.C. Fields ' real name ? 3 64 | What river flows between Fargo , North Dakota and Moorhead , Minnesota ? 4 65 | What do bats eat ? 1 66 | What state did the Battle of Bighorn take place in ? 4 67 | Who was Abraham Lincoln ? 3 68 | What do you call a newborn kangaroo ? 1 69 | What are spider veins ? 0 70 | What day and month did John Lennon die ? 5 71 | What strait separates North America from Asia ? 4 72 | What is the population of Seattle ? 5 73 | How much was a ticket for the Titanic ? 5 74 | What is the largest city in the world ? 4 75 | What American composer wrote the music for `` West Side Story '' ? 3 76 | Where is the Mall of the America ? 4 77 | What is the pH scale ? 0 78 | What type of currency is used in Australia ? 1 79 | How tall is the Gateway Arch in St. Louis , MO ? 5 80 | How much does the human adult female brain weigh ? 5 81 | Who was the first governor of Alaska ? 3 82 | What is a prism ? 0 83 | When was the first liver transplant ? 5 84 | Who was elected president of South Africa in 1994 ? 3 85 | What is the population of China ? 5 86 | When was Rosa Parks born ? 5 87 | Why is a ladybug helpful ? 0 88 | What is amoxicillin ? 0 89 | Who was the first female United States Representative ? 3 90 | What are xerophytes ? 0 91 | What country did Ponce de Leon come from ? 4 92 | The U.S. Department of Treasury first issued paper currency for the U.S. during which war ? 1 93 | What is desktop publishing ? 0 94 | What is the temperature of the sun 's surface ? 5 95 | What year did Canada join the United Nations ? 5 96 | What is the oldest university in the US ? 3 97 | Where is Prince Edward Island ? 4 98 | Mercury , what year was it discovered ? 5 99 | What is cryogenics ? 0 100 | What are coral reefs ? 0 101 | What is the longest major league baseball-winning streak ? 1 102 | What is neurology ? 0 103 | Who invented the calculator ? 3 104 | How do you measure earthquakes ? 0 105 | Who is Duke Ellington ? 3 106 | What county is Phoenix , AZ in ? 4 107 | What is a micron ? 0 108 | The sun 's core , what is the temperature ? 5 109 | What is the Ohio state bird ? 1 110 | When were William Shakespeare 's twins born ? 5 111 | What is the highest dam in the U.S. ? 4 112 | What color is a poison arrow frog ? 1 113 | What is acupuncture ? 0 114 | What is the length of the coastline of the state of Alaska ? 5 115 | What is the name of Neil Armstrong 's wife ? 3 116 | What is Hawaii 's state flower ? 1 117 | Who won Ms. American in 1989 ? 3 118 | When did the Hindenberg crash ? 5 119 | What mineral helps prevent osteoporosis ? 1 120 | What was the last year that the Chicago Cubs won the World Series ? 5 121 | Where is Perth ? 4 122 | What year did WWII begin ? 5 123 | What is the diameter of a golf ball ? 5 124 | What is an eclipse ? 0 125 | Who discovered America ? 3 126 | What is the earth 's diameter ? 5 127 | Which president was unmarried ? 3 128 | How wide is the Milky Way galaxy ? 5 129 | During which season do most thunderstorms occur ? 5 130 | What is Wimbledon ? 0 131 | What is the gestation period for a cat ? 5 132 | How far is a nautical mile ? 5 133 | Who was the abolitionist who led the raid on Harper 's Ferry in 1859 ? 3 134 | What does target heart rate mean ? 0 135 | What was the first satellite to go into space ? 1 136 | What is foreclosure ? 0 137 | What is the major fault line near Kentucky ? 1 138 | Where is the Holland Tunnel ? 4 139 | Who wrote the hymn `` Amazing Grace '' ? 3 140 | What position did Willie Davis play in baseball ? 3 141 | What are platelets ? 0 142 | What is severance pay ? 0 143 | What is the name of Roy Roger 's dog ? 1 144 | Where are the National Archives ? 4 145 | What is a baby turkey called ? 1 146 | What is poliomyelitis ? 0 147 | What is the longest bone in the human body ? 1 148 | Who is a German philosopher ? 3 149 | What were Christopher Columbus ' three ships ? 1 150 | What does Phi Beta Kappa mean ? 0 151 | What is nicotine ? 0 152 | What is another name for vitamin B1 ? 1 153 | Who discovered radium ? 3 154 | What are sunspots ? 0 155 | When was Algeria colonized ? 5 156 | What baseball team was the first to make numbers part of their uniform ? 3 157 | What continent is Egypt on ? 4 158 | What is the capital of Mongolia ? 4 159 | What is nanotechnology ? 0 160 | In the late 1700 's British convicts were used to populate which colony ? 4 161 | What state is the geographic center of the lower 48 states ? 4 162 | What is an obtuse angle ? 0 163 | What are polymers ? 0 164 | When is hurricane season in the Caribbean ? 5 165 | Where is the volcano Mauna Loa ? 4 166 | What is another astronomic term for the Northern Lights ? 1 167 | What peninsula is Spain part of ? 4 168 | When was Lyndon B. Johnson born ? 5 169 | What is acetaminophen ? 0 170 | What state has the least amount of rain per year ? 4 171 | Who founded American Red Cross ? 3 172 | What year did the Milwaukee Braves become the Atlanta Braves ? 5 173 | How fast is alcohol absorbed ? 5 174 | When is the summer solstice ? 5 175 | What is supernova ? 0 176 | Where is the Shawnee National Forest ? 4 177 | What U.S. state 's motto is `` Live free or Die '' ? 4 178 | Where is the Lourve ? 4 179 | When was the first stamp issued ? 5 180 | What primary colors do you mix to make orange ? 1 181 | How far is Pluto from the sun ? 5 182 | What body of water are the Canary Islands in ? 4 183 | What is neuropathy ? 0 184 | Where is the Euphrates River ? 4 185 | What is cryptography ? 0 186 | What is natural gas composed of ? 1 187 | Who is the Prime Minister of Canada ? 3 188 | What French ruler was defeated at the battle of Waterloo ? 3 189 | What is leukemia ? 0 190 | Where did Howard Hughes die ? 4 191 | What is the birthstone for June ? 1 192 | What is the sales tax in Minnesota ? 1 193 | What is the distance in miles from the earth to the sun ? 5 194 | What is the average life span for a chicken ? 5 195 | When was the first Wal-Mart store opened ? 5 196 | What is relative humidity ? 0 197 | What city has the zip code of 35824 ? 4 198 | What currency is used in Algeria ? 1 199 | Who invented the hula hoop ? 3 200 | What was the most popular toy in 1957 ? 1 201 | What is pastrami made of ? 1 202 | What is the name of the satellite that the Soviet Union sent into space in 1957 ? 1 203 | What city 's newspaper is called `` The Enquirer '' ? 4 204 | Who invented the slinky ? 3 205 | What are the animals that don 't have backbones called ? 1 206 | What is the melting point of copper ? 5 207 | Where is the volcano Olympus Mons located ? 4 208 | Who was the 23rd president of the United States ? 3 209 | What is the average body temperature ? 5 210 | What does a defibrillator do ? 0 211 | What is the effect of acid rain ? 0 212 | What year did the United States abolish the draft ? 5 213 | How fast is the speed of light ? 5 214 | What province is Montreal in ? 4 215 | What New York City structure is also known as the Twin Towers ? 4 216 | What is fungus ? 0 217 | What is the most frequently spoken language in the Netherlands ? 1 218 | What is sodium chloride ? 0 219 | What are the spots on dominoes called ? 1 220 | How many pounds in a ton ? 5 221 | What is influenza ? 0 222 | What is ozone depletion ? 0 223 | What year was the Mona Lisa painted ? 5 224 | What does `` Sitting Shiva '' mean ? 0 225 | What is the electrical output in Madrid , Spain ? 1 226 | Which mountain range in North America stretches from Maine to Georgia ? 4 227 | What is plastic made of ? 1 228 | What is the population of Nigeria ? 5 229 | What does your spleen do ? 0 230 | Where is the Grand Canyon ? 4 231 | Who invented the telephone ? 3 232 | What year did the U.S. buy Alaska ? 5 233 | What is the name of the leader of Ireland ? 3 234 | What is phenylalanine ? 0 235 | How many gallons of water are there in a cubic foot ? 5 236 | What are the two houses of the Legislative branch ? 1 237 | What is sonar ? 0 238 | In Poland , where do most people live ? 4 239 | What is phosphorus ? 0 240 | What is the location of the Sea of Tranquility ? 4 241 | How fast is sound ? 5 242 | What French province is cognac produced in ? 4 243 | What is Valentine 's Day ? 0 244 | What causes gray hair ? 0 245 | What is hypertension ? 0 246 | What is bandwidth ? 0 247 | What is the longest suspension bridge in the U.S. ? 4 248 | What is a parasite ? 0 249 | What is home equity ? 0 250 | What do meteorologists do ? 0 251 | What is the criterion for being legally blind ? 1 252 | Who is the tallest man in the world ? 3 253 | What are the twin cities ? 4 254 | What did Edward Binney and Howard Smith invent in 1903 ? 1 255 | What is the statue of liberty made of ? 1 256 | What is pilates ? 0 257 | What planet is known as the `` red '' planet ? 4 258 | What is the depth of the Nile river ? 5 259 | What is the colorful Korean traditional dress called ? 1 260 | What is Mardi Gras ? 0 261 | Mexican pesos are worth what in U.S. dollars ? 5 262 | Who was the first African American to play for the Brooklyn Dodgers ? 3 263 | Who was the first Prime Minister of Canada ? 3 264 | How many Admirals are there in the U.S. Navy ? 5 265 | What instrument did Glenn Miller play ? 1 266 | How old was Joan of Arc when she died ? 5 267 | What does the word fortnight mean ? 0 268 | What is dianetics ? 0 269 | What is the capital of Ethiopia ? 4 270 | For how long is an elephant pregnant ? 5 271 | How did Janice Joplin die ? 0 272 | What is the primary language in Iceland ? 1 273 | What is the difference between AM radio stations and FM radio stations ? 0 274 | What is osteoporosis ? 0 275 | Who was the first woman governor in the U.S. ? 3 276 | What is peyote ? 0 277 | What is the esophagus used for ? 0 278 | What is viscosity ? 0 279 | What year did Oklahoma become a state ? 5 280 | What is the abbreviation for Texas ? 2 281 | What is a mirror made out of ? 1 282 | Where on the body is a mortarboard worn ? 4 283 | What was J.F.K. 's wife 's name ? 3 284 | What does I.V. stand for ? 2 285 | What is the chunnel ? 0 286 | Where is Hitler buried ? 4 287 | What are antacids ? 0 288 | What is pulmonary fibrosis ? 0 289 | What are Quaaludes ? 0 290 | What is naproxen ? 0 291 | What is strep throat ? 0 292 | What is the largest city in the U.S. ? 4 293 | What is foot and mouth disease ? 1 294 | What is the life expectancy of a dollar bill ? 5 295 | What do you call a professional map drawer ? 1 296 | What are Aborigines ? 0 297 | What is hybridization ? 0 298 | What color is indigo ? 1 299 | How old do you have to be in order to rent a car in Italy ? 5 300 | What does a barometer measure ? 1 301 | What color is a giraffe 's tongue ? 1 302 | What does USPS stand for ? 2 303 | What year did the NFL go on strike ? 5 304 | What is solar wind ? 0 305 | What date did Neil Armstrong land on the moon ? 5 306 | When was Hiroshima bombed ? 5 307 | Where is the Savannah River ? 4 308 | Who was the first woman killed in the Vietnam War ? 3 309 | What planet has the strongest magnetic field of all the planets ? 4 310 | Who is the governor of Alaska ? 3 311 | What year did Mussolini seize power in Italy ? 5 312 | What is the capital of Persia ? 4 313 | Where is the Eiffel Tower ? 4 314 | How many hearts does an octopus have ? 5 315 | What is pneumonia ? 0 316 | What is the deepest lake in the US ? 4 317 | What is a fuel cell ? 0 318 | Who was the first U.S. president to appear on TV ? 3 319 | Where is the Little League Museum ? 4 320 | What are the two types of twins ? 1 321 | What is the brightest star ? 4 322 | What is diabetes ? 0 323 | When was President Kennedy shot ? 5 324 | What is TMJ ? 2 325 | What color is yak milk ? 1 326 | What date was Dwight D. Eisenhower born ? 5 327 | What does the technical term ISDN mean ? 2 328 | Why is the sun yellow ? 0 329 | What is the conversion rate between dollars and pounds ? 5 330 | When was Abraham Lincoln born ? 5 331 | What is the Milky Way ? 0 332 | What is mold ? 0 333 | What year was Mozart born ? 5 334 | What is a group of frogs called ? 1 335 | What is the name of William Penn 's ship ? 1 336 | What is the melting point of gold ? 5 337 | What is the street address of the White House ? 4 338 | What is semolina ? 0 339 | What fruit is Melba sauce made from ? 1 340 | What is Ursa Major ? 0 341 | What is the percentage of water content in the human body ? 5 342 | How much does water weigh ? 5 343 | What was President Lyndon Johnson 's reform program called ? 1 344 | What is the murder rate in Windsor , Ontario ? 5 345 | Who is the only president to serve 2 non-consecutive terms ? 3 346 | What is the population of Australia ? 5 347 | Who painted the ceiling of the Sistine Chapel ? 3 348 | Name a stimulant . 1 349 | What is the effect of volcanoes on the climate ? 0 350 | What year did the Andy Griffith show begin ? 5 351 | What is acid rain ? 0 352 | What is the date of Mexico 's independence ? 5 353 | What is the location of Lake Champlain ? 4 354 | What is the Illinois state flower ? 1 355 | What is Maryland 's state bird ? 1 356 | What is quicksilver ? 0 357 | Who wrote `` The Divine Comedy '' ? 3 358 | What is the speed of light ? 5 359 | What is the width of a football field ? 5 360 | Why in tennis are zero points called love ? 0 361 | What kind of dog was Toto in the Wizard of Oz ? 1 362 | What is a thyroid ? 0 363 | What does ciao mean ? 0 364 | What is the only artery that carries blue blood from the heart to the lungs ? 1 365 | How often does Old Faithful erupt at Yellowstone National Park ? 5 366 | What is acetic acid ? 0 367 | What is the elevation of St. Louis , MO ? 5 368 | What color does litmus paper turn when it comes into contact with a strong acid ? 1 369 | What are the colors of the German flag ? 1 370 | What is the Moulin Rouge ? 0 371 | What soviet seaport is on the Black Sea ? 4 372 | What is the atomic weight of silver ? 5 373 | What currency do they use in Brazil ? 1 374 | What are pathogens ? 0 375 | What is mad cow disease ? 0 376 | Name a food high in zinc . 1 377 | When did North Carolina enter the union ? 5 378 | Where do apple snails live ? 4 379 | What are ethics ? 0 380 | What does CPR stand for ? 2 381 | What is an annuity ? 0 382 | Who killed John F. Kennedy ? 3 383 | Who was the first vice president of the U.S. ? 3 384 | What birthstone is turquoise ? 1 385 | Who was the first US President to ride in an automobile to his inauguration ? 3 386 | How old was the youngest president of the United States ? 5 387 | When was Ulysses S. Grant born ? 5 388 | What is Muscular Dystrophy ? 0 389 | Who lived in the Neuschwanstein castle ? 3 390 | What is propylene glycol ? 0 391 | What is a panic disorder ? 0 392 | Who invented the instant Polaroid camera ? 3 393 | What is a carcinogen ? 0 394 | What is a baby lion called ? 1 395 | What is the world 's population ? 5 396 | What is nepotism ? 0 397 | What is die-casting ? 0 398 | What is myopia ? 0 399 | What is the sales tax rate in New York ? 5 400 | Developing nations comprise what percentage of the world 's population ? 5 401 | What is the fourth highest mountain in the world ? 4 402 | What is Shakespeare 's nickname ? 3 403 | What is the heaviest naturally occurring element ? 1 404 | When is Father 's Day ? 5 405 | What does the acronym NASA stand for ? 2 406 | How long is the Columbia River in miles ? 5 407 | What city 's newspaper is called `` The Star '' ? 4 408 | What is carbon dioxide ? 0 409 | Where is the Mason/Dixon line ? 4 410 | When was the Boston tea party ? 5 411 | What is metabolism ? 0 412 | Which U.S.A. president appeared on `` Laugh-In '' ? 3 413 | What are cigarettes made of ? 1 414 | What is the capital of Zimbabwe ? 4 415 | What does NASA stand for ? 2 416 | What is the state flower of Michigan ? 1 417 | What are semiconductors ? 0 418 | What is nuclear power ? 0 419 | What is a tsunami ? 0 420 | Who is the congressman from state of Texas on the armed forces committee ? 3 421 | Who was president in 1913 ? 3 422 | When was the first kidney transplant ? 5 423 | What are Canada 's two territories ? 4 424 | What was the name of the plane Lindbergh flew solo across the Atlantic ? 1 425 | What is genocide ? 0 426 | What continent is Argentina on ? 4 427 | What monastery was raided by Vikings in the late eighth century ? 1 428 | What is an earthquake ? 0 429 | Where is the tallest roller coaster located ? 4 430 | What are enzymes ? 0 431 | Who discovered oxygen ? 3 432 | What is bangers and mash ? 0 433 | What is the name given to the Tiger at Louisiana State University ? 1 434 | Where are the British crown jewels kept ? 4 435 | Who was the first person to reach the North Pole ? 3 436 | What is an ulcer ? 0 437 | What is vertigo ? 0 438 | What is the spirometer test ? 0 439 | When is the official first day of summer ? 5 440 | What does the abbreviation SOS mean ? 2 441 | What is the smallest bird in Britain ? 1 442 | Who invented Trivial Pursuit ? 3 443 | What gasses are in the troposphere ? 1 444 | Which country has the most water pollution ? 4 445 | What is the scientific name for elephant ? 1 446 | Who is the actress known for her role in the movie `` Gypsy '' ? 3 447 | What breed of hunting dog did the Beverly Hillbillies own ? 1 448 | What is the rainiest place on Earth ? 4 449 | Who was the first African American to win the Nobel Prize in literature ? 3 450 | When is St. Patrick 's Day ? 5 451 | What was FDR 's dog 's name ? 1 452 | What colors need to be mixed to get the color pink ? 1 453 | What is the most popular sport in Japan ? 1 454 | What is the active ingredient in baking soda ? 1 455 | When was Thomas Jefferson born ? 5 456 | How cold should a refrigerator be ? 5 457 | When was the telephone invented ? 5 458 | What is the most common eye color ? 1 459 | Where was the first golf course in the United States ? 4 460 | What is schizophrenia ? 0 461 | What is angiotensin ? 0 462 | What did Jesse Jackson organize ? 3 463 | What is New York 's state bird ? 1 464 | What is the National Park in Utah ? 4 465 | What is Susan B. Anthony 's birthday ? 5 466 | In which state would you find the Catskill Mountains ? 4 467 | What do you call a word that is spelled the same backwards and forwards ? 1 468 | What are pediatricians ? 0 469 | What chain store is headquartered in Bentonville , Arkansas ? 3 470 | What are solar cells ? 0 471 | What is compounded interest ? 0 472 | What are capers ? 0 473 | What is an antigen ? 0 474 | What currency does Luxembourg use ? 1 475 | What is the population of Venezuela ? 5 476 | What type of polymer is used for bulletproof vests ? 1 477 | What currency does Argentina use ? 1 478 | What is a thermometer ? 0 479 | What Canadian city has the largest population ? 4 480 | What color are crickets ? 1 481 | Which country gave New York the Statue of Liberty ? 4 482 | What was the name of the first U.S. satellite sent into space ? 1 483 | What precious stone is a form of pure carbon ? 1 484 | What kind of gas is in a fluorescent bulb ? 1 485 | What is rheumatoid arthritis ? 0 486 | What river runs through Rowe , Italy ? 4 487 | What is cerebral palsy ? 0 488 | What city is also known as `` The Gateway to the West '' ? 4 489 | How far away is the moon ? 5 490 | What is the source of natural gas ? 1 491 | In what spacecraft did U.S. astronaut Alan Shepard make his historic 1961 flight ? 1 492 | What is pectin ? 0 493 | What is bio-diversity ? 0 494 | What 's the easiest way to remove wallpaper ? 1 495 | What year did the Titanic start on its journey ? 5 496 | How much of an apple is water ? 5 497 | Who was the 22nd President of the US ? 3 498 | What is the money they use in Zambia ? 1 499 | How many feet in a mile ? 5 500 | What is the birthstone of October ? 1 501 | What is e-coli ? 0 502 | -------------------------------------------------------------------------------- /datasets/TREC/test.tsv: -------------------------------------------------------------------------------- 1 | sentence label 2 | How far is it from Denver to Aspen ? 5 3 | What county is Modesto , California in ? 4 4 | Who was Galileo ? 3 5 | What is an atom ? 0 6 | When did Hawaii become a state ? 5 7 | How tall is the Sears Building ? 5 8 | George Bush purchased a small interest in which baseball team ? 3 9 | What is Australia 's national flower ? 1 10 | Why does the moon turn orange ? 0 11 | What is autism ? 0 12 | What city had a world fair in 1900 ? 4 13 | What person 's head is on a dime ? 3 14 | What is the average weight of a Yellow Labrador ? 5 15 | Who was the first man to fly across the Pacific Ocean ? 3 16 | When did Idaho become a state ? 5 17 | What is the life expectancy for crickets ? 5 18 | What metal has the highest melting point ? 1 19 | Who developed the vaccination against polio ? 3 20 | What is epilepsy ? 0 21 | What year did the Titanic sink ? 5 22 | Who was the first American to walk in space ? 3 23 | What is a biosphere ? 0 24 | What river in the US is known as the Big Muddy ? 4 25 | What is bipolar disorder ? 0 26 | What is cholesterol ? 0 27 | Who developed the Macintosh computer ? 3 28 | What is caffeine ? 0 29 | What imaginary line is halfway between the North and South Poles ? 4 30 | Where is John Wayne airport ? 4 31 | What hemisphere is the Philippines in ? 4 32 | What is the average speed of the horses at the Kentucky Derby ? 5 33 | Where are the Rocky Mountains ? 4 34 | What are invertebrates ? 0 35 | What is the temperature at the center of the earth ? 5 36 | When did John F. Kennedy get elected as President ? 5 37 | How old was Elvis Presley when he died ? 5 38 | Where is the Orinoco River ? 4 39 | How far is the service line from the net in tennis ? 5 40 | How much fiber should you have per day ? 5 41 | How many Great Lakes are there ? 5 42 | Material called linen is made from what plant ? 1 43 | What is Teflon ? 0 44 | What is amitriptyline ? 0 45 | What is a shaman ? 0 46 | What is the proper name for a female walrus ? 1 47 | What is a group of turkeys called ? 1 48 | How long did Rip Van Winkle sleep ? 5 49 | What are triglycerides ? 0 50 | How many liters in a gallon ? 5 51 | What is the name of the chocolate company in San Francisco ? 3 52 | What are amphibians ? 0 53 | Who discovered x-rays ? 3 54 | Which comedian 's signature line is `` Can we talk '' ? 3 55 | What is fibromyalgia ? 0 56 | What is done with worn or outdated flags ? 0 57 | What does cc in engines mean ? 0 58 | When did Elvis Presley die ? 5 59 | What is the capital of Yugoslavia ? 4 60 | Where is Milan ? 4 61 | What is the speed hummingbirds fly ? 5 62 | What is the oldest city in the United States ? 4 63 | What was W.C. Fields ' real name ? 3 64 | What river flows between Fargo , North Dakota and Moorhead , Minnesota ? 4 65 | What do bats eat ? 1 66 | What state did the Battle of Bighorn take place in ? 4 67 | Who was Abraham Lincoln ? 3 68 | What do you call a newborn kangaroo ? 1 69 | What are spider veins ? 0 70 | What day and month did John Lennon die ? 5 71 | What strait separates North America from Asia ? 4 72 | What is the population of Seattle ? 5 73 | How much was a ticket for the Titanic ? 5 74 | What is the largest city in the world ? 4 75 | What American composer wrote the music for `` West Side Story '' ? 3 76 | Where is the Mall of the America ? 4 77 | What is the pH scale ? 0 78 | What type of currency is used in Australia ? 1 79 | How tall is the Gateway Arch in St. Louis , MO ? 5 80 | How much does the human adult female brain weigh ? 5 81 | Who was the first governor of Alaska ? 3 82 | What is a prism ? 0 83 | When was the first liver transplant ? 5 84 | Who was elected president of South Africa in 1994 ? 3 85 | What is the population of China ? 5 86 | When was Rosa Parks born ? 5 87 | Why is a ladybug helpful ? 0 88 | What is amoxicillin ? 0 89 | Who was the first female United States Representative ? 3 90 | What are xerophytes ? 0 91 | What country did Ponce de Leon come from ? 4 92 | The U.S. Department of Treasury first issued paper currency for the U.S. during which war ? 1 93 | What is desktop publishing ? 0 94 | What is the temperature of the sun 's surface ? 5 95 | What year did Canada join the United Nations ? 5 96 | What is the oldest university in the US ? 3 97 | Where is Prince Edward Island ? 4 98 | Mercury , what year was it discovered ? 5 99 | What is cryogenics ? 0 100 | What are coral reefs ? 0 101 | What is the longest major league baseball-winning streak ? 1 102 | What is neurology ? 0 103 | Who invented the calculator ? 3 104 | How do you measure earthquakes ? 0 105 | Who is Duke Ellington ? 3 106 | What county is Phoenix , AZ in ? 4 107 | What is a micron ? 0 108 | The sun 's core , what is the temperature ? 5 109 | What is the Ohio state bird ? 1 110 | When were William Shakespeare 's twins born ? 5 111 | What is the highest dam in the U.S. ? 4 112 | What color is a poison arrow frog ? 1 113 | What is acupuncture ? 0 114 | What is the length of the coastline of the state of Alaska ? 5 115 | What is the name of Neil Armstrong 's wife ? 3 116 | What is Hawaii 's state flower ? 1 117 | Who won Ms. American in 1989 ? 3 118 | When did the Hindenberg crash ? 5 119 | What mineral helps prevent osteoporosis ? 1 120 | What was the last year that the Chicago Cubs won the World Series ? 5 121 | Where is Perth ? 4 122 | What year did WWII begin ? 5 123 | What is the diameter of a golf ball ? 5 124 | What is an eclipse ? 0 125 | Who discovered America ? 3 126 | What is the earth 's diameter ? 5 127 | Which president was unmarried ? 3 128 | How wide is the Milky Way galaxy ? 5 129 | During which season do most thunderstorms occur ? 5 130 | What is Wimbledon ? 0 131 | What is the gestation period for a cat ? 5 132 | How far is a nautical mile ? 5 133 | Who was the abolitionist who led the raid on Harper 's Ferry in 1859 ? 3 134 | What does target heart rate mean ? 0 135 | What was the first satellite to go into space ? 1 136 | What is foreclosure ? 0 137 | What is the major fault line near Kentucky ? 1 138 | Where is the Holland Tunnel ? 4 139 | Who wrote the hymn `` Amazing Grace '' ? 3 140 | What position did Willie Davis play in baseball ? 3 141 | What are platelets ? 0 142 | What is severance pay ? 0 143 | What is the name of Roy Roger 's dog ? 1 144 | Where are the National Archives ? 4 145 | What is a baby turkey called ? 1 146 | What is poliomyelitis ? 0 147 | What is the longest bone in the human body ? 1 148 | Who is a German philosopher ? 3 149 | What were Christopher Columbus ' three ships ? 1 150 | What does Phi Beta Kappa mean ? 0 151 | What is nicotine ? 0 152 | What is another name for vitamin B1 ? 1 153 | Who discovered radium ? 3 154 | What are sunspots ? 0 155 | When was Algeria colonized ? 5 156 | What baseball team was the first to make numbers part of their uniform ? 3 157 | What continent is Egypt on ? 4 158 | What is the capital of Mongolia ? 4 159 | What is nanotechnology ? 0 160 | In the late 1700 's British convicts were used to populate which colony ? 4 161 | What state is the geographic center of the lower 48 states ? 4 162 | What is an obtuse angle ? 0 163 | What are polymers ? 0 164 | When is hurricane season in the Caribbean ? 5 165 | Where is the volcano Mauna Loa ? 4 166 | What is another astronomic term for the Northern Lights ? 1 167 | What peninsula is Spain part of ? 4 168 | When was Lyndon B. Johnson born ? 5 169 | What is acetaminophen ? 0 170 | What state has the least amount of rain per year ? 4 171 | Who founded American Red Cross ? 3 172 | What year did the Milwaukee Braves become the Atlanta Braves ? 5 173 | How fast is alcohol absorbed ? 5 174 | When is the summer solstice ? 5 175 | What is supernova ? 0 176 | Where is the Shawnee National Forest ? 4 177 | What U.S. state 's motto is `` Live free or Die '' ? 4 178 | Where is the Lourve ? 4 179 | When was the first stamp issued ? 5 180 | What primary colors do you mix to make orange ? 1 181 | How far is Pluto from the sun ? 5 182 | What body of water are the Canary Islands in ? 4 183 | What is neuropathy ? 0 184 | Where is the Euphrates River ? 4 185 | What is cryptography ? 0 186 | What is natural gas composed of ? 1 187 | Who is the Prime Minister of Canada ? 3 188 | What French ruler was defeated at the battle of Waterloo ? 3 189 | What is leukemia ? 0 190 | Where did Howard Hughes die ? 4 191 | What is the birthstone for June ? 1 192 | What is the sales tax in Minnesota ? 1 193 | What is the distance in miles from the earth to the sun ? 5 194 | What is the average life span for a chicken ? 5 195 | When was the first Wal-Mart store opened ? 5 196 | What is relative humidity ? 0 197 | What city has the zip code of 35824 ? 4 198 | What currency is used in Algeria ? 1 199 | Who invented the hula hoop ? 3 200 | What was the most popular toy in 1957 ? 1 201 | What is pastrami made of ? 1 202 | What is the name of the satellite that the Soviet Union sent into space in 1957 ? 1 203 | What city 's newspaper is called `` The Enquirer '' ? 4 204 | Who invented the slinky ? 3 205 | What are the animals that don 't have backbones called ? 1 206 | What is the melting point of copper ? 5 207 | Where is the volcano Olympus Mons located ? 4 208 | Who was the 23rd president of the United States ? 3 209 | What is the average body temperature ? 5 210 | What does a defibrillator do ? 0 211 | What is the effect of acid rain ? 0 212 | What year did the United States abolish the draft ? 5 213 | How fast is the speed of light ? 5 214 | What province is Montreal in ? 4 215 | What New York City structure is also known as the Twin Towers ? 4 216 | What is fungus ? 0 217 | What is the most frequently spoken language in the Netherlands ? 1 218 | What is sodium chloride ? 0 219 | What are the spots on dominoes called ? 1 220 | How many pounds in a ton ? 5 221 | What is influenza ? 0 222 | What is ozone depletion ? 0 223 | What year was the Mona Lisa painted ? 5 224 | What does `` Sitting Shiva '' mean ? 0 225 | What is the electrical output in Madrid , Spain ? 1 226 | Which mountain range in North America stretches from Maine to Georgia ? 4 227 | What is plastic made of ? 1 228 | What is the population of Nigeria ? 5 229 | What does your spleen do ? 0 230 | Where is the Grand Canyon ? 4 231 | Who invented the telephone ? 3 232 | What year did the U.S. buy Alaska ? 5 233 | What is the name of the leader of Ireland ? 3 234 | What is phenylalanine ? 0 235 | How many gallons of water are there in a cubic foot ? 5 236 | What are the two houses of the Legislative branch ? 1 237 | What is sonar ? 0 238 | In Poland , where do most people live ? 4 239 | What is phosphorus ? 0 240 | What is the location of the Sea of Tranquility ? 4 241 | How fast is sound ? 5 242 | What French province is cognac produced in ? 4 243 | What is Valentine 's Day ? 0 244 | What causes gray hair ? 0 245 | What is hypertension ? 0 246 | What is bandwidth ? 0 247 | What is the longest suspension bridge in the U.S. ? 4 248 | What is a parasite ? 0 249 | What is home equity ? 0 250 | What do meteorologists do ? 0 251 | What is the criterion for being legally blind ? 1 252 | Who is the tallest man in the world ? 3 253 | What are the twin cities ? 4 254 | What did Edward Binney and Howard Smith invent in 1903 ? 1 255 | What is the statue of liberty made of ? 1 256 | What is pilates ? 0 257 | What planet is known as the `` red '' planet ? 4 258 | What is the depth of the Nile river ? 5 259 | What is the colorful Korean traditional dress called ? 1 260 | What is Mardi Gras ? 0 261 | Mexican pesos are worth what in U.S. dollars ? 5 262 | Who was the first African American to play for the Brooklyn Dodgers ? 3 263 | Who was the first Prime Minister of Canada ? 3 264 | How many Admirals are there in the U.S. Navy ? 5 265 | What instrument did Glenn Miller play ? 1 266 | How old was Joan of Arc when she died ? 5 267 | What does the word fortnight mean ? 0 268 | What is dianetics ? 0 269 | What is the capital of Ethiopia ? 4 270 | For how long is an elephant pregnant ? 5 271 | How did Janice Joplin die ? 0 272 | What is the primary language in Iceland ? 1 273 | What is the difference between AM radio stations and FM radio stations ? 0 274 | What is osteoporosis ? 0 275 | Who was the first woman governor in the U.S. ? 3 276 | What is peyote ? 0 277 | What is the esophagus used for ? 0 278 | What is viscosity ? 0 279 | What year did Oklahoma become a state ? 5 280 | What is the abbreviation for Texas ? 2 281 | What is a mirror made out of ? 1 282 | Where on the body is a mortarboard worn ? 4 283 | What was J.F.K. 's wife 's name ? 3 284 | What does I.V. stand for ? 2 285 | What is the chunnel ? 0 286 | Where is Hitler buried ? 4 287 | What are antacids ? 0 288 | What is pulmonary fibrosis ? 0 289 | What are Quaaludes ? 0 290 | What is naproxen ? 0 291 | What is strep throat ? 0 292 | What is the largest city in the U.S. ? 4 293 | What is foot and mouth disease ? 1 294 | What is the life expectancy of a dollar bill ? 5 295 | What do you call a professional map drawer ? 1 296 | What are Aborigines ? 0 297 | What is hybridization ? 0 298 | What color is indigo ? 1 299 | How old do you have to be in order to rent a car in Italy ? 5 300 | What does a barometer measure ? 1 301 | What color is a giraffe 's tongue ? 1 302 | What does USPS stand for ? 2 303 | What year did the NFL go on strike ? 5 304 | What is solar wind ? 0 305 | What date did Neil Armstrong land on the moon ? 5 306 | When was Hiroshima bombed ? 5 307 | Where is the Savannah River ? 4 308 | Who was the first woman killed in the Vietnam War ? 3 309 | What planet has the strongest magnetic field of all the planets ? 4 310 | Who is the governor of Alaska ? 3 311 | What year did Mussolini seize power in Italy ? 5 312 | What is the capital of Persia ? 4 313 | Where is the Eiffel Tower ? 4 314 | How many hearts does an octopus have ? 5 315 | What is pneumonia ? 0 316 | What is the deepest lake in the US ? 4 317 | What is a fuel cell ? 0 318 | Who was the first U.S. president to appear on TV ? 3 319 | Where is the Little League Museum ? 4 320 | What are the two types of twins ? 1 321 | What is the brightest star ? 4 322 | What is diabetes ? 0 323 | When was President Kennedy shot ? 5 324 | What is TMJ ? 2 325 | What color is yak milk ? 1 326 | What date was Dwight D. Eisenhower born ? 5 327 | What does the technical term ISDN mean ? 2 328 | Why is the sun yellow ? 0 329 | What is the conversion rate between dollars and pounds ? 5 330 | When was Abraham Lincoln born ? 5 331 | What is the Milky Way ? 0 332 | What is mold ? 0 333 | What year was Mozart born ? 5 334 | What is a group of frogs called ? 1 335 | What is the name of William Penn 's ship ? 1 336 | What is the melting point of gold ? 5 337 | What is the street address of the White House ? 4 338 | What is semolina ? 0 339 | What fruit is Melba sauce made from ? 1 340 | What is Ursa Major ? 0 341 | What is the percentage of water content in the human body ? 5 342 | How much does water weigh ? 5 343 | What was President Lyndon Johnson 's reform program called ? 1 344 | What is the murder rate in Windsor , Ontario ? 5 345 | Who is the only president to serve 2 non-consecutive terms ? 3 346 | What is the population of Australia ? 5 347 | Who painted the ceiling of the Sistine Chapel ? 3 348 | Name a stimulant . 1 349 | What is the effect of volcanoes on the climate ? 0 350 | What year did the Andy Griffith show begin ? 5 351 | What is acid rain ? 0 352 | What is the date of Mexico 's independence ? 5 353 | What is the location of Lake Champlain ? 4 354 | What is the Illinois state flower ? 1 355 | What is Maryland 's state bird ? 1 356 | What is quicksilver ? 0 357 | Who wrote `` The Divine Comedy '' ? 3 358 | What is the speed of light ? 5 359 | What is the width of a football field ? 5 360 | Why in tennis are zero points called love ? 0 361 | What kind of dog was Toto in the Wizard of Oz ? 1 362 | What is a thyroid ? 0 363 | What does ciao mean ? 0 364 | What is the only artery that carries blue blood from the heart to the lungs ? 1 365 | How often does Old Faithful erupt at Yellowstone National Park ? 5 366 | What is acetic acid ? 0 367 | What is the elevation of St. Louis , MO ? 5 368 | What color does litmus paper turn when it comes into contact with a strong acid ? 1 369 | What are the colors of the German flag ? 1 370 | What is the Moulin Rouge ? 0 371 | What soviet seaport is on the Black Sea ? 4 372 | What is the atomic weight of silver ? 5 373 | What currency do they use in Brazil ? 1 374 | What are pathogens ? 0 375 | What is mad cow disease ? 0 376 | Name a food high in zinc . 1 377 | When did North Carolina enter the union ? 5 378 | Where do apple snails live ? 4 379 | What are ethics ? 0 380 | What does CPR stand for ? 2 381 | What is an annuity ? 0 382 | Who killed John F. Kennedy ? 3 383 | Who was the first vice president of the U.S. ? 3 384 | What birthstone is turquoise ? 1 385 | Who was the first US President to ride in an automobile to his inauguration ? 3 386 | How old was the youngest president of the United States ? 5 387 | When was Ulysses S. Grant born ? 5 388 | What is Muscular Dystrophy ? 0 389 | Who lived in the Neuschwanstein castle ? 3 390 | What is propylene glycol ? 0 391 | What is a panic disorder ? 0 392 | Who invented the instant Polaroid camera ? 3 393 | What is a carcinogen ? 0 394 | What is a baby lion called ? 1 395 | What is the world 's population ? 5 396 | What is nepotism ? 0 397 | What is die-casting ? 0 398 | What is myopia ? 0 399 | What is the sales tax rate in New York ? 5 400 | Developing nations comprise what percentage of the world 's population ? 5 401 | What is the fourth highest mountain in the world ? 4 402 | What is Shakespeare 's nickname ? 3 403 | What is the heaviest naturally occurring element ? 1 404 | When is Father 's Day ? 5 405 | What does the acronym NASA stand for ? 2 406 | How long is the Columbia River in miles ? 5 407 | What city 's newspaper is called `` The Star '' ? 4 408 | What is carbon dioxide ? 0 409 | Where is the Mason/Dixon line ? 4 410 | When was the Boston tea party ? 5 411 | What is metabolism ? 0 412 | Which U.S.A. president appeared on `` Laugh-In '' ? 3 413 | What are cigarettes made of ? 1 414 | What is the capital of Zimbabwe ? 4 415 | What does NASA stand for ? 2 416 | What is the state flower of Michigan ? 1 417 | What are semiconductors ? 0 418 | What is nuclear power ? 0 419 | What is a tsunami ? 0 420 | Who is the congressman from state of Texas on the armed forces committee ? 3 421 | Who was president in 1913 ? 3 422 | When was the first kidney transplant ? 5 423 | What are Canada 's two territories ? 4 424 | What was the name of the plane Lindbergh flew solo across the Atlantic ? 1 425 | What is genocide ? 0 426 | What continent is Argentina on ? 4 427 | What monastery was raided by Vikings in the late eighth century ? 1 428 | What is an earthquake ? 0 429 | Where is the tallest roller coaster located ? 4 430 | What are enzymes ? 0 431 | Who discovered oxygen ? 3 432 | What is bangers and mash ? 0 433 | What is the name given to the Tiger at Louisiana State University ? 1 434 | Where are the British crown jewels kept ? 4 435 | Who was the first person to reach the North Pole ? 3 436 | What is an ulcer ? 0 437 | What is vertigo ? 0 438 | What is the spirometer test ? 0 439 | When is the official first day of summer ? 5 440 | What does the abbreviation SOS mean ? 2 441 | What is the smallest bird in Britain ? 1 442 | Who invented Trivial Pursuit ? 3 443 | What gasses are in the troposphere ? 1 444 | Which country has the most water pollution ? 4 445 | What is the scientific name for elephant ? 1 446 | Who is the actress known for her role in the movie `` Gypsy '' ? 3 447 | What breed of hunting dog did the Beverly Hillbillies own ? 1 448 | What is the rainiest place on Earth ? 4 449 | Who was the first African American to win the Nobel Prize in literature ? 3 450 | When is St. Patrick 's Day ? 5 451 | What was FDR 's dog 's name ? 1 452 | What colors need to be mixed to get the color pink ? 1 453 | What is the most popular sport in Japan ? 1 454 | What is the active ingredient in baking soda ? 1 455 | When was Thomas Jefferson born ? 5 456 | How cold should a refrigerator be ? 5 457 | When was the telephone invented ? 5 458 | What is the most common eye color ? 1 459 | Where was the first golf course in the United States ? 4 460 | What is schizophrenia ? 0 461 | What is angiotensin ? 0 462 | What did Jesse Jackson organize ? 3 463 | What is New York 's state bird ? 1 464 | What is the National Park in Utah ? 4 465 | What is Susan B. Anthony 's birthday ? 5 466 | In which state would you find the Catskill Mountains ? 4 467 | What do you call a word that is spelled the same backwards and forwards ? 1 468 | What are pediatricians ? 0 469 | What chain store is headquartered in Bentonville , Arkansas ? 3 470 | What are solar cells ? 0 471 | What is compounded interest ? 0 472 | What are capers ? 0 473 | What is an antigen ? 0 474 | What currency does Luxembourg use ? 1 475 | What is the population of Venezuela ? 5 476 | What type of polymer is used for bulletproof vests ? 1 477 | What currency does Argentina use ? 1 478 | What is a thermometer ? 0 479 | What Canadian city has the largest population ? 4 480 | What color are crickets ? 1 481 | Which country gave New York the Statue of Liberty ? 4 482 | What was the name of the first U.S. satellite sent into space ? 1 483 | What precious stone is a form of pure carbon ? 1 484 | What kind of gas is in a fluorescent bulb ? 1 485 | What is rheumatoid arthritis ? 0 486 | What river runs through Rowe , Italy ? 4 487 | What is cerebral palsy ? 0 488 | What city is also known as `` The Gateway to the West '' ? 4 489 | How far away is the moon ? 5 490 | What is the source of natural gas ? 1 491 | In what spacecraft did U.S. astronaut Alan Shepard make his historic 1961 flight ? 1 492 | What is pectin ? 0 493 | What is bio-diversity ? 0 494 | What 's the easiest way to remove wallpaper ? 1 495 | What year did the Titanic start on its journey ? 5 496 | How much of an apple is water ? 5 497 | Who was the 22nd President of the US ? 3 498 | What is the money they use in Zambia ? 1 499 | How many feet in a mile ? 5 500 | What is the birthstone of October ? 1 501 | What is e-coli ? 0 502 | -------------------------------------------------------------------------------- /aug_data/mpqa/dev.tsv: -------------------------------------------------------------------------------- 1 | sentence label 2 | unfettered support 1 3 | such a mindset will impair 0 4 | has revealed george bush's talents as a war leader 1 5 | provide support to the us , as it did 1 6 | kill 0 7 | play with 0 8 | embark on trampling upon human rights of civilians 0 9 | in such a complex state 0 10 | pursue 0 11 | will continue to support 1 12 | places strains 0 13 | wish 1 14 | subjected 0 15 | benefits 0 16 | highly respected 1 17 | this agreement 1 18 | under the strong influence 0 19 | preach 1 20 | hardest hit 0 21 | would be easier 1 22 | will be irresponsible 0 23 | conspicuous policy of playing games 0 24 | to agree 1 25 | to ask 1 26 | was very strong about this 1 27 | were even more full of praise 1 28 | can not be taken seriously 0 29 | felt gratified and relieved 1 30 | invited 1 31 | languages without nuances 0 32 | scheme 0 33 | incapacity to put crime under control 0 34 | it is pure rhetoric 0 35 | charged 0 36 | attacks 0 37 | anger 0 38 | vowed 1 39 | ridiculous 0 40 | are ready 1 41 | were accused 0 42 | will of 1 43 | unheeded 0 44 | opposes and rejects 0 45 | outcry 0 46 | join forces 1 47 | ca n't be burying its head in the sand 0 48 | insidious 0 49 | would not accept 0 50 | too costly to meet 0 51 | should not 0 52 | does not reflect 0 53 | invited 1 54 | top priority of 1 55 | only benefits corporate america 0 56 | still 0 57 | support 1 58 | not intent on revitalizing its economies 0 59 | angry anti - us protests erupted 0 60 | hopes 1 61 | m feeling much better 1 62 | branded 0 63 | the danger 0 64 | proposal to reunify 1 65 | nothing seemed more normal than to settle in someone else's territory 0 66 | to blame 0 67 | have been growing less visionary 0 68 | will fight on 0 69 | shortcomings 0 70 | deadly 0 71 | would incur its displeasure 0 72 | like a dangerous virus 0 73 | it is a futile illusion because it is a lie 0 74 | are irritated 0 75 | in a way our government has not been 0 76 | neither good nor bad , just incorrigible 0 77 | destroyed 0 78 | respecting 1 79 | to promote 1 80 | witch - hunting 0 81 | biggest terrorist country 0 82 | legitimate dissent 1 83 | even against vehicles transporting pregnant women in labor and against unarmed citizens 0 84 | niceties 1 85 | his ship has been sailing towards a wrong direction for far too long 0 86 | critics 0 87 | once again turn its back 0 88 | growing unrest 0 89 | defects 0 90 | has no love lost 0 91 | violent protests 0 92 | confined to minute cells 0 93 | immediately 1 94 | corrupt politicians 0 95 | a crime against humanity 0 96 | the rights and privileges that they would automatically get under geneva convention 0 97 | perils of delusion 0 98 | would be better off 1 99 | have been concerned 0 100 | uproar 0 101 | pained him 0 102 | terrorism 0 103 | not only prejudice 0 104 | a brutal occupation 0 105 | meddling 0 106 | falls at a bad time 0 107 | ugliest crimes 0 108 | picking on 0 109 | allegations 0 110 | are being well treated 1 111 | the serious crisis 0 112 | will be a monumental tragedy 0 113 | is proud 1 114 | thus 0 115 | oh , my god 0 116 | will not allow 0 117 | the worst crisis 0 118 | turn its back 0 119 | deepened 0 120 | will complain 0 121 | very hard line 0 122 | opposition 0 123 | would not dilute 1 124 | secret 0 125 | opposition 0 126 | illusory 0 127 | not adequately free and fair 0 128 | should follow 1 129 | impossible 0 130 | an aberration that goes against mankind 0 131 | have preference for 1 132 | all around us 0 133 | very , very large 1 134 | lie 0 135 | were ill - received 0 136 | difficult 0 137 | and then assert the laws of war do not apply 0 138 | from upholding religious and cultural values 1 139 | preferred 1 140 | full support 1 141 | criticizing 0 142 | immediate exploitation 0 143 | ignored 0 144 | has unashamedly seen fit 1 145 | support 1 146 | now a reality 1 147 | gaffe 0 148 | virtually nothing 0 149 | no place in israel can be considered safe 0 150 | to invite 1 151 | prevent such a development 1 152 | rigged 0 153 | logical solution 1 154 | bungled its dealings 0 155 | demerits 0 156 | like the suffering 0 157 | unacceptable 0 158 | supports 1 159 | 100 percent 1 160 | does not appear 0 161 | pretext 0 162 | what begins to happen when we hate our friends 0 163 | denies 0 164 | intimidation 0 165 | children skipping school 0 166 | criticism 0 167 | violence and intimidation 0 168 | blatantly obstructed 0 169 | reactions of repulsion 0 170 | are not happy 0 171 | criticized 0 172 | a growing impression that misuari could pose a security threat 0 173 | left behind 1 174 | they have not succeeded , and will never succeed 1 175 | it's shameful 0 176 | beyond the reach of any legal regime 0 177 | is pessimistic 0 178 | has no right 0 179 | are masterminded 0 180 | regret 0 181 | will weaken soon 0 182 | terrible tragedy 0 183 | there is no alternative to it but conflict , isolation , nationalism , and ultimately war 0 184 | freak show 0 185 | insisted 1 186 | axis of evil 0 187 | come under fire 0 188 | threats expressed by 0 189 | the support 1 190 | wrong 0 191 | has resisted 0 192 | the darkest hour is always before the dawn 1 193 | the entire palestinian people 0 194 | denounced 0 195 | very serious threat 0 196 | only 0 197 | making a spectacle of 0 198 | promote meetings 1 199 | to mould the electoral process in his favour 0 200 | not a single day would pass in peace and with no palestinians shedding blood 0 201 | crises 0 202 | hoping 1 203 | to intimidate women and children 0 204 | disputes 0 205 | extreme right 0 206 | most dangerous 0 207 | may even get better 0 208 | promise 1 209 | has pledged 1 210 | one body with two heads 0 211 | feel at ease 1 212 | turning a blind eye 0 213 | has floundered 0 214 | warns 0 215 | most cogent argument 1 216 | lump 0 217 | deals a blow 0 218 | bad treatment 0 219 | concerned 0 220 | have criticized 0 221 | you can hardly speak of a targeting error 0 222 | certain countries resort to 0 223 | called for 1 224 | violating human rights 0 225 | unusual 0 226 | would undermine 0 227 | a terrorist act 0 228 | threat 0 229 | objectives 1 230 | can be abated 1 231 | muscle - flexing 0 232 | pursued 1 233 | brilliant 1 234 | committed 1 235 | committing themselves 1 236 | support 1 237 | the importance of china 1 238 | only one single 0 239 | hoped 1 240 | closed ranks behind 1 241 | peace 1 242 | would not be a bad idea 1 243 | legitimate 1 244 | closer to that of cowboys than to a civilized mentality 0 245 | such fruitful results 1 246 | double crime 0 247 | perfectly at ease 1 248 | mistake 0 249 | repeatedly accused 0 250 | ceased to be a soldier of the fatherland 0 251 | however 0 252 | apocalyptic savagery 0 253 | a border of peace and good neighbourliness 1 254 | call for 1 255 | hurt 0 256 | declined to endorse 0 257 | countries such as iran , iraq and north korea represent an ``axis of evil 0 258 | more serious 0 259 | am confident 1 260 | picking on 0 261 | whether 0 262 | go out into the streets to defend 1 263 | committed one more mistake 0 264 | the agreement 1 265 | charging 0 266 | other aggressive acts against lebanon 0 267 | enjoying 1 268 | decided 0 269 | hard - line 0 270 | feels 0 271 | do n't want 0 272 | the threats launched 0 273 | rejected 0 274 | breaking fundamental concepts 0 275 | increasingly tyrannical 0 276 | undisguised declaration of war and a rhetoric threatening aggression 0 277 | want 1 278 | hyperbole , 0 279 | patronizing 0 280 | arbitrary arrests 0 281 | jeopardy 0 282 | could not be said to adequately comply 0 283 | with good economic management , israel should have little trouble 1 284 | terrorist allies 0 285 | swift criticism from 0 286 | instead 0 287 | steering the economy into disaster 0 288 | grew so unhappy 0 289 | will not resort 0 290 | has refused 0 291 | may not be feasible 0 292 | danger of being shelved altogether 0 293 | unmanageable 0 294 | favouring independence 1 295 | misrule 0 296 | misery 0 297 | steering the nation to prosperity and progress 1 298 | america's biding 0 299 | compounded the problem 0 300 | designed to benefit mugabe 0 301 | supposed to be 0 302 | appraised 1 303 | stand beside right and justice 1 304 | refuses 0 305 | surged 1 306 | denied 0 307 | fake imposter 0 308 | great evil on open display 0 309 | ideal , sunny clime 1 310 | seems to be determined to expand the scope of the anti - terror war 0 311 | a body blow 0 312 | as full citizens in the same sate 1 313 | lecturing 0 314 | suffered 0 315 | harmed 0 316 | criticized 0 317 | scores of 0 318 | ignore the consequences 0 319 | territorial ambition 0 320 | not 0 321 | no one has the right to wage war against the history of this nation 0 322 | wants 1 323 | hatred 0 324 | what occured in the united states on 11 september 0 325 | oneupmanship 0 326 | labeling 0 327 | showed little - disguised irritation 0 328 | concern 0 329 | even 0 330 | the possibility of a democratic , stable and prosperous 1 331 | strongly criticized and condemned 0 332 | will not admit 0 333 | persuade 1 334 | was more than confident 1 335 | had not heeded 0 336 | cost - effective 1 337 | mistake 0 338 | would adhere to a pluralistic vision 1 339 | had asked 1 340 | replete with 1 341 | shameful 0 342 | will only 0 343 | accidental slight 0 344 | nothing whatsoever 0 345 | could no longer tolerate 0 346 | leeway 1 347 | the doubts 0 348 | aggressions against 0 349 | raving 0 350 | are blamed by 0 351 | so many uncertainties 0 352 | destined to collapse 1 353 | confidence crisis 0 354 | are accusing 0 355 | warned 0 356 | such pessimism 0 357 | anxiety 0 358 | cause a rift 0 359 | no politically prudent 0 360 | held out an olive branch 1 361 | continues to demolish 0 362 | does not endorse 0 363 | failed 1 364 | comprehensive destructive policy 0 365 | sounds clumsy 0 366 | fails to meet the standard of being free and fair 0 367 | willfulness 1 368 | tough policy 0 369 | has not satisfied 0 370 | extremist inclinations 0 371 | unilateral 0 372 | the iranians have not done what the pakistan government has done 0 373 | because 0 374 | boycotted 0 375 | without just compensation 0 376 | desperation of the people 0 377 | damage 0 378 | dominating the world 0 379 | sought 1 380 | fearing 0 381 | that concessions are sufficiently concrete 1 382 | put his nation first 1 383 | expresses the concern 0 384 | can contaminate 0 385 | to put it mildly 0 386 | will do its utmost 1 387 | endorsed 1 388 | will surely be on the president's lips 0 389 | with typical understatement 0 390 | lambasted 0 391 | affirmed 1 392 | vitriol 0 393 | in compliance 0 394 | numerous serious abuses 0 395 | asked 1 396 | confidence 1 397 | played the same tactic again 0 398 | are enthusiastic 1 399 | valued ally and friend 1 400 | so - called 0 401 | are regarding 0 402 | running out of meaningful options 0 403 | can not accept 0 404 | is concerned 0 405 | further criticism 0 406 | of all the unconscionable things 0 407 | longest destructive war 0 408 | plight of 0 409 | can no longer stand shoulder to shoulder 0 410 | it would be madness to hold elections now 0 411 | should be looking up smiling 1 412 | if one goes by the us logic , only the us spy plane is permitted to spy at other people's doorsteps 0 413 | begins to bear fruit 1 414 | rigged 0 415 | serious division 0 416 | enjoys no social base 0 417 | very constructive 1 418 | grateful 1 419 | turned thugs 0 420 | was unconstitutional 0 421 | scares away 0 422 | declining to endorse 0 423 | violate human rights 0 424 | accused 0 425 | negatively 0 426 | due regard 1 427 | simplistic 0 428 | will help renew 1 429 | succumbing 0 430 | peace to prevail 1 431 | despite all of this , however 0 432 | proves this beyond the shadow of a doubt 1 433 | is accused 0 434 | intolerant 0 435 | the visit has achieved positive and successful results 1 436 | wantonly infringing 0 437 | assassin 0 438 | continue to obstruct 0 439 | interests 1 440 | was a strong desire among 1 441 | to denounce 0 442 | left no avenue unexplored 1 443 | endorsed 1 444 | would also ensure a durable peace 1 445 | as a vehicle for increasing personal popularity 1 446 | would have been sending a very bad signal 0 447 | continues to refuse 0 448 | pursuit 0 449 | committing themselves to peace 1 450 | accuses 0 451 | as if they were quarries 0 452 | far worse than those prevailing in camp x - ray 0 453 | inhumanely 0 454 | appropriate 1 455 | poor 0 456 | good education system 1 457 | illegal 0 458 | devastating 0 459 | to create the impression 0 460 | exacerbating 0 461 | faltered as a result of israel's intransigence 0 462 | his career was like a rough sea , with highs and lows and never calm 0 463 | generally approved of 1 464 | deviant 0 465 | without trying to maneuver , place obstacles , or set impossible conditions 0 466 | the fire is raging at home 0 467 | lacks credibility and can not withstand any objective scrutiny 0 468 | endorsed 1 469 | assassinate innocent activists and citizens 0 470 | than they deserve 0 471 | refusal to respect its obligations 0 472 | would not 0 473 | for the first time 1 474 | beautiful historic coincidence 1 475 | hardly elastic enough 0 476 | approve 1 477 | hope 1 478 | was so hard on 0 479 | most serious consequences 0 480 | will have trouble wriggling out of the need to explain and justify 0 481 | not completely reliable 0 482 | democracy exhausted all its generosity 0 483 | relatively calm 1 484 | are extremely concerned 0 485 | bringing an end to terrorism and the taliban 1 486 | reject 0 487 | stand firm 1 488 | new political bogey 0 489 | abstractly recommend 1 490 | was worried 0 491 | hardline 0 492 | crash course 0 493 | recognition 1 494 | repeated denunciations 0 495 | hope 1 496 | carnage 0 497 | threats 0 498 | swapping the silver - visored helmet of a space cadet for the green eyeshade of a consummate bean counter 0 499 | to voice his concern 0 500 | significant 0 501 | can bring security and stability to all the parties without exception 1 502 | would consider 1 503 | be reproached 0 504 | or so it claims 0 505 | harboring serious doubts 0 506 | if it becomes more of a nuisance 0 507 | need to establish a just peace 1 508 | immediate support 1 509 | discrediting 0 510 | picking a quarrel 0 511 | bridle the israeli oppressive activity 1 512 | the euro , our currency 1 513 | edifying photograph 1 514 | wrong judgment 0 515 | preservation of global peace and security 1 516 | sharply questioned 0 517 | denied 0 518 | respect 1 519 | unfeasible 0 520 | has promised 1 521 | fierce demonstrations 0 522 | to thwart 1 523 | disrespect of advice 0 524 | war on 0 525 | as the saying goes , when you pull up the turnip , mud comes with it 0 526 | smiling 1 527 | judgment 0 528 | an axis of evil 0 529 | criticizes 0 530 | costly burden 0 531 | detrimental 0 532 | earned eternal notoriety 0 533 | making liberal use of the but construction 0 534 | plotting 0 535 | financial disaster 0 536 | would be regarded 0 537 | parasitic economies 0 538 | wanted 1 539 | more demagogy than arguments 0 540 | israel's superior ability to punish palestinians 0 541 | flirting with 0 542 | was perceived 0 543 | what is europe 0 544 | the best role model 1 545 | inaccurate and fabricated 0 546 | recognition 1 547 | axis of evil rhetoric 0 548 | lost their illegitimate interests 0 549 | not a man who likes to improvise 0 550 | is criticized 0 551 | can trust 1 552 | foresaw 0 553 | heresy 0 554 | him has not been pretty 0 555 | demonstrations and rallies against 0 556 | axis of evil 0 557 | cold war heritage 0 558 | possible regression 0 559 | will guarantee 0 560 | came out in protest 0 561 | progressive 1 562 | peaceful protests 0 563 | difficulty , of course 0 564 | put on a spectacle 0 565 | dogma 0 566 | devastating what remains 0 567 | is stiffening in its attitude toward 0 568 | stuck for 18 months on ground zero 0 569 | put most of the blame 0 570 | very large 0 571 | once again 0 572 | it is almost impossible 0 573 | has criticised 0 574 | axis of evil 0 575 | humiliation of 0 576 | calm 1 577 | are tired 0 578 | enjoy 1 579 | wanted 1 580 | has blamed 0 581 | unprecedented force 0 582 | provocative 0 583 | far preferable 1 584 | repeated warnings 0 585 | neither free nor fair 0 586 | backing 1 587 | widespread debates against 0 588 | using violence , intimidation , special laws and dirty tricks to fix the two - day election 0 589 | continue to obstruct 0 590 | seeking 1 591 | biased attitude 0 592 | long neglected 0 593 | success of western europe after world war ii laid in its creation of the good neighborly field 1 594 | even risking his own future 0 595 | accused 0 596 | denied 0 597 | desperate tableau 0 598 | could n't wait 1 599 | blessed 1 600 | nuclear club 0 601 | most important 1 602 | wants 0 603 | questioning 0 604 | no one is listening 0 605 | complicates any situation 0 606 | who were expelled from their homeland 0 607 | distortion of reality 0 608 | had the advantage for washington of weakening opec 1 609 | the support 1 610 | may god be satisfied with him 1 611 | continue to rise 0 612 | will not be able lay claim to a worthy place in the civilized world 0 613 | has to be maintained at any cost 0 614 | thugs 0 615 | apprehensions 0 616 | want 1 617 | recommendations 1 618 | one of the few enlightened officials 1 619 | democratic achievements 1 620 | tensions between 0 621 | see 0 622 | infuriating 0 623 | first organized protest 0 624 | feared by 0 625 | ignored 0 626 | believe 0 627 | particular concern is raised 0 628 | main problems are in the economic area 0 629 | damages the credibility 0 630 | declining to comply 0 631 | blamed 0 632 | allegedly threatening 0 633 | dismisses 0 634 | the unconditional support 1 635 | not satisfactory 0 636 | negated 0 637 | berating 0 638 | would remake venezuela to benefit the poor 1 639 | `world judge of human rights' 0 640 | were only to be expected 0 641 | cooperation 1 642 | issuing a letter of objection 0 643 | would support wholeheartedly 1 644 | giving him medium approval 1 645 | at a loss 0 646 | dwindling moral values 0 647 | is self - inflicted 0 648 | if it is successful 0 649 | impose 0 650 | eradication 0 651 | feared 0 652 | scupper any hope 0 653 | backing away 0 654 | held 0 655 | will have no universally - acknowledged 0 656 | did not show the slightest sympathy , still less the least regret 0 657 | advanced a mendacious critique 0 658 | disputes between 0 659 | encouraged 1 660 | another setback for the imf 0 661 | wanted 1 662 | clear priority 1 663 | crimes of war 0 664 | destroyed 0 665 | warned 0 666 | decided to back 1 667 | violates the united nations charter 0 668 | interfere 0 669 | have n't already violated 0 670 | defenceless 0 671 | was effective especially 1 672 | is accusing 0 673 | a man blinded by power 0 674 | backing 1 675 | it's really strange 0 676 | his favourite newfie 0 677 | mercurial strongman 0 678 | that four shots would solve the problem 0 679 | would not accept 0 680 | there is no reason for it to be impossible 1 681 | poor response 0 682 | aspirations 1 683 | to the healthy development of sino - us relations 1 684 | resolute commitment 1 685 | democracy 1 686 | the concern 0 687 | only wants 1 688 | needlessly 0 689 | secretly behind every local misfortune 0 690 | positive 1 691 | is disgusted 0 692 | high degree of difficulty 0 693 | improvement 1 694 | spurned 0 695 | has denounced 0 696 | axis of evil theory 0 697 | extensive support 1 698 | worst 0 699 | its selfishness 0 700 | ambition 1 701 | find no grounds for any support 0 702 | achieving better results 1 703 | no tears will be shed in this corner 0 704 | is a telling example of a new - and perhaps risky - approach 0 705 | will become one of the best elements 1 706 | was also quite naive 0 707 | the criticism 0 708 | desire to work 1 709 | for the sake of peace 1 710 | double standard 0 711 | pretended 0 712 | alarm 0 713 | discontent 0 714 | furthermore 0 715 | massacring thousands of innocent people 0 716 | immense gulf between 0 717 | still wants 1 718 | basically sound 1 719 | repeatedly threatened 0 720 | mendacious 0 721 | beyond reproach 1 722 | but 0 723 | neat stuff 1 724 | purposely play up 0 725 | blatant campaign of intimidation 0 726 | disappointment 0 727 | provoked 0 728 | intends to 0 729 | worried 0 730 | real danger 0 731 | supported 1 732 | support 1 733 | were plotting 0 734 | agreed 1 735 | most realistic 1 736 | violating 0 737 | value sharing 1 738 | to its knees 0 739 | to request 1 740 | get out ! . 0 741 | the axis of evil 0 742 | assassins , assassins , assassins 0 743 | is the appropriate one 1 744 | resistance 1 745 | indulging in blood - shed and their lunaticism 0 746 | desperate 0 747 | to describe 0 748 | can ask 1 749 | know 0 750 | invited 1 751 | to express satisfaction 1 752 | harmonious and close 1 753 | support 1 754 | has been inclined toward 1 755 | incited 0 756 | was like giving away the country to its former colonial masters 0 757 | the destruction 0 758 | already volatile 0 759 | overwhelming evidence 0 760 | imperative for harmonious society 1 761 | nor does it seem proper 0 762 | storming palestinian territories 0 763 | is reflected 1 764 | expressed satisfaction 1 765 | intends 1 766 | ruined 0 767 | made public a blacklist 0 768 | those who seek to attack and destroy law and order and legitimate government 0 769 | unlawful 0 770 | freely 1 771 | reassurances 1 772 | agreeable 1 773 | dont want 0 774 | did not agree 0 775 | to deny 0 776 | perception 0 777 | had particularly harsh words for 0 778 | want 1 779 | the brutality with which this closure was implemented was unheard of 0 780 | friendship 1 781 | axis of evil 0 782 | supported 1 783 | slaughter of more than 0 784 | will be asked 1 785 | just , legitimate rights 1 786 | condemned 0 787 | thanked 1 788 | unstable situation 0 789 | merely an interlude 0 790 | depends on 0 791 | has been efficient enough 1 792 | grand 1 793 | any step backward for democracy 0 794 | quite supportive 1 795 | great leader 1 796 | would help 1 797 | formed a close friendship 1 798 | uphold 1 799 | is feared by 0 800 | used to boast 1 801 | regarded 1 802 | would violate international standards prohibiting cruel , inhuman or degrading treatment 0 803 | convince the americans that the bush administration has not only succeeded in removing a regime of religious madmen 0 804 | has put west asia on the brink of another war 0 805 | has also backed 1 806 | attacks 0 807 | banned 0 808 | dishonoring 0 809 | sent congratulations 1 810 | thought 0 811 | may be disconcerted 0 812 | concern 0 813 | will be inviting 1 814 | have to bash 0 815 | plan 1 816 | guaranteed 1 817 | the mistake is to assume 0 818 | insists 1 819 | sticking to the polluting policies 0 820 | can be difficult 0 821 | can prevent 1 822 | extremely dangerous 0 823 | would beg for surrender 0 824 | protesting 0 825 | proclaimed 1 826 | to applaud everything 1 827 | the most ideal way 1 828 | accused 0 829 | what fate awaits argentina's crisis - stricken economy 0 830 | especially those who are dangerous and suicidal 0 831 | has failed for a decade now 0 832 | had likely deteriorated beyond repair 0 833 | attempts to suppress 0 834 | endorse 1 835 | if there were serious problems 0 836 | designed to achieve an outcome - power at all costs 0 837 | criticism of 0 838 | everything 0 839 | so that we would not become terrorists 1 840 | higher transparency 1 841 | warning 0 842 | does not justify all the means 0 843 | the further 0 844 | artificial obstacles 0 845 | would improve 0 846 | has now made himself heard 1 847 | openly invited 1 848 | argued 0 849 | decisions 0 850 | admittedly 1 851 | still prefer 1 852 | had planned 0 853 | sacrifice himself 0 854 | very pleasant for 1 855 | prevented 0 856 | favorable opinions 1 857 | no one we know to have planned such deeds will escape 0 858 | to express openly dissenting political and religious views 0 859 | confidence 1 860 | no business 0 861 | has provoked concern 0 862 | had aligned against 0 863 | under the pretext of fighting terrorism 0 864 | violation of the palestinian people's human rights 0 865 | understanding and approval 1 866 | concerns 0 867 | universal character of the prophets 1 868 | force himself 0 869 | getting mixed up with crime , drugs , and violence 0 870 | bold and fair 1 871 | like an upholder of justice 1 872 | factually inaccurate 0 873 | criticism 0 874 | was shocked 0 875 | saw 1 876 | had refused to accept 0 877 | would make it possible 1 878 | domino effect 0 879 | palestinian hand that is stretched for peace 0 880 | has refused to bow 0 881 | despite the perils 0 882 | will be instrumental 1 883 | concerns 0 884 | those who are really behind it all , those who are behind this business , use the imf 0 885 | accommodate 1 886 | doubts 0 887 | slammed 0 888 | will seek 1 889 | grieving 0 890 | called for 1 891 | isolated and frustrated 0 892 | in the name of self - righteousness 0 893 | intensify and accelerate 0 894 | keeping a wary eye 0 895 | took to the streets 0 896 | gave free rein 0 897 | to rid 1 898 | wanted 1 899 | betrayal 0 900 | the branding 0 901 | collapse 0 902 | i lost any sense of dignity 0 903 | massive intimidatory show of force that was evident in the deployment of security forces 0 904 | unpopular system 0 905 | regarded 0 906 | has posed serious threats 0 907 | to draw up an action campaign aimed 0 908 | illegally 0 909 | reputation was ruined 0 910 | the opposition 0 911 | cracks are appearing 0 912 | could fray the bilateral goodwill 0 913 | was deemed 0 914 | has renounced 0 915 | the cia would organize coups and assassinations to further us interests 0 916 | the definition of a democratic regime is subjected to double standards 0 917 | strike 0 918 | challenged 0 919 | sympathy 0 920 | rubbed their palms at length 1 921 | shook 0 922 | sided with 1 923 | trying to move the goal posts 0 924 | lose popular support among 0 925 | will only invite worse criticism and rejection 0 926 | axis of evil 0 927 | absolutely not permitted 0 928 | found little to object to 0 929 | justify 1 930 | extension of its characteristic policy of hegemony 0 931 | unrealistic 0 932 | warned against 0 933 | crippled 0 934 | well 1 935 | it is ineffective 0 936 | is now promoting 1 937 | called 0 938 | in spite of the good offices 0 939 | genuinely 1 940 | for more than two years 0 941 | tried to encourage 1 942 | loathsome action 0 943 | reckless 0 944 | and killed 0 945 | likening 0 946 | will now agree 1 947 | exalted 1 948 | would like 1 949 | hoped 1 950 | even more dismal 0 951 | gives new life 1 952 | rejected 0 953 | predatory 0 954 | viciously spoke ill 0 955 | in protest against 0 956 | was remarkable 1 957 | -------------------------------------------------------------------------------- /datasets/mpqa/dev.tsv: -------------------------------------------------------------------------------- 1 | sentence label 2 | unfettered support 1 3 | such a mindset will impair 0 4 | has revealed george bush's talents as a war leader 1 5 | provide support to the us , as it did 1 6 | kill 0 7 | play with 0 8 | embark on trampling upon human rights of civilians 0 9 | in such a complex state 0 10 | pursue 0 11 | will continue to support 1 12 | places strains 0 13 | wish 1 14 | subjected 0 15 | benefits 0 16 | highly respected 1 17 | this agreement 1 18 | under the strong influence 0 19 | preach 1 20 | hardest hit 0 21 | would be easier 1 22 | will be irresponsible 0 23 | conspicuous policy of playing games 0 24 | to agree 1 25 | to ask 1 26 | was very strong about this 1 27 | were even more full of praise 1 28 | can not be taken seriously 0 29 | felt gratified and relieved 1 30 | invited 1 31 | languages without nuances 0 32 | scheme 0 33 | incapacity to put crime under control 0 34 | it is pure rhetoric 0 35 | charged 0 36 | attacks 0 37 | anger 0 38 | vowed 1 39 | ridiculous 0 40 | are ready 1 41 | were accused 0 42 | will of 1 43 | unheeded 0 44 | opposes and rejects 0 45 | outcry 0 46 | join forces 1 47 | ca n't be burying its head in the sand 0 48 | insidious 0 49 | would not accept 0 50 | too costly to meet 0 51 | should not 0 52 | does not reflect 0 53 | invited 1 54 | top priority of 1 55 | only benefits corporate america 0 56 | still 0 57 | support 1 58 | not intent on revitalizing its economies 0 59 | angry anti - us protests erupted 0 60 | hopes 1 61 | m feeling much better 1 62 | branded 0 63 | the danger 0 64 | proposal to reunify 1 65 | nothing seemed more normal than to settle in someone else's territory 0 66 | to blame 0 67 | have been growing less visionary 0 68 | will fight on 0 69 | shortcomings 0 70 | deadly 0 71 | would incur its displeasure 0 72 | like a dangerous virus 0 73 | it is a futile illusion because it is a lie 0 74 | are irritated 0 75 | in a way our government has not been 0 76 | neither good nor bad , just incorrigible 0 77 | destroyed 0 78 | respecting 1 79 | to promote 1 80 | witch - hunting 0 81 | biggest terrorist country 0 82 | legitimate dissent 1 83 | even against vehicles transporting pregnant women in labor and against unarmed citizens 0 84 | niceties 1 85 | his ship has been sailing towards a wrong direction for far too long 0 86 | critics 0 87 | once again turn its back 0 88 | growing unrest 0 89 | defects 0 90 | has no love lost 0 91 | violent protests 0 92 | confined to minute cells 0 93 | immediately 1 94 | corrupt politicians 0 95 | a crime against humanity 0 96 | the rights and privileges that they would automatically get under geneva convention 0 97 | perils of delusion 0 98 | would be better off 1 99 | have been concerned 0 100 | uproar 0 101 | pained him 0 102 | terrorism 0 103 | not only prejudice 0 104 | a brutal occupation 0 105 | meddling 0 106 | falls at a bad time 0 107 | ugliest crimes 0 108 | picking on 0 109 | allegations 0 110 | are being well treated 1 111 | the serious crisis 0 112 | will be a monumental tragedy 0 113 | is proud 1 114 | thus 0 115 | oh , my god 0 116 | will not allow 0 117 | the worst crisis 0 118 | turn its back 0 119 | deepened 0 120 | will complain 0 121 | very hard line 0 122 | opposition 0 123 | would not dilute 1 124 | secret 0 125 | opposition 0 126 | illusory 0 127 | not adequately free and fair 0 128 | should follow 1 129 | impossible 0 130 | an aberration that goes against mankind 0 131 | have preference for 1 132 | all around us 0 133 | very , very large 1 134 | lie 0 135 | were ill - received 0 136 | difficult 0 137 | and then assert the laws of war do not apply 0 138 | from upholding religious and cultural values 1 139 | preferred 1 140 | full support 1 141 | criticizing 0 142 | immediate exploitation 0 143 | ignored 0 144 | has unashamedly seen fit 1 145 | support 1 146 | now a reality 1 147 | gaffe 0 148 | virtually nothing 0 149 | no place in israel can be considered safe 0 150 | to invite 1 151 | prevent such a development 1 152 | rigged 0 153 | logical solution 1 154 | bungled its dealings 0 155 | demerits 0 156 | like the suffering 0 157 | unacceptable 0 158 | supports 1 159 | 100 percent 1 160 | does not appear 0 161 | pretext 0 162 | what begins to happen when we hate our friends 0 163 | denies 0 164 | intimidation 0 165 | children skipping school 0 166 | criticism 0 167 | violence and intimidation 0 168 | blatantly obstructed 0 169 | reactions of repulsion 0 170 | are not happy 0 171 | criticized 0 172 | a growing impression that misuari could pose a security threat 0 173 | left behind 1 174 | they have not succeeded , and will never succeed 1 175 | it's shameful 0 176 | beyond the reach of any legal regime 0 177 | is pessimistic 0 178 | has no right 0 179 | are masterminded 0 180 | regret 0 181 | will weaken soon 0 182 | terrible tragedy 0 183 | there is no alternative to it but conflict , isolation , nationalism , and ultimately war 0 184 | freak show 0 185 | insisted 1 186 | axis of evil 0 187 | come under fire 0 188 | threats expressed by 0 189 | the support 1 190 | wrong 0 191 | has resisted 0 192 | the darkest hour is always before the dawn 1 193 | the entire palestinian people 0 194 | denounced 0 195 | very serious threat 0 196 | only 0 197 | making a spectacle of 0 198 | promote meetings 1 199 | to mould the electoral process in his favour 0 200 | not a single day would pass in peace and with no palestinians shedding blood 0 201 | crises 0 202 | hoping 1 203 | to intimidate women and children 0 204 | disputes 0 205 | extreme right 0 206 | most dangerous 0 207 | may even get better 0 208 | promise 1 209 | has pledged 1 210 | one body with two heads 0 211 | feel at ease 1 212 | turning a blind eye 0 213 | has floundered 0 214 | warns 0 215 | most cogent argument 1 216 | lump 0 217 | deals a blow 0 218 | bad treatment 0 219 | concerned 0 220 | have criticized 0 221 | you can hardly speak of a targeting error 0 222 | certain countries resort to 0 223 | called for 1 224 | violating human rights 0 225 | unusual 0 226 | would undermine 0 227 | a terrorist act 0 228 | threat 0 229 | objectives 1 230 | can be abated 1 231 | muscle - flexing 0 232 | pursued 1 233 | brilliant 1 234 | committed 1 235 | committing themselves 1 236 | support 1 237 | the importance of china 1 238 | only one single 0 239 | hoped 1 240 | closed ranks behind 1 241 | peace 1 242 | would not be a bad idea 1 243 | legitimate 1 244 | closer to that of cowboys than to a civilized mentality 0 245 | such fruitful results 1 246 | double crime 0 247 | perfectly at ease 1 248 | mistake 0 249 | repeatedly accused 0 250 | ceased to be a soldier of the fatherland 0 251 | however 0 252 | apocalyptic savagery 0 253 | a border of peace and good neighbourliness 1 254 | call for 1 255 | hurt 0 256 | declined to endorse 0 257 | countries such as iran , iraq and north korea represent an ``axis of evil 0 258 | more serious 0 259 | am confident 1 260 | picking on 0 261 | whether 0 262 | go out into the streets to defend 1 263 | committed one more mistake 0 264 | the agreement 1 265 | charging 0 266 | other aggressive acts against lebanon 0 267 | enjoying 1 268 | decided 0 269 | hard - line 0 270 | feels 0 271 | do n't want 0 272 | the threats launched 0 273 | rejected 0 274 | breaking fundamental concepts 0 275 | increasingly tyrannical 0 276 | undisguised declaration of war and a rhetoric threatening aggression 0 277 | want 1 278 | hyperbole , 0 279 | patronizing 0 280 | arbitrary arrests 0 281 | jeopardy 0 282 | could not be said to adequately comply 0 283 | with good economic management , israel should have little trouble 1 284 | terrorist allies 0 285 | swift criticism from 0 286 | instead 0 287 | steering the economy into disaster 0 288 | grew so unhappy 0 289 | will not resort 0 290 | has refused 0 291 | may not be feasible 0 292 | danger of being shelved altogether 0 293 | unmanageable 0 294 | favouring independence 1 295 | misrule 0 296 | misery 0 297 | steering the nation to prosperity and progress 1 298 | america's biding 0 299 | compounded the problem 0 300 | designed to benefit mugabe 0 301 | supposed to be 0 302 | appraised 1 303 | stand beside right and justice 1 304 | refuses 0 305 | surged 1 306 | denied 0 307 | fake imposter 0 308 | great evil on open display 0 309 | ideal , sunny clime 1 310 | seems to be determined to expand the scope of the anti - terror war 0 311 | a body blow 0 312 | as full citizens in the same sate 1 313 | lecturing 0 314 | suffered 0 315 | harmed 0 316 | criticized 0 317 | scores of 0 318 | ignore the consequences 0 319 | territorial ambition 0 320 | not 0 321 | no one has the right to wage war against the history of this nation 0 322 | wants 1 323 | hatred 0 324 | what occured in the united states on 11 september 0 325 | oneupmanship 0 326 | labeling 0 327 | showed little - disguised irritation 0 328 | concern 0 329 | even 0 330 | the possibility of a democratic , stable and prosperous 1 331 | strongly criticized and condemned 0 332 | will not admit 0 333 | persuade 1 334 | was more than confident 1 335 | had not heeded 0 336 | cost - effective 1 337 | mistake 0 338 | would adhere to a pluralistic vision 1 339 | had asked 1 340 | replete with 1 341 | shameful 0 342 | will only 0 343 | accidental slight 0 344 | nothing whatsoever 0 345 | could no longer tolerate 0 346 | leeway 1 347 | the doubts 0 348 | aggressions against 0 349 | raving 0 350 | are blamed by 0 351 | so many uncertainties 0 352 | destined to collapse 1 353 | confidence crisis 0 354 | are accusing 0 355 | warned 0 356 | such pessimism 0 357 | anxiety 0 358 | cause a rift 0 359 | no politically prudent 0 360 | held out an olive branch 1 361 | continues to demolish 0 362 | does not endorse 0 363 | failed 1 364 | comprehensive destructive policy 0 365 | sounds clumsy 0 366 | fails to meet the standard of being free and fair 0 367 | willfulness 1 368 | tough policy 0 369 | has not satisfied 0 370 | extremist inclinations 0 371 | unilateral 0 372 | the iranians have not done what the pakistan government has done 0 373 | because 0 374 | boycotted 0 375 | without just compensation 0 376 | desperation of the people 0 377 | damage 0 378 | dominating the world 0 379 | sought 1 380 | fearing 0 381 | that concessions are sufficiently concrete 1 382 | put his nation first 1 383 | expresses the concern 0 384 | can contaminate 0 385 | to put it mildly 0 386 | will do its utmost 1 387 | endorsed 1 388 | will surely be on the president's lips 0 389 | with typical understatement 0 390 | lambasted 0 391 | affirmed 1 392 | vitriol 0 393 | in compliance 0 394 | numerous serious abuses 0 395 | asked 1 396 | confidence 1 397 | played the same tactic again 0 398 | are enthusiastic 1 399 | valued ally and friend 1 400 | so - called 0 401 | are regarding 0 402 | running out of meaningful options 0 403 | can not accept 0 404 | is concerned 0 405 | further criticism 0 406 | of all the unconscionable things 0 407 | longest destructive war 0 408 | plight of 0 409 | can no longer stand shoulder to shoulder 0 410 | it would be madness to hold elections now 0 411 | should be looking up smiling 1 412 | if one goes by the us logic , only the us spy plane is permitted to spy at other people's doorsteps 0 413 | begins to bear fruit 1 414 | rigged 0 415 | serious division 0 416 | enjoys no social base 0 417 | very constructive 1 418 | grateful 1 419 | turned thugs 0 420 | was unconstitutional 0 421 | scares away 0 422 | declining to endorse 0 423 | violate human rights 0 424 | accused 0 425 | negatively 0 426 | due regard 1 427 | simplistic 0 428 | will help renew 1 429 | succumbing 0 430 | peace to prevail 1 431 | despite all of this , however 0 432 | proves this beyond the shadow of a doubt 1 433 | is accused 0 434 | intolerant 0 435 | the visit has achieved positive and successful results 1 436 | wantonly infringing 0 437 | assassin 0 438 | continue to obstruct 0 439 | interests 1 440 | was a strong desire among 1 441 | to denounce 0 442 | left no avenue unexplored 1 443 | endorsed 1 444 | would also ensure a durable peace 1 445 | as a vehicle for increasing personal popularity 1 446 | would have been sending a very bad signal 0 447 | continues to refuse 0 448 | pursuit 0 449 | committing themselves to peace 1 450 | accuses 0 451 | as if they were quarries 0 452 | far worse than those prevailing in camp x - ray 0 453 | inhumanely 0 454 | appropriate 1 455 | poor 0 456 | good education system 1 457 | illegal 0 458 | devastating 0 459 | to create the impression 0 460 | exacerbating 0 461 | faltered as a result of israel's intransigence 0 462 | his career was like a rough sea , with highs and lows and never calm 0 463 | generally approved of 1 464 | deviant 0 465 | without trying to maneuver , place obstacles , or set impossible conditions 0 466 | the fire is raging at home 0 467 | lacks credibility and can not withstand any objective scrutiny 0 468 | endorsed 1 469 | assassinate innocent activists and citizens 0 470 | than they deserve 0 471 | refusal to respect its obligations 0 472 | would not 0 473 | for the first time 1 474 | beautiful historic coincidence 1 475 | hardly elastic enough 0 476 | approve 1 477 | hope 1 478 | was so hard on 0 479 | most serious consequences 0 480 | will have trouble wriggling out of the need to explain and justify 0 481 | not completely reliable 0 482 | democracy exhausted all its generosity 0 483 | relatively calm 1 484 | are extremely concerned 0 485 | bringing an end to terrorism and the taliban 1 486 | reject 0 487 | stand firm 1 488 | new political bogey 0 489 | abstractly recommend 1 490 | was worried 0 491 | hardline 0 492 | crash course 0 493 | recognition 1 494 | repeated denunciations 0 495 | hope 1 496 | carnage 0 497 | threats 0 498 | swapping the silver - visored helmet of a space cadet for the green eyeshade of a consummate bean counter 0 499 | to voice his concern 0 500 | significant 0 501 | can bring security and stability to all the parties without exception 1 502 | would consider 1 503 | be reproached 0 504 | or so it claims 0 505 | harboring serious doubts 0 506 | if it becomes more of a nuisance 0 507 | need to establish a just peace 1 508 | immediate support 1 509 | discrediting 0 510 | picking a quarrel 0 511 | bridle the israeli oppressive activity 1 512 | the euro , our currency 1 513 | edifying photograph 1 514 | wrong judgment 0 515 | preservation of global peace and security 1 516 | sharply questioned 0 517 | denied 0 518 | respect 1 519 | unfeasible 0 520 | has promised 1 521 | fierce demonstrations 0 522 | to thwart 1 523 | disrespect of advice 0 524 | war on 0 525 | as the saying goes , when you pull up the turnip , mud comes with it 0 526 | smiling 1 527 | judgment 0 528 | an axis of evil 0 529 | criticizes 0 530 | costly burden 0 531 | detrimental 0 532 | earned eternal notoriety 0 533 | making liberal use of the but construction 0 534 | plotting 0 535 | financial disaster 0 536 | would be regarded 0 537 | parasitic economies 0 538 | wanted 1 539 | more demagogy than arguments 0 540 | israel's superior ability to punish palestinians 0 541 | flirting with 0 542 | was perceived 0 543 | what is europe 0 544 | the best role model 1 545 | inaccurate and fabricated 0 546 | recognition 1 547 | axis of evil rhetoric 0 548 | lost their illegitimate interests 0 549 | not a man who likes to improvise 0 550 | is criticized 0 551 | can trust 1 552 | foresaw 0 553 | heresy 0 554 | him has not been pretty 0 555 | demonstrations and rallies against 0 556 | axis of evil 0 557 | cold war heritage 0 558 | possible regression 0 559 | will guarantee 0 560 | came out in protest 0 561 | progressive 1 562 | peaceful protests 0 563 | difficulty , of course 0 564 | put on a spectacle 0 565 | dogma 0 566 | devastating what remains 0 567 | is stiffening in its attitude toward 0 568 | stuck for 18 months on ground zero 0 569 | put most of the blame 0 570 | very large 0 571 | once again 0 572 | it is almost impossible 0 573 | has criticised 0 574 | axis of evil 0 575 | humiliation of 0 576 | calm 1 577 | are tired 0 578 | enjoy 1 579 | wanted 1 580 | has blamed 0 581 | unprecedented force 0 582 | provocative 0 583 | far preferable 1 584 | repeated warnings 0 585 | neither free nor fair 0 586 | backing 1 587 | widespread debates against 0 588 | using violence , intimidation , special laws and dirty tricks to fix the two - day election 0 589 | continue to obstruct 0 590 | seeking 1 591 | biased attitude 0 592 | long neglected 0 593 | success of western europe after world war ii laid in its creation of the good neighborly field 1 594 | even risking his own future 0 595 | accused 0 596 | denied 0 597 | desperate tableau 0 598 | could n't wait 1 599 | blessed 1 600 | nuclear club 0 601 | most important 1 602 | wants 0 603 | questioning 0 604 | no one is listening 0 605 | complicates any situation 0 606 | who were expelled from their homeland 0 607 | distortion of reality 0 608 | had the advantage for washington of weakening opec 1 609 | the support 1 610 | may god be satisfied with him 1 611 | continue to rise 0 612 | will not be able lay claim to a worthy place in the civilized world 0 613 | has to be maintained at any cost 0 614 | thugs 0 615 | apprehensions 0 616 | want 1 617 | recommendations 1 618 | one of the few enlightened officials 1 619 | democratic achievements 1 620 | tensions between 0 621 | see 0 622 | infuriating 0 623 | first organized protest 0 624 | feared by 0 625 | ignored 0 626 | believe 0 627 | particular concern is raised 0 628 | main problems are in the economic area 0 629 | damages the credibility 0 630 | declining to comply 0 631 | blamed 0 632 | allegedly threatening 0 633 | dismisses 0 634 | the unconditional support 1 635 | not satisfactory 0 636 | negated 0 637 | berating 0 638 | would remake venezuela to benefit the poor 1 639 | `world judge of human rights' 0 640 | were only to be expected 0 641 | cooperation 1 642 | issuing a letter of objection 0 643 | would support wholeheartedly 1 644 | giving him medium approval 1 645 | at a loss 0 646 | dwindling moral values 0 647 | is self - inflicted 0 648 | if it is successful 0 649 | impose 0 650 | eradication 0 651 | feared 0 652 | scupper any hope 0 653 | backing away 0 654 | held 0 655 | will have no universally - acknowledged 0 656 | did not show the slightest sympathy , still less the least regret 0 657 | advanced a mendacious critique 0 658 | disputes between 0 659 | encouraged 1 660 | another setback for the imf 0 661 | wanted 1 662 | clear priority 1 663 | crimes of war 0 664 | destroyed 0 665 | warned 0 666 | decided to back 1 667 | violates the united nations charter 0 668 | interfere 0 669 | have n't already violated 0 670 | defenceless 0 671 | was effective especially 1 672 | is accusing 0 673 | a man blinded by power 0 674 | backing 1 675 | it's really strange 0 676 | his favourite newfie 0 677 | mercurial strongman 0 678 | that four shots would solve the problem 0 679 | would not accept 0 680 | there is no reason for it to be impossible 1 681 | poor response 0 682 | aspirations 1 683 | to the healthy development of sino - us relations 1 684 | resolute commitment 1 685 | democracy 1 686 | the concern 0 687 | only wants 1 688 | needlessly 0 689 | secretly behind every local misfortune 0 690 | positive 1 691 | is disgusted 0 692 | high degree of difficulty 0 693 | improvement 1 694 | spurned 0 695 | has denounced 0 696 | axis of evil theory 0 697 | extensive support 1 698 | worst 0 699 | its selfishness 0 700 | ambition 1 701 | find no grounds for any support 0 702 | achieving better results 1 703 | no tears will be shed in this corner 0 704 | is a telling example of a new - and perhaps risky - approach 0 705 | will become one of the best elements 1 706 | was also quite naive 0 707 | the criticism 0 708 | desire to work 1 709 | for the sake of peace 1 710 | double standard 0 711 | pretended 0 712 | alarm 0 713 | discontent 0 714 | furthermore 0 715 | massacring thousands of innocent people 0 716 | immense gulf between 0 717 | still wants 1 718 | basically sound 1 719 | repeatedly threatened 0 720 | mendacious 0 721 | beyond reproach 1 722 | but 0 723 | neat stuff 1 724 | purposely play up 0 725 | blatant campaign of intimidation 0 726 | disappointment 0 727 | provoked 0 728 | intends to 0 729 | worried 0 730 | real danger 0 731 | supported 1 732 | support 1 733 | were plotting 0 734 | agreed 1 735 | most realistic 1 736 | violating 0 737 | value sharing 1 738 | to its knees 0 739 | to request 1 740 | get out ! . 0 741 | the axis of evil 0 742 | assassins , assassins , assassins 0 743 | is the appropriate one 1 744 | resistance 1 745 | indulging in blood - shed and their lunaticism 0 746 | desperate 0 747 | to describe 0 748 | can ask 1 749 | know 0 750 | invited 1 751 | to express satisfaction 1 752 | harmonious and close 1 753 | support 1 754 | has been inclined toward 1 755 | incited 0 756 | was like giving away the country to its former colonial masters 0 757 | the destruction 0 758 | already volatile 0 759 | overwhelming evidence 0 760 | imperative for harmonious society 1 761 | nor does it seem proper 0 762 | storming palestinian territories 0 763 | is reflected 1 764 | expressed satisfaction 1 765 | intends 1 766 | ruined 0 767 | made public a blacklist 0 768 | those who seek to attack and destroy law and order and legitimate government 0 769 | unlawful 0 770 | freely 1 771 | reassurances 1 772 | agreeable 1 773 | dont want 0 774 | did not agree 0 775 | to deny 0 776 | perception 0 777 | had particularly harsh words for 0 778 | want 1 779 | the brutality with which this closure was implemented was unheard of 0 780 | friendship 1 781 | axis of evil 0 782 | supported 1 783 | slaughter of more than 0 784 | will be asked 1 785 | just , legitimate rights 1 786 | condemned 0 787 | thanked 1 788 | unstable situation 0 789 | merely an interlude 0 790 | depends on 0 791 | has been efficient enough 1 792 | grand 1 793 | any step backward for democracy 0 794 | quite supportive 1 795 | great leader 1 796 | would help 1 797 | formed a close friendship 1 798 | uphold 1 799 | is feared by 0 800 | used to boast 1 801 | regarded 1 802 | would violate international standards prohibiting cruel , inhuman or degrading treatment 0 803 | convince the americans that the bush administration has not only succeeded in removing a regime of religious madmen 0 804 | has put west asia on the brink of another war 0 805 | has also backed 1 806 | attacks 0 807 | banned 0 808 | dishonoring 0 809 | sent congratulations 1 810 | thought 0 811 | may be disconcerted 0 812 | concern 0 813 | will be inviting 1 814 | have to bash 0 815 | plan 1 816 | guaranteed 1 817 | the mistake is to assume 0 818 | insists 1 819 | sticking to the polluting policies 0 820 | can be difficult 0 821 | can prevent 1 822 | extremely dangerous 0 823 | would beg for surrender 0 824 | protesting 0 825 | proclaimed 1 826 | to applaud everything 1 827 | the most ideal way 1 828 | accused 0 829 | what fate awaits argentina's crisis - stricken economy 0 830 | especially those who are dangerous and suicidal 0 831 | has failed for a decade now 0 832 | had likely deteriorated beyond repair 0 833 | attempts to suppress 0 834 | endorse 1 835 | if there were serious problems 0 836 | designed to achieve an outcome - power at all costs 0 837 | criticism of 0 838 | everything 0 839 | so that we would not become terrorists 1 840 | higher transparency 1 841 | warning 0 842 | does not justify all the means 0 843 | the further 0 844 | artificial obstacles 0 845 | would improve 0 846 | has now made himself heard 1 847 | openly invited 1 848 | argued 0 849 | decisions 0 850 | admittedly 1 851 | still prefer 1 852 | had planned 0 853 | sacrifice himself 0 854 | very pleasant for 1 855 | prevented 0 856 | favorable opinions 1 857 | no one we know to have planned such deeds will escape 0 858 | to express openly dissenting political and religious views 0 859 | confidence 1 860 | no business 0 861 | has provoked concern 0 862 | had aligned against 0 863 | under the pretext of fighting terrorism 0 864 | violation of the palestinian people's human rights 0 865 | understanding and approval 1 866 | concerns 0 867 | universal character of the prophets 1 868 | force himself 0 869 | getting mixed up with crime , drugs , and violence 0 870 | bold and fair 1 871 | like an upholder of justice 1 872 | factually inaccurate 0 873 | criticism 0 874 | was shocked 0 875 | saw 1 876 | had refused to accept 0 877 | would make it possible 1 878 | domino effect 0 879 | palestinian hand that is stretched for peace 0 880 | has refused to bow 0 881 | despite the perils 0 882 | will be instrumental 1 883 | concerns 0 884 | those who are really behind it all , those who are behind this business , use the imf 0 885 | accommodate 1 886 | doubts 0 887 | slammed 0 888 | will seek 1 889 | grieving 0 890 | called for 1 891 | isolated and frustrated 0 892 | in the name of self - righteousness 0 893 | intensify and accelerate 0 894 | keeping a wary eye 0 895 | took to the streets 0 896 | gave free rein 0 897 | to rid 1 898 | wanted 1 899 | betrayal 0 900 | the branding 0 901 | collapse 0 902 | i lost any sense of dignity 0 903 | massive intimidatory show of force that was evident in the deployment of security forces 0 904 | unpopular system 0 905 | regarded 0 906 | has posed serious threats 0 907 | to draw up an action campaign aimed 0 908 | illegally 0 909 | reputation was ruined 0 910 | the opposition 0 911 | cracks are appearing 0 912 | could fray the bilateral goodwill 0 913 | was deemed 0 914 | has renounced 0 915 | the cia would organize coups and assassinations to further us interests 0 916 | the definition of a democratic regime is subjected to double standards 0 917 | strike 0 918 | challenged 0 919 | sympathy 0 920 | rubbed their palms at length 1 921 | shook 0 922 | sided with 1 923 | trying to move the goal posts 0 924 | lose popular support among 0 925 | will only invite worse criticism and rejection 0 926 | axis of evil 0 927 | absolutely not permitted 0 928 | found little to object to 0 929 | justify 1 930 | extension of its characteristic policy of hegemony 0 931 | unrealistic 0 932 | warned against 0 933 | crippled 0 934 | well 1 935 | it is ineffective 0 936 | is now promoting 1 937 | called 0 938 | in spite of the good offices 0 939 | genuinely 1 940 | for more than two years 0 941 | tried to encourage 1 942 | loathsome action 0 943 | reckless 0 944 | and killed 0 945 | likening 0 946 | will now agree 1 947 | exalted 1 948 | would like 1 949 | hoped 1 950 | even more dismal 0 951 | gives new life 1 952 | rejected 0 953 | predatory 0 954 | viciously spoke ill 0 955 | in protest against 0 956 | was remarkable 1 957 | -------------------------------------------------------------------------------- /aug_data/mpqa/test.tsv: -------------------------------------------------------------------------------- 1 | sentence label 2 | to criticize 0 3 | drive him away 0 4 | did not want to give up 0 5 | humane 1 6 | criticism 0 7 | does not serve the interest of peace 0 8 | support 1 9 | most widespread protest 0 10 | the labyrinth 0 11 | not in the self - interests 0 12 | his hands would be empty 0 13 | the world did not always realize how good they were . 0 14 | according to washington's claim 0 15 | strongly denounced 0 16 | had expressed doubt 0 17 | the demonstrations 0 18 | interests 1 19 | may still hope 1 20 | it makes no sense 0 21 | were regrettable 0 22 | zimbabwe is for zimbabweans 1 23 | will not be able to take the steps absolutely necessary 0 24 | which is getting worse day after day 0 25 | is not shaping up as a romance 0 26 | bush has made a 'big mistake 0 27 | needs to cooperate 1 28 | coup farce 0 29 | lost reason 0 30 | no small number of critical shots 0 31 | charged 0 32 | joyous 1 33 | thousands of its supporters denied the vote 0 34 | debacle 0 35 | did not adequately allow the free and fair expression of will 0 36 | attempted 0 37 | charges brought 0 38 | went along 0 39 | torture and inhuman treatment 0 40 | is due to its relations with the global economy 0 41 | supporting 1 42 | gradually promoted 1 43 | certainly not raised us image 0 44 | staged 0 45 | an illusion 0 46 | solid and often eloquent 1 47 | so - called 0 48 | single - handedly destroyed 0 49 | friendship and cooperation 1 50 | to convince 1 51 | is ignoring its commitments in the current situation 0 52 | suspicions 0 53 | victory of democracy 1 54 | lumping 0 55 | constitute a breeding ground for bilateral tensions 0 56 | retaliate 0 57 | entitled at all times to decent treatment 1 58 | full support 1 59 | unfortunately 0 60 | fraud 0 61 | are seeking 1 62 | dispute 0 63 | appreciating 1 64 | the principles of kyoto were fine 1 65 | a casualty of his own government's attempt to maintain favor in washington 0 66 | complaints 0 67 | real 1 68 | had been legitimate 1 69 | stolen 0 70 | humane 1 71 | proof that international law protects human beings 0 72 | would not hurt taiwan's interests 1 73 | want 1 74 | but not democratic legitimacy 0 75 | are lining up to condemn 0 76 | ideological fight against capitalism 0 77 | see 0 78 | only one battle 0 79 | is considered 1 80 | suffering 0 81 | hopes 1 82 | being power - hungry 0 83 | yardstick for progress 1 84 | would be a great tragedy 0 85 | it is unacceptable for canada to allow any ambiguity in policy 0 86 | modern , civilized instruments of coercion 0 87 | to deny 0 88 | were supported by 1 89 | grimly replied 0 90 | terrorists 0 91 | not free and fair 0 92 | unlawful combatants 0 93 | abundantly clear 0 94 | whoever says that these are small steps is incapable of recognizing the european union's experience 0 95 | threatening 0 96 | can not wholly agree with 0 97 | needs to discipline 0 98 | had himself to blame 0 99 | supported 1 100 | warned 0 101 | new labyrinths and tunnels 0 102 | biggest electoral fraud 0 103 | turned down 0 104 | loyal 1 105 | criticism 0 106 | refuses to consider 0 107 | have criticised 0 108 | it did not apply even to black people held in jail in the southern united states until a few years ago 0 109 | are strengthening his country's appetite for a public campaign for change 1 110 | rejection of 0 111 | has kept a close relationship 1 112 | irresponsible saber rattling 0 113 | absurdly 0 114 | increasingly angry opposition 0 115 | legitimate 1 116 | opposes and condemns 0 117 | to trample upon 0 118 | little one can do about it 0 119 | would not find it at all strange 0 120 | wants 1 121 | all manner of 1 122 | only just begun 0 123 | wariness 0 124 | the criminals 0 125 | will want to know 1 126 | deleterious 0 127 | biggest global problem we are facing today 0 128 | it is true 1 129 | bellowing 0 130 | burden sharing 0 131 | inevitably succumb 0 132 | fulfill its pledges 1 133 | be considered 1 134 | weak or even nonexistant 0 135 | formation of good neighborly field 1 136 | superb ability 1 137 | look forward 1 138 | his dynamism is an asset 1 139 | openly took issue 0 140 | eventually offered talks 1 141 | had reservations 0 142 | deems 0 143 | had argued 0 144 | were at odds 0 145 | let's support them 1 146 | thrust into its worst crisis 0 147 | can not adequately explain 0 148 | humanely 1 149 | issued a statement denouncing 0 150 | gain the upper hand 0 151 | illegitimate 0 152 | it is the united states that is threatening 0 153 | hardly convincing 0 154 | were openly supporting 1 155 | never an encouraging word if 0 156 | immediate 0 157 | wants 1 158 | will not change 0 159 | his goal 1 160 | unwarranted 0 161 | concern 0 162 | backing out 0 163 | personally offended 0 164 | enjoying hygienic and food conditions that were very superior to what they could have known in afghanistan 1 165 | this is bullshit 0 166 | pot calling the kettle black 0 167 | repeatedly made outrageous 0 168 | sensational 0 169 | indecisive 0 170 | humiliation 0 171 | have the final say 0 172 | have a conviction 0 173 | security 1 174 | work better than anything i've seen so far 1 175 | regards 0 176 | concerning their life and death 0 177 | rejects 0 178 | concerns 0 179 | took such a concerted stance critical 0 180 | most atrocious human rights violations 0 181 | warned 0 182 | should take urgent measures 1 183 | the rare opportunity 1 184 | dictatorial tactics 0 185 | concern 0 186 | wants to speak the language of violence 0 187 | says with a bright smile 1 188 | want to make sure 1 189 | simultaneously raises apprehensions and resentment 0 190 | long dominated 0 191 | is afraid 0 192 | a rising tide of international criticism 0 193 | axis of evil remark 0 194 | closed ranks 0 195 | can at best 1 196 | below minimum standards 0 197 | deviate 0 198 | inflicted nuclear disaster on humankind 0 199 | would possibly forfeit all recent efforts 0 200 | congratulatory message 1 201 | nothing practical 0 202 | significant results 1 203 | would like to tell 1 204 | wants 1 205 | assassinating citizens it alleges have links to security activities 0 206 | have severed ties 0 207 | deteriorated 0 208 | imperialism is the source of war 0 209 | victor , 0 210 | can never dampen or bend 1 211 | questioned 0 212 | calling for 1 213 | is being sharply criticized 0 214 | can create trouble and instability 0 215 | hopes 1 216 | opposed 0 217 | boycott 0 218 | repeated its allegations 0 219 | in doubt 0 220 | illusion 0 221 | good luck 1 222 | violated human rights 0 223 | has escalated wildly 0 224 | there is no scope for such madness 0 225 | favored 1 226 | feel 0 227 | to dismiss 0 228 | giving him free rein to arrest at will 0 229 | all the worse 0 230 | have denied 0 231 | under siege 0 232 | sabre - rattling 0 233 | was charged with treason 0 234 | potential carrot 1 235 | the ire of 0 236 | constant aggressive attitude 0 237 | gives the saudi government its due 1 238 | pent - up opposition 0 239 | meddling 0 240 | was an affront to the muslim people 0 241 | understandable 1 242 | will become much more difficult 0 243 | illegal combatants 0 244 | impatience 0 245 | is optimistic 1 246 | without 0 247 | optimistic and hopeful 1 248 | was angered 0 249 | has called into question 0 250 | reiterated 1 251 | that's what happened in argentina 0 252 | totally biased 0 253 | sharply criticised 0 254 | will prove respect for human dignity 1 255 | irresponsible 0 256 | hoped 1 257 | was wrong 0 258 | are committed 1 259 | to promote 1 260 | engulfed by violence and crime 0 261 | demonstrated 0 262 | fundamental importance 0 263 | arbitrary 0 264 | expressed support 1 265 | has demoralized and corrupted 0 266 | war maniacs 0 267 | unfortunately for the nation 0 268 | hotly contested 0 269 | to dictate 0 270 | chaos bordering on a binge of wanton killing 0 271 | urged 1 272 | false premises regarding anticipated government revenues 0 273 | rigged 0 274 | have hotly denied 0 275 | not selective 1 276 | railing 0 277 | go into the streets en masse and defend and support 1 278 | want 1 279 | fully grasp 1 280 | gave their thumbs up 1 281 | countries of special concern 0 282 | marched against 0 283 | total confusion of historical data 0 284 | never 0 285 | beseeched 1 286 | puts every jew and every israeli to shame 0 287 | epic - sized default 0 288 | agitated 0 289 | collusive nexus 0 290 | policy of isolation and stifling 0 291 | preying 0 292 | nor 0 293 | chooses to disregard 0 294 | agreement 0 295 | but those were the us needs 0 296 | powerful compulsions it is apparently labouring under 0 297 | too small 0 298 | frantically making efforts 1 299 | not tolerable 0 300 | would still soar 0 301 | making fun of 0 302 | rejected 0 303 | position and particular stance opposed to 0 304 | planning called for 1 305 | nevertheless 1 306 | legitimate 1 307 | condemned 0 308 | thought 0 309 | claims 0 310 | lack of understanding and tolerance 0 311 | are only dictated to him 0 312 | support 1 313 | because of its repeated violation 0 314 | there is a breakdown in law and order 0 315 | they discredit the usa 0 316 | was not enough 0 317 | made a harsh appraisal 0 318 | favorably describing 1 319 | the plotters 0 320 | invisible 0 321 | deadlock 0 322 | very abrupt 0 323 | disappoint 0 324 | very despicable act 0 325 | does not entitle 0 326 | has accused 0 327 | specifically named 0 328 | did not intend to ratify 0 329 | social explosion 0 330 | mounting rivalry 0 331 | violator of human rights 0 332 | at present 0 333 | endorses 1 334 | riots and widespread strikes 0 335 | points fingers 0 336 | turned him down 0 337 | if a rocket launch fails 0 338 | claiming 0 339 | more attentive 1 340 | accuse 0 341 | the united states does not want to learn the lesson 0 342 | sees 0 343 | positively assessed 1 344 | old foes 0 345 | against 0 346 | mere 0 347 | putting at least part of the blame 0 348 | having opted for 1 349 | continuously denounced 0 350 | backing 1 351 | oppose 0 352 | never seen anything as beautiful as 1 353 | refuse 0 354 | completely isolated 0 355 | a strong turnout favored him 1 356 | extremely dangerous 0 357 | loyal attitude of 1 358 | would like 1 359 | elevates its image in international society 1 360 | reserves 0 361 | sound 1 362 | criticize 0 363 | consider 1 364 | was hoping 1 365 | made too many enemies 0 366 | seeking 1 367 | hope 1 368 | claimed 0 369 | that extreme 0 370 | axis of evil 0 371 | alarming 0 372 | wrote 0 373 | disturbingly long - term 0 374 | well disposed 1 375 | conflict of interests 0 376 | an advocate of nations' right to choose their leaders 1 377 | in cages 0 378 | warned 0 379 | more democratic 1 380 | crimes 0 381 | never again should 0 382 | concluding 0 383 | guilty of using excessive force 0 384 | intends 1 385 | press ahead 1 386 | widespread concerns 0 387 | seem logical . 0 388 | continued growth 1 389 | support 1 390 | removed from the reality 0 391 | considered 0 392 | the consensus 1 393 | as friends or not evil 1 394 | has requested 1 395 | congratulate 1 396 | bellyaching 0 397 | would have to be drastically revised and curtailed 0 398 | new - found 0 399 | is also paying the price 0 400 | humanly acceptable 1 401 | comrades 0 402 | does not want anything to do with 0 403 | blamed 0 404 | would be unjust and counterproductive 0 405 | will not be able to change until the end of mankind 0 406 | treating people this way 0 407 | faces of evil 0 408 | like 1 409 | continues to call for 1 410 | no protection 0 411 | must condemn 0 412 | imposed by military force 0 413 | warm 1 414 | anti - iran reports 0 415 | merely 0 416 | will come to realize 1 417 | condemnation 0 418 | would only have sharpened 0 419 | unfortunately 0 420 | active 1 421 | interest 1 422 | defended 1 423 | the crime they have committed together 0 424 | like to speak for 1 425 | vengeance 0 426 | desperate 0 427 | as an opportunity 0 428 | has asked 1 429 | should not be condoned 0 430 | adopted 1 431 | was baffled 0 432 | venezuela's strong slap on the face of the united states 1 433 | who were deprived of the minimum political and human rights 0 434 | was emphatic 1 435 | is merely to entertain 0 436 | will 1 437 | yelled 0 438 | have been cool 0 439 | were treated with suspicion 0 440 | no spirit of independence 0 441 | continued violence and aggression 0 442 | thanks to gains 1 443 | sponsor terrorism 0 444 | disagreed 0 445 | disagreement with 0 446 | suffering from difficulties 0 447 | not implemented in practice 0 448 | ugly travesty 0 449 | is also interested 1 450 | has always resolutely opposed 0 451 | add fuel to the raging fire 0 452 | having supported 1 453 | denunciation 0 454 | axis of evil 0 455 | not honest about championing 0 456 | war crimes or bloody massacres 0 457 | no reason at all 0 458 | as an act of submission 0 459 | wanted 1 460 | noted critically 0 461 | refused 0 462 | do not even have 0 463 | greatly miscalculating 0 464 | imperialists 0 465 | forced 0 466 | corruption 0 467 | we should remain calm 1 468 | loudly trumpeted 0 469 | accused 0 470 | neo - colonialism 0 471 | an alien seedling 0 472 | storm the castle 0 473 | shouting insults 0 474 | significantly weakened 0 475 | desperate hatred 0 476 | true threat 0 477 | axis of evil 0 478 | wants 1 479 | always likes to 1 480 | is now condemned 0 481 | persistent alignment 1 482 | has lost control 0 483 | expression of mankind's progress 1 484 | as close as 1 485 | is promising 1 486 | nothing but a mere public relations tool 0 487 | rogue 0 488 | is based on mutual respect and understanding 1 489 | pledged to fight 0 490 | hand it the bill 0 491 | play its trump card 0 492 | not profitable but costly 0 493 | hurt 0 494 | stands in brutal contrast 0 495 | hopes 1 496 | more prudent 0 497 | have made clear their stand against 0 498 | how frequently 0 499 | dishonoring 0 500 | impose 0 501 | calling for respect 1 502 | do n't like 0 503 | outlined 0 504 | even in some cases pure falsehoods 0 505 | against palestinian terrorism everywhere it exists 0 506 | considered 0 507 | agenda against 0 508 | are seeking 1 509 | decisive influence 1 510 | accused 0 511 | war crimes 0 512 | gratitude 1 513 | support 1 514 | wishes 1 515 | so that 1 516 | threatened 0 517 | changes its stand 0 518 | have each accused the other 0 519 | standing in the way 0 520 | tortured 0 521 | warned 0 522 | has been widely criticized 0 523 | crush 0 524 | warning 0 525 | revolted 0 526 | strong protest and denunciation 0 527 | but there is little i can do 0 528 | higher than any time in the past several hundred thousand years 0 529 | stubborn enough 0 530 | has no consensus 0 531 | denouncing 0 532 | very easily lost 0 533 | relegating the defense of democratic principles to a subordinate place 0 534 | it will not be free and fair 0 535 | ever firmer 1 536 | had urged 1 537 | had been marred 0 538 | accepted 1 539 | calculated 0 540 | rigging 0 541 | phenomenon 0 542 | grave concern 0 543 | was equally critical 0 544 | strong initiative 1 545 | a kind of collective punishment 0 546 | supports 1 547 | time and time again 0 548 | return to confrontation 0 549 | endorsed 1 550 | obvious disagreement 0 551 | held 0 552 | the latest intimidation intended to steal the election 0 553 | when will a coup not be called a coup 0 554 | the need for a major revision 0 555 | recalling 1 556 | urge 1 557 | holds 0 558 | acts of violence against innocent civilians 0 559 | disparaging 0 560 | threat to its security 0 561 | meddling 0 562 | impossible to achieve coexistence 0 563 | in death rather than staying alive under occupation 0 564 | agrees 1 565 | oozed confidence 1 566 | did n't want 0 567 | should criticize 0 568 | divide and rule 0 569 | desire 1 570 | early 1 571 | severe consequences 0 572 | axis of evil remark 0 573 | only got worse 0 574 | is perforce a bit scary 0 575 | disapproved 0 576 | threatening 0 577 | suggestions 0 578 | the real ringleader of evil 0 579 | substantially free and fair 1 580 | dictation from the us 0 581 | represents the pessimistic outlook 0 582 | have filed a complaint 0 583 | will be tarnished 0 584 | criticizing 0 585 | safeguarding peace and security 1 586 | inhumane 0 587 | want 1 588 | adulteration or deceitful use 0 589 | cow 0 590 | israeli right - wing policies 0 591 | truly inconceivable 0 592 | nutritious 1 593 | has tended to undermine the growth of a human rights culture 0 594 | argentina will be more competitive 1 595 | objections 0 596 | taken responsibility for 1 597 | renegade province 0 598 | warned 0 599 | very serious 0 600 | aimed at crushing 0 601 | we will go to war 0 602 | would therefore not be an impartial observer 0 603 | of all places 0 604 | know 1 605 | feels constrained to reject 0 606 | better 1 607 | dragging its feet 0 608 | filing the complaint 0 609 | you face a catastrophe 0 610 | dangerous flashpoint 0 611 | violates international agreements 0 612 | were supporting 1 613 | wants 1 614 | have been extremely critical 0 615 | guaranteed 1 616 | mild and routine criticism 0 617 | will be a disaster for bush 0 618 | is already under fire 0 619 | more credible 1 620 | oppressed 0 621 | progressive 1 622 | giving up with india 0 623 | even committed such an atrocious act 0 624 | long urged 1 625 | him in so ferocious an image wearing a t - shirt and jogging pants bearing some english letters , baring his teeth and with a glare of hatred in his eyes 0 626 | hailed 1 627 | assertively holds so dear to his heart 0 628 | perception 0 629 | humanely 1 630 | does not support 0 631 | meddle 0 632 | never 0 633 | failed 0 634 | had rebelled 0 635 | illegal 0 636 | masterminding 0 637 | backdrop of uncertainty 0 638 | goat f - - 0 639 | applaud everything 0 640 | unwarranted 0 641 | has taken a negative stand 0 642 | were not so devastating 0 643 | one that would be adversely affected 0 644 | to polish their own image 0 645 | preferring to concentrate 1 646 | never doing anything unprofitable 0 647 | meaningful action now 1 648 | utilizing force in an inappropriate volume 0 649 | indeed a good day 1 650 | will be able 1 651 | the superpower has decided to wage war and administer justice all on its own 0 652 | insists 1 653 | also suggested that death would be the only way out of the conflict 0 654 | near impossible 0 655 | no international problem 1 656 | they may no longer stand shoulder to shoulder 0 657 | acted in collusion 0 658 | nations with a terrible history 0 659 | support 1 660 | if i do not save them , i do not save myself 0 661 | national aspirations 1 662 | to understand 1 663 | other goals 1 664 | aggression 0 665 | typical texan mentality 0 666 | until the imf brought them to the point of death 0 667 | false 0 668 | love 1 669 | there is irony - - including from the sober 0 670 | excessive 0 671 | praised 1 672 | support 1 673 | stays true 1 674 | the worst 0 675 | threats against freedom of expression , democracy , and legal security 0 676 | dangerous situation 0 677 | humanely 1 678 | find nothing positive 0 679 | has an interest 1 680 | feels itself committed 0 681 | sat back with arms folded would have made us all hostages 0 682 | was almost overly humane 0 683 | looked like one of hitler's 0 684 | be willing 1 685 | scandal of 0 686 | violent enemies 0 687 | misguided 0 688 | damn yankees 0 689 | in supporting 1 690 | humanely 1 691 | but 0 692 | manipulation 0 693 | the civilized world . 1 694 | they get fed better than us 0 695 | the whiff of scandal and conspiracy is in the air 0 696 | brought the peace process to a deadlock 0 697 | have n't always been consistent 0 698 | the most fraudulent , terrorist and extremist 0 699 | dismissed 0 700 | would have been logical and acceptable if 1 701 | praised 1 702 | in order to try to impede 0 703 | not only 0 704 | anyway 0 705 | their insistence 1 706 | attacks on the freedoms and values 0 707 | in a way that threatens 0 708 | grew and grew 0 709 | systematic campaign of violence 0 710 | axis of evil remarks 0 711 | position 1 712 | is based on injustice 0 713 | richest 1 714 | breakthrough 1 715 | atrocious conditions 0 716 | to express concern 0 717 | bothered 0 718 | how capricious 0 719 | how far the arab world has come 1 720 | to protest 0 721 | forced exodus of hundreds of thousands 0 722 | wants 1 723 | should have called government officials to account sooner 0 724 | defend themsleves against their oppressors 1 725 | satisfied 1 726 | was especially satisfied 1 727 | outsource our moral obligations 0 728 | feeling of uncertainty 0 729 | mate up with terrorist organizations 0 730 | legitimate rejection 0 731 | staunch supporter 1 732 | something special , as god's own country . 1 733 | to seek reconciliation 1 734 | support 1 735 | tense relationship between 0 736 | adopted a resolution criticizing 0 737 | not a successful choice 0 738 | bound to be serious 0 739 | victimizing the lives 0 740 | seems to be 1 741 | could force 0 742 | goodwill 1 743 | because of the legal nuances 0 744 | litany 0 745 | forced him 0 746 | insisted 1 747 | lending any support 1 748 | overwhelmingly 1 749 | are the violators of human rights 0 750 | which means an indefinite suspension from the commonwealth , a ban on travel for mugabe's cronies and to freeze their personal assets 0 751 | under fire 0 752 | hope 1 753 | chilly 0 754 | working around the clock 1 755 | the most bizarre twist 0 756 | inflicted terrible death and pain on people on this continent 0 757 | twists and turns 0 758 | top concern 0 759 | should take place 1 760 | more concrete support 1 761 | excessive or at the wrong time 0 762 | harm 0 763 | yet another burden 0 764 | method reporting 0 765 | has called on 1 766 | wave of protests 0 767 | spiteful attitude 0 768 | mere reproduction of unverified and unsubstantiated reports 0 769 | exactly the opposite 0 770 | better ways 1 771 | cronies 0 772 | though mr . tobin has finally mastered the diapering process , his baby who is 15 years old is not amused 0 773 | promised 1 774 | is reluctant 0 775 | very substantial 0 776 | quick return to the negotiating table 1 777 | expressed his hope 1 778 | will do whatever it takes to defend our security 1 779 | appreciation 1 780 | the victim of state terrorism 0 781 | unlawful combatants 0 782 | rigged 0 783 | respect 1 784 | running as a hopeful 1 785 | wanted to avoid 0 786 | are the targets of suppression 0 787 | is a true oligarch 0 788 | allow for a free and fair presidential election 1 789 | support 1 790 | provocations 0 791 | the dissent 0 792 | perpetrators 0 793 | would be badly damaged 0 794 | between civilization and barbarity 0 795 | fantasy of a world kingdom 0 796 | displeasure 0 797 | also unclear 0 798 | sap the country's energy 0 799 | by some ironical trick of history 0 800 | non - white anglo - saxon - protestant humanity 0 801 | wants 1 802 | difficult 0 803 | voices criticizing 0 804 | reacted strongly 0 805 | damaged 0 806 | or even eliminated 1 807 | tens of thousands 0 808 | is stuck minding us 0 809 | suggestions 1 810 | is not in control 0 811 | the hopes of 1 812 | reduced to a series of bantustans 0 813 | stopped short of being free and fair 0 814 | alarming 0 815 | cooperated closely 1 816 | to set priorities 1 817 | support for 1 818 | rigged and unacceptable 0 819 | for fear of 0 820 | so - far spurned 0 821 | against neo - liberal globalization 0 822 | radical 0 823 | long - winded 0 824 | would never 0 825 | strong protest and criticism 0 826 | described 0 827 | oppressive 0 828 | traumatized 0 829 | neither especially virtuous 0 830 | stormy celebration 1 831 | heavy toll 0 832 | great efforts 1 833 | denied 0 834 | will force the wto to change 1 835 | that axis of evil statement 0 836 | the us was the only judge of what was reasonable 0 837 | resistance against 0 838 | by any means 0 839 | hope 1 840 | fear 0 841 | disenfranchise 0 842 | charges of 0 843 | wanted 1 844 | peaceful 1 845 | are not sometimes inclined to sacrifice these values for the good cause 0 846 | condemned 0 847 | absolute indifference 0 848 | true , legitimate , and elected 1 849 | upholder of justice 1 850 | everything good and nice 0 851 | such remarks 0 852 | to contest 0 853 | strong support 1 854 | popular support 1 855 | a power 1 856 | would like 1 857 | will never accept 0 858 | argued 1 859 | is after imposing 1 860 | upgrade its competitiveness 1 861 | good will 1 862 | axis of evil 0 863 | significant 1 864 | flawlessly 1 865 | neither very complicated nor very expensive 1 866 | excessive 0 867 | baseless charges 0 868 | would not recognize 0 869 | clearly show serious unbalanced states of mind 0 870 | was reluctant 0 871 | the disapproval 0 872 | end of the era of violence and counterviolence 1 873 | cast away the chance 0 874 | absolute indifference 0 875 | calls 0 876 | delaying the day of reckoning 0 877 | assured 1 878 | plight 0 879 | instead of 0 880 | humanitarian law was violated regularly 0 881 | in gleaming suits 0 882 | is consensus 1 883 | have refused to serve 0 884 | advise 1 885 | true 1 886 | brutally 0 887 | accused 0 888 | had agreed 1 889 | assassinate' the rights 0 890 | resorting to 0 891 | am happy 1 892 | mutual respect 1 893 | unfair 0 894 | bent on 0 895 | more seriously than anyone would have thought 0 896 | unfairly 0 897 | not yet 0 898 | to back 1 899 | would therefore be appropriate 1 900 | purposely play up 0 901 | our full support 1 902 | most likely 0 903 | refusal 0 904 | military bastions and bases 0 905 | enemy 0 906 | is an act of objectionable brutality 0 907 | commitment 1 908 | the sphere of its domination 0 909 | chaotic 0 910 | fodder 0 911 | criticism from 0 912 | favourable comment 1 913 | wounding palestinians without hesitation 0 914 | will be just as good 1 915 | rejection 0 916 | axis of evil 0 917 | bloody terrorism 0 918 | war - mongering 0 919 | a power vacuum 0 920 | jubilant 1 921 | recommendations 1 922 | drove the imf even further away 0 923 | hailed 1 924 | undoubtedly strong and well thought out 1 925 | unlawful combatants not entitled to the protection 0 926 | torture ! . 0 927 | had guaranteed 1 928 | fair 1 929 | crackdown 0 930 | stressed 1 931 | was critical of 0 932 | condemned 0 933 | positive 1 934 | complacency 0 935 | dangerous 0 936 | but 0 937 | a friend 1 938 | disapprove 0 939 | a naive , patently stupid young man who chose the wrong heroes 0 940 | claims 0 941 | lacking good will 0 942 | not ruin its relations with the arabs 1 943 | the perpetrators 0 944 | other slightly troubling images 0 945 | complaints and inquires 0 946 | without belt - tightening there is no money , and without money there is no belt - tightening 0 947 | falsified 0 948 | axis of evil 0 949 | urge 1 950 | refused to recognize 0 951 | whose eyes are not bluest of the blue 0 952 | cried 0 953 | relished 0 954 | good health system 1 955 | proudly exercise 1 956 | warned 0 957 | want 1 958 | would have happened here 0 959 | defend the rights of his homeland and people against 1 960 | not in line with international standards 0 961 | refused 0 962 | taking advantage of 0 963 | voluntary 1 964 | crippling recession 0 965 | refused to apologize 0 966 | too good 0 967 | false 0 968 | they rubbed their palms at length 0 969 | the rule of law is under fire 0 970 | the protests 0 971 | discrimination 0 972 | bowed to 0 973 | calling for 1 974 | however fraudulent 0 975 | can not speak openly 0 976 | will back 1 977 | appeasement 0 978 | is subordinated to his own personal sentiments and ambition 0 979 | feeding on the growing frustration 0 980 | the explanation is , in fact is deceiving 0 981 | refusal to accept 0 982 | like 1 983 | grown too tired 0 984 | some unpalatable measures 0 985 | binding wounds 1 986 | hallucination 0 987 | coveted 1 988 | believe 1 989 | one of the most serious crises of our time 0 990 | repetitive 0 991 | hope 1 992 | set out 1 993 | pure fiction 0 994 | kennels 0 995 | desperate hatred 0 996 | demand 1 997 | does not want 1 998 | cooperation 1 999 | felt betrayed 0 1000 | demonstrates strong commitments 1 1001 | have it better here 1 1002 | concerns of 0 1003 | have been warned 0 1004 | just to gain popularity 0 1005 | playing with words 0 1006 | is wide awake 1 1007 | has accused 0 1008 | imperialist war maniacs 0 1009 | are critical 0 1010 | problem is 0 1011 | exploiting 0 1012 | sent congratulations 1 1013 | prejudged 0 1014 | worst 0 1015 | risks 0 1016 | settle the accounts 0 1017 | also too much interference 0 1018 | protests 0 1019 | was quite neat and tidy 0 1020 | hold in 1 1021 | do not enjoy the minimum guarantees 0 1022 | niche of affection and solidarity 1 1023 | all this time 0 1024 | are not competitive enough 0 1025 | the legitimacy 1 1026 | that sort of trust takes several years , if not an entire generation , to rebuild 0 1027 | would boost 0 1028 | fair settlement 1 1029 | human characteristics to a lump of stone 0 1030 | promised 1 1031 | supported 1 1032 | with some effort , he pins the flailing child down on the changing table only to discover there are no diapers in the house 0 1033 | wishes to state 1 1034 | will be even keener 1 1035 | befuddled 0 1036 | must no longer be encysted 0 1037 | are prepared to accept 1 1038 | denied 0 1039 | sympathy 0 1040 | calling 0 1041 | years behind 0 1042 | simply because 0 1043 | inalienable palestinian rights 0 1044 | opposition 0 1045 | refusing to restate 0 1046 | beginning a new offensive on religious minorities , persecuting muslims 0 1047 | confidence 1 1048 | would not be harmed 1 1049 | would not accept 0 1050 | congratulated 1 1051 | contrary to 0 1052 | other so - called civilized nations 0 1053 | are nervous 0 1054 | goal 1 1055 | better clothed 1 1056 | have joined the opposition in alleging 0 1057 | declined 0 1058 | flawed 0 1059 | will press on 1 1060 | cataclysmic consequences 0 1061 | dry up the roots of terrorism 1 1062 | premature 0 1063 | --------------------------------------------------------------------------------