├── .editorconfig
├── .gitignore
├── README.md
├── acceptability
│   ├── LICENSE.md
│   ├── __init__.py
│   ├── generate.py
│   ├── generate_sweep.py
│   ├── lm_evaluate.py
│   ├── lm_run.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── cbow_classifier.py
│   │   ├── elmo_classifier.py
│   │   ├── generators
│   │   │   ├── __init__.py
│   │   │   ├── lm.py
│   │   │   └── lstm_lm.py
│   │   ├── linear_classifier.py
│   │   └── lstm_classifiers.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   ├── early_stopping.py
│   │   ├── lm_evaluator.py
│   │   ├── lm_generator.py
│   │   ├── lm_trainer.py
│   │   ├── logger.py
│   │   ├── meter.py
│   │   └── trainer.py
│   ├── run.py
│   ├── scripts
│   │   ├── kaggle
│   │   │   ├── converter.py
│   │   │   ├── converter_data.py
│   │   │   └── converter_random.py
│   │   └── permuting.py
│   ├── test.py
│   └── utils
│       ├── __init__.py
│       ├── checkpoint.py
│       ├── flags.py
│       ├── general.py
│       ├── lm.py
│       └── timer.py
├── acceptability_corpus
│   ├── cola_public.zip
│   ├── cola_public
│   │   ├── README
│   │   ├── raw
│   │   │   ├── in_domain_dev.tsv
│   │   │   ├── in_domain_train.tsv
│   │   │   └── out_of_domain_dev.tsv
│   │   └── tokenized
│   │       ├── in_domain_dev.tsv
│   │       ├── in_domain_train.tsv
│   │       └── out_of_domain_dev.tsv
│   ├── phenomena
│   │   ├── inchoative.tsv
│   │   ├── reflexive.tsv
│   │   ├── singular_pl.tsv
│   │   ├── svo.tsv
│   │   └── wh_extraction.tsv
│   ├── raw
│   │   ├── all.tsv
│   │   ├── in_domain.tsv
│   │   ├── in_domain_dev.tsv
│   │   ├── in_domain_train.tsv
│   │   ├── mixed_dev.tsv
│   │   ├── out_of_domain.tsv
│   │   └── out_of_domain_dev.tsv
│   └── tokenized
│       ├── in_domain_dev.tsv
│       ├── in_domain_train.tsv
│       ├── mixed_dev.tsv
│       └── out_of_domain_dev.tsv
├── requirements.txt
└── setup.py

--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
root = true

[*.py]
charset = utf-8
trim_trailing_whitespace = true
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 4

[*.md]
trim_trailing_whitespace = false

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
*.pyc
*.egg*
./raw
save
.vscode
.vector_cache
.idea
acceptability_corpus/raw/in_domain_test.tsv
acceptability_corpus/source/ad03.tsv
acceptability_corpus/source/bc01.tsv
acceptability_corpus/source/ks08.tsv
acceptability_corpus/source/l-93.tsv
acceptability_corpus/source/r-67.tsv
acceptability_corpus/tokenized/run/dev.tsv
acceptability_corpus/tokenized/run/test.tsv
acceptability_corpus/tokenized/run/train.tsv

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CoLA Baselines

Baselines accompanying the paper [Neural Network Acceptability Judgments](https://www.nyu.edu/projects/bowman/neural_network_acceptability.pdf). Check the [CoLA website](https://nyu-mll.github.io/CoLA) to download The Corpus of Linguistic Acceptability (CoLA), for more information, and for a demo model.

## Dataset

Training and validation sets for CoLA are available under [acceptability_corpus/raw](acceptability_corpus/raw), with a tokenized version available under [tokenized](acceptability_corpus/tokenized). Test data (unlabeled) is available here: [in domain](https://www.kaggle.com/c/cola-in-domain-open-evaluation) and [out of domain](https://www.kaggle.com/c/cola-out-of-domain-open-evaluation). All models require tokenized data (we use the default NLTK tokenizer).
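If you need to tokenize your own sentences to match this format, a minimal sketch using NLTK is below. The `tokenize_tsv` helper is not part of this repository, and the assumption that the sentence sits in the last tab-separated column of the raw files is ours:

```
import nltk

def tokenize_tsv(in_path, out_path):
    """Tokenize the sentence column (assumed to be the last field) of a tab-separated file."""
    nltk.download('punkt', quiet=True)  # models used by nltk.word_tokenize
    with open(in_path, encoding='utf-8') as fin, open(out_path, 'w', encoding='utf-8') as fout:
        for line in fin:
            fields = line.rstrip('\n').split('\t')
            fields[-1] = ' '.join(nltk.word_tokenize(fields[-1]))
            fout.write('\t'.join(fields) + '\n')

# Example (hypothetical paths):
# tokenize_tsv('my_data/train_raw.tsv', 'my_data/train.tsv')
```
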
## Requirements

- Python >= 3
- PyTorch v0.3.0
- TorchNet and TorchText
- NLTK (optional: for NLTK preprocessing)

## Running

Install [PyTorch](https://pytorch.org/) v0.3.0.

Then run:

```
git clone https://github.com/nyu-mll/acceptability-judgments.git
cd acceptability-judgments
pip3 install -r requirements.txt
python setup.py develop
```

Run a simple training session with:

`python acceptability/run.py -d acceptability_corpus/tokenized`

This will use the default classifier model and all of the default settings.

## Model

Our general model structure is shown in the figure below. See the paper for more in-depth details.

![Model](https://i.imgur.com/eI4tNvd.png)

## Complex Run

The directory containing the data (the value of `-d`) must contain three files, `train.tsv`, `dev.tsv` and `test.tsv`. Download the vocabulary file used in our experiments from this [link](https://drive.google.com/file/d/14HNMByzrUM2ZJBjOqCzelFz5yJMHskFb/view?usp=sharing).

Example of a command for running ELMo + Real/Fake on top of a transferred encoder:

```
python acceptability/run.py -m linear_classifier -d data --save_loc save --vocab_file ./vocab_100k.tsv --logs_dir ./logs -g -r -p 40 -se 2 -n 1000 --encoder_path ./elmo_best_real_fake/experiment_lstm_pooling_elmo_h_528_l_3_lr_0.0001_e_360_do_0.2.pth --encoding_size 528 --embedding_size 217 --embedding_path ./elmo_best_real_fake/experiment_lstm_pooling_elmo_h_528_l_3_lr_0.0001_e_360_do_0.2.emb -lr 0.00005 -nl 3 -hs 1134 -do 0.2
```

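For reference, the folder passed via `-d` (named `data` in the example above) only needs to contain the three TSV files, in the same format as the files under `acceptability_corpus/tokenized`:

```
data/
├── train.tsv
├── dev.tsv
└── test.tsv
```
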
## Pretrained Models and Testing

Pretrained models are available at this [link](https://drive.google.com/drive/folders/1HoHjdkc68fh7MTUBKAGZETGH5jfjsXR8?usp=sharing).

To do a test run over a dataset:

- Create your data folder in the same format as `acceptability_corpus`, with `train.tsv`, `dev.tsv` and `test.tsv`.
- Download one of the pretrained encoders.
- Download the vocabulary file from this [link](https://drive.google.com/file/d/14HNMByzrUM2ZJBjOqCzelFz5yJMHskFb/view?usp=sharing).
- OPTIONAL: Train a classifier on CoLA using the command above.
- Run the following command (with the `-g` flag to use GPU hardware); e.g., for the downloadable ELMo real/fake encoder (without CoLA training) it would be something like:
  `python acceptability/test.py -mf elmo.pth -vf vocab_100k.tsv -ef elmo.emb -d data/test.tsv -g`

To save the model predictions for each of the sentences in `test.tsv`, append the additional flag `-o predictions.txt` to the last command; the predictions will be written to the `predictions.txt` file.

## Cite

If you use CoLA or the baselines in your research, please cite the accompanying paper using the following entry:

```
@article{warstadt2019neural,
    title={Neural network acceptability judgments},
    author={Warstadt, Alex and Singh, Amanpreet and Bowman, Samuel R},
    journal={Transactions of the Association for Computational Linguistics},
    volume={7},
    pages={625--641},
    year={2019},
    publisher={MIT Press}
}
```

## License

Baseline code is available under the MIT license.

The text in this corpus is excerpted from published works available on the website, and copyright (where applicable) remains with the original authors or publishers. We expect that research use within the US is legal under fair use, but make no guarantee of this.

--------------------------------------------------------------------------------
/acceptability/LICENSE.md:
--------------------------------------------------------------------------------
Copyright 2018, New York University

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/acceptability/__init__.py:
--------------------------------------------------------------------------------
__all__ = ['utils', 'modules', 'models']

__version__ = '0.1.0'

from . import utils
from . import modules
from . import models

--------------------------------------------------------------------------------
/acceptability/generate.py:
--------------------------------------------------------------------------------
from acceptability.modules import LMGenerator

if __name__ == '__main__':
    trainer = LMGenerator()
    trainer.load()
    trainer.generate()

--------------------------------------------------------------------------------
/acceptability/generate_sweep.py:
--------------------------------------------------------------------------------
import argparse
import os
import hyperopt.pyll.stochastic as stoc

from copy import deepcopy
from hyperopt import hp

parser = argparse.ArgumentParser()

parser.add_argument('-f', '--folder', default="/home/$USER/acceptability-judgments",
                    help="Path for acceptability judgments repository")
parser.add_argument('-sf', '--slurm_folder', default="./slurm_jobs",
                    help="Folder in which we should generate sbatch files")
parser.add_argument('-n', '--num_sweeps', type=int, default=1,
                    help="Number of sweeps to generate")

parser.add_argument('-j', '--job_name', default=None,
                    help="Job name, sweep sample number will be appended to this")
parser.add_argument('-t', '--time', default="47:00:00",
                    help="Time limit of sweep")
parser.add_argument('-me', '--mem', default="32GB",
                    help="Memory for sweep")
parser.add_argument('-g', '--gres', default="gpu:1",
                    help="GPU type to be specified in sweep")
parser.add_argument('-c', '--cpus-per-task', default="2",
                    help="CPUs per task to be specified in sweeps")
parser.add_argument('-p', '--patience', type=int, default=4, help="Early stopping patience")
parser.add_argument('-l', '--logs_dir', default='./logs',
help="Directory for storing logs") 30 | parser.add_argument('-s', '--save_loc', default='./save', 31 | help="Directory for saving models") 32 | parser.add_argument('-e', '--epochs', type=int, default=None, 33 | help="Epochs") 34 | parser.add_argument('-d', '--data', default='./data', 35 | help="Folder containing data tsvs") 36 | parser.add_argument('-eu', '--email', default=None, 37 | help="Email to be mailed for slurm notification") 38 | parser.add_argument('-pr', '--pre_command', default=None, 39 | help="Shell command to run before running main command") 40 | parser.add_argument('-ps', '--post_command', default=None, 41 | help="Shell command to run after running main command") 42 | parser.add_argument('-se', '--stages_per_epoch', type=int, default=None, 43 | help="Number of evaluation steps, if not passed default will be used") 44 | 45 | 46 | subparsers = parser.add_subparsers() 47 | lm_parser = subparsers.add_parser('lm', help="Generate sweeps for lm") 48 | 49 | lm_parser.add_argument('-v', '--vocab', help="Vocab file location") 50 | lm_parser.add_argument('-m', '--model', default="lstm", 51 | help="Model type to be used for lm") 52 | lm_parser.set_defaults(sweep_type="lm") 53 | 54 | classifier_parser = subparsers.add_parser('classifier', 55 | help="Generate sweeps for classifier") 56 | classifier_parser.set_defaults(sweep_type="classifier") 57 | 58 | classifier_parser.add_argument('-v', '--vocab', type=str, 59 | help="Vocab file location") 60 | classifier_parser.add_argument('-o', "--output_dir", type=str, default=None, 61 | help="Location of output directory") 62 | classifier_parser.add_argument('--encoder_path', type=str, default=None, 63 | help="Location of encoder checkpoint") 64 | classifier_parser.add_argument('--encoding_type', type=str, default=None, 65 | help="Class of encoder") 66 | classifier_parser.add_argument('--embedding_path', type=str, default=None, 67 | help="Path of embedding to load") 68 | classifier_parser.add_argument('--encoding_size', type=int, default=None, 69 | help="Size of encoding, only to be used if you are loading a pretrained encoder") 70 | classifier_parser.add_argument('--encoder_num_layers', type=int, default=None, 71 | help="Num layers of encoder, only to be used if you are loading a pretrained encoder") 72 | classifier_parser.add_argument('--embedding_size', type=int, default=None, 73 | help="Embedding size, only to used if you are loading a pretrained encoder") 74 | classifier_parser.add_argument('--embedding', type=str, default=None, 75 | help="Embedding, use to enter name of GloVe embedding") 76 | classifier_parser.add_argument('--max_pool', action="store_true", default=False, 77 | help="Use max-pooling for CBOW") 78 | classifier_parser.add_argument('--train_embeddings', action="store_true", default=False, 79 | help="Train word embeddings") 80 | classifier_parser.add_argument('--glove', action="store_true", default=False, 81 | help="Use glove") 82 | classifier_parser.add_argument('--lm_path', type=str, default=None, 83 | help="LM path, to be used with elmo classifier") 84 | classifier_parser.add_argument('-m', '--model', type=str, default=None, 85 | help="Model") 86 | classifier_parser.add_argument("--should_not_preprocess_data", action="store_true", default=False, 87 | help="Whether to preprocess data? 
classifier_parser.add_argument("--imbalance", action="store_true", default=False,
                               help="Is there class imbalance?")
classifier_parser.add_argument("--should_not_lowercase", action="store_true", default=False,
                               help="Should lowercase data? Default: true (Will lowercase)")
classifier_parser.add_argument("--preprocess_tokenizer", default=None, type=str,
                               help="Type of tokenizer to use (space|nltk)")

hashbang_line = '#!/bin/bash'

space = {
    'lm': hp.choice('lm', [{
        'hidden_size': hp.uniform('hidden_size', 300, 1200),
        'embedding_size': hp.uniform('embedding_size', 200, 600),
        'learning_rate': hp.uniform('learning_rate', -4, -2.5),
        'num_layers': hp.uniform('num_layers', 1, 5),
        'dropout': hp.choice('dropout', [0.2, 0.5])
    }]),
    'classifier': hp.choice('classifier', [{
        'hidden_size': hp.uniform('hidden_size', 20, 1200),
        'embedding_size': hp.uniform('embedding_size', 200, 600),
        'learning_rate': hp.uniform('learning_rate', -4, -5),
        'num_layers': hp.uniform('num_layers', 1, 2),
        'encoding_size': hp.uniform('encoding_size', 300, 1200),
        'encoder_num_layers': hp.uniform('encoder_num_layers', 1, 5),
        'dropout': hp.choice('dropout', [0.2, 0.5])
    }])
}

def generate_lm_sweeps(args):
    all_lines, post_shell = get_fixed_lines(args)

    run_line = get_fixed_lm_run_params(args)

    run_line = 'python -u acceptability/lm_run.py ' + run_line

    current_space = space[args.sweep_type]

    for index in range(args.num_sweeps):
        lines = deepcopy(all_lines)
        params_line, output_name = get_sampled_params_for_lm(current_space, index)

        lines[4] += str(index)

        if args.email:
            lines[9] = lines[9] + '-' + str(index) + '-%j_' + output_name
        else:
            lines[7] = lines[7] + '-' + str(index) + '-%j_' + output_name

        params_line = run_line + ' ' + params_line

        lines.append(params_line)

        lines = lines + post_shell
        slurm_file = '\n'.join(lines)

        write_slurm_file(slurm_file, args.slurm_folder, args.sweep_type, args.model, index)


def generate_classifier_sweeps(args):
    all_lines, post_shell = get_fixed_lines(args)
    run_line = get_fixed_classifier_run_params(args)

    run_line = 'python -u acceptability/run.py ' + run_line

    current_space = space[args.sweep_type]

    has_pretrained_encoder = args.encoder_path is not None
    for index in range(args.num_sweeps):
        lines = deepcopy(all_lines)

        params_line, output_name = get_sampled_params_for_classifier(args, current_space,
                                                                     index, has_pretrained_encoder)

        lines[4] += str(index)

        if args.email:
            lines[9] = lines[9] + '-' + str(index) + '-%j_' + output_name
        else:
            lines[7] = lines[7] + '-' + str(index) + '-%j_' + output_name

        params_line = run_line + ' ' + params_line

        lines.append(params_line)

        lines = lines + post_shell
        slurm_file = '\n'.join(lines)

        write_slurm_file(slurm_file, args.slurm_folder, args.sweep_type, args.model, index)


def get_fixed_lines(args):
    all_lines = [hashbang_line, '']

    sbatch_lines = generate_sbatch_params(args)
    module_lines = [get_module_load_lines()]
    cd_lines = ['cdir=' + args.folder, 'cd $cdir']
    pre_shell = get_shell_line(args.pre_command)
    post_shell = get_shell_line(args.post_command)
    xdg_line = [get_xdg_line()]

    all_lines = all_lines + sbatch_lines + module_lines + cd_lines + pre_shell + xdg_line

    return all_lines, post_shell

def write_slurm_file(data, folder, typ, model_name, index):
    if not os.path.exists(folder):
        os.makedirs(folder)
    file_name = 'run_acceptabilty_%s_%s_%d.sbatch' % (typ, model_name, index)
    with open(os.path.join(folder, file_name), 'w') as f:
        f.write(data)

def generate_sbatch_params(args):
    params = {
        'job-name': 'a' + args.sweep_type if args.job_name is None else args.job_name,
        'output': 'slurm',
        'nodes': 1,
        'cpus-per-task': args.cpus_per_task,
        'mem': args.mem,
        'time': args.time,
        'gres': args.gres,
    }

    if args.email:
        params['mail-type'] = 'ALL'
        params['mail-user'] = args.email

    lines = []
    sbatch_prepend = '#SBATCH '
    for key in sorted(list(params.keys())):
        lines.append('%s --%s=%s' % (sbatch_prepend, key, str(params[key])))

    return lines

def get_module_load_lines():
    return """module purge
module load cuda/8.0.44
module load cudnn/8.0v5.1
"""

def get_fixed_lm_run_params(args):
    params = ['-d', args.data, '-v', args.vocab, '--save_loc', args.save_loc,
              '--logs_dir', args.logs_dir, '-g', '-r', '-p', str(args.patience)]

    if args.stages_per_epoch is not None:
        params.append('-se')
        params.append(str(args.stages_per_epoch))

    if args.epochs is not None:
        params.append('-e')
        params.append(str(args.epochs))

    return ' '.join(params)

def get_fixed_classifier_run_params(args):
    params = ['-m', args.model, '-d', args.data, '--save_loc', args.save_loc, '--vocab_file', args.vocab,
              '--logs_dir', args.logs_dir, '-g', '-r', '-p', str(args.patience)]

    if args.output_dir is not None:
        params.append('-o')
        params.append(args.output_dir)

    if args.max_pool:
        params.append('--max_pool')

    if args.should_not_preprocess_data:
        params.append('--should_not_preprocess_data')

    if args.should_not_lowercase:
        params.append('--should_not_lowercase')

    if args.imbalance:
        params.append('--imbalance')

    if args.train_embeddings:
        params.append('--train_embeddings')

    if args.glove:
        params.append('--glove')

    if args.preprocess_tokenizer is not None:
        params.append('--preprocess_tokenizer')
        params.append(args.preprocess_tokenizer)

    if args.stages_per_epoch is not None:
        params.append('-se')
        params.append(str(args.stages_per_epoch))

    if args.epochs is not None:
        params.append('-n')
        params.append(str(args.epochs))

    if args.encoder_path is not None:
        params.append('--encoder_path')
        params.append(str(args.encoder_path))

    if args.encoder_num_layers is not None:
        params.append('--encoder_num_layers')
        params.append(str(args.encoder_num_layers))

    if args.encoding_size is not None:
        params.append('--encoding_size')
        params.append(str(args.encoding_size))

    if args.embedding_size is not None:
        params.append('--embedding_size')
        params.append(str(args.embedding_size))

    if args.encoding_type is not None:
        params.append('--encoding_type')
        params.append(str(args.encoding_type))

    if args.embedding_path is not None:
        params.append('--embedding_path')
        params.append(str(args.embedding_path))

    if args.embedding is not None:
        params.append('--embedding')
        params.append(str(args.embedding))

    if args.lm_path is not None:
        params.append('--lm_path')
        params.append(str(args.lm_path))

    # if args.number_experiment is not None:
    #     params.append('--experiment_name')
    #     params.append(str(args.job_name))

    return ' '.join(params)

def get_sampled_params_for_classifier(args, space, index=1, has_pretrained_encoder=False):
    sample = stoc.sample(space)
    sample['learning_rate'] = 10 ** (sample['learning_rate'])
    sample['hidden_size'] = int(sample['hidden_size'])
    sample['embedding_size'] = int(sample['embedding_size'])
    sample['num_layers'] = int(sample['num_layers'])
    sample['encoding_size'] = int(sample['encoding_size'])
    sample['encoder_num_layers'] = int(sample['encoder_num_layers'])


    output = 'lr_%.5f_nl_%d_hs_%d_do_%.1f' % (sample['learning_rate'],
                                              sample['num_layers'], sample['hidden_size'], sample['dropout'])

    params = '-lr %.5f -nl %d -hs %d -do %.1f' % (sample['learning_rate'],
                                                  sample['num_layers'], sample['hidden_size'], sample['dropout'])

    embedding_size = sample["embedding_size"] if args.embedding_size is None else args.embedding_size

    if has_pretrained_encoder:
        sample.pop('encoder_num_layers')
        sample.pop('encoding_size')
        sample.pop('embedding_size')
        output += '_ed_%d_es_%d_enl_%d.out' % (embedding_size,
                                               args.encoder_num_layers, args.encoding_size)
    else:
        output += '_ed_%d_es_%d_enl_%d.out' % (embedding_size, sample['encoding_size'],
                                               sample['encoder_num_layers'])
        params += ' -es %d --encoding_size %d --encoder_num_layers %d' % (embedding_size,
                                                                          sample['encoding_size'],
                                                                          sample['encoder_num_layers'])

    print("Sweep ", index, sample)

    return params, output

def get_sampled_params_for_lm(space, index=1):
    sample = stoc.sample(space)
    sample['learning_rate'] = 10 ** (sample['learning_rate'])
    sample['embedding_size'] = int(sample['embedding_size'])
    sample['hidden_size'] = int(sample['hidden_size'])
    sample['num_layers'] = int(sample['num_layers'])

    print("Sweep ", index, sample)

    output = 'lr_%.5f_do_%.1f_nl_%d_hs_%d_es_%d.out' % (sample['learning_rate'],
                                                        sample['dropout'], sample['num_layers'],
                                                        sample['hidden_size'], sample['embedding_size'])

    params = '-lr %.5f -do %.1f -nl %d -hs %d -es %d' % (sample['learning_rate'],
                                                         sample['dropout'], sample['num_layers'],
                                                         sample['hidden_size'], sample['embedding_size'])

    return params, output

def get_xdg_line():
    return """cat<