├── .editorconfig
├── .gitignore
├── README.md
├── acceptability
│   ├── LICENSE.md
│   ├── __init__.py
│   ├── generate.py
│   ├── generate_sweep.py
│   ├── lm_evaluate.py
│   ├── lm_run.py
│   ├── models
│   │   ├── __init__.py
│   │   ├── cbow_classifier.py
│   │   ├── elmo_classifier.py
│   │   ├── generators
│   │   │   ├── __init__.py
│   │   │   ├── lm.py
│   │   │   └── lstm_lm.py
│   │   ├── linear_classifier.py
│   │   └── lstm_classifiers.py
│   ├── modules
│   │   ├── __init__.py
│   │   ├── dataset.py
│   │   ├── early_stopping.py
│   │   ├── lm_evaluator.py
│   │   ├── lm_generator.py
│   │   ├── lm_trainer.py
│   │   ├── logger.py
│   │   ├── meter.py
│   │   └── trainer.py
│   ├── run.py
│   ├── scripts
│   │   ├── kaggle
│   │   │   ├── converter.py
│   │   │   ├── converter_data.py
│   │   │   └── converter_random.py
│   │   └── permuting.py
│   ├── test.py
│   └── utils
│       ├── __init__.py
│       ├── checkpoint.py
│       ├── flags.py
│       ├── general.py
│       ├── lm.py
│       └── timer.py
├── acceptability_corpus
│   ├── cola_public.zip
│   ├── cola_public
│   │   ├── README
│   │   ├── raw
│   │   │   ├── in_domain_dev.tsv
│   │   │   ├── in_domain_train.tsv
│   │   │   └── out_of_domain_dev.tsv
│   │   └── tokenized
│   │       ├── in_domain_dev.tsv
│   │       ├── in_domain_train.tsv
│   │       └── out_of_domain_dev.tsv
│   ├── phenomena
│   │   ├── inchoative.tsv
│   │   ├── reflexive.tsv
│   │   ├── singular_pl.tsv
│   │   ├── svo.tsv
│   │   └── wh_extraction.tsv
│   ├── raw
│   │   ├── all.tsv
│   │   ├── in_domain.tsv
│   │   ├── in_domain_dev.tsv
│   │   ├── in_domain_train.tsv
│   │   ├── mixed_dev.tsv
│   │   ├── out_of_domain.tsv
│   │   └── out_of_domain_dev.tsv
│   └── tokenized
│       ├── in_domain_dev.tsv
│       ├── in_domain_train.tsv
│       ├── mixed_dev.tsv
│       └── out_of_domain_dev.tsv
├── requirements.txt
└── setup.py

--------------------------------------------------------------------------------
/.editorconfig:
--------------------------------------------------------------------------------
root = true

[*.py]
charset = utf-8
trim_trailing_whitespace = true
end_of_line = lf
insert_final_newline = true
indent_style = space
indent_size = 4

[*.md]
trim_trailing_whitespace = false

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
__pycache__
*.pyc
*.egg*
./raw
save
.vscode
.vector_cache
.idea
acceptability_corpus/raw/in_domain_test.tsv
acceptability_corpus/source/ad03.tsv
acceptability_corpus/source/bc01.tsv
acceptability_corpus/source/ks08.tsv
acceptability_corpus/source/l-93.tsv
acceptability_corpus/source/r-67.tsv
acceptability_corpus/tokenized/run/dev.tsv
acceptability_corpus/tokenized/run/test.tsv
acceptability_corpus/tokenized/run/train.tsv

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# CoLA Baselines

Baselines accompanying the paper [Neural Network Acceptability Judgments](https://www.nyu.edu/projects/bowman/neural_network_acceptability.pdf). Check the [CoLA website](https://nyu-mll.github.io/CoLA) to download The Corpus of Linguistic Acceptability (CoLA), for more information, and for a demo model.

## Dataset

Training and validation sets for CoLA are available under [acceptability_corpus/raw](acceptability_corpus/raw), with a tokenized version available under [tokenized](acceptability_corpus/tokenized). Test data (unlabeled) is available here: [in domain](https://www.kaggle.com/c/cola-in-domain-open-evaluation) and [out of domain](https://www.kaggle.com/c/cola-out-of-domain-open-evaluation). All models require tokenized data (we use the default NLTK tokenizer).
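If you need to tokenize your own sentences to match this format, a minimal sketch using NLTK is below. The `tokenize_tsv` helper is not part of this repository, and the assumption that the sentence sits in the last tab-separated column of the raw files is ours:

```
import nltk

def tokenize_tsv(in_path, out_path):
    """Tokenize the sentence column (assumed to be the last field) of a tab-separated file."""
    nltk.download('punkt', quiet=True)  # models used by nltk.word_tokenize
    with open(in_path, encoding='utf-8') as fin, open(out_path, 'w', encoding='utf-8') as fout:
        for line in fin:
            fields = line.rstrip('\n').split('\t')
            fields[-1] = ' '.join(nltk.word_tokenize(fields[-1]))
            fout.write('\t'.join(fields) + '\n')

# Example (hypothetical paths):
# tokenize_tsv('my_data/train_raw.tsv', 'my_data/train.tsv')
```
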
## Requirements

- Python >= 3
- PyTorch v0.3.0
- TorchNet and TorchText
- NLTK (optional: for NLTK preprocessing)

## Running

Install [PyTorch](https://pytorch.org/) v0.3.0.

Then run:

```
git clone https://github.com/nyu-mll/acceptability-judgments.git
cd acceptability-judgments
pip3 install -r requirements.txt
python setup.py develop
```

Run a simple training session with:

`python acceptability/run.py -d acceptability_corpus/tokenized`

This will use the default classifier model and all of the default settings.

## Model

Our general model structure is shown in the figure below. See the paper for more in-depth details.

![Model](https://i.imgur.com/eI4tNvd.png)

## Complex Run

The directory containing the data (the value of `-d`) must contain three files, `train.tsv`, `dev.tsv` and `test.tsv`. Download the vocabulary file used in our experiments from this [link](https://drive.google.com/file/d/14HNMByzrUM2ZJBjOqCzelFz5yJMHskFb/view?usp=sharing).

Example of a command for running ELMo + Real/Fake on top of a transferred encoder:

```
python acceptability/run.py -m linear_classifier -d data --save_loc save --vocab_file ./vocab_100k.tsv --logs_dir ./logs -g -r -p 40 -se 2 -n 1000 --encoder_path ./elmo_best_real_fake/experiment_lstm_pooling_elmo_h_528_l_3_lr_0.0001_e_360_do_0.2.pth --encoding_size 528 --embedding_size 217 --embedding_path ./elmo_best_real_fake/experiment_lstm_pooling_elmo_h_528_l_3_lr_0.0001_e_360_do_0.2.emb -lr 0.00005 -nl 3 -hs 1134 -do 0.2
```

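For reference, the folder passed via `-d` (named `data` in the example above) only needs to contain the three TSV files, in the same format as the files under `acceptability_corpus/tokenized`:

```
data/
├── train.tsv
├── dev.tsv
└── test.tsv
```
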
## Pretrained Models and Testing

Pretrained models are available at this [link](https://drive.google.com/drive/folders/1HoHjdkc68fh7MTUBKAGZETGH5jfjsXR8?usp=sharing).

To do a test run over a dataset:

- Create your data folder in the same format as `acceptability_corpus`, with `train.tsv`, `dev.tsv` and `test.tsv`.
- Download one of the pretrained encoders.
- Download the vocabulary file from this [link](https://drive.google.com/file/d/14HNMByzrUM2ZJBjOqCzelFz5yJMHskFb/view?usp=sharing).
- OPTIONAL: Train a classifier on CoLA using the command above.
- Run the following command (with the `-g` flag to use GPU hardware); e.g., for the downloadable ELMo real/fake encoder (without CoLA training) it would be something like:
  `python acceptability/test.py -mf elmo.pth -vf vocab_100k.tsv -ef elmo.emb -d data/test.tsv -g`

To save the model predictions for each of the sentences in `test.tsv`, append the additional flag `-o predictions.txt` to the last command; the predictions will be written to the `predictions.txt` file.

## Cite

If you use CoLA or the baselines in your research, please cite the accompanying paper using the following entry:

```
@article{warstadt2019neural,
    title={Neural network acceptability judgments},
    author={Warstadt, Alex and Singh, Amanpreet and Bowman, Samuel R},
    journal={Transactions of the Association for Computational Linguistics},
    volume={7},
    pages={625--641},
    year={2019},
    publisher={MIT Press}
}
```

## License

Baseline code is available under the MIT license.

The text in this corpus is excerpted from published works available on the website, and copyright (where applicable) remains with the original authors or publishers. We expect that research use within the US is legal under fair use, but make no guarantee of this.

--------------------------------------------------------------------------------
/acceptability/LICENSE.md:
--------------------------------------------------------------------------------
Copyright 2018, New York University

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

--------------------------------------------------------------------------------
/acceptability/__init__.py:
--------------------------------------------------------------------------------
__all__ = ['utils', 'modules', 'models']

__version__ = '0.1.0'

from . import utils
from . import modules
from . import models

--------------------------------------------------------------------------------
/acceptability/generate.py:
--------------------------------------------------------------------------------
from acceptability.modules import LMGenerator

if __name__ == '__main__':
    trainer = LMGenerator()
    trainer.load()
    trainer.generate()

--------------------------------------------------------------------------------
/acceptability/generate_sweep.py:
--------------------------------------------------------------------------------
import argparse
import os
import hyperopt.pyll.stochastic as stoc

from copy import deepcopy
from hyperopt import hp

parser = argparse.ArgumentParser()

parser.add_argument('-f', '--folder', default="/home/$USER/acceptability-judgments",
                    help="Path for acceptability judgments repository")
parser.add_argument('-sf', '--slurm_folder', default="./slurm_jobs",
                    help="Folder in which we should generate sbatch files")
parser.add_argument('-n', '--num_sweeps', type=int, default=1,
                    help="Number of sweeps to generate")

parser.add_argument('-j', '--job_name', default=None,
                    help="Job name, sweep sample number will be appended to this")
parser.add_argument('-t', '--time', default="47:00:00",
                    help="Time limit of sweep")
parser.add_argument('-me', '--mem', default="32GB",
                    help="Memory for sweep")
parser.add_argument('-g', '--gres', default="gpu:1",
                    help="GPU type to be specified in sweep")
parser.add_argument('-c', '--cpus-per-task', default="2",
                    help="CPUs per task to be specified in sweeps")
parser.add_argument('-p', '--patience', type=int, default=4, help="Early stopping patience")
parser.add_argument('-l', '--logs_dir', default='./logs',
help="Directory for storing logs") 30 | parser.add_argument('-s', '--save_loc', default='./save', 31 | help="Directory for saving models") 32 | parser.add_argument('-e', '--epochs', type=int, default=None, 33 | help="Epochs") 34 | parser.add_argument('-d', '--data', default='./data', 35 | help="Folder containing data tsvs") 36 | parser.add_argument('-eu', '--email', default=None, 37 | help="Email to be mailed for slurm notification") 38 | parser.add_argument('-pr', '--pre_command', default=None, 39 | help="Shell command to run before running main command") 40 | parser.add_argument('-ps', '--post_command', default=None, 41 | help="Shell command to run after running main command") 42 | parser.add_argument('-se', '--stages_per_epoch', type=int, default=None, 43 | help="Number of evaluation steps, if not passed default will be used") 44 | 45 | 46 | subparsers = parser.add_subparsers() 47 | lm_parser = subparsers.add_parser('lm', help="Generate sweeps for lm") 48 | 49 | lm_parser.add_argument('-v', '--vocab', help="Vocab file location") 50 | lm_parser.add_argument('-m', '--model', default="lstm", 51 | help="Model type to be used for lm") 52 | lm_parser.set_defaults(sweep_type="lm") 53 | 54 | classifier_parser = subparsers.add_parser('classifier', 55 | help="Generate sweeps for classifier") 56 | classifier_parser.set_defaults(sweep_type="classifier") 57 | 58 | classifier_parser.add_argument('-v', '--vocab', type=str, 59 | help="Vocab file location") 60 | classifier_parser.add_argument('-o', "--output_dir", type=str, default=None, 61 | help="Location of output directory") 62 | classifier_parser.add_argument('--encoder_path', type=str, default=None, 63 | help="Location of encoder checkpoint") 64 | classifier_parser.add_argument('--encoding_type', type=str, default=None, 65 | help="Class of encoder") 66 | classifier_parser.add_argument('--embedding_path', type=str, default=None, 67 | help="Path of embedding to load") 68 | classifier_parser.add_argument('--encoding_size', type=int, default=None, 69 | help="Size of encoding, only to be used if you are loading a pretrained encoder") 70 | classifier_parser.add_argument('--encoder_num_layers', type=int, default=None, 71 | help="Num layers of encoder, only to be used if you are loading a pretrained encoder") 72 | classifier_parser.add_argument('--embedding_size', type=int, default=None, 73 | help="Embedding size, only to used if you are loading a pretrained encoder") 74 | classifier_parser.add_argument('--embedding', type=str, default=None, 75 | help="Embedding, use to enter name of GloVe embedding") 76 | classifier_parser.add_argument('--max_pool', action="store_true", default=False, 77 | help="Use max-pooling for CBOW") 78 | classifier_parser.add_argument('--train_embeddings', action="store_true", default=False, 79 | help="Train word embeddings") 80 | classifier_parser.add_argument('--glove', action="store_true", default=False, 81 | help="Use glove") 82 | classifier_parser.add_argument('--lm_path', type=str, default=None, 83 | help="LM path, to be used with elmo classifier") 84 | classifier_parser.add_argument('-m', '--model', type=str, default=None, 85 | help="Model") 86 | classifier_parser.add_argument("--should_not_preprocess_data", action="store_true", default=False, 87 | help="Whether to preprocess data? 
classifier_parser.add_argument("--imbalance", action="store_true", default=False,
                               help="Is there class imbalance?")
classifier_parser.add_argument("--should_not_lowercase", action="store_true", default=False,
                               help="Should lowercase data? Default: true (Will lowercase)")
classifier_parser.add_argument("--preprocess_tokenizer", default=None, type=str,
                               help="Type of tokenizer to use (space|nltk)")

hashbang_line = '#!/bin/bash'

space = {
    'lm': hp.choice('lm', [{
        'hidden_size': hp.uniform('hidden_size', 300, 1200),
        'embedding_size': hp.uniform('embedding_size', 200, 600),
        'learning_rate': hp.uniform('learning_rate', -4, -2.5),
        'num_layers': hp.uniform('num_layers', 1, 5),
        'dropout': hp.choice('dropout', [0.2, 0.5])
    }]),
    'classifier': hp.choice('classifier', [{
        'hidden_size': hp.uniform('hidden_size', 20, 1200),
        'embedding_size': hp.uniform('embedding_size', 200, 600),
        'learning_rate': hp.uniform('learning_rate', -4, -5),
        'num_layers': hp.uniform('num_layers', 1, 2),
        'encoding_size': hp.uniform('encoding_size', 300, 1200),
        'encoder_num_layers': hp.uniform('encoder_num_layers', 1, 5),
        'dropout': hp.choice('dropout', [0.2, 0.5])
    }])
}

def generate_lm_sweeps(args):
    all_lines, post_shell = get_fixed_lines(args)

    run_line = get_fixed_lm_run_params(args)

    run_line = 'python -u acceptability/lm_run.py ' + run_line

    current_space = space[args.sweep_type]

    for index in range(args.num_sweeps):
        lines = deepcopy(all_lines)
        params_line, output_name = get_sampled_params_for_lm(current_space, index)

        lines[4] += str(index)

        if args.email:
            lines[9] = lines[9] + '-' + str(index) + '-%j_' + output_name
        else:
            lines[7] = lines[7] + '-' + str(index) + '-%j_' + output_name

        params_line = run_line + ' ' + params_line

        lines.append(params_line)

        lines = lines + post_shell
        slurm_file = '\n'.join(lines)

        write_slurm_file(slurm_file, args.slurm_folder, args.sweep_type, args.model, index)


def generate_classifier_sweeps(args):
    all_lines, post_shell = get_fixed_lines(args)
    run_line = get_fixed_classifier_run_params(args)

    run_line = 'python -u acceptability/run.py ' + run_line

    current_space = space[args.sweep_type]

    has_pretrained_encoder = args.encoder_path is not None
    for index in range(args.num_sweeps):
        lines = deepcopy(all_lines)

        params_line, output_name = get_sampled_params_for_classifier(args, current_space,
                                                                     index, has_pretrained_encoder)

        lines[4] += str(index)

        if args.email:
            lines[9] = lines[9] + '-' + str(index) + '-%j_' + output_name
        else:
            lines[7] = lines[7] + '-' + str(index) + '-%j_' + output_name

        params_line = run_line + ' ' + params_line

        lines.append(params_line)

        lines = lines + post_shell
        slurm_file = '\n'.join(lines)

        write_slurm_file(slurm_file, args.slurm_folder, args.sweep_type, args.model, index)


def get_fixed_lines(args):
    all_lines = [hashbang_line, '']

    sbatch_lines = generate_sbatch_params(args)
    module_lines = [get_module_load_lines()]
    cd_lines = ['cdir=' + args.folder, 'cd $cdir']
    pre_shell = get_shell_line(args.pre_command)
    post_shell = get_shell_line(args.post_command)
    xdg_line = [get_xdg_line()]

    all_lines = all_lines + sbatch_lines + module_lines + cd_lines + pre_shell + xdg_line

    return all_lines, post_shell

def write_slurm_file(data, folder, typ, model_name, index):
    if not os.path.exists(folder):
        os.makedirs(folder)
    file_name = 'run_acceptabilty_%s_%s_%d.sbatch' % (typ, model_name, index)
    with open(os.path.join(folder, file_name), 'w') as f:
        f.write(data)

def generate_sbatch_params(args):
    params = {
        'job-name': 'a' + args.sweep_type if args.job_name is None else args.job_name,
        'output': 'slurm',
        'nodes': 1,
        'cpus-per-task': args.cpus_per_task,
        'mem': args.mem,
        'time': args.time,
        'gres': args.gres,
    }

    if args.email:
        params['mail-type'] = 'ALL'
        params['mail-user'] = args.email

    lines = []
    sbatch_prepend = '#SBATCH '
    for key in sorted(list(params.keys())):
        lines.append('%s --%s=%s' % (sbatch_prepend, key, str(params[key])))

    return lines

def get_module_load_lines():
    return """module purge
module load cuda/8.0.44
module load cudnn/8.0v5.1
"""

def get_fixed_lm_run_params(args):
    params = ['-d', args.data, '-v', args.vocab, '--save_loc', args.save_loc,
              '--logs_dir', args.logs_dir, '-g', '-r', '-p', str(args.patience)]

    if args.stages_per_epoch is not None:
        params.append('-se')
        params.append(str(args.stages_per_epoch))

    if args.epochs is not None:
        params.append('-e')
        params.append(str(args.epochs))

    return ' '.join(params)

def get_fixed_classifier_run_params(args):
    params = ['-m', args.model, '-d', args.data, '--save_loc', args.save_loc, '--vocab_file', args.vocab,
              '--logs_dir', args.logs_dir, '-g', '-r', '-p', str(args.patience)]

    if args.output_dir is not None:
        params.append('-o')
        params.append(args.output_dir)

    if args.max_pool:
        params.append('--max_pool')

    if args.should_not_preprocess_data:
        params.append('--should_not_preprocess_data')

    if args.should_not_lowercase:
        params.append('--should_not_lowercase')

    if args.imbalance:
        params.append('--imbalance')

    if args.train_embeddings:
        params.append('--train_embeddings')

    if args.glove:
        params.append('--glove')

    if args.preprocess_tokenizer is not None:
        params.append('--preprocess_tokenizer')
        params.append(args.preprocess_tokenizer)

    if args.stages_per_epoch is not None:
        params.append('-se')
        params.append(str(args.stages_per_epoch))

    if args.epochs is not None:
        params.append('-n')
        params.append(str(args.epochs))

    if args.encoder_path is not None:
        params.append('--encoder_path')
        params.append(str(args.encoder_path))

    if args.encoder_num_layers is not None:
        params.append('--encoder_num_layers')
        params.append(str(args.encoder_num_layers))

    if args.encoding_size is not None:
        params.append('--encoding_size')
        params.append(str(args.encoding_size))

    if args.embedding_size is not None:
        params.append('--embedding_size')
        params.append(str(args.embedding_size))

    if args.encoding_type is not None:
        params.append('--encoding_type')
        params.append(str(args.encoding_type))

    if args.embedding_path is not None:
        params.append('--embedding_path')
        params.append(str(args.embedding_path))

    if args.embedding is not None:
        params.append('--embedding')
        params.append(str(args.embedding))

    if args.lm_path is not None:
        params.append('--lm_path')
        params.append(str(args.lm_path))

    # if args.number_experiment is not None:
    #     params.append('--experiment_name')
    #     params.append(str(args.job_name))

    return ' '.join(params)

def get_sampled_params_for_classifier(args, space, index=1, has_pretrained_encoder=False):
    sample = stoc.sample(space)
    sample['learning_rate'] = 10 ** (sample['learning_rate'])
    sample['hidden_size'] = int(sample['hidden_size'])
    sample['embedding_size'] = int(sample['embedding_size'])
    sample['num_layers'] = int(sample['num_layers'])
    sample['encoding_size'] = int(sample['encoding_size'])
    sample['encoder_num_layers'] = int(sample['encoder_num_layers'])


    output = 'lr_%.5f_nl_%d_hs_%d_do_%.1f' % (sample['learning_rate'],
                                              sample['num_layers'], sample['hidden_size'], sample['dropout'])

    params = '-lr %.5f -nl %d -hs %d -do %.1f' % (sample['learning_rate'],
                                                  sample['num_layers'], sample['hidden_size'], sample['dropout'])

    embedding_size = sample["embedding_size"] if args.embedding_size is None else args.embedding_size

    if has_pretrained_encoder:
        sample.pop('encoder_num_layers')
        sample.pop('encoding_size')
        sample.pop('embedding_size')
        output += '_ed_%d_es_%d_enl_%d.out' % (embedding_size,
                                               args.encoder_num_layers, args.encoding_size)
    else:
        output += '_ed_%d_es_%d_enl_%d.out' % (embedding_size, sample['encoding_size'],
                                               sample['encoder_num_layers'])
        params += ' -es %d --encoding_size %d --encoder_num_layers %d' % (embedding_size,
                                                                          sample['encoding_size'],
                                                                          sample['encoder_num_layers'])

    print("Sweep ", index, sample)

    return params, output

def get_sampled_params_for_lm(space, index=1):
    sample = stoc.sample(space)
    sample['learning_rate'] = 10 ** (sample['learning_rate'])
    sample['embedding_size'] = int(sample['embedding_size'])
    sample['hidden_size'] = int(sample['hidden_size'])
    sample['num_layers'] = int(sample['num_layers'])

    print("Sweep ", index, sample)

    output = 'lr_%.5f_do_%.1f_nl_%d_hs_%d_es_%d.out' % (sample['learning_rate'],
                                                        sample['dropout'], sample['num_layers'],
                                                        sample['hidden_size'], sample['embedding_size'])

    params = '-lr %.5f -do %.1f -nl %d -hs %d -es %d' % (sample['learning_rate'],
                                                         sample['dropout'], sample['num_layers'],
                                                         sample['hidden_size'], sample['embedding_size'])

    return params, output

def get_xdg_line():
    return """cat<