├── README.md ├── addition_generator.py ├── data.py ├── date-normalization.ipynb ├── date_generator.py ├── learning-to-execute.ipynb ├── model.py ├── program_generator.py ├── train.py └── trained_model ├── checkpoint ├── history.json ├── model_(2, 3).ckpt └── model_(2, 3).ckpt.meta /README.md: -------------------------------------------------------------------------------- 1 | # Learning to execute Python in Tensorflow 2 | 3 | Reimplementation of the paper [Learning to execute](https://arxiv.org/abs/1410.4615) in Tensorflow (0.7.0 and 0.8.0). 4 | 5 | Original implementation in Torch: 6 | https://github.com/wojciechz/learning_to_execute 7 | 8 | A Seq2Seq model is trained to execute simple Python programs. 9 | 10 | Example: 11 | ``` 12 | Input: 13 | g=6 14 | a=-77 15 | if a>2: 16 | g=a-4 17 | print(a+g) 18 | ----------- 19 | Target: -71 20 | Model prediction: -71 21 | ``` 22 | 23 | The program generation code is simplified compared to the original paper but can easily be extended. 24 | 25 | ## Date normalization 26 | 27 | date-normalization.ipynb also contains a simple date format normalization example using the same model code. 28 | 29 | ## PyCon Sweden 30 | This work was presented at PyCon Sweden 2016, Stockholm. 31 | [Presentation](https://docs.google.com/presentation/d/14hkW1uOC7TUk2iPknvDUHN_vZKHkQwyd5NwLLavxErs/edit?usp=sharing) 32 | 33 | -------------------------------------------------------------------------------- /addition_generator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | 4 | from data import encode_sequences, dense_to_one_hot 5 | 6 | GO_SYMBOL = 'G' 7 | PAD_SYMBOL = '_' 8 | INPUT_LETTERS = string.digits + '+' 9 | SYMBOLS = [GO_SYMBOL, PAD_SYMBOL] + list(INPUT_LETTERS) 10 | SYMBOL_TO_IDX = dict((l, i) for i, l in enumerate(SYMBOLS)) 11 | 12 | MAX_NUM_LEN = 5 13 | INPUT_SEQ_LEN = MAX_NUM_LEN * 2 + 3 14 | OUTPUT_SEQ_LEN = MAX_NUM_LEN + 2 15 | 16 | 17 | class AdditionGenerator(): 18 | def __init__(self, batch_size, number_len=2): 19 | self.number_len = number_len 20 | self.batch_size = batch_size 21 | 22 | def random_digit(self): 23 | return random.randint(0, 10 ** random.randint(1, self.number_len)) 24 | 25 | def increase_difficulty(self): 26 | if self.number_len < MAX_NUM_LEN: 27 | self.number_len += 1 28 | 29 | def has_max_difficulty(self): 30 | return self.number_len == MAX_NUM_LEN 31 | 32 | def difficulty(self): 33 | return self.number_len 34 | 35 | def next_batch(self, validation=False): 36 | ints_batch = [(self.random_digit(), 37 | self.random_digit()) for _ in range(self.batch_size)] 38 | int_sum_batch = [sum(ints) for ints in ints_batch] 39 | 40 | addition_strings = ["{0}+{1}".format(*ints) for ints in ints_batch] 41 | sum_strings = [str(s) for s in int_sum_batch] 42 | 43 | input_sequences = encode_sequences(addition_strings, 44 | symbol_to_idx=SYMBOL_TO_IDX, 45 | sequence_len=INPUT_SEQ_LEN, 46 | pad_symbol=PAD_SYMBOL, 47 | pad_beginning=True, 48 | reverse=False) 49 | input_sequences = dense_to_one_hot(input_sequences, 50 | num_classes=len(SYMBOL_TO_IDX)) 51 | 52 | target_sequences = encode_sequences(sum_strings, 53 | symbol_to_idx=SYMBOL_TO_IDX, 54 | sequence_len=OUTPUT_SEQ_LEN, 55 | pad_beginning=False, 56 | pad_symbol=PAD_SYMBOL) 57 | target_sequences = dense_to_one_hot(target_sequences, 58 | num_classes=len(SYMBOL_TO_IDX)) 59 | 60 | return input_sequences, target_sequences 61 | -------------------------------------------------------------------------------- /data.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def encode_sequences(letter_sequences, symbol_to_idx, sequence_len, pad_symbol=None, go_symbol=None, 5 | pad_beginning=True, reverse=False, ): 6 | """ 7 | Given a set of symbols and their index/label encoded the given 8 | list of string sequences as numeric sequences. 9 | """ 10 | 11 | pad_idx = symbol_to_idx[pad_symbol] 12 | 13 | if go_symbol is None: 14 | go_idx = None 15 | else: 16 | go_idx = symbol_to_idx[go_symbol] 17 | 18 | assert sequence_len >= len(max(letter_sequences, key=len)) + 0 if go_idx is None else 1 19 | 20 | encoded_sequences = np.full((len(letter_sequences), sequence_len), 21 | fill_value=pad_idx, 22 | dtype=np.int32) 23 | 24 | for i, sequence in enumerate(letter_sequences): 25 | 26 | idxs = [symbol_to_idx[symbol] for symbol in sequence] 27 | 28 | if reverse: 29 | idxs = idxs[::-1] 30 | 31 | # Insert the idx of the GO symbol to the end of the sequence. 32 | if go_idx is not None: 33 | idxs.append(go_idx) 34 | 35 | if pad_beginning: 36 | encoded_sequences[i, -len(idxs):] = idxs 37 | else: 38 | encoded_sequences[i, :len(idxs)] = idxs 39 | 40 | return encoded_sequences 41 | 42 | 43 | def decode_output_sequences(sequences, symbols): 44 | """ 45 | Args: 46 | sequences: ndarray 47 | Shape: (num_seq, time_steps, output_size) 48 | symbols: [str] 49 | 50 | Returns: 51 | decoded_sequences: [str] 52 | """ 53 | 54 | decoded_sequences = [] 55 | for sequence in np.argmax(sequences, axis=2): 56 | decoded_sequences.append(''.join(symbols[idx] for idx in sequence)) 57 | return decoded_sequences 58 | 59 | 60 | def dense_to_one_hot(labels_dense, num_classes): 61 | """ 62 | Convert class labels from scalars to one-hot vectors. 63 | 64 | Args: 65 | labels_dense: array, 1D or 2D, int32 66 | Shape: (num_samples) or (num_sequences, sequence_len) 67 | num_classes: int 68 | 69 | Returns: 70 | labels_one_hot: array, 2D or 3D, float32 71 | Shape: (num_samples, num_classes) or 72 | (num_sequences, sequence_len, num_classes) 73 | """ 74 | 75 | assert labels_dense.ndim == 1 or labels_dense.ndim == 2 76 | assert labels_dense.dtype == np.int32 77 | 78 | if labels_dense.ndim == 1: 79 | num_sequences = 0 80 | sequence_len = labels_dense.shape 81 | else: 82 | num_sequences, sequence_len = labels_dense.shape 83 | 84 | labels_dense = labels_dense.reshape(-1) 85 | num_labels = labels_dense.shape[0] 86 | index_offset = np.arange(num_labels) * num_classes 87 | labels_one_hot = np.zeros((num_labels, num_classes), dtype=np.float32) 88 | labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 89 | 90 | if num_sequences > 0: 91 | labels_one_hot = labels_one_hot.reshape((num_sequences, sequence_len, num_classes)) 92 | 93 | return labels_one_hot 94 | -------------------------------------------------------------------------------- /date-normalization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from pprint import pprint\n", 12 | "from ipywidgets import interact\n", 13 | "import tensorflow as tf\n", 14 | "session = tf.InteractiveSession()" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": { 21 | "collapsed": false 22 | }, 23 | "outputs": [], 24 | "source": [ 25 | "from data import decode_output_sequences\n", 26 | "from model import Seq2SeqModel" 27 | ] 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "metadata": {}, 32 | "source": [ 33 | "# Date normalization model\n", 34 | "Trains a model to normalize date formats" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": { 41 | "collapsed": false, 42 | "scrolled": false 43 | }, 44 | "outputs": [], 45 | "source": [ 46 | "from date_generator import DateGenerator, SYMBOLS, SYMBOL_TO_IDX, INPUT_SEQ_LEN, OUTPUT_SEQ_LEN\n", 47 | "\n", 48 | "session.close()\n", 49 | "tf.reset_default_graph()\n", 50 | "session = tf.InteractiveSession()\n", 51 | "\n", 52 | "hidden_units = 128\n", 53 | "batch_size = 32\n", 54 | "num_symbols = len(SYMBOL_TO_IDX)\n", 55 | "\n", 56 | "model = Seq2SeqModel(session=session,\n", 57 | " hidden_units = hidden_units, \n", 58 | " input_sequence_len = INPUT_SEQ_LEN,\n", 59 | " output_sequence_len = OUTPUT_SEQ_LEN,\n", 60 | " num_input_symbols = num_symbols,\n", 61 | " num_output_symbols = num_symbols,\n", 62 | " batch_size = batch_size,\n", 63 | " symbols=SYMBOLS)\n", 64 | "\n", 65 | "model.init_variables()\n", 66 | "\n", 67 | "\n", 68 | "print(\"Finished building model\")" 69 | ] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "metadata": {}, 74 | "source": [ 75 | "Generate some data to look at." 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": { 82 | "collapsed": false 83 | }, 84 | "outputs": [], 85 | "source": [ 86 | "date_generator = DateGenerator(batch_size=batch_size, random_format=False)\n", 87 | "x, y = date_generator.next_batch()\n", 88 | "\n", 89 | "input_strings = decode_output_sequences(x, symbols=SYMBOLS)\n", 90 | "target_strings = decode_output_sequences(y, symbols=SYMBOLS)\n", 91 | "\n", 92 | "pprint([(\"Input\", \"Target\")] + \n", 93 | " list(zip(input_strings, target_strings)))" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": null, 99 | "metadata": { 100 | "collapsed": false, 101 | "scrolled": false 102 | }, 103 | "outputs": [], 104 | "source": [ 105 | "model.fit(date_generator, num_epochs=20, batches_per_epoch=64)\n", 106 | "print(\"Finished training\")" 107 | ] 108 | }, 109 | { 110 | "cell_type": "code", 111 | "execution_count": null, 112 | "metadata": { 113 | "collapsed": false, 114 | "scrolled": false 115 | }, 116 | "outputs": [], 117 | "source": [ 118 | "from pprint import pprint\n", 119 | "\n", 120 | "x, y = date_generator.next_batch()\n", 121 | "input_strings = decode_output_sequences(x, symbols=SYMBOLS)\n", 122 | "target_strings = decode_output_sequences(y, symbols=SYMBOLS)\n", 123 | "\n", 124 | "model_output = model.predict(x)\n", 125 | "pred_strings = decode_output_sequences(model_output, symbols=SYMBOLS)\n", 126 | "\n", 127 | "pprint([(\"Input\", \"Target\", \"Output\")] + \n", 128 | " list(zip(input_strings, target_strings, pred_strings)))" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": null, 134 | "metadata": { 135 | "collapsed": true 136 | }, 137 | "outputs": [], 138 | "source": [] 139 | } 140 | ], 141 | "metadata": { 142 | "kernelspec": { 143 | "display_name": "Python 3", 144 | "language": "python", 145 | "name": "python3" 146 | }, 147 | "language_info": { 148 | "codemirror_mode": { 149 | "name": "ipython", 150 | "version": 3 151 | }, 152 | "file_extension": ".py", 153 | "mimetype": "text/x-python", 154 | "name": "python", 155 | "nbconvert_exporter": "python", 156 | "pygments_lexer": "ipython3", 157 | "version": "3.5.1" 158 | } 159 | }, 160 | "nbformat": 4, 161 | "nbformat_minor": 0 162 | } 163 | -------------------------------------------------------------------------------- /date_generator.py: -------------------------------------------------------------------------------- 1 | import random 2 | import string 3 | from datetime import datetime 4 | from datetime import timedelta 5 | 6 | from data import encode_sequences, dense_to_one_hot 7 | 8 | START_DATE = datetime.strptime('1950-01-01', '%Y-%m-%d') 9 | END_DATE = datetime.strptime('2050-12-31', '%Y-%m-%d') 10 | 11 | FORMAT_TOKENS = ('%d', '%b', '%B', '%m', '%y', '%Y') 12 | INPUT_FORMATS = ['%Y %B, %d', 13 | '%B %d, %Y', 14 | '%b %d %y', 15 | '%Y-%m-%d', 16 | '%Y/%m/%d', 17 | '%y-%m-%d', 18 | '%d-%m-%y'] 19 | 20 | OUTPUT_FORMAT = '%Y-%m-%d' 21 | 22 | PAD_SYMBOL = '_' 23 | LETTERS = string.ascii_lowercase + string.digits + ',- .:/' 24 | SYMBOLS = [PAD_SYMBOL] + list(LETTERS) 25 | SYMBOL_TO_IDX = dict((l, i) for i, l in enumerate(SYMBOLS)) 26 | 27 | INPUT_SEQ_LEN = 20 28 | OUTPUT_SEQ_LEN = 12 + 1 29 | 30 | 31 | def random_datetime(start=START_DATE, end=END_DATE): 32 | """ 33 | This function will return a random datetime between two datetime 34 | objects. 35 | """ 36 | delta = end - start 37 | int_delta = (delta.days * 24 * 60 * 60) + delta.seconds 38 | random_second = random.randrange(int_delta) 39 | return start + timedelta(seconds=random_second) 40 | 41 | 42 | def format_date(date, random_format=False): 43 | if random_format: 44 | random_format_tokens = random.sample(FORMAT_TOKENS, random.randint(3, 5)) 45 | date_format = '-'.join(random_format_tokens) 46 | else: 47 | date_format = random.choice(INPUT_FORMATS) 48 | return date.strftime(date_format).lower() 49 | 50 | 51 | class DateGenerator(): 52 | def __init__(self, batch_size, output_format=OUTPUT_FORMAT, random_format=False): 53 | self.batch_size = batch_size 54 | self.output_format = output_format 55 | self.random_format = False 56 | 57 | def next_batch(self, validation=False): 58 | datetimes = [random_datetime() for _ in range(self.batch_size)] 59 | input_date_strings = [format_date(dt, random_format=self.random_format) for dt in datetimes] 60 | target_date_strings = [dt.strftime(self.output_format) for dt in datetimes] 61 | 62 | input_sequences = encode_sequences(input_date_strings, 63 | symbol_to_idx=SYMBOL_TO_IDX, 64 | sequence_len=INPUT_SEQ_LEN, 65 | pad_symbol=PAD_SYMBOL, 66 | go_symbol=None, 67 | pad_beginning=True, 68 | reverse=True) 69 | input_sequences = dense_to_one_hot(input_sequences, 70 | num_classes=len(SYMBOL_TO_IDX)) 71 | 72 | target_sequences = encode_sequences(target_date_strings, 73 | symbol_to_idx=SYMBOL_TO_IDX, 74 | sequence_len=OUTPUT_SEQ_LEN, 75 | pad_symbol=PAD_SYMBOL, 76 | go_symbol=None, 77 | pad_beginning=False) 78 | target_sequences = dense_to_one_hot(target_sequences, 79 | num_classes=len(SYMBOL_TO_IDX)) 80 | 81 | return input_sequences, target_sequences 82 | 83 | def has_max_difficulty(self): 84 | return True 85 | 86 | def difficulty(self): 87 | return 1 88 | -------------------------------------------------------------------------------- /learning-to-execute.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": { 7 | "collapsed": false 8 | }, 9 | "outputs": [], 10 | "source": [ 11 | "from ipywidgets import interact\n", 12 | "import tensorflow as tf\n", 13 | "session = tf.InteractiveSession()" 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": { 20 | "collapsed": false 21 | }, 22 | "outputs": [], 23 | "source": [ 24 | "from data import decode_output_sequences\n", 25 | "from model import Seq2SeqModel" 26 | ] 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "metadata": {}, 31 | "source": [ 32 | "# Addition model\n", 33 | "\n", 34 | "First train the model to add two numbers, x+y, and output the sum.\n", 35 | "\n", 36 | "Generate some example data:" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "collapsed": false 44 | }, 45 | "outputs": [], 46 | "source": [ 47 | "from addition_generator import AdditionGenerator, SYMBOLS, SYMBOL_TO_IDX, INPUT_SEQ_LEN, OUTPUT_SEQ_LEN \n", 48 | "\n", 49 | "addition_generator = AdditionGenerator(batch_size=3)\n", 50 | "x, y = addition_generator.next_batch()\n", 51 | "\n", 52 | "input_strings = decode_output_sequences(x, symbols=SYMBOLS)\n", 53 | "target_strings = decode_output_sequences(y, symbols=SYMBOLS)\n", 54 | "\n", 55 | "print(\" Inputs:\", input_strings)\n", 56 | "print(\"Targets:\", target_strings)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": { 63 | "collapsed": false 64 | }, 65 | "outputs": [], 66 | "source": [ 67 | "session.close()\n", 68 | "tf.reset_default_graph()\n", 69 | "session = tf.InteractiveSession()\n", 70 | "\n", 71 | "hidden_units = 128\n", 72 | "num_layers = 2\n", 73 | "training_batch_size = 32\n", 74 | "num_symbols = len(SYMBOL_TO_IDX)\n", 75 | "\n", 76 | "addition_model = Seq2SeqModel(session=session,\n", 77 | " hidden_units=hidden_units, \n", 78 | " num_layers=num_layers,\n", 79 | " input_sequence_len = INPUT_SEQ_LEN,\n", 80 | " output_sequence_len = OUTPUT_SEQ_LEN,\n", 81 | " num_input_symbols = num_symbols,\n", 82 | " num_output_symbols = num_symbols,\n", 83 | " batch_size=training_batch_size,\n", 84 | " symbols=SYMBOLS)\n", 85 | "\n", 86 | "addition_model.init_variables()\n", 87 | "\n", 88 | "addition_generator = AdditionGenerator(batch_size=training_batch_size)\n", 89 | "\n", 90 | "print(\"Finished building model\")" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": { 97 | "collapsed": false 98 | }, 99 | "outputs": [], 100 | "source": [ 101 | "addition_model.fit(addition_generator, num_epochs=40, batches_per_epoch=20)\n", 102 | "print(\"Finished training\")" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": { 109 | "collapsed": false 110 | }, 111 | "outputs": [], 112 | "source": [ 113 | "from pprint import pprint\n", 114 | "\n", 115 | "batch_size = 10\n", 116 | "test_generator = AdditionGenerator(batch_size=batch_size, number_len=3)\n", 117 | "\n", 118 | "x, y = test_generator.next_batch()\n", 119 | "input_strings = decode_output_sequences(x, symbols=SYMBOLS)\n", 120 | "target_strings = decode_output_sequences(y, symbols=SYMBOLS)\n", 121 | "\n", 122 | "model_output = testing_model.predict(x)\n", 123 | "pred_strings = decode_output_sequences(model_output, symbols=SYMBOLS)\n", 124 | "\n", 125 | "print(\"Error rate:\", testing_model.validate([(x, y)]))\n", 126 | "\n", 127 | "pprint([(\"Input\", \"Target\", \"Output\")] + \n", 128 | " list(zip(input_strings, target_strings, pred_strings)))\n" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "metadata": {}, 134 | "source": [ 135 | "# Program execution model\n", 136 | "Train the model to execute simple programs. Curriculum learning is used to increamentally increase the program difficulty." 137 | ] 138 | }, 139 | { 140 | "cell_type": "code", 141 | "execution_count": null, 142 | "metadata": { 143 | "collapsed": false 144 | }, 145 | "outputs": [], 146 | "source": [ 147 | "from program_generator import ProgramGenerator, SYMBOLS, SYMBOL_TO_IDX, INPUT_SEQ_LEN, OUTPUT_SEQ_LEN \n", 148 | "\n", 149 | "program_generator = ProgramGenerator(batch_size=10, program_length=1, num_len=2)\n", 150 | "x, y = program_generator.next_batch()\n", 151 | "\n", 152 | "input_strings = decode_output_sequences(x, symbols=SYMBOLS)\n", 153 | "target_strings = decode_output_sequences(y, symbols=SYMBOLS)\n", 154 | "\n", 155 | "print(\" Inputs:\", input_strings)\n", 156 | "print(\"Targets:\", target_strings)" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": null, 162 | "metadata": { 163 | "collapsed": false 164 | }, 165 | "outputs": [], 166 | "source": [ 167 | "session.close()\n", 168 | "tf.reset_default_graph()\n", 169 | "session = tf.InteractiveSession()\n", 170 | "\n", 171 | "hidden_units = 320\n", 172 | "num_layers = 2\n", 173 | "training_batch_size = 128\n", 174 | "num_symbols = len(SYMBOL_TO_IDX)\n", 175 | "\n", 176 | "program_model = Seq2SeqModel(session=session,\n", 177 | " hidden_units=hidden_units, \n", 178 | " num_layers=num_layers,\n", 179 | " input_sequence_len = INPUT_SEQ_LEN,\n", 180 | " output_sequence_len = OUTPUT_SEQ_LEN,\n", 181 | " num_input_symbols = num_symbols,\n", 182 | " num_output_symbols = num_symbols,\n", 183 | " batch_size=training_batch_size,\n", 184 | " symbols=SYMBOLS,\n", 185 | " scope='model')\n", 186 | "\n", 187 | "program_model.init_variables()\n", 188 | "\n", 189 | "program_generator = ProgramGenerator(batch_size=training_batch_size, program_length=1, num_len=2)\n", 190 | "\n", 191 | "print(\"Finished building model\")" 192 | ] 193 | }, 194 | { 195 | "cell_type": "code", 196 | "execution_count": null, 197 | "metadata": { 198 | "collapsed": false, 199 | "scrolled": false 200 | }, 201 | "outputs": [], 202 | "source": [ 203 | "program_model.fit(program_generator, \n", 204 | " num_epochs=20000, \n", 205 | " batches_per_epoch=128)\n", 206 | "print(\"Finished training\")" 207 | ] 208 | }, 209 | { 210 | "cell_type": "code", 211 | "execution_count": 23, 212 | "metadata": { 213 | "collapsed": false, 214 | "scrolled": false 215 | }, 216 | "outputs": [], 217 | "source": [ 218 | "# Restore previously trained model with 320 hidden units took about 10h to train on an AWS instance.\n", 219 | "saver = tf.train.Saver()\n", 220 | "saver.restore(session, \"trained_model/model_(2, 3).ckpt\")" 221 | ] 222 | }, 223 | { 224 | "cell_type": "markdown", 225 | "metadata": {}, 226 | "source": [ 227 | "\n", 228 | "\n", 229 | "\n", 230 | "# View predictions\n" 231 | ] 232 | }, 233 | { 234 | "cell_type": "code", 235 | "execution_count": null, 236 | "metadata": { 237 | "collapsed": false, 238 | "scrolled": false 239 | }, 240 | "outputs": [], 241 | "source": [ 242 | "from random import seed\n", 243 | "\n", 244 | "seed(101)\n", 245 | "test_generator = ProgramGenerator(batch_size=training_batch_size, num_len=2, program_length=3)\n", 246 | "\n", 247 | "x, y = test_generator.next_batch(validation=True)\n", 248 | "\n", 249 | "input_strings = decode_output_sequences(x, symbols=SYMBOLS)\n", 250 | "target_strings = decode_output_sequences(y, symbols=SYMBOLS)\n", 251 | "\n", 252 | "model_output = program_model.predict(x)\n", 253 | "\n", 254 | "pred_strings = decode_output_sequences(model_output, symbols=SYMBOLS)\n", 255 | "\n", 256 | "def view_prediction(i):\n", 257 | " print(input_strings[i][::-1].strip('_'))\n", 258 | " print(\"--------\")\n", 259 | " print(\"Targ:\", target_strings[i].strip('_'))\n", 260 | " print(\"Pred:\", pred_strings[i].strip('_'))\n", 261 | "a = interact(view_prediction, i=(0, training_batch_size - 1))" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": null, 267 | "metadata": { 268 | "collapsed": false 269 | }, 270 | "outputs": [], 271 | "source": [] 272 | } 273 | ], 274 | "metadata": { 275 | "kernelspec": { 276 | "display_name": "Python 3", 277 | "language": "python", 278 | "name": "python3" 279 | }, 280 | "language_info": { 281 | "codemirror_mode": { 282 | "name": "ipython", 283 | "version": 3 284 | }, 285 | "file_extension": ".py", 286 | "mimetype": "text/x-python", 287 | "name": "python", 288 | "nbconvert_exporter": "python", 289 | "pygments_lexer": "ipython3", 290 | "version": "3.5.1" 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 0 295 | } 296 | -------------------------------------------------------------------------------- /model.py: -------------------------------------------------------------------------------- 1 | import time 2 | import numpy as np 3 | import tensorflow as tf 4 | import json 5 | 6 | from tensorflow.python.framework import dtypes 7 | from tensorflow.python.ops import variable_scope 8 | from tensorflow.models.rnn import seq2seq, rnn, rnn_cell 9 | 10 | DEFAULT_LEARNING_RATE = 0.01 11 | 12 | from data import decode_output_sequences 13 | 14 | 15 | class Seq2SeqGraph(): 16 | def __init__(self, 17 | is_training=False, 18 | hidden_units=128, 19 | num_layers=1, 20 | input_sequence_len=20, 21 | output_sequence_len=10, 22 | num_input_symbols=20, 23 | num_output_symbols=20, 24 | weight_amplitude=0.08, 25 | batch_size=32, 26 | peep=False): 27 | 28 | self.encoder_inputs = [] 29 | self.decoder_inputs = [] 30 | 31 | for i in range(input_sequence_len): 32 | self.encoder_inputs.append(tf.placeholder(tf.float32, shape=(None, num_input_symbols), 33 | name="encoder_{0}".format(i))) 34 | 35 | for i in range(output_sequence_len + 1): 36 | self.decoder_inputs.append(tf.placeholder(tf.float32, shape=(None, num_output_symbols), 37 | name="decoder_{0}".format(i))) 38 | 39 | def random_uniform(): 40 | return tf.random_uniform_initializer(-weight_amplitude, weight_amplitude) 41 | 42 | if num_layers > 1: 43 | cells = [rnn_cell.LSTMCell(hidden_units, use_peepholes=peep, input_size=num_input_symbols, 44 | initializer=random_uniform())] 45 | cells += [rnn_cell.LSTMCell(hidden_units, use_peepholes=peep, input_size=hidden_units, 46 | initializer=random_uniform()) for _ in range(num_layers - 1)] 47 | self.cell = rnn_cell.MultiRNNCell(cells) 48 | else: 49 | self.cell = rnn_cell.LSTMCell(hidden_units, use_peepholes=peep, 50 | initializer=random_uniform()) 51 | 52 | self.w_softmax = tf.get_variable('w_softmax', shape=(hidden_units, num_output_symbols), 53 | initializer=random_uniform()) 54 | self.b_softmax = tf.get_variable('b_softmax', shape=(num_output_symbols,), 55 | initializer=random_uniform()) 56 | 57 | # decoder_outputs is a list of tensors with output_sequence_len: [(batch_size x hidden_units)] 58 | decoder_outputs, _ = self._init_seq2seq(self.encoder_inputs, self.decoder_inputs, self.cell, 59 | feed_previous=not is_training) 60 | 61 | output_logits = [tf.matmul(decoder_output, self.w_softmax) + self.b_softmax 62 | for decoder_output in decoder_outputs] 63 | self.output_probs = [tf.nn.softmax(logit) for logit in output_logits] 64 | 65 | # If this is a training model create the training operation and loss function 66 | if is_training: 67 | self.targets = self.decoder_inputs[1:] 68 | losses = [tf.nn.softmax_cross_entropy_with_logits(logit, target) 69 | for logit, target in zip(output_logits, self.targets)] 70 | 71 | loss = tf.reduce_sum(tf.add_n(losses)) 72 | self.cost = loss / output_sequence_len / batch_size 73 | self.learning_rate = tf.Variable(DEFAULT_LEARNING_RATE, trainable=False) 74 | 75 | train_vars = tf.trainable_variables() 76 | grads = tf.gradients(self.cost, train_vars) 77 | optimizer = tf.train.AdamOptimizer(self.learning_rate) 78 | 79 | self.train_op = optimizer.apply_gradients(zip(grads, train_vars)) 80 | 81 | def _init_seq2seq(self, encoder_inputs, decoder_inputs, cell, feed_previous): 82 | 83 | def inference_loop_function(prev, _): 84 | prev = tf.nn.xw_plus_b(prev, self.w_softmax, self.b_softmax) 85 | return tf.to_float(tf.equal(prev, tf.reduce_max(prev, reduction_indices=[1], keep_dims=True))) 86 | 87 | loop_function = inference_loop_function if feed_previous else None 88 | 89 | with variable_scope.variable_scope('seq2seq'): 90 | _, final_enc_state = rnn.rnn(cell, encoder_inputs, dtype=dtypes.float32) 91 | return seq2seq.rnn_decoder(decoder_inputs, final_enc_state, cell, loop_function=loop_function) 92 | 93 | 94 | class Seq2SeqModel: 95 | def __init__(self, 96 | session, 97 | hidden_units=128, 98 | num_layers=1, 99 | input_sequence_len=20, 100 | output_sequence_len=10, 101 | num_input_symbols=20, 102 | num_output_symbols=20, 103 | batch_size=32, 104 | go_symbol_idx=0, 105 | symbols=None, 106 | scope='seq2seq_model'): 107 | 108 | self.session = session 109 | self.batch_size = batch_size 110 | self.symbols = symbols 111 | self.go_decoder_input_value = np.zeros((batch_size, num_output_symbols), dtype=np.float32) 112 | self.go_decoder_input_value[:, go_symbol_idx] = 1.0 113 | 114 | # We need to creat two different graphs one where the output of the decoder is looped back 115 | # to the decoder input (inference) and one where the decoder input is set to the targets (training 116 | with tf.variable_scope(scope, reuse=None): 117 | self.training_graph = Seq2SeqGraph(hidden_units=hidden_units, 118 | num_layers=num_layers, 119 | input_sequence_len=input_sequence_len, 120 | output_sequence_len=output_sequence_len, 121 | num_input_symbols=num_input_symbols, 122 | num_output_symbols=num_output_symbols, 123 | batch_size=batch_size, 124 | is_training=True) 125 | 126 | with tf.variable_scope(scope, reuse=True): 127 | self.testing_graph = Seq2SeqGraph(hidden_units=hidden_units, 128 | num_layers=num_layers, 129 | input_sequence_len=input_sequence_len, 130 | output_sequence_len=output_sequence_len, 131 | num_input_symbols=num_input_symbols, 132 | num_output_symbols=num_output_symbols, 133 | batch_size=batch_size, 134 | is_training=False) 135 | 136 | def set_learning_rate(self, learning_rate): 137 | self.session.run(tf.assign(self.training_graph.learning_rate, learning_rate)) 138 | 139 | def get_learning_rate(self): 140 | return self.training_graph.learning_rate.eval() 141 | 142 | def init_variables(self): 143 | tf.initialize_all_variables().run() 144 | 145 | def _fit_batch(self, input_values, targets): 146 | assert targets.shape[0] == input_values.shape[0] == self.batch_size 147 | assert len(self.training_graph.encoder_inputs) == input_values.shape[1] 148 | assert len(self.training_graph.decoder_inputs) == targets.shape[1] + 1 149 | 150 | input_feed = {} 151 | for i, encoder_input in enumerate(self.training_graph.encoder_inputs): 152 | input_feed[encoder_input.name] = input_values[:, i, :] 153 | 154 | # The first input of the decoder is the padding symbol (we use the same symbol for GO and PAD) 155 | input_feed[self.training_graph.decoder_inputs[0].name] = self.go_decoder_input_value 156 | 157 | for i, decoder_input in enumerate(self.training_graph.decoder_inputs[1:]): 158 | input_feed[decoder_input.name] = targets[:, i] 159 | 160 | train_loss, _ = self.session.run([self.training_graph.cost, 161 | self.training_graph.train_op], feed_dict=input_feed) 162 | 163 | return train_loss 164 | 165 | def fit(self, 166 | data_generator, 167 | num_epochs=30, 168 | batches_per_epoch=256, 169 | lr_decay=0.95, 170 | num_val_batches=128, 171 | output_dir='output'): 172 | 173 | with tf.device('/cpu:0'): 174 | saver = tf.train.Saver() 175 | 176 | history = [] 177 | prev_error_rate = np.inf 178 | val_error_rate = np.inf 179 | best_val_error_rate = np.inf 180 | 181 | val_set = [data_generator.next_batch(validation=True) for _ in range(num_val_batches)] 182 | 183 | epochs_since_init = 0 184 | 185 | for e in range(num_epochs): 186 | 187 | if self.symbols: 188 | self.examples(data_generator) 189 | 190 | start = time.time() 191 | for b in range(batches_per_epoch): 192 | inputs, targets = data_generator.next_batch(validation=False) 193 | train_loss = self._fit_batch(inputs, targets) 194 | 195 | end = time.time() 196 | 197 | val_error_rate = self.validate(val_set) 198 | 199 | if epochs_since_init > 15 and epochs_since_init < 17 and val_error_rate > 0.85: 200 | self.init_variables() 201 | epochs_since_init = 0 202 | print("Restarting...") 203 | continue 204 | epochs_since_init += 1 205 | 206 | print("Epoch {}: train_loss = {:.3f}, val_error_rate = {:.3f}, time/epoch = {:.3f}, diff: {}" 207 | .format(e, train_loss, val_error_rate, end - start, data_generator.difficulty())) 208 | 209 | if best_val_error_rate > val_error_rate: 210 | save_path = saver.save(self.session, "{}/model_{}.ckpt".format(output_dir, 211 | data_generator.difficulty())) 212 | print("Model saved in file: %s" % save_path) 213 | best_val_error_rate = val_error_rate 214 | 215 | if val_error_rate > prev_error_rate and data_generator.has_max_difficulty(): 216 | self.set_learning_rate(self.get_learning_rate() * lr_decay) 217 | print("Decreasing LR to {:.5f}".format(self.get_learning_rate())) 218 | 219 | elif val_error_rate < 0.10: 220 | print("Increasing difficulty") 221 | if not data_generator.has_max_difficulty(): 222 | data_generator.increase_difficulty() 223 | best_val_error_rate = np.inf 224 | val_set = [data_generator.next_batch() for _ in range(num_val_batches)] 225 | 226 | history.append({ 227 | 'val_error_rate': float(val_error_rate), 228 | 'train_loss': float(train_loss), 229 | 'learning_rate': float(self.get_learning_rate()), 230 | 'difficulty': data_generator.difficulty() 231 | }) 232 | 233 | with open('{}/history.json'.format(output_dir), 'w') as outfile: 234 | json.dump(history, outfile) 235 | 236 | prev_error_rate = val_error_rate 237 | 238 | def predict(self, encoder_input_values, pad_symbol_idx=0): 239 | 240 | input_feed = {} 241 | for i, encoder_input in enumerate(self.testing_graph.encoder_inputs): 242 | input_feed[encoder_input.name] = encoder_input_values[:, i, :] 243 | 244 | for decoder_input in self.testing_graph.decoder_inputs: 245 | input_feed[decoder_input.name] = self.go_decoder_input_value 246 | 247 | symbol_probs = self.session.run(self.testing_graph.output_probs, input_feed) 248 | symbol_probs = np.array(symbol_probs) 249 | symbol_probs = np.transpose(symbol_probs, (1, 0, 2)) 250 | 251 | return symbol_probs 252 | 253 | def validate(self, val_set): 254 | 255 | num_correct = 0 256 | num_samples = 0 257 | 258 | for batch in val_set: 259 | x, y = batch 260 | target = np.argmax(y, axis=2) 261 | prediction = np.argmax(self.predict(x), axis=2)[:, :-1] 262 | 263 | num_correct += sum([int(np.all(t == p)) for t, p in zip(target, prediction)]) 264 | num_samples += len(x) 265 | 266 | return 1.0 - float(num_correct) / num_samples 267 | 268 | def load(self, checkpoint_file): 269 | saver = tf.train.Saver() 270 | saver.restore(self.session, checkpoint_file) 271 | 272 | def examples(self, data_generator, num_examples=5): 273 | """ 274 | Prints some examples during training 275 | Args: 276 | data_generator: 277 | 278 | """ 279 | assert self.symbols 280 | 281 | x, y = data_generator.next_batch(validation=True) 282 | 283 | # input_strings = decode_output_sequences(x, symbols=SYMBOLS) 284 | target_strings = decode_output_sequences(y, symbols=self.symbols) 285 | 286 | model_output = self.predict(x) 287 | pred_strings = decode_output_sequences(model_output, symbols=self.symbols) 288 | 289 | print(target_strings[:num_examples]) 290 | print(pred_strings[:num_examples]) 291 | -------------------------------------------------------------------------------- /program_generator.py: -------------------------------------------------------------------------------- 1 | import string 2 | import io 3 | from contextlib import redirect_stdout 4 | 5 | from random import choice, random, randint 6 | from data import encode_sequences, dense_to_one_hot 7 | 8 | GO_SYMBOL = 'G' 9 | PAD_SYMBOL = '_' 10 | LETTERS = string.digits + string.ascii_lowercase + ' +-=():<>\n' 11 | SYMBOLS = [GO_SYMBOL, PAD_SYMBOL] + list(LETTERS) 12 | #SYMBOLS = [PAD_SYMBOL, GO_SYMBOL] + list(LETTERS) 13 | SYMBOL_TO_IDX = dict((l, i) for i, l in enumerate(SYMBOLS)) 14 | 15 | INPUT_SEQ_LEN = 70 16 | OUTPUT_SEQ_LEN = 6 17 | 18 | MAX_NUM_LENGTH = 2 19 | MAX_PROGRAM_LENGTH = 3 20 | 21 | COMPARATORS = ('<', '>') 22 | OPERATORS = ('+', '-') 23 | VARIABLE_NAMES = list('abcdefgh') 24 | 25 | 26 | def if_operation(variables, nesting, difficulty): 27 | compare_variable = choice(list(variables)) 28 | comparator = choice(COMPARATORS) 29 | compare_value = random_digit(difficulty) 30 | code = 'if {0}{1}{2}:'.format(compare_variable, 31 | comparator, 32 | compare_value) 33 | nesting += 1 34 | return code, nesting 35 | 36 | 37 | def assign_operation(variables, nesting, num_len): 38 | variable = choice(VARIABLE_NAMES) 39 | variables.add(variable) 40 | value = random_digit(num_len) 41 | code = '{0}={1}'.format(variable, value) 42 | return code, nesting 43 | 44 | 45 | def add_or_sub_operation(variables, nesting, num_len): 46 | variable = choice(list(variables)) 47 | operator = choice(OPERATORS) 48 | value = random_digit(num_len) 49 | if random() < 0.5: 50 | code = '{0}{1}={2}'.format(variable, operator, value) 51 | else: 52 | variable2 = choice(list(variables)) 53 | code = '{0}={1}{2}{3}'.format(variable, variable2, operator, value) 54 | 55 | return code, nesting 56 | 57 | 58 | def print_operation(variables, nesting, num_len): 59 | operator = choice(OPERATORS) 60 | code = 'print({0})'.format(operator.join(list(variables))) 61 | return code, nesting 62 | 63 | 64 | OPERATIONS = (add_or_sub_operation, if_operation, assign_operation) 65 | 66 | 67 | def generate_program(num_len, length): 68 | variables = set() 69 | nesting = 0 70 | 71 | lines = [] 72 | lines.append(assign_operation(variables, nesting, num_len)[0]) 73 | 74 | if length > 0: 75 | num_lines = randint(1, length) 76 | for i in range(num_lines): 77 | if num_lines <= 1: 78 | operation = add_or_sub_operation 79 | elif nesting == 0: 80 | operation = choice(OPERATIONS) 81 | else: 82 | operation = choice((add_or_sub_operation, if_operation)) 83 | 84 | code, new_nesting = operation(variables, nesting, num_len) 85 | lines.append(''.join([' '] * nesting) + code) 86 | if nesting == new_nesting and random() < 0.5: 87 | nesting -= 1 88 | nesting = new_nesting 89 | 90 | if nesting > 0: 91 | code, new_nesting = add_or_sub_operation(variables, nesting, num_len) 92 | lines.append(''.join([' '] * nesting) + code) 93 | 94 | lines.append(print_operation(variables, nesting, num_len)[0]) 95 | 96 | return '\n'.join(lines) 97 | 98 | 99 | def random_digit(difficulty): 100 | size = 10 ** randint(1, difficulty) 101 | if difficulty > 1: 102 | return randint(-size, size) 103 | else: 104 | return randint(0, size) 105 | 106 | 107 | class ProgramGenerator(): 108 | def __init__(self, batch_size, program_length=1, num_len=1): 109 | self.program_length = program_length 110 | self.num_len = num_len 111 | self.num_len_was_last_inc = False 112 | self.batch_size = batch_size 113 | 114 | def generate_program(self, hash_mod=None): 115 | 116 | if hash_mod is None: 117 | program = generate_program(num_len=self.num_len, 118 | length=self.program_length) 119 | else: 120 | program_hash = None 121 | while program_hash != hash_mod: 122 | program = generate_program(num_len=self.num_len, 123 | length=self.program_length) 124 | program_hash = hash(program) % 2 125 | return program 126 | 127 | def increase_difficulty(self): 128 | 129 | if self.has_max_difficulty(): 130 | return 131 | 132 | # Alternate between increasing program length and number length 133 | if self.num_len_was_last_inc: 134 | if self.num_len < MAX_PROGRAM_LENGTH: 135 | self.program_length += 1 136 | self.num_len_was_last_inc = False 137 | else: 138 | if self.num_len < MAX_NUM_LENGTH: 139 | self.num_len += 1 140 | self.num_len_was_last_inc = True 141 | 142 | print("Difficulty:", self.num_len, self.program_length) 143 | 144 | def has_max_difficulty(self): 145 | return self.num_len >= MAX_NUM_LENGTH and \ 146 | self.program_length >= MAX_PROGRAM_LENGTH 147 | 148 | def difficulty(self): 149 | return (self.num_len, self.program_length) 150 | 151 | def next_batch(self, validation=False): 152 | 153 | programs = [self.generate_program(hash_mod=0 if validation else 1) 154 | for _ in range(self.batch_size)] 155 | 156 | # Execute the programs to get the targets 157 | results = [] 158 | for program in programs: 159 | with io.StringIO() as buf, redirect_stdout(buf): 160 | exec(program) 161 | results.append(buf.getvalue()[:-1]) 162 | 163 | input_sequences = encode_sequences(programs, 164 | symbol_to_idx=SYMBOL_TO_IDX, 165 | sequence_len=INPUT_SEQ_LEN, 166 | #go_symbol=GO_SYMBOL, 167 | pad_symbol=PAD_SYMBOL, 168 | pad_beginning=True, 169 | reverse=True) 170 | input_sequences = dense_to_one_hot(input_sequences, 171 | num_classes=len(SYMBOL_TO_IDX)) 172 | 173 | target_sequences = encode_sequences(results, 174 | symbol_to_idx=SYMBOL_TO_IDX, 175 | sequence_len=OUTPUT_SEQ_LEN, 176 | go_symbol=None, 177 | pad_beginning=False, 178 | pad_symbol=PAD_SYMBOL) 179 | target_sequences = dense_to_one_hot(target_sequences, 180 | num_classes=len(SYMBOL_TO_IDX)) 181 | 182 | return input_sequences, target_sequences 183 | -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | from program_generator import ProgramGenerator, SYMBOL_TO_IDX, INPUT_SEQ_LEN, OUTPUT_SEQ_LEN 4 | # from addition_generator import AdditionGenerator, SYMBOL_TO_IDX, INPUT_SEQ_LEN, OUTPUT_SEQ_LEN 5 | from model import Seq2SeqModel 6 | 7 | hidden_units = 320 8 | num_layers = 2 9 | num_symbols = len(SYMBOL_TO_IDX) 10 | num_epochs = 20000 11 | training_batch_size = 128 12 | batches_per_epoch = 128 13 | num_val_batches = 256 14 | 15 | data_generator = ProgramGenerator(batch_size=training_batch_size, program_length=1, num_len=1) 16 | # data_generator = AdditionGenerator(batch_size=training_batch_size) 17 | 18 | with tf.device('/gpu:0'): 19 | with tf.Session() as session: 20 | 21 | print("Building model") 22 | model = Seq2SeqModel(session=session, 23 | hidden_units=hidden_units, 24 | num_layers=num_layers, 25 | input_sequence_len=INPUT_SEQ_LEN, 26 | output_sequence_len=OUTPUT_SEQ_LEN, 27 | num_input_symbols=num_symbols, 28 | num_output_symbols=num_symbols, 29 | batch_size=training_batch_size, 30 | is_training=True, 31 | scope='model') 32 | 33 | model.init_variables() 34 | 35 | print("Finished building model") 36 | 37 | model.fit(data_generator, 38 | num_epochs=num_epochs, 39 | batches_per_epoch=batches_per_epoch, 40 | num_val_batches=num_val_batches) 41 | 42 | print("Finished training") 43 | -------------------------------------------------------------------------------- /trained_model/checkpoint: -------------------------------------------------------------------------------- 1 | model_checkpoint_path: "model_(2, 3).ckpt" 2 | all_model_checkpoint_paths: "model_(1, 1).ckpt" 3 | all_model_checkpoint_paths: "model_(2, 1).ckpt" 4 | all_model_checkpoint_paths: "model_(2, 2).ckpt" 5 | all_model_checkpoint_paths: "model_(2, 3).ckpt" 6 | -------------------------------------------------------------------------------- /trained_model/history.json: -------------------------------------------------------------------------------- 1 | [{"train_loss": 0.5438666939735413, "difficulty": [1, 1], "val_error_rate": 0.940521240234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.5343071818351746, "difficulty": [1, 1], "val_error_rate": 0.93402099609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.48942017555236816, "difficulty": [1, 1], "val_error_rate": 0.898590087890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3220485746860504, "difficulty": [1, 1], "val_error_rate": 0.6900634765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16858474910259247, "difficulty": [1, 1], "val_error_rate": 0.360015869140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12000581622123718, "difficulty": [1, 1], "val_error_rate": 0.249542236328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0853034183382988, "difficulty": [1, 1], "val_error_rate": 0.150665283203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.05491195246577263, "difficulty": [1, 1], "val_error_rate": 0.12774658203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0354338176548481, "difficulty": [2, 1], "val_error_rate": 0.07904052734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.7448346018791199, "difficulty": [2, 1], "val_error_rate": 0.861328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.5493052005767822, "difficulty": [2, 1], "val_error_rate": 0.807159423828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.4686998426914215, "difficulty": [2, 1], "val_error_rate": 0.72998046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.4019433259963989, "difficulty": [2, 1], "val_error_rate": 0.674041748046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3583047688007355, "difficulty": [2, 1], "val_error_rate": 0.630035400390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.30027034878730774, "difficulty": [2, 1], "val_error_rate": 0.59503173828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3630479872226715, "difficulty": [2, 1], "val_error_rate": 0.579833984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2809283435344696, "difficulty": [2, 1], "val_error_rate": 0.524810791015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2906297743320465, "difficulty": [2, 1], "val_error_rate": 0.49395751953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22749899327754974, "difficulty": [2, 1], "val_error_rate": 0.498626708984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2696778476238251, "difficulty": [2, 1], "val_error_rate": 0.4683837890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.239532932639122, "difficulty": [2, 1], "val_error_rate": 0.47442626953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2023962289094925, "difficulty": [2, 1], "val_error_rate": 0.4603271484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.20937423408031464, "difficulty": [2, 1], "val_error_rate": 0.43377685546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.21245865523815155, "difficulty": [2, 1], "val_error_rate": 0.41973876953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17527253925800323, "difficulty": [2, 1], "val_error_rate": 0.423004150390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19117236137390137, "difficulty": [2, 1], "val_error_rate": 0.402252197265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17628486454486847, "difficulty": [2, 1], "val_error_rate": 0.38372802734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2151779681444168, "difficulty": [2, 1], "val_error_rate": 0.39532470703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.20386607944965363, "difficulty": [2, 1], "val_error_rate": 0.383270263671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1555018573999405, "difficulty": [2, 1], "val_error_rate": 0.36724853515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16241700947284698, "difficulty": [2, 1], "val_error_rate": 0.356536865234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16277427971363068, "difficulty": [2, 1], "val_error_rate": 0.340576171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16500522196292877, "difficulty": [2, 1], "val_error_rate": 0.3475341796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1796792596578598, "difficulty": [2, 1], "val_error_rate": 0.327789306640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16699658334255219, "difficulty": [2, 1], "val_error_rate": 0.3238525390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14037348330020905, "difficulty": [2, 1], "val_error_rate": 0.312591552734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16519129276275635, "difficulty": [2, 1], "val_error_rate": 0.3192138671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15160901844501495, "difficulty": [2, 1], "val_error_rate": 0.294586181640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16264033317565918, "difficulty": [2, 1], "val_error_rate": 0.300506591796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1710287481546402, "difficulty": [2, 1], "val_error_rate": 0.27874755859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1308770626783371, "difficulty": [2, 1], "val_error_rate": 0.279296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12129714339971542, "difficulty": [2, 1], "val_error_rate": 0.262908935546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16091518104076385, "difficulty": [2, 1], "val_error_rate": 0.274932861328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10790327936410904, "difficulty": [2, 1], "val_error_rate": 0.267333984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09888085722923279, "difficulty": [2, 1], "val_error_rate": 0.245697021484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12124428898096085, "difficulty": [2, 1], "val_error_rate": 0.235107421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09399795532226562, "difficulty": [2, 1], "val_error_rate": 0.223175048828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1289391666650772, "difficulty": [2, 1], "val_error_rate": 0.22625732421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08477660268545151, "difficulty": [2, 1], "val_error_rate": 0.21038818359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09577822685241699, "difficulty": [2, 1], "val_error_rate": 0.21630859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11526720970869064, "difficulty": [2, 1], "val_error_rate": 0.20806884765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1202889010310173, "difficulty": [2, 1], "val_error_rate": 0.19439697265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06830235570669174, "difficulty": [2, 1], "val_error_rate": 0.191925048828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13588543236255646, "difficulty": [2, 1], "val_error_rate": 0.191131591796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06915948539972305, "difficulty": [2, 1], "val_error_rate": 0.171234130859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08834987878799438, "difficulty": [2, 1], "val_error_rate": 0.17181396484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.05237932503223419, "difficulty": [2, 1], "val_error_rate": 0.16326904296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07550384849309921, "difficulty": [2, 1], "val_error_rate": 0.1585693359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0787455290555954, "difficulty": [2, 1], "val_error_rate": 0.162872314453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12355358153581619, "difficulty": [2, 1], "val_error_rate": 0.161956787109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06518866121768951, "difficulty": [2, 1], "val_error_rate": 0.13677978515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06093141809105873, "difficulty": [2, 1], "val_error_rate": 0.13299560546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08786659687757492, "difficulty": [2, 1], "val_error_rate": 0.128265380859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07940343767404556, "difficulty": [2, 1], "val_error_rate": 0.10589599609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06167734041810036, "difficulty": [2, 1], "val_error_rate": 0.1173095703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06332778930664062, "difficulty": [2, 1], "val_error_rate": 0.1082763671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.03182936832308769, "difficulty": [2, 1], "val_error_rate": 0.1029052734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.05347074940800667, "difficulty": [2, 2], "val_error_rate": 0.099090576171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.4995358884334564, "difficulty": [2, 2], "val_error_rate": 0.552337646484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.38360610604286194, "difficulty": [2, 2], "val_error_rate": 0.493438720703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.33346882462501526, "difficulty": [2, 2], "val_error_rate": 0.464202880859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.34358859062194824, "difficulty": [2, 2], "val_error_rate": 0.45452880859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.39091333746910095, "difficulty": [2, 2], "val_error_rate": 0.44732666015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.35901331901550293, "difficulty": [2, 2], "val_error_rate": 0.4423828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.29908910393714905, "difficulty": [2, 2], "val_error_rate": 0.438873291015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2788660526275635, "difficulty": [2, 2], "val_error_rate": 0.439788818359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.31126970052719116, "difficulty": [2, 2], "val_error_rate": 0.42767333984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.4066505432128906, "difficulty": [2, 2], "val_error_rate": 0.42431640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2788117825984955, "difficulty": [2, 2], "val_error_rate": 0.423583984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.31168583035469055, "difficulty": [2, 2], "val_error_rate": 0.42425537109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2508351504802704, "difficulty": [2, 2], "val_error_rate": 0.4139404296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3732130229473114, "difficulty": [2, 2], "val_error_rate": 0.4124755859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.34829390048980713, "difficulty": [2, 2], "val_error_rate": 0.4173583984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3458172380924225, "difficulty": [2, 2], "val_error_rate": 0.41485595703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.31477755308151245, "difficulty": [2, 2], "val_error_rate": 0.407073974609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3006502687931061, "difficulty": [2, 2], "val_error_rate": 0.4093017578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2248721569776535, "difficulty": [2, 2], "val_error_rate": 0.403045654296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23060709238052368, "difficulty": [2, 2], "val_error_rate": 0.404449462890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3201897144317627, "difficulty": [2, 2], "val_error_rate": 0.401397705078125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.33132368326187134, "difficulty": [2, 2], "val_error_rate": 0.401641845703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.29900088906288147, "difficulty": [2, 2], "val_error_rate": 0.40350341796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.29769644141197205, "difficulty": [2, 2], "val_error_rate": 0.39959716796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.27976706624031067, "difficulty": [2, 2], "val_error_rate": 0.394683837890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2629372179508209, "difficulty": [2, 2], "val_error_rate": 0.385223388671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2238117903470993, "difficulty": [2, 2], "val_error_rate": 0.3876953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22684220969676971, "difficulty": [2, 2], "val_error_rate": 0.392974853515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3004258871078491, "difficulty": [2, 2], "val_error_rate": 0.3931884765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.21086299419403076, "difficulty": [2, 2], "val_error_rate": 0.3909912109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2542201578617096, "difficulty": [2, 2], "val_error_rate": 0.3863525390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.27540943026542664, "difficulty": [2, 2], "val_error_rate": 0.38555908203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.28210869431495667, "difficulty": [2, 2], "val_error_rate": 0.380340576171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2732688784599304, "difficulty": [2, 2], "val_error_rate": 0.381439208984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2419741153717041, "difficulty": [2, 2], "val_error_rate": 0.38604736328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3279503583908081, "difficulty": [2, 2], "val_error_rate": 0.37506103515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.27696260809898376, "difficulty": [2, 2], "val_error_rate": 0.37786865234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2341153770685196, "difficulty": [2, 2], "val_error_rate": 0.375091552734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23348434269428253, "difficulty": [2, 2], "val_error_rate": 0.381317138671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.25080516934394836, "difficulty": [2, 2], "val_error_rate": 0.376800537109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19900846481323242, "difficulty": [2, 2], "val_error_rate": 0.374267578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.25027215480804443, "difficulty": [2, 2], "val_error_rate": 0.3765869140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2777952253818512, "difficulty": [2, 2], "val_error_rate": 0.37445068359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2235075831413269, "difficulty": [2, 2], "val_error_rate": 0.37652587890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2324107438325882, "difficulty": [2, 2], "val_error_rate": 0.371246337890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22197890281677246, "difficulty": [2, 2], "val_error_rate": 0.365997314453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2267022281885147, "difficulty": [2, 2], "val_error_rate": 0.366790771484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2439119815826416, "difficulty": [2, 2], "val_error_rate": 0.3731689453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2732413113117218, "difficulty": [2, 2], "val_error_rate": 0.364898681640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.3148665726184845, "difficulty": [2, 2], "val_error_rate": 0.362762451171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.24677427113056183, "difficulty": [2, 2], "val_error_rate": 0.364166259765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.28367435932159424, "difficulty": [2, 2], "val_error_rate": 0.358551025390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2460661679506302, "difficulty": [2, 2], "val_error_rate": 0.360595703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2519047260284424, "difficulty": [2, 2], "val_error_rate": 0.3590087890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.27500393986701965, "difficulty": [2, 2], "val_error_rate": 0.355926513671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2749870717525482, "difficulty": [2, 2], "val_error_rate": 0.358978271484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.24623610079288483, "difficulty": [2, 2], "val_error_rate": 0.358795166015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2536994218826294, "difficulty": [2, 2], "val_error_rate": 0.356475830078125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.25915899872779846, "difficulty": [2, 2], "val_error_rate": 0.356231689453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23985938727855682, "difficulty": [2, 2], "val_error_rate": 0.359771728515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2921011745929718, "difficulty": [2, 2], "val_error_rate": 0.356353759765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22829312086105347, "difficulty": [2, 2], "val_error_rate": 0.35272216796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.21344612538814545, "difficulty": [2, 2], "val_error_rate": 0.35552978515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22878621518611908, "difficulty": [2, 2], "val_error_rate": 0.3463134765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19045265018939972, "difficulty": [2, 2], "val_error_rate": 0.355316162109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22971101105213165, "difficulty": [2, 2], "val_error_rate": 0.35125732421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.20570075511932373, "difficulty": [2, 2], "val_error_rate": 0.349884033203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2308378964662552, "difficulty": [2, 2], "val_error_rate": 0.35052490234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.21425700187683105, "difficulty": [2, 2], "val_error_rate": 0.3460693359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19813399016857147, "difficulty": [2, 2], "val_error_rate": 0.34918212890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19552691280841827, "difficulty": [2, 2], "val_error_rate": 0.347991943359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2696538269519806, "difficulty": [2, 2], "val_error_rate": 0.344696044921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19396109879016876, "difficulty": [2, 2], "val_error_rate": 0.340423583984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.251103013753891, "difficulty": [2, 2], "val_error_rate": 0.3448486328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.29234910011291504, "difficulty": [2, 2], "val_error_rate": 0.342498779296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2514925003051758, "difficulty": [2, 2], "val_error_rate": 0.347320556640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22208233177661896, "difficulty": [2, 2], "val_error_rate": 0.345458984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2428443431854248, "difficulty": [2, 2], "val_error_rate": 0.3453369140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2305757850408554, "difficulty": [2, 2], "val_error_rate": 0.346923828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23404254019260406, "difficulty": [2, 2], "val_error_rate": 0.342376708984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22062623500823975, "difficulty": [2, 2], "val_error_rate": 0.34210205078125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18328601121902466, "difficulty": [2, 2], "val_error_rate": 0.343170166015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23020325601100922, "difficulty": [2, 2], "val_error_rate": 0.33990478515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16943462193012238, "difficulty": [2, 2], "val_error_rate": 0.33599853515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19590039551258087, "difficulty": [2, 2], "val_error_rate": 0.34027099609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22205667197704315, "difficulty": [2, 2], "val_error_rate": 0.33660888671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22817087173461914, "difficulty": [2, 2], "val_error_rate": 0.337371826171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2070523500442505, "difficulty": [2, 2], "val_error_rate": 0.33258056640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.21104943752288818, "difficulty": [2, 2], "val_error_rate": 0.334136962890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19799883663654327, "difficulty": [2, 2], "val_error_rate": 0.33245849609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.240397647023201, "difficulty": [2, 2], "val_error_rate": 0.337158203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1688796877861023, "difficulty": [2, 2], "val_error_rate": 0.333465576171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22960512340068817, "difficulty": [2, 2], "val_error_rate": 0.33074951171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2413996011018753, "difficulty": [2, 2], "val_error_rate": 0.331939697265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22051189839839935, "difficulty": [2, 2], "val_error_rate": 0.33233642578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15080906450748444, "difficulty": [2, 2], "val_error_rate": 0.327484130859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22397439181804657, "difficulty": [2, 2], "val_error_rate": 0.328399658203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17378221452236176, "difficulty": [2, 2], "val_error_rate": 0.325408935546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22854356467723846, "difficulty": [2, 2], "val_error_rate": 0.32708740234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2250664234161377, "difficulty": [2, 2], "val_error_rate": 0.32940673828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23776447772979736, "difficulty": [2, 2], "val_error_rate": 0.326629638671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2650570273399353, "difficulty": [2, 2], "val_error_rate": 0.3243408203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2095811367034912, "difficulty": [2, 2], "val_error_rate": 0.321014404296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.27277716994285583, "difficulty": [2, 2], "val_error_rate": 0.3231201171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16997337341308594, "difficulty": [2, 2], "val_error_rate": 0.31805419921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2247086763381958, "difficulty": [2, 2], "val_error_rate": 0.321380615234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17758838832378387, "difficulty": [2, 2], "val_error_rate": 0.326019287109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2019013613462448, "difficulty": [2, 2], "val_error_rate": 0.31964111328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13579528033733368, "difficulty": [2, 2], "val_error_rate": 0.3165283203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2020387202501297, "difficulty": [2, 2], "val_error_rate": 0.321868896484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18779458105564117, "difficulty": [2, 2], "val_error_rate": 0.319610595703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22497940063476562, "difficulty": [2, 2], "val_error_rate": 0.319305419921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.20177268981933594, "difficulty": [2, 2], "val_error_rate": 0.31134033203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23352162539958954, "difficulty": [2, 2], "val_error_rate": 0.317108154296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2223552018404007, "difficulty": [2, 2], "val_error_rate": 0.319183349609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17036736011505127, "difficulty": [2, 2], "val_error_rate": 0.314910888671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.2785346806049347, "difficulty": [2, 2], "val_error_rate": 0.3201904296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1931735724210739, "difficulty": [2, 2], "val_error_rate": 0.313262939453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18432080745697021, "difficulty": [2, 2], "val_error_rate": 0.313629150390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13220728933811188, "difficulty": [2, 2], "val_error_rate": 0.3121337890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18803934752941132, "difficulty": [2, 2], "val_error_rate": 0.31268310546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23905766010284424, "difficulty": [2, 2], "val_error_rate": 0.313995361328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22354035079479218, "difficulty": [2, 2], "val_error_rate": 0.312835693359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17835886776447296, "difficulty": [2, 2], "val_error_rate": 0.30865478515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19303558766841888, "difficulty": [2, 2], "val_error_rate": 0.32037353515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.20611126720905304, "difficulty": [2, 2], "val_error_rate": 0.312408447265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22549259662628174, "difficulty": [2, 2], "val_error_rate": 0.312286376953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1803305745124817, "difficulty": [2, 2], "val_error_rate": 0.31378173828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18076448142528534, "difficulty": [2, 2], "val_error_rate": 0.308013916015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1969650238752365, "difficulty": [2, 2], "val_error_rate": 0.307098388671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1997346132993698, "difficulty": [2, 2], "val_error_rate": 0.303131103515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14946173131465912, "difficulty": [2, 2], "val_error_rate": 0.302947998046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16191406548023224, "difficulty": [2, 2], "val_error_rate": 0.304412841796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.23496806621551514, "difficulty": [2, 2], "val_error_rate": 0.296539306640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.20995043218135834, "difficulty": [2, 2], "val_error_rate": 0.2974853515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19276106357574463, "difficulty": [2, 2], "val_error_rate": 0.30517578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1737765073776245, "difficulty": [2, 2], "val_error_rate": 0.300140380859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17472527921199799, "difficulty": [2, 2], "val_error_rate": 0.29547119140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.22122208774089813, "difficulty": [2, 2], "val_error_rate": 0.300689697265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18929047882556915, "difficulty": [2, 2], "val_error_rate": 0.293670654296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18571646511554718, "difficulty": [2, 2], "val_error_rate": 0.297393798828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16773492097854614, "difficulty": [2, 2], "val_error_rate": 0.291595458984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18946672976016998, "difficulty": [2, 2], "val_error_rate": 0.291839599609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16302326321601868, "difficulty": [2, 2], "val_error_rate": 0.29315185546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15861736238002777, "difficulty": [2, 2], "val_error_rate": 0.29058837890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1421351283788681, "difficulty": [2, 2], "val_error_rate": 0.287628173828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19475464522838593, "difficulty": [2, 2], "val_error_rate": 0.29095458984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.21293358504772186, "difficulty": [2, 2], "val_error_rate": 0.2835693359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16410334408283234, "difficulty": [2, 2], "val_error_rate": 0.28814697265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16712450981140137, "difficulty": [2, 2], "val_error_rate": 0.28875732421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1964436024427414, "difficulty": [2, 2], "val_error_rate": 0.296234130859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1712726205587387, "difficulty": [2, 2], "val_error_rate": 0.289703369140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1819090098142624, "difficulty": [2, 2], "val_error_rate": 0.27996826171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17827630043029785, "difficulty": [2, 2], "val_error_rate": 0.282745361328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12750257551670074, "difficulty": [2, 2], "val_error_rate": 0.281494140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17948795855045319, "difficulty": [2, 2], "val_error_rate": 0.282135009765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1741170436143875, "difficulty": [2, 2], "val_error_rate": 0.275787353515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18948788940906525, "difficulty": [2, 2], "val_error_rate": 0.27435302734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17249566316604614, "difficulty": [2, 2], "val_error_rate": 0.276702880859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18240679800510406, "difficulty": [2, 2], "val_error_rate": 0.27606201171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16776098310947418, "difficulty": [2, 2], "val_error_rate": 0.27874755859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13002975285053253, "difficulty": [2, 2], "val_error_rate": 0.274017333984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16917435824871063, "difficulty": [2, 2], "val_error_rate": 0.272247314453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14565324783325195, "difficulty": [2, 2], "val_error_rate": 0.273468017578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15807360410690308, "difficulty": [2, 2], "val_error_rate": 0.274322509765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14420710504055023, "difficulty": [2, 2], "val_error_rate": 0.2713623046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15737852454185486, "difficulty": [2, 2], "val_error_rate": 0.271453857421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12346180528402328, "difficulty": [2, 2], "val_error_rate": 0.27191162109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17038960754871368, "difficulty": [2, 2], "val_error_rate": 0.2677001953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14148205518722534, "difficulty": [2, 2], "val_error_rate": 0.268402099609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13465256989002228, "difficulty": [2, 2], "val_error_rate": 0.27166748046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18406163156032562, "difficulty": [2, 2], "val_error_rate": 0.2659912109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1671721339225769, "difficulty": [2, 2], "val_error_rate": 0.25860595703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16681937873363495, "difficulty": [2, 2], "val_error_rate": 0.268829345703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17039375007152557, "difficulty": [2, 2], "val_error_rate": 0.2650146484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1499783843755722, "difficulty": [2, 2], "val_error_rate": 0.27191162109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13534493744373322, "difficulty": [2, 2], "val_error_rate": 0.269561767578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1915862113237381, "difficulty": [2, 2], "val_error_rate": 0.266571044921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17001253366470337, "difficulty": [2, 2], "val_error_rate": 0.257232666015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1601436287164688, "difficulty": [2, 2], "val_error_rate": 0.255828857421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1558694988489151, "difficulty": [2, 2], "val_error_rate": 0.255157470703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16198113560676575, "difficulty": [2, 2], "val_error_rate": 0.25958251953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1397121697664261, "difficulty": [2, 2], "val_error_rate": 0.259857177734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12223625928163528, "difficulty": [2, 2], "val_error_rate": 0.2506103515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14032059907913208, "difficulty": [2, 2], "val_error_rate": 0.261199951171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11478119343519211, "difficulty": [2, 2], "val_error_rate": 0.258392333984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16256314516067505, "difficulty": [2, 2], "val_error_rate": 0.258331298828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17711444199085236, "difficulty": [2, 2], "val_error_rate": 0.254547119140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14857076108455658, "difficulty": [2, 2], "val_error_rate": 0.25732421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15512777864933014, "difficulty": [2, 2], "val_error_rate": 0.257080078125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12325545400381088, "difficulty": [2, 2], "val_error_rate": 0.25274658203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11409404128789902, "difficulty": [2, 2], "val_error_rate": 0.24761962890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11031665652990341, "difficulty": [2, 2], "val_error_rate": 0.248809814453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1611456722021103, "difficulty": [2, 2], "val_error_rate": 0.2540283203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1354769915342331, "difficulty": [2, 2], "val_error_rate": 0.2513427734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15799854695796967, "difficulty": [2, 2], "val_error_rate": 0.25018310546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11199430376291275, "difficulty": [2, 2], "val_error_rate": 0.256072998046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18615145981311798, "difficulty": [2, 2], "val_error_rate": 0.249755859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1822851151227951, "difficulty": [2, 2], "val_error_rate": 0.25653076171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10304438322782516, "difficulty": [2, 2], "val_error_rate": 0.249481201171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1923665553331375, "difficulty": [2, 2], "val_error_rate": 0.2442626953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12556178867816925, "difficulty": [2, 2], "val_error_rate": 0.253753662109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10855985432863235, "difficulty": [2, 2], "val_error_rate": 0.247711181640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18713098764419556, "difficulty": [2, 2], "val_error_rate": 0.252105712890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1698780655860901, "difficulty": [2, 2], "val_error_rate": 0.24609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1230522096157074, "difficulty": [2, 2], "val_error_rate": 0.240234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14848436415195465, "difficulty": [2, 2], "val_error_rate": 0.24853515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1592850685119629, "difficulty": [2, 2], "val_error_rate": 0.242706298828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19196093082427979, "difficulty": [2, 2], "val_error_rate": 0.240325927734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14365904033184052, "difficulty": [2, 2], "val_error_rate": 0.233245849609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09568861871957779, "difficulty": [2, 2], "val_error_rate": 0.23065185546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12348607927560806, "difficulty": [2, 2], "val_error_rate": 0.233245849609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14290839433670044, "difficulty": [2, 2], "val_error_rate": 0.232330322265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10459312796592712, "difficulty": [2, 2], "val_error_rate": 0.23077392578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14209717512130737, "difficulty": [2, 2], "val_error_rate": 0.234832763671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10982587933540344, "difficulty": [2, 2], "val_error_rate": 0.234161376953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15955953299999237, "difficulty": [2, 2], "val_error_rate": 0.233489990234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1510736346244812, "difficulty": [2, 2], "val_error_rate": 0.2342529296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09894698113203049, "difficulty": [2, 2], "val_error_rate": 0.2332763671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10856249928474426, "difficulty": [2, 2], "val_error_rate": 0.23333740234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1661567986011505, "difficulty": [2, 2], "val_error_rate": 0.23162841796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15735994279384613, "difficulty": [2, 2], "val_error_rate": 0.22802734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1448587328195572, "difficulty": [2, 2], "val_error_rate": 0.227447509765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1306549608707428, "difficulty": [2, 2], "val_error_rate": 0.228485107421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17596596479415894, "difficulty": [2, 2], "val_error_rate": 0.22491455078125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12549559772014618, "difficulty": [2, 2], "val_error_rate": 0.22283935546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10766275972127914, "difficulty": [2, 2], "val_error_rate": 0.224761962890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.19789989292621613, "difficulty": [2, 2], "val_error_rate": 0.22735595703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09528663009405136, "difficulty": [2, 2], "val_error_rate": 0.220428466796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12275596708059311, "difficulty": [2, 2], "val_error_rate": 0.2269287109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12046810239553452, "difficulty": [2, 2], "val_error_rate": 0.22509765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11235322803258896, "difficulty": [2, 2], "val_error_rate": 0.22210693359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.15640130639076233, "difficulty": [2, 2], "val_error_rate": 0.222412109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10351967066526413, "difficulty": [2, 2], "val_error_rate": 0.215606689453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12254957109689713, "difficulty": [2, 2], "val_error_rate": 0.217193603515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1266094446182251, "difficulty": [2, 2], "val_error_rate": 0.21917724609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0791960284113884, "difficulty": [2, 2], "val_error_rate": 0.217803955078125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14439481496810913, "difficulty": [2, 2], "val_error_rate": 0.214599609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08990862220525742, "difficulty": [2, 2], "val_error_rate": 0.2144775390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.18042097985744476, "difficulty": [2, 2], "val_error_rate": 0.209808349609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17017221450805664, "difficulty": [2, 2], "val_error_rate": 0.212554931640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13807500898838043, "difficulty": [2, 2], "val_error_rate": 0.20849609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13890399038791656, "difficulty": [2, 2], "val_error_rate": 0.212493896484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.16133731603622437, "difficulty": [2, 2], "val_error_rate": 0.215240478515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13021308183670044, "difficulty": [2, 2], "val_error_rate": 0.212310791015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1273355931043625, "difficulty": [2, 2], "val_error_rate": 0.206390380859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11826124787330627, "difficulty": [2, 2], "val_error_rate": 0.206695556640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1344607025384903, "difficulty": [2, 2], "val_error_rate": 0.206878662109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12345585972070694, "difficulty": [2, 2], "val_error_rate": 0.206512451171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14735007286071777, "difficulty": [2, 2], "val_error_rate": 0.201995849609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08862695842981339, "difficulty": [2, 2], "val_error_rate": 0.207305908203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.14572332799434662, "difficulty": [2, 2], "val_error_rate": 0.201690673828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11998623609542847, "difficulty": [2, 2], "val_error_rate": 0.1988525390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12034044414758682, "difficulty": [2, 2], "val_error_rate": 0.20159912109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.133199080824852, "difficulty": [2, 2], "val_error_rate": 0.1993408203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0829177051782608, "difficulty": [2, 2], "val_error_rate": 0.198974609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11359449476003647, "difficulty": [2, 2], "val_error_rate": 0.1966552734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13804002106189728, "difficulty": [2, 2], "val_error_rate": 0.1981201171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10442576557397842, "difficulty": [2, 2], "val_error_rate": 0.19696044921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09183675795793533, "difficulty": [2, 2], "val_error_rate": 0.192535400390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13810567557811737, "difficulty": [2, 2], "val_error_rate": 0.196868896484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08067893236875534, "difficulty": [2, 2], "val_error_rate": 0.18914794921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11738383769989014, "difficulty": [2, 2], "val_error_rate": 0.18927001953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13745109736919403, "difficulty": [2, 2], "val_error_rate": 0.1929931640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1079721748828888, "difficulty": [2, 2], "val_error_rate": 0.18988037109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10386542230844498, "difficulty": [2, 2], "val_error_rate": 0.1947021484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11989051103591919, "difficulty": [2, 2], "val_error_rate": 0.189849853515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11468305438756943, "difficulty": [2, 2], "val_error_rate": 0.1846923828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1363135427236557, "difficulty": [2, 2], "val_error_rate": 0.18511962890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08891666680574417, "difficulty": [2, 2], "val_error_rate": 0.18170166015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.17317940294742584, "difficulty": [2, 2], "val_error_rate": 0.177093505859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10706473141908646, "difficulty": [2, 2], "val_error_rate": 0.17730712890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11660302430391312, "difficulty": [2, 2], "val_error_rate": 0.1807861328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10762370377779007, "difficulty": [2, 2], "val_error_rate": 0.18670654296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10006389766931534, "difficulty": [2, 2], "val_error_rate": 0.17205810546875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10667148977518082, "difficulty": [2, 2], "val_error_rate": 0.17181396484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10852629691362381, "difficulty": [2, 2], "val_error_rate": 0.184478759765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07606642693281174, "difficulty": [2, 2], "val_error_rate": 0.181732177734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09462843090295792, "difficulty": [2, 2], "val_error_rate": 0.178680419921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10829441994428635, "difficulty": [2, 2], "val_error_rate": 0.17718505859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12181029468774796, "difficulty": [2, 2], "val_error_rate": 0.176605224609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.060787033289670944, "difficulty": [2, 2], "val_error_rate": 0.17401123046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10193172842264175, "difficulty": [2, 2], "val_error_rate": 0.172454833984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10584760457277298, "difficulty": [2, 2], "val_error_rate": 0.17388916015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10501012206077576, "difficulty": [2, 2], "val_error_rate": 0.167938232421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1397731751203537, "difficulty": [2, 2], "val_error_rate": 0.172607421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1078808531165123, "difficulty": [2, 2], "val_error_rate": 0.166351318359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09696624428033829, "difficulty": [2, 2], "val_error_rate": 0.16705322265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12093830853700638, "difficulty": [2, 2], "val_error_rate": 0.165191650390625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.05707736313343048, "difficulty": [2, 2], "val_error_rate": 0.163055419921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12366495281457901, "difficulty": [2, 2], "val_error_rate": 0.16339111328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10003285855054855, "difficulty": [2, 2], "val_error_rate": 0.1610107421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0899076834321022, "difficulty": [2, 2], "val_error_rate": 0.16558837890625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1214803084731102, "difficulty": [2, 2], "val_error_rate": 0.17047119140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12017807364463806, "difficulty": [2, 2], "val_error_rate": 0.17291259765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12041320651769638, "difficulty": [2, 2], "val_error_rate": 0.1683349609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09489943832159042, "difficulty": [2, 2], "val_error_rate": 0.158294677734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09964824467897415, "difficulty": [2, 2], "val_error_rate": 0.157318115234375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12915056943893433, "difficulty": [2, 2], "val_error_rate": 0.158660888671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06697998195886612, "difficulty": [2, 2], "val_error_rate": 0.159912109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11098407953977585, "difficulty": [2, 2], "val_error_rate": 0.15509033203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1282997876405716, "difficulty": [2, 2], "val_error_rate": 0.156341552734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1334405094385147, "difficulty": [2, 2], "val_error_rate": 0.157928466796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09580719470977783, "difficulty": [2, 2], "val_error_rate": 0.153076171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08266565203666687, "difficulty": [2, 2], "val_error_rate": 0.155059814453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0759914293885231, "difficulty": [2, 2], "val_error_rate": 0.150299072265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11364278197288513, "difficulty": [2, 2], "val_error_rate": 0.152801513671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09013242274522781, "difficulty": [2, 2], "val_error_rate": 0.155792236328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08128396421670914, "difficulty": [2, 2], "val_error_rate": 0.1492919921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10308653116226196, "difficulty": [2, 2], "val_error_rate": 0.152130126953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07275652140378952, "difficulty": [2, 2], "val_error_rate": 0.149261474609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0664568543434143, "difficulty": [2, 2], "val_error_rate": 0.146575927734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.05253872275352478, "difficulty": [2, 2], "val_error_rate": 0.151885986328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09072402864694595, "difficulty": [2, 2], "val_error_rate": 0.14837646484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13874654471874237, "difficulty": [2, 2], "val_error_rate": 0.146575927734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11824148148298264, "difficulty": [2, 2], "val_error_rate": 0.148406982421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07704862952232361, "difficulty": [2, 2], "val_error_rate": 0.14373779296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08912190049886703, "difficulty": [2, 2], "val_error_rate": 0.142059326171875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06741318851709366, "difficulty": [2, 2], "val_error_rate": 0.13800048828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11022332310676575, "difficulty": [2, 2], "val_error_rate": 0.142303466796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07434417307376862, "difficulty": [2, 2], "val_error_rate": 0.141265869140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12416180223226547, "difficulty": [2, 2], "val_error_rate": 0.137054443359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08884850889444351, "difficulty": [2, 2], "val_error_rate": 0.148773193359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08153882622718811, "difficulty": [2, 2], "val_error_rate": 0.14495849609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.12555718421936035, "difficulty": [2, 2], "val_error_rate": 0.1470947265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07169749587774277, "difficulty": [2, 2], "val_error_rate": 0.1392822265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.054245006293058395, "difficulty": [2, 2], "val_error_rate": 0.14093017578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06851699948310852, "difficulty": [2, 2], "val_error_rate": 0.134033203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09342509508132935, "difficulty": [2, 2], "val_error_rate": 0.1343994140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07477488368749619, "difficulty": [2, 2], "val_error_rate": 0.131805419921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0647723600268364, "difficulty": [2, 2], "val_error_rate": 0.13482666015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08073583990335464, "difficulty": [2, 2], "val_error_rate": 0.134490966796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11057481169700623, "difficulty": [2, 2], "val_error_rate": 0.132781982421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08796454221010208, "difficulty": [2, 2], "val_error_rate": 0.139923095703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1157897487282753, "difficulty": [2, 2], "val_error_rate": 0.1395263671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08469685912132263, "difficulty": [2, 2], "val_error_rate": 0.13311767578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08405361324548721, "difficulty": [2, 2], "val_error_rate": 0.13763427734375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08878511190414429, "difficulty": [2, 2], "val_error_rate": 0.137115478515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0917859897017479, "difficulty": [2, 2], "val_error_rate": 0.1295166015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07785295695066452, "difficulty": [2, 2], "val_error_rate": 0.13031005859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0941905677318573, "difficulty": [2, 2], "val_error_rate": 0.13275146484375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10396695137023926, "difficulty": [2, 2], "val_error_rate": 0.128631591796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06719895452260971, "difficulty": [2, 2], "val_error_rate": 0.12664794921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13335426151752472, "difficulty": [2, 2], "val_error_rate": 0.12738037109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11794605851173401, "difficulty": [2, 2], "val_error_rate": 0.123565673828125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09469164162874222, "difficulty": [2, 2], "val_error_rate": 0.124847412109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08351477235555649, "difficulty": [2, 2], "val_error_rate": 0.1226806640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11011994630098343, "difficulty": [2, 2], "val_error_rate": 0.120849609375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.05351632833480835, "difficulty": [2, 2], "val_error_rate": 0.119842529296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1142042800784111, "difficulty": [2, 2], "val_error_rate": 0.1279296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10270682722330093, "difficulty": [2, 2], "val_error_rate": 0.12078857421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.13693612813949585, "difficulty": [2, 2], "val_error_rate": 0.121551513671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09527017921209335, "difficulty": [2, 2], "val_error_rate": 0.115966796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0676453486084938, "difficulty": [2, 2], "val_error_rate": 0.11407470703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.04664560779929161, "difficulty": [2, 2], "val_error_rate": 0.112274169921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0633842870593071, "difficulty": [2, 2], "val_error_rate": 0.109619140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10217082500457764, "difficulty": [2, 2], "val_error_rate": 0.1131591796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10457736253738403, "difficulty": [2, 2], "val_error_rate": 0.110015869140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08056329190731049, "difficulty": [2, 2], "val_error_rate": 0.109100341796875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07559089362621307, "difficulty": [2, 2], "val_error_rate": 0.107879638671875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.04874647781252861, "difficulty": [2, 2], "val_error_rate": 0.111114501953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.041164640337228775, "difficulty": [2, 2], "val_error_rate": 0.10748291015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07211009413003922, "difficulty": [2, 2], "val_error_rate": 0.111907958984375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09013127535581589, "difficulty": [2, 2], "val_error_rate": 0.109771728515625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09135448932647705, "difficulty": [2, 2], "val_error_rate": 0.106689453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08304668962955475, "difficulty": [2, 2], "val_error_rate": 0.103851318359375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06851308047771454, "difficulty": [2, 2], "val_error_rate": 0.107086181640625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.060873210430145264, "difficulty": [2, 2], "val_error_rate": 0.110382080078125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08112649619579315, "difficulty": [2, 2], "val_error_rate": 0.115936279296875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09299388527870178, "difficulty": [2, 2], "val_error_rate": 0.112091064453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06284867972135544, "difficulty": [2, 2], "val_error_rate": 0.11358642578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.059216246008872986, "difficulty": [2, 2], "val_error_rate": 0.10986328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.04128409922122955, "difficulty": [2, 2], "val_error_rate": 0.10906982421875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07379622012376785, "difficulty": [2, 2], "val_error_rate": 0.10394287109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08498696237802505, "difficulty": [2, 2], "val_error_rate": 0.104156494140625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06966274231672287, "difficulty": [2, 2], "val_error_rate": 0.108367919921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06080028787255287, "difficulty": [2, 2], "val_error_rate": 0.104736328125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0906866118311882, "difficulty": [2, 2], "val_error_rate": 0.113189697265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08223623782396317, "difficulty": [2, 2], "val_error_rate": 0.10784912109375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08146905899047852, "difficulty": [2, 2], "val_error_rate": 0.11468505859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.09083820134401321, "difficulty": [2, 2], "val_error_rate": 0.112091064453125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.045574773102998734, "difficulty": [2, 2], "val_error_rate": 0.105743408203125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0651831179857254, "difficulty": [2, 2], "val_error_rate": 0.1015625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.0726873055100441, "difficulty": [2, 2], "val_error_rate": 0.104400634765625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08689721673727036, "difficulty": [2, 2], "val_error_rate": 0.103729248046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.04922051727771759, "difficulty": [2, 2], "val_error_rate": 0.105255126953125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.03973228484392166, "difficulty": [2, 2], "val_error_rate": 0.105377197265625, "learning_rate": 0.009999999776482582}, {"train_loss": 0.07625988125801086, "difficulty": [2, 2], "val_error_rate": 0.108734130859375, "learning_rate": 0.009999999776482582}, {"train_loss": 0.06785934418439865, "difficulty": [2, 2], "val_error_rate": 0.0968017578125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.11248888820409775, "difficulty": [2, 2], "val_error_rate": 0.1329345703125, "learning_rate": 0.009999999776482582}, {"train_loss": 0.08637084811925888, "difficulty": [2, 2], "val_error_rate": 0.1024169921875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.10260481387376785, "difficulty": [2, 3], "val_error_rate": 0.0994873046875, "learning_rate": 0.009999999776482582}, {"train_loss": 0.1313001811504364, "difficulty": [2, 3], "val_error_rate": 0.24993896484375, "learning_rate": 0.009499999694526196}, {"train_loss": 0.216561958193779, "difficulty": [2, 3], "val_error_rate": 0.263946533203125, "learning_rate": 0.009025000035762787}, {"train_loss": 0.13094757497310638, "difficulty": [2, 3], "val_error_rate": 0.23443603515625, "learning_rate": 0.009025000035762787}, {"train_loss": 0.17311209440231323, "difficulty": [2, 3], "val_error_rate": 0.226226806640625, "learning_rate": 0.009025000035762787}, {"train_loss": 0.1639590859413147, "difficulty": [2, 3], "val_error_rate": 0.231719970703125, "learning_rate": 0.008573750033974648}, {"train_loss": 0.2257838398218155, "difficulty": [2, 3], "val_error_rate": 0.218994140625, "learning_rate": 0.008573750033974648}, {"train_loss": 0.18163125216960907, "difficulty": [2, 3], "val_error_rate": 0.2159423828125, "learning_rate": 0.008573750033974648}, {"train_loss": 0.1315772980451584, "difficulty": [2, 3], "val_error_rate": 0.210174560546875, "learning_rate": 0.008573750033974648}, {"train_loss": 0.1469651758670807, "difficulty": [2, 3], "val_error_rate": 0.212677001953125, "learning_rate": 0.008145062252879143}, {"train_loss": 0.1432885378599167, "difficulty": [2, 3], "val_error_rate": 0.20819091796875, "learning_rate": 0.008145062252879143}, {"train_loss": 0.167526975274086, "difficulty": [2, 3], "val_error_rate": 0.2042236328125, "learning_rate": 0.008145062252879143}, {"train_loss": 0.1796434372663498, "difficulty": [2, 3], "val_error_rate": 0.201171875, "learning_rate": 0.008145062252879143}, {"train_loss": 0.14398764073848724, "difficulty": [2, 3], "val_error_rate": 0.201385498046875, "learning_rate": 0.0077378093264997005}, {"train_loss": 0.1858719140291214, "difficulty": [2, 3], "val_error_rate": 0.199462890625, "learning_rate": 0.0077378093264997005}, {"train_loss": 0.18986070156097412, "difficulty": [2, 3], "val_error_rate": 0.198516845703125, "learning_rate": 0.0077378093264997005}, {"train_loss": 0.16389544308185577, "difficulty": [2, 3], "val_error_rate": 0.194732666015625, "learning_rate": 0.0077378093264997005}, {"train_loss": 0.16369064152240753, "difficulty": [2, 3], "val_error_rate": 0.191925048828125, "learning_rate": 0.0077378093264997005}, {"train_loss": 0.14774753153324127, "difficulty": [2, 3], "val_error_rate": 0.19384765625, "learning_rate": 0.007350918836891651}, {"train_loss": 0.14176689088344574, "difficulty": [2, 3], "val_error_rate": 0.189178466796875, "learning_rate": 0.007350918836891651}, {"train_loss": 0.13451871275901794, "difficulty": [2, 3], "val_error_rate": 0.19281005859375, "learning_rate": 0.00698337284848094}, {"train_loss": 0.14618712663650513, "difficulty": [2, 3], "val_error_rate": 0.184539794921875, "learning_rate": 0.00698337284848094}, {"train_loss": 0.1418842226266861, "difficulty": [2, 3], "val_error_rate": 0.178253173828125, "learning_rate": 0.00698337284848094}, {"train_loss": 0.12638838589191437, "difficulty": [2, 3], "val_error_rate": 0.182373046875, "learning_rate": 0.0066342041827738285}, {"train_loss": 0.08715029805898666, "difficulty": [2, 3], "val_error_rate": 0.1795654296875, "learning_rate": 0.0066342041827738285}, {"train_loss": 0.13704828917980194, "difficulty": [2, 3], "val_error_rate": 0.17822265625, "learning_rate": 0.0066342041827738285}, {"train_loss": 0.15106366574764252, "difficulty": [2, 3], "val_error_rate": 0.18060302734375, "learning_rate": 0.006302494090050459}, {"train_loss": 0.12342995405197144, "difficulty": [2, 3], "val_error_rate": 0.175140380859375, "learning_rate": 0.006302494090050459}, {"train_loss": 0.13680203258991241, "difficulty": [2, 3], "val_error_rate": 0.1785888671875, "learning_rate": 0.005987369455397129}, {"train_loss": 0.13871580362319946, "difficulty": [2, 3], "val_error_rate": 0.172821044921875, "learning_rate": 0.005987369455397129}, {"train_loss": 0.15438656508922577, "difficulty": [2, 3], "val_error_rate": 0.170867919921875, "learning_rate": 0.005987369455397129}, {"train_loss": 0.13823679089546204, "difficulty": [2, 3], "val_error_rate": 0.17279052734375, "learning_rate": 0.005688000936061144}, {"train_loss": 0.1025126576423645, "difficulty": [2, 3], "val_error_rate": 0.167999267578125, "learning_rate": 0.005688000936061144}, {"train_loss": 0.11355362087488174, "difficulty": [2, 3], "val_error_rate": 0.167266845703125, "learning_rate": 0.005688000936061144}, {"train_loss": 0.1376952975988388, "difficulty": [2, 3], "val_error_rate": 0.166473388671875, "learning_rate": 0.005688000936061144}, {"train_loss": 0.18709401786327362, "difficulty": [2, 3], "val_error_rate": 0.16552734375, "learning_rate": 0.005688000936061144}, {"train_loss": 0.11586830019950867, "difficulty": [2, 3], "val_error_rate": 0.1669921875, "learning_rate": 0.005403601098805666}, {"train_loss": 0.1755884438753128, "difficulty": [2, 3], "val_error_rate": 0.16522216796875, "learning_rate": 0.005403601098805666}, {"train_loss": 0.1273139864206314, "difficulty": [2, 3], "val_error_rate": 0.162872314453125, "learning_rate": 0.005403601098805666}, {"train_loss": 0.13836489617824554, "difficulty": [2, 3], "val_error_rate": 0.165740966796875, "learning_rate": 0.0051334211602807045}, {"train_loss": 0.0881040096282959, "difficulty": [2, 3], "val_error_rate": 0.16278076171875, "learning_rate": 0.0051334211602807045}, {"train_loss": 0.07895239442586899, "difficulty": [2, 3], "val_error_rate": 0.157012939453125, "learning_rate": 0.0051334211602807045}, {"train_loss": 0.14272479712963104, "difficulty": [2, 3], "val_error_rate": 0.157318115234375, "learning_rate": 0.0048767500557005405}, {"train_loss": 0.1168317198753357, "difficulty": [2, 3], "val_error_rate": 0.158935546875, "learning_rate": 0.004632912576198578}, {"train_loss": 0.08066699653863907, "difficulty": [2, 3], "val_error_rate": 0.157684326171875, "learning_rate": 0.004632912576198578}, {"train_loss": 0.13849255442619324, "difficulty": [2, 3], "val_error_rate": 0.157379150390625, "learning_rate": 0.004632912576198578}, {"train_loss": 0.15166467428207397, "difficulty": [2, 3], "val_error_rate": 0.162017822265625, "learning_rate": 0.0044012670405209064}, {"train_loss": 0.1229318305850029, "difficulty": [2, 3], "val_error_rate": 0.154876708984375, "learning_rate": 0.0044012670405209064}, {"train_loss": 0.1195249930024147, "difficulty": [2, 3], "val_error_rate": 0.1541748046875, "learning_rate": 0.0044012670405209064}, {"train_loss": 0.11516621708869934, "difficulty": [2, 3], "val_error_rate": 0.153778076171875, "learning_rate": 0.0044012670405209064}, {"train_loss": 0.13792754709720612, "difficulty": [2, 3], "val_error_rate": 0.1549072265625, "learning_rate": 0.00418120389804244}, {"train_loss": 0.09904330968856812, "difficulty": [2, 3], "val_error_rate": 0.15570068359375, "learning_rate": 0.003972143866121769}, {"train_loss": 0.12394475191831589, "difficulty": [2, 3], "val_error_rate": 0.152618408203125, "learning_rate": 0.003972143866121769}, {"train_loss": 0.07252167910337448, "difficulty": [2, 3], "val_error_rate": 0.150604248046875, "learning_rate": 0.003972143866121769}, {"train_loss": 0.16723839938640594, "difficulty": [2, 3], "val_error_rate": 0.15106201171875, "learning_rate": 0.003773536765947938}, {"train_loss": 0.09647113084793091, "difficulty": [2, 3], "val_error_rate": 0.15277099609375, "learning_rate": 0.0035848598927259445}, {"train_loss": 0.09437180310487747, "difficulty": [2, 3], "val_error_rate": 0.148712158203125, "learning_rate": 0.0035848598927259445}, {"train_loss": 0.10967829823493958, "difficulty": [2, 3], "val_error_rate": 0.14825439453125, "learning_rate": 0.0035848598927259445}, {"train_loss": 0.09645280987024307, "difficulty": [2, 3], "val_error_rate": 0.14495849609375, "learning_rate": 0.0035848598927259445}, {"train_loss": 0.09812894463539124, "difficulty": [2, 3], "val_error_rate": 0.147979736328125, "learning_rate": 0.0034056168515235186}, {"train_loss": 0.1171368733048439, "difficulty": [2, 3], "val_error_rate": 0.145355224609375, "learning_rate": 0.0034056168515235186}, {"train_loss": 0.15363289415836334, "difficulty": [2, 3], "val_error_rate": 0.145111083984375, "learning_rate": 0.0034056168515235186}, {"train_loss": 0.15581566095352173, "difficulty": [2, 3], "val_error_rate": 0.14398193359375, "learning_rate": 0.0034056168515235186}, {"train_loss": 0.11940810829401016, "difficulty": [2, 3], "val_error_rate": 0.144134521484375, "learning_rate": 0.0032353359274566174}, {"train_loss": 0.10177186131477356, "difficulty": [2, 3], "val_error_rate": 0.14453125, "learning_rate": 0.003073569154366851}, {"train_loss": 0.09260865300893784, "difficulty": [2, 3], "val_error_rate": 0.14398193359375, "learning_rate": 0.003073569154366851}, {"train_loss": 0.1374061405658722, "difficulty": [2, 3], "val_error_rate": 0.14276123046875, "learning_rate": 0.003073569154366851}, {"train_loss": 0.12915615737438202, "difficulty": [2, 3], "val_error_rate": 0.141876220703125, "learning_rate": 0.003073569154366851}, {"train_loss": 0.09266867488622665, "difficulty": [2, 3], "val_error_rate": 0.140411376953125, "learning_rate": 0.003073569154366851}, {"train_loss": 0.1539233922958374, "difficulty": [2, 3], "val_error_rate": 0.139862060546875, "learning_rate": 0.003073569154366851}, {"train_loss": 0.15989245474338531, "difficulty": [2, 3], "val_error_rate": 0.140777587890625, "learning_rate": 0.002919890685006976}, {"train_loss": 0.14159877598285675, "difficulty": [2, 3], "val_error_rate": 0.138671875, "learning_rate": 0.002919890685006976}, {"train_loss": 0.11660506576299667, "difficulty": [2, 3], "val_error_rate": 0.138885498046875, "learning_rate": 0.0027738960925489664}, {"train_loss": 0.06519336253404617, "difficulty": [2, 3], "val_error_rate": 0.138092041015625, "learning_rate": 0.0027738960925489664}, {"train_loss": 0.12281831353902817, "difficulty": [2, 3], "val_error_rate": 0.135894775390625, "learning_rate": 0.0027738960925489664}, {"train_loss": 0.12092379480600357, "difficulty": [2, 3], "val_error_rate": 0.13787841796875, "learning_rate": 0.002635201206430793}, {"train_loss": 0.15630444884300232, "difficulty": [2, 3], "val_error_rate": 0.137237548828125, "learning_rate": 0.002635201206430793}, {"train_loss": 0.06840194016695023, "difficulty": [2, 3], "val_error_rate": 0.134857177734375, "learning_rate": 0.002635201206430793}, {"train_loss": 0.12069642543792725, "difficulty": [2, 3], "val_error_rate": 0.1370849609375, "learning_rate": 0.0025034411810338497}, {"train_loss": 0.14038029313087463, "difficulty": [2, 3], "val_error_rate": 0.1365966796875, "learning_rate": 0.0025034411810338497}, {"train_loss": 0.1395648866891861, "difficulty": [2, 3], "val_error_rate": 0.13397216796875, "learning_rate": 0.0025034411810338497}, {"train_loss": 0.10829981416463852, "difficulty": [2, 3], "val_error_rate": 0.13360595703125, "learning_rate": 0.0025034411810338497}, {"train_loss": 0.167385533452034, "difficulty": [2, 3], "val_error_rate": 0.134552001953125, "learning_rate": 0.002378269098699093}, {"train_loss": 0.0947343111038208, "difficulty": [2, 3], "val_error_rate": 0.133575439453125, "learning_rate": 0.002378269098699093}, {"train_loss": 0.09872404485940933, "difficulty": [2, 3], "val_error_rate": 0.13287353515625, "learning_rate": 0.002378269098699093}, {"train_loss": 0.07851063460111618, "difficulty": [2, 3], "val_error_rate": 0.133453369140625, "learning_rate": 0.0022593557368963957}, {"train_loss": 0.15169648826122284, "difficulty": [2, 3], "val_error_rate": 0.132568359375, "learning_rate": 0.0022593557368963957}, {"train_loss": 0.09851813316345215, "difficulty": [2, 3], "val_error_rate": 0.13189697265625, "learning_rate": 0.0022593557368963957}, {"train_loss": 0.1345604509115219, "difficulty": [2, 3], "val_error_rate": 0.132904052734375, "learning_rate": 0.0021463879384100437}, {"train_loss": 0.09887353330850601, "difficulty": [2, 3], "val_error_rate": 0.132843017578125, "learning_rate": 0.0021463879384100437}, {"train_loss": 0.09715205430984497, "difficulty": [2, 3], "val_error_rate": 0.130584716796875, "learning_rate": 0.0021463879384100437}, {"train_loss": 0.13912218809127808, "difficulty": [2, 3], "val_error_rate": 0.132568359375, "learning_rate": 0.0020390686113387346}, {"train_loss": 0.1128544881939888, "difficulty": [2, 3], "val_error_rate": 0.131256103515625, "learning_rate": 0.0020390686113387346}, {"train_loss": 0.16052109003067017, "difficulty": [2, 3], "val_error_rate": 0.131561279296875, "learning_rate": 0.0019371152156963944}, {"train_loss": 0.10753045231103897, "difficulty": [2, 3], "val_error_rate": 0.131591796875, "learning_rate": 0.001840259414166212}, {"train_loss": 0.1252230554819107, "difficulty": [2, 3], "val_error_rate": 0.130462646484375, "learning_rate": 0.001840259414166212}, {"train_loss": 0.13335800170898438, "difficulty": [2, 3], "val_error_rate": 0.132110595703125, "learning_rate": 0.0017482464900240302}, {"train_loss": 0.10076916962862015, "difficulty": [2, 3], "val_error_rate": 0.1302490234375, "learning_rate": 0.0017482464900240302}, {"train_loss": 0.11858474463224411, "difficulty": [2, 3], "val_error_rate": 0.1297607421875, "learning_rate": 0.0017482464900240302}, {"train_loss": 0.10388076305389404, "difficulty": [2, 3], "val_error_rate": 0.12945556640625, "learning_rate": 0.0017482464900240302}, {"train_loss": 0.0919782891869545, "difficulty": [2, 3], "val_error_rate": 0.131256103515625, "learning_rate": 0.001660834182985127}, {"train_loss": 0.08900802582502365, "difficulty": [2, 3], "val_error_rate": 0.129364013671875, "learning_rate": 0.001660834182985127}, {"train_loss": 0.08715420216321945, "difficulty": [2, 3], "val_error_rate": 0.125885009765625, "learning_rate": 0.001660834182985127}, {"train_loss": 0.07617384940385818, "difficulty": [2, 3], "val_error_rate": 0.1275634765625, "learning_rate": 0.0015777924563735723}, {"train_loss": 0.13528023660182953, "difficulty": [2, 3], "val_error_rate": 0.12799072265625, "learning_rate": 0.0014989027986302972}, {"train_loss": 0.09915974736213684, "difficulty": [2, 3], "val_error_rate": 0.12750244140625, "learning_rate": 0.0014989027986302972}, {"train_loss": 0.08025579154491425, "difficulty": [2, 3], "val_error_rate": 0.126922607421875, "learning_rate": 0.0014989027986302972}, {"train_loss": 0.06645131856203079, "difficulty": [2, 3], "val_error_rate": 0.126068115234375, "learning_rate": 0.0014989027986302972}, {"train_loss": 0.10559975355863571, "difficulty": [2, 3], "val_error_rate": 0.126434326171875, "learning_rate": 0.001423957641236484}, {"train_loss": 0.06404153257608414, "difficulty": [2, 3], "val_error_rate": 0.126068115234375, "learning_rate": 0.001423957641236484}, {"train_loss": 0.10858219861984253, "difficulty": [2, 3], "val_error_rate": 0.126251220703125, "learning_rate": 0.0013527597766369581}, {"train_loss": 0.10064060240983963, "difficulty": [2, 3], "val_error_rate": 0.126861572265625, "learning_rate": 0.001285121776163578}, {"train_loss": 0.13534870743751526, "difficulty": [2, 3], "val_error_rate": 0.126312255859375, "learning_rate": 0.001285121776163578}, {"train_loss": 0.11842752248048782, "difficulty": [2, 3], "val_error_rate": 0.125030517578125, "learning_rate": 0.001285121776163578}, {"train_loss": 0.1079772338271141, "difficulty": [2, 3], "val_error_rate": 0.12548828125, "learning_rate": 0.0012208656407892704}, {"train_loss": 0.0937325656414032, "difficulty": [2, 3], "val_error_rate": 0.1248779296875, "learning_rate": 0.0012208656407892704}, {"train_loss": 0.08562683314085007, "difficulty": [2, 3], "val_error_rate": 0.125091552734375, "learning_rate": 0.0011598223354667425}, {"train_loss": 0.15157641470432281, "difficulty": [2, 3], "val_error_rate": 0.126220703125, "learning_rate": 0.0011018312070518732}, {"train_loss": 0.12215886265039444, "difficulty": [2, 3], "val_error_rate": 0.124359130859375, "learning_rate": 0.0011018312070518732}, {"train_loss": 0.10479845851659775, "difficulty": [2, 3], "val_error_rate": 0.12432861328125, "learning_rate": 0.0011018312070518732}, {"train_loss": 0.11462608724832535, "difficulty": [2, 3], "val_error_rate": 0.12457275390625, "learning_rate": 0.0010467396350577474}, {"train_loss": 0.0937768742442131, "difficulty": [2, 3], "val_error_rate": 0.124725341796875, "learning_rate": 0.0009944026824086905}, {"train_loss": 0.12375161796808243, "difficulty": [2, 3], "val_error_rate": 0.122955322265625, "learning_rate": 0.0009944026824086905}, {"train_loss": 0.1112215593457222, "difficulty": [2, 3], "val_error_rate": 0.1241455078125, "learning_rate": 0.0009446825715713203}, {"train_loss": 0.0837259516119957, "difficulty": [2, 3], "val_error_rate": 0.123199462890625, "learning_rate": 0.0009446825715713203}, {"train_loss": 0.09451698511838913, "difficulty": [2, 3], "val_error_rate": 0.12249755859375, "learning_rate": 0.0009446825715713203}, {"train_loss": 0.09041454643011093, "difficulty": [2, 3], "val_error_rate": 0.12310791015625, "learning_rate": 0.0008974484517239034}, {"train_loss": 0.10788146406412125, "difficulty": [2, 3], "val_error_rate": 0.122711181640625, "learning_rate": 0.0008974484517239034}, {"train_loss": 0.1271788328886032, "difficulty": [2, 3], "val_error_rate": 0.12249755859375, "learning_rate": 0.0008974484517239034}, {"train_loss": 0.12510661780834198, "difficulty": [2, 3], "val_error_rate": 0.121856689453125, "learning_rate": 0.0008974484517239034}, {"train_loss": 0.10982891172170639, "difficulty": [2, 3], "val_error_rate": 0.121856689453125, "learning_rate": 0.0008974484517239034}, {"train_loss": 0.11343694478273392, "difficulty": [2, 3], "val_error_rate": 0.121612548828125, "learning_rate": 0.0008974484517239034}, {"train_loss": 0.10525206476449966, "difficulty": [2, 3], "val_error_rate": 0.121612548828125, "learning_rate": 0.0008974484517239034}, {"train_loss": 0.11126488447189331, "difficulty": [2, 3], "val_error_rate": 0.12255859375, "learning_rate": 0.0008525760495103896}, {"train_loss": 0.11318358033895493, "difficulty": [2, 3], "val_error_rate": 0.12200927734375, "learning_rate": 0.0008525760495103896}, {"train_loss": 0.09710032492876053, "difficulty": [2, 3], "val_error_rate": 0.122314453125, "learning_rate": 0.0008099472615867853}, {"train_loss": 0.11846166104078293, "difficulty": [2, 3], "val_error_rate": 0.12225341796875, "learning_rate": 0.0008099472615867853}, {"train_loss": 0.07204774022102356, "difficulty": [2, 3], "val_error_rate": 0.1221923828125, "learning_rate": 0.0008099472615867853}, {"train_loss": 0.05024376139044762, "difficulty": [2, 3], "val_error_rate": 0.121002197265625, "learning_rate": 0.0008099472615867853}, {"train_loss": 0.08309416472911835, "difficulty": [2, 3], "val_error_rate": 0.12139892578125, "learning_rate": 0.0007694499217905104}, {"train_loss": 0.06494946032762527, "difficulty": [2, 3], "val_error_rate": 0.119415283203125, "learning_rate": 0.0007694499217905104}, {"train_loss": 0.11317253857851028, "difficulty": [2, 3], "val_error_rate": 0.120147705078125, "learning_rate": 0.0007309774518944323}, {"train_loss": 0.08861247450113297, "difficulty": [2, 3], "val_error_rate": 0.121124267578125, "learning_rate": 0.0006944285705685616}, {"train_loss": 0.18907225131988525, "difficulty": [2, 3], "val_error_rate": 0.121673583984375, "learning_rate": 0.0006597071187570691}, {"train_loss": 0.08916380256414413, "difficulty": [2, 3], "val_error_rate": 0.12060546875, "learning_rate": 0.0006597071187570691}, {"train_loss": 0.07114296406507492, "difficulty": [2, 3], "val_error_rate": 0.120361328125, "learning_rate": 0.0006597071187570691}, {"train_loss": 0.10440665483474731, "difficulty": [2, 3], "val_error_rate": 0.120025634765625, "learning_rate": 0.0006597071187570691}, {"train_loss": 0.1197696328163147, "difficulty": [2, 3], "val_error_rate": 0.119415283203125, "learning_rate": 0.0006597071187570691}, {"train_loss": 0.1094246581196785, "difficulty": [2, 3], "val_error_rate": 0.1202392578125, "learning_rate": 0.0006267217686399817}, {"train_loss": 0.06326153129339218, "difficulty": [2, 3], "val_error_rate": 0.119659423828125, "learning_rate": 0.0006267217686399817}, {"train_loss": 0.09391533583402634, "difficulty": [2, 3], "val_error_rate": 0.120208740234375, "learning_rate": 0.0005953856743872166}, {"train_loss": 0.11112755537033081, "difficulty": [2, 3], "val_error_rate": 0.118896484375, "learning_rate": 0.0005953856743872166}, {"train_loss": 0.08529426902532578, "difficulty": [2, 3], "val_error_rate": 0.11859130859375, "learning_rate": 0.0005953856743872166}, {"train_loss": 0.11251378804445267, "difficulty": [2, 3], "val_error_rate": 0.119140625, "learning_rate": 0.0005656164139509201}, {"train_loss": 0.07960841059684753, "difficulty": [2, 3], "val_error_rate": 0.119110107421875, "learning_rate": 0.0005656164139509201}, {"train_loss": 0.07300024479627609, "difficulty": [2, 3], "val_error_rate": 0.118865966796875, "learning_rate": 0.0005656164139509201}, {"train_loss": 0.09006762504577637, "difficulty": [2, 3], "val_error_rate": 0.11859130859375, "learning_rate": 0.0005656164139509201}, {"train_loss": 0.1505012810230255, "difficulty": [2, 3], "val_error_rate": 0.118438720703125, "learning_rate": 0.0005656164139509201}, {"train_loss": 0.11586304754018784, "difficulty": [2, 3], "val_error_rate": 0.117706298828125, "learning_rate": 0.0005656164139509201}, {"train_loss": 0.08360829204320908, "difficulty": [2, 3], "val_error_rate": 0.11846923828125, "learning_rate": 0.0005373355816118419}, {"train_loss": 0.14608609676361084, "difficulty": [2, 3], "val_error_rate": 0.11883544921875, "learning_rate": 0.0005104687879793346}, {"train_loss": 0.0769650787115097, "difficulty": [2, 3], "val_error_rate": 0.117828369140625, "learning_rate": 0.0005104687879793346}, {"train_loss": 0.06229093298316002, "difficulty": [2, 3], "val_error_rate": 0.118560791015625, "learning_rate": 0.0004849453398492187}, {"train_loss": 0.0898217037320137, "difficulty": [2, 3], "val_error_rate": 0.117828369140625, "learning_rate": 0.0004849453398492187}, {"train_loss": 0.09827569127082825, "difficulty": [2, 3], "val_error_rate": 0.118560791015625, "learning_rate": 0.0004606980655808002}] -------------------------------------------------------------------------------- /trained_model/model_(2, 3).ckpt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raindeer/seq2seq_experiments/357108fd1c2214888a95b0cac46123a722e0439e/trained_model/model_(2, 3).ckpt -------------------------------------------------------------------------------- /trained_model/model_(2, 3).ckpt.meta: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/raindeer/seq2seq_experiments/357108fd1c2214888a95b0cac46123a722e0439e/trained_model/model_(2, 3).ckpt.meta --------------------------------------------------------------------------------