├── README.md
├── bert_text_classification_demo
│   └── bert_classification.ipynb
└── glove
    ├── .idea
    │   ├── .gitignore
    │   ├── glove.iml
    │   ├── inspectionProfiles
    │   │   └── profiles_settings.xml
    │   ├── misc.xml
    │   ├── modules.xml
    │   └── vcs.xml
    ├── corpus
    │   ├── text8.dev.txt
    │   ├── text8.test.txt
    │   └── text8.train.txt
    ├── logs
    │   ├── glove-50-1.log
    │   ├── glove-50-2.log
    │   ├── glove-50-3.log
    │   └── glove-50.log
    ├── model
    │   ├── GloveModel.py
    │   ├── GloveTrain.py
    │   ├── WordEmbeddingDataset.py
    │   ├── __pycache__
    │   │   ├── GloveModel.cpython-36.pyc
    │   │   ├── GloveModel.cpython-37.pyc
    │   │   ├── WordEmbeddingDataset.cpython-36.pyc
    │   │   └── WordEmbeddingDataset.cpython-37.pyc
    │   └── record-50.log
    └── weights
        ├── glove-50-1.th
        ├── glove-50-2.th
        ├── glove-50-3.th
        └── glove-50.th
/README.md:
--------------------------------------------------------------------------------
1 | # deep-learning-examples
2 | 深度学习相关的实例
3 |
--------------------------------------------------------------------------------
/bert_text_classification_demo/bert_classification.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "#part1: bert feature-based\n",
10 | "import numpy as np\n",
11 | "import pandas as pd\n",
12 | "from sklearn.model_selection import train_test_split\n",
13 | "from sklearn.linear_model import LogisticRegression\n",
14 | "from sklearn.model_selection import GridSearchCV\n",
15 | "from sklearn.model_selection import cross_val_score\n",
16 | "import torch\n",
17 | "import transformers as tfs\n",
18 | "import warnings\n",
19 | "\n",
20 | "warnings.filterwarnings('ignore')"
21 | ]
22 | },
23 | {
24 | "cell_type": "code",
25 | "execution_count": 5,
26 | "metadata": {},
27 | "outputs": [],
28 | "source": [
29 | "train_df = pd.read_csv('https://github.com/clairett/pytorch-sentiment-classification/raw/master/data/SST2/train.tsv', delimiter='\\t', header=None)"
30 | ]
31 | },
32 | {
33 | "cell_type": "code",
34 | "execution_count": 6,
35 | "metadata": {},
36 | "outputs": [
37 | {
38 | "name": "stdout",
39 | "output_type": "stream",
40 | "text": [
41 | "Train set shape: (3000, 2)\n"
42 | ]
43 | }
44 | ],
45 | "source": [
46 | "train_set = train_df[:3000]\n",
47 | "\n",
48 | "print(\"Train set shape:\", train_set.shape)"
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 7,
54 | "metadata": {},
55 | "outputs": [
56 | {
57 | "data": {
58 | "text/plain": [
59 | "1 1565\n",
60 | "0 1435\n",
61 | "Name: 1, dtype: int64"
62 | ]
63 | },
64 | "execution_count": 7,
65 | "metadata": {},
66 | "output_type": "execute_result"
67 | }
68 | ],
69 | "source": [
70 | "train_set[1].value_counts()"
71 | ]
72 | },
73 | {
74 | "cell_type": "code",
75 | "execution_count": 12,
76 | "metadata": {},
77 | "outputs": [],
78 | "source": [
79 | "model_class, tokenizer_class, pretrained_weights = (tfs.BertModel, tfs.BertTokenizer, 'bert-base-uncased')\n",
80 | "tokenizer = tokenizer_class.from_pretrained(pretrained_weights)\n",
81 | "model = model_class.from_pretrained(pretrained_weights)"
82 | ]
83 | },
84 | {
85 | "cell_type": "code",
86 | "execution_count": 13,
87 | "metadata": {},
88 | "outputs": [],
89 | "source": [
90 | "train_tokenized = train_set[0].apply((lambda x: tokenizer.encode(x, add_special_tokens=True)))"
91 | ]
92 | },
93 | {
94 | "cell_type": "code",
95 | "execution_count": 14,
96 | "metadata": {},
97 | "outputs": [
98 | {
99 | "name": "stdout",
100 | "output_type": "stream",
101 | "text": [
102 | "train set shape: (3000, 66)\n"
103 | ]
104 | }
105 | ],
106 | "source": [
107 | "train_max_len = 0\n",
108 | "for i in train_tokenized.values:\n",
109 | " if len(i) > train_max_len:\n",
110 | " train_max_len = len(i)\n",
111 | "\n",
112 | "train_padded = np.array([i + [0] * (train_max_len-len(i)) for i in train_tokenized.values])\n",
113 | "print(\"train set shape:\",train_padded.shape)"
114 | ]
115 | },
116 | {
117 | "cell_type": "code",
118 | "execution_count": 15,
119 | "metadata": {},
120 | "outputs": [
121 | {
122 | "name": "stdout",
123 | "output_type": "stream",
124 | "text": [
125 | "[ 101 1037 18385 1010 6057 1998 2633 18276 2128 16603 1997 5053\n",
126 | " 1998 1996 6841 1998 5687 5469 3152 102 0 0 0 0\n",
127 | " 0 0 0 0 0 0 0 0 0 0 0 0\n",
128 | " 0 0 0 0 0 0 0 0 0 0 0 0\n",
129 | " 0 0 0 0 0 0 0 0 0 0 0 0\n",
130 | " 0 0 0 0 0 0]\n",
131 | "[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
132 | " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
133 | ]
134 | }
135 | ],
136 | "source": [
137 | "print(train_padded[0])\n",
138 | "train_attention_mask = np.where(train_padded != 0, 1, 0)\n",
139 | "print(train_attention_mask[0])"
140 | ]
141 | },
142 | {
143 | "cell_type": "code",
144 | "execution_count": 16,
145 | "metadata": {},
146 | "outputs": [],
147 | "source": [
148 | "# 训练集\n",
149 | "train_input_ids = torch.tensor(train_padded).long()\n",
150 | "train_attention_mask = torch.tensor(train_attention_mask).long()\n",
151 | "with torch.no_grad():\n",
152 | " train_last_hidden_states = model(train_input_ids, attention_mask=train_attention_mask)"
153 | ]
154 | },
155 | {
156 | "cell_type": "code",
157 | "execution_count": 17,
158 | "metadata": {},
159 | "outputs": [
160 | {
161 | "data": {
162 | "text/plain": [
163 | "torch.Size([3000, 66, 768])"
164 | ]
165 | },
166 | "execution_count": 17,
167 | "metadata": {},
168 | "output_type": "execute_result"
169 | }
170 | ],
171 | "source": [
172 | "train_last_hidden_states[0].size()"
173 | ]
174 | },
175 | {
176 | "cell_type": "code",
177 | "execution_count": 18,
178 | "metadata": {},
179 | "outputs": [],
180 | "source": [
181 | "train_features = train_last_hidden_states[0][:,0,:].numpy()\n",
182 | "train_labels = train_set[1]"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 19,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "train_features, test_features, train_labels, test_labels = train_test_split(train_features, train_labels)"
192 | ]
193 | },
194 | {
195 | "cell_type": "code",
196 | "execution_count": 20,
197 | "metadata": {},
198 | "outputs": [
199 | {
200 | "data": {
201 | "text/plain": [
202 | "LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,\n",
203 | " intercept_scaling=1, max_iter=100, multi_class='warn',\n",
204 | " n_jobs=None, penalty='l2', random_state=None, solver='warn',\n",
205 | " tol=0.0001, verbose=0, warm_start=False)"
206 | ]
207 | },
208 | "execution_count": 20,
209 | "metadata": {},
210 | "output_type": "execute_result"
211 | }
212 | ],
213 | "source": [
214 | "lr_clf = LogisticRegression()\n",
215 | "lr_clf.fit(train_features, train_labels)"
216 | ]
217 | },
218 | {
219 | "cell_type": "code",
220 | "execution_count": 21,
221 | "metadata": {},
222 | "outputs": [
223 | {
224 | "data": {
225 | "text/plain": [
226 | "0.8306666666666667"
227 | ]
228 | },
229 | "execution_count": 21,
230 | "metadata": {},
231 | "output_type": "execute_result"
232 | }
233 | ],
234 | "source": [
235 | "lr_clf.score(test_features, test_labels)"
236 | ]
237 | },
238 | {
239 | "cell_type": "code",
240 | "execution_count": 23,
241 | "metadata": {},
242 | "outputs": [],
243 | "source": [
244 | "#part 2 - bert fine-tuned\n",
245 | "import torch\n",
246 | "from torch import nn\n",
247 | "from torch import optim\n",
248 | "import transformers as tfs\n",
249 | "import math\n",
250 | "\n",
251 | "class BertClassificationModel(nn.Module):\n",
252 | " def __init__(self):\n",
253 | " super(BertClassificationModel, self).__init__() \n",
254 | " model_class, tokenizer_class, pretrained_weights = (tfs.BertModel, tfs.BertTokenizer, 'bert-base-uncased') \n",
255 | " self.tokenizer = tokenizer_class.from_pretrained(pretrained_weights)\n",
256 | " self.bert = model_class.from_pretrained(pretrained_weights)\n",
257 | " self.dense = nn.Linear(768, 2) #bert默认的隐藏单元数是768, 输出单元是2,表示二分类\n",
258 | " \n",
259 | " def forward(self, batch_sentences):\n",
260 | " #print(batch_sentences[4])\n",
261 | " batch_tokenized = self.tokenizer.batch_encode_plus(batch_sentences, add_special_tokens=True,\n",
262 | " max_length=66, pad_to_max_length=True) #tokenize、add special token、pad\n",
263 | " input_ids = torch.tensor(batch_tokenized['input_ids'])\n",
264 | " attention_mask = torch.tensor(batch_tokenized['attention_mask'])\n",
265 | " bert_output = self.bert(input_ids, attention_mask=attention_mask)\n",
266 | " bert_cls_hidden_state = bert_output[0][:,0,:] #提取[CLS]对应的隐藏状态\n",
267 | " linear_output = self.dense(bert_cls_hidden_state)\n",
268 | " return linear_output"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 24,
274 | "metadata": {},
275 | "outputs": [],
276 | "source": [
277 | "sentences = train_set[0].values\n",
278 | "targets = train_set[1].values\n",
279 | "train_inputs, test_inputs, train_targets, test_targets = train_test_split(sentences, targets)\n",
280 | "\n",
281 | "batch_size = 64\n",
282 | "batch_count = int(len(train_inputs) / batch_size)\n",
283 | "batch_train_inputs, batch_train_targets = [], []\n",
284 | "for i in range(batch_count):\n",
285 | " batch_train_inputs.append(train_inputs[i*batch_size : (i+1)*batch_size])\n",
286 | " batch_train_targets.append(train_targets[i*batch_size : (i+1)*batch_size])"
287 | ]
288 | },
289 | {
290 | "cell_type": "code",
291 | "execution_count": 28,
292 | "metadata": {},
293 | "outputs": [
294 | {
295 | "name": "stdout",
296 | "output_type": "stream",
297 | "text": [
298 | "Batch: 10, Loss: 0.7067\n",
299 | "Batch: 20, Loss: 0.6043\n",
300 | "Batch: 30, Loss: 0.5483\n",
301 | "Batch: 10, Loss: 0.5091\n",
302 | "Batch: 20, Loss: 0.3184\n",
303 | "Batch: 30, Loss: 0.3790\n",
304 | "Batch: 10, Loss: 0.2721\n",
305 | "Batch: 20, Loss: 0.2135\n",
306 | "Batch: 30, Loss: 0.3903\n"
307 | ]
308 | }
309 | ],
310 | "source": [
311 | "#train the model\n",
312 | "epochs = 3\n",
313 | "lr = 0.01\n",
314 | "print_every_batch = 10\n",
315 | "bert_classifier_model = BertClassificationModel()\n",
316 | "optimizer = optim.SGD(bert_classifier_model.parameters(), lr=lr)\n",
317 | "criterion = nn.CrossEntropyLoss()\n",
318 | "\n",
319 | "for epoch in range(epochs):\n",
320 | " print_avg_loss = 0\n",
321 | " for i in range(batch_count):\n",
322 | " inputs = batch_train_inputs[i]\n",
323 | " labels = torch.tensor(batch_train_targets[i])\n",
324 | " optimizer.zero_grad()\n",
325 | " outputs = bert_classifier_model(inputs)\n",
326 | " loss = criterion(outputs, labels)\n",
327 | " loss.backward()\n",
328 | " optimizer.step()\n",
329 | " \n",
330 | " print_avg_loss += loss.item()\n",
331 | " if i % print_every_batch == (print_every_batch-1):\n",
332 | " print(\"Batch: %d, Loss: %.4f\" % ((i+1), print_avg_loss/print_every_batch))\n",
333 | " print_avg_loss = 0\n",
334 | " "
335 | ]
336 | },
337 | {
338 | "cell_type": "code",
339 | "execution_count": 29,
340 | "metadata": {},
341 | "outputs": [
342 | {
343 | "name": "stdout",
344 | "output_type": "stream",
345 | "text": [
346 | "Accuracy: 90.53%\n"
347 | ]
348 | }
349 | ],
350 | "source": [
351 | "# eval the trained model\n",
352 | "total = len(test_inputs)\n",
353 | "hit = 0\n",
354 | "with torch.no_grad():\n",
355 | " for i in range(total):\n",
356 | " outputs = bert_classifier_model([test_inputs[i]])\n",
357 | " _, predicted = torch.max(outputs, 1)\n",
358 | " if predicted == test_targets[i]:\n",
359 | " hit += 1\n",
360 | "\n",
361 | "print(\"Accuracy: %.2f%%\" % (hit / total * 100))"
362 | ]
363 | },
364 | {
365 | "cell_type": "code",
366 | "execution_count": null,
367 | "metadata": {},
368 | "outputs": [],
369 | "source": []
370 | }
371 | ],
372 | "metadata": {
373 | "kernelspec": {
374 | "display_name": "Python 3",
375 | "language": "python",
376 | "name": "python3"
377 | },
378 | "language_info": {
379 | "codemirror_mode": {
380 | "name": "ipython",
381 | "version": 3
382 | },
383 | "file_extension": ".py",
384 | "mimetype": "text/x-python",
385 | "name": "python",
386 | "nbconvert_exporter": "python",
387 | "pygments_lexer": "ipython3",
388 | "version": "3.7.3"
389 | }
390 | },
391 | "nbformat": 4,
392 | "nbformat_minor": 2
393 | }
394 |
--------------------------------------------------------------------------------
/glove/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /workspace.xml
--------------------------------------------------------------------------------
/glove/.idea/glove.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/glove/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/glove/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/glove/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/glove/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/glove/logs/glove-50-1.log:
--------------------------------------------------------------------------------
1 | epoch: 0, iter: 10000 (0.3761%), loss: 92.86457, 0h 0m 26s (- 1h 55m 23s)
2 | epoch: 0, iter: 20000 (0.7523%), loss: 49.96140, 0h 0m 50s (- 1h 52m 1s)
3 | epoch: 0, iter: 30000 (1.1284%), loss: 27.20846, 0h 1m 15s (- 1h 50m 17s)
4 | epoch: 0, iter: 40000 (1.5046%), loss: 19.99873, 0h 1m 40s (- 1h 50m 9s)
5 | epoch: 0, iter: 50000 (1.8807%), loss: 17.49622, 0h 2m 5s (- 1h 49m 19s)
6 | epoch: 0, iter: 60000 (2.2569%), loss: 15.70824, 0h 2m 32s (- 1h 49m 44s)
7 | epoch: 0, iter: 70000 (2.6330%), loss: 14.27283, 0h 2m 58s (- 1h 49m 54s)
8 | epoch: 0, iter: 80000 (3.0092%), loss: 13.31666, 0h 3m 24s (- 1h 49m 39s)
9 | epoch: 0, iter: 90000 (3.3853%), loss: 12.25934, 0h 3m 51s (- 1h 50m 17s)
10 | epoch: 0, iter: 100000 (3.7615%), loss: 11.57755, 0h 4m 20s (- 1h 50m 59s)
11 | epoch: 0, iter: 110000 (4.1376%), loss: 10.76983, 0h 4m 47s (- 1h 51m 5s)
12 | epoch: 0, iter: 120000 (4.5138%), loss: 10.15626, 0h 5m 16s (- 1h 51m 45s)
13 | epoch: 0, iter: 130000 (4.8899%), loss: 9.58190, 0h 5m 43s (- 1h 51m 29s)
14 | epoch: 0, iter: 140000 (5.2660%), loss: 9.25488, 0h 6m 14s (- 1h 52m 15s)
15 | epoch: 0, iter: 150000 (5.6422%), loss: 8.73018, 0h 6m 42s (- 1h 52m 4s)
16 | epoch: 0, iter: 160000 (6.0183%), loss: 8.20880, 0h 7m 10s (- 1h 52m 3s)
17 | epoch: 0, iter: 170000 (6.3945%), loss: 8.14306, 0h 7m 37s (- 1h 51m 30s)
18 | epoch: 0, iter: 180000 (6.7706%), loss: 7.79843, 0h 8m 5s (- 1h 51m 32s)
19 | epoch: 0, iter: 190000 (7.1468%), loss: 7.61587, 0h 8m 34s (- 1h 51m 30s)
20 | epoch: 0, iter: 200000 (7.5229%), loss: 7.25375, 0h 9m 1s (- 1h 51m 1s)
21 | epoch: 0, iter: 210000 (7.8991%), loss: 7.19949, 0h 9m 28s (- 1h 50m 22s)
22 | epoch: 0, iter: 220000 (8.2752%), loss: 6.90067, 0h 9m 53s (- 1h 49m 38s)
23 | epoch: 0, iter: 230000 (8.6514%), loss: 6.67233, 0h 10m 19s (- 1h 48m 58s)
24 | epoch: 0, iter: 240000 (9.0275%), loss: 6.40627, 0h 10m 44s (- 1h 48m 13s)
25 | epoch: 0, iter: 250000 (9.4037%), loss: 6.53895, 0h 11m 9s (- 1h 47m 30s)
26 | epoch: 0, iter: 260000 (9.7798%), loss: 6.18961, 0h 11m 34s (- 1h 46m 47s)
27 | epoch: 1, iter: 270000 (10.1560%), loss: 2.43159, 0h 11m 59s (- 1h 46m 8s)
28 | epoch: 1, iter: 280000 (10.5321%), loss: 5.87573, 0h 12m 25s (- 1h 45m 31s)
29 | epoch: 1, iter: 290000 (10.9082%), loss: 5.87013, 0h 12m 50s (- 1h 44m 52s)
30 | epoch: 1, iter: 300000 (11.2844%), loss: 5.71348, 0h 13m 15s (- 1h 44m 12s)
31 | epoch: 1, iter: 310000 (11.6605%), loss: 5.61802, 0h 13m 41s (- 1h 43m 43s)
32 | epoch: 1, iter: 320000 (12.0367%), loss: 5.53479, 0h 14m 8s (- 1h 43m 18s)
33 | epoch: 1, iter: 330000 (12.4128%), loss: 5.53895, 0h 14m 36s (- 1h 43m 2s)
34 | epoch: 1, iter: 340000 (12.7890%), loss: 5.45641, 0h 15m 3s (- 1h 42m 37s)
35 | epoch: 1, iter: 350000 (13.1651%), loss: 5.28671, 0h 15m 30s (- 1h 42m 19s)
36 | epoch: 1, iter: 360000 (13.5413%), loss: 5.26368, 0h 15m 59s (- 1h 42m 6s)
37 | epoch: 1, iter: 370000 (13.9174%), loss: 5.24398, 0h 16m 26s (- 1h 41m 39s)
38 | epoch: 1, iter: 380000 (14.2936%), loss: 5.15462, 0h 16m 52s (- 1h 41m 13s)
39 | epoch: 1, iter: 390000 (14.6697%), loss: 5.16390, 0h 17m 18s (- 1h 40m 40s)
40 | epoch: 1, iter: 400000 (15.0459%), loss: 5.08515, 0h 17m 43s (- 1h 40m 5s)
41 | epoch: 1, iter: 410000 (15.4220%), loss: 4.99704, 0h 18m 8s (- 1h 39m 30s)
42 | epoch: 1, iter: 420000 (15.7981%), loss: 4.95028, 0h 18m 34s (- 1h 38m 58s)
43 | epoch: 1, iter: 430000 (16.1743%), loss: 5.05588, 0h 18m 59s (- 1h 38m 24s)
44 | epoch: 1, iter: 440000 (16.5504%), loss: 5.11084, 0h 19m 24s (- 1h 37m 49s)
45 | epoch: 1, iter: 450000 (16.9266%), loss: 4.83665, 0h 19m 49s (- 1h 37m 18s)
46 | epoch: 1, iter: 460000 (17.3027%), loss: 4.76196, 0h 20m 15s (- 1h 36m 50s)
47 | epoch: 1, iter: 470000 (17.6789%), loss: 4.80667, 0h 20m 41s (- 1h 36m 21s)
48 | epoch: 1, iter: 480000 (18.0550%), loss: 4.72734, 0h 21m 7s (- 1h 35m 54s)
49 | epoch: 1, iter: 490000 (18.4312%), loss: 4.68293, 0h 21m 33s (- 1h 35m 23s)
50 | epoch: 1, iter: 500000 (18.8073%), loss: 4.66898, 0h 21m 58s (- 1h 34m 52s)
51 | epoch: 1, iter: 510000 (19.1835%), loss: 4.68207, 0h 22m 24s (- 1h 34m 25s)
52 | epoch: 1, iter: 520000 (19.5596%), loss: 4.61534, 0h 22m 51s (- 1h 34m 0s)
53 | epoch: 1, iter: 530000 (19.9358%), loss: 4.59207, 0h 23m 18s (- 1h 33m 35s)
54 | epoch: 2, iter: 540000 (20.3119%), loss: 3.59004, 0h 23m 46s (- 1h 33m 14s)
55 | epoch: 2, iter: 550000 (20.6880%), loss: 4.42532, 0h 24m 12s (- 1h 32m 48s)
56 | epoch: 2, iter: 560000 (21.0642%), loss: 4.48541, 0h 24m 39s (- 1h 32m 23s)
57 | epoch: 2, iter: 570000 (21.4403%), loss: 4.38792, 0h 25m 5s (- 1h 31m 54s)
58 | epoch: 2, iter: 580000 (21.8165%), loss: 4.36692, 0h 25m 30s (- 1h 31m 24s)
59 | epoch: 2, iter: 590000 (22.1926%), loss: 4.36353, 0h 25m 55s (- 1h 30m 54s)
60 | epoch: 2, iter: 600000 (22.5688%), loss: 4.31120, 0h 26m 21s (- 1h 30m 24s)
61 | epoch: 2, iter: 610000 (22.9449%), loss: 4.41320, 0h 26m 46s (- 1h 29m 53s)
62 | epoch: 2, iter: 620000 (23.3211%), loss: 4.32036, 0h 27m 10s (- 1h 29m 22s)
63 | epoch: 2, iter: 630000 (23.6972%), loss: 4.42845, 0h 27m 35s (- 1h 28m 50s)
64 | epoch: 2, iter: 640000 (24.0734%), loss: 4.31551, 0h 27m 59s (- 1h 28m 16s)
65 | epoch: 2, iter: 650000 (24.4495%), loss: 4.21666, 0h 28m 23s (- 1h 27m 43s)
66 | epoch: 2, iter: 660000 (24.8257%), loss: 4.26595, 0h 28m 48s (- 1h 27m 13s)
67 | epoch: 2, iter: 670000 (25.2018%), loss: 4.15088, 0h 29m 12s (- 1h 26m 41s)
68 | epoch: 2, iter: 680000 (25.5779%), loss: 4.29729, 0h 29m 37s (- 1h 26m 10s)
69 | epoch: 2, iter: 690000 (25.9541%), loss: 4.23945, 0h 30m 2s (- 1h 25m 41s)
70 | epoch: 2, iter: 700000 (26.3302%), loss: 4.11713, 0h 30m 26s (- 1h 25m 10s)
71 | epoch: 2, iter: 710000 (26.7064%), loss: 4.22030, 0h 30m 50s (- 1h 24m 39s)
72 | epoch: 2, iter: 720000 (27.0825%), loss: 4.09687, 0h 31m 15s (- 1h 24m 9s)
73 | epoch: 2, iter: 730000 (27.4587%), loss: 4.15297, 0h 31m 40s (- 1h 23m 39s)
74 | epoch: 2, iter: 740000 (27.8348%), loss: 4.03311, 0h 32m 4s (- 1h 23m 10s)
75 | epoch: 2, iter: 750000 (28.2110%), loss: 3.98184, 0h 32m 29s (- 1h 22m 40s)
76 | epoch: 2, iter: 760000 (28.5871%), loss: 4.06679, 0h 32m 53s (- 1h 22m 10s)
77 | epoch: 2, iter: 770000 (28.9633%), loss: 4.01613, 0h 33m 18s (- 1h 21m 41s)
78 | epoch: 2, iter: 780000 (29.3394%), loss: 4.09542, 0h 33m 43s (- 1h 21m 13s)
79 | epoch: 2, iter: 790000 (29.7156%), loss: 3.96401, 0h 34m 8s (- 1h 20m 44s)
80 | epoch: 3, iter: 800000 (30.0917%), loss: 0.94648, 0h 34m 33s (- 1h 20m 16s)
81 | epoch: 3, iter: 810000 (30.4679%), loss: 3.96123, 0h 34m 57s (- 1h 19m 47s)
82 | epoch: 3, iter: 820000 (30.8440%), loss: 3.86490, 0h 35m 22s (- 1h 19m 19s)
83 | epoch: 3, iter: 830000 (31.2201%), loss: 3.88823, 0h 35m 47s (- 1h 18m 50s)
84 | epoch: 3, iter: 840000 (31.5963%), loss: 3.86794, 0h 36m 12s (- 1h 18m 22s)
85 | epoch: 3, iter: 850000 (31.9724%), loss: 3.94755, 0h 36m 37s (- 1h 17m 55s)
86 | epoch: 3, iter: 860000 (32.3486%), loss: 3.81933, 0h 37m 2s (- 1h 17m 28s)
87 | epoch: 3, iter: 870000 (32.7247%), loss: 3.80439, 0h 37m 27s (- 1h 17m 0s)
88 | epoch: 3, iter: 880000 (33.1009%), loss: 3.86829, 0h 37m 52s (- 1h 16m 33s)
89 | epoch: 3, iter: 890000 (33.4770%), loss: 3.75349, 0h 38m 17s (- 1h 16m 6s)
90 | epoch: 3, iter: 900000 (33.8532%), loss: 3.81364, 0h 38m 43s (- 1h 15m 39s)
91 | epoch: 3, iter: 910000 (34.2293%), loss: 3.78310, 0h 39m 8s (- 1h 15m 12s)
92 | epoch: 3, iter: 920000 (34.6055%), loss: 3.73968, 0h 39m 33s (- 1h 14m 44s)
93 | epoch: 3, iter: 930000 (34.9816%), loss: 3.78936, 0h 39m 58s (- 1h 14m 17s)
94 | epoch: 3, iter: 940000 (35.3578%), loss: 3.80942, 0h 40m 23s (- 1h 13m 50s)
95 | epoch: 3, iter: 950000 (35.7339%), loss: 3.71867, 0h 40m 48s (- 1h 13m 24s)
96 | epoch: 3, iter: 960000 (36.1100%), loss: 3.68658, 0h 41m 14s (- 1h 12m 57s)
97 | epoch: 3, iter: 970000 (36.4862%), loss: 3.63413, 0h 41m 39s (- 1h 12m 30s)
98 | epoch: 3, iter: 980000 (36.8623%), loss: 3.82311, 0h 42m 4s (- 1h 12m 3s)
99 | epoch: 3, iter: 990000 (37.2385%), loss: 3.75991, 0h 42m 31s (- 1h 11m 39s)
100 | epoch: 3, iter: 1000000 (37.6146%), loss: 3.65309, 0h 42m 58s (- 1h 11m 16s)
101 | epoch: 3, iter: 1010000 (37.9908%), loss: 3.87733, 0h 43m 24s (- 1h 10m 50s)
102 | epoch: 3, iter: 1020000 (38.3669%), loss: 3.61614, 0h 43m 50s (- 1h 10m 25s)
103 | epoch: 3, iter: 1030000 (38.7431%), loss: 3.64257, 0h 44m 15s (- 1h 9m 58s)
104 | epoch: 3, iter: 1040000 (39.1192%), loss: 3.64530, 0h 44m 40s (- 1h 9m 31s)
105 | epoch: 3, iter: 1050000 (39.4954%), loss: 3.65395, 0h 45m 5s (- 1h 9m 4s)
106 | epoch: 3, iter: 1060000 (39.8715%), loss: 3.64212, 0h 45m 30s (- 1h 8m 38s)
107 | epoch: 4, iter: 1070000 (40.2477%), loss: 2.36440, 0h 45m 57s (- 1h 8m 13s)
108 | epoch: 4, iter: 1080000 (40.6238%), loss: 3.53831, 0h 46m 25s (- 1h 7m 50s)
109 | epoch: 4, iter: 1090000 (40.9999%), loss: 3.58030, 0h 46m 51s (- 1h 7m 25s)
110 | epoch: 4, iter: 1100000 (41.3761%), loss: 3.48421, 0h 47m 19s (- 1h 7m 3s)
111 | epoch: 4, iter: 1110000 (41.7522%), loss: 3.53088, 0h 47m 45s (- 1h 6m 38s)
112 | epoch: 4, iter: 1120000 (42.1284%), loss: 3.51427, 0h 48m 11s (- 1h 6m 12s)
113 | epoch: 4, iter: 1130000 (42.5045%), loss: 3.58787, 0h 48m 37s (- 1h 5m 46s)
114 | epoch: 4, iter: 1140000 (42.8807%), loss: 3.50215, 0h 49m 3s (- 1h 5m 20s)
115 | epoch: 4, iter: 1150000 (43.2568%), loss: 3.40816, 0h 49m 28s (- 1h 4m 53s)
116 | epoch: 4, iter: 1160000 (43.6330%), loss: 3.58338, 0h 49m 53s (- 1h 4m 26s)
117 | epoch: 4, iter: 1170000 (44.0091%), loss: 3.44755, 0h 50m 18s (- 1h 3m 59s)
118 | epoch: 4, iter: 1180000 (44.3853%), loss: 3.47206, 0h 50m 43s (- 1h 3m 33s)
119 | epoch: 4, iter: 1190000 (44.7614%), loss: 3.50208, 0h 51m 8s (- 1h 3m 7s)
120 | epoch: 4, iter: 1200000 (45.1376%), loss: 3.40210, 0h 51m 34s (- 1h 2m 41s)
121 | epoch: 4, iter: 1210000 (45.5137%), loss: 3.43364, 0h 52m 0s (- 1h 2m 15s)
122 | epoch: 4, iter: 1220000 (45.8898%), loss: 3.47704, 0h 52m 26s (- 1h 1m 50s)
123 | epoch: 4, iter: 1230000 (46.2660%), loss: 3.45663, 0h 52m 52s (- 1h 1m 24s)
124 | epoch: 4, iter: 1240000 (46.6421%), loss: 3.38673, 0h 53m 19s (- 1h 0m 59s)
125 | epoch: 4, iter: 1250000 (47.0183%), loss: 3.36118, 0h 53m 45s (- 1h 0m 34s)
126 | epoch: 4, iter: 1260000 (47.3944%), loss: 3.46180, 0h 54m 12s (- 1h 0m 10s)
127 | epoch: 4, iter: 1270000 (47.7706%), loss: 3.42837, 0h 54m 38s (- 0h 59m 44s)
128 | epoch: 4, iter: 1280000 (48.1467%), loss: 3.44432, 0h 55m 4s (- 0h 59m 18s)
129 | epoch: 4, iter: 1290000 (48.5229%), loss: 3.40011, 0h 55m 31s (- 0h 58m 54s)
130 | epoch: 4, iter: 1300000 (48.8990%), loss: 3.40122, 0h 55m 58s (- 0h 58m 29s)
131 | epoch: 4, iter: 1310000 (49.2752%), loss: 3.40839, 0h 56m 24s (- 0h 58m 3s)
132 | epoch: 4, iter: 1320000 (49.6513%), loss: 3.37471, 0h 56m 51s (- 0h 57m 39s)
133 | epoch: 5, iter: 1330000 (50.0275%), loss: 0.25569, 0h 57m 18s (- 0h 57m 14s)
134 | epoch: 5, iter: 1340000 (50.4036%), loss: 3.21507, 0h 57m 42s (- 0h 56m 47s)
135 | epoch: 5, iter: 1350000 (50.7798%), loss: 3.26745, 0h 58m 7s (- 0h 56m 20s)
136 | epoch: 5, iter: 1360000 (51.1559%), loss: 3.29456, 0h 58m 34s (- 0h 55m 56s)
137 | epoch: 5, iter: 1370000 (51.5320%), loss: 3.30635, 0h 59m 9s (- 0h 55m 38s)
138 | epoch: 5, iter: 1380000 (51.9082%), loss: 3.24455, 0h 59m 50s (- 0h 55m 26s)
139 | epoch: 5, iter: 1390000 (52.2843%), loss: 3.26913, 1h 0m 26s (- 0h 55m 9s)
140 | epoch: 5, iter: 1400000 (52.6605%), loss: 3.29212, 1h 1m 0s (- 0h 54m 50s)
141 | epoch: 5, iter: 1410000 (53.0366%), loss: 3.27594, 1h 1m 34s (- 0h 54m 31s)
142 | epoch: 5, iter: 1420000 (53.4128%), loss: 3.20994, 1h 2m 7s (- 0h 54m 11s)
143 | epoch: 5, iter: 1430000 (53.7889%), loss: 3.23428, 1h 2m 41s (- 0h 53m 51s)
144 | epoch: 5, iter: 1440000 (54.1651%), loss: 3.16178, 1h 3m 14s (- 0h 53m 30s)
145 | epoch: 5, iter: 1450000 (54.5412%), loss: 3.32330, 1h 3m 48s (- 0h 53m 10s)
146 | epoch: 5, iter: 1460000 (54.9174%), loss: 3.22935, 1h 4m 22s (- 0h 52m 51s)
147 | epoch: 5, iter: 1470000 (55.2935%), loss: 3.27961, 1h 4m 56s (- 0h 52m 30s)
148 | epoch: 5, iter: 1480000 (55.6697%), loss: 3.29182, 1h 5m 29s (- 0h 52m 9s)
149 | epoch: 5, iter: 1490000 (56.0458%), loss: 3.25655, 1h 6m 2s (- 0h 51m 47s)
150 | epoch: 5, iter: 1500000 (56.4219%), loss: 3.21777, 1h 6m 36s (- 0h 51m 26s)
151 | epoch: 5, iter: 1510000 (56.7981%), loss: 3.19342, 1h 7m 10s (- 0h 51m 5s)
152 | epoch: 5, iter: 1520000 (57.1742%), loss: 3.30349, 1h 7m 43s (- 0h 50m 44s)
153 | epoch: 5, iter: 1530000 (57.5504%), loss: 3.25336, 1h 8m 17s (- 0h 50m 22s)
154 | epoch: 5, iter: 1540000 (57.9265%), loss: 3.19261, 1h 8m 51s (- 0h 50m 0s)
155 | epoch: 5, iter: 1550000 (58.3027%), loss: 3.14542, 1h 9m 24s (- 0h 49m 38s)
156 | epoch: 5, iter: 1560000 (58.6788%), loss: 3.16256, 1h 10m 1s (- 0h 49m 18s)
157 | epoch: 5, iter: 1570000 (59.0550%), loss: 3.18417, 1h 10m 28s (- 0h 48m 51s)
158 | epoch: 5, iter: 1580000 (59.4311%), loss: 3.26877, 1h 10m 53s (- 0h 48m 23s)
159 | epoch: 5, iter: 1590000 (59.8073%), loss: 3.19909, 1h 11m 17s (- 0h 47m 54s)
160 | epoch: 6, iter: 1600000 (60.1834%), loss: 1.46111, 1h 11m 42s (- 0h 47m 26s)
161 | epoch: 6, iter: 1610000 (60.5596%), loss: 3.16764, 1h 12m 7s (- 0h 46m 58s)
162 | epoch: 6, iter: 1620000 (60.9357%), loss: 3.08508, 1h 12m 32s (- 0h 46m 30s)
163 | epoch: 6, iter: 1630000 (61.3118%), loss: 3.08024, 1h 12m 59s (- 0h 46m 3s)
164 | epoch: 6, iter: 1640000 (61.6880%), loss: 3.08277, 1h 13m 25s (- 0h 45m 35s)
165 | epoch: 6, iter: 1650000 (62.0641%), loss: 3.10155, 1h 13m 50s (- 0h 45m 8s)
166 | epoch: 6, iter: 1660000 (62.4403%), loss: 3.07659, 1h 14m 17s (- 0h 44m 41s)
167 | epoch: 6, iter: 1670000 (62.8164%), loss: 3.12121, 1h 14m 42s (- 0h 44m 13s)
168 | epoch: 6, iter: 1680000 (63.1926%), loss: 3.08521, 1h 15m 9s (- 0h 43m 46s)
169 | epoch: 6, iter: 1690000 (63.5687%), loss: 3.06605, 1h 15m 37s (- 0h 43m 20s)
170 | epoch: 6, iter: 1700000 (63.9449%), loss: 2.98073, 1h 16m 4s (- 0h 42m 53s)
171 | epoch: 6, iter: 1710000 (64.3210%), loss: 3.09513, 1h 16m 30s (- 0h 42m 26s)
172 | epoch: 6, iter: 1720000 (64.6972%), loss: 3.08373, 1h 16m 55s (- 0h 41m 58s)
173 | epoch: 6, iter: 1730000 (65.0733%), loss: 3.16916, 1h 17m 22s (- 0h 41m 31s)
174 | epoch: 6, iter: 1740000 (65.4495%), loss: 3.03526, 1h 17m 48s (- 0h 41m 4s)
175 | epoch: 6, iter: 1750000 (65.8256%), loss: 3.05531, 1h 18m 15s (- 0h 40m 37s)
176 | epoch: 6, iter: 1760000 (66.2017%), loss: 3.07280, 1h 18m 41s (- 0h 40m 10s)
177 | epoch: 6, iter: 1770000 (66.5779%), loss: 3.04820, 1h 19m 6s (- 0h 39m 42s)
178 | epoch: 6, iter: 1780000 (66.9540%), loss: 3.10741, 1h 19m 33s (- 0h 39m 15s)
179 | epoch: 6, iter: 1790000 (67.3302%), loss: 2.99784, 1h 19m 59s (- 0h 38m 48s)
180 | epoch: 6, iter: 1800000 (67.7063%), loss: 3.10673, 1h 20m 25s (- 0h 38m 21s)
181 | epoch: 6, iter: 1810000 (68.0825%), loss: 3.07649, 1h 20m 51s (- 0h 37m 54s)
182 | epoch: 6, iter: 1820000 (68.4586%), loss: 3.07239, 1h 21m 17s (- 0h 37m 27s)
183 | epoch: 6, iter: 1830000 (68.8348%), loss: 2.97465, 1h 21m 42s (- 0h 36m 59s)
184 | epoch: 6, iter: 1840000 (69.2109%), loss: 3.12573, 1h 22m 6s (- 0h 36m 31s)
185 | epoch: 6, iter: 1850000 (69.5871%), loss: 3.02472, 1h 22m 31s (- 0h 36m 3s)
186 | epoch: 6, iter: 1860000 (69.9632%), loss: 2.96794, 1h 22m 55s (- 0h 35m 36s)
187 | epoch: 7, iter: 1870000 (70.3394%), loss: 2.70732, 1h 23m 20s (- 0h 35m 8s)
188 | epoch: 7, iter: 1880000 (70.7155%), loss: 2.94222, 1h 23m 44s (- 0h 34m 40s)
189 | epoch: 7, iter: 1890000 (71.0917%), loss: 2.99878, 1h 24m 9s (- 0h 34m 13s)
190 | epoch: 7, iter: 1900000 (71.4678%), loss: 2.97443, 1h 24m 34s (- 0h 33m 46s)
191 | epoch: 7, iter: 1910000 (71.8439%), loss: 2.96662, 1h 25m 2s (- 0h 33m 19s)
192 | epoch: 7, iter: 1920000 (72.2201%), loss: 2.93785, 1h 25m 29s (- 0h 32m 53s)
193 | epoch: 7, iter: 1930000 (72.5962%), loss: 2.91697, 1h 25m 54s (- 0h 32m 25s)
194 | epoch: 7, iter: 1940000 (72.9724%), loss: 2.97648, 1h 26m 19s (- 0h 31m 58s)
195 | epoch: 7, iter: 1950000 (73.3485%), loss: 2.89172, 1h 26m 44s (- 0h 31m 31s)
196 | epoch: 7, iter: 1960000 (73.7247%), loss: 2.90661, 1h 27m 9s (- 0h 31m 3s)
197 | epoch: 7, iter: 1970000 (74.1008%), loss: 2.93943, 1h 27m 35s (- 0h 30m 36s)
198 | epoch: 7, iter: 1980000 (74.4770%), loss: 2.99296, 1h 28m 1s (- 0h 30m 9s)
199 | epoch: 7, iter: 1990000 (74.8531%), loss: 2.92378, 1h 28m 26s (- 0h 29m 42s)
200 | epoch: 7, iter: 2000000 (75.2293%), loss: 2.94073, 1h 28m 52s (- 0h 29m 15s)
201 | epoch: 7, iter: 2010000 (75.6054%), loss: 3.01182, 1h 29m 17s (- 0h 28m 48s)
202 | epoch: 7, iter: 2020000 (75.9816%), loss: 2.93459, 1h 29m 41s (- 0h 28m 21s)
203 | epoch: 7, iter: 2030000 (76.3577%), loss: 2.84046, 1h 30m 6s (- 0h 27m 53s)
204 | epoch: 7, iter: 2040000 (76.7338%), loss: 2.89216, 1h 30m 31s (- 0h 27m 26s)
205 | epoch: 7, iter: 2050000 (77.1100%), loss: 3.04648, 1h 30m 55s (- 0h 26m 59s)
206 | epoch: 7, iter: 2060000 (77.4861%), loss: 2.96871, 1h 31m 20s (- 0h 26m 32s)
207 | epoch: 7, iter: 2070000 (77.8623%), loss: 2.90001, 1h 31m 48s (- 0h 26m 6s)
208 | epoch: 7, iter: 2080000 (78.2384%), loss: 2.92839, 1h 32m 14s (- 0h 25m 39s)
209 | epoch: 7, iter: 2090000 (78.6146%), loss: 2.90026, 1h 32m 41s (- 0h 25m 12s)
210 | epoch: 7, iter: 2100000 (78.9907%), loss: 2.93536, 1h 33m 7s (- 0h 24m 46s)
211 | epoch: 7, iter: 2110000 (79.3669%), loss: 2.93882, 1h 33m 33s (- 0h 24m 19s)
212 | epoch: 7, iter: 2120000 (79.7430%), loss: 2.88577, 1h 34m 0s (- 0h 23m 52s)
213 | epoch: 8, iter: 2130000 (80.1192%), loss: 0.88244, 1h 34m 27s (- 0h 23m 26s)
214 | epoch: 8, iter: 2140000 (80.4953%), loss: 2.85178, 1h 34m 55s (- 0h 22m 59s)
215 | epoch: 8, iter: 2150000 (80.8715%), loss: 2.81177, 1h 35m 20s (- 0h 22m 33s)
216 | epoch: 8, iter: 2160000 (81.2476%), loss: 2.84692, 1h 35m 47s (- 0h 22m 6s)
217 | epoch: 8, iter: 2170000 (81.6237%), loss: 2.81680, 1h 36m 13s (- 0h 21m 39s)
218 | epoch: 8, iter: 2180000 (81.9999%), loss: 2.82378, 1h 36m 39s (- 0h 21m 13s)
219 | epoch: 8, iter: 2190000 (82.3760%), loss: 2.84853, 1h 37m 5s (- 0h 20m 46s)
220 | epoch: 8, iter: 2200000 (82.7522%), loss: 2.78139, 1h 37m 31s (- 0h 20m 19s)
221 | epoch: 8, iter: 2210000 (83.1283%), loss: 2.82786, 1h 37m 56s (- 0h 19m 52s)
222 | epoch: 8, iter: 2220000 (83.5045%), loss: 2.78604, 1h 38m 22s (- 0h 19m 26s)
223 | epoch: 8, iter: 2230000 (83.8806%), loss: 2.84531, 1h 38m 52s (- 0h 19m 0s)
224 | epoch: 8, iter: 2240000 (84.2568%), loss: 2.87955, 1h 39m 20s (- 0h 18m 33s)
225 | epoch: 8, iter: 2250000 (84.6329%), loss: 2.86468, 1h 39m 47s (- 0h 18m 7s)
226 | epoch: 8, iter: 2260000 (85.0091%), loss: 2.84353, 1h 40m 16s (- 0h 17m 40s)
227 | epoch: 8, iter: 2270000 (85.3852%), loss: 2.84273, 1h 40m 47s (- 0h 17m 15s)
228 | epoch: 8, iter: 2280000 (85.7614%), loss: 2.87725, 1h 41m 17s (- 0h 16m 49s)
229 | epoch: 8, iter: 2290000 (86.1375%), loss: 2.83547, 1h 41m 44s (- 0h 16m 22s)
230 | epoch: 8, iter: 2300000 (86.5137%), loss: 2.85483, 1h 42m 11s (- 0h 15m 55s)
231 | epoch: 8, iter: 2310000 (86.8898%), loss: 2.79206, 1h 42m 39s (- 0h 15m 29s)
232 | epoch: 8, iter: 2320000 (87.2659%), loss: 2.87458, 1h 43m 9s (- 0h 15m 3s)
233 | epoch: 8, iter: 2330000 (87.6421%), loss: 2.80382, 1h 43m 37s (- 0h 14m 36s)
234 | epoch: 8, iter: 2340000 (88.0182%), loss: 2.86297, 1h 44m 5s (- 0h 14m 10s)
235 | epoch: 8, iter: 2350000 (88.3944%), loss: 2.79714, 1h 44m 29s (- 0h 13m 43s)
236 | epoch: 8, iter: 2360000 (88.7705%), loss: 2.79348, 1h 44m 53s (- 0h 13m 16s)
237 | epoch: 8, iter: 2370000 (89.1467%), loss: 2.85355, 1h 45m 18s (- 0h 12m 49s)
238 | epoch: 8, iter: 2380000 (89.5228%), loss: 2.85424, 1h 45m 44s (- 0h 12m 22s)
239 | epoch: 8, iter: 2390000 (89.8990%), loss: 2.81557, 1h 46m 9s (- 0h 11m 55s)
240 | epoch: 9, iter: 2400000 (90.2751%), loss: 1.98737, 1h 46m 37s (- 0h 11m 29s)
241 | epoch: 9, iter: 2410000 (90.6513%), loss: 2.77624, 1h 47m 5s (- 0h 11m 2s)
242 | epoch: 9, iter: 2420000 (91.0274%), loss: 2.72111, 1h 47m 31s (- 0h 10m 35s)
243 | epoch: 9, iter: 2430000 (91.4036%), loss: 2.74775, 1h 47m 56s (- 0h 10m 9s)
244 | epoch: 9, iter: 2440000 (91.7797%), loss: 2.72274, 1h 48m 22s (- 0h 9m 42s)
245 | epoch: 9, iter: 2450000 (92.1558%), loss: 2.77082, 1h 48m 50s (- 0h 9m 15s)
246 | epoch: 9, iter: 2460000 (92.5320%), loss: 2.72169, 1h 49m 19s (- 0h 8m 49s)
247 | epoch: 9, iter: 2470000 (92.9081%), loss: 2.73264, 1h 49m 46s (- 0h 8m 22s)
248 | epoch: 9, iter: 2480000 (93.2843%), loss: 2.75245, 1h 50m 13s (- 0h 7m 56s)
249 | epoch: 9, iter: 2490000 (93.6604%), loss: 2.73358, 1h 50m 39s (- 0h 7m 29s)
250 | epoch: 9, iter: 2500000 (94.0366%), loss: 2.73293, 1h 51m 4s (- 0h 7m 2s)
251 | epoch: 9, iter: 2510000 (94.4127%), loss: 2.79031, 1h 51m 30s (- 0h 6m 35s)
252 | epoch: 9, iter: 2520000 (94.7889%), loss: 2.75548, 1h 51m 57s (- 0h 6m 9s)
253 | epoch: 9, iter: 2530000 (95.1650%), loss: 2.73723, 1h 52m 27s (- 0h 5m 42s)
254 | epoch: 9, iter: 2540000 (95.5412%), loss: 2.71113, 1h 52m 57s (- 0h 5m 16s)
255 | epoch: 9, iter: 2550000 (95.9173%), loss: 2.68406, 1h 53m 23s (- 0h 4m 49s)
256 | epoch: 9, iter: 2560000 (96.2935%), loss: 2.78313, 1h 53m 49s (- 0h 4m 22s)
257 | epoch: 9, iter: 2570000 (96.6696%), loss: 2.79222, 1h 54m 17s (- 0h 3m 56s)
258 | epoch: 9, iter: 2580000 (97.0457%), loss: 2.79014, 1h 54m 49s (- 0h 3m 29s)
259 | epoch: 9, iter: 2590000 (97.4219%), loss: 2.79996, 1h 55m 15s (- 0h 3m 3s)
260 | epoch: 9, iter: 2600000 (97.7980%), loss: 2.74891, 1h 55m 42s (- 0h 2m 36s)
261 | epoch: 9, iter: 2610000 (98.1742%), loss: 2.76762, 1h 56m 9s (- 0h 2m 9s)
262 | epoch: 9, iter: 2620000 (98.5503%), loss: 2.74303, 1h 56m 38s (- 0h 1m 42s)
263 | epoch: 9, iter: 2630000 (98.9265%), loss: 2.76841, 1h 57m 4s (- 0h 1m 16s)
264 | epoch: 9, iter: 2640000 (99.3026%), loss: 2.76362, 1h 57m 32s (- 0h 0m 49s)
265 | epoch: 9, iter: 2650000 (99.6788%), loss: 2.72746, 1h 58m 7s (- 0h 0m 22s)
266 |
--------------------------------------------------------------------------------
/glove/logs/glove-50-2.log:
--------------------------------------------------------------------------------
1 | epoch: 0, iter: 10000 (0.3761%), loss: 89.00255, 0h 0m 26s (- 1h 57m 52s)
2 | epoch: 0, iter: 10000 (0.3761%), loss: 4.23990, 0h 0m 25s (- 1h 52m 9s)
3 | epoch: 0, iter: 20000 (0.7523%), loss: 3.56112, 0h 0m 50s (- 1h 50m 0s)
4 | epoch: 0, iter: 30000 (1.1284%), loss: 3.37674, 0h 1m 14s (- 1h 49m 6s)
5 | epoch: 0, iter: 40000 (1.5046%), loss: 3.24785, 0h 1m 40s (- 1h 49m 48s)
6 | epoch: 0, iter: 50000 (1.8807%), loss: 3.14348, 0h 2m 9s (- 1h 52m 25s)
7 | epoch: 0, iter: 60000 (2.2569%), loss: 3.00857, 0h 2m 35s (- 1h 52m 18s)
8 | epoch: 0, iter: 70000 (2.6330%), loss: 3.06209, 0h 3m 3s (- 1h 52m 54s)
9 | epoch: 0, iter: 80000 (3.0092%), loss: 3.00152, 0h 3m 30s (- 1h 52m 49s)
10 | epoch: 0, iter: 90000 (3.3853%), loss: 2.92013, 0h 3m 57s (- 1h 53m 0s)
11 | epoch: 0, iter: 100000 (3.7615%), loss: 2.94168, 0h 4m 24s (- 1h 52m 44s)
12 | epoch: 0, iter: 110000 (4.1376%), loss: 2.97350, 0h 4m 50s (- 1h 52m 6s)
13 | epoch: 0, iter: 120000 (4.5138%), loss: 2.99004, 0h 5m 14s (- 1h 50m 56s)
14 | epoch: 0, iter: 130000 (4.8899%), loss: 2.87887, 0h 5m 40s (- 1h 50m 22s)
15 | epoch: 0, iter: 140000 (5.2660%), loss: 2.82599, 0h 6m 6s (- 1h 49m 53s)
16 | epoch: 0, iter: 150000 (5.6422%), loss: 2.89015, 0h 6m 33s (- 1h 49m 46s)
17 | epoch: 0, iter: 160000 (6.0183%), loss: 2.89898, 0h 6m 58s (- 1h 48m 58s)
18 | epoch: 0, iter: 170000 (6.3945%), loss: 2.88210, 0h 7m 23s (- 1h 48m 12s)
19 | epoch: 0, iter: 180000 (6.7706%), loss: 2.87438, 0h 7m 48s (- 1h 47m 32s)
20 | epoch: 0, iter: 190000 (7.1468%), loss: 2.79753, 0h 8m 13s (- 1h 46m 49s)
21 | epoch: 0, iter: 200000 (7.5229%), loss: 2.85030, 0h 8m 38s (- 1h 46m 12s)
22 | epoch: 0, iter: 210000 (7.8991%), loss: 2.90540, 0h 9m 3s (- 1h 45m 32s)
23 | epoch: 0, iter: 220000 (8.2752%), loss: 2.77856, 0h 9m 38s (- 1h 46m 48s)
24 | epoch: 0, iter: 230000 (8.6514%), loss: 2.79845, 0h 10m 11s (- 1h 47m 32s)
25 | epoch: 0, iter: 240000 (9.0275%), loss: 2.79822, 0h 10m 47s (- 1h 48m 40s)
26 | epoch: 0, iter: 250000 (9.4037%), loss: 2.80284, 0h 11m 22s (- 1h 49m 35s)
27 | epoch: 0, iter: 260000 (9.7798%), loss: 2.81575, 0h 11m 58s (- 1h 50m 24s)
28 | epoch: 1, iter: 270000 (10.1560%), loss: 1.07692, 0h 12m 31s (- 1h 50m 49s)
29 | epoch: 1, iter: 280000 (10.5321%), loss: 2.50637, 0h 12m 56s (- 1h 49m 56s)
30 | epoch: 1, iter: 290000 (10.9082%), loss: 2.55343, 0h 13m 21s (- 1h 49m 2s)
31 | epoch: 1, iter: 300000 (11.2844%), loss: 2.52180, 0h 13m 46s (- 1h 48m 19s)
32 | epoch: 1, iter: 310000 (11.6605%), loss: 2.46981, 0h 14m 12s (- 1h 47m 37s)
33 | epoch: 1, iter: 320000 (12.0367%), loss: 2.53523, 0h 14m 37s (- 1h 46m 53s)
34 | epoch: 1, iter: 330000 (12.4128%), loss: 2.48009, 0h 15m 2s (- 1h 46m 10s)
35 | epoch: 1, iter: 340000 (12.7890%), loss: 2.52317, 0h 15m 28s (- 1h 45m 28s)
36 | epoch: 1, iter: 350000 (13.1651%), loss: 2.47349, 0h 15m 53s (- 1h 44m 48s)
37 | epoch: 1, iter: 360000 (13.5413%), loss: 2.52527, 0h 16m 18s (- 1h 44m 9s)
38 | epoch: 1, iter: 370000 (13.9174%), loss: 2.56186, 0h 16m 44s (- 1h 43m 34s)
39 | epoch: 1, iter: 380000 (14.2936%), loss: 2.56055, 0h 17m 9s (- 1h 42m 55s)
40 | epoch: 1, iter: 390000 (14.6697%), loss: 2.53532, 0h 17m 35s (- 1h 42m 20s)
41 | epoch: 1, iter: 400000 (15.0459%), loss: 2.46117, 0h 18m 1s (- 1h 41m 43s)
42 | epoch: 1, iter: 410000 (15.4220%), loss: 2.54861, 0h 18m 26s (- 1h 41m 7s)
43 | epoch: 1, iter: 420000 (15.7981%), loss: 2.53792, 0h 18m 51s (- 1h 40m 31s)
44 | epoch: 1, iter: 430000 (16.1743%), loss: 2.52409, 0h 19m 20s (- 1h 40m 13s)
45 | epoch: 1, iter: 440000 (16.5504%), loss: 2.49265, 0h 19m 49s (- 1h 39m 58s)
46 | epoch: 1, iter: 450000 (16.9266%), loss: 2.58585, 0h 20m 17s (- 1h 39m 36s)
47 | epoch: 1, iter: 460000 (17.3027%), loss: 2.52512, 0h 20m 45s (- 1h 39m 15s)
48 | epoch: 1, iter: 470000 (17.6789%), loss: 2.54264, 0h 21m 14s (- 1h 38m 55s)
49 | epoch: 1, iter: 480000 (18.0550%), loss: 2.52345, 0h 21m 45s (- 1h 38m 46s)
50 | epoch: 1, iter: 490000 (18.4312%), loss: 2.55483, 0h 22m 20s (- 1h 38m 51s)
51 | epoch: 1, iter: 500000 (18.8073%), loss: 2.50006, 0h 22m 53s (- 1h 38m 50s)
52 | epoch: 1, iter: 510000 (19.1835%), loss: 2.50646, 0h 23m 27s (- 1h 38m 51s)
53 | epoch: 1, iter: 520000 (19.5596%), loss: 2.55317, 0h 24m 0s (- 1h 38m 42s)
54 | epoch: 1, iter: 530000 (19.9358%), loss: 2.52338, 0h 24m 32s (- 1h 38m 32s)
55 | epoch: 2, iter: 540000 (20.3119%), loss: 1.96104, 0h 25m 8s (- 1h 38m 38s)
56 | epoch: 2, iter: 550000 (20.6880%), loss: 2.37426, 0h 25m 40s (- 1h 38m 25s)
57 | epoch: 2, iter: 560000 (21.0642%), loss: 2.34239, 0h 26m 13s (- 1h 38m 15s)
58 | epoch: 2, iter: 570000 (21.4403%), loss: 2.41956, 0h 26m 48s (- 1h 38m 12s)
59 | epoch: 2, iter: 580000 (21.8165%), loss: 2.40809, 0h 27m 21s (- 1h 38m 4s)
60 | epoch: 2, iter: 590000 (22.1926%), loss: 2.40283, 0h 27m 55s (- 1h 37m 53s)
61 | epoch: 2, iter: 600000 (22.5688%), loss: 2.41015, 0h 28m 29s (- 1h 37m 43s)
62 | epoch: 2, iter: 610000 (22.9449%), loss: 2.36017, 0h 29m 2s (- 1h 37m 33s)
63 | epoch: 2, iter: 620000 (23.3211%), loss: 2.39561, 0h 29m 37s (- 1h 37m 22s)
64 | epoch: 2, iter: 630000 (23.6972%), loss: 2.38860, 0h 30m 10s (- 1h 37m 8s)
65 | epoch: 2, iter: 640000 (24.0734%), loss: 2.37138, 0h 30m 42s (- 1h 36m 51s)
66 | epoch: 2, iter: 650000 (24.4495%), loss: 2.39914, 0h 31m 21s (- 1h 36m 54s)
67 | epoch: 2, iter: 660000 (24.8257%), loss: 2.42098, 0h 32m 0s (- 1h 36m 55s)
68 | epoch: 2, iter: 670000 (25.2018%), loss: 2.40523, 0h 32m 39s (- 1h 36m 55s)
69 | epoch: 2, iter: 680000 (25.5779%), loss: 2.45009, 0h 33m 16s (- 1h 36m 48s)
70 | epoch: 2, iter: 690000 (25.9541%), loss: 2.43312, 0h 33m 52s (- 1h 36m 38s)
71 | epoch: 2, iter: 700000 (26.3302%), loss: 2.40913, 0h 34m 29s (- 1h 36m 30s)
72 | epoch: 2, iter: 710000 (26.7064%), loss: 2.39561, 0h 35m 6s (- 1h 36m 20s)
73 | epoch: 2, iter: 720000 (27.0825%), loss: 2.38998, 0h 35m 43s (- 1h 36m 11s)
74 | epoch: 2, iter: 730000 (27.4587%), loss: 2.41761, 0h 36m 20s (- 1h 35m 59s)
75 | epoch: 2, iter: 740000 (27.8348%), loss: 2.46604, 0h 36m 56s (- 1h 35m 46s)
76 | epoch: 2, iter: 750000 (28.2110%), loss: 2.41224, 0h 37m 32s (- 1h 35m 32s)
77 | epoch: 2, iter: 760000 (28.5871%), loss: 2.38202, 0h 38m 8s (- 1h 35m 16s)
78 | epoch: 2, iter: 770000 (28.9633%), loss: 2.39928, 0h 38m 43s (- 1h 34m 57s)
79 | epoch: 2, iter: 780000 (29.3394%), loss: 2.40417, 0h 39m 17s (- 1h 34m 38s)
80 | epoch: 2, iter: 790000 (29.7156%), loss: 2.47527, 0h 39m 54s (- 1h 34m 23s)
81 | epoch: 3, iter: 800000 (30.0917%), loss: 0.57014, 0h 40m 36s (- 1h 34m 20s)
82 | epoch: 3, iter: 810000 (30.4679%), loss: 2.30265, 0h 41m 14s (- 1h 34m 6s)
83 | epoch: 3, iter: 820000 (30.8440%), loss: 2.30050, 0h 41m 45s (- 1h 33m 37s)
84 | epoch: 3, iter: 830000 (31.2201%), loss: 2.33637, 0h 42m 14s (- 1h 33m 4s)
85 | epoch: 3, iter: 840000 (31.5963%), loss: 2.31170, 0h 42m 48s (- 1h 32m 41s)
86 | epoch: 3, iter: 850000 (31.9724%), loss: 2.29906, 0h 43m 19s (- 1h 32m 10s)
87 | epoch: 3, iter: 860000 (32.3486%), loss: 2.28992, 0h 43m 44s (- 1h 31m 29s)
88 | epoch: 3, iter: 870000 (32.7247%), loss: 2.29410, 0h 44m 10s (- 1h 30m 48s)
89 | epoch: 3, iter: 880000 (33.1009%), loss: 2.28426, 0h 44m 35s (- 1h 30m 7s)
90 | epoch: 3, iter: 890000 (33.4770%), loss: 2.31735, 0h 45m 1s (- 1h 29m 27s)
91 | epoch: 3, iter: 900000 (33.8532%), loss: 2.30471, 0h 45m 26s (- 1h 28m 48s)
92 | epoch: 3, iter: 910000 (34.2293%), loss: 2.31350, 0h 45m 52s (- 1h 28m 8s)
93 | epoch: 3, iter: 920000 (34.6055%), loss: 2.28167, 0h 46m 17s (- 1h 27m 29s)
94 | epoch: 3, iter: 930000 (34.9816%), loss: 2.34886, 0h 46m 42s (- 1h 26m 49s)
95 | epoch: 3, iter: 940000 (35.3578%), loss: 2.33444, 0h 47m 8s (- 1h 26m 10s)
96 | epoch: 3, iter: 950000 (35.7339%), loss: 2.29114, 0h 47m 33s (- 1h 25m 32s)
97 | epoch: 3, iter: 960000 (36.1100%), loss: 2.30619, 0h 47m 58s (- 1h 24m 53s)
98 | epoch: 3, iter: 970000 (36.4862%), loss: 2.38447, 0h 48m 23s (- 1h 24m 14s)
99 | epoch: 3, iter: 980000 (36.8623%), loss: 2.34918, 0h 48m 48s (- 1h 23m 36s)
100 | epoch: 3, iter: 990000 (37.2385%), loss: 2.33532, 0h 49m 13s (- 1h 22m 58s)
101 | epoch: 3, iter: 1000000 (37.6146%), loss: 2.36332, 0h 49m 38s (- 1h 22m 20s)
102 | epoch: 3, iter: 1010000 (37.9908%), loss: 2.33649, 0h 50m 2s (- 1h 21m 41s)
103 | epoch: 3, iter: 1020000 (38.3669%), loss: 2.35362, 0h 50m 27s (- 1h 21m 2s)
104 | epoch: 3, iter: 1030000 (38.7431%), loss: 2.36210, 0h 50m 51s (- 1h 20m 24s)
105 | epoch: 3, iter: 1040000 (39.1192%), loss: 2.40225, 0h 51m 15s (- 1h 19m 46s)
106 | epoch: 3, iter: 1050000 (39.4954%), loss: 2.35172, 0h 51m 39s (- 1h 19m 8s)
107 | epoch: 3, iter: 1060000 (39.8715%), loss: 2.36974, 0h 52m 3s (- 1h 18m 31s)
108 | epoch: 4, iter: 1070000 (40.2477%), loss: 1.45886, 0h 52m 28s (- 1h 17m 54s)
109 | epoch: 4, iter: 1080000 (40.6238%), loss: 2.23856, 0h 52m 52s (- 1h 17m 16s)
110 | epoch: 4, iter: 1090000 (40.9999%), loss: 2.26468, 0h 53m 16s (- 1h 16m 40s)
111 | epoch: 4, iter: 1100000 (41.3761%), loss: 2.28752, 0h 53m 40s (- 1h 16m 3s)
112 | epoch: 4, iter: 1110000 (41.7522%), loss: 2.23307, 0h 54m 4s (- 1h 15m 26s)
113 | epoch: 4, iter: 1120000 (42.1284%), loss: 2.25352, 0h 54m 28s (- 1h 14m 50s)
114 | epoch: 4, iter: 1130000 (42.5045%), loss: 2.25366, 0h 54m 53s (- 1h 14m 14s)
115 | epoch: 4, iter: 1140000 (42.8807%), loss: 2.25482, 0h 55m 17s (- 1h 13m 38s)
116 | epoch: 4, iter: 1150000 (43.2568%), loss: 2.30038, 0h 55m 41s (- 1h 13m 3s)
117 | epoch: 4, iter: 1160000 (43.6330%), loss: 2.28757, 0h 56m 5s (- 1h 12m 28s)
118 | epoch: 4, iter: 1170000 (44.0091%), loss: 2.24441, 0h 56m 30s (- 1h 11m 52s)
119 | epoch: 4, iter: 1180000 (44.3853%), loss: 2.21708, 0h 56m 54s (- 1h 11m 17s)
120 | epoch: 4, iter: 1190000 (44.7614%), loss: 2.28030, 0h 57m 18s (- 1h 10m 43s)
121 | epoch: 4, iter: 1200000 (45.1376%), loss: 2.26103, 0h 57m 42s (- 1h 10m 8s)
122 | epoch: 4, iter: 1210000 (45.5137%), loss: 2.28927, 0h 58m 6s (- 1h 9m 33s)
123 | epoch: 4, iter: 1220000 (45.8898%), loss: 2.28697, 0h 58m 30s (- 1h 8m 59s)
124 | epoch: 4, iter: 1230000 (46.2660%), loss: 2.28894, 0h 58m 54s (- 1h 8m 25s)
125 | epoch: 4, iter: 1240000 (46.6421%), loss: 2.31984, 0h 59m 19s (- 1h 7m 51s)
126 | epoch: 4, iter: 1250000 (47.0183%), loss: 2.28814, 0h 59m 43s (- 1h 7m 17s)
127 | epoch: 4, iter: 1260000 (47.3944%), loss: 2.32861, 1h 0m 7s (- 1h 6m 44s)
128 | epoch: 4, iter: 1270000 (47.7706%), loss: 2.31273, 1h 0m 32s (- 1h 6m 11s)
129 | epoch: 4, iter: 1280000 (48.1467%), loss: 2.29261, 1h 0m 56s (- 1h 5m 38s)
130 | epoch: 4, iter: 1290000 (48.5229%), loss: 2.27582, 1h 1m 21s (- 1h 5m 6s)
131 | epoch: 4, iter: 1300000 (48.8990%), loss: 2.33096, 1h 1m 46s (- 1h 4m 32s)
132 | epoch: 4, iter: 1310000 (49.2752%), loss: 2.29106, 1h 2m 10s (- 1h 3m 59s)
133 | epoch: 4, iter: 1320000 (49.6513%), loss: 2.24239, 1h 2m 34s (- 1h 3m 26s)
134 | epoch: 5, iter: 1330000 (50.0275%), loss: 0.15822, 1h 2m 58s (- 1h 2m 54s)
135 | epoch: 5, iter: 1340000 (50.4036%), loss: 2.18791, 1h 3m 22s (- 1h 2m 21s)
136 | epoch: 5, iter: 1350000 (50.7798%), loss: 2.21895, 1h 3m 47s (- 1h 1m 50s)
137 | epoch: 5, iter: 1360000 (51.1559%), loss: 2.23810, 1h 4m 16s (- 1h 1m 21s)
138 | epoch: 5, iter: 1370000 (51.5320%), loss: 2.26210, 1h 4m 41s (- 1h 0m 50s)
139 | epoch: 5, iter: 1380000 (51.9082%), loss: 2.23711, 1h 5m 5s (- 1h 0m 18s)
140 | epoch: 5, iter: 1390000 (52.2843%), loss: 2.20307, 1h 5m 29s (- 0h 59m 46s)
141 | epoch: 5, iter: 1400000 (52.6605%), loss: 2.21629, 1h 5m 54s (- 0h 59m 14s)
142 | epoch: 5, iter: 1410000 (53.0366%), loss: 2.21685, 1h 6m 18s (- 0h 58m 43s)
143 | epoch: 5, iter: 1420000 (53.4128%), loss: 2.23507, 1h 6m 45s (- 0h 58m 13s)
144 | epoch: 5, iter: 1430000 (53.7889%), loss: 2.24258, 1h 7m 13s (- 0h 57m 45s)
145 | epoch: 5, iter: 1440000 (54.1651%), loss: 2.20575, 1h 7m 40s (- 0h 57m 16s)
146 | epoch: 5, iter: 1450000 (54.5412%), loss: 2.20946, 1h 8m 6s (- 0h 56m 46s)
147 | epoch: 5, iter: 1460000 (54.9174%), loss: 2.20850, 1h 8m 31s (- 0h 56m 15s)
148 | epoch: 5, iter: 1470000 (55.2935%), loss: 2.21064, 1h 8m 55s (- 0h 55m 44s)
149 | epoch: 5, iter: 1480000 (55.6697%), loss: 2.25551, 1h 9m 21s (- 0h 55m 13s)
150 | epoch: 5, iter: 1490000 (56.0458%), loss: 2.25426, 1h 9m 51s (- 0h 54m 47s)
151 | epoch: 5, iter: 1500000 (56.4219%), loss: 2.19820, 1h 10m 19s (- 0h 54m 19s)
152 | epoch: 5, iter: 1510000 (56.7981%), loss: 2.24299, 1h 10m 48s (- 0h 53m 51s)
153 | epoch: 5, iter: 1520000 (57.1742%), loss: 2.20730, 1h 11m 16s (- 0h 53m 23s)
154 | epoch: 5, iter: 1530000 (57.5504%), loss: 2.25956, 1h 11m 46s (- 0h 52m 56s)
155 | epoch: 5, iter: 1540000 (57.9265%), loss: 2.23702, 1h 12m 12s (- 0h 52m 26s)
156 | epoch: 5, iter: 1550000 (58.3027%), loss: 2.26998, 1h 12m 38s (- 0h 51m 57s)
157 | epoch: 5, iter: 1560000 (58.6788%), loss: 2.25470, 1h 13m 4s (- 0h 51m 27s)
158 | epoch: 5, iter: 1570000 (59.0550%), loss: 2.26432, 1h 13m 30s (- 0h 50m 58s)
159 | epoch: 5, iter: 1580000 (59.4311%), loss: 2.26547, 1h 13m 55s (- 0h 50m 27s)
160 | epoch: 5, iter: 1590000 (59.8073%), loss: 2.28580, 1h 14m 19s (- 0h 49m 56s)
161 | epoch: 6, iter: 1600000 (60.1834%), loss: 1.06640, 1h 14m 47s (- 0h 49m 28s)
162 | epoch: 6, iter: 1610000 (60.5596%), loss: 2.19394, 1h 15m 14s (- 0h 49m 0s)
163 | epoch: 6, iter: 1620000 (60.9357%), loss: 2.16942, 1h 15m 40s (- 0h 48m 30s)
164 | epoch: 6, iter: 1630000 (61.3118%), loss: 2.17930, 1h 16m 6s (- 0h 48m 1s)
165 | epoch: 6, iter: 1640000 (61.6880%), loss: 2.17686, 1h 16m 32s (- 0h 47m 32s)
166 | epoch: 6, iter: 1650000 (62.0641%), loss: 2.18260, 1h 16m 58s (- 0h 47m 2s)
167 | epoch: 6, iter: 1660000 (62.4403%), loss: 2.19942, 1h 17m 23s (- 0h 46m 33s)
168 | epoch: 6, iter: 1670000 (62.8164%), loss: 2.16456, 1h 17m 49s (- 0h 46m 3s)
169 | epoch: 6, iter: 1680000 (63.1926%), loss: 2.20212, 1h 18m 17s (- 0h 45m 35s)
170 | epoch: 6, iter: 1690000 (63.5687%), loss: 2.17432, 1h 18m 43s (- 0h 45m 6s)
171 | epoch: 6, iter: 1700000 (63.9449%), loss: 2.16437, 1h 19m 10s (- 0h 44m 38s)
172 | epoch: 6, iter: 1710000 (64.3210%), loss: 2.18602, 1h 19m 36s (- 0h 44m 9s)
173 | epoch: 6, iter: 1720000 (64.6972%), loss: 2.23061, 1h 20m 1s (- 0h 43m 40s)
174 | epoch: 6, iter: 1730000 (65.0733%), loss: 2.20753, 1h 20m 27s (- 0h 43m 11s)
175 | epoch: 6, iter: 1740000 (65.4495%), loss: 2.21474, 1h 20m 51s (- 0h 42m 41s)
176 | epoch: 6, iter: 1750000 (65.8256%), loss: 2.15269, 1h 21m 18s (- 0h 42m 12s)
177 | epoch: 6, iter: 1760000 (66.2017%), loss: 2.23088, 1h 21m 45s (- 0h 41m 44s)
178 | epoch: 6, iter: 1770000 (66.5779%), loss: 2.20283, 1h 22m 11s (- 0h 41m 15s)
179 | epoch: 6, iter: 1780000 (66.9540%), loss: 2.20501, 1h 22m 35s (- 0h 40m 45s)
180 | epoch: 6, iter: 1790000 (67.3302%), loss: 2.19465, 1h 23m 0s (- 0h 40m 16s)
181 | epoch: 6, iter: 1800000 (67.7063%), loss: 2.22396, 1h 23m 24s (- 0h 39m 46s)
182 | epoch: 6, iter: 1810000 (68.0825%), loss: 2.22853, 1h 23m 48s (- 0h 39m 17s)
183 | epoch: 6, iter: 1820000 (68.4586%), loss: 2.27065, 1h 24m 13s (- 0h 38m 48s)
184 | epoch: 6, iter: 1830000 (68.8348%), loss: 2.24704, 1h 24m 37s (- 0h 38m 19s)
185 | epoch: 6, iter: 1840000 (69.2109%), loss: 2.18483, 1h 25m 5s (- 0h 37m 51s)
186 | epoch: 6, iter: 1850000 (69.5871%), loss: 2.19509, 1h 25m 31s (- 0h 37m 22s)
187 | epoch: 6, iter: 1860000 (69.9632%), loss: 2.27627, 1h 25m 57s (- 0h 36m 54s)
188 | epoch: 7, iter: 1870000 (70.3394%), loss: 1.96092, 1h 26m 43s (- 0h 36m 34s)
189 | epoch: 7, iter: 1880000 (70.7155%), loss: 2.14027, 1h 27m 18s (- 0h 36m 9s)
190 | epoch: 7, iter: 1890000 (71.0917%), loss: 2.13711, 1h 27m 48s (- 0h 35m 42s)
191 | epoch: 7, iter: 1900000 (71.4678%), loss: 2.14337, 1h 28m 16s (- 0h 35m 14s)
192 | epoch: 7, iter: 1910000 (71.8439%), loss: 2.12662, 1h 28m 42s (- 0h 34m 46s)
193 | epoch: 7, iter: 1920000 (72.2201%), loss: 2.17027, 1h 29m 9s (- 0h 34m 17s)
194 | epoch: 7, iter: 1930000 (72.5962%), loss: 2.12584, 1h 29m 54s (- 0h 33m 56s)
195 | epoch: 7, iter: 1940000 (72.9724%), loss: 2.17203, 1h 30m 41s (- 0h 33m 35s)
196 | epoch: 7, iter: 1950000 (73.3485%), loss: 2.16768, 1h 31m 16s (- 0h 33m 10s)
197 | epoch: 7, iter: 1960000 (73.7247%), loss: 2.16004, 1h 31m 44s (- 0h 32m 41s)
198 | epoch: 7, iter: 1970000 (74.1008%), loss: 2.13675, 1h 32m 10s (- 0h 32m 12s)
199 | epoch: 7, iter: 1980000 (74.4770%), loss: 2.16717, 1h 32m 35s (- 0h 31m 43s)
200 | epoch: 7, iter: 1990000 (74.8531%), loss: 2.16852, 1h 33m 0s (- 0h 31m 14s)
201 | epoch: 7, iter: 2000000 (75.2293%), loss: 2.15482, 1h 33m 25s (- 0h 30m 45s)
202 | epoch: 7, iter: 2010000 (75.6054%), loss: 2.15962, 1h 33m 51s (- 0h 30m 17s)
203 | epoch: 7, iter: 2020000 (75.9816%), loss: 2.22663, 1h 34m 17s (- 0h 29m 48s)
204 | epoch: 7, iter: 2030000 (76.3577%), loss: 2.20139, 1h 34m 42s (- 0h 29m 19s)
205 | epoch: 7, iter: 2040000 (76.7338%), loss: 2.23373, 1h 35m 7s (- 0h 28m 50s)
206 | epoch: 7, iter: 2050000 (77.1100%), loss: 2.16034, 1h 35m 32s (- 0h 28m 21s)
207 | epoch: 7, iter: 2060000 (77.4861%), loss: 2.18742, 1h 35m 58s (- 0h 27m 53s)
208 | epoch: 7, iter: 2070000 (77.8623%), loss: 2.19489, 1h 36m 23s (- 0h 27m 24s)
209 | epoch: 7, iter: 2080000 (78.2384%), loss: 2.20431, 1h 36m 48s (- 0h 26m 55s)
210 | epoch: 7, iter: 2090000 (78.6146%), loss: 2.19879, 1h 37m 13s (- 0h 26m 26s)
211 | epoch: 7, iter: 2100000 (78.9907%), loss: 2.18698, 1h 37m 37s (- 0h 25m 57s)
212 | epoch: 7, iter: 2110000 (79.3669%), loss: 2.20181, 1h 38m 2s (- 0h 25m 29s)
213 | epoch: 7, iter: 2120000 (79.7430%), loss: 2.23161, 1h 38m 26s (- 0h 25m 0s)
214 | epoch: 8, iter: 2130000 (80.1192%), loss: 0.65337, 1h 38m 51s (- 0h 24m 31s)
215 | epoch: 8, iter: 2140000 (80.4953%), loss: 2.12894, 1h 39m 16s (- 0h 24m 3s)
216 | epoch: 8, iter: 2150000 (80.8715%), loss: 2.11727, 1h 39m 42s (- 0h 23m 34s)
217 | epoch: 8, iter: 2160000 (81.2476%), loss: 2.15135, 1h 40m 7s (- 0h 23m 6s)
218 | epoch: 8, iter: 2170000 (81.6237%), loss: 2.11057, 1h 40m 31s (- 0h 22m 37s)
219 | epoch: 8, iter: 2180000 (81.9999%), loss: 2.13452, 1h 40m 56s (- 0h 22m 9s)
220 | epoch: 8, iter: 2190000 (82.3760%), loss: 2.12348, 1h 41m 22s (- 0h 21m 41s)
221 | epoch: 8, iter: 2200000 (82.7522%), loss: 2.14145, 1h 41m 48s (- 0h 21m 13s)
222 | epoch: 8, iter: 2210000 (83.1283%), loss: 2.11348, 1h 42m 13s (- 0h 20m 44s)
223 | epoch: 8, iter: 2220000 (83.5045%), loss: 2.18577, 1h 42m 39s (- 0h 20m 16s)
224 | epoch: 8, iter: 2230000 (83.8806%), loss: 2.12986, 1h 43m 12s (- 0h 19m 50s)
225 | epoch: 8, iter: 2240000 (84.2568%), loss: 2.16181, 1h 43m 47s (- 0h 19m 23s)
226 | epoch: 8, iter: 2250000 (84.6329%), loss: 2.14731, 1h 44m 20s (- 0h 18m 56s)
227 | epoch: 8, iter: 2260000 (85.0091%), loss: 2.17851, 1h 44m 53s (- 0h 18m 29s)
228 | epoch: 8, iter: 2270000 (85.3852%), loss: 2.13049, 1h 45m 26s (- 0h 18m 2s)
229 | epoch: 8, iter: 2280000 (85.7614%), loss: 2.15400, 1h 46m 8s (- 0h 17m 37s)
230 | epoch: 8, iter: 2290000 (86.1375%), loss: 2.14661, 1h 46m 43s (- 0h 17m 10s)
231 | epoch: 8, iter: 2300000 (86.5137%), loss: 2.15712, 1h 47m 16s (- 0h 16m 43s)
232 | epoch: 8, iter: 2310000 (86.8898%), loss: 2.13982, 1h 47m 49s (- 0h 16m 16s)
233 | epoch: 8, iter: 2320000 (87.2659%), loss: 2.15644, 1h 48m 22s (- 0h 15m 48s)
234 | epoch: 8, iter: 2330000 (87.6421%), loss: 2.17828, 1h 48m 54s (- 0h 15m 21s)
235 | epoch: 8, iter: 2340000 (88.0182%), loss: 2.15768, 1h 49m 27s (- 0h 14m 54s)
236 | epoch: 8, iter: 2350000 (88.3944%), loss: 2.20500, 1h 50m 3s (- 0h 14m 27s)
237 | epoch: 8, iter: 2360000 (88.7705%), loss: 2.16378, 1h 50m 36s (- 0h 13m 59s)
238 | epoch: 8, iter: 2370000 (89.1467%), loss: 2.18628, 1h 51m 9s (- 0h 13m 31s)
239 | epoch: 8, iter: 2380000 (89.5228%), loss: 2.16947, 1h 51m 41s (- 0h 13m 4s)
240 | epoch: 8, iter: 2390000 (89.8990%), loss: 2.20324, 1h 52m 14s (- 0h 12m 36s)
241 | epoch: 9, iter: 2400000 (90.2751%), loss: 1.54456, 1h 52m 46s (- 0h 12m 8s)
242 | epoch: 9, iter: 2410000 (90.6513%), loss: 2.09595, 1h 53m 19s (- 0h 11m 41s)
243 | epoch: 9, iter: 2420000 (91.0274%), loss: 2.10083, 1h 53m 50s (- 0h 11m 13s)
244 | epoch: 9, iter: 2430000 (91.4036%), loss: 2.10994, 1h 54m 22s (- 0h 10m 45s)
245 | epoch: 9, iter: 2440000 (91.7797%), loss: 2.12289, 1h 54m 54s (- 0h 10m 17s)
246 | epoch: 9, iter: 2450000 (92.1558%), loss: 2.13542, 1h 55m 26s (- 0h 9m 49s)
247 | epoch: 9, iter: 2460000 (92.5320%), loss: 2.12480, 1h 55m 58s (- 0h 9m 21s)
248 | epoch: 9, iter: 2470000 (92.9081%), loss: 2.13357, 1h 56m 30s (- 0h 8m 53s)
249 | epoch: 9, iter: 2480000 (93.2843%), loss: 2.12404, 1h 57m 1s (- 0h 8m 25s)
250 | epoch: 9, iter: 2490000 (93.6604%), loss: 2.12222, 1h 57m 33s (- 0h 7m 57s)
251 | epoch: 9, iter: 2500000 (94.0366%), loss: 2.10488, 1h 58m 5s (- 0h 7m 29s)
252 | epoch: 9, iter: 2510000 (94.4127%), loss: 2.14049, 1h 58m 37s (- 0h 7m 1s)
253 | epoch: 9, iter: 2520000 (94.7889%), loss: 2.13937, 1h 59m 8s (- 0h 6m 32s)
254 | epoch: 9, iter: 2530000 (95.1650%), loss: 2.15016, 1h 59m 40s (- 0h 6m 4s)
255 | epoch: 9, iter: 2540000 (95.5412%), loss: 2.13877, 2h 0m 11s (- 0h 5m 36s)
256 | epoch: 9, iter: 2550000 (95.9173%), loss: 2.12774, 2h 0m 43s (- 0h 5m 8s)
257 | epoch: 9, iter: 2560000 (96.2935%), loss: 2.12512, 2h 1m 15s (- 0h 4m 40s)
258 | epoch: 9, iter: 2570000 (96.6696%), loss: 2.13565, 2h 1m 46s (- 0h 4m 11s)
259 | epoch: 9, iter: 2580000 (97.0457%), loss: 2.12948, 2h 2m 18s (- 0h 3m 43s)
260 | epoch: 9, iter: 2590000 (97.4219%), loss: 2.14585, 2h 2m 49s (- 0h 3m 15s)
261 | epoch: 9, iter: 2600000 (97.7980%), loss: 2.17543, 2h 3m 21s (- 0h 2m 46s)
262 | epoch: 9, iter: 2610000 (98.1742%), loss: 2.12660, 2h 3m 53s (- 0h 2m 18s)
263 | epoch: 9, iter: 2620000 (98.5503%), loss: 2.12200, 2h 4m 27s (- 0h 1m 49s)
264 | epoch: 9, iter: 2630000 (98.9265%), loss: 2.16472, 2h 5m 2s (- 0h 1m 21s)
265 | epoch: 9, iter: 2640000 (99.3026%), loss: 2.14623, 2h 5m 39s (- 0h 0m 52s)
266 | epoch: 9, iter: 2650000 (99.6788%), loss: 2.14151, 2h 6m 13s (- 0h 0m 24s)
267 |
--------------------------------------------------------------------------------
/glove/logs/glove-50-3.log:
--------------------------------------------------------------------------------
1 | epoch: 0, iter: 10000 (0.3761%), loss: 3.44183, 0h 0m 25s (- 1h 50m 31s)
2 | epoch: 0, iter: 20000 (0.7523%), loss: 3.01936, 0h 0m 49s (- 1h 48m 7s)
3 | epoch: 0, iter: 30000 (1.1284%), loss: 2.84242, 0h 1m 13s (- 1h 46m 54s)
4 | epoch: 0, iter: 40000 (1.5046%), loss: 2.70479, 0h 1m 37s (- 1h 46m 49s)
5 | epoch: 0, iter: 50000 (1.8807%), loss: 2.61509, 0h 2m 2s (- 1h 46m 23s)
6 | epoch: 0, iter: 60000 (2.2569%), loss: 2.56834, 0h 2m 26s (- 1h 45m 40s)
7 | epoch: 0, iter: 70000 (2.6330%), loss: 2.56049, 0h 2m 50s (- 1h 45m 3s)
8 | epoch: 0, iter: 80000 (3.0092%), loss: 2.52846, 0h 3m 14s (- 1h 44m 29s)
9 | epoch: 0, iter: 90000 (3.3853%), loss: 2.51799, 0h 3m 40s (- 1h 44m 46s)
10 | epoch: 0, iter: 100000 (3.7615%), loss: 2.47396, 0h 4m 4s (- 1h 44m 22s)
11 | epoch: 0, iter: 110000 (4.1376%), loss: 2.49277, 0h 4m 29s (- 1h 43m 53s)
12 | epoch: 0, iter: 120000 (4.5138%), loss: 2.44337, 0h 4m 54s (- 1h 43m 43s)
13 | epoch: 0, iter: 130000 (4.8899%), loss: 2.47379, 0h 5m 18s (- 1h 43m 16s)
14 | epoch: 0, iter: 140000 (5.2660%), loss: 2.46379, 0h 5m 42s (- 1h 42m 44s)
15 | epoch: 0, iter: 150000 (5.6422%), loss: 2.50746, 0h 6m 6s (- 1h 42m 13s)
16 | epoch: 0, iter: 160000 (6.0183%), loss: 2.44395, 0h 6m 30s (- 1h 41m 43s)
17 | epoch: 0, iter: 170000 (6.3945%), loss: 2.41002, 0h 6m 54s (- 1h 41m 14s)
18 | epoch: 0, iter: 180000 (6.7706%), loss: 2.44952, 0h 7m 18s (- 1h 40m 44s)
19 | epoch: 0, iter: 190000 (7.1468%), loss: 2.45448, 0h 7m 43s (- 1h 40m 16s)
20 | epoch: 0, iter: 200000 (7.5229%), loss: 2.41002, 0h 8m 7s (- 1h 39m 50s)
21 | epoch: 0, iter: 210000 (7.8991%), loss: 2.39892, 0h 8m 31s (- 1h 39m 22s)
22 | epoch: 0, iter: 220000 (8.2752%), loss: 2.43904, 0h 8m 55s (- 1h 38m 55s)
23 | epoch: 0, iter: 230000 (8.6514%), loss: 2.43715, 0h 9m 20s (- 1h 38m 34s)
24 | epoch: 0, iter: 240000 (9.0275%), loss: 2.44379, 0h 9m 44s (- 1h 38m 9s)
25 | epoch: 0, iter: 250000 (9.4037%), loss: 2.43152, 0h 10m 8s (- 1h 37m 43s)
26 | epoch: 0, iter: 260000 (9.7798%), loss: 2.42755, 0h 10m 32s (- 1h 37m 17s)
27 | epoch: 1, iter: 270000 (10.1560%), loss: 0.87230, 0h 10m 57s (- 1h 36m 53s)
28 | epoch: 1, iter: 280000 (10.5321%), loss: 2.10191, 0h 11m 21s (- 1h 36m 25s)
29 | epoch: 1, iter: 290000 (10.9082%), loss: 2.12928, 0h 11m 45s (- 1h 36m 0s)
30 | epoch: 1, iter: 300000 (11.2844%), loss: 2.14489, 0h 12m 9s (- 1h 35m 34s)
31 | epoch: 1, iter: 310000 (11.6605%), loss: 2.14480, 0h 12m 33s (- 1h 35m 8s)
32 | epoch: 1, iter: 320000 (12.0367%), loss: 2.14743, 0h 12m 57s (- 1h 34m 40s)
33 | epoch: 1, iter: 330000 (12.4128%), loss: 2.16723, 0h 13m 21s (- 1h 34m 14s)
34 | epoch: 1, iter: 340000 (12.7890%), loss: 2.16226, 0h 13m 45s (- 1h 33m 47s)
35 | epoch: 1, iter: 350000 (13.1651%), loss: 2.19250, 0h 14m 9s (- 1h 33m 22s)
36 | epoch: 1, iter: 360000 (13.5413%), loss: 2.15121, 0h 14m 33s (- 1h 32m 56s)
37 | epoch: 1, iter: 370000 (13.9174%), loss: 2.18072, 0h 14m 57s (- 1h 32m 31s)
38 | epoch: 1, iter: 380000 (14.2936%), loss: 2.16037, 0h 15m 21s (- 1h 32m 6s)
39 | epoch: 1, iter: 390000 (14.6697%), loss: 2.20750, 0h 15m 46s (- 1h 31m 44s)
40 | epoch: 1, iter: 400000 (15.0459%), loss: 2.16055, 0h 16m 12s (- 1h 31m 29s)
41 | epoch: 1, iter: 410000 (15.4220%), loss: 2.20408, 0h 16m 37s (- 1h 31m 13s)
42 | epoch: 1, iter: 420000 (15.7981%), loss: 2.21058, 0h 17m 9s (- 1h 31m 28s)
43 | epoch: 1, iter: 430000 (16.1743%), loss: 2.17911, 0h 17m 43s (- 1h 31m 52s)
44 | epoch: 1, iter: 440000 (16.5504%), loss: 2.22190, 0h 18m 21s (- 1h 32m 35s)
45 | epoch: 1, iter: 450000 (16.9266%), loss: 2.22899, 0h 18m 56s (- 1h 32m 56s)
46 | epoch: 1, iter: 460000 (17.3027%), loss: 2.22010, 0h 19m 30s (- 1h 33m 13s)
47 | epoch: 1, iter: 470000 (17.6789%), loss: 2.22067, 0h 20m 1s (- 1h 33m 14s)
48 | epoch: 1, iter: 480000 (18.0550%), loss: 2.20816, 0h 20m 31s (- 1h 33m 9s)
49 | epoch: 1, iter: 490000 (18.4312%), loss: 2.22236, 0h 21m 1s (- 1h 33m 3s)
50 | epoch: 1, iter: 500000 (18.8073%), loss: 2.18344, 0h 21m 32s (- 1h 33m 1s)
51 | epoch: 1, iter: 510000 (19.1835%), loss: 2.21158, 0h 22m 5s (- 1h 33m 5s)
52 | epoch: 1, iter: 520000 (19.5596%), loss: 2.22212, 0h 22m 35s (- 1h 32m 56s)
53 | epoch: 1, iter: 530000 (19.9358%), loss: 2.18697, 0h 23m 6s (- 1h 32m 47s)
54 | epoch: 2, iter: 540000 (20.3119%), loss: 1.72911, 0h 23m 42s (- 1h 32m 59s)
55 | epoch: 2, iter: 550000 (20.6880%), loss: 2.06331, 0h 24m 17s (- 1h 33m 7s)
56 | epoch: 2, iter: 560000 (21.0642%), loss: 2.09845, 0h 24m 47s (- 1h 32m 55s)
57 | epoch: 2, iter: 570000 (21.4403%), loss: 2.07759, 0h 25m 22s (- 1h 33m 0s)
58 | epoch: 2, iter: 580000 (21.8165%), loss: 2.06383, 0h 25m 57s (- 1h 33m 2s)
59 | epoch: 2, iter: 590000 (22.1926%), loss: 2.07357, 0h 26m 33s (- 1h 33m 6s)
60 | epoch: 2, iter: 600000 (22.5688%), loss: 2.08772, 0h 27m 8s (- 1h 33m 8s)
61 | epoch: 2, iter: 610000 (22.9449%), loss: 2.09649, 0h 27m 42s (- 1h 33m 1s)
62 | epoch: 2, iter: 620000 (23.3211%), loss: 2.10091, 0h 28m 14s (- 1h 32m 50s)
63 | epoch: 2, iter: 630000 (23.6972%), loss: 2.10469, 0h 28m 45s (- 1h 32m 37s)
64 | epoch: 2, iter: 640000 (24.0734%), loss: 2.10093, 0h 29m 17s (- 1h 32m 21s)
65 | epoch: 2, iter: 650000 (24.4495%), loss: 2.09875, 0h 29m 48s (- 1h 32m 7s)
66 | epoch: 2, iter: 660000 (24.8257%), loss: 2.12848, 0h 30m 22s (- 1h 31m 57s)
67 | epoch: 2, iter: 670000 (25.2018%), loss: 2.11049, 0h 30m 55s (- 1h 31m 46s)
68 | epoch: 2, iter: 680000 (25.5779%), loss: 2.12384, 0h 31m 27s (- 1h 31m 30s)
69 | epoch: 2, iter: 690000 (25.9541%), loss: 2.13213, 0h 32m 0s (- 1h 31m 17s)
70 | epoch: 2, iter: 700000 (26.3302%), loss: 2.10539, 0h 32m 32s (- 1h 31m 3s)
71 | epoch: 2, iter: 710000 (26.7064%), loss: 2.13846, 0h 33m 3s (- 1h 30m 44s)
72 | epoch: 2, iter: 720000 (27.0825%), loss: 2.14516, 0h 33m 37s (- 1h 30m 30s)
73 | epoch: 2, iter: 730000 (27.4587%), loss: 2.15690, 0h 34m 15s (- 1h 30m 30s)
74 | epoch: 2, iter: 740000 (27.8348%), loss: 2.17276, 0h 34m 54s (- 1h 30m 30s)
75 | epoch: 2, iter: 750000 (28.2110%), loss: 2.14199, 0h 35m 32s (- 1h 30m 25s)
76 | epoch: 2, iter: 760000 (28.5871%), loss: 2.12951, 0h 36m 6s (- 1h 30m 11s)
77 | epoch: 2, iter: 770000 (28.9633%), loss: 2.12076, 0h 36m 38s (- 1h 29m 51s)
78 | epoch: 2, iter: 780000 (29.3394%), loss: 2.09621, 0h 37m 3s (- 1h 29m 15s)
79 | epoch: 2, iter: 790000 (29.7156%), loss: 2.15327, 0h 37m 30s (- 1h 28m 42s)
80 | epoch: 3, iter: 800000 (30.0917%), loss: 0.49388, 0h 37m 54s (- 1h 28m 4s)
81 | epoch: 3, iter: 810000 (30.4679%), loss: 1.99995, 0h 38m 18s (- 1h 27m 26s)
82 | epoch: 3, iter: 820000 (30.8440%), loss: 2.04447, 0h 38m 42s (- 1h 26m 48s)
83 | epoch: 3, iter: 830000 (31.2201%), loss: 2.02838, 0h 39m 6s (- 1h 26m 10s)
84 | epoch: 3, iter: 840000 (31.5963%), loss: 2.06405, 0h 39m 32s (- 1h 25m 37s)
85 | epoch: 3, iter: 850000 (31.9724%), loss: 2.04268, 0h 39m 58s (- 1h 25m 3s)
86 | epoch: 3, iter: 860000 (32.3486%), loss: 2.05325, 0h 40m 23s (- 1h 24m 29s)
87 | epoch: 3, iter: 870000 (32.7247%), loss: 2.01556, 0h 40m 50s (- 1h 23m 58s)
88 | epoch: 3, iter: 880000 (33.1009%), loss: 2.09432, 0h 41m 17s (- 1h 23m 26s)
89 | epoch: 3, iter: 890000 (33.4770%), loss: 2.07266, 0h 41m 41s (- 1h 22m 51s)
90 | epoch: 3, iter: 900000 (33.8532%), loss: 2.09371, 0h 42m 8s (- 1h 22m 19s)
91 | epoch: 3, iter: 910000 (34.2293%), loss: 2.05994, 0h 42m 34s (- 1h 21m 49s)
92 | epoch: 3, iter: 920000 (34.6055%), loss: 2.07405, 0h 43m 0s (- 1h 21m 15s)
93 | epoch: 3, iter: 930000 (34.9816%), loss: 2.08685, 0h 43m 25s (- 1h 20m 41s)
94 | epoch: 3, iter: 940000 (35.3578%), loss: 2.08037, 0h 43m 49s (- 1h 20m 6s)
95 | epoch: 3, iter: 950000 (35.7339%), loss: 2.08255, 0h 44m 12s (- 1h 19m 31s)
96 | epoch: 3, iter: 960000 (36.1100%), loss: 2.07661, 0h 44m 36s (- 1h 18m 56s)
97 | epoch: 3, iter: 970000 (36.4862%), loss: 2.08263, 0h 45m 1s (- 1h 18m 22s)
98 | epoch: 3, iter: 980000 (36.8623%), loss: 2.12395, 0h 45m 25s (- 1h 17m 48s)
99 | epoch: 3, iter: 990000 (37.2385%), loss: 2.07975, 0h 45m 49s (- 1h 17m 13s)
100 | epoch: 3, iter: 1000000 (37.6146%), loss: 2.09179, 0h 46m 14s (- 1h 16m 41s)
101 | epoch: 3, iter: 1010000 (37.9908%), loss: 2.09122, 0h 46m 40s (- 1h 16m 11s)
102 | epoch: 3, iter: 1020000 (38.3669%), loss: 2.11465, 0h 47m 5s (- 1h 15m 39s)
103 | epoch: 3, iter: 1030000 (38.7431%), loss: 2.10626, 0h 47m 32s (- 1h 15m 10s)
104 | epoch: 3, iter: 1040000 (39.1192%), loss: 2.07933, 0h 47m 57s (- 1h 14m 38s)
105 | epoch: 3, iter: 1050000 (39.4954%), loss: 2.10543, 0h 48m 23s (- 1h 14m 8s)
106 | epoch: 3, iter: 1060000 (39.8715%), loss: 2.10846, 0h 48m 50s (- 1h 13m 39s)
107 | epoch: 4, iter: 1070000 (40.2477%), loss: 1.28646, 0h 49m 17s (- 1h 13m 10s)
108 | epoch: 4, iter: 1080000 (40.6238%), loss: 2.02433, 0h 49m 44s (- 1h 12m 41s)
109 | epoch: 4, iter: 1090000 (40.9999%), loss: 1.98960, 0h 50m 10s (- 1h 12m 12s)
110 | epoch: 4, iter: 1100000 (41.3761%), loss: 2.02933, 0h 50m 36s (- 1h 11m 41s)
111 | epoch: 4, iter: 1110000 (41.7522%), loss: 2.02056, 0h 50m 59s (- 1h 11m 8s)
112 | epoch: 4, iter: 1120000 (42.1284%), loss: 2.00377, 0h 51m 23s (- 1h 10m 35s)
113 | epoch: 4, iter: 1130000 (42.5045%), loss: 2.02278, 0h 51m 47s (- 1h 10m 3s)
114 | epoch: 4, iter: 1140000 (42.8807%), loss: 2.03479, 0h 52m 11s (- 1h 9m 30s)
115 | epoch: 4, iter: 1150000 (43.2568%), loss: 2.02364, 0h 52m 35s (- 1h 8m 58s)
116 | epoch: 4, iter: 1160000 (43.6330%), loss: 2.06939, 0h 52m 58s (- 1h 8m 26s)
117 | epoch: 4, iter: 1170000 (44.0091%), loss: 2.09282, 0h 53m 22s (- 1h 7m 54s)
118 | epoch: 4, iter: 1180000 (44.3853%), loss: 2.06587, 0h 53m 46s (- 1h 7m 22s)
119 | epoch: 4, iter: 1190000 (44.7614%), loss: 2.01028, 0h 54m 10s (- 1h 6m 51s)
120 | epoch: 4, iter: 1200000 (45.1376%), loss: 2.02984, 0h 54m 34s (- 1h 6m 20s)
121 | epoch: 4, iter: 1210000 (45.5137%), loss: 2.05224, 0h 54m 58s (- 1h 5m 49s)
122 | epoch: 4, iter: 1220000 (45.8898%), loss: 2.07301, 0h 55m 22s (- 1h 5m 18s)
123 | epoch: 4, iter: 1230000 (46.2660%), loss: 2.08169, 0h 55m 46s (- 1h 4m 47s)
124 | epoch: 4, iter: 1240000 (46.6421%), loss: 2.03644, 0h 56m 10s (- 1h 4m 15s)
125 | epoch: 4, iter: 1250000 (47.0183%), loss: 2.07852, 0h 56m 34s (- 1h 3m 44s)
126 | epoch: 4, iter: 1260000 (47.3944%), loss: 2.08143, 0h 56m 58s (- 1h 3m 13s)
127 | epoch: 4, iter: 1270000 (47.7706%), loss: 2.08055, 0h 57m 21s (- 1h 2m 43s)
128 | epoch: 4, iter: 1280000 (48.1467%), loss: 2.08491, 0h 57m 45s (- 1h 2m 12s)
129 | epoch: 4, iter: 1290000 (48.5229%), loss: 2.06902, 0h 58m 9s (- 1h 1m 41s)
130 | epoch: 4, iter: 1300000 (48.8990%), loss: 2.07177, 0h 58m 33s (- 1h 1m 11s)
131 | epoch: 4, iter: 1310000 (49.2752%), loss: 2.07476, 0h 58m 56s (- 1h 0m 40s)
132 | epoch: 4, iter: 1320000 (49.6513%), loss: 2.08070, 0h 59m 20s (- 1h 0m 10s)
133 | epoch: 5, iter: 1330000 (50.0275%), loss: 0.13933, 0h 59m 44s (- 0h 59m 40s)
134 | epoch: 5, iter: 1340000 (50.4036%), loss: 1.98851, 1h 0m 8s (- 0h 59m 10s)
135 | epoch: 5, iter: 1350000 (50.7798%), loss: 1.97842, 1h 0m 32s (- 0h 58m 40s)
136 | epoch: 5, iter: 1360000 (51.1559%), loss: 1.99991, 1h 0m 55s (- 0h 58m 10s)
137 | epoch: 5, iter: 1370000 (51.5320%), loss: 2.01677, 1h 1m 19s (- 0h 57m 40s)
138 | epoch: 5, iter: 1380000 (51.9082%), loss: 2.00795, 1h 1m 43s (- 0h 57m 11s)
139 | epoch: 5, iter: 1390000 (52.2843%), loss: 2.01434, 1h 2m 7s (- 0h 56m 41s)
140 | epoch: 5, iter: 1400000 (52.6605%), loss: 1.97428, 1h 2m 30s (- 0h 56m 11s)
141 | epoch: 5, iter: 1410000 (53.0366%), loss: 2.00983, 1h 2m 54s (- 0h 55m 42s)
142 | epoch: 5, iter: 1420000 (53.4128%), loss: 2.03191, 1h 3m 18s (- 0h 55m 13s)
143 | epoch: 5, iter: 1430000 (53.7889%), loss: 1.99030, 1h 3m 42s (- 0h 54m 43s)
144 | epoch: 5, iter: 1440000 (54.1651%), loss: 2.01654, 1h 4m 6s (- 0h 54m 14s)
145 | epoch: 5, iter: 1450000 (54.5412%), loss: 2.03098, 1h 4m 30s (- 0h 53m 45s)
146 | epoch: 5, iter: 1460000 (54.9174%), loss: 2.04121, 1h 4m 54s (- 0h 53m 16s)
147 | epoch: 5, iter: 1470000 (55.2935%), loss: 2.04271, 1h 5m 17s (- 0h 52m 47s)
148 | epoch: 5, iter: 1480000 (55.6697%), loss: 2.04228, 1h 5m 41s (- 0h 52m 18s)
149 | epoch: 5, iter: 1490000 (56.0458%), loss: 2.05897, 1h 6m 5s (- 0h 51m 49s)
150 | epoch: 5, iter: 1500000 (56.4219%), loss: 2.05661, 1h 6m 29s (- 0h 51m 21s)
151 | epoch: 5, iter: 1510000 (56.7981%), loss: 2.06066, 1h 6m 53s (- 0h 50m 52s)
152 | epoch: 5, iter: 1520000 (57.1742%), loss: 2.04659, 1h 7m 16s (- 0h 50m 23s)
153 | epoch: 5, iter: 1530000 (57.5504%), loss: 2.03776, 1h 7m 40s (- 0h 49m 55s)
154 | epoch: 5, iter: 1540000 (57.9265%), loss: 2.05630, 1h 8m 4s (- 0h 49m 26s)
155 | epoch: 5, iter: 1550000 (58.3027%), loss: 2.04782, 1h 8m 28s (- 0h 48m 58s)
156 | epoch: 5, iter: 1560000 (58.6788%), loss: 2.03437, 1h 8m 52s (- 0h 48m 29s)
157 | epoch: 5, iter: 1570000 (59.0550%), loss: 2.06714, 1h 9m 17s (- 0h 48m 2s)
158 | epoch: 5, iter: 1580000 (59.4311%), loss: 2.04709, 1h 9m 43s (- 0h 47m 35s)
159 | epoch: 5, iter: 1590000 (59.8073%), loss: 2.04397, 1h 10m 9s (- 0h 47m 8s)
160 | epoch: 6, iter: 1600000 (60.1834%), loss: 0.94560, 1h 10m 33s (- 0h 46m 40s)
161 | epoch: 6, iter: 1610000 (60.5596%), loss: 1.96777, 1h 10m 57s (- 0h 46m 12s)
162 | epoch: 6, iter: 1620000 (60.9357%), loss: 1.97319, 1h 11m 22s (- 0h 45m 45s)
163 | epoch: 6, iter: 1630000 (61.3118%), loss: 1.97417, 1h 11m 47s (- 0h 45m 18s)
164 | epoch: 6, iter: 1640000 (61.6880%), loss: 1.96789, 1h 12m 11s (- 0h 44m 50s)
165 | epoch: 6, iter: 1650000 (62.0641%), loss: 1.96592, 1h 12m 36s (- 0h 44m 22s)
166 | epoch: 6, iter: 1660000 (62.4403%), loss: 1.99629, 1h 12m 59s (- 0h 43m 54s)
167 | epoch: 6, iter: 1670000 (62.8164%), loss: 2.00893, 1h 13m 24s (- 0h 43m 27s)
168 | epoch: 6, iter: 1680000 (63.1926%), loss: 2.02915, 1h 13m 49s (- 0h 43m 0s)
169 | epoch: 6, iter: 1690000 (63.5687%), loss: 2.01853, 1h 14m 15s (- 0h 42m 33s)
170 | epoch: 6, iter: 1700000 (63.9449%), loss: 2.01835, 1h 14m 40s (- 0h 42m 6s)
171 | epoch: 6, iter: 1710000 (64.3210%), loss: 2.03439, 1h 15m 5s (- 0h 41m 39s)
172 | epoch: 6, iter: 1720000 (64.6972%), loss: 2.04839, 1h 15m 31s (- 0h 41m 12s)
173 | epoch: 6, iter: 1730000 (65.0733%), loss: 2.01215, 1h 15m 57s (- 0h 40m 46s)
174 | epoch: 6, iter: 1740000 (65.4495%), loss: 2.02629, 1h 16m 23s (- 0h 40m 19s)
175 | epoch: 6, iter: 1750000 (65.8256%), loss: 1.99861, 1h 16m 59s (- 0h 39m 58s)
176 | epoch: 6, iter: 1760000 (66.2017%), loss: 2.02231, 1h 17m 35s (- 0h 39m 37s)
177 | epoch: 6, iter: 1770000 (66.5779%), loss: 2.01290, 1h 18m 8s (- 0h 39m 13s)
178 | epoch: 6, iter: 1780000 (66.9540%), loss: 2.02131, 1h 18m 39s (- 0h 38m 49s)
179 | epoch: 6, iter: 1790000 (67.3302%), loss: 2.01125, 1h 19m 12s (- 0h 38m 25s)
180 | epoch: 6, iter: 1800000 (67.7063%), loss: 2.01449, 1h 19m 40s (- 0h 38m 0s)
181 | epoch: 6, iter: 1810000 (68.0825%), loss: 2.01962, 1h 20m 10s (- 0h 37m 35s)
182 | epoch: 6, iter: 1820000 (68.4586%), loss: 2.02819, 1h 20m 39s (- 0h 37m 9s)
183 | epoch: 6, iter: 1830000 (68.8348%), loss: 2.06762, 1h 21m 7s (- 0h 36m 43s)
184 | epoch: 6, iter: 1840000 (69.2109%), loss: 2.03281, 1h 21m 38s (- 0h 36m 19s)
185 | epoch: 6, iter: 1850000 (69.5871%), loss: 2.06630, 1h 22m 7s (- 0h 35m 53s)
186 | epoch: 6, iter: 1860000 (69.9632%), loss: 2.03683, 1h 22m 36s (- 0h 35m 27s)
187 | epoch: 7, iter: 1870000 (70.3394%), loss: 1.75568, 1h 23m 4s (- 0h 35m 1s)
188 | epoch: 7, iter: 1880000 (70.7155%), loss: 1.94221, 1h 23m 31s (- 0h 34m 35s)
189 | epoch: 7, iter: 1890000 (71.0917%), loss: 1.97357, 1h 23m 59s (- 0h 34m 9s)
190 | epoch: 7, iter: 1900000 (71.4678%), loss: 1.94950, 1h 24m 28s (- 0h 33m 43s)
191 | epoch: 7, iter: 1910000 (71.8439%), loss: 1.98160, 1h 24m 57s (- 0h 33m 17s)
192 | epoch: 7, iter: 1920000 (72.2201%), loss: 1.98204, 1h 25m 26s (- 0h 32m 52s)
193 | epoch: 7, iter: 1930000 (72.5962%), loss: 1.98766, 1h 26m 4s (- 0h 32m 29s)
194 | epoch: 7, iter: 1940000 (72.9724%), loss: 2.02286, 1h 26m 37s (- 0h 32m 4s)
195 | epoch: 7, iter: 1950000 (73.3485%), loss: 1.98105, 1h 27m 9s (- 0h 31m 40s)
196 | epoch: 7, iter: 1960000 (73.7247%), loss: 1.97440, 1h 27m 41s (- 0h 31m 15s)
197 | epoch: 7, iter: 1970000 (74.1008%), loss: 2.02292, 1h 28m 14s (- 0h 30m 50s)
198 | epoch: 7, iter: 1980000 (74.4770%), loss: 2.00684, 1h 28m 47s (- 0h 30m 25s)
199 | epoch: 7, iter: 1990000 (74.8531%), loss: 2.00329, 1h 29m 21s (- 0h 30m 1s)
200 | epoch: 7, iter: 2000000 (75.2293%), loss: 2.02230, 1h 29m 55s (- 0h 29m 36s)
201 | epoch: 7, iter: 2010000 (75.6054%), loss: 2.03330, 1h 30m 28s (- 0h 29m 11s)
202 | epoch: 7, iter: 2020000 (75.9816%), loss: 2.00720, 1h 31m 0s (- 0h 28m 46s)
203 | epoch: 7, iter: 2030000 (76.3577%), loss: 1.99933, 1h 31m 33s (- 0h 28m 20s)
204 | epoch: 7, iter: 2040000 (76.7338%), loss: 2.02754, 1h 32m 5s (- 0h 27m 55s)
205 | epoch: 7, iter: 2050000 (77.1100%), loss: 2.02407, 1h 32m 37s (- 0h 27m 29s)
206 | epoch: 7, iter: 2060000 (77.4861%), loss: 2.00424, 1h 33m 10s (- 0h 27m 4s)
207 | epoch: 7, iter: 2070000 (77.8623%), loss: 2.03398, 1h 33m 40s (- 0h 26m 37s)
208 | epoch: 7, iter: 2080000 (78.2384%), loss: 2.00374, 1h 34m 10s (- 0h 26m 11s)
209 | epoch: 7, iter: 2090000 (78.6146%), loss: 2.02240, 1h 34m 40s (- 0h 25m 45s)
210 | epoch: 7, iter: 2100000 (78.9907%), loss: 2.00050, 1h 35m 10s (- 0h 25m 18s)
211 | epoch: 7, iter: 2110000 (79.3669%), loss: 2.03274, 1h 35m 40s (- 0h 24m 52s)
212 | epoch: 7, iter: 2120000 (79.7430%), loss: 2.00418, 1h 36m 10s (- 0h 24m 25s)
213 | epoch: 8, iter: 2130000 (80.1192%), loss: 0.60949, 1h 36m 40s (- 0h 23m 59s)
214 | epoch: 8, iter: 2140000 (80.4953%), loss: 1.96516, 1h 37m 10s (- 0h 23m 32s)
215 | epoch: 8, iter: 2150000 (80.8715%), loss: 1.93759, 1h 37m 41s (- 0h 23m 6s)
216 | epoch: 8, iter: 2160000 (81.2476%), loss: 1.95875, 1h 38m 11s (- 0h 22m 39s)
217 | epoch: 8, iter: 2170000 (81.6237%), loss: 1.97303, 1h 38m 42s (- 0h 22m 13s)
218 | epoch: 8, iter: 2180000 (81.9999%), loss: 1.97177, 1h 39m 12s (- 0h 21m 46s)
219 | epoch: 8, iter: 2190000 (82.3760%), loss: 1.95263, 1h 39m 42s (- 0h 21m 19s)
220 | epoch: 8, iter: 2200000 (82.7522%), loss: 1.95736, 1h 40m 13s (- 0h 20m 53s)
221 | epoch: 8, iter: 2210000 (83.1283%), loss: 1.97626, 1h 40m 43s (- 0h 20m 26s)
222 | epoch: 8, iter: 2220000 (83.5045%), loss: 1.98855, 1h 41m 13s (- 0h 19m 59s)
223 | epoch: 8, iter: 2230000 (83.8806%), loss: 1.99695, 1h 41m 43s (- 0h 19m 32s)
224 | epoch: 8, iter: 2240000 (84.2568%), loss: 1.97252, 1h 42m 14s (- 0h 19m 6s)
225 | epoch: 8, iter: 2250000 (84.6329%), loss: 1.99628, 1h 42m 44s (- 0h 18m 39s)
226 | epoch: 8, iter: 2260000 (85.0091%), loss: 1.97783, 1h 43m 15s (- 0h 18m 12s)
227 | epoch: 8, iter: 2270000 (85.3852%), loss: 1.98183, 1h 43m 45s (- 0h 17m 45s)
228 | epoch: 8, iter: 2280000 (85.7614%), loss: 1.98071, 1h 44m 15s (- 0h 17m 18s)
229 | epoch: 8, iter: 2290000 (86.1375%), loss: 2.02002, 1h 44m 42s (- 0h 16m 51s)
230 | epoch: 8, iter: 2300000 (86.5137%), loss: 1.98167, 1h 45m 8s (- 0h 16m 23s)
231 | epoch: 8, iter: 2310000 (86.8898%), loss: 2.01519, 1h 45m 34s (- 0h 15m 55s)
232 | epoch: 8, iter: 2320000 (87.2659%), loss: 2.04242, 1h 45m 59s (- 0h 15m 28s)
233 | epoch: 8, iter: 2330000 (87.6421%), loss: 2.01019, 1h 46m 24s (- 0h 15m 0s)
234 | epoch: 8, iter: 2340000 (88.0182%), loss: 2.00114, 1h 46m 49s (- 0h 14m 32s)
235 | epoch: 8, iter: 2350000 (88.3944%), loss: 2.02041, 1h 47m 14s (- 0h 14m 4s)
236 | epoch: 8, iter: 2360000 (88.7705%), loss: 2.01906, 1h 47m 39s (- 0h 13m 37s)
237 | epoch: 8, iter: 2370000 (89.1467%), loss: 1.99272, 1h 48m 4s (- 0h 13m 9s)
238 | epoch: 8, iter: 2380000 (89.5228%), loss: 2.01103, 1h 48m 28s (- 0h 12m 41s)
239 | epoch: 8, iter: 2390000 (89.8990%), loss: 2.03332, 1h 48m 52s (- 0h 12m 13s)
240 | epoch: 9, iter: 2400000 (90.2751%), loss: 1.44418, 1h 49m 16s (- 0h 11m 46s)
241 | epoch: 9, iter: 2410000 (90.6513%), loss: 1.92688, 1h 49m 40s (- 0h 11m 18s)
242 | epoch: 9, iter: 2420000 (91.0274%), loss: 1.93048, 1h 50m 5s (- 0h 10m 51s)
243 | epoch: 9, iter: 2430000 (91.4036%), loss: 1.95862, 1h 50m 29s (- 0h 10m 23s)
244 | epoch: 9, iter: 2440000 (91.7797%), loss: 1.95498, 1h 50m 53s (- 0h 9m 55s)
245 | epoch: 9, iter: 2450000 (92.1558%), loss: 1.94435, 1h 51m 17s (- 0h 9m 28s)
246 | epoch: 9, iter: 2460000 (92.5320%), loss: 1.95451, 1h 51m 41s (- 0h 9m 0s)
247 | epoch: 9, iter: 2470000 (92.9081%), loss: 1.97202, 1h 52m 5s (- 0h 8m 33s)
248 | epoch: 9, iter: 2480000 (93.2843%), loss: 1.99874, 1h 52m 29s (- 0h 8m 5s)
249 | epoch: 9, iter: 2490000 (93.6604%), loss: 1.97803, 1h 52m 54s (- 0h 7m 38s)
250 | epoch: 9, iter: 2500000 (94.0366%), loss: 1.96782, 1h 53m 18s (- 0h 7m 11s)
251 | epoch: 9, iter: 2510000 (94.4127%), loss: 1.99530, 1h 53m 43s (- 0h 6m 43s)
252 | epoch: 9, iter: 2520000 (94.7889%), loss: 1.96846, 1h 54m 6s (- 0h 6m 16s)
253 | epoch: 9, iter: 2530000 (95.1650%), loss: 2.00842, 1h 54m 30s (- 0h 5m 49s)
254 | epoch: 9, iter: 2540000 (95.5412%), loss: 1.96119, 1h 54m 55s (- 0h 5m 21s)
255 | epoch: 9, iter: 2550000 (95.9173%), loss: 2.00466, 1h 55m 19s (- 0h 4m 54s)
256 | epoch: 9, iter: 2560000 (96.2935%), loss: 2.02193, 1h 55m 45s (- 0h 4m 27s)
257 | epoch: 9, iter: 2570000 (96.6696%), loss: 1.98834, 1h 56m 10s (- 0h 4m 0s)
258 | epoch: 9, iter: 2580000 (97.0457%), loss: 1.99262, 1h 56m 34s (- 0h 3m 32s)
259 | epoch: 9, iter: 2590000 (97.4219%), loss: 1.96559, 1h 56m 59s (- 0h 3m 5s)
260 | epoch: 9, iter: 2600000 (97.7980%), loss: 2.00044, 1h 57m 23s (- 0h 2m 38s)
261 | epoch: 9, iter: 2610000 (98.1742%), loss: 2.00297, 1h 57m 49s (- 0h 2m 11s)
262 | epoch: 9, iter: 2620000 (98.5503%), loss: 1.99540, 1h 58m 13s (- 0h 1m 44s)
263 | epoch: 9, iter: 2630000 (98.9265%), loss: 2.00239, 1h 58m 38s (- 0h 1m 17s)
264 | epoch: 9, iter: 2640000 (99.3026%), loss: 1.99627, 1h 59m 2s (- 0h 0m 50s)
265 | epoch: 9, iter: 2650000 (99.6788%), loss: 1.98041, 1h 59m 26s (- 0h 0m 23s)
266 |
--------------------------------------------------------------------------------
/glove/logs/glove-50.log:
--------------------------------------------------------------------------------
1 | epoch: 0, iter: 10000 (0.2837%), loss: 3582.88399, 0h 2m 10s (- 12h 41m 49s)
2 | epoch: 0, iter: 20000 (0.5674%), loss: 1026.73824, 0h 2m 44s (- 7h 59m 13s)
3 | epoch: 0, iter: 30000 (0.8510%), loss: 713.78422, 0h 3m 18s (- 6h 25m 6s)
4 | epoch: 0, iter: 10000 (0.2837%), loss: 3562.17056, 0h 1m 7s (- 6h 36m 37s)
5 | epoch: 0, iter: 20000 (0.5674%), loss: 998.42624, 0h 2m 15s (- 6h 36m 53s)
6 | epoch: 0, iter: 30000 (0.8510%), loss: 696.92690, 0h 3m 21s (- 6h 30m 46s)
7 | epoch: 1, iter: 40000 (1.1347%), loss: 260.98812, 0h 4m 31s (- 6h 34m 11s)
8 | epoch: 1, iter: 50000 (1.4184%), loss: 512.52003, 0h 5m 38s (- 6h 32m 17s)
9 | epoch: 1, iter: 60000 (1.7021%), loss: 465.81203, 0h 6m 41s (- 6h 26m 46s)
10 | epoch: 1, iter: 70000 (1.9858%), loss: 432.56304, 0h 7m 44s (- 6h 22m 27s)
11 | epoch: 2, iter: 80000 (2.2694%), loss: 381.46248, 0h 8m 49s (- 6h 19m 42s)
12 | epoch: 2, iter: 90000 (2.5531%), loss: 380.94147, 0h 9m 54s (- 6h 18m 8s)
13 | epoch: 2, iter: 100000 (2.8368%), loss: 366.49043, 0h 10m 59s (- 6h 16m 23s)
14 | epoch: 3, iter: 110000 (3.1205%), loss: 147.19145, 0h 12m 4s (- 6h 14m 51s)
15 | epoch: 3, iter: 120000 (3.4042%), loss: 341.42547, 0h 13m 8s (- 6h 12m 58s)
16 | epoch: 3, iter: 130000 (3.6878%), loss: 334.46817, 0h 14m 11s (- 6h 10m 33s)
17 | epoch: 3, iter: 140000 (3.9715%), loss: 325.10545, 0h 15m 11s (- 6h 7m 26s)
18 | epoch: 4, iter: 150000 (4.2552%), loss: 281.61939, 0h 16m 11s (- 6h 4m 24s)
19 | epoch: 4, iter: 160000 (4.5389%), loss: 306.69933, 0h 17m 12s (- 6h 1m 47s)
20 | epoch: 4, iter: 170000 (4.8226%), loss: 307.50975, 0h 18m 11s (- 5h 59m 7s)
21 | epoch: 5, iter: 180000 (5.1062%), loss: 109.63375, 0h 19m 12s (- 5h 57m 3s)
22 | epoch: 5, iter: 190000 (5.3899%), loss: 293.00476, 0h 20m 13s (- 5h 55m 9s)
23 | epoch: 5, iter: 200000 (5.6736%), loss: 289.99045, 0h 21m 14s (- 5h 53m 12s)
24 | epoch: 5, iter: 210000 (5.9573%), loss: 285.68641, 0h 22m 15s (- 5h 51m 27s)
25 | epoch: 6, iter: 220000 (6.2410%), loss: 235.27522, 0h 23m 16s (- 5h 49m 34s)
26 | epoch: 6, iter: 230000 (6.5246%), loss: 276.59855, 0h 24m 16s (- 5h 47m 42s)
27 | epoch: 6, iter: 240000 (6.8083%), loss: 275.16386, 0h 25m 16s (- 5h 45m 53s)
28 | epoch: 7, iter: 250000 (7.0920%), loss: 86.25059, 0h 26m 20s (- 5h 45m 7s)
29 | epoch: 7, iter: 260000 (7.3757%), loss: 266.46127, 0h 27m 17s (- 5h 42m 46s)
30 | epoch: 7, iter: 270000 (7.6594%), loss: 266.14367, 0h 28m 17s (- 5h 41m 10s)
31 | epoch: 7, iter: 280000 (7.9430%), loss: 264.08965, 0h 29m 22s (- 5h 40m 22s)
32 | epoch: 8, iter: 290000 (8.2267%), loss: 204.13761, 0h 30m 24s (- 5h 39m 13s)
33 | epoch: 8, iter: 300000 (8.5104%), loss: 259.30568, 0h 31m 26s (- 5h 38m 0s)
34 | epoch: 8, iter: 310000 (8.7941%), loss: 256.38581, 0h 32m 28s (- 5h 36m 44s)
35 | epoch: 9, iter: 320000 (9.0778%), loss: 68.41626, 0h 33m 30s (- 5h 35m 37s)
36 | epoch: 9, iter: 330000 (9.3614%), loss: 250.75895, 0h 34m 32s (- 5h 34m 25s)
37 | epoch: 9, iter: 340000 (9.6451%), loss: 249.10208, 0h 35m 33s (- 5h 33m 4s)
38 | epoch: 9, iter: 350000 (9.9288%), loss: 251.56643, 0h 36m 33s (- 5h 31m 42s)
39 | epoch: 10, iter: 360000 (10.2125%), loss: 181.30389, 0h 37m 37s (- 5h 30m 46s)
40 | epoch: 10, iter: 370000 (10.4962%), loss: 244.77551, 0h 38m 40s (- 5h 29m 43s)
41 | epoch: 10, iter: 380000 (10.7798%), loss: 244.02124, 0h 39m 44s (- 5h 28m 51s)
42 | epoch: 11, iter: 390000 (11.0635%), loss: 53.46864, 0h 40m 47s (- 5h 27m 53s)
43 | epoch: 11, iter: 400000 (11.3472%), loss: 240.09029, 0h 41m 52s (- 5h 27m 12s)
44 | epoch: 11, iter: 410000 (11.6309%), loss: 240.33604, 0h 42m 56s (- 5h 26m 14s)
45 | epoch: 11, iter: 420000 (11.9146%), loss: 238.76899, 0h 44m 2s (- 5h 25m 33s)
46 | epoch: 12, iter: 430000 (12.1982%), loss: 164.52532, 0h 45m 4s (- 5h 24m 27s)
47 | epoch: 12, iter: 440000 (12.4819%), loss: 235.60874, 0h 46m 15s (- 5h 24m 20s)
48 | epoch: 12, iter: 450000 (12.7656%), loss: 236.03032, 0h 47m 28s (- 5h 24m 25s)
49 | epoch: 13, iter: 460000 (13.0493%), loss: 40.60788, 0h 48m 29s (- 5h 23m 5s)
50 | epoch: 13, iter: 470000 (13.3330%), loss: 231.00218, 0h 49m 27s (- 5h 21m 29s)
51 | epoch: 13, iter: 480000 (13.6166%), loss: 232.39673, 0h 50m 27s (- 5h 20m 5s)
52 | epoch: 13, iter: 490000 (13.9003%), loss: 231.26124, 0h 51m 27s (- 5h 18m 42s)
53 | epoch: 14, iter: 500000 (14.1840%), loss: 147.38845, 0h 52m 27s (- 5h 17m 23s)
54 | epoch: 14, iter: 510000 (14.4677%), loss: 228.44521, 0h 53m 27s (- 5h 16m 1s)
55 | epoch: 14, iter: 520000 (14.7514%), loss: 228.46959, 0h 54m 29s (- 5h 14m 53s)
56 | epoch: 15, iter: 530000 (15.0350%), loss: 27.49651, 0h 55m 29s (- 5h 13m 33s)
57 | epoch: 15, iter: 540000 (15.3187%), loss: 225.34432, 0h 56m 29s (- 5h 12m 18s)
58 | epoch: 15, iter: 550000 (15.6024%), loss: 225.72398, 0h 57m 32s (- 5h 11m 16s)
59 | epoch: 15, iter: 560000 (15.8861%), loss: 225.67404, 0h 58m 32s (- 5h 9m 59s)
60 | epoch: 16, iter: 570000 (16.1698%), loss: 132.21409, 0h 59m 34s (- 5h 8m 50s)
61 | epoch: 16, iter: 580000 (16.4534%), loss: 222.80537, 1h 0m 34s (- 5h 7m 37s)
62 | epoch: 16, iter: 590000 (16.7371%), loss: 223.86467, 1h 1m 36s (- 5h 6m 30s)
63 | epoch: 17, iter: 600000 (17.0208%), loss: 16.66648, 1h 2m 38s (- 5h 5m 25s)
64 | epoch: 17, iter: 610000 (17.3045%), loss: 218.38828, 1h 3m 39s (- 5h 4m 12s)
65 | epoch: 17, iter: 620000 (17.5882%), loss: 220.96036, 1h 4m 42s (- 5h 3m 13s)
66 | epoch: 17, iter: 630000 (17.8718%), loss: 221.90386, 1h 5m 46s (- 5h 2m 14s)
67 | epoch: 18, iter: 640000 (18.1555%), loss: 118.85014, 1h 6m 47s (- 5h 1m 6s)
68 | epoch: 18, iter: 650000 (18.4392%), loss: 218.49976, 1h 7m 49s (- 5h 0m 1s)
69 | epoch: 18, iter: 660000 (18.7229%), loss: 219.08425, 1h 8m 54s (- 4h 59m 9s)
70 | epoch: 19, iter: 670000 (19.0066%), loss: 4.87612, 1h 9m 57s (- 4h 58m 8s)
71 | epoch: 19, iter: 680000 (19.2902%), loss: 215.69654, 1h 11m 1s (- 4h 57m 8s)
72 | epoch: 19, iter: 690000 (19.5739%), loss: 217.48698, 1h 12m 5s (- 4h 56m 14s)
73 | epoch: 19, iter: 700000 (19.8576%), loss: 217.26550, 1h 13m 7s (- 4h 55m 8s)
74 | epoch: 20, iter: 710000 (20.1413%), loss: 106.15122, 1h 14m 8s (- 4h 53m 57s)
75 | epoch: 20, iter: 720000 (20.4250%), loss: 215.69722, 1h 15m 10s (- 4h 52m 54s)
76 | epoch: 20, iter: 730000 (20.7086%), loss: 214.81781, 1h 16m 14s (- 4h 51m 53s)
77 | epoch: 20, iter: 740000 (20.9923%), loss: 215.54455, 1h 17m 15s (- 4h 50m 46s)
78 | epoch: 21, iter: 750000 (21.2760%), loss: 205.78288, 1h 18m 17s (- 4h 49m 40s)
79 | epoch: 21, iter: 760000 (21.5597%), loss: 212.94497, 1h 19m 18s (- 4h 48m 34s)
80 | epoch: 21, iter: 770000 (21.8434%), loss: 215.04291, 1h 20m 20s (- 4h 47m 27s)
81 | epoch: 22, iter: 780000 (22.1270%), loss: 94.40948, 1h 21m 22s (- 4h 46m 23s)
82 | epoch: 22, iter: 790000 (22.4107%), loss: 211.39550, 1h 22m 25s (- 4h 45m 20s)
83 | epoch: 22, iter: 800000 (22.6944%), loss: 211.91150, 1h 23m 25s (- 4h 44m 11s)
84 | epoch: 22, iter: 810000 (22.9781%), loss: 212.92126, 1h 24m 29s (- 4h 43m 12s)
85 | epoch: 23, iter: 820000 (23.2618%), loss: 193.75593, 1h 25m 41s (- 4h 42m 41s)
86 | epoch: 23, iter: 830000 (23.5454%), loss: 210.32490, 1h 26m 45s (- 4h 41m 41s)
87 | epoch: 23, iter: 840000 (23.8291%), loss: 210.28689, 1h 27m 51s (- 4h 40m 50s)
88 | epoch: 24, iter: 850000 (24.1128%), loss: 82.83581, 1h 28m 58s (- 4h 40m 1s)
89 | epoch: 24, iter: 860000 (24.3965%), loss: 208.82878, 1h 30m 11s (- 4h 39m 29s)
90 | epoch: 24, iter: 870000 (24.6802%), loss: 209.46116, 1h 31m 17s (- 4h 38m 36s)
91 | epoch: 24, iter: 880000 (24.9638%), loss: 209.11254, 1h 32m 24s (- 4h 37m 46s)
92 | epoch: 25, iter: 890000 (25.2475%), loss: 179.79879, 1h 33m 30s (- 4h 36m 50s)
93 | epoch: 25, iter: 900000 (25.5312%), loss: 208.10891, 1h 34m 34s (- 4h 35m 50s)
94 | epoch: 25, iter: 910000 (25.8149%), loss: 208.86848, 1h 35m 40s (- 4h 34m 55s)
95 | epoch: 26, iter: 920000 (26.0986%), loss: 71.40685, 1h 36m 45s (- 4h 34m 0s)
96 | epoch: 26, iter: 930000 (26.3822%), loss: 205.39371, 1h 37m 48s (- 4h 32m 56s)
97 | epoch: 26, iter: 940000 (26.6659%), loss: 206.74949, 1h 38m 53s (- 4h 31m 58s)
98 | epoch: 26, iter: 950000 (26.9496%), loss: 208.45557, 1h 39m 56s (- 4h 30m 53s)
99 | epoch: 27, iter: 960000 (27.2333%), loss: 168.39441, 1h 40m 58s (- 4h 29m 48s)
100 | epoch: 27, iter: 970000 (27.5169%), loss: 205.84451, 1h 42m 1s (- 4h 28m 45s)
101 | epoch: 27, iter: 980000 (27.8006%), loss: 206.20411, 1h 43m 5s (- 4h 27m 43s)
102 | epoch: 28, iter: 990000 (28.0843%), loss: 60.06616, 1h 44m 10s (- 4h 26m 45s)
103 | epoch: 28, iter: 1000000 (28.3680%), loss: 203.89503, 1h 45m 19s (- 4h 25m 58s)
104 | epoch: 28, iter: 1010000 (28.6517%), loss: 204.76428, 1h 46m 28s (- 4h 25m 8s)
105 | epoch: 28, iter: 1020000 (28.9353%), loss: 206.33285, 1h 47m 42s (- 4h 24m 31s)
106 | epoch: 29, iter: 1030000 (29.2190%), loss: 156.33513, 1h 49m 25s (- 4h 25m 3s)
107 | epoch: 29, iter: 1040000 (29.5027%), loss: 202.96980, 1h 50m 39s (- 4h 24m 24s)
108 | epoch: 29, iter: 1050000 (29.7864%), loss: 204.89602, 1h 51m 43s (- 4h 23m 22s)
109 | epoch: 30, iter: 1060000 (30.0701%), loss: 49.88182, 1h 52m 48s (- 4h 22m 20s)
110 | epoch: 30, iter: 1070000 (30.3537%), loss: 201.73471, 1h 53m 51s (- 4h 21m 15s)
111 | epoch: 30, iter: 1080000 (30.6374%), loss: 203.28619, 1h 54m 57s (- 4h 20m 14s)
112 | epoch: 30, iter: 1090000 (30.9211%), loss: 203.50570, 1h 56m 0s (- 4h 19m 9s)
113 | epoch: 31, iter: 1100000 (31.2048%), loss: 145.25405, 1h 57m 2s (- 4h 18m 2s)
114 | epoch: 31, iter: 1110000 (31.4885%), loss: 202.30928, 1h 58m 4s (- 4h 16m 54s)
115 | epoch: 31, iter: 1120000 (31.7721%), loss: 202.39322, 1h 59m 5s (- 4h 15m 45s)
116 | epoch: 32, iter: 1130000 (32.0558%), loss: 38.98377, 2h 0m 8s (- 4h 14m 39s)
117 | epoch: 32, iter: 1140000 (32.3395%), loss: 200.49852, 2h 1m 11s (- 4h 13m 32s)
118 | epoch: 32, iter: 1150000 (32.6232%), loss: 201.25514, 2h 2m 13s (- 4h 12m 25s)
119 | epoch: 32, iter: 1160000 (32.9069%), loss: 202.17546, 2h 3m 17s (- 4h 11m 22s)
120 | epoch: 33, iter: 1170000 (33.1905%), loss: 134.48769, 2h 4m 22s (- 4h 10m 20s)
121 | epoch: 33, iter: 1180000 (33.4742%), loss: 199.38152, 2h 5m 24s (- 4h 9m 14s)
122 | epoch: 33, iter: 1190000 (33.7579%), loss: 201.35127, 2h 6m 26s (- 4h 8m 7s)
123 | epoch: 34, iter: 1200000 (34.0416%), loss: 29.18051, 2h 7m 28s (- 4h 7m 0s)
124 | epoch: 34, iter: 1210000 (34.3253%), loss: 198.56807, 2h 8m 31s (- 4h 5m 53s)
125 | epoch: 34, iter: 1220000 (34.6089%), loss: 200.19857, 2h 9m 32s (- 4h 4m 46s)
126 | epoch: 34, iter: 1230000 (34.8926%), loss: 200.78890, 2h 10m 34s (- 4h 3m 37s)
127 | epoch: 35, iter: 1240000 (35.1763%), loss: 123.22386, 2h 11m 35s (- 4h 2m 30s)
128 | epoch: 35, iter: 1250000 (35.4600%), loss: 198.95047, 2h 12m 37s (- 4h 1m 23s)
129 | epoch: 35, iter: 1260000 (35.7437%), loss: 199.50285, 2h 13m 39s (- 4h 0m 16s)
130 | epoch: 36, iter: 1270000 (36.0273%), loss: 19.00897, 2h 14m 42s (- 3h 59m 11s)
131 | epoch: 36, iter: 1280000 (36.3110%), loss: 197.41577, 2h 15m 44s (- 3h 58m 4s)
132 | epoch: 36, iter: 1290000 (36.5947%), loss: 199.19375, 2h 16m 47s (- 3h 57m 1s)
133 | epoch: 36, iter: 1300000 (36.8784%), loss: 198.93374, 2h 17m 52s (- 3h 56m 0s)
134 | epoch: 37, iter: 1310000 (37.1621%), loss: 111.87777, 2h 18m 57s (- 3h 54m 57s)
135 | epoch: 37, iter: 1320000 (37.4457%), loss: 198.36039, 2h 20m 0s (- 3h 53m 52s)
136 | epoch: 37, iter: 1330000 (37.7294%), loss: 198.38075, 2h 21m 4s (- 3h 52m 49s)
137 | epoch: 38, iter: 1340000 (38.0131%), loss: 9.10398, 2h 22m 5s (- 3h 51m 41s)
138 | epoch: 38, iter: 1350000 (38.2968%), loss: 197.34802, 2h 23m 8s (- 3h 50m 37s)
139 | epoch: 38, iter: 1360000 (38.5805%), loss: 197.36508, 2h 24m 9s (- 3h 49m 29s)
140 | epoch: 38, iter: 1370000 (38.8641%), loss: 197.52020, 2h 25m 13s (- 3h 48m 26s)
141 | epoch: 39, iter: 1380000 (39.1478%), loss: 102.50339, 2h 26m 14s (- 3h 47m 19s)
142 | epoch: 39, iter: 1390000 (39.4315%), loss: 195.94070, 2h 27m 15s (- 3h 46m 11s)
143 | epoch: 39, iter: 1400000 (39.7152%), loss: 197.69650, 2h 28m 35s (- 3h 45m 32s)
144 | epoch: 39, iter: 1410000 (39.9989%), loss: 197.20569, 2h 29m 57s (- 3h 44m 56s)
145 | epoch: 40, iter: 1420000 (40.2825%), loss: 194.08130, 2h 31m 22s (- 3h 44m 23s)
146 | epoch: 40, iter: 1430000 (40.5662%), loss: 196.66872, 2h 32m 44s (- 3h 43m 46s)
147 | epoch: 40, iter: 1440000 (40.8499%), loss: 197.52420, 2h 34m 7s (- 3h 43m 10s)
148 | epoch: 41, iter: 1450000 (41.1336%), loss: 91.28728, 2h 35m 31s (- 3h 42m 34s)
149 | epoch: 41, iter: 1460000 (41.4173%), loss: 195.71225, 2h 36m 55s (- 3h 41m 57s)
150 | epoch: 41, iter: 1470000 (41.7009%), loss: 196.67277, 2h 38m 19s (- 3h 41m 21s)
151 | epoch: 41, iter: 1480000 (41.9846%), loss: 196.16795, 2h 39m 39s (- 3h 40m 37s)
152 | epoch: 42, iter: 1490000 (42.2683%), loss: 183.53463, 2h 41m 3s (- 3h 39m 58s)
153 | epoch: 42, iter: 1500000 (42.5520%), loss: 195.69018, 2h 42m 24s (- 3h 39m 15s)
154 | epoch: 42, iter: 1510000 (42.8357%), loss: 195.52721, 2h 43m 45s (- 3h 38m 32s)
155 | epoch: 43, iter: 1520000 (43.1193%), loss: 81.73509, 2h 45m 6s (- 3h 37m 48s)
156 | epoch: 43, iter: 1530000 (43.4030%), loss: 194.57625, 2h 46m 26s (- 3h 37m 2s)
157 | epoch: 43, iter: 1540000 (43.6867%), loss: 194.90987, 2h 47m 51s (- 3h 36m 21s)
158 | epoch: 43, iter: 1550000 (43.9704%), loss: 195.82993, 2h 49m 12s (- 3h 35m 37s)
159 | epoch: 44, iter: 1560000 (44.2541%), loss: 173.04376, 2h 50m 34s (- 3h 34m 52s)
160 | epoch: 44, iter: 1570000 (44.5377%), loss: 194.38690, 2h 51m 56s (- 3h 34m 6s)
161 | epoch: 44, iter: 1580000 (44.8214%), loss: 194.51451, 2h 53m 16s (- 3h 33m 18s)
162 | epoch: 45, iter: 1590000 (45.1051%), loss: 71.48242, 2h 54m 49s (- 3h 32m 46s)
163 | epoch: 45, iter: 1600000 (45.3888%), loss: 193.77686, 2h 56m 29s (- 3h 32m 21s)
164 | epoch: 45, iter: 1610000 (45.6725%), loss: 193.85211, 2h 58m 9s (- 3h 31m 54s)
165 | epoch: 45, iter: 1620000 (45.9561%), loss: 195.10095, 2h 59m 50s (- 3h 31m 29s)
166 | epoch: 46, iter: 1630000 (46.2398%), loss: 163.05437, 3h 1m 29s (- 3h 31m 0s)
167 | epoch: 46, iter: 1640000 (46.5235%), loss: 193.99109, 3h 3m 4s (- 3h 30m 26s)
168 | epoch: 46, iter: 1650000 (46.8072%), loss: 193.76603, 3h 4m 43s (- 3h 29m 55s)
169 | epoch: 47, iter: 1660000 (47.0909%), loss: 60.89032, 3h 6m 20s (- 3h 29m 21s)
170 | epoch: 47, iter: 1670000 (47.3745%), loss: 193.86451, 3h 7m 56s (- 3h 28m 45s)
171 | epoch: 47, iter: 1680000 (47.6582%), loss: 193.51411, 3h 9m 32s (- 3h 28m 10s)
172 | epoch: 47, iter: 1690000 (47.9419%), loss: 193.61232, 3h 11m 10s (- 3h 27m 35s)
173 | epoch: 48, iter: 1700000 (48.2256%), loss: 152.46293, 3h 13m 2s (- 3h 27m 15s)
174 | epoch: 48, iter: 1710000 (48.5093%), loss: 192.63297, 3h 14m 57s (- 3h 26m 56s)
175 | epoch: 48, iter: 1720000 (48.7929%), loss: 193.65949, 3h 16m 49s (- 3h 26m 33s)
176 | epoch: 49, iter: 1730000 (49.0766%), loss: 51.62913, 3h 18m 24s (- 3h 25m 52s)
177 | epoch: 49, iter: 1740000 (49.3603%), loss: 192.14340, 3h 19m 58s (- 3h 25m 9s)
178 | epoch: 49, iter: 1750000 (49.6440%), loss: 192.66957, 3h 21m 59s (- 3h 24m 53s)
179 | epoch: 49, iter: 1760000 (49.9277%), loss: 193.33952, 3h 24m 2s (- 3h 24m 37s)
180 | epoch: 50, iter: 1770000 (50.2113%), loss: 142.94142, 3h 26m 6s (- 3h 24m 22s)
181 | epoch: 50, iter: 1780000 (50.4950%), loss: 191.93783, 3h 28m 9s (- 3h 24m 4s)
182 | epoch: 50, iter: 1790000 (50.7787%), loss: 192.62905, 3h 29m 46s (- 3h 23m 20s)
183 | epoch: 51, iter: 1800000 (51.0624%), loss: 41.93212, 3h 31m 11s (- 3h 22m 24s)
184 | epoch: 51, iter: 1810000 (51.3461%), loss: 191.50055, 3h 32m 36s (- 3h 21m 27s)
185 | epoch: 51, iter: 1820000 (51.6297%), loss: 191.93131, 3h 34m 2s (- 3h 20m 32s)
186 | epoch: 51, iter: 1830000 (51.9134%), loss: 192.73529, 3h 35m 27s (- 3h 19m 34s)
187 | epoch: 52, iter: 1840000 (52.1971%), loss: 132.04742, 3h 36m 51s (- 3h 18m 36s)
188 | epoch: 52, iter: 1850000 (52.4808%), loss: 191.65766, 3h 38m 16s (- 3h 17m 38s)
189 | epoch: 52, iter: 1860000 (52.7645%), loss: 192.32697, 3h 39m 40s (- 3h 16m 39s)
190 | epoch: 53, iter: 1870000 (53.0481%), loss: 31.68704, 3h 41m 1s (- 3h 15m 37s)
191 | epoch: 53, iter: 1880000 (53.3318%), loss: 190.24711, 3h 42m 16s (- 3h 14m 29s)
192 | epoch: 53, iter: 1890000 (53.6155%), loss: 191.68136, 3h 43m 41s (- 3h 13m 31s)
193 | epoch: 53, iter: 1900000 (53.8992%), loss: 191.96693, 3h 45m 0s (- 3h 12m 26s)
194 | epoch: 54, iter: 1910000 (54.1829%), loss: 121.97977, 3h 46m 35s (- 3h 11m 36s)
195 | epoch: 54, iter: 1920000 (54.4665%), loss: 191.36788, 3h 47m 57s (- 3h 10m 34s)
196 | epoch: 54, iter: 1930000 (54.7502%), loss: 191.69581, 3h 49m 21s (- 3h 9m 33s)
197 | epoch: 55, iter: 1940000 (55.0339%), loss: 22.47068, 3h 50m 45s (- 3h 8m 32s)
198 | epoch: 55, iter: 1950000 (55.3176%), loss: 189.54484, 3h 52m 8s (- 3h 7m 30s)
199 | epoch: 55, iter: 1960000 (55.6013%), loss: 191.35433, 3h 53m 29s (- 3h 6m 27s)
200 | epoch: 55, iter: 1970000 (55.8849%), loss: 191.31744, 3h 54m 52s (- 3h 5m 24s)
201 | epoch: 56, iter: 1980000 (56.1686%), loss: 112.48494, 3h 56m 16s (- 3h 4m 22s)
202 | epoch: 56, iter: 1990000 (56.4523%), loss: 190.35771, 3h 57m 37s (- 3h 3m 18s)
203 | epoch: 56, iter: 2000000 (56.7360%), loss: 191.57365, 3h 58m 58s (- 3h 2m 13s)
204 | epoch: 57, iter: 2010000 (57.0197%), loss: 13.13259, 4h 0m 21s (- 3h 1m 10s)
205 | epoch: 57, iter: 2020000 (57.3033%), loss: 189.06992, 4h 1m 42s (- 3h 0m 5s)
206 | epoch: 57, iter: 2030000 (57.5870%), loss: 190.45278, 4h 3m 4s (- 2h 59m 1s)
207 | epoch: 57, iter: 2040000 (57.8707%), loss: 191.29769, 4h 4m 28s (- 2h 57m 58s)
208 | epoch: 58, iter: 2050000 (58.1544%), loss: 102.97000, 4h 5m 53s (- 2h 56m 55s)
209 | epoch: 58, iter: 2060000 (58.4381%), loss: 189.96727, 4h 7m 15s (- 2h 55m 51s)
210 | epoch: 58, iter: 2070000 (58.7217%), loss: 189.94243, 4h 8m 37s (- 2h 54m 46s)
211 | epoch: 59, iter: 2080000 (59.0054%), loss: 3.58441, 4h 10m 1s (- 2h 53m 42s)
212 | epoch: 59, iter: 2090000 (59.2891%), loss: 188.07650, 4h 11m 24s (- 2h 52m 38s)
213 | epoch: 59, iter: 2100000 (59.5728%), loss: 189.51323, 4h 12m 48s (- 2h 51m 33s)
214 | epoch: 59, iter: 2110000 (59.8565%), loss: 190.90891, 4h 14m 11s (- 2h 50m 28s)
215 | epoch: 60, iter: 2120000 (60.1401%), loss: 93.07775, 4h 15m 35s (- 2h 49m 24s)
216 | epoch: 60, iter: 2130000 (60.4238%), loss: 189.24776, 4h 16m 57s (- 2h 48m 18s)
217 | epoch: 60, iter: 2140000 (60.7075%), loss: 189.21180, 4h 18m 21s (- 2h 47m 13s)
218 | epoch: 60, iter: 2150000 (60.9912%), loss: 190.79515, 4h 19m 25s (- 2h 45m 55s)
219 | epoch: 61, iter: 2160000 (61.2749%), loss: 182.08201, 4h 20m 27s (- 2h 44m 36s)
220 | epoch: 61, iter: 2170000 (61.5585%), loss: 189.82015, 4h 21m 43s (- 2h 43m 26s)
221 | epoch: 61, iter: 2180000 (61.8422%), loss: 190.10941, 4h 23m 8s (- 2h 42m 21s)
222 | epoch: 62, iter: 2190000 (62.1259%), loss: 82.96230, 4h 24m 27s (- 2h 41m 13s)
223 | epoch: 62, iter: 2200000 (62.4096%), loss: 188.30301, 4h 25m 35s (- 2h 39m 58s)
224 | epoch: 62, iter: 2210000 (62.6933%), loss: 189.52637, 4h 26m 41s (- 2h 38m 41s)
225 | epoch: 62, iter: 2220000 (62.9769%), loss: 190.43668, 4h 27m 46s (- 2h 37m 25s)
226 | epoch: 63, iter: 2230000 (63.2606%), loss: 172.52425, 4h 28m 52s (- 2h 36m 9s)
227 | epoch: 63, iter: 2240000 (63.5443%), loss: 188.84229, 4h 29m 57s (- 2h 34m 52s)
228 | epoch: 63, iter: 2250000 (63.8280%), loss: 189.59928, 4h 31m 2s (- 2h 33m 36s)
229 | epoch: 64, iter: 2260000 (64.1117%), loss: 73.24027, 4h 32m 8s (- 2h 32m 20s)
230 | epoch: 64, iter: 2270000 (64.3953%), loss: 187.56424, 4h 33m 13s (- 2h 31m 4s)
231 | epoch: 64, iter: 2280000 (64.6790%), loss: 189.16940, 4h 34m 19s (- 2h 29m 48s)
232 | epoch: 64, iter: 2290000 (64.9627%), loss: 190.08061, 4h 35m 24s (- 2h 28m 32s)
233 | epoch: 65, iter: 2300000 (65.2464%), loss: 163.61077, 4h 36m 32s (- 2h 27m 18s)
234 | epoch: 65, iter: 2310000 (65.5301%), loss: 188.06103, 4h 37m 49s (- 2h 26m 8s)
235 | epoch: 65, iter: 2320000 (65.8137%), loss: 188.57039, 4h 39m 15s (- 2h 25m 3s)
236 | epoch: 66, iter: 2330000 (66.0974%), loss: 63.89059, 4h 40m 32s (- 2h 23m 53s)
237 | epoch: 66, iter: 2340000 (66.3811%), loss: 187.96240, 4h 41m 55s (- 2h 22m 47s)
238 | epoch: 66, iter: 2350000 (66.6648%), loss: 188.44596, 4h 43m 17s (- 2h 21m 39s)
239 | epoch: 66, iter: 2360000 (66.9485%), loss: 189.03845, 4h 44m 34s (- 2h 20m 29s)
240 | epoch: 67, iter: 2370000 (67.2321%), loss: 153.15918, 4h 45m 46s (- 2h 19m 16s)
241 | epoch: 67, iter: 2380000 (67.5158%), loss: 188.08269, 4h 46m 55s (- 2h 18m 2s)
242 | epoch: 67, iter: 2390000 (67.7995%), loss: 188.58399, 4h 48m 4s (- 2h 16m 49s)
243 | epoch: 68, iter: 2400000 (68.0832%), loss: 54.74805, 4h 49m 14s (- 2h 15m 35s)
244 | epoch: 68, iter: 2410000 (68.3669%), loss: 187.11809, 4h 50m 23s (- 2h 14m 21s)
245 | epoch: 68, iter: 2420000 (68.6505%), loss: 188.76441, 4h 51m 31s (- 2h 13m 7s)
246 | epoch: 68, iter: 2430000 (68.9342%), loss: 188.09942, 4h 52m 39s (- 2h 11m 53s)
247 | epoch: 69, iter: 2440000 (69.2179%), loss: 142.46104, 4h 53m 47s (- 2h 10m 39s)
248 | epoch: 69, iter: 2450000 (69.5016%), loss: 188.07412, 4h 54m 56s (- 2h 9m 25s)
249 | epoch: 69, iter: 2460000 (69.7853%), loss: 188.46198, 4h 56m 4s (- 2h 8m 11s)
250 | epoch: 70, iter: 2470000 (70.0689%), loss: 45.00636, 4h 57m 16s (- 2h 6m 59s)
251 | epoch: 70, iter: 2480000 (70.3526%), loss: 186.25546, 4h 58m 24s (- 2h 5m 45s)
252 | epoch: 70, iter: 2490000 (70.6363%), loss: 187.25174, 4h 59m 30s (- 2h 4m 30s)
253 | epoch: 70, iter: 2500000 (70.9200%), loss: 189.34450, 5h 0m 34s (- 2h 3m 14s)
254 | epoch: 71, iter: 2510000 (71.2037%), loss: 134.05612, 5h 1m 42s (- 2h 2m 1s)
255 | epoch: 71, iter: 2520000 (71.4873%), loss: 186.95486, 5h 2m 48s (- 2h 0m 46s)
256 | epoch: 71, iter: 2530000 (71.7710%), loss: 187.59962, 5h 3m 56s (- 1h 59m 32s)
257 | epoch: 72, iter: 2540000 (72.0547%), loss: 36.19762, 5h 5m 6s (- 1h 58m 19s)
258 | epoch: 72, iter: 2550000 (72.3384%), loss: 186.68176, 5h 6m 16s (- 1h 57m 7s)
259 | epoch: 72, iter: 2560000 (72.6221%), loss: 187.22045, 5h 7m 24s (- 1h 55m 53s)
260 | epoch: 72, iter: 2570000 (72.9057%), loss: 187.31267, 5h 8m 31s (- 1h 54m 39s)
261 | epoch: 73, iter: 2580000 (73.1894%), loss: 124.06747, 5h 9m 39s (- 1h 53m 25s)
262 | epoch: 73, iter: 2590000 (73.4731%), loss: 186.53858, 5h 10m 47s (- 1h 52m 12s)
263 | epoch: 73, iter: 2600000 (73.7568%), loss: 187.53413, 5h 11m 54s (- 1h 50m 58s)
264 | epoch: 74, iter: 2610000 (74.0405%), loss: 26.42638, 5h 13m 3s (- 1h 49m 45s)
265 | epoch: 74, iter: 2620000 (74.3241%), loss: 186.32935, 5h 14m 12s (- 1h 48m 32s)
266 | epoch: 74, iter: 2630000 (74.6078%), loss: 186.82291, 5h 15m 21s (- 1h 47m 19s)
267 | epoch: 74, iter: 2640000 (74.8915%), loss: 187.14992, 5h 16m 29s (- 1h 46m 6s)
268 | epoch: 75, iter: 2650000 (75.1752%), loss: 114.61958, 5h 17m 35s (- 1h 44m 52s)
269 | epoch: 75, iter: 2660000 (75.4589%), loss: 186.03291, 5h 18m 36s (- 1h 43m 37s)
270 | epoch: 75, iter: 2670000 (75.7425%), loss: 187.27173, 5h 19m 40s (- 1h 42m 22s)
271 | epoch: 76, iter: 2680000 (76.0262%), loss: 17.14513, 5h 20m 41s (- 1h 41m 7s)
272 | epoch: 76, iter: 2690000 (76.3099%), loss: 185.77682, 5h 21m 45s (- 1h 39m 53s)
273 | epoch: 76, iter: 2700000 (76.5936%), loss: 186.77826, 5h 22m 45s (- 1h 38m 37s)
274 | epoch: 76, iter: 2710000 (76.8773%), loss: 187.23163, 5h 23m 43s (- 1h 37m 21s)
275 | epoch: 77, iter: 2720000 (77.1609%), loss: 105.15286, 5h 24m 45s (- 1h 36m 7s)
276 | epoch: 77, iter: 2730000 (77.4446%), loss: 186.09209, 5h 25m 44s (- 1h 34m 52s)
277 | epoch: 77, iter: 2740000 (77.7283%), loss: 186.81952, 5h 26m 46s (- 1h 33m 37s)
278 | epoch: 78, iter: 2750000 (78.0120%), loss: 7.82204, 5h 27m 44s (- 1h 32m 22s)
279 | epoch: 78, iter: 2760000 (78.2957%), loss: 185.20877, 5h 28m 46s (- 1h 31m 8s)
280 | epoch: 78, iter: 2770000 (78.5793%), loss: 186.83001, 5h 30m 11s (- 1h 30m 0s)
281 | epoch: 78, iter: 2780000 (78.8630%), loss: 186.80129, 5h 31m 38s (- 1h 28m 53s)
282 | epoch: 79, iter: 2790000 (79.1467%), loss: 95.57875, 5h 33m 10s (- 1h 27m 47s)
283 | epoch: 79, iter: 2800000 (79.4304%), loss: 185.83061, 5h 34m 39s (- 1h 26m 39s)
284 | epoch: 79, iter: 2810000 (79.7141%), loss: 186.88767, 5h 36m 9s (- 1h 25m 32s)
285 | epoch: 79, iter: 2820000 (79.9977%), loss: 186.91404, 5h 37m 39s (- 1h 24m 25s)
286 | epoch: 80, iter: 2830000 (80.2814%), loss: 183.73249, 5h 39m 10s (- 1h 23m 18s)
287 | epoch: 80, iter: 2840000 (80.5651%), loss: 185.67632, 5h 40m 41s (- 1h 22m 11s)
288 | epoch: 80, iter: 2850000 (80.8488%), loss: 187.01350, 5h 42m 11s (- 1h 21m 3s)
289 | epoch: 81, iter: 2860000 (81.1325%), loss: 86.17994, 5h 43m 42s (- 1h 19m 55s)
290 | epoch: 81, iter: 2870000 (81.4161%), loss: 185.69051, 5h 45m 11s (- 1h 18m 47s)
291 | epoch: 81, iter: 2880000 (81.6998%), loss: 186.18324, 5h 46m 41s (- 1h 17m 39s)
292 | epoch: 81, iter: 2890000 (81.9835%), loss: 186.92896, 5h 48m 11s (- 1h 16m 30s)
293 | epoch: 82, iter: 2900000 (82.2672%), loss: 173.45726, 5h 49m 40s (- 1h 15m 22s)
294 | epoch: 82, iter: 2910000 (82.5508%), loss: 186.54825, 5h 51m 10s (- 1h 14m 13s)
295 | epoch: 82, iter: 2920000 (82.8345%), loss: 185.77585, 5h 52m 43s (- 1h 13m 5s)
296 | epoch: 83, iter: 2930000 (83.1182%), loss: 76.42856, 5h 54m 18s (- 1h 11m 57s)
297 | epoch: 83, iter: 2940000 (83.4019%), loss: 185.42695, 5h 55m 47s (- 1h 10m 48s)
298 | epoch: 83, iter: 2950000 (83.6856%), loss: 186.75486, 5h 57m 15s (- 1h 9m 38s)
299 | epoch: 83, iter: 2960000 (83.9692%), loss: 186.05918, 5h 58m 48s (- 1h 8m 29s)
300 | epoch: 84, iter: 2970000 (84.2529%), loss: 165.03357, 6h 0m 28s (- 1h 7m 22s)
301 | epoch: 84, iter: 2980000 (84.5366%), loss: 185.36279, 6h 2m 6s (- 1h 6m 14s)
302 | epoch: 84, iter: 2990000 (84.8203%), loss: 185.68220, 6h 3m 38s (- 1h 5m 4s)
303 | epoch: 85, iter: 3000000 (85.1040%), loss: 67.56534, 6h 4m 57s (- 1h 3m 52s)
304 | epoch: 85, iter: 3010000 (85.3876%), loss: 185.08859, 6h 6m 8s (- 1h 2m 39s)
305 | epoch: 85, iter: 3020000 (85.6713%), loss: 185.98866, 6h 7m 18s (- 1h 1m 25s)
306 | epoch: 85, iter: 3030000 (85.9550%), loss: 186.04008, 6h 8m 30s (- 1h 0m 12s)
307 | epoch: 86, iter: 3040000 (86.2387%), loss: 154.25655, 6h 9m 43s (- 0h 58m 59s)
308 | epoch: 86, iter: 3050000 (86.5224%), loss: 186.40076, 6h 10m 54s (- 0h 57m 46s)
309 | epoch: 86, iter: 3060000 (86.8060%), loss: 186.47730, 6h 12m 6s (- 0h 56m 33s)
310 | epoch: 87, iter: 3070000 (87.0897%), loss: 58.51510, 6h 13m 16s (- 0h 55m 20s)
311 | epoch: 87, iter: 3080000 (87.3734%), loss: 184.87556, 6h 14m 27s (- 0h 54m 6s)
312 | epoch: 87, iter: 3090000 (87.6571%), loss: 184.98283, 6h 15m 37s (- 0h 52m 53s)
313 | epoch: 87, iter: 3100000 (87.9408%), loss: 186.32554, 6h 16m 47s (- 0h 51m 40s)
314 | epoch: 88, iter: 3110000 (88.2244%), loss: 145.56026, 6h 17m 58s (- 0h 50m 26s)
315 | epoch: 88, iter: 3120000 (88.5081%), loss: 185.61519, 6h 19m 8s (- 0h 49m 13s)
316 | epoch: 88, iter: 3130000 (88.7918%), loss: 185.73383, 6h 20m 17s (- 0h 48m 0s)
317 | epoch: 89, iter: 3140000 (89.0755%), loss: 49.11769, 6h 21m 33s (- 0h 46m 47s)
318 | epoch: 89, iter: 3150000 (89.3592%), loss: 184.29239, 6h 22m 44s (- 0h 45m 34s)
319 | epoch: 89, iter: 3160000 (89.6428%), loss: 185.69499, 6h 23m 56s (- 0h 44m 21s)
320 | epoch: 89, iter: 3170000 (89.9265%), loss: 185.43642, 6h 25m 7s (- 0h 43m 8s)
321 | epoch: 90, iter: 3180000 (90.2102%), loss: 136.36600, 6h 26m 20s (- 0h 41m 55s)
322 | epoch: 90, iter: 3190000 (90.4939%), loss: 184.45063, 6h 27m 36s (- 0h 40m 43s)
323 | epoch: 90, iter: 3200000 (90.7776%), loss: 185.33140, 6h 28m 35s (- 0h 39m 28s)
324 | epoch: 91, iter: 3210000 (91.0612%), loss: 39.55210, 6h 29m 35s (- 0h 38m 14s)
325 | epoch: 91, iter: 3220000 (91.3449%), loss: 184.63664, 6h 30m 36s (- 0h 37m 0s)
326 | epoch: 91, iter: 3230000 (91.6286%), loss: 184.75182, 6h 31m 36s (- 0h 35m 46s)
327 | epoch: 91, iter: 3240000 (91.9123%), loss: 185.16648, 6h 32m 35s (- 0h 34m 32s)
328 | epoch: 92, iter: 3250000 (92.1960%), loss: 127.44406, 6h 33m 34s (- 0h 33m 18s)
329 | epoch: 92, iter: 3260000 (92.4796%), loss: 184.26087, 6h 34m 36s (- 0h 32m 5s)
330 | epoch: 92, iter: 3270000 (92.7633%), loss: 184.61929, 6h 35m 36s (- 0h 30m 51s)
331 | epoch: 93, iter: 3280000 (93.0470%), loss: 30.61833, 6h 36m 35s (- 0h 29m 38s)
332 | epoch: 93, iter: 3290000 (93.3307%), loss: 183.67716, 6h 37m 34s (- 0h 28m 24s)
333 | epoch: 93, iter: 3300000 (93.6144%), loss: 184.60973, 6h 38m 32s (- 0h 27m 11s)
334 | epoch: 93, iter: 3310000 (93.8980%), loss: 185.73265, 6h 39m 34s (- 0h 25m 57s)
335 | epoch: 94, iter: 3320000 (94.1817%), loss: 117.35624, 6h 40m 36s (- 0h 24m 44s)
336 | epoch: 94, iter: 3330000 (94.4654%), loss: 183.38167, 6h 41m 36s (- 0h 23m 31s)
337 | epoch: 94, iter: 3340000 (94.7491%), loss: 184.96043, 6h 42m 38s (- 0h 22m 18s)
338 | epoch: 95, iter: 3350000 (95.0328%), loss: 21.26559, 6h 43m 43s (- 0h 21m 6s)
339 | epoch: 95, iter: 3360000 (95.3164%), loss: 183.66279, 6h 44m 42s (- 0h 19m 53s)
340 | epoch: 95, iter: 3370000 (95.6001%), loss: 184.92417, 6h 45m 41s (- 0h 18m 40s)
341 | epoch: 95, iter: 3380000 (95.8838%), loss: 185.02349, 6h 46m 44s (- 0h 17m 27s)
342 | epoch: 96, iter: 3390000 (96.1675%), loss: 108.06199, 6h 47m 44s (- 0h 16m 14s)
343 | epoch: 96, iter: 3400000 (96.4512%), loss: 183.88079, 6h 48m 44s (- 0h 15m 2s)
344 | epoch: 96, iter: 3410000 (96.7348%), loss: 184.99662, 6h 49m 47s (- 0h 13m 49s)
345 | epoch: 97, iter: 3420000 (97.0185%), loss: 12.03456, 6h 50m 48s (- 0h 12m 37s)
346 | epoch: 97, iter: 3430000 (97.3022%), loss: 183.98048, 6h 51m 49s (- 0h 11m 25s)
347 | epoch: 97, iter: 3440000 (97.5859%), loss: 184.29520, 6h 52m 48s (- 0h 10m 12s)
348 | epoch: 97, iter: 3450000 (97.8696%), loss: 185.03215, 6h 53m 50s (- 0h 9m 0s)
349 | epoch: 98, iter: 3460000 (98.1532%), loss: 99.17649, 6h 54m 52s (- 0h 7m 48s)
350 | epoch: 98, iter: 3470000 (98.4369%), loss: 183.97261, 6h 55m 51s (- 0h 6m 36s)
351 | epoch: 98, iter: 3480000 (98.7206%), loss: 184.93565, 6h 56m 53s (- 0h 5m 24s)
352 | epoch: 99, iter: 3490000 (99.0043%), loss: 2.76205, 6h 57m 52s (- 0h 4m 12s)
353 | epoch: 99, iter: 3500000 (99.2880%), loss: 183.49325, 6h 58m 51s (- 0h 3m 0s)
354 | epoch: 99, iter: 3510000 (99.5716%), loss: 183.66532, 6h 59m 55s (- 0h 1m 48s)
355 | epoch: 99, iter: 3520000 (99.8553%), loss: 185.75300, 7h 0m 58s (- 0h 0m 36s)
356 |
--------------------------------------------------------------------------------
/glove/model/GloveModel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn #神经网络工具箱torch.nn
3 | import torch.nn.functional as F #神经网络函数torch.nn.functional
4 | import numpy as np
5 | import sys
6 | import math
7 |
class GloveModel(nn.Module):
    """GloVe model: two embedding tables plus per-word scalar biases.

    ``v`` holds the vectors looked up with the first index (``i``) and ``w``
    the vectors looked up with the second index (``j``); ``biasv`` and
    ``biasw`` are the matching one-dimensional bias tables.
    """

    def __init__(self, vocab_size, embed_size):
        """Create the embedding and bias tables.

        :param vocab_size: number of rows in every table
        :param embed_size: dimensionality of the word vectors
        """
        super().__init__()
        self.vocab_size = vocab_size
        self.embed_size = embed_size

        # v and w are the two sets of word vectors; each word also gets one
        # scalar bias per table.
        self.v = nn.Embedding(vocab_size, embed_size)
        self.w = nn.Embedding(vocab_size, embed_size)
        self.biasv = nn.Embedding(vocab_size, 1)
        self.biasw = nn.Embedding(vocab_size, 1)

        # Re-initialise the word vectors uniformly in a small range that
        # shrinks with the embedding size (biases keep nn.Embedding's default).
        initrange = 0.5 / self.embed_size
        self.v.weight.data.uniform_(-initrange, initrange)
        self.w.weight.data.uniform_(-initrange, initrange)

    def forward(self, i, j, co_occur, weight):
        """Return the weighted squared-error loss summed over the batch.

        For every pair ``k`` the residual is
        ``v[i_k] . w[j_k] + biasv[i_k] + biasw[j_k] - log(co_occur_k)`` and the
        pair contributes ``0.5 * weight_k * residual_k ** 2``.

        :param i: integer tensor of indices into ``v`` / ``biasv``
        :param j: integer tensor of indices into ``w`` / ``biasw``, same shape as ``i``
        :param co_occur: co-occurrence counts, same shape as ``i``; must be > 0
                         because the logarithm is taken
        :param weight: per-pair weights, same shape as ``i``
        :return: scalar tensor holding the summed loss
        """
        vi = self.v(i)
        wj = self.w(j)
        # The bias tables have a trailing dimension of size 1. Drop it so the
        # biases line up element-wise with the dot products; without this,
        # (batch,) + (batch, 1) silently broadcasts to (batch, batch) and the
        # loss mixes every pair with every other pair's biases and counts.
        bi = self.biasv(i).squeeze(-1)
        bj = self.biasw(j).squeeze(-1)

        similarity = torch.sum(torch.mul(vi, wj), dim=-1)

        residual = similarity + bi + bj - torch.log(co_occur)
        loss = 0.5 * weight * residual * residual

        # sum() already yields a scalar, so no further reduction is needed.
        return loss.sum()

    def gloveMatrix(self):
        """Return the final word vectors as a NumPy array.

        The two embedding tables are added element-wise to form the result.

        :return: array of shape (vocab_size, embed_size)
        """
        # detach().cpu() keeps this working when the parameters require grad
        # or live on a non-CPU device.
        v_weights = self.v.weight.detach().cpu().numpy()
        w_weights = self.w.weight.detach().cpu().numpy()
        return v_weights + w_weights
45 |
--------------------------------------------------------------------------------
/glove/model/GloveTrain.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.utils.data as tud #Pytorch读取训练集需要用到torch.utils.data类
3 |
4 | from collections import Counter
5 | from sklearn.metrics.pairwise import cosine_similarity
6 |
7 | import pandas as pd
8 | import numpy as np
9 | import scipy
10 |
11 | import time
12 | import math
13 | import random
14 | import sys
15 | import matplotlib.pyplot as plt
16 |
17 | from WordEmbeddingDataset import WordEmbeddingDataset
18 | from GloveModel import GloveModel
19 |
EMBEDDING_SIZE = 50  # dimensionality of each word vector
MAX_VOCAB_SIZE = 2000  # vocabulary size, including the '' bucket that collects all other words
WINDOW_SIZE = 5  # number of context positions taken on each side of the center word

NUM_EPOCHS = 10  # number of passes over the training pairs
BATCH_SIZE = 10  # number of (i, j) pairs per optimisation step
LEARNING_RATE = 0.05  # learning rate handed to the Adagrad optimizer

TEXT_SIZE = 20000000  # maximum number of corpus tokens to load
# The log and checkpoint file names embed the embedding size.
LOG_FILE = "../logs/glove-{}.log".format(EMBEDDING_SIZE)
WEIGHT_FILE = "../weights/glove-{}.th".format(EMBEDDING_SIZE)
31 |
def getCorpus(filetype, size):
    """Load a text8 split and derive the vocabulary statistics.

    :param filetype: 'dev' or 'test' select those splits; any other value
                     selects the training split
    :param size: maximum number of tokens kept from the start of the file
    :return: tuple ``(text, idx_to_word, word_to_idx, word_counts, word_freqs)``
             where ``text`` is the truncated, lower-cased token list and the
             remaining items describe the ``MAX_VOCAB_SIZE - 1`` most common
             words plus a final '' entry that counts every other token
    """
    # Start from the training split and switch only for the two named splits.
    filepath = '../corpus/text8.train.txt'
    if filetype == 'dev':
        filepath = '../corpus/text8.dev.txt'
    elif filetype == 'test':
        filepath = '../corpus/text8.test.txt'

    with open(filepath, "r") as f:
        raw = f.read()

    tokens = raw.lower().split()
    text = tokens[: min(len(tokens), size)]

    # Most frequent words first; the '' key absorbs the count of all the rest.
    vocab_dict = dict(Counter(text).most_common(MAX_VOCAB_SIZE - 1))
    vocab_dict[''] = len(text) - sum(list(vocab_dict.values()))

    idx_to_word = list(vocab_dict.keys())
    word_to_idx = dict((word, ind) for ind, word in enumerate(idx_to_word))

    word_counts = np.array(list(vocab_dict.values()), dtype=np.float32)
    word_freqs = word_counts / sum(word_counts)

    print("Words list length:{}".format(len(text)))
    print("Vocab size:{}".format(len(idx_to_word)))
    return text, idx_to_word, word_to_idx, word_counts, word_freqs
53 |
def buildCooccuranceMatrix(text, word_to_idx, window_size=None):
    """Count how often each pair of vocabulary ids appears within a window.

    For every position in ``text`` the up-to-``window_size`` tokens on each
    side are treated as context and ``matrix[center_id, context_id]`` is
    incremented. The window is clipped at both ends of the text, so the first
    and last tokens are not treated as neighbours of each other.

    :param text: list of tokens
    :param word_to_idx: mapping from word to id; must contain the key ""
                        whose id is used for words missing from the mapping
    :param window_size: context positions per side; defaults to the
                        module-level ``WINDOW_SIZE`` when omitted
    :return: int32 array of shape (len(word_to_idx), len(word_to_idx))
    :raises KeyError: if ``word_to_idx`` has no "" entry
    """
    if window_size is None:
        window_size = WINDOW_SIZE
    vocab_size = len(word_to_idx)
    maxlength = len(text)
    # Look the fallback id up once instead of once per token.
    unknown_id = word_to_idx[""]
    text_ids = [word_to_idx.get(word, unknown_id) for word in text]
    cooccurance_matrix = np.zeros((vocab_size, vocab_size), dtype=np.int32)
    print("Co-Matrix consumed mem:%.2fMB" % (sys.getsizeof(cooccurance_matrix)/(1024*1024)))
    for i, center_word_id in enumerate(text_ids):
        # Clip the window to the valid index range; the previous modulo
        # arithmetic wrapped around and paired the two ends of the corpus.
        left = max(0, i - window_size)
        right = min(maxlength, i + window_size + 1)
        for context_word_id in text_ids[left:i]:
            cooccurance_matrix[center_word_id, context_word_id] += 1
        for context_word_id in text_ids[i + 1:right]:
            cooccurance_matrix[center_word_id, context_word_id] += 1
        if (i+1) % 1000000 == 0:
            print(">>>>> Process %dth word" % (i+1))
    print(">>>>> Build co-occurance matrix completed.")
    return cooccurance_matrix
70 |
def buildWeightMatrix(co_matrix, xmax=100.0, alpha=0.75):
    """Compute the per-pair loss weights from the co-occurrence counts.

    Each weight is ``(count / xmax) ** alpha`` when ``count < xmax`` and 1
    otherwise, so a count of 0 yields a weight of 0.

    :param co_matrix: 2-D array of co-occurrence counts
    :param xmax: count at or above which the weight saturates at 1
    :param alpha: exponent applied to the scaled count
    :return: float32 array with the same shape as ``co_matrix``
    """
    weight_matrix = np.zeros_like(co_matrix, dtype=np.float32)
    print("Weight-Matrix consumed mem:%.2fMB" % (sys.getsizeof(weight_matrix) / (1024 * 1024)))
    for i in range(co_matrix.shape[0]):
        # One vectorised expression per row replaces the per-element Python
        # loop while keeping the row-based progress messages.
        row = co_matrix[i]
        weight_matrix[i] = np.where(row < xmax, np.power(row / xmax, alpha), 1.0)
        if (i+1) % 1000 == 0:
            print(">>>>> Process %dth weight" % (i+1))
    print(">>>>> Build weight matrix completed.")
    return weight_matrix
82 |
def find_nearest(word, embedding_weights, vocab=None, words=None):
    """Return the 10 vocabulary words closest to ``word`` by cosine distance.

    :param word: query word; must be a key of the word-to-index mapping
    :param embedding_weights: 2-D array with one embedding per row
    :param vocab: optional word-to-index mapping; defaults to the module-level
                  ``word_to_idx``, which is only defined when this file runs
                  as a script
    :param words: optional index-to-word sequence; defaults to the
                  module-level ``idx_to_word``
    :return: list of at most 10 words ordered by increasing cosine distance
    :raises KeyError: if ``word`` is not in the mapping
    """
    if vocab is None:
        vocab = word_to_idx
    if words is None:
        words = idx_to_word

    matrix = np.asarray(embedding_weights, dtype=np.float64)
    target = matrix[vocab[word]]

    # Cosine distance of every row to the query, computed in one shot with
    # NumPy so the function does not rely on scipy.spatial having been loaded.
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(target)
    # All-zero rows produce 0/0 = nan; argsort places nan entries last.
    with np.errstate(divide='ignore', invalid='ignore'):
        cos_dis = 1.0 - (matrix @ target) / norms
    return [words[i] for i in cos_dis.argsort()[:10]]
88 |
def asMinutes(s):
    """Format a duration given in seconds as ``'<H>h <M>m <S>s'``.

    :param s: duration in seconds
    :return: formatted string; each component is rendered with ``%d``
    """
    hours = math.floor(s / 3600)
    remainder = s - hours * 3600
    minutes = math.floor(remainder / 60)
    seconds = remainder - minutes * 60
    return '%dh %dm %ds' % (hours, minutes, seconds)
95 |
def timeSince(since, percent):
    """Describe the elapsed time and the estimated time remaining.

    :param since: start timestamp as returned by ``time.time()``
    :param percent: fraction of the work completed so far; must be non-zero
                    because the elapsed time is divided by it
    :return: string of the form ``'<elapsed> (- <remaining>)'``
    """
    elapsed = time.time() - since
    # Extrapolate the total duration from the fraction completed so far.
    projected_total = elapsed / percent
    remaining = projected_total - elapsed
    return '%s (- %s)' % (asMinutes(elapsed), asMinutes(remaining))
102 |
def loadModel():
    """Build a ``GloveModel`` and restore its parameters from ``WEIGHT_FILE``.

    :return: model sized by ``MAX_VOCAB_SIZE`` and ``EMBEDDING_SIZE`` with the
             saved state dict loaded
    """
    restored = GloveModel(MAX_VOCAB_SIZE, EMBEDDING_SIZE)
    saved_state = torch.load(WEIGHT_FILE)
    restored.load_state_dict(saved_state)
    return restored
108 |
def findRelationshipVector(word1, word2, word3, embeddings=None, vocab=None, words=None):
    """Print and return the 5 best completions of "word1 to word2 as word3 to ?".

    The query vector is ``vec(word2) - vec(word1) + vec(word3)``; candidates
    are ranked by cosine distance to it.

    :param word1: first word of the reference pair
    :param word2: second word of the reference pair
    :param word3: word to complete the analogy for
    :param embeddings: optional 2-D array with one embedding per row; defaults
                       to the module-level ``glove_matrix``, which is only
                       defined when this file runs as a script
    :param vocab: optional word-to-index mapping; defaults to ``word_to_idx``
    :param words: optional index-to-word sequence; defaults to ``idx_to_word``
    :return: list of the (at most 5) candidate words, nearest first
    :raises KeyError: if any of the three words is not in the mapping
    """
    if embeddings is None:
        embeddings = glove_matrix
    if vocab is None:
        vocab = word_to_idx
    if words is None:
        words = idx_to_word

    matrix = np.asarray(embeddings, dtype=np.float64)
    target = matrix[vocab[word2]] - matrix[vocab[word1]] + matrix[vocab[word3]]

    # Cosine distance of every row to the query, computed with NumPy so the
    # function does not rely on scipy.spatial having been loaded.
    norms = np.linalg.norm(matrix, axis=1) * np.linalg.norm(target)
    # All-zero vectors produce 0/0 = nan; argsort places nan entries last.
    with np.errstate(divide='ignore', invalid='ignore'):
        cos_dis = 1.0 - (matrix @ target) / norms

    nearest = [words[i] for i in cos_dis.argsort()[:5]]
    for candidate in nearest:
        print("{} to {} as {} to {}".format(word1, word2, word3, candidate))
    return nearest
117 |
if __name__ == '__main__':
    # ---- Data preparation -------------------------------------------------
    text, idx_to_word, word_to_idx, word_counts, word_freqs = getCorpus('train', size=TEXT_SIZE)  # load and preprocess the corpus
    co_matrix = buildCooccuranceMatrix(text, word_to_idx)  # build the co-occurrence matrix
    weight_matrix = buildWeightMatrix(co_matrix)  # build the weight matrix
    dataset = WordEmbeddingDataset(co_matrix, weight_matrix)  # create the dataset
    dataloader = tud.DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    model = GloveModel(MAX_VOCAB_SIZE, EMBEDDING_SIZE)  # create the model
    #model = loadModel()
    optimizer = torch.optim.Adagrad(model.parameters(), lr=LEARNING_RATE)  # Adagrad optimizer

    # ---- Training ---------------------------------------------------------
    print_every = 10000
    save_every = 50000
    epochs = NUM_EPOCHS
    # len(dataloader) counts the final partial batch too, so the progress
    # percentage stays consistent with the number of steps actually taken.
    iters_per_epoch = len(dataloader)
    total_iterations = iters_per_epoch * epochs
    print("Iterations: %d per one epoch, Total iterations: %d " % (iters_per_epoch, total_iterations))

    start = time.time()
    iteration = 0
    # The running loss is kept across epoch boundaries and averaged over the
    # number of steps actually accumulated. Resetting it at every epoch start
    # while still dividing by print_every made the first report of each epoch
    # look far too small.
    loss_print_sum = 0.0
    loss_print_count = 0
    for epoch in range(epochs):
        for i, j, co_occur, weight in dataloader:
            iteration += 1
            optimizer.zero_grad()  # clear gradients left over from the previous batch
            loss = model(i, j, co_occur, weight)  # forward pass
            loss.backward()  # backward pass
            optimizer.step()  # apply the parameter update
            loss_print_sum += loss.item()
            loss_print_count += 1

            if iteration % print_every == 0:
                time_desc = timeSince(start, iteration / total_iterations)
                iter_percent = iteration / total_iterations * 100
                loss_avg = loss_print_sum / loss_print_count
                loss_print_sum = 0.0
                loss_print_count = 0
                with open(LOG_FILE, "a") as fout:
                    fout.write("epoch: %d, iter: %d (%.4f%%), loss: %.5f, %s\n" %
                               (epoch, iteration, iter_percent, loss_avg, time_desc))
                print("epoch: %d, iter: %d/%d (%.4f%%), loss: %.5f, %s" %
                      (epoch, iteration, total_iterations, iter_percent, loss_avg, time_desc))
            if iteration % save_every == 0:
                torch.save(model.state_dict(), WEIGHT_FILE)
    torch.save(model.state_dict(), WEIGHT_FILE)

    # ---- Qualitative evaluation --------------------------------------------
    glove_matrix = model.gloveMatrix()
    for word in ["good", "one", "green", "like", "america", "queen", "better", "paris", "work", "computer", "language"]:
        # Words outside the truncated vocabulary would raise KeyError and
        # abort the script after training; skip them instead.
        if word not in word_to_idx:
            print("'{}' is not in the vocabulary, skipped".format(word))
            continue
        print(word, find_nearest(word, glove_matrix))
    for analogy in [('man', 'king', 'woman'),
                    ('america', 'washington', 'france'),
                    ('good', 'better', 'little')]:
        if all(term in word_to_idx for term in analogy):
            findRelationshipVector(*analogy)
        else:
            print("analogy {} skipped: word not in the vocabulary".format(analogy))

    # ---- Dimensionality reduction and visualisation ------------------------
    # 'nine' replaces the former 'night', a typo in the one..ten sequence.
    candidate_words = ['one','two','three','four','five','six','seven','eight','nine','ten','color','green','blue','red','black',
                       'man','woman','king','queen','wife','son','daughter','brown','zero','computer','hardware','software','system','program',
                       'america','china','france','washington','good','better','bad']
    candidate_indexes = [word_to_idx[word] for word in candidate_words if word in word_to_idx]
    choosen_indexes = candidate_indexes
    choosen_vectors = [glove_matrix[index] for index in choosen_indexes]

    # Two singular directions are plotted, so at least two vectors are needed.
    if len(choosen_vectors) >= 2:
        U, S, VH = np.linalg.svd(choosen_vectors, full_matrices=False)
        for i in range(len(choosen_indexes)):
            plt.text(U[i, 0], U[i, 1], idx_to_word[choosen_indexes[i]])

        coordinate = U[:, 0:2]
        plt.xlim((np.min(coordinate[:, 0]) - 0.1, np.max(coordinate[:, 0]) + 0.1))
        plt.ylim((np.min(coordinate[:, 1]) - 0.1, np.max(coordinate[:, 1]) + 0.1))
        plt.show()
    else:
        print("Not enough in-vocabulary words to plot.")
--------------------------------------------------------------------------------
/glove/model/WordEmbeddingDataset.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as tud
2 | import torch
3 |
class WordEmbeddingDataset(tud.Dataset):
    """Dataset of (i, j) index pairs that have a non-zero weight.

    Each sample carries the pair's indices, its co-occurrence count and its
    weight.
    """

    def __init__(self, co_matrix, weight_matrix):
        """Collect every (row, column) position whose weight is non-zero.

        :param co_matrix: 2-D array of co-occurrence counts
        :param weight_matrix: 2-D array of weights, indexed like ``co_matrix``
        """
        # Imported locally because this module has no file-level numpy import.
        import numpy as np

        self.co_matrix = co_matrix
        self.weight_matrix = weight_matrix

        # Entries with weight 0 are dropped: per the original note they are
        # the pairs with a co-occurrence count of 0, whose log(X) is not a
        # usable training target. np.nonzero scans in row-major order, which
        # matches the previous nested loops; tolist() yields plain Python ints.
        rows, cols = np.nonzero(np.asarray(weight_matrix))
        self.train_set = list(zip(rows.tolist(), cols.tolist()))

    def __len__(self):
        """Return the number of training pairs (required by ``Dataset``)."""
        return len(self.train_set)

    def __getitem__(self, index):
        """Return one training sample (required by ``Dataset``).

        :param index: position of the sample in ``train_set``
        :return: ``(i, j, co_occurrence, weight)`` where the co-occurrence
                 count is a float tensor and the weight is the raw entry of
                 ``weight_matrix``
        """
        (i, j) = self.train_set[index]
        return i, j, torch.tensor(self.co_matrix[i][j], dtype=torch.float), self.weight_matrix[i][j]
32 |
--------------------------------------------------------------------------------
/glove/model/__pycache__/GloveModel.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenyuAndroid/deep-learning-examples/f909239ee1482ff977fe3e4210ac0e714f7de728/glove/model/__pycache__/GloveModel.cpython-36.pyc
--------------------------------------------------------------------------------
/glove/model/__pycache__/GloveModel.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenyuAndroid/deep-learning-examples/f909239ee1482ff977fe3e4210ac0e714f7de728/glove/model/__pycache__/GloveModel.cpython-37.pyc
--------------------------------------------------------------------------------
/glove/model/__pycache__/WordEmbeddingDataset.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenyuAndroid/deep-learning-examples/f909239ee1482ff977fe3e4210ac0e714f7de728/glove/model/__pycache__/WordEmbeddingDataset.cpython-36.pyc
--------------------------------------------------------------------------------
/glove/model/__pycache__/WordEmbeddingDataset.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenyuAndroid/deep-learning-examples/f909239ee1482ff977fe3e4210ac0e714f7de728/glove/model/__pycache__/WordEmbeddingDataset.cpython-37.pyc
--------------------------------------------------------------------------------
/glove/model/record-50.log:
--------------------------------------------------------------------------------
1 | nohup: ignoring input
2 |
--------------------------------------------------------------------------------
/glove/weights/glove-50-1.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenyuAndroid/deep-learning-examples/f909239ee1482ff977fe3e4210ac0e714f7de728/glove/weights/glove-50-1.th
--------------------------------------------------------------------------------
/glove/weights/glove-50-2.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenyuAndroid/deep-learning-examples/f909239ee1482ff977fe3e4210ac0e714f7de728/glove/weights/glove-50-2.th
--------------------------------------------------------------------------------
/glove/weights/glove-50-3.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenyuAndroid/deep-learning-examples/f909239ee1482ff977fe3e4210ac0e714f7de728/glove/weights/glove-50-3.th
--------------------------------------------------------------------------------
/glove/weights/glove-50.th:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/chenyuAndroid/deep-learning-examples/f909239ee1482ff977fe3e4210ac0e714f7de728/glove/weights/glove-50.th
--------------------------------------------------------------------------------